-- | -- Module : Main -- Copyright : (c) OleksandrZhabenko 2020 -- License : MIT -- Stability : Experimental -- Maintainer : olexandr543@yahoo.com -- -- Analyzes a poetic text in Ukrainian, for every line prints statistic data and -- then for the whole poem prints the hypothesis evaluation information. -- Is used in pair with some other programs, e. g. with propertiesText from uniqueness-periods-vector-exampls package -- or with a new phonetic-languages-ukrainian series. -- The program tries to be more accurate in cases of the lines consisting entirely of the words -- which are unique in phonetic meaning alongside the line. Another hypothesis is for the seventh command line -- argument equal to \"y0\" that the distribution -- of the placement of the actual poetic text in Ukrainian is not one of the standard distributions. -- It can probably have approximately a form of and is different for different authors: -- -- > -- -- -- -- > / \_/ \_/ \ -- -- To enable parallel computations (potentially, they can speed up the work), please, run the @distributionText@ executable with -- @+RTS -threaded -RTS@ command line options with possibly @-N@ option inside. -- {-# OPTIONS_GHC -threaded -rtsopts #-} {-# LANGUAGE CPP, BangPatterns #-} module Main where #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__>=710 /* code that applies only to GHC 7.10.* and higher versions */ import GHC.Base (mconcat) #endif #endif import Control.Parallel.Strategies import Data.Maybe (fromMaybe) import Text.Read (readMaybe) import System.Environment import Numeric (showFFloat) import Data.List (sort) import Numeric.Stats import qualified Data.ByteString.Char8 as B import Data.Lists.FLines hiding (mconcat) import Data.Statistics.RulesIntervals #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__==708 /* code that applies only to GHC 7.8.* */ mconcat = concat #endif #endif main :: IO () main = do args <- getArgs let !gzS = concat . take 1 $ args !printInput = concat . drop 1 . take 2 $ args contents <- B.getContents innerProc gzS printInput contents innerProc :: String -> String -> B.ByteString -> IO () innerProc gzS printInput contents = do if printInput == "1" then B.putStr contents else B.putStr B.empty (!data31,!wordsCnt0_data32) <- processContents contents let !gz = getIntervalsN gzS data31 -- Obtained from the first command line argument except those ones that are for RTS !data4 = filter (/= 0.0) data31 if null data4 then putStrLn (replicate 102 '-') >> putStrLn "1.000+-0.000\tALL!" >> putStrLn (replicate 102 '=') -- Well, this means that all the text consists of the unique (in phonetic meaning) words alongside every line. A rather rare occurrence. else do let (!mean1,!disp) = meanWithDisp data4 !pairs = sort . filter ((/= 0) . snd) $ wordsCnt0_data32 g !m !n = (length . takeWhile (\(_,v) -> v == n) . dropWhile (\(_,v) -> v /= n) . takeWhile (\(u,_) -> u == m) . dropWhile (\(u,_) -> u /= m) $ pairs) `using` rdeepseq h !y !x = mconcat [mconcat . map (\m1 -> mconcat [mconcat . map (\n1 -> (if y then show (g m1 n1) else if g m1 n1 == 0 then "." else show (g m1 n1)) ++ "\t") $ [1..gz],newLineEnding]) $ [2..7],replicate 102 x] putStrLn . generalInfo1 gz pairs (mean1, disp) $ data31 putStrLn (h False '~') putStrLn (h True '=') getIntervalsN :: String -> [a] -> Int getIntervalsN xs yss | xs == "s" = sturgesH (length yss) | xs == "l" = levynskyiMod (length yss) | otherwise = fromMaybe 9 (readMaybe xs::(Maybe Int)) {-# INLINE getIntervalsN #-} processContents :: B.ByteString -> IO ([Float],[(Int,Int)]) processContents contents = do let !anlines = B.lines contents !anStrs = map (drop 6 . take 9 . B.words) anlines !ratioStrs = map (B.unpack . head) anStrs !wordsNStrs = map (B.unpack . (!! 1)) anStrs !intervalNStrs = map (B.unpack . last) anStrs !ratios = map (\xs -> fromMaybe 1.0 (readMaybe xs::Maybe Float)) ratioStrs !wordsNs = map (\xs -> fromMaybe 0 (readMaybe xs::Maybe Int)) wordsNStrs !intervalNs = map (\xs -> fromMaybe 0 (readMaybe xs::Maybe Int)) intervalNStrs return (ratios,zip wordsNs intervalNs) generalInfo1 :: Int -> [(Int,Int)] -> (Float,Float) -> [Float] -> String generalInfo1 gz pairs (mean1, disp) data31 = let !ks = map (\r -> length . takeWhile (== r) . dropWhile (/= r) . sort . map snd $ pairs) [1..gz] !s = sum ks in mconcat [replicate 102 '-', newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [1..gz], newLineEnding, mconcat . map (\r -> show r ++ "\t") $ ks, newLineEnding, mconcat . map (\r -> showFFloat (Just 2) (fromIntegral (r * 100) / fromIntegral s) "%\t") $ ks,newLineEnding, mconcat [showFFloat (Just 4) mean1 "+-", showFFloat (Just 4) (sqrt disp) "\t", show (length . filter (== 0.0) $ data31), '\t':show (length data31)], newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [2..7], newLineEnding, mconcat . map (\r -> (show . length . takeWhile (== r) . dropWhile (/= r) . map fst $ pairs) ++ "\t") $ [2..7], newLineEnding, replicate 102 '*'] {-# INLINE generalInfo1 #-}