-- | -- Module : Main -- Copyright : (c) OleksandrZhabenko 2020 -- License : MIT -- Stability : Experimental -- Maintainer : olexandr543@yahoo.com -- -- Analyzes a poetic text in Ukrainian, for every line prints statistic data and -- then for the whole poem prints the hypothesis evaluation information. Since the 0.4.0.0 version -- the program tries to be more accurate in cases of the lines consisting entirely of the words -- which are unique in phonetic meaning alongside the line. Another hypothesis is for the seventh command line -- argument (since the 0.12.0.0 version) equal to \"y0\" that the distribution -- of the placement of the actual poetic text in Ukrainian is not one of the standard distributions. -- It can probably have approximately a form of and is different for different authors: -- -- > -- -- -- -- > / \_/ \_/ \ -- -- To enable parallel computations (potentially, they can speed up the work), please, run the @uniqVec03@ executable with -- @+RTS -threaded -RTS@ command line options with possibly @-N@ option inside. -- {-# OPTIONS_GHC -threaded -rtsopts #-} {-# LANGUAGE CPP, BangPatterns, FlexibleInstances, MultiParamTypeClasses #-} module Main where #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__>=710 /* code that applies only to GHC 7.10.* and higher versions */ import GHC.Base (mconcat) #endif #endif import Control.Parallel.Strategies import Data.Maybe (fromMaybe) import Text.Read (readMaybe) import qualified Data.Vector as V import String.Languages.UniquenessPeriods.Vector import Languages.UniquenessPeriods.Vector.General.Debug hiding (newLineEnding) import Languages.UniquenessPeriods.Vector.Properties import Languages.UniquenessPeriods.Vector.PropertiesFuncRep import Melodics.Ukrainian import System.Environment import Languages.Phonetic.Ukrainian.PrepareText import Languages.UniquenessPeriods.Vector.Data import Languages.UniquenessPeriods.Vector.Auxiliary import Languages.UniquenessPeriods.Vector.StrictV hiding (mconcat) import Numeric (showFFloat) import Languages.UniquenessPeriods.Vector.Filters import GHC.Float (int2Float) import GHC.Real (ceiling) import Data.List (sort) import Data.Char (isAlpha) import Numeric.Stats import Data.Lists.FLines hiding (mconcat) import Data.Statistics.RulesIntervals import Languages.UniquenessPeriods.Vector.FuncRepRelated #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__==708 /* code that applies only to GHC 7.8.* */ mconcat = concat #endif #endif instance GetTransL (FLines [String]) [String] where getTL g (F1 xss) = g xss getTL g (FL xsss) = mconcat . map g $ xsss main :: IO () main = do args <- getArgs let !file = concat . take 1 $ args -- The first command line arguments except those ones that are RTS arguments !printLine = fromMaybe 0 (readMaybe (concat . take 1 . drop 2 $ args)::(Maybe Int)) -- The third command line argument except those ones that are RTS arguments. Set to 1 if you would like to print the current line within the information !toOneLine = fromMaybe 0 (readMaybe (concat . take 1 . drop 3 $ args)::(Maybe Int)) -- The fourth command line argument except those ones that are RTS arguments. Set to 1 if you would like to convert the text into one single line before applying to it the processment (it can be more conceptually consistent in such a case) !compareMode = fromMaybe 0 (readMaybe (concat . take 1 . drop 4 $ args)::(Maybe Int)) -- The fifth command line argument except those ones that are RTS arguments. Set to 1 if you would like to print the original version and the concatenated one's (into one line) analyses. !gzS = concat . take 1 . drop 1 $ args !twoInParallel = fromMaybe 1 (readMaybe (concat . take 1 . drop 5 $ args)::(Maybe Int)) -- The sixth command line argument that controls whether if the fifth command line argument is set to \"1\" then the evaluation is done in parallel or not (set to \"1\" --- the default one --- if you would like to evaluate in parallel, otherwise --- just usual sequential evaluation). !choice = concat . drop 6 . take 7 $ args -- The seventh command line argument that controls what properties are used. contents <- readFile file if compareMode == 0 then do let !flines = fLines toOneLine contents innerProc printLine choice flines gzS else let tau !k = (let !flines1 = fLines k contents in innerProc printLine choice flines1 gzS) in case twoInParallel of 1 -> ($|) (mapM_ id) rpar (map tau [1, 0]) _ -> mapM_ tau [1, 0] innerProc :: Int -> String -> [String] -> String -> IO () innerProc printLine choice flInes gzS = do let !gz = getIntervalsN gzS flInes -- Obtained from the second command line argument except those ones that are for RTS (!data31,!wordsCnt0_data32) = unzip . getData3 printLine gz choice $ flInes !data4 = filter (/= 0) . map fst $ data31 if null data4 then putStrLn (replicate 102 '-') >> putStrLn "1.000+-0.000\tALL!" >> putStrLn (replicate 102 '=') -- Well, this means that all the text consists of the unique (in phonetic meaning) words alongside every line. A rather rare occurrence. else do let (!mean1,!disp) = meanWithDisp data4 !pairs = sort . filter ((/= 0) . snd) $ wordsCnt0_data32 g !m !n = (length . takeWhile (\(_,v) -> v == n) . dropWhile (\(_,v) -> v /= n) . takeWhile (\(u,_) -> u == m) . dropWhile (\(u,_) -> u /= m) $ pairs) `using` rseq h !y !x = mconcat [mconcat . map (\m1 -> mconcat [mconcat . map (\n1 -> (if y then show (g m1 n1) else if g m1 n1 == 0 then "." else show (g m1 n1)) ++ "\t") $ [1..gz],newLineEnding]) $ [2..7],replicate 102 x] mapM_ putStrLn . map snd $ data31 putStrLn . generalInfo1 gz pairs (mean1, disp) $ data31 putStrLn (h False '~') putStrLn (h True '=') getIntervalsN :: String -> [String] -> Int getIntervalsN xs yss | xs == "s" = sturgesH (length yss) | xs == "l" = levynskyiMod (length yss) | otherwise = fromMaybe 9 (readMaybe xs::(Maybe Int)) {-# INLINE getIntervalsN #-} getData3 :: Int -> Int -> String -> [String] -> [((Float,String),(Int,Int))] getData3 printLine gz choice = parMap rseq (\ts -> let (!maxE,!minE,!data2) = runEval ((parTuple3 rpar rseq rseq) ((\k -> if k == 0.0 then 1.0 else k) . (\rs -> if null rs then 0.0 else head rs) . firstFrom3 . maximumElBy 1 (V.singleton (oneProperty)) $ UL2 ([],uniquenessVariants2GN " 01-" (V.singleton (oneProperty)) (chooseMax choice) $ ts), (\k -> if k == 0.0 then 1.0 else k) . abs . (\rs -> if null rs then 0.0 else head rs) . firstFrom3 . maximumElBy 1 (V.singleton (oneProperty)) $ UL2 ([],uniquenessVariants2GN " 01-" (V.singleton (oneProperty)) (chooseMin choice) $ ts), (\k -> if k == 0.0 then 1.0 else k) . head . getAC (chooseMax choice) $ ts)) (!wordsN,!intervalN) | maxE == 1 = (0, 0) | otherwise = runEval ((parTuple2 rpar rpar) (length . words $ ts, intervalNRealFrac minE maxE gz data2)) (!ratio,!printedLine) = (if maxE == 1 then 0.0 else 2.0 * data2 / (minE + maxE), mconcat [showFFloat (Just 4) minE $ "\t", showFFloat (Just 4) data2 "\t", showFFloat (Just 4) maxE "\t", showFFloat (Just 4) (data2 / minE) "\t", showFFloat (Just 4) (maxE / data2) "\t", showFFloat (Just 4) ratio "\t", '\t':show (wordsN::Int), '\t':show (intervalN::Int), if printLine == 1 then '\t':ts else ""]) in ((ratio,printedLine),(wordsN,intervalN))) {-# INLINABLE getData3 #-} fLines :: Int -> String -> [String] fLines !toOneLine = getTL (filter (any (\x -> isUkrainian x && isAlpha x))) . F1 . prepareText . (\z -> if toOneLine == 1 then unwords . words $ z else z) {-# INLINE fLines #-} generalInfo1 :: Int -> [(Int,Int)] -> (Float,Float) -> [(Float,String)] -> String generalInfo1 gz pairs (mean1, disp) data31 = let !ks = map (\r -> length . takeWhile (== r) . dropWhile (/= r) . sort . map snd $ pairs) [1..gz] !s = sum ks in mconcat [replicate 102 '-', newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [1..gz], newLineEnding, mconcat . map (\r -> show r ++ "\t") $ ks, newLineEnding, mconcat . map (\r -> showFFloat (Just 2) (fromIntegral (r * 100) / fromIntegral s) "%\t") $ ks,newLineEnding, mconcat [showFFloat (Just 4) mean1 "+-", showFFloat (Just 4) (sqrt disp) "\t", show (length . filter (== 0) . map fst $ data31), '\t':show (length data31)], newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [2..7], newLineEnding, mconcat . map (\r -> (show . length . takeWhile (== r) . dropWhile (/= r) . map fst $ pairs) ++ "\t") $ [2..7], newLineEnding, replicate 102 '*'] {-# INLINE generalInfo1 #-}