-- | -- Module : Main -- Copyright : (c) OleksandrZhabenko 2020 -- License : MIT -- Stability : Experimental -- Maintainer : olexandr543@yahoo.com -- -- Analyzes a poetic text in Ukrainian, for every line prints statistic data and -- then for the whole poem prints the hypothesis evaluation information. Since the 0.4.0.0 version -- the program tries to be more accurate in cases of the lines consisting entirely of the words -- which are unique in phonetic meaning alongside the line. Another hypothesis is that the distribution -- of the placement of the actual poetic text in Ukrainian is not one of the standard distributions. -- It can probably have approximately a form of and is different for different authors: -- -- > -- -- -- -- > / \_/ \_/ \ -- -- To enable parallel computations (potentially, they can speed up the work), please, run the @uniqVec03@ executable with -- @+RTS -threaded -RTS@ command line options with possibly @-N@ option inside. -- {-# OPTIONS_GHC -threaded -rtsopts #-} {-# LANGUAGE CPP, BangPatterns #-} module Main where #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__>=710 /* code that applies only to GHC 7.10.* and higher versions */ import GHC.Base (mconcat) #endif #endif import Control.Parallel.Strategies import Data.Maybe (fromMaybe) import Text.Read (readMaybe) import qualified Data.Vector as V import String.Languages.UniquenessPeriods.Vector import Languages.UniquenessPeriods.Vector.General.Debug import Languages.UniquenessPeriods.Vector.Properties import Melodics.Ukrainian import System.Environment import Languages.Phonetic.Ukrainian.PrepareText import Languages.UniquenessPeriods.Vector.Data import Languages.UniquenessPeriods.Vector.Auxiliary import Languages.UniquenessPeriods.Vector.StrictV hiding (mconcat) import Numeric (showFFloat) import Languages.UniquenessPeriods.Vector.Filters import GHC.Float (int2Float) import Data.List (sort) import Data.Char (isAlpha) import Numeric.Stats #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__==708 /* code that applies only to GHC 7.8.* */ mconcat = concat #endif #endif main :: IO () main = do args <- getArgs let !file = concat . take 1 $ args -- The first command line arguments except those ones that are RTS arguments !gz = fromMaybe 9 (readMaybe (concat . take 1 . drop 1 $ args)::(Maybe Int)) -- The second command line arguments except those ones that are RTS arguments !printLine = fromMaybe 0 (readMaybe (concat . take 1 . drop 2 $ args)::(Maybe Int)) -- The third command line arguments except those ones that are RTS arguments. Set to 1 if you would like to print the current line within the information !toOneLine = fromMaybe 0 (readMaybe (concat . take 1 . drop 3 $ args)::(Maybe Int)) -- The fourth command line arguments except those ones that are RTS arguments. Seti to 1 if you would like to convert the text into one single line before applying to it the processment (it can be more conceptually consistent in such a case) contents <- readFile file let !flines = fLines toOneLine contents (!data31,!wordsCnt0_data32) = unzip . getData3 printLine gz $ flines !data4 = filter (/= 0) . map fst $ data31 if null data4 then putStrLn (replicate 102 '-') >> putStrLn "1.000+-0.000\tALL!" >> putStrLn (replicate 102 '=') -- Well, this means that all the text consists of the unique (in phonetic meaning) words alongside every line. A rather rare occurrence. else do let (!mean1,!disp) = meanWithDisp data4 !pairs = sort . filter ((/= 0) . snd) $ wordsCnt0_data32 g !m !n = (length . takeWhile (\(_,v) -> v == n) . dropWhile (\(_,v) -> v /= n) . takeWhile (\(u,_) -> u == m) . dropWhile (\(u,_) -> u /= m) $ pairs) `using` rseq h !y !x = mconcat [mconcat . map (\m1 -> mconcat [mconcat . map (\n1 -> (if y then show (g m1 n1) else if g m1 n1 == 0 then "." else show (g m1 n1)) ++ "\t") $ [1..gz],newLineEnding]) $ [2..7],replicate 102 x] mapM_ putStrLn . map snd $ data31 putStrLn . generalInfo1 gz pairs (mean1, disp) $ data31 putStrLn (h False '~') putStrLn (h True '=') getData3 :: Int -> Int -> [String] -> [((Float,String),(Int,Int))] getData3 printLine gz = parMap rseq (\ts -> let (!maxE,!minE,!data2) = runEval ((parTuple3 rpar rseq rseq) ((\k -> if k == 0 then 1 else k) . (\rs -> if null rs then 0 else head rs) . firstFrom3 . maximumElBy 1 (V.singleton (oneProperty)) $ UL2 ([],uniquenessVariants2GN " 01-" (V.singleton (oneProperty)) (uniquenessPeriodsVector3 " 01-" . aux0 . convertToProperUkrainian) (justOneValue2Property . diverse2) $ ts), (\k -> if k == 0 then 1 else k) . abs . (\rs -> if null rs then 0 else head rs) . firstFrom3 . maximumElBy 1 (V.singleton (oneProperty)) $ UL2 ([],uniquenessVariants2GN " 01-" (V.singleton (oneProperty)) (uniquenessPeriodsVector3 " 01-" . aux0 . convertToProperUkrainian) (justOneValue2Property . negate . diverse2) $ ts), (\k -> if k == 0 then 1 else k) . diverse2 . uniquenessPeriodsVector3 " 01-" . aux0 . convertToProperUkrainian $ ts)) (!wordsN,!intervalN) | maxE == 1 = (0, 0) | otherwise = runEval ((parTuple2 rpar rpar) (length . words $ ts, intervalNRealFrac (int2Float minE) (int2Float maxE) gz (int2Float data2))) (!ratio,!printedLine) = (if maxE == 1 then 0.0 else 2.0 * fromIntegral data2 / fromIntegral (minE + maxE), mconcat [show $ (minE::Int), '\t':show (data2::Int), '\t':show (maxE::Int), '\t':showFFloat (Just 4) (fromIntegral data2 / fromIntegral minE) "\t", showFFloat (Just 4) (fromIntegral maxE / fromIntegral data2) "\t", showFFloat (Just 4) ratio "\t", '\t':show (wordsN::Int), '\t':show (intervalN::Int), if printLine == 1 then '\t':ts else ""]) in ((ratio,printedLine),(wordsN,intervalN))) {-# INLINABLE getData3 #-} fLines :: Int -> String -> [String] fLines !toOneLine = filter (any (\x -> isUkrainian x && isAlpha x)) . prepareText . (\z -> if toOneLine == 1 then unwords . words $ z else z) {-# INLINE fLines #-} generalInfo1 :: Int -> [(Int,Int)] -> (Float,Float) -> [(Float,String)] -> String generalInfo1 gz pairs (mean1, disp) data31 = let !ks = map (\r -> length . takeWhile (== r) . dropWhile (/= r) . sort . map snd $ pairs) [1..gz] !s = sum ks in mconcat [replicate 102 '-', newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [1..gz], newLineEnding, mconcat . map (\r -> show r ++ "\t") $ ks, newLineEnding, mconcat . map (\r -> showFFloat (Just 2) (fromIntegral (r * 100) / fromIntegral s) "%\t") $ ks,newLineEnding, mconcat [showFFloat (Just 4) mean1 "+-", showFFloat (Just 4) (sqrt disp) "\t", show (length . filter (== 0) . map fst $ data31), '\t':show (length data31)], newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [2..7], newLineEnding, mconcat . map (\r -> (show . length . takeWhile (== r) . dropWhile (/= r) . map fst $ pairs) ++ "\t") $ [2..7], newLineEnding, replicate 102 '*'] {-# INLINE generalInfo1 #-}