-- | -- Module : Phladiprelio.Distribution -- Copyright : (c) Oleksandr Zhabenko 2020-2023 -- License : MIT -- Stability : Experimental -- Maintainer : oleksandr.zhabenko@yahoo.com -- -- Analyzes a poetic text in Ukrainian, for every line prints statistic data and -- then for the whole poem prints the hypothesis evaluation information. -- Is used in pair with some other programs, e. g. with propertiesTextG3 from phonetic-languages-simplified-examples-array package -- or with a new phonetic-languages-ukrainian series. -- The module contains library functions for the program. -- -- -- To enable parallel computations (potentially, they can speed up the work), please, run the @distributionText@ executable with -- @+RTS -threaded -RTS@ command line options with possibly @-N@ option inside. -- {-# OPTIONS_GHC -threaded -rtsopts #-} {-# LANGUAGE BangPatterns, NoImplicitPrelude #-} module Phladiprelio.Distribution where import GHC.Base import GHC.Real import GHC.Float import Data.Tuple import System.IO import GHC.Num ((+),subtract,(*)) import Control.Parallel.Strategies import Data.Maybe (fromMaybe,isJust,fromJust) import Text.Read (readMaybe) import Numeric (showFFloat) import Data.List import Numeric.Stats import Data.Char (isDigit) import Data.Lists.FLines hiding (mconcat) import Phladiprelio.RulesIntervals import Phladiprelio.RulesIntervalsPlus import Text.Show (Show(..),show) -- | Sum data type to control whether the functions work with multiple properties or with just one. data ControlStatsIntervals = U ([Double],[(Int,Int)]) | M [(Int,Int)] deriving (Eq,Show) isU :: ControlStatsIntervals -> Bool isU (U _) = True isU _ = False isM :: ControlStatsIntervals -> Bool isM (M _) = True isM _ = False data31F (U (x,y)) = Just x data31F (M _) = Nothing wordsCnt0_data32F (U (x,y)) = y wordsCnt0_data32F (M y) = y maybeDII :: (Int -> Bool) -> ControlStatsIntervals -> Maybe [(Double,(Int,Int))] maybeDII p (U (xs,ys)) = Just . filter (\(_,(y,_)) -> p y) . zip xs $ ys maybeDII _ _ = Nothing numberProps :: String -> Int numberProps contents = maximum . map (subtract 1 . length . filter (all isDigit) . words) . lines $ contents innerProcG :: Bool -> Bool -> String -> Bool -> String -> IO () innerProcG pairwisePermutations whitelines gzS multiprop contents | multiprop = mapM (\i -> processContentsMultiprop i contents >>= \csi -> innerProc pairwisePermutations whitelines gzS csi contents) [1..numberProps contents] >> return () | otherwise = processContents whitelines contents >>= \csi -> innerProc pairwisePermutations whitelines gzS csi contents innerProc :: Bool -> Bool -> String -> ControlStatsIntervals -> String -> IO () innerProc pairwisePermutations whitelines gzS csi contents = do if all ((< 2) . fst) . wordsCnt0_data32F $ csi then putStrLn (replicate 80 '-') >> putStrLn "1.000+-0.000\tALL!" >> putStrLn (replicate 80 '=') -- Well, this means that all the text consists of the lines that have no variativity from the program perspective and, therefore, they cannot be analyzed effectively by it. Nevertheless, you can accurately exclude them from the consideration. A rather rare occurrence. else do let !gz | isU csi = getIntervalsN gzS (fromJust . data31F $ csi) -- Obtained from the first command line argument except those ones that are for RTS | otherwise = getIntervalsN gzS . wordsCnt0_data32F $ csi !mndsp | isU csi = Just . meanWithDispD2 . map fst . fromJust . maybeDII (>1) $ csi -- Since the 0.6.0.0 version switched to the sample unbiased dispersion with (n - 1) in the denominator. | otherwise = Nothing !pairs = sort . filter ((/= 0) . snd) . wordsCnt0_data32F $ csi g !m !n = (length . takeWhile (\(_,v) -> v == n) . dropWhile (\(_,v) -> v /= n) . takeWhile (\(u,_) -> u == m) . dropWhile (\(u,_) -> u /= m) $ pairs) `using` rdeepseq h !y !x = mconcat [mconcat . map (\m1 -> mconcat [mconcat . map (\n1 -> (if y then show (g m1 n1) else if g m1 n1 == 0 then "." else show (g m1 n1)) ++ "\t") $ [1..gz],newLineEnding]) $ [2..(if pairwisePermutations then 10 else 7)],replicate 80 x] putStrLn . generalInfo1 pairwisePermutations gz pairs mndsp . length . wordsCnt0_data32F $ csi putStrLn (h False '~') putStrLn (h True '=') processContents :: Bool -> String -> IO ControlStatsIntervals processContents whitelines contents = do let !anlines = lines contents !anStrs | whitelines = filter (not . null) . map (drop 6 . take 9 . words) $ anlines | otherwise = map (drop 6 . take 9 . words) anlines !ratioStrs = map head anStrs !wordsNStrs = map (!! 1) anStrs !intervalNStrs = map last anStrs !ratios = map (\xs -> fromMaybe 1.0 (readMaybe xs::Maybe Double)) ratioStrs !wordsNs = map (\xs -> fromMaybe 0 (readMaybe xs::Maybe Int)) wordsNStrs !intervalNs = map (\xs -> fromMaybe 0 (readMaybe xs::Maybe Int)) intervalNStrs return . U $ (ratios,zip wordsNs intervalNs) processContentsMultiprop :: Int -> String -> IO ControlStatsIntervals processContentsMultiprop propN contents = do let !anwords = map words . lines $ contents !wordsNStrs = map head anwords !intervalNStrs = map (concat . drop propN . take (propN + 1)) anwords !wordsNs = drop 1 . filter (> 0) . map (\xs -> fromMaybe 0 (readMaybe xs::Maybe Int)) $ wordsNStrs !intervalNs = filter (> 0) . map (\xs -> fromMaybe 0 (readMaybe xs::Maybe Int)) $ intervalNStrs return . M . zip wordsNs $ intervalNs generalInfo1 :: Bool -> Int -> [(Int,Int)] -> Maybe (Double,Double) -> Int -> String generalInfo1 pairwisePermutations gz pairs mndsp ll = let !ks = map (\r -> length . takeWhile (== r) . dropWhile (/= r) . sort . map snd $ pairs) [1..gz] !s = sum ks stringMD | isJust mndsp = let (mean1,disp) = fromJust mndsp in mconcat [showFFloat (Just 4) mean1 "+-", showFFloat (Just 4) (sqrt disp) "\t"] | otherwise = "" in mconcat [replicate 80 '-', newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [1..gz], newLineEnding, mconcat . map (\r -> show r ++ "\t") $ ks, newLineEnding, mconcat . map (\r -> showFFloat (Just 2) (fromIntegral (r * 100) / fromIntegral s) "%\t") $ ks, newLineEnding, stringMD, show (length . filter ((<= 1) . fst) $ pairs), '\t':show ll, newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [2..(if pairwisePermutations then 10 else 7)], newLineEnding, mconcat . map (\r -> (show . length . takeWhile (== r) . dropWhile (/= r) . map fst $ pairs) ++ "\t") $ [2..(if pairwisePermutations then 10 else 7)], newLineEnding, replicate 80 '*'] {-# INLINE generalInfo1 #-}