-- |
-- Module      :  MMSyn7s
-- Copyright   :  (c) OleksandrZhabenko 2020
-- License     :  MIT
-- Stability   :  Experimental
-- Maintainer  :  olexandr543@yahoo.com
--
-- A program and a library that show a sorted list of the Ukrainian sounds 
-- representations that can be used by mmsyn7 series of programs.
--

module MMSyn7s (
  -- * Used in the program
  main7s
  -- * Library functions
  -- ** For the text as a whole object
  , show7s2
  -- ** For the text being treated as partial one
  , show7s'
  , show7s''
  , show7s'''
  , show7s3
  , show7s4
  , show7s5
  , show7s6
  , show7s7
  , show7s8
  , show7s9
  -- *** Inner predicate (auxiliary)
  , eqSnds
  -- *** Inner backward conversion function
  , listToString
  -- * Some descriptive statistics metrices
  , countSnds
  , countSnds2
  , sndsDensity
  , uniquenessPeriods
  , uniqMax
  , uniqMin
  , uniqPeriodsMean
  , uniqPeriodsDispersion
  , uniqStdQDeviation
) where

import Data.Char (isSpace)
import qualified Data.Vector as V
import Data.List (sort, nub,(\\),nubBy)
import Melodics.Ukrainian (convertToProperUkrainian)
import System.Environment (getArgs)
import MMSyn7.Syllable
import Control.Exception (onException)
import MMSyn6Ukr.Show7s

-- | Function takes the first command line argument and (may be) a Ukrainian text being written without quotes as the next command line arguments
-- and prints the sorted list of the Ukrainian sounds representations that can be used further in mmsyn7 series of programs.
--
-- Depending on the first command line argument the program behaves as follows:
-- 
-- \"-h\" -- prints help and exits;
-- 
-- \"-v\" -- prints version number and exits;
--
-- \"-s\" -- prints some general descriptive statistics metrices for the given text; 
--
-- \"-s2\" -- prints a syllable segmentation for the {number of words (or their parts) to be taken for statistics, which is a second command line argument} and 
-- some information about its structure that can be interesting.
-- 
-- \"1\"  -- prints the list of String being unique (without silence) and then the rest of the text with whitespaces and some phonetical conversions;
-- 
-- \"-1\" -- prints the rest of the text after the first duplicated sound representation (except silent ones) including it with whitespaces and some phonetical conversions;
-- 
-- \"0\"  -- prints the list of String being the Ukrainian sounds representations for the whole text.
--
-- \"2\"  -- prints the list of String being the Ukrainian sounds representations for the whole text where every sound representation is unique; 
--
-- \"3\"  -- prints the list of lists of Strings being the Ukrainian sounds representations for the whole text, which shows what sound representations
-- are needed to be created if every sound is unique;
-- 
-- All other variants of the beginning for the command line arguments are the same as \"0\" (the arguments are treated as a Ukrainian text
-- and processed as a whole one object).
main7s :: IO ()
main7s = do
  texts <- getArgs
  let arg1 = concat . take 1 $ texts in
    case arg1 of
      "-h" -> do { putStrLn "mmsyn7s: "
                 ; putStrLn "SYNOPSYS: "
                 ; putStrLn "mmsyn7s -h      OR: "
                 ; putStrLn "mmsyn7s -v      OR: "
                 ; putStrLn "mmsyn7s -s {Ukrainian text}     OR: "
                 ; putStrLn "mmsyn7s -s2 {number of words (or their parts) to be taken for statistics} {Ukrainian text}     OR: "
                 ; putStrLn "mmsyn7s 1 {Ukrainian text}     OR: "
                 ; putStrLn "mmsyn7s -1 {Ukrainian text}     OR: "
                 ; putStrLn "mmsyn7s 0 {Ukrainian text}     OR: "
                 ; putStrLn "mmsyn7s {Ukrainian text}"
                 ; putStrLn "where the first one prints this help message; "
                 ; putStrLn "      the second one prints a version number; "
                 ; putStrLn "      the third one prints some general descriptive statistics metrices for the given text; "
                 ; putStr "      the fourth one prints a syllable segmentation for the {number of words (or their parts) to be taken for statistics} and "
                 ; putStrLn "some information about its structure that can be interesting."
                 ; putStrLn "      the \"1\" option prints the list of String being the Ukrainian sounds representations and being unique alongside the text (without silence);"
                 ; putStr   "      the \"-1\" option prints the rest of the text after the first duplicated sound representation (except silent ones); "
                 ; putStrLn "including it with whitespaces and some phonetical conversions; "
                 ; putStrLn "      the \"0\" option prints the list of String  being the Ukrainian sounds representations for the whole text; "
                 ; putStrLn "      the \"2\" option prints the list of String  being the Ukrainian sounds representations for the whole text where every sound representation is unique; "
                 ; putStr "      the \"3\" option prints the list of lists of Strings  being the Ukrainian sounds representations for the whole text, "
                 ; putStrLn "which shows what sound representations are needed to be created if every sound is unique; "
                 ; putStrLn "      the other beginning is equivalent to the \"0\" behaviour."
                  }
      "-v" -> putStrLn "mmsyn7s: version 0.8.0.0"
      "-s" -> let ys = unwords . drop 1 $ texts
                  zs = uniquenessPeriods ys in
                  if zs /= [0::Int]
                    then do { let t  = sum zs
                                  m  = length zs
                                  n  = fromIntegral t / fromIntegral m
                                  d  = ((sum . fmap (\w -> (fromIntegral w - n) * (fromIntegral w - n)) $ zs) / fromIntegral m)
                                  s  = sqrt d
                                  mx = maximum zs
                                  mn = minimum zs
                            ; putStrLn "---------------------------------------------------------------------------"
                            ; putStrLn $ "Uniqueness periods:                                                      " ++ (show zs)
                            ; putStrLn $ "Possibly unique sounds representations density:                          " ++ show (fromIntegral t / (fromIntegral . length . show7s $ ys))
                            ; putStrLn $ "Number of sounds representations that are enough to cover the text:      " ++ show t
                            ; putStrLn $ "Mean for the uniqueness periods list:                                    " ++ show n
                            ; putStrLn $ "Dispersion for the uniqueness periods list:                              " ++ show d
                            ; putStrLn $ "Standard quadratic deviation for the uniqueness periods list:            " ++ show s
                            ; putStrLn $ "The maximum element in the uniqueness periods list:                      " ++ show mx
                            ; putStrLn $ "The minimum element in the uniqueness periods list:                      " ++ show mn }
                    else do { putStrLn "------------------------------------------------------------------------------"
                            ; putStrLn $ "Uniqueness periods:                                                      [0]"
                            ; putStrLn $ "Possibly unique sounds representations density:                          0.0"
                            ; putStrLn $ "Number of sounds representations that are enough to cover the text:      0"
                            ; putStrLn $ "Mean for the uniqueness periods list:                                    0.0"
                            ; putStrLn $ "Dispersion for the uniqueness periods list:                              0.0"
                            ; putStrLn $ "Standard quadratic deviation for the uniqueness periods list:            0.0"
                            ; putStrLn $ "The maximum element in the uniqueness periods list:                      0"
                            ; putStrLn $ "The minimum element in the uniqueness periods list:                      0" }
      "-s2" -> let n = concat . drop 1 . take 2 $ texts in exceptRead n (drop 2 texts)
      "1"  -> print . fst . show7s5 . unwords . drop 1 $ texts
      "-1" -> putStrLn . snd . show7s5 . unwords . drop 1 $ texts
      "0"  -> putStrLn . show7s2 . unwords . drop 1 $ texts
      "2"  -> print . show7s8 . unwords . drop 1 $ texts
      "3"  -> print . show7s6 . unwords . drop 1 $ texts
      _    -> putStrLn . show7s2 . unwords $ texts

-- | Is used internally in the 'exceptRead' function.
printStatSyl :: String -> [String] -> IO ()
printStatSyl xs yss =
  do { let m = read xs :: Int
           zss = takeWordSP m . unwords $ yss
           tss = sylLengthsP2 zss
     ; putStrLn "---------------------------------------------------------------------------"
     ; putStrLn $ "Syllables in the text:                                                   " ++ (show . fmap (fmap (concatMap show)) $ zss)
     ; putStrLn "---------------------------------------------------------------------------"
     ; putStrLn $ "Number of the Ukrainian sounds in the syllables in the text:             " ++ show tss
     ; putStrLn ""
     }

-- | Is used internally in the 'mmsyn7s' function.
exceptRead :: String -> [String] -> IO ()
exceptRead xs yss = onException (printStatSyl xs yss)
  (do { putStrLn "Please, specify a number of words (or their parts being created with apostrophe or dash (hyphen) signs (or both)) "
      ; ns <- getLine
      ; exceptRead ns yss
      })

-- | Function takes a Ukrainian text being a @String@ and returns a @String@ that shows a sorted list of the Ukrainian sounds representations that can be used further
-- in mmsyn7 series of programs.
show7s2 :: String -> String
show7s2 = show . sort . nub . V.toList . V.filter (\x -> x /= "-" && x /= "1" && x /= "0") . convertToProperUkrainian

-- | Function 'show7s3' takes a Ukrainian text being a @String@ and returns a tuple, the first element of which is a list of Strings that correspond to the Ukrainian 
-- sounds representations that (except pauses) are unique and are not repeated starting from the beginning of the given text, and the second one is a remainder
-- list of Strings starting from the first duplicated non-silent Ukrainian sound representation.
show7s3 :: String -> ([String], [String])
show7s3 = show7s' . V.toList . convertToProperUkrainian

-- | Function 'eqSnds' compares two non-silent Strings representations for Ukrainian sounds by equality. If one of them is a representation for silence (e. g. pause),
-- then the predicate is @False@.
eqSnds :: String -> String -> Bool
eqSnds xs ys | xs `elem` ["-","0","1"] || ys `elem` ["-","0","1"] = False
             | otherwise = xs == ys

-- | Function @show7s'@ is auxiliary to the 'show7s3' and is used internally in the latter one.
show7s' :: [String] -> ([String],[String])
show7s' zss =
  let (xss, yss) = splitAt 68 zss
      uss = xss \\ nubBy eqSnds xss
      (wss, vss) = if null uss then (xss,[]) else (takeWhile (/= head uss) xss ++ head uss:(takeWhile (/= head uss) . tail . dropWhile (/= head uss) $ xss),
        dropWhile (/= head uss) . tail . dropWhile (/= head uss) $ xss) in
          (wss, vss ++ yss)

-- | The same as @show7s'@, but the first list in the tuple is filtered from the silent representations and is sorted not in the order of appearance in the text,
-- but in the ascending order.
show7s'' :: [String] -> ([String],[String])
show7s'' zss =
  let (xss, yss) = splitAt 68 zss
      uss = xss \\ nubBy eqSnds xss
      (wss,vss) = if null uss then (xss,[]) else (takeWhile (/= head uss) xss ++ head uss:(takeWhile (/= head uss) . tail . dropWhile (/= head uss) $ xss),
        dropWhile (/= head uss) . tail . dropWhile (/= head uss) $ xss) in
          (sort . filter (\x -> x /= "-" && x /= "1" && x /= "0") $ wss,vss ++ yss)

-- | Function 'show7s4' takes a Ukrainian text being a @String@ and returns a tuple, the first element of which is a list of Strings that correspond to the Ukrainian 
-- sounds representations that (except pauses) are unique and are not repeated starting from the beginning of the given text (this list is filtered from 
-- the representations for the silence and then sorted in the ascending order), and the second one is a remainder
-- list of Strings starting from the first duplicated non-silent Ukrainian sound representation.
show7s4 :: String -> ([String], [String])
show7s4 = show7s'' . V.toList . convertToProperUkrainian

-- | Function 'listToString' converts the list of Strings being the sequential Ukrainian sounds representations into the Ukrainian text with whitespaces
-- (whitespaces are substituted instead of punctuation symbols, too) and some phonetical conversions.
listToString :: [String] -> String
listToString =
  concatMap (\t ->
    case t of
      "0" -> " "
      "1" -> " "
      "-" -> " "
      x   -> x)

-- | The same as @show7s''@, but the second element in the resulting tuple is again the Ukrainian text with whitespaces (whitespaces are substituted
-- instead of punctuation symbols, too) and some phonetical conversions.
show7s''' :: [String] -> ([String],String)
show7s''' zss =
  let (xss, yss) = splitAt 68 zss
      uss = xss \\ nubBy eqSnds xss
      (wss,vss) = if null uss then (xss,[]) else (takeWhile (/= head uss) xss ++ head uss:(takeWhile (/= head uss) . tail . dropWhile (/= head uss) $ xss),
        dropWhile (/= head uss) . tail . dropWhile (/= head uss) $ xss) in
          (sort . filter (\x -> x /= "-" && x /= "1" && x /= "0") $ wss, listToString $ vss ++ yss)

-- | Function 'show7s5' takes a Ukrainian text being a @String@ and returns a tuple, the first element of which is a list of Strings that correspond to the Ukrainian 
-- sounds representations that (except pauses) are unique and are not repeated starting from the beginning of the given text (this list is filtered from 
-- the representations for the silence and then sorted in the ascending order), and the second one is a @String@ obtained from the remainder
-- list of Strings starting from the first duplicated non-silent Ukrainian sound representation with whitespaces (whitespaces are substituted
-- instead of punctiuation symbols, too) and some phonetical conversions. 
show7s5 :: String -> ([String], String)
show7s5 = show7s''' . V.toList . convertToProperUkrainian

-- | Function 'show7s6' takes a Ukrainian text being a @String@ and returns a list of lists of Strings, each latter one of which is obtained for the unique parts of
-- the text from the Ukrainian sounds representations point of view. It can show how many and what sound representations are needed to be created to completely cover
-- the given text providing all the needed sound parameters.
show7s6 :: String -> [[String]]
show7s6 t@(_:_) = (fst . show7s5 $ t):(show7s6 . snd . show7s5 $ t)
show7s6 _ = []

-- | Function 'countSnds' counts total number of Strings in the list of list of Strings. It can be successfully used to count how many Ukrainian sounds representations
-- are needed to be created to completely cover the given Ukrainian text. It can be used as a some statistics parameter for the text.
countSnds :: [[String]] -> Int
countSnds = sum . map length

-- | Function 'countSnds2' gives the same result as (countSnds . show7s6), but may be is sligtly more efficient in calculation.
countSnds2 :: String -> Int
countSnds2 = length . filter (\x -> x /= "-" && x /= "1" && x /= "0") . V.toList . convertToProperUkrainian

-- | Function 'sndsDensity' counts the ratio of total number of Ukrainian sounds representations (each of which gives an opportunity to use unique ones) to the
-- total number of the Ukrainian sounds representations if all the non-silent sounds in the text are the same for the one sound representation no matter where it is
-- located. It can be used as a some statistics parameter for the text. The greater is the result, the greater number of the Ukrainian sounds representations
-- is needed to be created for the text to create a unique sound for every location alongside the text. If it is equal to 1.0, then every non-silent sound
-- in the text appears just once (if at all appears).
sndsDensity :: String -> Double
sndsDensity xs | not . any (not . isSpace) $ xs = 1.0
               | otherwise =
  let x = countSnds . show7s6 $ xs
      y = length . show7s $ xs in fromIntegral x / fromIntegral y

-- | Function 'show7s7' takes a Ukrainian text being a @String@ and returns a tuple of Strings. The first element is a String corresponding to the beginning of the
-- text with only unique non-silent sounds representations, and the second one is the rest of the text. Each resulting String is modified so that it contains some
-- phonetical conversions and (may be) changed punctuation and whitespaces.
show7s7 :: String -> (String, String)
show7s7 xs = (listToString . fst $ y, listToString . snd $ y)
  where y = show7s3 xs

-- | Function 'show7s8' takes a Ukrainian text being a @String@ and returns a list of Strings. Each String contains only unique Ukrainian sounds representations so
-- that being sequenced from head of the list they all correspond to the whole text.
show7s8 :: String -> [String]
show7s8 t@(_:_) = (fst . show7s7 $ t):(show7s8 . snd . show7s7 $ t)
show7s8 _ = []

-- | Function 'show7s9' takes a Ukrainian text being a @String@ and returns a list of Strings. Each String is a Ukrainian sound representation of the duplicated 
-- non-silent sounds, it begins a new second list of Strings in the 'show7s4' function. This information can be helpful e. g. in music and composing.
show7s9 :: String -> [String]
show7s9 xs@(_:_) = (if not . null . snd . show7s4 $ xs then head . snd . show7s4 $ xs else []):(show7s9 . snd . show7s5 $ xs)
show7s9 _ = []

-- | Function 'uniquenessPeriods' takes a Ukrainian text being a @String@ and returns a list of Ints. Each Int value is a number of 
-- the Ukrainian sounds representations (non-silent ones) being unique and not duplicated alongside the given text starting from the beginning to the end.
-- This function provides some important information about the phonetical and in some cases semantical structures of the text.
uniquenessPeriods :: String -> [Int]
uniquenessPeriods xs | any (not . isSpace) xs = fmap length . show7s6 $ xs
                     | otherwise = [0::Int]

uniquenessPeriodsV :: String -> V.Vector Int
uniquenessPeriodsV xs
 | any (not . isSpace) xs = V.fromList . fmap length . show7s6 $ xs
 | otherwise = V.singleton 0

-- | Function 'uniqPeriodsMean' is a mathematical expectation for the list obtained by 'uniquenessPeriods' function. It is a statistic metric.
-- It is a mean for the quantities of the unique (not duplicated, not repeated) Ukrainian sounds in the given Ukrainian text as a @String@.
-- If there are no Ukrainian letters in the text, it is equal to 0.0. The greater it is, the more diverse (phonetically) the text is.
uniqPeriodsMean :: String -> Float
uniqPeriodsMean xs | any (not . isSpace) xs = let ys = uniquenessPeriods xs in (fromIntegral . sum $ ys) / (fromIntegral . length $ ys)
                   | otherwise = 0.0

-- | Function 'uniqPeriodsDispersion' is a dispersion for the list obtained by 'uniquenessPeriods' function. It is a statistic metric.
-- It is a dispersion for the quantities of the unique (not duplicated, not repeated) Ukrainian sounds in the given Ukrainian text as a @String@.
-- If there are no Ukrainian letters in the text, it is equal to 0.0. The greater it is, the more suitable for changing pronunciation for the sounds 
-- (and may be for intonation changes, too) the text is.
uniqPeriodsDispersion :: String -> Float
uniqPeriodsDispersion xs | any (not . isSpace) xs =
  let ys = uniquenessPeriods xs
      z  = uniqPeriodsMean xs
      l  = length ys in
        (sum . map (\t -> (fromIntegral t - z) * (fromIntegral t - z)) $ ys) / fromIntegral l
                         | otherwise = 0.0

-- | Function 'uniqStdQDeviation' is a standard quadratic deviation for the list obtained by 'uniquenessPeriods' function. It is a statistic metric.
-- It is a standard quadratic deviation for the quantities of the unique (not duplicated, not repeated) Ukrainian sounds in the given Ukrainian text as a @String@.
-- If there are no Ukrainian letters in the text, it is equal to 0.0. The greater it is, the more suitable for changing pronunciation for the sounds
-- (and may be for intonation changes, too) the text is.
uniqStdQDeviation :: String -> Float
uniqStdQDeviation = sqrt . uniqPeriodsDispersion

-- | Function 'uniqMax' is a maximum element in the 'uniquenessPeriods' function for the same argument. It is provided as a standard element for the
-- descriptive statistics.
uniqMax :: String -> Int
uniqMax xs | any (not . isSpace) xs = maximum . uniquenessPeriods $ xs
           | otherwise = 0::Int

-- | Function 'uniqMin' is a minimum element in the 'uniquenessPeriods' function for the same argument. It is provided as a standard element for the
-- descriptive statistics.
uniqMin :: String -> Int
uniqMin xs | any (not . isSpace) xs = minimum . uniquenessPeriods $ xs
           | otherwise = 0::Int