-- |
-- Module      :  String.UniquenessPeriodsG
-- Copyright   :  (c) OleksandrZhabenko 2020
-- License     :  MIT
-- Stability   :  Experimental
-- Maintainer  :  olexandr543@yahoo.com
--
-- Can be used to produce functions similar to those of the "String.Ukrainian.UniquenessPeriods" module from
-- the @uniqueness-periods@ package. Provides a generalization of them.
-- For every conversion function of the type @g :: String -> Vector String@ used here,
-- it is important that it is stable under repeated application (its result after
-- the first application must not be changed into new variants by applying the rules of the function again).
-- Otherwise, the recursive scheme of the functions in the module will lead to wrong results.
-- So the conversion function should behave the following way in GHCi (@xs@ denotes a word in the language):
--
-- > let v = g xs
-- > let ys = concat . toList $ v 
-- > let v2 = g ys
-- > v == v2
-- > True
-- 
-- In other words, for a single word, @g . concat . toList . g = g@.
-- 

module String.UniquenessPeriodsG (
  -- * Auxiliary functions
  show7s'''
  , show7s5
  , show7s6
  , show7sn4'
  , show7snc
  -- ** Inner predicate (auxiliary)
  , eqSnds
  -- ** Inner backward conversion function
  , listToString
  -- * uniquenessPeriods function
  , uniquenessPeriods
) where

import Data.Char (isSpace)
import qualified Data.Vector as V
import Data.List ((\\),nubBy)

-- | Renders a list of sound representations back into a single text 'String'.
--
-- Every element that occurs in the first argument (the representations of white space or, more
-- generally, of non-sound symbols) is emitted as one space character; every other element is
-- emitted unchanged. The pieces are concatenated in their original order with no extra separator.
listToString :: [String] -> [String] -> String
listToString whspss = concatMap render
  where
    -- A non-sound representation collapses to a single space, a sound stays as it is.
    render piece
      | piece `elem` whspss = " "
      | otherwise = piece

-- | Equality predicate for sound representations that never holds for silence.
--
-- Returns 'True' only if neither of the two compared 'String's occurs in the first argument
-- (the list of representations for white space or, more generally, non-sound symbols) and the
-- two 'String's are equal. In particular a non-sound representation is not equal even to itself.
eqSnds :: [String] -> String -> String -> Bool
eqSnds whspss xs ys =
  xs `notElem` whspss && ys `notElem` whspss && xs == ys

-- | The same as @show7s''@ from MMSyn7s module (@mmsyn7s@ package), but the second element in the resulting tuple is
-- again a text: the remainder of the input rendered with 'listToString', so that every non-sound representation
-- becomes a single space.
--
-- The first argument must be a list of 'String', each of which is a representation for the white space
-- (or more generally, non-sound symbol representation). The second argument is the list of sound representations.
--
-- The splitting itself is done by @show7sn4'@ (so both functions always agree): the first element of the result
-- is the leading run of representations up to (not including) the first repeated non-silent one, with the
-- non-sound representations filtered out; only the first 200 elements of the input are examined for repetitions
-- in a single call.
show7s''' :: [String] -> [String] -> ([String],String)
show7s''' whspss zss = (period, listToString whspss remainder)
  where
    -- Lazy pattern: the pair is only taken apart when one of the components is demanded.
    (period, remainder) = show7sn4' whspss zss

-- | Splits a text at the first repeated non-silent sound representation.
--
-- The text (third argument) is converted with the function @g@ (second argument) into a 'V.Vector' of sound
-- representations for the language, the vector is turned into a list and the list is passed to @show7s'''@.
-- Hence the first element of the resulting tuple is the list of non-silent representations from the beginning
-- of the text that are not repeated, and the second one is a 'String' obtained from the remainder (starting
-- from the first duplicated non-silent representation) in which non-sound representations are replaced by spaces.
--
-- The first argument must be a list of 'String', each of which is a representation for the white space
-- (or more generally, non-sound symbol representation).
show7s5 :: [String] -> (String -> V.Vector String) -> String -> ([String], String)
show7s5 whspss g txt = show7s''' whspss (V.toList (g txt))

-- | Function 'show7s6' takes a text being a 'String' and returns a list of lists of 'String', each of which is
-- obtained for one unique part of the text from the sound representations point of view. It can show how many
-- and what sound representations are needed to completely cover the given text.
--
-- The first argument must be a list of 'String', each of which is a representation for the white space
-- (or more generally, non-sound symbol representation). The second argument is a function that converts
-- a 'String' of the text into the 'V.Vector' of sound representations for that language.
--
-- The text is split repeatedly with 'show7s5': the first component of every split becomes the next element of
-- the result and the second component (the remaining text) is processed in the same way until it is empty.
-- An empty text gives an empty list. The result of 'show7s5' is computed once per step and shared between
-- the emitted period and the recursive call.
show7s6 :: [String] -> (String -> V.Vector String) -> String -> [[String]]
show7s6 _ _ [] = []
show7s6 whspss g t = period : show7s6 whspss g remainder
  where
    -- One conversion and one split per step; both components come from the same evaluation.
    (period, remainder) = show7s5 whspss g t

-- | Function 'uniquenessPeriods' takes a text being a 'String' and returns a list of 'Int'. Each value is the
-- number of non-silent sound representations in one unique (duplicate-free) part of the text, as produced by
-- 'show7s6', going from the beginning of the text to its end.
--
-- If the text is empty or consists only of characters for which 'isSpace' holds, the result is @[0]@.
--
-- The first argument must be a list of 'String', each of which is a representation for the white space
-- (or more generally, non-sound symbol representation). The second argument is a function that converts
-- a 'String' of the text into the 'V.Vector' of sound representations for that language.
uniquenessPeriods :: [String] -> (String -> V.Vector String) -> String -> [Int]
uniquenessPeriods whspss g xs =
  if all isSpace xs
    then [0]
    else map length (show7s6 whspss g xs)

-- | Converts a list of sound representations into the list of lengths of its consecutive unique parts by
-- applying @show7sn4'@ repeatedly: the length of the first component of every split is emitted and the second
-- component is processed further until it becomes empty. An empty input gives @[0]@.
--
-- The first argument must be a list of 'String', each of which is a representation for the white space
-- (or more generally, non-sound symbol representation).
show7snc :: [String] -> [String] -> [Int]
show7snc whspss xss =
  case show7sn4' whspss xss of
    (period, [])        -> [length period]
    (period, remainder) -> length period : show7snc whspss remainder

-- | The same as @show7sn'''@ from the MMSyn7s module from the @mmsyn7s@ package, but does not concatenate the
-- list of 'String' as the second tuple's element.
--
-- The first argument must be a list of 'String', each of which is a representation for the white space
-- (or more generally, non-sound symbol representation). The second argument is the list of sound representations.
--
-- Only the first 200 elements of the input are examined in one call. Inside this window the list is split just
-- before the earliest element that repeats an earlier non-silent representation (see 'eqSnds'):
--
-- * the first element of the result is the part before the split with all the non-sound representations removed;
--
-- * the second element is the part starting from that repeated representation, followed by everything beyond
--   the 200-element window.
--
-- If there is no repetition inside the window, the whole window forms the first part and the second element
-- is just the rest of the input beyond the window.
show7sn4' :: [String] -> [String] -> ([String],[String])
show7sn4' whspss zss = (filter (`notElem` whspss) wss, vss ++ yss)
  where
    (xss, yss) = splitAt 200 zss
    -- Non-first occurrences of the non-silent representations, in their original order
    -- (non-sound representations are never equal by 'eqSnds', so all of them are removed here).
    uss = xss \\ nubBy (eqSnds whspss) xss
    (wss, vss) =
      case uss of
        [] -> (xss, [])
        (u:_) ->
          -- Split at the second occurrence of @u@, which is the earliest repetition in the window.
          case break (== u) xss of
            (beforeFirst, _:afterFirst) ->
              let (between, fromSecond) = break (== u) afterFirst
              in (beforeFirst ++ u : between, fromSecond)
            -- Cannot happen because @u@ is taken from @xss@; kept so that the function stays total.
            _ -> (xss, [])