{-# OPTIONS_HADDOCK show-extensions #-}

{-# LANGUAGE MultiWayIf, NoImplicitPrelude #-}

-- |
-- Module      :  Phladiprelio.General.PrepareText
-- Copyright   :  (c) Oleksandr Zhabenko 2020-2023
-- License     :  MIT
-- Stability   :  Experimental
-- Maintainer  :  oleksandr.zhabenko@yahoo.com
--
-- Helps to order the 7 or less phonetic language words (or their concatenations)
-- to obtain (to some extent) suitable for poetry or music text.
-- Earlier it has been a module DobutokO.Poetry.Ukrainian.PrepareText
-- from the @dobutokO-poetry@ package.
-- In particular, this module can be used to prepare the phonetic language text
-- by applying the most needed grammar to avoid misunderstanding
-- for the produced text. The attention is paid to the prepositions, pronouns, conjunctions
-- and particles that are most commonly connected (or not) in a significant way
-- with the next text.
-- Uses the information from:
-- https://uk.wikipedia.org/wiki/%D0%A1%D0%BF%D0%BE%D0%BB%D1%83%D1%87%D0%BD%D0%B8%D0%BA
-- and
-- https://uk.wikipedia.org/wiki/%D0%A7%D0%B0%D1%81%D1%82%D0%BA%D0%B0_(%D0%BC%D0%BE%D0%B2%D0%BE%D0%B7%D0%BD%D0%B0%D0%B2%D1%81%D1%82%D0%B2%D0%BE)
--
-- Uses arrays instead of vectors.
-- A list of basic (but, probably not complete and needed to be extended as needed) English words (the articles, pronouns,
-- particles, conjunctions etc.) the corresponding phonetic language translations of which are intended to be used as a
-- 'Concatenations' here is written to the file EnglishConcatenated.txt in the source tarball.

module Phladiprelio.General.PrepareText (
  Concatenations
  -- * Basic functions
  , concatWordsFromLeftToRight
  , splitLines
  , splitLinesN
  , isSpC
  , sort2Concat
  -- * The end-user functions
  , prepareText
  , prepareTextN
  , growLinesN
  , prepareGrowTextMN
  , tuneLinesN
  , prepareTuneTextMN
  -- * Used to transform after convertToProperphonetic language from mmsyn6ukr package
  , isPLL
) where

import GHC.Base
import Data.List
import Data.Bits (shiftR)
import GHC.Num ((+),(-),abs)
import CaseBi.Arr (getBFstL',getBFst')
import Data.List.InnToOut.Basic (mapI)
import Data.Char (isAlpha,toLower)
import GHC.Arr
import Data.Tuple (fst)

-- | The lists in the list are sorted in the descending order by the word counts in the inner 'String's. All the 'String's
-- in each inner list have the same number of words, and if there is no 'String' with some intermediate number of words (e. g. there
-- are 'String's for 4 and 2 words, but there is no one for 3 words 'String's) then such corresponding list is absent (since
-- the 0.9.0.0 version). Probably the maximum number of words can be not more than 4, and the minimum number is
-- not less than 1, but it depends. The 'String's in the inner lists must be (unlike the inner
-- lists themselves) sorted in the ascending order for the data type to work correctly in the functions of the module.
type Concatenations = [[String]]

type ConcatenationsArr = [Array Int (String,Bool)]

defaultConversion :: Concatenations -> ConcatenationsArr
defaultConversion :: Concatenations -> ConcatenationsArr
defaultConversion Concatenations
ysss = forall a b. (a -> b) -> [a] -> [b]
map ([String] -> Array Int (String, Bool)
f forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> Bool
null)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> Bool
null) forall a b. (a -> b) -> a -> b
$ Concatenations
ysss
  where f :: [String] -> Array Int (String,Bool)
        f :: [String] -> Array Int (String, Bool)
f [String]
yss = let l :: Int
l = forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
yss in forall i e. Ix i => (i, i) -> [e] -> Array i e
listArray (Int
0,Int
lforall a. Num a => a -> a -> a
-Int
1) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. [a] -> [b] -> [(a, b)]
zip [String]
yss forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
cycle forall a b. (a -> b) -> a -> b
$ [Bool
True]

-- | Is used to convert a phonetic language text into list of 'String' each of which is ready to be
-- used by the functions from the other modules in the package.
-- It applies minimal grammar links and connections between the most commonly used phonetic language
-- words that \"should\" be paired and not dealt with separately
-- to avoid the misinterpretation and preserve maximum of the semantics for the
-- \"phonetic\" language on the phonetic language basis.
prepareText
  :: [[String]] -- ^ Is intended to become a valid 'Concatenations'.
  -> [[String]] -- ^ Is intended to become a valid 'Concatenations'.
  -> String -- ^ A sorted 'String' of possible characters in the phonetic language representation.
  -> String
  -> [String]
prepareText :: Concatenations -> Concatenations -> String -> String -> [String]
prepareText = Int
-> Concatenations -> Concatenations -> String -> String -> [String]
prepareTextN Int
7
{-# INLINE prepareText #-}

sort2Concat
 :: [[String]]
 -> Concatenations  -- ^ Data used to concatenate the basic grammar preserving words and word sequences to the next word or
 -- to the previous word to
 -- leave the most of the meaning (semantics) of the text available to easy understanding while reading and listening to.
sort2Concat :: Concatenations -> Concatenations
sort2Concat Concatenations
xsss
 | forall (t :: * -> *) a. Foldable t => t a -> Bool
null Concatenations
xsss = []
 | Bool
otherwise = forall a b. (a -> b) -> [a] -> [b]
map forall a. Ord a => [a] -> [a]
sort forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
reverse forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn (forall a b. (a -> b) -> [a] -> [b]
map (forall (t :: * -> *) a. Foldable t => t a -> Int
length forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words)) forall a b. (a -> b) -> a -> b
$ Concatenations
xsss

-----------------------------------------------------

complexWords2 :: ConcatenationsArr -> String -> (String -> String,String)
complexWords2 :: ConcatenationsArr -> String -> (String -> String, String)
complexWords2 ysss :: ConcatenationsArr
ysss@(Array Int (String, Bool)
yss:ConcatenationsArr
zsss) zs :: String
zs@(Char
r:String
rs)
 | forall a b. Ord a => (b, Array Int (a, b)) -> a -> b
getBFst' (Bool
False, Array Int (String, Bool)
yss) forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String
unwords forall a b. (a -> b) -> a -> b
$ [String]
tss = ((String
uwxs forall a. Monoid a => a -> a -> a
`mappend`), [String] -> String
unwords [String]
uss)
 | Bool
otherwise = ConcatenationsArr -> String -> (String -> String, String)
complexWords2 ConcatenationsArr
zsss String
zs
      where y :: Int
y = forall (t :: * -> *) a. Foldable t => t a -> Int
length forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall i e. Array i e -> Int -> e
unsafeAt Array Int (String, Bool)
yss forall a b. (a -> b) -> a -> b
$ Int
0
            ([String]
tss,[String]
uss) = forall a. Int -> [a] -> ([a], [a])
splitAt Int
y forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words forall a b. (a -> b) -> a -> b
$ String
zs
            uwxs :: String
uwxs = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [String]
tss
complexWords2 ConcatenationsArr
_ String
zs = (forall a. a -> a
id,String
zs)

pairCompl :: (String -> String,String) -> (String,String)
pairCompl :: (String -> String, String) -> (String, String)
pairCompl (String -> String
f,String
xs) = (String -> String
f [],String
xs)

splitWords :: ConcatenationsArr -> [String] -> String -> (String,String)
splitWords :: ConcatenationsArr -> [String] -> String -> (String, String)
splitWords ConcatenationsArr
ysss [String]
tss String
zs = let (String
ws,String
us) = (String -> String, String) -> (String, String)
pairCompl forall b c a. (b -> c) -> (a -> b) -> a -> c
. ConcatenationsArr -> String -> (String -> String, String)
complexWords2 ConcatenationsArr
ysss forall a b. (a -> b) -> a -> b
$ String
zs in if
  | forall (t :: * -> *) a. Foldable t => t a -> Bool
null forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words forall a b. (a -> b) -> a -> b
$ String
zs -> (forall a. Monoid a => [a] -> a
mconcat [String]
tss,[])
  | forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
ws -> (\([String]
xss,[String]
uss) -> (forall a. Monoid a => [a] -> a
mconcat ([String]
tss forall a. Monoid a => a -> a -> a
`mappend` [String]
xss), [String] -> String
unwords [String]
uss)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Int -> [a] -> ([a], [a])
splitAt Int
1 forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words forall a b. (a -> b) -> a -> b
$ String
zs
  | Bool
otherwise -> ConcatenationsArr -> [String] -> String -> (String, String)
splitWords ConcatenationsArr
ysss ([String]
tss forall a. Monoid a => a -> a -> a
`mappend` [String
ws]) String
us

concatWordsFromLeftToRight :: ConcatenationsArr -> String -> [String]
concatWordsFromLeftToRight :: ConcatenationsArr -> String -> [String]
concatWordsFromLeftToRight ConcatenationsArr
ysss String
zs = let (String
ws,String
us) = ConcatenationsArr -> [String] -> String -> (String, String)
splitWords ConcatenationsArr
ysss [] String
zs in
  if forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
us then [String
ws] else String
ws forall a. a -> [a] -> [a]
: ConcatenationsArr -> String -> [String]
concatWordsFromLeftToRight ConcatenationsArr
ysss String
us

-----------------------------------------------------

append2prependConv :: Concatenations -> Concatenations
append2prependConv :: Concatenations -> Concatenations
append2prependConv = forall a b. (a -> b) -> [a] -> [b]
map (forall a b. (a -> b) -> [a] -> [b]
map ([String] -> String
unwords forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
reverse forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words))
{-# INLINE append2prependConv #-}

left2right :: [String] -> String
left2right :: [String] -> String
left2right = [String] -> String
unwords forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
reverse forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map forall a. [a] -> [a]
reverse
{-# INLINE left2right #-}

-----------------------------------------------------

-- | A generalized variant of the 'prepareText' with the arbitrary maximum number of the words in the lines given as the first argument.
prepareTextN
 :: Int -- ^ A maximum number of the words or their concatenations in the resulting list of 'String's.
 -> [[String]] -- ^ Is intended to become a valid 'Concatenations'.
 -> [[String]] -- ^ Is intended to become a valid 'Concatenations'.
 -> String -- ^ A sorted 'String' of possible characters in the phonetic language representation.
 -> String
 -> [String]
prepareTextN :: Int
-> Concatenations -> Concatenations -> String -> String -> [String]
prepareTextN Int
n Concatenations
ysss Concatenations
zsss String
xs = forall a. (a -> Bool) -> [a] -> [a]
filter (forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (String -> Char -> Bool
isPLL String
xs)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [String] -> [String]
splitLinesN Int
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map ([String] -> String
left2right forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  ConcatenationsArr -> String -> [String]
concatWordsFromLeftToRight (Concatenations -> ConcatenationsArr
defaultConversion forall b c a. (b -> c) -> (a -> b) -> a -> c
. Concatenations -> Concatenations
sort2Concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. Concatenations -> Concatenations
append2prependConv forall a b. (a -> b) -> a -> b
$ Concatenations
zsss) forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String
left2right forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  ConcatenationsArr -> String -> [String]
concatWordsFromLeftToRight (Concatenations -> ConcatenationsArr
defaultConversion forall b c a. (b -> c) -> (a -> b) -> a -> c
. Concatenations -> Concatenations
sort2Concat forall a b. (a -> b) -> a -> b
$ Concatenations
ysss)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> Bool
null) forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lines

-- | A predicate to check whether the given character is one of the \"\' \\x2019\\x02BC-\".
isSpC :: Char -> Bool
isSpC :: Char -> Bool
isSpC Char
x = Char
x forall a. Eq a => a -> a -> Bool
== Char
'\'' Bool -> Bool -> Bool
|| Char
x forall a. Eq a => a -> a -> Bool
== Char
' ' Bool -> Bool -> Bool
|| Char
x forall a. Eq a => a -> a -> Bool
== Char
'\x2019' Bool -> Bool -> Bool
|| Char
x forall a. Eq a => a -> a -> Bool
== Char
'\x02BC' Bool -> Bool -> Bool
|| Char
x forall a. Eq a => a -> a -> Bool
== Char
'-'
{-# INLINE isSpC #-}
{-# DEPRECATED #-}

-- | The first argument must be a 'String' of sorted 'Char's in the ascending order of all possible symbols that can be
-- used for the text in the phonetic language selected. Can be prepared beforehand, or read from the file.
isPLL :: String -> Char -> Bool
isPLL :: String -> Char -> Bool
isPLL String
xs Char
y = forall a b. Ord a => b -> [(a, b)] -> a -> b
getBFstL' Bool
False (forall a b. [a] -> [b] -> [(a, b)]
zip String
xs forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Int -> a -> [a]
replicate Int
10000 forall a b. (a -> b) -> a -> b
$ Bool
True) Char
y

-- | The function is recursive and is applied so that all returned elements ('String') are no longer than 7 words in them.
splitLines :: [String] -> [String]
splitLines :: [String] -> [String]
splitLines = Int -> [String] -> [String]
splitLinesN Int
7
{-# INLINE splitLines #-}

-- | A generalized variant of the 'splitLines' with the arbitrary maximum number of the words in the lines given as the first argument.
splitLinesN :: Int -> [String] -> [String]
splitLinesN :: Int -> [String] -> [String]
splitLinesN Int
n [String]
xss
 | forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
xss Bool -> Bool -> Bool
|| Int
n forall a. Ord a => a -> a -> Bool
<= Int
0 = []
 | Bool
otherwise = forall a. (a -> Bool) -> (a -> [a]) -> [a] -> [a]
mapI (\String
xs -> forall a. Ord a => a -> a -> Ordering
compare (forall (t :: * -> *) a. Foldable t => t a -> Int
length forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words forall a b. (a -> b) -> a -> b
$ String
xs) Int
n forall a. Eq a => a -> a -> Bool
== Ordering
GT) (\String
xs -> let yss :: [String]
yss = String -> [String]
words String
xs in
     Int -> [String] -> [String]
splitLinesN Int
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map [String] -> String
unwords forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\([String]
q,[String]
r) -> [[String]
q,[String]
r]) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Int -> [a] -> ([a], [a])
splitAt (forall a. Bits a => a -> Int -> a
shiftR (forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
yss) Int
1) forall a b. (a -> b) -> a -> b
$ [String]
yss) forall a b. (a -> b) -> a -> b
$ [String]
xss

------------------------------------------------

{-| @ since 0.8.0.0
Given a positive number and a list tries to rearrange the list's 'String's by concatenation of the several elements of the list
so that the number of words in every new 'String' in the resulting list is not greater than the 'Int' argument. If some of the
'String's have more than that number quantity of the words then these 'String's are preserved.
-}
growLinesN :: Int -> [String] -> [String]
growLinesN :: Int -> [String] -> [String]
growLinesN Int
n [String]
xss
 | forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
xss Bool -> Bool -> Bool
|| Int
n forall a. Ord a => a -> a -> Bool
< Int
0 = []
 | Bool
otherwise = [String] -> String
unwords [String]
yss forall a. a -> [a] -> [a]
: Int -> [String] -> [String]
growLinesN Int
n [String]
zss
     where l :: Int
l = forall (t :: * -> *) a. Foldable t => t a -> Int
length forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
takeWhile (forall a. Ord a => a -> a -> Bool
<= Int
n) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> a -> a) -> [a] -> [a]
scanl1 forall a. Num a => a -> a -> a
(+) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (forall (t :: * -> *) a. Foldable t => t a -> Int
length forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words) forall a b. (a -> b) -> a -> b
$ [String]
xss -- the maximum number of lines to be taken
           ([String]
yss,[String]
zss) = forall a. Int -> [a] -> ([a], [a])
splitAt (forall a. Ord a => a -> a -> a
max Int
l Int
1) [String]
xss

{-| @ since 0.8.0.0
The function combines the 'prepareTextN' and 'growLinesN' function. Applies needed phonetic language preparations
to the text and tries to \'grow\' the resulting 'String's in the list so that the number of the words in every
of them is no greater than the given first 'Int' number.
-}
prepareGrowTextMN
 :: Int -- ^ A maximum number of the words or their concatenations in the resulting list of 'String's.
 -> Int -- ^ A number of words in every 'String' that the function firstly forms. To have some sense of usage, must be less than the first argument.
 -> [[String]] -- ^ Is intended to become a valid 'Concatenations'.
 -> [[String]] -- ^ Is intended to become a valid 'Concatenations'.
 -> String -- ^ A sorted 'String' of possible characters in the phonetic language representation.
 -> String
 -> [String]
prepareGrowTextMN :: Int
-> Int
-> Concatenations
-> Concatenations
-> String
-> String
-> [String]
prepareGrowTextMN Int
m Int
n Concatenations
ysss Concatenations
zsss String
xs = Int -> [String] -> [String]
growLinesN Int
m forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int
-> Concatenations -> Concatenations -> String -> String -> [String]
prepareTextN Int
n Concatenations
ysss Concatenations
zsss String
xs
{-# INLINE prepareGrowTextMN #-}

-------------------------------------

{-| @ since 0.6.0.0
Recursively splits the concatenated list of lines of words so that in every resulting 'String' in the list
except the last one there is just 'Int' -- the first argument -- words.
-}
tuneLinesN :: Int -> [String] -> [String]
tuneLinesN :: Int -> [String] -> [String]
tuneLinesN Int
n [String]
xss
 | forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
xss Bool -> Bool -> Bool
|| Int
n forall a. Ord a => a -> a -> Bool
< Int
0 = []
 | Bool
otherwise =
    let wss :: [String]
wss = String -> [String]
words forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String
unwords forall a b. (a -> b) -> a -> b
$ [String]
xss
        ([String]
yss,[String]
zss) = forall a. Int -> [a] -> ([a], [a])
splitAt Int
n [String]
wss
          in [String] -> String
unwords [String]
yss forall a. a -> [a] -> [a]
: Int -> [String] -> [String]
tuneLinesN Int
n [String]
zss

{-| @ since 0.6.0.0
The function combines the 'prepareTextN' and 'tuneLinesN' functions. Applies needed phonetic language preparations
to the phonetic language text and splits the list of 'String's so that the number of the words in each of them (except the last one)
is equal the given first 'Int' number.
-}
prepareTuneTextMN
  :: Int -- ^ A maximum number of the words or their concatenations in the resulting list of 'String's.
  -> Int -- ^ A number of words in every 'String' that the function firstly forms. To have some sense of usage, must be less than the first argument.
  -> [[String]] -- ^ Is intended to become a valid 'Concatenations'.
  -> [[String]] -- ^ Is intended to become a valid 'Concatenations'.
  -> String -- ^ A sorted 'String' of possible characters in the phonetic language representation.
  -> String
  -> [String]
prepareTuneTextMN :: Int
-> Int
-> Concatenations
-> Concatenations
-> String
-> String
-> [String]
prepareTuneTextMN Int
m Int
n Concatenations
ysss Concatenations
zsss String
xs = Int -> [String] -> [String]
tuneLinesN Int
m forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int
-> Concatenations -> Concatenations -> String -> String -> [String]
prepareTextN Int
n Concatenations
ysss Concatenations
zsss String
xs
{-# INLINE prepareTuneTextMN #-}