-- |
-- Module      :  Phonetic.Languages.General.Simple.Parsing
-- Copyright   :  (c) OleksandrZhabenko 2020-2022
-- License     :  MIT
-- Stability   :  Experimental
-- Maintainer  :  olexandr543@yahoo.com
--
-- Prints the rearrangements with the \"property\" information for the phonetic language text. Is used for the
-- Phonetic.Languages.General.Simple module functionality.

{-# OPTIONS_GHC -threaded #-}
{-# OPTIONS_HADDOCK show-extensions #-}
{-# LANGUAGE BangPatterns #-}

module Phonetic.Languages.General.Simple.Parsing where

import Phonetic.Languages.Array.General.PropertiesSyllablesG2
import Data.Phonetic.Languages.PrepareText
import System.Environment (getArgs)
import Phonetic.Languages.General.Simple
import Data.List (sort)
import GHC.Arr
import Data.Phonetic.Languages.Base
import Data.Phonetic.Languages.Syllables
import Text.Read (readMaybe)
import Data.Maybe (fromMaybe)
import Data.Phonetic.Languages.SpecificationsRead
import Interpreter.StringConversion (readFileIfAny)
import CLI.Arguments
import CLI.Arguments.Parsing
import CLI.Arguments.Get
import qualified Phonetic.Languages.Permutations.Represent as R
import Phonetic.Languages.EmphasisG

-- | Prints the rearrangements with the \"property\" information for the phonetic language text.
-- Most of the arguments are obtained from the 'getArgs' function.
-- While used, it distinguishes between two groups of command line arguments: the first four ones and the others afterwards.
-- The first 5 arguments are the file names with the specifications.
-- 1) with the 'GWritingSystemPRPLX' specifications only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema);
--
-- 2) with the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's;
--
-- 3) with the 'SegmentRulesG' specifications only;
--
-- 4) with the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be prepended to the next word.
--
-- 5) with the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be appended to the previous word.
--
-- Afterwards, the meaning of the command line arguments are as follows (from the left to the right).
--
-- The first next command line argument must be a
-- positive 'Int' number and is a number of printed variants for the line (if they are present, otherwise just all possible variants are printed).
-- The second one is the number of the intervals into which the all range of possible metrics values are divided. The next numeric arguments that must be
-- sequenced without interruptions further are treated as the numbers of the intervals (counting is started from 1) which values are moved to the maximum
-- values of the metrics interval using the 'unsafeSwapVecIWithMaxI' function. The first textual command line argument should be in the form either \"y0\",
-- or \"0y\", or \"yy\", or \"y\", or \"02y\", or \"y2\", or \"03y\", or \"yy2\", or \"y3\", or some other variant and specifies, which property or properties is or are evaluated.
-- The rest of the command line arguments is the phonetic text. Besides, you can use multiple metrices (no more than 5 different ones) together by
-- using \"+m\" ... \"-m\" command line arguments.
--
-- You can specify constraints according to the 'decodeLConstraints' function between +A and -A next command line arguments. If so, the program will
-- ask you additional question before proceeding. The \"+m\" ... \"-m\" and \"+a\" ... \"-a\" groups must not mutually intersect one another.
argsToSimplePrepare
 :: (Double -> String -> MappingFunctionPL) -- ^ The function that is needed in the 'procRhythmicity23F' function.
 -- Specifies a way how the syllables represented in the phonetic language approach transforms into their durations and
 -- depends on two parameters. Is specific for every phonetic language and every representation, so must be provided
 -- by the user in every case. The example of the function can be found in the package @phonetic-languages-simplified-properties-array@.
 -> [MappingFunctionPL] -- ^ A list of 'PhoPaaW'-based different functions that specifies the syllables durations in the PhoPaaW mode, analogues of the
   -- syllableDurationsD functions from the @ukrainian-phonetics-basics-array@ package. The first one must be probably the most
   --   -- exact one and, therefore, the default one.
 -> (String -> Bool) -- ^ The predicate that checks whether the given argument is not a phonetic language word in the representation.
 -> IO ()
argsToSimplePrepare h qs p = do
 args50 <- getArgs
 let (argsA,argsB,argsC1,argss) = args2Args31R fstCharsMA specs1 args50
     args00000 = snd . takeBsR [("+p",1)] $ args50
     pairwisePermutations = R.bTransform2Perms . getB "+p" $ argsB
     (txtPFs,args0000F) = takeCs1R fstCharsT cSpecs1T args00000
     textProcessmentFssFs = getC "+t" txtPFs
     textProcessment0
       | null . concat . getB "+t" . fst . takeBsR [("+t",1)] $ args00000 = []
       | otherwise = "+t" `mappend` (concat . getB "+t" . fst . takeBsR [("+t",1)] $ args00000)
     textProcessment1 = fromMaybe 70 (readMaybe (drop 2 textProcessment0)::Maybe Int)
     (rcrs,args000) = takeAsR [("+r",0)] . filter (not . null) $ args0000F
     recursiveMode = oneA "+r" rcrs -- Specifies whether to use the interactive recursive mode
     (!args15,!args00) = splitAt 5 args000
     [fileGWrSys, controlFile, segmentRulesFile, concatenationsFileP,concatenationsFileA] = args15
 (gwrsCnts, controlConts, segmentData, concatenationsFileP1, concatenationsFileA1, toFileMode1, interactiveP, jstL0, args0, coeffs, coeffsWX, args, lstW,syllables,syllablesVs,verbose) <- argsProcessment fileGWrSys controlFile segmentRulesFile concatenationsFileP concatenationsFileA args00
 let (wrs, ks, arr, gs, js, vs, ysss, zzzsss, ws) = innerProcessmentSimple gwrsCnts controlConts segmentData concatenationsFileP1 concatenationsFileA1
 textProcessmentFss0 <- mapM (readFileIfAny) textProcessmentFssFs
 let textProcessmentFss = filter (not . null) textProcessmentFss0
 if isPair coeffs then generalProc3G pairwisePermutations p textProcessmentFss textProcessment0 textProcessment1 wrs ks arr gs js vs h qs ysss zzzsss ws {- old arguments follow -} toFileMode1 recursiveMode interactiveP jstL0 args0 coeffs coeffsWX (drop 1 args) lstW syllables syllablesVs verbose
 else generalProc3G pairwisePermutations p textProcessmentFss textProcessment0 textProcessment1 wrs ks arr gs js vs h qs ysss zzzsss ws toFileMode1 recursiveMode interactiveP jstL0 args0 coeffs coeffsWX args lstW syllables syllablesVs verbose

-- | Similar to the 'argsToSimplePrepare' function, but takes explicitly the four 'FilePath's for the files
-- respectively and the last argument the 'String' with all the other specifications. If it is not proper,
-- the functions returns an error.
argsToSimplePrepare4Files
 :: R.PermutationsType -- ^ Whether to use just one of the express permutations, or the full universal set.
 -> FilePath -- ^ With the 'GWritingSystemPRPLX' specifications only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema);
 -> FilePath -- ^ With the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's;
 -> FilePath -- ^ With the 'SegmentRulesG' specifications only;
 -> FilePath -- ^ With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be prepended to the next word.
 -> FilePath -- ^ With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be appended to the previous word.
 -> (Double -> String -> MappingFunctionPL) -- ^ The function that is needed in the 'procRhythmicity23F' function.
 -- Specifies a way how the syllables represented in the phonetic language approach transforms into their durations and
 -- depends on two parameters. Is specific for every phonetic language and every representation, so must be provided
 -- by the user in every case. The example of the function can be found in the package @phonetic-languages-simplified-properties-array@.
 -> [MappingFunctionPL] -- ^ A list of 4 different functions that specifies the syllables durations, analogues of the
  -- syllableDurationsD functions from the @ukrainian-phonetics-basics-array@ package. The last one must be probably the most
  -- exact one and, therefore, the default one.
 -> String -- ^ A 'String' of data that are the further command line arguments for the function 'argsToSimplePrepare'.
 -> (String -> Bool) -- ^ The predicate that checks whether the given argument is not a phonetic language word in the representation.
 -> [String]
 -> String -- ^ If empty, the function is just 'generalProc2G' with the arguments starting from the first 'Bool' here.
 -> Int
 -> IO ()
argsToSimplePrepare4Files pairwisePermutations fileGWrSys controlFile segmentRulesFile concatenationsFileP concatenationsFileA h qs other_args p textProcessmentFss textProcessment0 textProcessment1 = do
 let args000 = drop 5 . words $ other_args
     (rcs,args00) = takeAsR [("+r",0)] args000
     recursiveMode = oneA "+r" rcs -- Specifies whether to use the interactive recursive mode
 (gwrsCnts, controlConts, segmentData, concatenationsFileP1, concatenationsFileA1, toFileMode1, interactiveP, jstL0, args0, coeffs, coeffsWX, args, lstW,syllables,syllablesVs,verbose) <- argsProcessment fileGWrSys controlFile segmentRulesFile concatenationsFileP concatenationsFileA args00
 let (wrs, ks, arr, gs, js, vs, ysss, zzzsss, ws) = innerProcessmentSimple gwrsCnts controlConts segmentData concatenationsFileP1 concatenationsFileA1
 if isPair coeffs then generalProc3G pairwisePermutations p textProcessmentFss textProcessment0 textProcessment1 wrs ks arr gs js vs h qs ysss zzzsss ws {- old arguments follow -} toFileMode1 recursiveMode interactiveP jstL0 args0 coeffs coeffsWX (drop 1 args) lstW syllables syllablesVs verbose
 else generalProc3G pairwisePermutations p textProcessmentFss textProcessment0 textProcessment1 wrs ks arr gs js vs h qs ysss zzzsss ws toFileMode1 recursiveMode interactiveP jstL0 args0 coeffs coeffsWX args lstW syllables syllablesVs verbose

innerProcessmentSimple
  :: String -- ^ Must be a valid 'GWritingSystemPRPLX' specifications 'String' representation only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema);
  -> String -- ^ Must be a 'String' with the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's;
  -> String -- ^ Must be a 'String' with the 'SegmentRulesG' specifications only;
  -> String -- ^ Must be a 'String' with the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be prepended to the next word.
  -> String -- ^ Must be a 'String' with the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be appended to the previous word.
  -> (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification, SegmentRulesG, String, String, Concatenations, Concatenations, String)
innerProcessmentSimple gwrsCnts controlConts segmentData concatenationsFileP concatenationsFileA =
 let [allophonesGs, charClfs, jss, vss, wss] = groupBetweenChars '~' . lines $ controlConts
     wrs = getGWritingSystem '~' gwrsCnts
     ks = sort . fromMaybe [] $ (readMaybe (unwords allophonesGs)::Maybe [(Char, Char)])
     arr = read (unwords charClfs)::Array Int PRS -- The 'Array' must be previously sorted in the ascending order.
     gs = read segmentData::SegmentRulesG
     ysss = sort2Concat . fromMaybe [] $ (readMaybe concatenationsFileP::Maybe [[String]])
     zzzsss = sort2Concat . fromMaybe [] $ (readMaybe concatenationsFileA::Maybe [[String]])
     js = concat jss
     vs = concat vss
     ws = sort . concat $ wss
       in (wrs, ks, arr, gs, js, vs, ysss, zzzsss, ws)

{-| @ since 0.5.0.0
The function also can process \"w\" and \"x\" lines so returns two 'Coeffs2' values.
-}
argsProcessment
 :: FilePath -- ^ With the 'GWritingSystemPRPLX' specifications only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema);
 -> FilePath -- ^ With the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's;
 -> FilePath -- ^ With the 'SegmentRulesG' specifications only;
 -> FilePath -- ^ With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be prepended to the next word.
 -> FilePath -- ^ With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be appended to the previous word.
 -> [String] -- ^ List of other args afterwards.
 -> IO (String, String, String, String, String, String, Bool, Bool, [String], Coeffs2, Coeffs2, [String], Bool, Bool,Int,Int)
argsProcessment fileGWrSys controlFile segmentRulesFile concatenationsFileP concatenationsFileA args00 = do
 let args0 = snd . takeBsR [("+x",1)] . snd . takeAsR (zip ["+b","+l","+bl","+i"] . cycle $ [0]) $ args00
     lstW = any (\x -> x == "+b" || x == "+bl") args00 -- If one of the command line options is \"+b\" or \"+bl\" then the last word of the line will remain the last one.
     jstL0 = any (\x -> x == "+l" || x == "+bl") args00 -- If one of the command line options is \"+l\" or \"+bl\" then the program outputs just lines without metrices values.
     toFileMode1 = concat . getB "+f" . bSpcs $ args0  -- Prints the last resulting line of the interactive mode processment (the last selected variant) to the file and also to the stdout.
     interactiveP = any (== "+i") args00 || not (null toFileMode1) -- If one of the command line options is \"+i\", or \"+f\" then the program prints the variants and then prompts for the preferred variant. Afterwards, it prints just that variant alone.
     args01 = snd . takeCs1R fstCharsMA [("+a",-1)] $ args0
     syllables = oneB "+s" . fst . takeBsR [("+s",1)] $ args00
     syllablesVs = if syllables then fromMaybe 1 (readMaybe (concat . getB "+s" . fst . takeBsR [("+s",1)] $ args00)::Maybe Int) else 0
     verbose = if (oneB "+v" . fst . takeBsR [("+v",1)] $ args00) then fromMaybe 1 (readMaybe (concat . getB "+v" . fst . takeBsR [("+v",1)] $ args00)::Maybe Int) else 0
     args02
         | null toFileMode1 = filter (/= "+f") args01
         | otherwise = snd . takeBsR [("+f",1)] $ args01
     args = snd . takeCs1R fstCharsMA [("+m",-1)] $ args02
     coeffs = readCF . concat . take 1 $ args
     coeffsWX = readCF . concat . getB "+x" . fst . takeBsR [("+x",1)] $ args00
 [controlConts, gwrsCnts, segmentData, concatenationsFileP1, concatenationsFileA1] <- mapM readFile [controlFile, fileGWrSys, segmentRulesFile, concatenationsFileP, concatenationsFileA]
 return (gwrsCnts, controlConts, segmentData, concatenationsFileP1, concatenationsFileA1, toFileMode1, interactiveP, jstL0, args0, coeffs, coeffsWX, args, lstW,syllables,syllablesVs,verbose)

aSpecs :: CLSpecifications
aSpecs = zip ["+r","+b","+l","+bl","+i"] . cycle $ [0]

aSpcs :: [String] -> Args
aSpcs = fst . takeAsR aSpecs

cSpecs1MA :: CLSpecifications
cSpecs1MA = zip ["+m","+a"] . cycle $ [-1]

fstCharsMA :: FirstChars
fstCharsMA = ('+','-')

cSpecs1T :: CLSpecifications
cSpecs1T = [("+t",-1)]

fstCharsT :: FirstChars
fstCharsT = ('+','^')

bSpecs :: CLSpecifications
bSpecs = zip ["+d","+f","+p","+s","+v"] . cycle $ [1]

bSpcs :: [String] -> Args
bSpcs = fst . takeBsR bSpecs

specs1 :: CLSpecifications
specs1 = aSpecs `mappend` bSpecs `mappend` cSpecs1MA