-- | -- Module : Phonetic.Languages.General.Simple.Parsing -- Copyright : (c) OleksandrZhabenko 2020-2021 -- License : MIT -- Stability : Experimental -- Maintainer : olexandr543@yahoo.com -- -- Prints the rearrangements with the \"property\" information for the phonetic language text. Is used for the -- Phonetic.Languages.General.Simple module functionality. {-# OPTIONS_GHC -threaded #-} {-# OPTIONS_HADDOCK show-extensions #-} {-# LANGUAGE BangPatterns #-} module Phonetic.Languages.General.Simple.Parsing where import Phonetic.Languages.Array.General.PropertiesSyllablesG2 import Data.Phonetic.Languages.PrepareText import System.Environment (getArgs) import Phonetic.Languages.General.Simple import Data.List (sort) import GHC.Arr import Data.Phonetic.Languages.Base import Data.Phonetic.Languages.Syllables import Text.Read (readMaybe) import Data.Maybe (fromMaybe) import Data.Phonetic.Languages.SpecificationsRead -- | Prints the rearrangements with the \"property\" information for the phonetic language text. -- Most of the arguments are obtained from the 'getArgs' function. -- While used, it distinguishes between two groups of command line arguments: the first four ones and the others afterwards. -- The first 4 arguments are the file names with the specifications. -- 1) with the 'GWritingSystemPRPLX' specifications only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema); -- -- 2) with the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's; -- -- 3) with the 'SegmentRulesG' specifications only; -- -- 4) with the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). -- -- Afterwards, the meaning of the command line arguments are as follows (from the left to the right). -- -- The first next command line argument must be a -- positive 'Int' number and is a number of printed variants for the line (if they are present, otherwise just all possible variants are printed). -- The second one is the number of the intervals into which the all range of possible metrics values are divided. The next numeric arguments that must be -- sequenced without interruptions further are treated as the numbers of the intervals (counting is started from 1) which values are moved to the maximum -- values of the metrics interval using the 'unsafeSwapVecIWithMaxI' function. The first textual command line argument should be in the form either \"y0\", -- or \"0y\", or \"yy\", or \"y\", or \"02y\", or \"y2\", or \"03y\", or \"yy2\", or \"y3\", or some other variant and specifies, which property or properties is or are evaluated. -- The rest of the command line arguments is the phonetic text. Besides, you can use multiple metrices (no more than 5 different ones) together by -- using \"+M\" ... \"-M\" command line arguments. -- -- You can specify constraints according to the 'decodeLConstraints' function between +A and -A next command line arguments. If so, the program will -- ask you additional question before proceeding. The \"+M\" ... \"-M\" and \"+A\" ... \"-A\" groups must not mutually intersect one another. argsToSimplePrepare :: (Double -> String -> ([[[PRS]]] -> [[Double]])) -- ^ The function that is needed in the 'procRhythmicity23F' function. -- Specifies a way how the syllables represented in the phonetic language approach transforms into their durations and -- depends on two parameters. Is specific for every phonetic language and every representation, so must be provided -- by the user in every case. The example of the function can be found in the package @phonetic-languages-simplified-properties-array@. -> [([[[PRS]]] -> [[Double]])] -- ^ A list of 4 different functions that specifies the syllables durations, analogues of the -- syllableDurationsD functions from the @ukrainian-phonetics-basics-array@ package. The last one must be probably the most -- exact one and, therefore, the default one. -> IO () argsToSimplePrepare h qs = do args000 <- getArgs let (!args14,!args00) = splitAt 4 args000 [fileGWrSys, controlFile, segmentRulesFile, concatenationsFileP] = args14 (gwrsCnts, controlConts, segmentData, concatenationsFile, toFileMode1, interactiveP, jstL0, args0, coeffs, coeffsWX, args, lstW) <- argsProcessment fileGWrSys controlFile segmentRulesFile concatenationsFileP args00 let (wrs, ks, arr, gs, js, vs, ysss, ws) = innerProcessmentSimple gwrsCnts controlConts segmentData concatenationsFile if isPair coeffs then generalProc2G wrs ks arr gs js vs h qs ysss ws {- old arguments follow -} toFileMode1 interactiveP jstL0 args0 coeffs coeffsWX (drop 1 args) lstW else generalProc2G wrs ks arr gs js vs h qs ysss ws toFileMode1 interactiveP jstL0 args0 coeffs coeffsWX args lstW -- | Similar to the 'argsToSimplePrepare' function, but takes explicitly the four 'FilePath's for the files -- respectively and the last argument the 'String' with all the other specifications. If it is not proper, -- the functions returns an error. argsToSimplePrepare4Files :: FilePath -- ^ With the 'GWritingSystemPRPLX' specifications only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema); -> FilePath -- ^ With the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's; -> FilePath -- ^ With the 'SegmentRulesG' specifications only; -> FilePath -- ^ With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). -> (Double -> String -> ([[[PRS]]] -> [[Double]])) -- ^ The function that is needed in the 'procRhythmicity23F' function. -- Specifies a way how the syllables represented in the phonetic language approach transforms into their durations and -- depends on two parameters. Is specific for every phonetic language and every representation, so must be provided -- by the user in every case. The example of the function can be found in the package @phonetic-languages-simplified-properties-array@. -> [([[[PRS]]] -> [[Double]])] -- ^ A list of 4 different functions that specifies the syllables durations, analogues of the -- syllableDurationsD functions from the @ukrainian-phonetics-basics-array@ package. The last one must be probably the most -- exact one and, therefore, the default one. -> String -- ^ A 'String' of data that are the further command line arguments for the function 'argsToSimplePrepare'. -> IO () argsToSimplePrepare4Files fileGWrSys controlFile segmentRulesFile concatenationsFileP h qs other_args = do let args000 = words other_args args00 = drop 4 args000 (gwrsCnts, controlConts, segmentData, concatenationsFile, toFileMode1, interactiveP, jstL0, args0, coeffs, coeffsWX, args, lstW) <- argsProcessment fileGWrSys controlFile segmentRulesFile concatenationsFileP args00 let (wrs, ks, arr, gs, js, vs, ysss, ws) = innerProcessmentSimple gwrsCnts controlConts segmentData concatenationsFile if isPair coeffs then generalProc2G wrs ks arr gs js vs h qs ysss ws {- old arguments follow -} toFileMode1 interactiveP jstL0 args0 coeffs coeffsWX (drop 1 args) lstW else generalProc2G wrs ks arr gs js vs h qs ysss ws toFileMode1 interactiveP jstL0 args0 coeffs coeffsWX args lstW innerProcessmentSimple :: String -- ^ Must be a valid 'GWritingSystemPRPLX' specifications 'String' representation only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema); -> String -- ^ Must be a 'String' with the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's; -> String -- ^ Must be a 'String' with the 'SegmentRulesG' specifications only; -> String -- ^ Must be a 'String' with the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). -> (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification, SegmentRulesG, String, String, Concatenations, String) innerProcessmentSimple gwrsCnts controlConts segmentData concatenationsFile = let [allophonesGs, charClfs, jss, vss, wss] = groupBetweenChars '~' . lines $ controlConts wrs = getGWritingSystem '~' gwrsCnts ks = sort . fromMaybe [] $ (readMaybe (unwords allophonesGs)::Maybe [(Char, Char)]) arr = read (unwords charClfs)::Array Int PRS -- The 'Array' must be previously sorted in the ascending order. gs = read segmentData::SegmentRulesG ysss = sort2Concat . fromMaybe [] $ (readMaybe segmentData::Maybe [[String]]) js = concat jss vs = concat vss ws = sort . concat $ wss in (wrs, ks, arr, gs, js, vs, ysss, ws) {-| @ since 0.5.0.0 The function also can process \"w\" and \"x\" lines so returns two 'Coeffs2' values. -} argsProcessment :: FilePath -- ^ With the 'GWritingSystemPRPLX' specifications only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema); -> FilePath -- ^ With the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's; -> FilePath -- ^ With the 'SegmentRulesG' specifications only; -> FilePath -- ^ With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). -> [String] -- ^ List of other args afterwards. -> IO (String, String, String, String, String, Bool, Bool, [String], Coeffs2, Coeffs2, [String], Bool) argsProcessment fileGWrSys controlFile segmentRulesFile concatenationsFileP args00 = do let args0 = filter (\xs -> take 2 xs /= "+x" && xs /= "+b" && xs /= "+l" && xs /= "+bl" && xs /= "+i" && xs /= "+u") args00 lstW = if any (\x -> x == "+b" || x == "+bl") args00 then True else False -- If one of the command line options is \"+b\" or \"+bl\" then the last word of the line will remain the last one. jstL0 = if any (\x -> x == "+l" || x == "+bl") args00 then True else False -- If one of the command line options is \"+l\" or \"+bl\" then the program outputs just lines without metrices values. toFileMode1 = concat . take 1 . drop 1 . dropWhile (/= "+f") $ args0 -- Prints the last resulting line of the interactive mode processment (the last selected variant) to the file and also to the stdout. interactiveP = if any (\xs -> xs == "+i" || xs == "+f") args00 then True else False -- If one of the command line options is \"+i\", or \"+f\" then the program prints the variants and then prompts for the preferred variant. Afterwards, it prints just that variant alone. args01 = takeWhile (/= "+a") args0 `mappend` (drop 1 . dropWhile (/= "-a") $ args0) args02 | null toFileMode1 = filter (/= "+f") args01 | otherwise = let (krs,lrs) = break (== "+f") args01 mrs = drop 2 lrs in krs `mappend` mrs args = takeWhile (/= "+m") args02 `mappend` (drop 1 . dropWhile (/= "-m") $ args02) coeffs = readCF . concat . take 1 $ args coeffsWX = readCF . drop 2 . concat . take 1 . filter (\ts -> take 2 ts == "+x") $ args00 [controlConts, gwrsCnts, segmentData, concatenationsFile] <- mapM readFile [controlFile, fileGWrSys, segmentRulesFile, concatenationsFileP] return (gwrsCnts, controlConts, segmentData, concatenationsFile, toFileMode1, interactiveP, jstL0, args0, coeffs, coeffsWX, args, lstW)