{-# LANGUAGE CPP #-} {-# OPTIONS_HADDOCK show-extensions #-} {-| Module : Numeric.Wrapper.R.GLPK.Phonetic.Languages.Durations Copyright : (c) OleksandrZhabenko 2020-2021 License : MIT Stability : Experimental Maintainer : olexandr543@yahoo.com -- Can be used to calculate the durations of the approximations of the phonemes using some prepared text with its correct (at least mostly) pronunciation. The prepared text is located in the same directory and contains lines -the phonetic language word and its duration in seconds separated with whitespace. The library is intended to use the functionality of the : 1) R programming language https://www.r-project.org/ 2) Rglpk library https://cran.r-project.org/web/packages/Rglpk/index.html 3) GNU GLPK library https://www.gnu.org/software/glpk/glpk.html For more information, please, see the documentation for them. For the model correctness the js here refers to sorted list of the 'Char' representations of the phonetic language phenomenae. The length of the 'String' js is refered to as 'lng'::'Int'. The number of 'pairs'' function elements in the lists is refered to as 'nn'::'Int'. The number of constraints is refered here as 'nc'::'Int'. @nc == nn `quot` 2@. Is generalized from the Numeric.Wrapper.R.GLPK.Phonetics.Ukrainian.Durations module from the @r-glpk-phonetic-languages-ukrainian-durations@ package. -} module Numeric.Wrapper.R.GLPK.Phonetic.Languages.Durations where #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__>=710 /* code that applies only to GHC 7.10.* and higher versions */ import GHC.Base (mconcat) #endif #endif import Data.Monoid hiding (mconcat) import Text.Read import Data.Maybe import CaseBi.Arr (getBFstL') import Data.Foldable (foldl') import GHC.Arr import Numeric import Data.List (intercalate,find,(\\)) import Data.Lists.FLines (newLineEnding) import Data.Foldable.Ix (findIdx1) #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__==708 /* code that applies only to GHC 7.8.* */ mconcat = concat #endif #endif createCoeffsObj :: Int -> [String] -> [Double] createCoeffsObj l xss | length xss < l = f (xss `mappend` replicate (l - length xss) "1.0") | otherwise = f (take l xss) where f = map (\ts -> fromMaybe 1.0 (readMaybe ts::Maybe Double)) countCharInWords :: [String] -> Char -> [Int] countCharInWords xss x | null xss = [] | otherwise = map (length . filter (== x)) xss matrix1Column :: PairwiseC -> [String] -> String -> Char -> [Int] matrix1Column pw xss js x = pairwiseComparings x pw (mconcat [countCharInWords xss x, rs, rs]) where l = length js iX = fromMaybe (-l - 1) . findIdx1 x $ js rs = if iX < 0 then [] else mconcat [replicate iX 0, [1], replicate (l - 1 - iX) 0] pairwiseComparings :: Char -> PairwiseC -> [Int] -> [Int] pairwiseComparings x y zs = zs `mappend` pairs' y x {-| A way to encode the pairs of the phonetic language representations that give some additional associations, connections between elements, usually being caused by some similarity or commonality of the pronunciation act for the phenomenae corresponding to these elements. All ['Int'] must be equal in 'length' throughout the same namespace and this length is given as 'Int' argument in the 'PairwisePL'. This 'Int' parameter is @nn@. -} data PairwisePL = PW Char Int [Int] deriving (Eq, Read, Show) lengthPW :: PairwisePL -> Int lengthPW (PW _ l _) = l charPW :: PairwisePL -> Char charPW (PW c _ _) = c listPW :: PairwisePL -> [Int] listPW (PW _ _ xs) = xs data PairwiseC = LL [PairwisePL] Int deriving (Eq, Read, Show) isCorrectPWC :: PairwiseC -> Bool isCorrectPWC (LL xs n) = n == minimum (map lengthPW xs) pwsC :: PairwiseC -> [PairwisePL] pwsC (LL xs n) = map (\(PW c m ys) -> PW c n . take n $ ys) xs pairs' :: PairwiseC -> Char -> [Int] pairs' y@(LL xs n) x | isCorrectPWC y = let z = find ((== x) . charPW) . pwsC $ y in if isJust z then listPW . fromJust $ z else replicate n 0 | otherwise = error "Numeric.Wrapper.R.GLPK.Phonetic.Languages.Durations.pairs': Not defined for the arguments. " -- | Actually @n@ is a 'length' bss. matrixLine :: Int -- ^ The number of 'pairs'' function elements in the lists. -> PairwiseC -- ^ Actually the data type value that sets the behaviour of the 'pairs'' function. -> [String] -> String -- ^ A sorted list of the 'Char' representations of the phonetic language phenomenae. -> String matrixLine nn pw bss js | null bss || n <=0 = [] | otherwise = mconcat ["mat1 <- matrix(c(", intercalate ", " . map show . concatMap (matrix1Column pw (bss `mappend` bss) js) $ js, "), nrow = ", show (2 * n + 2 * length js + nn), ")", newLineEnding] where n = length bss objLine :: Int -- ^ The length of the 'String' js that is a sorted list of the phonetic language representations as 'Char's that -- appears in the file with test words and their spoken durations. -> [(Int,Int)] -- ^ List of pairs of indices that shows how the input data is related to the representation -- (which coefficients relates to which representation elements). -> Array Int Double -- ^ An array of coefficients. -> String objLine lng xs arr | numElements arr >= lng = mconcat ["obj1 <- c(", intercalate ", " . map (\t -> showFFloat Nothing t "") . objCoeffsNew lng xs $ arr, ")", newLineEnding] | otherwise = error "Numeric.Wrapper.R.GLPK.Phonetic.Languages.Durations.objLine: Not defined for the short argument. " {-| A way to reorder the coefficients of the input and the elements representations related to each other. -} objCoeffsNew :: Int -- ^ The length of the 'String' js that is a sorted list of the phonetic language representations as 'Char's that -- appears in the file with test words and their spoken durations. -> [(Int, Int)] -- ^ List of pairs of indices that shows how the input data is related to the representation -- (which coefficients relates to which representation elements). -> Array Int Double -- ^ An array of coefficients. -> [Double] objCoeffsNew lng xs arr = let lst = map (\(x,y) -> (x,unsafeAt arr y)) xs in map (getBFstL' 1.0 lst) [0..lng - 1] maxLine :: String maxLine = "max1 <- TRUE\n" dirLine :: Int -- ^ The length of the 'String' js that is a sorted list of the phonetic language representations as 'Char's that -- appears in the file with test words and their spoken durations. -> Int -- ^ The number of 'pairs'' function elements in the lists. -> [String] -- ^ An argument of the 'matrixLine' function. -> String -- ^ A sorted list of the 'Char' representations of the phonetic language phenomenae. -> String dirLine lng nn bss js = mconcat ["dir1 <- c(\"<", g "<" bss, "\", \">", g ">" (bss, map (:[]) js), "\"", h0 lng, h (nn `quot` 2), ")", newLineEnding] where g xs ys = (intercalate ("\", \"" `mappend` xs) . replicate (length ys) $ "=") h n = concat . replicate n $ ", \">=\", \"<=\"" h0 n = concat . replicate n $ ", \"<=\"" rhsLineG :: [Double] -> [Double] -> [Double] -> String rhsLineG zs xs ys = mconcat ["rhs1 <- c(" , f (mconcat [xs , ys , zs]) , ")", newLineEnding] where f ts = (intercalate ", " . map (\t -> showFFloat Nothing t "") $ ts) rhsLine :: Int -- ^ The length of the 'String' js that is a sorted list of the phonetic language representations as 'Char's that -- appears in the file with test words and their spoken durations. -> Int -- ^ The number of 'pairs'' function elements in the lists. -> Double -- ^ Maximum duration of the phonetic language element representation in seconds. -> Double -- ^ A minimum positive duration value for some group of phonetic language representation (usually, some sorts of -- phonemes, e. g. vowels) to set some peculiar behaviour for the set of resulting values. -> Double -- ^ A minimum positive duration value for some *special* group of phonetic language representation (usually, some sorts of -- phonemes, e. g. soft sign representation) to set some peculiar behaviour for the set of resulting values. -> Double -- ^ A minimum positive duration value for all other phonetic language representations (usually, some sorts of -- phonemes) to set a general (common) behaviour for the set of resulting values. -> [Int] -- ^ A list of indices of the phonetic languages representations in their sorted in ascending order sequence that -- corresponds to the elements from the some group of representations (e. g. vowels). -> [Int] -- ^ A list of indices of the phonetic languages representations in their sorted in ascending order sequence that -- corresponds to the elements from the special group of representations (e. g. soft sign). -> [Double] -> [Double] -> String rhsLine lng nn mx mn1 mnSpecial mnG xs1 sps1 = rhsLineG . mconcat $ [minDurations lng mn1 mnSpecial mnG xs1 sps1, maxDurations lng mx, constraintsR1 (nn `quot` 2)] constraintsR1 :: Int -> [Double] constraintsR1 n = replicate (2 * n) 0.0 minDurations :: Int -- ^ The length of the 'String' js that is a sorted list of the phonetic language representations as 'Char's that -- appears in the file with test words and their spoken durations. -> Double -- ^ A minimum positive duration value for some group of phonetic language representation (usually, some sorts of -- phonemes, e. g. vowels) to set some peculiar behaviour for the set of resulting values. -> Double -- ^ A minimum positive duration value for some *special* group of phonetic language representation (usually, some sorts of -- phonemes, e. g. soft sign representation) to set some peculiar behaviour for the set of resulting values. -> Double -- ^ A minimum positive duration value for all other phonetic language representations (usually, some sorts of -- phonemes) to set a general (common) behaviour for the set of resulting values. -> [Int] -- ^ A list of indices of the phonetic languages representations in their sorted in ascending order sequence that -- corresponds to the elements from the some group of representations (e. g. vowels). -> [Int] -- ^ A list of indices of the phonetic languages representations in their sorted in ascending order sequence that -- corresponds to the elements from the special group of representations (e. g. soft sign). -> [Double] minDurations lng mn1 mnSpecial mnG xs1 sps1 = map h [0..lng - 1] where xs2 | maximum xs1 <= lng - 1 = filter (>= 0) xs1 | otherwise = error "Numeric.Wrapper.R.GLPK.Phonetic.Languages.Durations.objLine: Not defined for these arguments. " sps2 | maximum sps1 <= lng - 1 = filter (>= 0) sps1 \\ xs2 | otherwise = error "Numeric.Wrapper.R.GLPK.Phonetic.Languages.Durations.objLine: Not defined for these arguments. " h i | i `elem` xs2 = mn1 | i `elem` sps2 = mnSpecial | otherwise = mnG maxDurations :: Int -- ^ The length of the 'String' js that is a sorted list of the phonetic language representations as 'Char's that -- appears in the file with test words and their spoken durations. -> Double -- ^ Maximum duration of the phonetic language element representation in seconds. -> [Double] maxDurations lng mx = replicate lng mx -- | A variant of the more general 'answer2' where the randomization parameters are used to produce every time being run -- a new result (e. g. this allows to model accents). answer :: Int -- ^ The length of the 'String' js that is a sorted list of the phonetic language representations as 'Char's that -- appears in the file with test words and their spoken durations. -> Int -- ^ The number of 'pairs'' function elements in the lists. -> PairwiseC -- ^ Actually the data type value that sets the behaviour of the 'pairs'' function. -> Double -- ^ Maximum duration of the phonetic language element representation in seconds. -> [(Int, Int)] -- ^ List of pairs of indices that shows how the input data is related to the representation -- (which coefficients relates to which representation elements). -> Double -- ^ A minimum positive duration value for some group of phonetic language representation (usually, some sorts of -- phonemes, e. g. vowels) to set some peculiar behaviour for the set of resulting values. -> Double -- ^ A minimum positive duration value for some *special* group of phonetic language representation (usually, some sorts of -- phonemes, e. g. soft sign representation) to set some peculiar behaviour for the set of resulting values. -> Double -- ^ A minimum positive duration value for all other phonetic language representations (usually, some sorts of -- phonemes) to set a general (common) behaviour for the set of resulting values. -> [Int] -- ^ A list of indices of the phonetic languages representations in their sorted in ascending order sequence that -- corresponds to the elements from the some group of representations (e. g. vowels). -> [Int] -- ^ A list of indices of the phonetic languages representations in their sorted in ascending order sequence that -- corresponds to the elements from the special group of representations (e. g. soft sign). -> Array Int Double -- ^ An array of coefficients. -> [String] -- ^ An argument of the 'matrixLine' function. -> [Double] -> [Double] -> String -- ^ A sorted list of the 'Char' representations of the phonetic language phenomenae. -> String answer lng nn pw mx ts = answer2 lng nn pw mx ts (-0.003) 0.003 (-0.0012) 0.0012 answer2 :: Int -- ^ The length of the 'String' js that is a sorted list of the phonetic language representations as 'Char's that -- appears in the file with test words and their spoken durations. -> Int -- ^ The number of 'pairs'' function elements in the lists. -> PairwiseC -- ^ Actually the data type value that sets the behaviour of the 'pairs'' function. -> Double -- ^ Maximum duration of the phonetic language element representation in seconds. -> [(Int, Int)] -- ^ List of pairs of indices that shows how the input data is related to the representation -- (which coefficients relates to which representation elements). -> Double -- ^ A maximum in absolute value (being, usually, a negative one) possible random deviation from the computed value to be additionally applied to emulate -- 'more natural' behaviour and to get every time while running new sets of values. -> Double -- ^ A maximum in absolute value (being, usually, a positive one) possible random deviation from the computed value to be additionally applied to emulate -- 'more natural' behaviour and to get every time while running new sets of values. -> Double -- ^ A minimum in absolute value (being, usually, a negative one) possible random deviation from the computed value to be -- additionally applied to emulate 'more natural' behaviour and to get every time while running new sets of values. -> Double -- ^ A minimum in absolute value (being, usually, a positive one) possible random deviation from the computed value to be -- additionally applied to emulate 'more natural' behaviour and to get every time while running new sets of values. -> Double -- ^ A minimum positive duration value for some group of phonetic language representation (usually, some sorts of -- phonemes, e. g. vowels) to set some peculiar behaviour for the set of resulting values. -> Double -- ^ A minimum positive duration value for some *special* group of phonetic language representation (usually, some sorts of -- phonemes, e. g. soft sign representation) to set some peculiar behaviour for the set of resulting values. -> Double -- ^ A minimum positive duration value for all other phonetic language representations (usually, some sorts of -- phonemes) to set a general (common) behaviour for the set of resulting values. -> [Int] -- ^ A list of indices of the phonetic languages representations in their sorted in ascending order sequence that -- corresponds to the elements from the some group of representations (e. g. vowels). -> [Int] -- ^ A list of indices of the phonetic languages representations in their sorted in ascending order sequence that -- corresponds to the elements from the special group of representations (e. g. soft sign). -> Array Int Double -- ^ An array of coefficients. -> [String] -- ^ An argument of the 'matrixLine' function. -> [Double] -> [Double] -> String -- ^ A sorted list of the 'Char' representations of the phonetic language phenomenae. -> String answer2 lng nn pw mx ts min1 max1 min2 max2 mn1 mnSpecial mnG xs1 sps1 lsts bss xs ys js = mconcat ["library(\"Rglpk\")",newLineEnding,objLine lng ts lsts, matrixLine nn pw bss js,dirLine lng nn bss js, rhsLine lng nn mx mn1 mnSpecial mnG xs1 sps1 xs ys,maxLine,newLineEnding, "k <- Rglpk_solve_LP(obj = obj1, mat = mat1, dir = dir1, rhs = rhs1, max = max1)",newLineEnding, "y <- runif(",show lng, ", min = ", showFFloat Nothing (-(abs min1)) ", max = ", showFFloat Nothing (abs max1) ")", newLineEnding, "if (k$status == 0){k$solution / mean(k$solution)} else {c()}", newLineEnding, "\")}"] -- read ("SylS {charS=\'k\', phoneType=P 6")::PRS -- pldUkr 0.01 0.01 0.1 0.2 words.txt 1.8 -1 0.01 0.01 2.5 2.5 2.5 2.5 2.5 2.5