{-# OPTIONS_GHC -threaded -rtsopts #-} {-# OPTIONS_HADDOCK show-extensions #-} {-# LANGUAGE BangPatterns #-} -- | -- Module : Phonetic.Languages.General.DeEnCoding -- Copyright : (c) OleksandrZhabenko 2020-2022 -- License : MIT -- Stability : Experimental -- Maintainer : olexandr543@yahoo.com -- -- Functions to encode and decode 'String' -> \['Int8'\] used in the Simple/Main.hs code. -- Firstly were taken from the @phonetic-languages-simplified-lists-examples@ package. -- Is modified from the module Phonetic.Languages.Simplified.DeEnCoding from the -- @phonetic-languages-simplified-examples-common@ package. module Phonetic.Languages.General.DeEnCoding where import Data.Heap (Heap) import qualified Data.Heap as Heap import GHC.Int import Data.Foldable (foldl') import Data.List (sortBy,sort,partition) import System.IO import Data.Maybe (fromJust) import Numeric import Phonetic.Languages.EmphasisG --import Melodics.Ukrainian.ArrInt8 (FlowSound) --import Languages.Phonetic.Ukrainian.Syllable.ArrInt8 import Data.Phonetic.Languages.Base import Data.Phonetic.Languages.Syllables --default (Int, Double) encodeToInt :: Ord a => [[a]] -> Int encodeToInt yss | null ks = -1 | otherwise = fst . head $ ks where ks = readHex (showHex (foldl' (\x y -> x * 16 + y) 0 . map (\(j,_) -> fromEnum j) . sortBy (\x y -> compare (snd x) (snd y)) . trans2 $ yss) $ "") {-# INLINABLE encodeToInt #-} -- | Is taken mostly from the Phonetic.Languages.Simplified.Lists.UniquenessPeriodsG module from the @phonetic-languages-simplified-common@ package. indexedL :: Foldable t => b -> t b -> [(Int8, b)] indexedL y zs = foldr f v zs where !v = [(toEnum (length zs + 1),y)] f x ((j,z):ys) = (j-1,x):(j,z):ys {-# INLINE indexedL #-} trans2 :: [[a]] -> [(Int8, [a])] trans2 = init . indexedL [] {-# INLINE trans2 #-} trans232 :: [[a]] -> [(Int, [a])] trans232 zs = init . foldr f v $ zs where !v = [(length zs + 1,[])] f x ((j,z):ys) = (j-1,x):(j,z):ys {-# INLINE trans232 #-} int2l :: Int -> [Int8] int2l n | n < 16 = [toEnum n] | otherwise = int2l n1 `mappend` [l] where (!n1,!l0) = quotRem n 16 !l = toEnum l0 {-# INLINABLE int2l #-} -- | So for the strings consisting of no more than 10 words: -- > decodeToStr (int2l . encodeToInt . words $ xs) xs == unwords . words $ xs -- decodeToStr :: [Int8] -> String -> String decodeToStr ys = unwords . map snd . sortBy (\x y -> compare (fst x) (fst y)) . zip ys . sort . words {-# INLINE decodeToStr #-} -- | For the 'ReadyForConstructionUkr' that corresponds to the 'String' consisting of no more than 10 words. -- decodeToReadyFCPL :: [Int8] -> ReadyForConstructionPL -> ReadyForConstructionPL decodeToReadyFCPL ys (StrG ts) = StrG . unwords . map snd . sortBy (\x y -> compare (fst x) (fst y)) . zip ys . sort . words $ ts decodeToReadyFCPL ys (FSLG tsss) = FSLG . map snd . sortBy (\x y -> compare (fst x) (fst y)) . zip ys . sort $ tsss {-# INLINE decodeToReadyFCPL #-} -- | Every 'String' consists of words with whitespace symbols in between. toHeap :: [String] -> Heap Int toHeap yss@(xs:xss) | null xss = Heap.singleton . encodeToInt . words $ xs | otherwise = Heap.fromList . map (encodeToInt . words) $ yss toHeap _ = Heap.empty {-# INLINE toHeap #-} -- | Every 'ReadyForConstructionUkr' corresponds to the 'String' that consists of words with whitespace symbols in between. -- The list must be consistent -- either 'FSL'-constructed or 'Str'-constructed. toHeapR :: [ReadyForConstructionPL] -> Heap Int toHeapR yss@(xs@(StrG ts):xss) | null xss = Heap.singleton . encodeToInt . words $ ts | otherwise = Heap.fromList . map (encodeToInt . words . fromJust . fromReadyFCPLS) $ yss toHeapR yss@(xs@(FSLG tsss):xss) | null xss = Heap.singleton . encodeToInt $ tsss | otherwise = Heap.fromList . map (encodeToInt . fromJust . fromReadyFCPLF) $ yss toHeapR _ = Heap.empty {-# INLINE toHeapR #-} fromHeap :: String -> Heap Int -> [String] fromHeap ys heap | Heap.null heap = [] | otherwise = map (flip decodeToStr ys . int2l) . Heap.toUnsortedList $ heap {-# INLINE fromHeap #-} fromHeapReadyFCPL :: ReadyForConstructionPL -> Heap Int -> [ReadyForConstructionPL] fromHeapReadyFCPL ys heap | Heap.null heap = [] | otherwise = map (flip decodeToReadyFCPL ys . int2l) . Heap.toUnsortedList $ heap {-# INLINE fromHeapReadyFCPL #-} intersectInterResults :: [String] -> [String] -> [String] intersectInterResults zss | null zss = const [] | otherwise = fromHeap (head zss) . Heap.intersect (toHeap zss) . toHeap {-# INLINE intersectInterResults #-} -- | It seems like it works, but it isn't. (??). The main reason is the hardness with defining convF to work properly. intersectInterReadyFCPL :: (String -> [[[Int8]]]) -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] intersectInterReadyFCPL convF zss tss | null zss = [] | null tss = [] | (isStr . head $ tss) && (isStr . head $ zss) = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR $ tss | (isFSL . head $ tss) && (isFSL . head $ zss) = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR $ tss | isStr . head $ tss = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR . map (FSLG . convF . fromJust . fromReadyFCPLS) $ tss | otherwise = fromHeapReadyFCPL (head tss) . Heap.intersect (toHeapR . map (FSLG . convF . fromJust . fromReadyFCPLS) $ zss) . toHeapR $ tss {-# INLINE intersectInterReadyFCPL #-} intersectInterReadyFCPL2 :: GWritingSystemPRPLX -- ^ Data used to obtain the phonetic language representation of the text. -> [(Char,Char)] -- ^ The pairs of the 'Char' that corresponds to the similar phonetic languages consonant phenomenon -- (e. g. allophones). Must be sorted in the ascending order to be used correctly. -> CharPhoneticClassification -- ^ The 'Array' 'Int' 'PRS' must be sorted in the ascending order to be used in the module correctly. -> SegmentRulesG -> String -- ^ Corresponds to the \'0\' symbol delimiter in the @ukrainian-phonetics-basic-array@ package. -> String -- ^ Corresponds to the \'1\' and \'-\' symbol delimiters in the @ukrainian-phonetics-basic-array@ package. -> String -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] intersectInterReadyFCPL2 wrs ks arr hs us vs ts = intersectInterReadyFCPL (convFI wrs ks arr hs us vs ts) {-# INLINE intersectInterReadyFCPL2 #-} -- | Auxiliary printing function to define the line ending in some cases. Is taken from the -- Languages.UniquenessPeriods.Vector.General.DebugG module from the @phonetic-languages-general@ package newLineEnding :: String newLineEnding | nativeNewline == LF = "\n" | otherwise = "\r\n" foldlI :: GWritingSystemPRPLX -- ^ Data used to obtain the phonetic language representation of the text. -> [(Char,Char)] -- ^ The pairs of the 'Char' that corresponds to the similar phonetic languages consonant phenomenon -- (e. g. allophones). Must be sorted in the ascending order to be used correctly. -> CharPhoneticClassification -- ^ The 'Array' 'Int' 'PRS' must be sorted in the ascending order to be used in the module correctly. -> SegmentRulesG -> String -- ^ Corresponds to the \'0\' symbol delimiter in the @ukrainian-phonetics-basic-array@ package. -> String -- ^ Corresponds to the \'1\' and \'-\' symbol delimiters in the @ukrainian-phonetics-basic-array@ package. -> String -> [[ReadyForConstructionPL]] -> [ReadyForConstructionPL] foldlI wrs ks arr hs us vs ts jss@((!xs):ys:xss) | null pss = foldlI' wrs ks arr hs us vs ts qss | null qss = foldlI' wrs ks arr hs us vs ts pss | otherwise = intersectInterReadyFCPL2 wrs ks arr hs us vs ts (foldlI' wrs ks arr hs us vs ts pss) (foldlI' wrs ks arr hs us vs ts qss) where (pss,qss) = partition (< [FSLG []]) jss foldlI' wrs ks arr hs us vs ts rss@(rs:ps:yss) = foldlI' wrs ks arr hs us vs ts (intersectInterReadyFCPL2 wrs ks arr hs us vs ts rs ps : yss) foldlI' wrs ks arr hs us vs ts ((!xs):_) = xs foldlI' _ _ _ _ _ _ _ _ = [] foldlI _ _ _ _ _ _ _ ((!xs):_) = xs foldlI _ _ _ _ _ _ _ _ = []