{-# OPTIONS_GHC -threaded -rtsopts #-} {-# OPTIONS_HADDOCK show-extensions #-} {-# LANGUAGE BangPatterns #-} -- | -- Module : Phonetic.Languages.General.DeEnCoding -- Copyright : (c) OleksandrZhabenko 2020-2022 -- License : MIT -- Stability : Experimental -- Maintainer : olexandr543@yahoo.com -- -- Functions to encode and decode 'String' -> \['Int8'\] used in the Simple/Main.hs code. -- Firstly were taken from the @phonetic-languages-simplified-lists-examples@ package. -- Is modified from the module Phonetic.Languages.Simplified.DeEnCoding from the -- @phonetic-languages-simplified-examples-common@ package. module Phonetic.Languages.General.DeEnCoding where import Data.Heap (Heap) import qualified Data.Heap as Heap import GHC.Int import Data.Foldable (foldl') import Data.List (sortBy,sort,partition) import System.IO import Data.Maybe (fromJust) import Numeric import Phonetic.Languages.EmphasisG import Data.Phonetic.Languages.Base import Data.Phonetic.Languages.Syllables --import Phonetic.Languages.Coeffs --default (Int, Double) encodeToInt :: Ord a => [[a]] -> Int encodeToInt yss | null ks = -1 | otherwise = fst . head $ ks where ks = readHex (showHex (foldl' (\x y -> x * 16 + y) 0 . map (\(j,_) -> fromEnum j) . sortBy (\x y -> compare (snd x) (snd y)) . trans2 $ yss) $ "") {-# INLINABLE encodeToInt #-} -- | Is taken mostly from the Phonetic.Languages.Simplified.Lists.UniquenessPeriodsG module from the @phonetic-languages-simplified-common@ package. indexedL :: Foldable t => b -> t b -> [(Int8, b)] indexedL y zs = foldr f v zs where !v = [(toEnum (length zs + 1),y)] f x ((j,z):ys) = (j-1,x):(j,z):ys {-# INLINE indexedL #-} trans2 :: [[a]] -> [(Int8, [a])] trans2 = init . indexedL [] {-# INLINE trans2 #-} trans232 :: [[a]] -> [(Int, [a])] trans232 zs = init . foldr f v $ zs where !v = [(length zs + 1,[])] f x ((j,z):ys) = (j-1,x):(j,z):ys {-# INLINE trans232 #-} int2l :: Int -> [Int8] int2l n | n < 16 = [toEnum n] | otherwise = int2l n1 `mappend` [l] where (!n1,!l0) = quotRem n 16 !l = toEnum l0 {-# INLINABLE int2l #-} -- | So for the strings consisting of no more than 10 words: -- > decodeToStr (int2l . encodeToInt . words $ xs) xs == unwords . words $ xs -- decodeToStr :: [Int8] -> String -> String decodeToStr ys = unwords . map snd . sortBy (\x y -> compare (fst x) (fst y)) . zip ys . sort . words {-# INLINE decodeToStr #-} -- | For the 'ReadyForConstructionUkr' that corresponds to the 'String' consisting of no more than 10 words. -- decodeToReadyFCPL :: [Int8] -> ReadyForConstructionPL -> ReadyForConstructionPL decodeToReadyFCPL ys (StrG ts) = StrG . unwords . map snd . sortBy (\x y -> compare (fst x) (fst y)) . zip ys . sort . words $ ts decodeToReadyFCPL ys (FSLG tsss) = FSLG . map snd . sortBy (\x y -> compare (fst x) (fst y)) . zip ys . sort $ tsss {-# INLINE decodeToReadyFCPL #-} -- | Every 'String' consists of words with whitespace symbols in between. toHeap :: [String] -> Heap Int toHeap yss@(xs:xss) | null xss = Heap.singleton . encodeToInt . words $ xs | otherwise = Heap.fromList . map (encodeToInt . words) $ yss toHeap _ = Heap.empty {-# INLINE toHeap #-} -- | Every 'ReadyForConstructionUkr' corresponds to the 'String' that consists of words with whitespace symbols in between. -- The list must be consistent -- either 'FSL'-constructed or 'Str'-constructed. toHeapR :: [ReadyForConstructionPL] -> Heap Int toHeapR yss@(xs@(StrG ts):xss) | null xss = Heap.singleton . encodeToInt . words $ ts | otherwise = Heap.fromList . map (encodeToInt . words . fromJust . fromReadyFCPLS) $ yss toHeapR yss@(xs@(FSLG tsss):xss) | null xss = Heap.singleton . encodeToInt $ tsss | otherwise = Heap.fromList . map (encodeToInt . fromJust . fromReadyFCPLF) $ yss toHeapR _ = Heap.empty {-# INLINE toHeapR #-} fromHeap :: String -> Heap Int -> [String] fromHeap ys heap | Heap.null heap = [] | otherwise = map (flip decodeToStr ys . int2l) . Heap.toUnsortedList $ heap {-# INLINE fromHeap #-} fromHeapReadyFCPL :: ReadyForConstructionPL -> Heap Int -> [ReadyForConstructionPL] fromHeapReadyFCPL ys heap | Heap.null heap = [] | otherwise = map (flip decodeToReadyFCPL ys . int2l) . Heap.toUnsortedList $ heap {-# INLINE fromHeapReadyFCPL #-} intersectInterResults :: [String] -> [String] -> [String] intersectInterResults zss | null zss = const [] | otherwise = fromHeap (head zss) . Heap.intersect (toHeap zss) . toHeap {-# INLINE intersectInterResults #-} -- | It seems like it works, but it isn't. (??). The main reason is the hardness with defining convF to work properly. intersectInterReadyFCPL :: (String -> [[[Int8]]]) -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] intersectInterReadyFCPL convF zss tss | null zss = [] | null tss = [] | (isStr . head $ tss) && (isStr . head $ zss) = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR $ tss | (isFSL . head $ tss) && (isFSL . head $ zss) = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR $ tss | isStr . head $ tss = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR . map (FSLG . convF . fromJust . fromReadyFCPLS) $ tss | otherwise = fromHeapReadyFCPL (head tss) . Heap.intersect (toHeapR . map (FSLG . convF . fromJust . fromReadyFCPLS) $ zss) . toHeapR $ tss {-# INLINE intersectInterReadyFCPL #-} intersectInterReadyFCPL2 :: GWritingSystemPRPLX -- ^ Data used to obtain the phonetic language representation of the text. -> [(Char,Char)] -- ^ The pairs of the 'Char' that corresponds to the similar phonetic languages consonant phenomenon -- (e. g. allophones). Must be sorted in the ascending order to be used correctly. -> CharPhoneticClassification -- ^ The 'Array' 'Int' 'PRS' must be sorted in the ascending order to be used in the module correctly. -> SegmentRulesG -> String -- ^ Corresponds to the \'0\' symbol delimiter in the @ukrainian-phonetics-basic-array@ package. -> String -- ^ Corresponds to the \'1\' and \'-\' symbol delimiters in the @ukrainian-phonetics-basic-array@ package. -> String -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] intersectInterReadyFCPL2 wrs ks arr hs us vs ts = intersectInterReadyFCPL (convFI wrs ks arr hs us vs ts) {-# INLINE intersectInterReadyFCPL2 #-} -- | Auxiliary printing function to define the line ending in some cases. Is taken from the -- Languages.UniquenessPeriods.Vector.General.DebugG module from the @phonetic-languages-general@ package newLineEnding :: String newLineEnding | nativeNewline == LF = "\n" | otherwise = "\r\n" foldlI :: GWritingSystemPRPLX -- ^ Data used to obtain the phonetic language representation of the text. -> [(Char,Char)] -- ^ The pairs of the 'Char' that corresponds to the similar phonetic languages consonant phenomenon -- (e. g. allophones). Must be sorted in the ascending order to be used correctly. -> CharPhoneticClassification -- ^ The 'Array' 'Int' 'PRS' must be sorted in the ascending order to be used in the module correctly. -> SegmentRulesG -> String -- ^ Corresponds to the \'0\' symbol delimiter in the @ukrainian-phonetics-basic-array@ package. -> String -- ^ Corresponds to the \'1\' and \'-\' symbol delimiters in the @ukrainian-phonetics-basic-array@ package. -> String -> [[ReadyForConstructionPL]] -> [ReadyForConstructionPL] foldlI wrs ks arr hs us vs ts jss@((!xs):ys:xss) | null pss = foldlI' wrs ks arr hs us vs ts qss | null qss = foldlI' wrs ks arr hs us vs ts pss | otherwise = intersectInterReadyFCPL2 wrs ks arr hs us vs ts (foldlI' wrs ks arr hs us vs ts pss) (foldlI' wrs ks arr hs us vs ts qss) where (pss,qss) = partition (< [FSLG []]) jss foldlI' wrs ks arr hs us vs ts rss@(rs:ps:yss) = foldlI' wrs ks arr hs us vs ts (intersectInterReadyFCPL2 wrs ks arr hs us vs ts rs ps : yss) foldlI' wrs ks arr hs us vs ts ((!xs):_) = xs foldlI' _ _ _ _ _ _ _ _ = [] foldlI _ _ _ _ _ _ _ ((!xs):_) = xs foldlI _ _ _ _ _ _ _ _ = []