{-# OPTIONS_GHC -threaded -rtsopts #-} {-# OPTIONS_HADDOCK show-extensions #-} {-# LANGUAGE BangPatterns, NoImplicitPrelude #-} -- | -- Module : Phladiprelio.General.DeEnCoding -- Copyright : (c) Oleksandr Zhabenko 2020-2023 -- License : MIT -- Stability : Experimental -- Maintainer : oleksandr.zhabenko@yahoo.com -- -- Functions to encode and decode 'String' -> \['Int8'\] used in the Simple/Main.hs code. -- Firstly were taken from the @phonetic-languages-simplified-lists-examples@ package. -- Is modified from the module Phladiprelio.Simplified.DeEnCoding from the -- @phonetic-languages-simplified-examples-common@ package. module Phladiprelio.General.DeEnCoding where import GHC.Base hiding (foldr) import GHC.Num ((+),(-),(*)) import GHC.List (head,zip,init) import GHC.Enum (fromEnum,toEnum) import qualified Data.Heap as Heap import GHC.Int import Data.Foldable import Data.Tuple (fst,snd) import Data.List (sortOn,sort,partition,words,unwords) import System.IO (putStrLn,Newline(LF),nativeNewline) import Data.Maybe (fromJust) import Numeric import Phladiprelio.General.EmphasisG import Phladiprelio.General.Base import Phladiprelio.General.Syllables --default (Int, Double) encodeToInt :: Ord a => [[a]] -> Int encodeToInt yss | null ks = -1 | otherwise = fst . head $ ks where ks = readHex (showHex (foldl' (\x y -> x * 16 + y) 0 . map (\(j,_) -> fromEnum j) . sortOn snd . trans2 $ yss) $ "") {-# INLINABLE encodeToInt #-} -- | Is taken mostly from the Phladiprelio.Simplified.Lists.UniquenessPeriodsG module from the @phonetic-languages-simplified-common@ package. indexedL :: Foldable t => b -> t b -> [(Int8, b)] indexedL y zs = foldr f v zs where !v = [(toEnum (length zs + 1),y)] f x ((j,z):ys) = (j-1,x):(j,z):ys {-# INLINE indexedL #-} trans2 :: [[a]] -> [(Int8, [a])] trans2 = init . indexedL [] {-# INLINE trans2 #-} trans232 :: [[a]] -> [(Int, [a])] trans232 zs = init . foldr f v $ zs where !v = [(length zs + 1,[])] f x ((j,z):ys) = (j-1,x):(j,z):ys {-# INLINE trans232 #-} int2l :: Int -> [Int8] int2l n | n < 16 = [toEnum n] | otherwise = int2l n1 `mappend` [l] where (!n1,!l0) = quotRemInt n 16 !l = toEnum l0 {-# INLINABLE int2l #-} -- | So for the strings consisting of no more than 10 words: -- > decodeToStr (int2l . encodeToInt . words $ xs) xs == unwords . words $ xs -- decodeToStr :: [Int8] -> String -> String decodeToStr ys = unwords . map snd . sortOn fst . zip ys . sort . words {-# INLINE decodeToStr #-} -- | For the 'ReadyForConstructionUkr' that corresponds to the 'String' consisting of no more than 10 words. -- decodeToReadyFCPL :: [Int8] -> ReadyForConstructionPL -> ReadyForConstructionPL decodeToReadyFCPL ys (StrG ts) = StrG . unwords . map snd . sortOn fst . zip ys . sort . words $ ts decodeToReadyFCPL ys (FSLG tsss) = FSLG . map snd . sortOn fst . zip ys . sort $ tsss {-# INLINE decodeToReadyFCPL #-} -- | Every 'String' consists of words with whitespace symbols in between. toHeap :: [String] -> Heap.Heap Int toHeap yss@(xs:xss) | null xss = Heap.singleton . encodeToInt . words $ xs | otherwise = Heap.fromList . map (encodeToInt . words) $ yss toHeap _ = Heap.empty {-# INLINE toHeap #-} -- | Every 'ReadyForConstructionUkr' corresponds to the 'String' that consists of words with whitespace symbols in between. -- The list must be consistent -- either 'FSL'-constructed or 'Str'-constructed. toHeapR :: [ReadyForConstructionPL] -> Heap.Heap Int toHeapR yss@(xs@(StrG ts):xss) | null xss = Heap.singleton . encodeToInt . words $ ts | otherwise = Heap.fromList . map (encodeToInt . words . fromJust . fromReadyFCPLS) $ yss toHeapR yss@(xs@(FSLG tsss):xss) | null xss = Heap.singleton . encodeToInt $ tsss | otherwise = Heap.fromList . map (encodeToInt . fromJust . fromReadyFCPLF) $ yss toHeapR _ = Heap.empty {-# INLINE toHeapR #-} fromHeap:: String -> Heap.Heap Int -> [String] fromHeap ys heap | Heap.null heap = [] | otherwise = map (flip decodeToStr ys . int2l) . Heap.toUnsortedList $ heap {-# INLINE fromHeap #-} fromHeapReadyFCPL :: ReadyForConstructionPL -> Heap.Heap Int -> [ReadyForConstructionPL] fromHeapReadyFCPL ys heap | Heap.null heap = [] | otherwise = map (flip decodeToReadyFCPL ys . int2l) . Heap.toUnsortedList $ heap {-# INLINE fromHeapReadyFCPL #-} intersectInterResults :: [String] -> [String] -> [String] intersectInterResults zss | null zss = const [] | otherwise = fromHeap (head zss) . Heap.intersect (toHeap zss) . toHeap {-# INLINE intersectInterResults #-} -- | It is not well tested. Just as the whole module. intersectInterReadyFCPL :: (String -> [[[Int8]]]) -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] intersectInterReadyFCPL convF zss tss | null zss = [] | null tss = [] | (isStr . head $ tss) && (isStr . head $ zss) = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR $ tss | (isFSL . head $ tss) && (isFSL . head $ zss) = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR $ tss | isStr . head $ tss = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR . map (FSLG . convF . fromJust . fromReadyFCPLS) $ tss | otherwise = fromHeapReadyFCPL (head tss) . Heap.intersect (toHeapR . map (FSLG . convF . fromJust . fromReadyFCPLS) $ zss) . toHeapR $ tss {-# INLINE intersectInterReadyFCPL #-} intersectInterReadyFCPL2 :: GWritingSystemPRPLX -- ^ Data used to obtain the phonetic language representation of the text. -> [(Char,Char)] -- ^ The pairs of the 'Char' that corresponds to the similar phonetic languages consonant phenomenon -- (e. g. allophones). Must be sorted in the ascending order to be used correctly. -> CharPhoneticClassification -- ^ The 'Array' 'Int' 'PRS' must be sorted in the ascending order to be used in the module correctly. -> SegmentRulesG -> String -- ^ Corresponds to the \'0\' symbol delimiter in the @ukrainian-phonetics-basic-array@ package. -> String -- ^ Corresponds to the \'1\' and \'-\' symbol delimiters in the @ukrainian-phonetics-basic-array@ package. -> String -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] intersectInterReadyFCPL2 wrs ks arr hs us vs ts = intersectInterReadyFCPL (convFI wrs ks arr hs us vs ts) {-# INLINE intersectInterReadyFCPL2 #-} -- | Auxiliary printing function to define the line ending in some cases. Is taken from the -- Languages.UniquenessPeriods.Vector.General.DebugG module from the @phonetic-languages-general@ package newLineEnding :: String newLineEnding | nativeNewline == LF = "\n" | otherwise = "\r\n" foldlI :: GWritingSystemPRPLX -- ^ Data used to obtain the phonetic language representation of the text. -> [(Char,Char)] -- ^ The pairs of the 'Char' that corresponds to the similar phonetic languages consonant phenomenon -- (e. g. allophones). Must be sorted in the ascending order to be used correctly. -> CharPhoneticClassification -- ^ The 'Array' 'Int' 'PRS' must be sorted in the ascending order to be used in the module correctly. -> SegmentRulesG -> String -- ^ Corresponds to the \'0\' symbol delimiter in the @ukrainian-phonetics-basic-array@ package. -> String -- ^ Corresponds to the \'1\' and \'-\' symbol delimiters in the @ukrainian-phonetics-basic-array@ package. -> String -> [[ReadyForConstructionPL]] -> [ReadyForConstructionPL] foldlI wrs ks arr hs us vs ts jss@((!xs):ys:xss) | null pss = foldlI' wrs ks arr hs us vs ts qss | null qss = foldlI' wrs ks arr hs us vs ts pss | otherwise = intersectInterReadyFCPL2 wrs ks arr hs us vs ts (foldlI' wrs ks arr hs us vs ts pss) (foldlI' wrs ks arr hs us vs ts qss) where (pss,qss) = partition (< [FSLG []]) jss foldlI' wrs ks arr hs us vs ts rss@(rs:ps:yss) = foldlI' wrs ks arr hs us vs ts (intersectInterReadyFCPL2 wrs ks arr hs us vs ts rs ps : yss) foldlI' wrs ks arr hs us vs ts ((!xs):_) = xs foldlI' _ _ _ _ _ _ _ _ = [] foldlI _ _ _ _ _ _ _ ((!xs):_) = xs foldlI _ _ _ _ _ _ _ _ = []