{-# OPTIONS_GHC -threaded -rtsopts #-} {-# OPTIONS_HADDOCK show-extensions #-} {-# LANGUAGE BangPatterns, NoImplicitPrelude #-} -- | -- Module : Phladiprelio.Ukrainian.DeEnCoding -- Copyright : (c) Oleksandr Zhabenko 2020-2023 -- License : MIT -- Stability : Experimental -- Maintainer : oleksandr.zhabenko@yahoo.com -- -- Functions to encode and decode 'String' -> \['Int8'\] used in the Simple/Main.hs code. -- Is taken from the @phonetic-languages-simplified-lists-examples@ package. module Phladiprelio.Ukrainian.DeEnCoding where import GHC.Base hiding (foldr) import GHC.Num ((+),(-),(*)) import GHC.Real (quotRem) import GHC.Enum (fromEnum,toEnum) import Data.Tuple (fst,snd) import qualified Data.Heap as Heap import GHC.Int import System.IO import Data.Maybe (fromJust) import Numeric (showHex,readHex) import Phladiprelio.Ukrainian.Emphasis import Phladiprelio.Ukrainian.Melodics (FlowSound) import Phladiprelio.Ukrainian.Syllable import Data.Foldable (Foldable, foldr,null,foldl') import Data.List (sort,head,words,zip,length,unwords,init,partition,sortOn) --default (Int, Double) encodeToInt :: Ord a => [[a]] -> Int encodeToInt yss | null ks = -1 | otherwise = fst . head $ ks where ks = readHex (showHex (foldl' (\x y -> x * 16 + y) 0 . map (\(j,_) -> fromEnum j) . sortOn snd . trans2 $ yss) $ "") {-# INLINABLE encodeToInt #-} -- | Is taken mostly from the Phonetic.Languages.Ukrainian.Simplified.Lists.UniquenessPeriodsG module from the @phonetic-languages-simplified-common@ package. indexedL :: Foldable t => b -> t b -> [(Int8, b)] indexedL y zs = foldr f v zs where !v = [(toEnum (length zs + 1),y)] f x ((j,z):ys) = (j-1,x):(j,z):ys {-# INLINE indexedL #-} trans2 :: [[a]] -> [(Int8, [a])] trans2 = init . indexedL [] {-# INLINE trans2 #-} trans232 :: [[a]] -> [(Int, [a])] trans232 zs = init . foldr f v $ zs where !v = [(length zs + 1,[])] f x ((j,z):ys) = (j-1,x):(j,z):ys {-# INLINE trans232 #-} int2l :: Int -> [Int8] int2l n | n < 16 = [toEnum n] | otherwise = int2l n1 `mappend` [l] where (!n1,!l0) = quotRem n 16 !l = toEnum l0 {-# INLINABLE int2l #-} -- | So for the strings consisting of no more than 10 words: -- > decodeToStr (int2l . encodeToInt . words $ xs) xs == unwords . words $ xs -- decodeToStr :: [Int8] -> String -> String decodeToStr ys = unwords . map snd . sortOn fst . zip ys . sort . words {-# INLINE decodeToStr #-} -- | For the 'ReadyForConstructionUkr' that corresponds to the 'String' consisting of no more than 10 words. -- decodeToReadyFCUkr :: [Int8] -> ReadyForConstructionUkr -> ReadyForConstructionUkr decodeToReadyFCUkr ys (Str ts) = Str . unwords . map snd . sortOn fst . zip ys . sort . words $ ts decodeToReadyFCUkr ys (FSL tsss) = FSL . map snd . sortOn fst . zip ys . sort $ tsss {-# INLINE decodeToReadyFCUkr #-} -- | Every 'String' consists of words with whitespace symbols in between. toHeap :: [String] -> Heap.Heap Int toHeap yss@(xs:xss) | null xss = Heap.singleton . encodeToInt . words $ xs | otherwise = Heap.fromList . map (encodeToInt . words) $ yss toHeap _ = Heap.empty {-# INLINE toHeap #-} -- | Every 'ReadyForConstructionUkr' corresponds to the 'String' that consists of words with whitespace symbols in between. -- The list must be consistent -- either 'FSL'-constructed or 'Str'-constructed. toHeapR :: [ReadyForConstructionUkr] -> Heap.Heap Int toHeapR yss@(xs@(Str ts):xss) | null xss = Heap.singleton . encodeToInt . words $ ts | otherwise = Heap.fromList . map (encodeToInt . words . fromJust . fromReadyFCUkrS) $ yss toHeapR yss@(xs@(FSL tsss):xss) | null xss = Heap.singleton . encodeToInt $ tsss | otherwise = Heap.fromList . map (encodeToInt . fromJust . fromReadyFCUkrF) $ yss toHeapR _ = Heap.empty {-# INLINE toHeapR #-} fromHeap :: String -> Heap.Heap Int -> [String] fromHeap ys heap | Heap.null heap = [] | otherwise = map (flip decodeToStr ys . int2l) . Heap.toUnsortedList $ heap {-# INLINE fromHeap #-} fromHeapReadyFCUkr :: ReadyForConstructionUkr -> Heap.Heap Int -> [ReadyForConstructionUkr] fromHeapReadyFCUkr ys heap | Heap.null heap = [] | otherwise = map (flip decodeToReadyFCUkr ys . int2l) . Heap.toUnsortedList $ heap {-# INLINE fromHeapReadyFCUkr #-} intersectInterResults :: [String] -> [String] -> [String] intersectInterResults zss | null zss = const [] | otherwise = fromHeap (head zss) . Heap.intersect (toHeap zss) . toHeap {-# INLINE intersectInterResults #-} intersectInterReadyFCUkr :: (String -> [[FlowSound]]) -> [ReadyForConstructionUkr] -> [ReadyForConstructionUkr] -> [ReadyForConstructionUkr] intersectInterReadyFCUkr convF zss tss | null zss = [] | null tss = [] | (isStr . head $ tss) && (isStr . head $ zss) = fromHeapReadyFCUkr (head zss) . Heap.intersect (toHeapR zss) . toHeapR $ tss | (isFSL . head $ tss) && (isFSL . head $ zss) = fromHeapReadyFCUkr (head zss) . Heap.intersect (toHeapR zss) . toHeapR $ tss | isStr . head $ tss = fromHeapReadyFCUkr (head zss) . Heap.intersect (toHeapR zss) . toHeapR . map (FSL . convF . fromJust . fromReadyFCUkrS) $ tss | otherwise = fromHeapReadyFCUkr (head tss) . Heap.intersect (toHeapR . map (FSL . convF . fromJust . fromReadyFCUkrS) $ zss) . toHeapR $ tss {-# INLINE intersectInterReadyFCUkr #-} intersectInterReadyFCUkr2 :: String -> [ReadyForConstructionUkr] -> [ReadyForConstructionUkr] -> [ReadyForConstructionUkr] intersectInterReadyFCUkr2 ts = intersectInterReadyFCUkr (convFI ts) {-# INLINE intersectInterReadyFCUkr2 #-} -- | Auxiliary printing function to define the line ending in some cases. Is taken from the -- Languages.UniquenessPeriods.Vector.General.DebugG module from the @phonetic-languages-general@ package newLineEnding :: String newLineEnding | nativeNewline == LF = "\n" | otherwise = "\r\n" foldlI :: String -> [[ReadyForConstructionUkr]] -> [ReadyForConstructionUkr] foldlI ts jss@((!xs):ys:xss) | null pss = foldlI' ts qss | null qss = foldlI' ts pss | otherwise = intersectInterReadyFCUkr2 ts (foldlI' ts pss) (foldlI' ts qss) where (pss,qss) = partition (< [FSL []]) jss foldlI' ts rss@(rs:ps:yss) = foldlI' ts (intersectInterReadyFCUkr2 ts rs ps : yss) foldlI' ts ((!xs):_) = xs foldlI' _ _ = [] foldlI _ ((!xs):_) = xs foldlI _ _ = []