{-# OPTIONS_GHC -threaded -rtsopts #-}
{-# OPTIONS_HADDOCK show-extensions #-}
{-# LANGUAGE BangPatterns #-}

-- |
-- Module      :  Phonetic.Languages.General.DeEnCoding
-- Copyright   :  (c) OleksandrZhabenko 2020-2022
-- License     :  MIT
-- Stability   :  Experimental
-- Maintainer  :  olexandr543@yahoo.com
--
-- Functions to encode and decode 'String' -> \['Int8'\] used in the Simple/Main.hs code.
-- Firstly were taken from the @phonetic-languages-simplified-lists-examples@ package.
-- Is modified from the module Phonetic.Languages.Simplified.DeEnCoding from the
-- @phonetic-languages-simplified-examples-common@ package.

module Phonetic.Languages.General.DeEnCoding where

import Data.Heap (Heap)
import qualified Data.Heap as Heap
import GHC.Int
import Data.Foldable (foldl')
import Data.List (sortBy,sort,partition)
import System.IO
import Data.Maybe (fromJust)
import Numeric
import Phonetic.Languages.EmphasisG
--import Melodics.Ukrainian.ArrInt8 (FlowSound)
--import Languages.Phonetic.Ukrainian.Syllable.ArrInt8
import Data.Phonetic.Languages.Base
import Data.Phonetic.Languages.Syllables

--default (Int, Double)

encodeToInt :: Ord a => [[a]] -> Int
encodeToInt yss
 | null ks = -1
 | otherwise = fst . head $ ks
  where ks = readHex (showHex (foldl' (\x y -> x * 16 + y) 0 . map (\(j,_) -> fromEnum j) . sortBy (\x y -> compare (snd x) (snd y)) . trans2 $ yss) $ "")
{-# INLINABLE encodeToInt #-}


-- | Is taken mostly from the Phonetic.Languages.Simplified.Lists.UniquenessPeriodsG module from the @phonetic-languages-simplified-common@ package.
indexedL :: Foldable t => b -> t b -> [(Int8, b)]
indexedL y zs = foldr f v zs
  where !v = [(toEnum (length zs + 1),y)]
        f x ((j,z):ys) = (j-1,x):(j,z):ys
{-# INLINE indexedL #-}

trans2 :: [[a]] -> [(Int8, [a])]
trans2 = init . indexedL []
{-# INLINE trans2 #-}

trans232 :: [[a]] -> [(Int, [a])]
trans232 zs = init . foldr f v $ zs
  where !v = [(length zs + 1,[])]
        f x ((j,z):ys) = (j-1,x):(j,z):ys
{-# INLINE trans232 #-}

int2l :: Int -> [Int8]
int2l n
 | n < 16 = [toEnum n]
 | otherwise = int2l n1 `mappend` [l]
     where (!n1,!l0) = quotRem n 16
           !l = toEnum l0
{-# INLINABLE int2l #-}

-- | So for the strings consisting of no more than 10 words:
-- > decodeToStr (int2l . encodeToInt . words $ xs) xs == unwords . words $ xs
--
decodeToStr :: [Int8] -> String -> String
decodeToStr ys = unwords . map snd . sortBy (\x y -> compare (fst x) (fst y)) . zip ys . sort . words
{-# INLINE decodeToStr #-}

-- | For the 'ReadyForConstructionUkr' that corresponds to the 'String' consisting of no more than 10 words.
--
decodeToReadyFCPL :: [Int8] -> ReadyForConstructionPL -> ReadyForConstructionPL
decodeToReadyFCPL ys (StrG ts) = StrG . unwords . map snd . sortBy (\x y -> compare (fst x) (fst y)) . zip ys . sort . words $ ts
decodeToReadyFCPL ys (FSLG tsss) = FSLG . map snd . sortBy (\x y -> compare (fst x) (fst y)) . zip ys . sort $ tsss
{-# INLINE decodeToReadyFCPL #-}

-- | Every 'String' consists of words with whitespace symbols in between.
toHeap :: [String] -> Heap Int
toHeap yss@(xs:xss)
  | null xss = Heap.singleton . encodeToInt . words $ xs
  | otherwise = Heap.fromList . map (encodeToInt . words) $ yss
toHeap _ = Heap.empty
{-# INLINE toHeap #-}

-- | Every 'ReadyForConstructionUkr' corresponds to the 'String' that consists of words with whitespace symbols in between.
-- The list must be consistent -- either 'FSL'-constructed or 'Str'-constructed.
toHeapR :: [ReadyForConstructionPL] -> Heap Int
toHeapR yss@(xs@(StrG ts):xss)
  | null xss = Heap.singleton . encodeToInt . words $ ts
  | otherwise = Heap.fromList . map (encodeToInt . words . fromJust . fromReadyFCPLS) $ yss
toHeapR yss@(xs@(FSLG tsss):xss)
  | null xss = Heap.singleton . encodeToInt $ tsss
  | otherwise = Heap.fromList . map (encodeToInt . fromJust . fromReadyFCPLF) $ yss  
toHeapR _ = Heap.empty
{-# INLINE toHeapR #-}

fromHeap :: String -> Heap Int -> [String]
fromHeap ys heap
 | Heap.null heap = []
 | otherwise = map (flip decodeToStr ys . int2l) . Heap.toUnsortedList $ heap
{-# INLINE fromHeap #-}

fromHeapReadyFCPL :: ReadyForConstructionPL -> Heap Int -> [ReadyForConstructionPL]
fromHeapReadyFCPL ys heap
 | Heap.null heap = []
 | otherwise = map (flip decodeToReadyFCPL ys . int2l) . Heap.toUnsortedList $ heap
{-# INLINE fromHeapReadyFCPL #-}

intersectInterResults :: [String] -> [String] -> [String]
intersectInterResults zss
 | null zss = const []
 | otherwise = fromHeap (head zss) . Heap.intersect (toHeap zss) . toHeap
{-# INLINE intersectInterResults #-}

-- | It seems like it works, but it isn't. (??). The main reason is the hardness with defining convF to work properly.
intersectInterReadyFCPL :: (String -> [[[Int8]]]) -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] -> [ReadyForConstructionPL]
intersectInterReadyFCPL convF zss tss
 | null zss = []
 | null tss = []
 | (isStr . head $ tss) && (isStr . head $ zss) = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR $ tss
 | (isFSL . head $ tss) && (isFSL . head $ zss) = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR $ tss
 | isStr . head $ tss = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR . map (FSLG . convF . fromJust . fromReadyFCPLS) $ tss
 | otherwise = fromHeapReadyFCPL (head tss) . Heap.intersect (toHeapR . map (FSLG . convF . fromJust . fromReadyFCPLS) $ zss) . toHeapR $ tss
{-# INLINE intersectInterReadyFCPL #-}

intersectInterReadyFCPL2 
  :: GWritingSystemPRPLX -- ^ Data used to obtain the phonetic language representation of the text.
  -> [(Char,Char)] -- ^ The pairs of the 'Char' that corresponds to the similar phonetic languages consonant phenomenon
  -- (e. g. allophones). Must be sorted in the ascending order to be used correctly.
  -> CharPhoneticClassification -- ^ The 'Array' 'Int' 'PRS' must be sorted in the ascending order to be used in the module correctly.
  -> SegmentRulesG
  -> String -- ^ Corresponds to the \'0\' symbol delimiter in the @ukrainian-phonetics-basic-array@ package.
  -> String -- ^ Corresponds to the \'1\' and \'-\' symbol delimiters in the @ukrainian-phonetics-basic-array@ package.
  -> String 
  -> [ReadyForConstructionPL] 
  -> [ReadyForConstructionPL] 
  -> [ReadyForConstructionPL]
intersectInterReadyFCPL2 wrs ks arr hs us vs ts = intersectInterReadyFCPL (convFI wrs ks arr hs us vs ts)
{-# INLINE intersectInterReadyFCPL2 #-}

-- | Auxiliary printing function to define the line ending in some cases. Is taken from the
-- Languages.UniquenessPeriods.Vector.General.DebugG module from the @phonetic-languages-general@ package
newLineEnding :: String
newLineEnding
  | nativeNewline == LF = "\n"
  | otherwise = "\r\n"

  
foldlI 
  :: GWritingSystemPRPLX -- ^ Data used to obtain the phonetic language representation of the text.
  -> [(Char,Char)] -- ^ The pairs of the 'Char' that corresponds to the similar phonetic languages consonant phenomenon
  -- (e. g. allophones). Must be sorted in the ascending order to be used correctly.
  -> CharPhoneticClassification -- ^ The 'Array' 'Int' 'PRS' must be sorted in the ascending order to be used in the module correctly.
  -> SegmentRulesG
  -> String -- ^ Corresponds to the \'0\' symbol delimiter in the @ukrainian-phonetics-basic-array@ package.
  -> String -- ^ Corresponds to the \'1\' and \'-\' symbol delimiters in the @ukrainian-phonetics-basic-array@ package.
  -> String 
  -> [[ReadyForConstructionPL]] 
  -> [ReadyForConstructionPL]
foldlI wrs ks arr hs us vs ts jss@((!xs):ys:xss) 
 | null pss = foldlI' wrs ks arr hs us vs ts qss
 | null qss = foldlI' wrs ks arr hs us vs ts pss
 | otherwise = intersectInterReadyFCPL2 wrs ks arr hs us vs ts (foldlI' wrs ks arr hs us vs ts pss) (foldlI' wrs ks arr hs us vs ts qss) 
  where (pss,qss) = partition (< [FSLG []]) jss
        foldlI' wrs ks arr hs us vs ts rss@(rs:ps:yss) = foldlI' wrs ks arr hs us vs ts (intersectInterReadyFCPL2 wrs ks arr hs us vs ts rs ps : yss)
        foldlI' wrs ks arr hs us vs ts ((!xs):_) = xs
        foldlI' _ _ _ _ _ _ _ _ = []
foldlI _ _ _ _ _ _ _ ((!xs):_) = xs
foldlI _ _ _ _ _ _ _ _ = []