{-# OPTIONS_GHC -threaded -rtsopts #-}
{-# OPTIONS_HADDOCK show-extensions #-}
{-# LANGUAGE BangPatterns #-}

-- |
-- Module      :  Phonetic.Languages.General.DeEnCoding
-- Copyright   :  (c) OleksandrZhabenko 2020-2022
-- License     :  MIT
-- Stability   :  Experimental
-- Maintainer  :  olexandr543@yahoo.com
--
-- Functions to encode and decode 'String' -> \['Int8'\] used in the Simple/Main.hs code.
-- Firstly were taken from the @phonetic-languages-simplified-lists-examples@ package.
-- Is modified from the module Phonetic.Languages.Simplified.DeEnCoding from the
-- @phonetic-languages-simplified-examples-common@ package.

module Phonetic.Languages.General.DeEnCoding where

import Data.Heap (Heap)
import qualified Data.Heap as Heap
import GHC.Int
import Data.Foldable (foldl')
import Data.List (sortBy,sort,partition)
import System.IO
import Data.Maybe (fromJust)
import Numeric
import Phonetic.Languages.EmphasisG
import Data.Phonetic.Languages.Base
import Data.Phonetic.Languages.Syllables
--import Phonetic.Languages.Coeffs

--default (Int, Double)

encodeToInt :: Ord a => [[a]] -> Int
encodeToInt yss
 | null ks = -1
 | otherwise = fst . head $ ks
  where ks = readHex (showHex (foldl' (\x y -> x * 16 + y) 0 . map (\(j,_) -> fromEnum j) . sortBy (\x y -> compare (snd x) (snd y)) . trans2 $ yss) $ "")
{-# INLINABLE encodeToInt #-}


-- | Is taken mostly from the Phonetic.Languages.Simplified.Lists.UniquenessPeriodsG module from the @phonetic-languages-simplified-common@ package.
indexedL :: Foldable t => b -> t b -> [(Int8, b)]
indexedL y zs = foldr f v zs
  where !v = [(toEnum (length zs + 1),y)]
        f x ((j,z):ys) = (j-1,x):(j,z):ys
{-# INLINE indexedL #-}

trans2 :: [[a]] -> [(Int8, [a])]
trans2 = init . indexedL []
{-# INLINE trans2 #-}

trans232 :: [[a]] -> [(Int, [a])]
trans232 zs = init . foldr f v $ zs
  where !v = [(length zs + 1,[])]
        f x ((j,z):ys) = (j-1,x):(j,z):ys
{-# INLINE trans232 #-}

int2l :: Int -> [Int8]
int2l n
 | n < 16 = [toEnum n]
 | otherwise = int2l n1 `mappend` [l]
     where (!n1,!l0) = quotRem n 16
           !l = toEnum l0
{-# INLINABLE int2l #-}

-- | So for the strings consisting of no more than 10 words:
-- > decodeToStr (int2l . encodeToInt . words $ xs) xs == unwords . words $ xs
--
decodeToStr :: [Int8] -> String -> String
decodeToStr ys = unwords . map snd . sortBy (\x y -> compare (fst x) (fst y)) . zip ys . sort . words
{-# INLINE decodeToStr #-}

-- | For the 'ReadyForConstructionUkr' that corresponds to the 'String' consisting of no more than 10 words.
--
decodeToReadyFCPL :: [Int8] -> ReadyForConstructionPL -> ReadyForConstructionPL
decodeToReadyFCPL ys (StrG ts) = StrG . unwords . map snd . sortBy (\x y -> compare (fst x) (fst y)) . zip ys . sort . words $ ts
decodeToReadyFCPL ys (FSLG tsss) = FSLG . map snd . sortBy (\x y -> compare (fst x) (fst y)) . zip ys . sort $ tsss
{-# INLINE decodeToReadyFCPL #-}

-- | Every 'String' consists of words with whitespace symbols in between.
toHeap :: [String] -> Heap Int
toHeap yss@(xs:xss)
  | null xss = Heap.singleton . encodeToInt . words $ xs
  | otherwise = Heap.fromList . map (encodeToInt . words) $ yss
toHeap _ = Heap.empty
{-# INLINE toHeap #-}

-- | Every 'ReadyForConstructionUkr' corresponds to the 'String' that consists of words with whitespace symbols in between.
-- The list must be consistent -- either 'FSL'-constructed or 'Str'-constructed.
toHeapR :: [ReadyForConstructionPL] -> Heap Int
toHeapR yss@(xs@(StrG ts):xss)
  | null xss = Heap.singleton . encodeToInt . words $ ts
  | otherwise = Heap.fromList . map (encodeToInt . words . fromJust . fromReadyFCPLS) $ yss
toHeapR yss@(xs@(FSLG tsss):xss)
  | null xss = Heap.singleton . encodeToInt $ tsss
  | otherwise = Heap.fromList . map (encodeToInt . fromJust . fromReadyFCPLF) $ yss  
toHeapR _ = Heap.empty
{-# INLINE toHeapR #-}

fromHeap :: String -> Heap Int -> [String]
fromHeap ys heap
 | Heap.null heap = []
 | otherwise = map (flip decodeToStr ys . int2l) . Heap.toUnsortedList $ heap
{-# INLINE fromHeap #-}

fromHeapReadyFCPL :: ReadyForConstructionPL -> Heap Int -> [ReadyForConstructionPL]
fromHeapReadyFCPL ys heap
 | Heap.null heap = []
 | otherwise = map (flip decodeToReadyFCPL ys . int2l) . Heap.toUnsortedList $ heap
{-# INLINE fromHeapReadyFCPL #-}

intersectInterResults :: [String] -> [String] -> [String]
intersectInterResults zss
 | null zss = const []
 | otherwise = fromHeap (head zss) . Heap.intersect (toHeap zss) . toHeap
{-# INLINE intersectInterResults #-}

-- | It seems like it works, but it isn't. (??). The main reason is the hardness with defining convF to work properly.
intersectInterReadyFCPL :: (String -> [[[Int8]]]) -> [ReadyForConstructionPL] -> [ReadyForConstructionPL] -> [ReadyForConstructionPL]
intersectInterReadyFCPL convF zss tss
 | null zss = []
 | null tss = []
 | (isStr . head $ tss) && (isStr . head $ zss) = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR $ tss
 | (isFSL . head $ tss) && (isFSL . head $ zss) = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR $ tss
 | isStr . head $ tss = fromHeapReadyFCPL (head zss) . Heap.intersect (toHeapR zss) . toHeapR . map (FSLG . convF . fromJust . fromReadyFCPLS) $ tss
 | otherwise = fromHeapReadyFCPL (head tss) . Heap.intersect (toHeapR . map (FSLG . convF . fromJust . fromReadyFCPLS) $ zss) . toHeapR $ tss
{-# INLINE intersectInterReadyFCPL #-}

intersectInterReadyFCPL2 
  :: GWritingSystemPRPLX -- ^ Data used to obtain the phonetic language representation of the text.
  -> [(Char,Char)] -- ^ The pairs of the 'Char' that corresponds to the similar phonetic languages consonant phenomenon
  -- (e. g. allophones). Must be sorted in the ascending order to be used correctly.
  -> CharPhoneticClassification -- ^ The 'Array' 'Int' 'PRS' must be sorted in the ascending order to be used in the module correctly.
  -> SegmentRulesG
  -> String -- ^ Corresponds to the \'0\' symbol delimiter in the @ukrainian-phonetics-basic-array@ package.
  -> String -- ^ Corresponds to the \'1\' and \'-\' symbol delimiters in the @ukrainian-phonetics-basic-array@ package.
  -> String 
  -> [ReadyForConstructionPL] 
  -> [ReadyForConstructionPL] 
  -> [ReadyForConstructionPL]
intersectInterReadyFCPL2 wrs ks arr hs us vs ts = intersectInterReadyFCPL (convFI wrs ks arr hs us vs ts)
{-# INLINE intersectInterReadyFCPL2 #-}

-- | Auxiliary printing function to define the line ending in some cases. Is taken from the
-- Languages.UniquenessPeriods.Vector.General.DebugG module from the @phonetic-languages-general@ package
newLineEnding :: String
newLineEnding
  | nativeNewline == LF = "\n"
  | otherwise = "\r\n"

  
foldlI 
  :: GWritingSystemPRPLX -- ^ Data used to obtain the phonetic language representation of the text.
  -> [(Char,Char)] -- ^ The pairs of the 'Char' that corresponds to the similar phonetic languages consonant phenomenon
  -- (e. g. allophones). Must be sorted in the ascending order to be used correctly.
  -> CharPhoneticClassification -- ^ The 'Array' 'Int' 'PRS' must be sorted in the ascending order to be used in the module correctly.
  -> SegmentRulesG
  -> String -- ^ Corresponds to the \'0\' symbol delimiter in the @ukrainian-phonetics-basic-array@ package.
  -> String -- ^ Corresponds to the \'1\' and \'-\' symbol delimiters in the @ukrainian-phonetics-basic-array@ package.
  -> String 
  -> [[ReadyForConstructionPL]] 
  -> [ReadyForConstructionPL]
foldlI wrs ks arr hs us vs ts jss@((!xs):ys:xss) 
 | null pss = foldlI' wrs ks arr hs us vs ts qss
 | null qss = foldlI' wrs ks arr hs us vs ts pss
 | otherwise = intersectInterReadyFCPL2 wrs ks arr hs us vs ts (foldlI' wrs ks arr hs us vs ts pss) (foldlI' wrs ks arr hs us vs ts qss) 
  where (pss,qss) = partition (< [FSLG []]) jss
        foldlI' wrs ks arr hs us vs ts rss@(rs:ps:yss) = foldlI' wrs ks arr hs us vs ts (intersectInterReadyFCPL2 wrs ks arr hs us vs ts rs ps : yss)
        foldlI' wrs ks arr hs us vs ts ((!xs):_) = xs
        foldlI' _ _ _ _ _ _ _ _ = []
foldlI _ _ _ _ _ _ _ ((!xs):_) = xs
foldlI _ _ _ _ _ _ _ _ = []