-- |
-- Module      :  Melodics.Ukrainian
-- Copyright   :  (c) OleksandrZhabenko 2019-2020
-- License     :  MIT
-- Maintainer  :  olexandr543@yahoo.com
--
-- Functions used in the main function to provide functionality of a musical instrument synthesizer or for Ukrainian speech synthesis 
-- especially for poets, translators and writers. 
--

module Melodics.Ukrainian (
  appendS16LEFile,
  convertToProperUkrainian,
  takeData
) where

import Data.Char
import qualified Data.Vector as V
import qualified Data.ByteString.Lazy as B
import System.IO
import CaseBi (getBFst')
import Data.List.InnToOut.Basic (mapI, mapI2)
import Paths_mmsyn6ukr

{-
-- Inspired by: https://mail.haskell.org/pipermail/beginners/2011-October/008649.html
-}

data Triple = Z | O | T
  deriving (Eq,Ord,Show)

-- | Function to take raw sound data from the \".wav\" file given.
takeData :: FilePath -> IO B.ByteString
takeData file = do
  data1 <- B.readFile file
  let dataN = B.drop 44 data1 in return dataN

-- | The function that actually produces a .raw file. The mapping table is given in the @Map.txt@ file.
appendS16LEFile ::  V.Vector String -> Handle -> IO ()
appendS16LEFile xs hdl | not (V.null xs) =
  do
    dataFileList <- mapM getDataFileName
      ["-.wav", "0.wav", "1.wav", "A.wav", "B.wav", "C.wav", "D.wav", "E.wav", "F.wav", "G.wav", "H.wav",
        "I.wav", "J.wav", "K.wav", "L.wav", "M.wav", "N.wav", "O.wav", "P.wav", "Q.wav", "R.wav",
          "S.wav", "T.wav", "U.wav", "V.wav", "W.wav", "X.wav", "Y.wav", "Z.wav", "a.wav", "b.wav", "c.wav",
            "d.wav", "e.wav", "f.wav"]
    dataList <- V.mapM takeData . V.fromList $! dataFileList
    V.mapM_ (\u ->
      if V.all (\z -> B.length z > 0) dataList
        then let rs =  tail . dropWhile (/= ' ') . takeWhile (/= '}') . show $ hdl in do
          hClose hdl
          closedHdl <- hIsClosed hdl
          if closedHdl
            then do
                   B.appendFile rs $ dataList V.! (getBFst' (0, V.fromList [("-", 0), ("0", 1), ("1", 2), ("а", 3), ("б", 4),
                     ("в", 5), ("г", 6), ("д", 7), ("дж", 8), ("дз", 9), ("е", 10), ("ж", 11), ("з", 12), ("и", 13),
                        ("й", 14), ("к", 15), ("л", 16), ("м", 17), ("н", 18), ("о", 19), ("п", 20), ("р", 21),
                          ("с", 22), ("сь", 23), ("т", 24), ("у", 25), ("ф", 26), ("х", 27), ("ц", 28), ("ць", 29), ("ч", 30),
                            ("ш", 31), ("ь", 32), ("і", 33), ("ґ", 34)]) u)
            else error "File is not closed!"
        else error "Data sound file is not read!") xs
    hClose hdl
                       | otherwise = return ()

-- | The function that converts a written Ukrainian text into the sounding in the program phonetical respesentation. 
-- It is not exact phonetically but you can make for yourself a general impression of the Ukrainian sounding.
convertToProperUkrainian :: String -> V.Vector String
convertToProperUkrainian ys = toVector . correctA . applyChanges . createTuplesByAnalysis . changeJotted . secondConv . wasFstConverted . filterUkr $ ys

correctB :: [String] -> [String]
correctB ys@(xs:xss) | compare (length . filter (== "1") . takeFromFT2 6 $ ys) 1 == GT =
  map (\t -> if t == "1" || isPunctuation (head t) then "-" else t) (takeFromFT2 6 ys) ++ correctB (dropFromFT2 6 ys)
                     | otherwise = (if isPunctuation . head $ xs then "-" else xs):correctB xss
correctB [] = []

filterUkr :: String -> String
filterUkr xs = concatMap (\x -> if isUkrainian x then [toLower x] else if isSpace x || isControl x || isPunctuation x then [x] else []) xs

secondConv :: String -> String
secondConv (y:ys) | if isSpace y then True else isControl y = '1':secondConv ys
                  | otherwise = y:secondConv ys
secondConv _ = []

createTuplesByAnalysis :: String -> [(String, Triple)]
createTuplesByAnalysis x@(y:ys) | getBFst' (False, V.fromList $ zip "\1075\1076\1078\1079\1082\1085\1087\1089\1090\1092\1093\1094\1095\1096" (repeat True)) y = initialA x
                                | not (null ys) && head ys == '\1081' && isConsNotJ y = case y of
  '\1089' -> ("\1089\1100", T):createTuplesByAnalysis (tail ys)
  '\1094' -> ("\1094\1100", T):createTuplesByAnalysis (tail ys)
  _       -> ([y], T):("\1100", Z):createTuplesByAnalysis (tail ys)
                                | otherwise = ([y], Z):createTuplesByAnalysis ys
createTuplesByAnalysis _ = []

canChange :: Char -> Triple
canChange x | isSpace x || isControl x || x == '-' = O
            | getBFst' (False, V.fromList $ zip "\1075\1076\1078\1079\1082\1085\1087\1089\1090\1092\1093\1094\1095\1096" (repeat True)) x = T
            | otherwise = Z

isVoicedObstruent :: String -> Bool
isVoicedObstruent xs | not (null xs) = getBFst' (False, V.fromList $ zip ["\1073","\1075","\1076","\1076\1078","\1076\1079","\1078","\1079","\1169"] (repeat True)) xs
                     | otherwise = False

initialA :: String -> [(String, Triple)]
initialA t1@(t:ts) | canChange t == O = ("1", Z):initialA ts
                   | canChange t == T =
 if getBFst' (False, V.fromList $ zip "\1076\1085\1089\1090\1093\1094" (repeat True)) t
   then let (us,vs) = splitAt 2 t1 in
    if getBFst' (False, V.fromList $ zip ["\1076\1078","\1076\1079","\1085\1090","\1089\1090","\1089\1100","\1090\1089","\1090\1100","\1093\1075","\1094\1100"] (repeat True)) us
      then (us, T):initialA (vs)
      else ([t], T):initialA ts
   else case (getBFst' (False, V.fromList $ zip "\1075\1078\1079\1082\1087\1092\1095\1096" (repeat True)) t) of
          ~True -> ([t], T):initialA ts
                   | otherwise = ([t], Z):initialA ts
initialA _ = []

wasFstConverted :: String -> String
wasFstConverted = mapI mustBeConverted convertionFst

mustBeConverted :: Char -> Bool
mustBeConverted c = getBFst' (False, V.fromList $ zip "'-\700\1097\1102\1103\1108\1111\8217" (repeat True)) c

convertionFst :: Char -> String
convertionFst u = getBFst' ([u], V.fromList $ zip "'-\700\1097\1102\1103\1108\1111\8217" ["0","0","0","\1096\1095","\1081\1091","\1081\1072","\1081\1077","\1081\1110","0"]) u

isConsNotJ :: Char -> Bool
isConsNotJ = getBFst' (False, V.fromList $ zip "\1073\1074\1075\1076\1078\1079\1082\1083\1084\1085\1087\1088\1089\1090\1092\1093\1094\1095\1096\1169" (repeat True))

changeJotted :: String -> String
changeJotted (x:y:z:zs) | (getBFst' (False, V.fromList $ zip "\1072\1077\1080\1091\1110" (repeat True)) z) && (y == '\1081') && (isConsNotJ x) = x:'\1100':z:changeJotted zs
                        | isConsNotJ x && y == '\1110' = x:'\1100':y:changeJotted (z:zs)
                        | otherwise = x:changeJotted (y:z:zs)
changeJotted xs = xs

applyChanges :: [(String, Triple)] -> [(String, Triple)]
applyChanges (z:t:zs) | snd z == T =
  getBFst' ((fst z, Z), V.fromList . zip ["\1075","\1076","\1076\1079","\1078","\1079","\1082","\1085\1090",
    "\1087","\1089","\1089\1090","\1089\1100","\1090","\1090\1089","\1090\1100","\1092","\1093","\1093\1075","\1094","\1094\1100","\1095","\1096"]
      $ [гT (t:zs), дT (t:zs), дзT (t:zs), жT (t:zs), зT (t:zs), кT (t:zs),
        нтT (t:zs), пT (t:zs), сT (t:zs), стT (t:zs), сьT (t:zs), тT (t:zs), тсT (t:zs), тьT (t:zs),
          фT (t:zs), хT (t:zs), хгT (t:zs), цT (t:zs), цьT (t:zs), чT (t:zs), шT (t:zs)]) (fst z):applyChanges (t:zs)
                      | otherwise = z:applyChanges (t:zs)
applyChanges [(xs, _)] = [(xs, Z)]
applyChanges _ = []

-- in the further ??T functions the last (, T) means that it must be afterwards be separated with the soft sign into two tuples (1 additional function in the composition)
-- need further processing means that there should be additional checks and may be transformations. May be they can be omitted

isSoftDOrL :: [(String, Triple)] -> Bool
isSoftDOrL xs = getBFst' (False, V.fromList . zip ["\1073\1100","\1074\1100","\1076\1100","\1079\1100","\1083\1100",
  "\1084\1100","\1085\1100","\1087\1100","\1089\1100","\1090\1100","\1092\1100","\1094\1100"] $ (repeat True)) (takeFromFT_ 2 xs)

isSoftDen :: [(String, Triple)] -> Bool
isSoftDen xs = getBFst' (False, V.fromList . zip ["\1076\1100","\1079\1100","\1083\1100","\1085\1100","\1089\1100",
  "\1090\1100","\1094\1100"] $ (repeat True)) (takeFromFT_ 2 xs) || takeFromFT_ 3 xs == "\1076\1079\1100"

гT :: [(String, Triple)] -> (String, Triple)
гT (t:_) | head (fst t) == '\1082' || head (fst t) == '\1090' = ("\1093", Z)
         | otherwise = ("\1075", Z)
гT _ = ("г", Z)

дT :: [(String, Triple)] -> (String, Triple)
дT t1@(_:_) | takeFromFT_ 1 t1 `elem` ["\1078","\1095","\1096"] = ("\1076\1078", Z) -- need further processing д дж
            | takeFromFT_ 2 t1 `elem` ["\1089\1100","\1094\1100"] = ("\1076\1079\1100", T) -- need further processing д дзь
            | takeFromFT_ 1 t1 `elem` ["\1079","\1089","\1094"] = ("\1076\1079", Z) -- need further processing  д дз             
            | otherwise = ("\1076", Z)
дT _ = ("д", Z)

дзT :: [(String, Triple)] -> (String, Triple)
дзT t1@(_:_) | isSoftDOrL t1 = ("\1076\1079\1100", T)
             | otherwise = ("\1076\1079", Z)
дзT _ = ("дз", Z)

жT :: [(String, Triple)] -> (String, Triple)
жT t1@(_:_) | takeFromFT 2 t1 `elem` ["\1089\1100","\1094\1100"] = ("\1079\1100", T)
            | otherwise = ("\1078", Z)
жT _ = ("ж", Z)

зT :: [(String, Triple)] -> (String, Triple)
зT t1@(_:_) | takeFromFT_ 1 t1 `elem` ["\1078","\1095","\1096"] || takeFromFT_ 2 t1 == "\1076\1078" = ("\1078", Z)
            | isSoftDOrL t1 = ("\1079\1100", T)
            | takeFromFT 1 t1 `elem` ["\1095","\1096"] = ("\1096", Z) -- need further processing з ш
            | takeFromFT 1 t1  `elem` ["\1089","\1094"] || takeFromFT_ 1 t1 `elem` ["\1082","\1087","\1090","\1092","\1093"] = ("\1089", Z) -- need further processing з с
            | otherwise = ("\1079", Z)
зT _ = ("з", Z)

кT :: [(String, Triple)] -> (String, Triple)
кT t1@(_:_) | isVoicedObstruent (takeFromFT_ 1 t1) || isVoicedObstruent (takeFromFT_ 2 t1) = ("\1169", Z)
            | otherwise = ("\1082", Z)
кT _ = ("к", Z)

нтT :: [(String, Triple)] -> (String, Triple)
нтT t1@(_:_) | takeFromFT 2 t1 == "\1089\1090" = ("\1085", Z)
             | takeFromFT 3 t1 == "\1089\1100\1082" = ("\1085\1100", T)
             | otherwise = ("\1085\1090", Z)
нтT _ = ("нт", T)

пT :: [(String, Triple)] -> (String, Triple)
пT t1@(_:_) | isVoicedObstruent (takeFromFT_ 1 t1) || isVoicedObstruent (takeFromFT_ 2 t1) = ("\1073", Z)
            | otherwise = ("\1087", Z)
пT _ = ("п", Z)

сT :: [(String, Triple)] -> (String, Triple)
сT t1@(_:_) | (isVoicedObstruent (takeFromFT_ 1 t1) && drop 1 (takeFromFT_ 2 t1) == "\1100") ||
   (isVoicedObstruent (takeFromFT_ 2 t1) && drop 2 (takeFromFT_ 3 t1) == "\1100") = ("\1079\1100", T)
            | isVoicedObstruent (takeFromFT_ 1 t1) || isVoicedObstruent (takeFromFT_ 2 t1) = ("\1073", Z)
            | isSoftDOrL t1 = ("\1089\1100", Z)
            | takeFromFT_ 1 t1 == "\1096" = ("\1096", Z)
            | otherwise = ("\1089", Z)
сT _ = ("с", Z)

стT :: [(String, Triple)] -> (String, Triple)
стT t1@(_:_) | isVoicedObstruent (takeFromFT_ 1 t1) || isVoicedObstruent (takeFromFT_ 2 t1) = ("\1079", Z)
             | takeFromFT_ 3 t1 == "\1089\1100\1082" || takeFromFT_ 2 t1 == "\1094\1100" = ("\1089\1100", Z)
             | takeFromFT_ 1 t1 `elem` ["\1089","\1085"] = ("\1089", Z)
             | takeFromFT_ 1 t1 == "\1095" = ("\1096", Z)
             | otherwise = ("\1089\1090", T)
стT _ = ("ст", T)

сьT :: [(String, Triple)] -> (String, Triple)
сьT t1@(_:_) | isVoicedObstruent (takeFromFT_ 2 t1) || isVoicedObstruent (takeFromFT_ 1 t1) = ("\1079\1100", T)
             | otherwise = ("\1089\1100", Z)
сьT _ = ("сь", Z)

тT :: [(String, Triple)] -> (String, Triple)
тT t1@(_:_) | (isVoicedObstruent (takeFromFT_ 1 t1) && drop 1 (takeFromFT_ 2 t1) == "\1100") ||
   (isVoicedObstruent (takeFromFT_ 2 t1) && drop 2 (takeFromFT_ 3 t1) == "\1100") = ("\1076\1100", T)
            | isVoicedObstruent (takeFromFT_ 2 t1) || isVoicedObstruent (takeFromFT_ 1 t1) = ("\1076", Z)
            | takeFromFT_ 2 t1 == "\1094\1100" = ("\1094\1100", Z)
            | takeFromFT_ 1 t1 == "\1094" = ("\1094", Z)
            | isSoftDen t1 = ("\1090\1100", T)
            | takeFromFT_ 1 t1 `elem` ["\1095","\1096"] = ("\1095", Z)
            | otherwise = ("\1090", Z)
тT _ = ("т", Z)

тсT :: [(String, Triple)] -> (String, Triple)
тсT _ = ("\1094", Z)

тьT :: [(String, Triple)] -> (String, Triple)
тьT t1@(_:_) | isVoicedObstruent (takeFromFT_ 2 t1) || isVoicedObstruent (takeFromFT_ 1 t1) = ("\1076\1100", T)
             | takeFromFT_ 3 t1 == "\1089\1100\1072" = ("\1094\1100", Z)
             | otherwise = ("\1090\1100", T)
тьT _ = ("ть", T)

фT :: [(String, Triple)] -> (String, Triple)
фT t1@(_:_) | isVoicedObstruent (takeFromFT_ 2 t1) || isVoicedObstruent (takeFromFT_ 1 t1) = ("\1074", Z)
            | otherwise = ("\1092", Z)
фT _ = ("ф", Z)

хT :: [(String, Triple)] -> (String, Triple)
хT t1@(_:_) | isVoicedObstruent (takeFromFT_ 2 t1) || isVoicedObstruent (takeFromFT_ 1 t1) = ("\1075", Z)
            | otherwise = ("\1093", Z)
хT _ = ("х", Z)

хгT :: [(String, Triple)] -> (String, Triple)
хгT _ = ("\1075", Z)

цT :: [(String, Triple)] -> (String, Triple)
цT t1@(_:_) | (isVoicedObstruent (takeFromFT_ 1 t1) && drop 1 (takeFromFT_ 2 t1) == "\1100") ||
   (isVoicedObstruent (takeFromFT_ 2 t1) && drop 2 (takeFromFT_ 3 t1) == "\1100") = ("\1076\1079\1100", T)
            | isSoftDOrL t1 = ("\1094\1100", Z)
            | isVoicedObstruent (takeFromFT_ 2 t1) || isVoicedObstruent (takeFromFT_ 1 t1) = ("\1076\1079", Z)
            | otherwise = ("\1094", Z)
цT _ = ("ц", Z)

цьT :: [(String, Triple)] -> (String, Triple)
цьT t1@(_:_) | (isVoicedObstruent (takeFromFT_ 1 t1) && drop 1 (takeFromFT_ 2 t1) == "\1100") ||
   (isVoicedObstruent (takeFromFT_ 2 t1) && drop 2 (takeFromFT_ 3 t1) == "\1100") = ("\1076\1079\1100", T)
             | otherwise = ("\1094\1100", Z)
цьT _ = ("ць", Z)

чT :: [(String, Triple)] -> (String, Triple)
чT t1@(_:_) | takeFromFT_ 2 t1 `elem` ["\1089\1100","\1094\110"] = ("\1094\1100", Z)
            | isVoicedObstruent (takeFromFT_ 2 t1) || isVoicedObstruent (takeFromFT_ 1 t1) = ("\1076\1078", Z)
            | otherwise = ("\1095", Z)
чT _ = ("ч", Z)

шT :: [(String, Triple)] -> (String, Triple)
шT t1@(_:_) | takeFromFT_ 2 t1 `elem` ["\1089\1100","\1094\110"] = ("\1089\1100", Z)
            | isVoicedObstruent (takeFromFT_ 2 t1) || isVoicedObstruent (takeFromFT_ 1 t1) = ("\1078", Z)
            | otherwise = ("\1096", Z)
шT _ = ("ш", Z)

correctA :: [(String, Triple)] -> [(String, Triple)]
correctA = correctSomeW . separateSoftS

separateSoftS :: [(String, Triple)] -> [(String, Triple)]
separateSoftS xss = mapI (\x -> snd x == T) divideToParts xss

divideToParts :: (String, Triple) -> [(String, Triple)]
divideToParts (xs, _) = [(init xs, Z),([last xs], Z)]

correctSomeW :: [(String, Triple)] -> [(String, Triple)]
correctSomeW (x:y:z:xs) | fst x == "\1094\1100" && fst y == "\1089\1100" && fst z == "\1072" = x:("\1094\1100", Z):z:correctSomeW xs
                        | (fst x == "1" || fst x == "0") && fst y == "\1081" && fst z == "\1072" =
  if takeFromFT 2 xs == "\1095\1085"
    then x:y:z:("\1096", Z):correctSomeW (tail xs)
    else x:correctSomeW (y:z:xs)
                        | otherwise = x:correctSomeW (y:z:xs)
correctSomeW zs = zs

takeFromFT :: Int -> [(String, Triple)] -> String
takeFromFT n ts | if compare 0 n /= LT then True else null ts = []
                | compare k n /= LT = take n ks
                | otherwise = ks ++ takeFromFT (n - k) (tail ts)
    where ks = fst (head ts)
          k = length ks

takeFromFT2 :: Int -> [String] -> [String]
takeFromFT2 n ts | if compare 0 n /= LT then True else null ts = []
                 | compare k n /= LT = [ks]
                 | otherwise = ks:takeFromFT2 (n - k) (tail ts)
    where ks = head ts
          k = length ks

dropFromFT2 :: Int -> [String] -> [String]
dropFromFT2 n ts | if compare 0 n /= LT then True else null ts = []
                 | compare k n /= LT = tail ts
                 | otherwise = dropFromFT2 (n - k) (tail ts)
    where k = length (head ts)

takeFromFT_ :: Int -> [(String, Triple)] -> String
takeFromFT_ n ts = takeFromFT n (filter (\(xs, _) -> (xs /= "1" && xs /= "0")) ts)

toVector :: [(String, Triple)] -> V.Vector String
toVector ts = V.fromList . correctB . mapI2 (\x -> not . null . fst $ x) fst (\_ -> []) $ ts

isUkrainian :: Char -> Bool
isUkrainian y | (y >= '\1040' && y <= '\1065') || (y >= '\1070' && y <= '\1097') = True
              | otherwise = getBFst' (False, V.fromList . map (\x -> (x, True)) $ "'-\700\1028\1030\1031\1068\1100\1102\1103\1108\1110\1111\1168\1169\8217") y