{-# LANGUAGE DeriveDataTypeable #-}
module MMSyn7.Syllable (
UZP
, syllablesUkr
, takeWordS
, sylLengths
, createSyllables
, divideConsonants
, groupConsonants
, sndGroups
, takeWithV
, vecToUZPs
, vecWords
, uzpsToList
, isNotVowel2
, isSonorous1
, isVoicedC1
, isVoicelessC1
, isVowel1
, isVwl
, notEqC
) where
import Data.Typeable
import qualified Data.Vector as V
import qualified Data.List as L (groupBy,span)
import Melodics.Ukrainian (convertToProperUkrainian)
import CaseBi (getBFst')
import Data.List.InnToOut.Basic (mapI)
data UZP = Vowel String | Sonorous String | SonorousP String | Voiced String | VoicedP String | Voiceless String | VoicelessP String | VoicelessP2 String
deriving ( Eq, Typeable )
instance Ord UZP
where compare x y = compare (show x) (show y)
instance Show UZP
where show (SonorousP xs) = xs ++ "ь"
show (VoicedP xs) = xs ++ "ь"
show (VoicelessP xs) = xs ++ "ь"
show (Vowel xs) = xs
show (VoicelessP2 xs) = xs
show (Sonorous xs) = xs
show (Voiced xs) = xs
show (Voiceless xs) = xs
vecToUZPs :: V.Vector String -> [UZP]
vecToUZPs v
| V.null v = []
| getBFst' (False, (V.fromList . zip ["а","е","и","о","у","і"] $ (replicate 6 True))) . V.unsafeHead $ v = (Vowel (V.unsafeHead v)):vecToUZPs (V.unsafeTail v)
| V.unsafeHead v == "сь" || V.unsafeHead v == "ць" = (VoicelessP2 (V.unsafeHead v)):(vecToUZPs (V.unsafeTail v))
| ((V.null . V.unsafeTail $ v) || (V.unsafeIndex v 1 /= "ь")) && getBFst' (False, V.fromList . zip ["в","й","л","м","н","р"] $ (replicate 6 True)) (V.unsafeHead v) = (Sonorous (V.unsafeHead v)):(if V.null . V.unsafeTail $ v then [] else vecToUZPs (V.unsafeTail v))
| ((V.null . V.unsafeTail $ v) || (V.unsafeIndex v 1 /= "ь")) && getBFst' (False, V.fromList . zip ["б","г","д","дж","дз","ж","з","ґ"] $ (replicate 8 True)) (V.unsafeHead v) = (Voiced (V.unsafeHead v)):(if V.null . V.unsafeTail $ v then [] else vecToUZPs (V.unsafeTail v))
| ((V.null . V.unsafeTail $ v) || (V.unsafeIndex v 1 /= "ь")) = (Voiceless (V.unsafeHead v)):(if V.null . V.unsafeTail $ v then [] else vecToUZPs (V.unsafeTail v))
| getBFst' (False, V.fromList . zip ["в","л","м","н","р"] $ (replicate 5 True)) (V.unsafeHead v) = (SonorousP (V.unsafeHead v)):vecToUZPs (V.unsafeDrop 2 v)
| getBFst' (False, V.fromList . zip ["б","г","д","дж","дз","ж","з","ґ"] $ (replicate 8 True)) (V.unsafeHead v) = (VoicedP (V.unsafeHead v)):vecToUZPs (V.unsafeDrop 2 v)
| otherwise = (VoicelessP (V.unsafeHead v)):vecToUZPs (V.unsafeDrop 2 v)
sndGroups :: [UZP] -> [[UZP]]
sndGroups ys@(_:_) = L.groupBy isNotVowel2 ys
sndGroups _ = []
isVowel1 :: UZP -> Bool
isVowel1 (Vowel _) = True
isVowel1 _ = False
isVwl :: Char -> Bool
isVwl = getBFst' (False, (V.fromList . zip "аеиоуі" $ (replicate 6 True)))
isSonorous1 :: UZP -> Bool
isSonorous1 (Sonorous _) = True
isSonorous1 (SonorousP _) = True
isSonorous1 _ = False
isVoicedC1 :: UZP -> Bool
isVoicedC1 (Voiced _) = True
isVoicedC1 (VoicedP _) = True
isVoicedC1 _ = False
isVoicelessC1 :: UZP -> Bool
isVoicelessC1 (Voiceless _) = True
isVoicelessC1 (VoicelessP _) = True
isVoicelessC1 _ = False
isNotVowel2 :: UZP -> UZP -> Bool
isNotVowel2 x y | isVowel1 x || isVowel1 y = False
| otherwise = True
notEqC :: UZP -> UZP -> Bool
notEqC x y =
case x of
(Vowel _) -> True
_ ->
case y of
(Vowel _) -> True
_ -> compare x y /= EQ
vecWords :: V.Vector String -> [V.Vector String]
vecWords v | V.null v = []
| V.unsafeHead v == "-" || V.unsafeHead v == "0" || V.unsafeHead v == "1" = vecWords (V.unsafeTail v)
| otherwise =
let (v1, v2) = V.break (\x -> x == "-" || x == "0" || x == "1") v
v3 = snd . V.span (\x -> x == "-" || x == "0" || x == "1") $ v2 in v1:vecWords v3
divideConsonants :: [UZP] -> [[UZP]]
divideConsonants xs = case length xs of
1 -> [[],xs]
2 -> if ((isSonorous1 . head $ xs) && (head xs `notEqC` last xs)) || ((isVoicedC1 . head $ xs) && (isVoicelessC1 . head . tail $ xs)) then [[head xs], tail xs] else [[],xs]
3 -> if isSonorous1 . head $ xs then [[head xs], tail xs]
else if isSonorous1 . head . tail $ xs then [[head xs, head . tail $ xs], [last xs]]
else [[],xs]
_ -> if (isSonorous1 . head $ xs) || (isVoicedC1 . head $ xs) then [[head xs], tail xs] else [[],xs]
groupConsonants :: [[UZP]] -> [[UZP]]
groupConsonants = mapI (not . isVowel1 . head) divideConsonants
uzpsToList :: [[UZP]] -> [String]
uzpsToList xss = map (concatMap show) xss
createSyllables :: [[UZP]] -> [String]
createSyllables xss =
let (tss, vss) = L.span (any isVwl) . takeWithV $ xss in
if null tss
then [concat . takeWithV $ xss]
else concat [init tss, [last tss ++ concat vss]]
syllablesUkr :: String -> [[String]]
syllablesUkr xs = map ( createSyllables . groupConsonants . sndGroups . vecToUZPs) . vecWords . convertToProperUkrainian $ xs
takeWithV :: [[UZP]] -> [String]
takeWithV ((x@(t:ts)):ys:xss)
| (isVowel1 t) && (null ys) = show t:takeWithV xss
| (isVowel1 t) && (isVowel1 . head $ ys) = show t:takeWithV (ys:xss)
| (isVowel1 t) = (show t ++ (show . head $ ys)):takeWithV xss
| otherwise = (concatMap show x ++ (head . takeWithV $ (ys:xss))):(tail . takeWithV $ (ys:xss))
takeWithV (_:ys:xss) = takeWithV (ys:xss)
takeWithV (x:xs) = map show x
takeWithV _ = []
takeWordS :: Int -> String -> [[String]]
takeWordS n xs = take n . syllablesUkr $ xs
sylLengths :: [[String]] -> [[Int]]
sylLengths = fmap (fmap (length . filter (/= 'ь')))