-- | -- Module : Ukrainian.ReverseConcatenations -- Copyright : (c) OleksandrZhabenko 2020-2021 -- License : MIT -- Stability : Experimental -- Maintainer : olexandr543@yahoo.com -- -- Contains several functions that helps to reverse many of the phonetic languages approach concatenations -- for the Ukrainian language. {-# OPTIONS_GHC -threaded -rtsopts #-} {-# LANGUAGE CPP, BangPatterns #-} module Ukrainian.ReverseConcatenations where #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__>=710 /* code that applies only to GHC 7.10.* and higher versions */ import GHC.Base (mconcat) #endif #endif import Data.List (sort,isPrefixOf) import CaseBi.Arr (getBFstL') import Data.List.InnToOut.Basic (mapI) #ifdef __GLASGOW_HASKELL__ #if __GLASGOW_HASKELL__==708 /* code that applies only to GHC 7.8.* */ mconcat = concat #endif #endif {-| Reverses many phonetic languages approach related concatenations for the Ukrainian text. Is intended to be used with the text on several lines. -} reverseConcatenations :: String -> String reverseConcatenations xs | null xs = [] | otherwise = unlines . map (unwords . mapI (const True) reverseConcat1 . words) . lines $ xs where yss = lines xs zsss = map words yss {-| Reverses many phonetic languages approach related concatenations for just one Ukrainian word. Is used internally in the 'reverseConcatenations'. -} reverseConcat1 :: String -> [String] reverseConcat1 xs | null xs = [] | otherwise = getBFstL' [xs] [howConcat1WordEntirely "\1072\1073\1086" us,("\1072\1076\1078", howConcat1Word 1 us "\1077" "\1072\1076\1078\1077" xs),howConcat1WordEntirely "\1072\1083\1077" us, ("\1072\1085\1110", howConcat1Word 1 us "\1078" "\1072\1085\1110\1078" xs), ("\1073\1086\1076",howConcat1Word 2 us "\1072\1081" "\1073\1086\1076\1072\1081" xs), ("\1073\1091\1094",howConcat1Word 4 us "\1110\1084\1090\1086" "\1073\1091\1094\1110\1084\1090\1086" xs), howConcat1WordEntirely "\1074\1078\1077" us, ("\1074\1080\1082", howConcat1Word 5 us "\1083\1102\1095\1085\1086" "\1074\1080\1082\1083\1102\1095\1085\1086" xs), ("\1074\1083\1072",howConcat1Word 3 us "\1089\1085\1077" "\1074\1083\1072\1089\1085\1077" xs), ("\1074\1084\1110",if take 8 us == "\1088\1091\1090\1086\1075\1086\1103\1082" then "\1074":"\1084\1110\1088\1091":"\1090\1086\1075\1086":"\1103\1082":[drop 11 xs] else [xs]), ("\1074\1090\1110",howConcat1Word 1 us "\1084" "\1074\1090\1110\1084" xs), ("\1076\1072\1074",howConcat1Word 2 us "\1072\1081" "\1076\1072\1074\1072\1081" xs), ("\1076\1072\1088",howConcat1Word 4 us "\1084\1072\1097\1086" "\1076\1072\1088\1084\1072\1097\1086" xs), ("\1076\1083\1103", if take 7 us == "\1090\1086\1075\1086\1097\1086\1073" then "\1076\1083\1103":"\1090\1086\1075\1086":"\1097\1086\1073":[drop 7 xs] else [xs]), ("\1079\1072\1083", if take 7 us == "\1077\1078\1085\1086\1074\1110\1076" then "\1079\1072\1083\1077\1078\1085\1086":"\1074\1110\1076":[drop 7 xs] else [xs]), ("\1079\1072\1084", if take 11 us == "\1110\1089\1090\1100\1090\1086\1075\1086\1097\1086\1073" then "\1079\1072\1084\1110\1089\1090\1100":"\1090\1086\1075\1086":"\1097\1086\1073":[drop 11 xs] else [xs]), ("\1079\1072\1090", howConcat1Word 1 us "\1077" "\1079\1072\1090\1077" xs), ("\1079\1090\1080", if take 4 us == "\1084\1097\1086\1073" then "\1079":"\1090\1080\1084":"\1097\1086\1073":[drop 4 xs] else [xs]), ("\1079\1090\1086", if take 8 us == "\1075\1086\1095\1072\1089\1091\1103\1082" then "\1079":"\1090\1086\1075\1086":"\1095\1072\1089\1091":"\1103\1082":[drop 8 xs] else [xs]), ("\1082\1086\1083", howConcat1Word 1 us "\1080" "\1082\1086\1083\1080" xs),("\1083\1077\1076", howConcat1Word 2 us "\1074\1077" "\1083\1077\1076\1074\1077" xs), ("\1083\1080\1096", howConcat1Word 1 us "\1077" "\1083\1080\1096\1077" xs), ("\1084\1072\1081", howConcat1Word 2 us "\1078\1077" "\1084\1072\1081\1078\1077" xs), ("\1084\1086\1074", if take 4 us == "\1073\1080\1090\1086" then "\1084\1086\1074\1073\1080\1090\1086":[drop 4 us] else if take 2 us == "\1073\1080" then "\1084\1086\1074\1073\1080":[drop 2 us] else snd (howConcat1WordEntirely "\1084\1086\1074" us)), ("\1085\1072\1074", howConcat1Word 3 us "\1110\1090\1100" "\1085\1072\1074\1110\1090\1100" xs), ("\1085\1072\1089", howConcat1Word 6 us "\1082\1110\1083\1100\1082\1080" "\1085\1072\1089\1082\1110\1083\1100\1082\1080" xs), ("\1085\1072\1095", if take 4 us == "\1077\1073\1090\1086" then "\1085\1072\1095\1077\1073\1090\1086":[drop 4 us] else if take 2 us == "\1077\1073" then "\1085\1072\1095\1077\1073":[drop 2 us] else if take 1 us == "\1077" then "\1085\1072\1095\1077":[drop 1 us] else [xs]),("\1085\1077\1074", howConcat1Word 2 us "\1078\1077" "\1085\1077\1074\1078\1077" xs), ("\1085\1077\1079", if take 9 us == "\1072\1083\1077\1078\1085\1086\1074\1110\1076" then "\1085\1077\1079\1072\1083\1077\1078\1085\1086\1074\1110\1076":[drop 9 us] else if take 13 us == "\1074\1072\1078\1072\1102\1095\1080\1085\1072\1090\1077\1097\1086" then "\1085\1077\1079\1074\1072\1078\1072\1102\1095\1080\1085\1072\1090\1077\1097\1086":[drop 13 us] else [xs]),("\1085\1077\1084", if take 6 us == "\1086\1074\1073\1080\1090\1086" then "\1085\1077\1084\1086\1074\1073\1080\1090\1086":[drop 6 us] else if take 4 us == "\1086\1074\1073\1080" then "\1085\1077\1084\1086\1074\1073\1080":[drop 4 us] else if take 2 us == "\1086\1074" then "\1085\1077\1084\1086\1074":[drop 2 us] else [xs]), ("\1085\1077\1085", if take 6 us == "\1072\1095\1077\1073\1090\1086" then "\1085\1077\1085\1072\1095\1077\1073\1090\1086":[drop 6 us] else if take 3 us == "\1072\1095\1077" then "\1085\1077\1085\1072\1095\1077":[drop 3 us] else [xs]), ("\1085\1077\1093", howConcat1Word 2 us "\1072\1081" "\1085\1077\1093\1072\1081" xs), ("\1085\1110\1073", if take 3 us == "\1080\1090\1086" then "\1085\1110\1073\1080\1090\1086":[drop 3 us] else if take 1 us == "\1080" then "\1085\1110\1073\1080":[drop 1 us] else [xs]),howConcat1WordEntirely "\1085\1110\1078" us, ("\1086\1090\1078", howConcat1Word 1 us "\1077" "\1086\1090\1078\1077" xs),("\1086\1090\1086", howConcat1Word 1 us "\1078" "\1086\1090\1086\1078" xs), ("\1087\1086\1087", if take 6 us == "\1088\1080\1090\1077\1097\1086" then "\1087\1086\1087\1088\1080":"\1090\1077":"\1097\1086":[drop 6 us] else [xs]), ("\1087\1088\1080", if take 4 us == "\1090\1086\1084\1091" then "\1087\1088\1080\1090\1086\1084\1091":[drop 4 us] else if take 3 us == "\1090\1110\1084" then "\1087\1088\1080\1090\1110\1084":[drop 3 us] else if take 5 us == "\1094\1100\1086\1084\1091" then "\1087\1088\1080\1094\1100\1086\1084\1091":[drop 5 us] else if take 4 us == "\1095\1086\1084\1091" then "\1087\1088\1080\1095\1086\1084\1091":[drop 4 us] else if take 3 us == "\1095\1110\1084" then "\1087\1088\1080\1095\1110\1084":[drop 3 us] else [xs]), ("\1087\1088\1086", howConcat1Word 2 us "\1090\1077" "\1087\1088\1086\1090\1077" xs), ("\1087\1110\1089", if take 8 us == "\1083\1103\1090\1086\1075\1086\1103\1082" then "\1087\1110\1089\1083\1103":"\1090\1086\1075\1086":"\1103\1082":[drop 8 us] else [xs]), ("\1089\1072\1084", howConcat1Word 1 us "\1077" "\1089\1072\1084\1077" xs), ("\1089\1077\1073", howConcat1Word 2 us "\1090\1086" "\1089\1077\1073\1090\1086" xs), ("\1090\1072\1082", if take 1 us == "\1080" then "\1090\1072\1082\1080":[drop 1 us] else if take 3 us == "\1081\1072\1082" then "\1090\1072\1082":"\1103\1082":[drop 3 us] else if take 2 us == "\1097\1086" then "\1090\1072\1082":"\1097\1086":[drop 2 us] else [xs]), ("\1090\1080\1084", if take 8 us == "\1095\1072\1089\1086\1084\1081\1072\1082" then "\1090\1080\1084":"\1095\1072\1089\1086\1084":"\1103\1082":[drop 8 us] else [xs]), ("\1090\1086\1073", howConcat1Word 2 us "\1090\1086" "\1090\1086\1073\1090\1086" xs), ("\1090\1086\1084", if take 3 us == "\1091\1097\1086" then "\1090\1086\1084\1091":"\1097\1086":[drop 3 us] else if take 3 us == "\1091\1103\1082" then "\1090\1086\1084\1091":"\1103\1082":[drop 3 us] else [xs]), ("\1090\1110\1083", howConcat1Word 3 us "\1100\1082\1080" "\1090\1110\1083\1100\1082\1080" xs), ("\1091\1079\1074", if take 6 us == "\1081\1072\1079\1082\1091\1079" then "\1091":"\1079\1074\x02BC\1103\1079\1082\1091":"\1079":[drop 6 us] else [xs]), ("\1091\1084\1110", if take 8 us == "\1088\1091\1090\1086\1075\1086\1103\1082" then "\1091":"\1084\1110\1088\1091":"\1090\1086\1075\1086":"\1103\1082":[drop 8 us] else [xs]), howConcat1WordEntirely "\1093\1072\1081" us,("\1093\1086\1095", if take 2 us == "\1073\1080" then "\1093\1086\1095":"\1073\1080":[drop 2 us] else if take 2 us == "\1072\1073" then "\1093\1086\1095\1072":"\1073":[drop 2 us] else "\1093\1086\1095":[us]), ("\1093\1110\1073", howConcat1Word 1 us "\1072" "\1093\1110\1073\1072" xs), ("\1094\1077\1073", howConcat1Word 2 us "\1090\1086" "\1094\1077\1073\1090\1086" xs), ("\1095\1077\1088", if take 6 us == "\1077\1079\1090\1077\1097\1086" then "\1095\1077\1088\1077\1079":"\1090\1077":"\1097\1086":[drop 6 us] else [xs]), howConcat1WordEntirely "\1097\1086\1073" us, ("\1103\1082\1073", howConcat1Word 1 us "\1080" "\1103\1082\1073\1080" xs), ("\1103\1082\1088", howConcat1Word 2 us "\1072\1079" "\1103\1082\1088\1072\1079" xs), ("\1103\1082\1097", howConcat1Word 1 us "\1086" "\1103\1082\1097\1086" xs)] ts where (ts,us) = splitAt 3 xs howConcat1Word :: Int -> String -> String -> String -> String -> [String] howConcat1Word n us us' us'' xs | take n us == us' = us'':[drop n us] | otherwise = [xs] {-# INLINE howConcat1Word #-} howConcat1WordEntirely :: String -> String -> (String, [String]) howConcat1WordEntirely ts us = (ts,ts:[us]) {-# INLINE howConcat1WordEntirely #-}