-- | Lexers and unlexers - they work on space-separated word strings
module GF.Text.Lexing (stringOp,opInEnv,bindTok) where

import GF.Text.Transliterations

import Data.Char (isSpace,toUpper,toLower)
import Data.List (intersperse)

stringOp :: (String -> Bool) -> String -> Maybe (String -> String)
stringOp :: (String -> Bool) -> String -> Maybe (String -> String)
stringOp String -> Bool
good String
name = case String
name of
  String
"chars"      -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ (String -> [String]) -> String -> String
appLexer ((String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (String -> Bool) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isSpace) ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> String) -> String -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Char -> String
forall (m :: * -> *) a. Monad m => a -> m a
return)
  String
"lextext"    -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ (String -> [String]) -> String -> String
appLexer ((String -> Bool) -> String -> [String]
lexText String -> Bool
good)
  String
"lexcode"    -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ (String -> [String]) -> String -> String
appLexer String -> [String]
lexCode
  String
"lexmixed"   -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ (String -> [String]) -> String -> String
appLexer ((String -> Bool) -> String -> [String]
lexMixed String -> Bool
good)
  String
"lexgreek"   -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ (String -> [String]) -> String -> String
appLexer String -> [String]
lexAGreek
  String
"lexgreek2"  -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ (String -> [String]) -> String -> String
appLexer String -> [String]
lexAGreek2
  String
"words"      -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ (String -> [String]) -> String -> String
appLexer String -> [String]
words
  String
"bind"       -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ ([String] -> String) -> String -> String
appUnlexer ([String] -> String
unwords ([String] -> String)
-> ([String] -> [String]) -> [String] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> [String]
bindTok)
  String
"unchars"    -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ ([String] -> String) -> String -> String
appUnlexer [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
  String
"unlextext"  -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ ([String] -> String) -> String -> String
appUnlexer ([String] -> String
unlexText ([String] -> String)
-> ([String] -> [String]) -> [String] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> [String]
unquote ([String] -> [String])
-> ([String] -> [String]) -> [String] -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> [String]
bindTok)
  String
"unlexcode"  -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ ([String] -> String) -> String -> String
appUnlexer [String] -> String
unlexCode
  String
"unlexmixed" -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ ([String] -> String) -> String -> String
appUnlexer ((String -> Bool) -> [String] -> String
unlexMixed String -> Bool
good ([String] -> String)
-> ([String] -> [String]) -> [String] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> [String]
unquote ([String] -> [String])
-> ([String] -> [String]) -> [String] -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> [String]
bindTok)
  String
"unlexgreek" -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ ([String] -> String) -> String -> String
appUnlexer [String] -> String
unlexAGreek
  String
"unlexnone"  -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just String -> String
forall a. a -> a
id
  String
"unlexid"    -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just String -> String
forall a. a -> a
id
  String
"unwords"    -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just ((String -> String) -> Maybe (String -> String))
-> (String -> String) -> Maybe (String -> String)
forall a b. (a -> b) -> a -> b
$ ([String] -> String) -> String -> String
appUnlexer [String] -> String
unwords
  String
"to_html"    -> (String -> String) -> Maybe (String -> String)
forall a. a -> Maybe a
Just String -> String
wrapHTML
  String
_            -> String -> Maybe (String -> String)
transliterate String
name

-- perform op in environments beg--end, t.ex. between "--"
--- suboptimal implementation
opInEnv :: String -> String -> (String -> String) -> (String -> String)
opInEnv :: String -> String -> (String -> String) -> String -> String
opInEnv String
beg String
end String -> String
op = [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([String] -> String) -> (String -> [String]) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> [String] -> [String]
altern Bool
False ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int, String) -> String -> String -> [String]
chop (Int
lbeg, String
beg) [] where
  chop :: (Int, String) -> String -> String -> [String]
chop mk :: (Int, String)
mk@(Int
lg, String
mark) String
s0 String
s = 
    let (String
tag,String
rest) = Int -> String -> (String, String)
forall a. Int -> [a] -> ([a], [a])
splitAt Int
lg String
s in
    if String
tagString -> String -> Bool
forall a. Eq a => a -> a -> Bool
==String
mark then (String -> String
forall a. [a] -> [a]
reverse String
s0) String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String
mark String -> [String] -> [String]
forall a. a -> [a] -> [a]
: (Int, String) -> String -> String -> [String]
chop ((Int, String) -> (Int, String)
forall a. (a, String) -> (Int, String)
switch (Int, String)
mk) [] String
rest 
      else case String
s of
        Char
c:String
cs -> (Int, String) -> String -> String -> [String]
chop (Int, String)
mk (Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
s0) String
cs
        [] -> [String -> String
forall a. [a] -> [a]
reverse String
s0]
  switch :: (a, String) -> (Int, String)
switch (a
lg,String
mark) = if String
markString -> String -> Bool
forall a. Eq a => a -> a -> Bool
==String
beg then (Int
lend,String
end) else (Int
lbeg,String
beg)
  (Int
lbeg,Int
lend) = (String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
beg, String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
end)
  altern :: Bool -> [String] -> [String]
altern Bool
m [String]
ts = case [String]
ts of
    String
t:[String]
ws | Bool -> Bool
not Bool
m Bool -> Bool -> Bool
&& String
tString -> String -> Bool
forall a. Eq a => a -> a -> Bool
==String
beg -> String
t String -> [String] -> [String]
forall a. a -> [a] -> [a]
: Bool -> [String] -> [String]
altern Bool
True [String]
ws
    String
t:[String]
ws | Bool
m     Bool -> Bool -> Bool
&& String
tString -> String -> Bool
forall a. Eq a => a -> a -> Bool
==String
end -> String
t String -> [String] -> [String]
forall a. a -> [a] -> [a]
: Bool -> [String] -> [String]
altern Bool
False [String]
ws
    String
t:[String]
ws -> (if Bool
m then String -> String
op String
t else String
t) String -> [String] -> [String]
forall a. a -> [a] -> [a]
: Bool -> [String] -> [String]
altern Bool
m [String]
ws
    [] -> []

appLexer :: (String -> [String]) -> String -> String
appLexer :: (String -> [String]) -> String -> String
appLexer String -> [String]
f = [String] -> String
unwords ([String] -> String) -> (String -> [String]) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (String -> Bool) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null) ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
f

appUnlexer :: ([String] -> String) -> String -> String
----appUnlexer f = unlines . map (f . words) . lines
appUnlexer :: ([String] -> String) -> String -> String
appUnlexer [String] -> String
f = [String] -> String
f ([String] -> String) -> (String -> [String]) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words

wrapHTML :: String -> String
wrapHTML :: String -> String
wrapHTML = [String] -> String
unlines ([String] -> String) -> (String -> [String]) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> [String]
tag ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String] -> [String]
forall a. a -> [a] -> [a]
intersperse String
"<br>" ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lines where
  tag :: [String] -> [String]
tag [String]
ss = String
"<html>"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
"<head>"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
"</head>"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
"<body>" String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
ss [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String
"</body>",String
"</html>"]


-- * Text lexing
-- | Text lexing with standard word capitalization of the first word of every sentence
lexText :: (String -> Bool) -> String -> [String]
lexText :: (String -> Bool) -> String -> [String]
lexText String -> Bool
good = (String -> String) -> String -> [String]
lexText' ((String -> Bool) -> String -> String
uncapitInit String -> Bool
good)

-- | Text lexing with custom treatment of the first word of every sentence.
lexText' :: (String->String) -> String -> [String]
lexText' :: (String -> String) -> String -> [String]
lexText' String -> String
uncap1 = [String] -> [String]
uncap ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lext where
  lext :: String -> [String]
lext String
s = case String
s of
    Char
c:String
cs | Char -> Bool
isMajorPunct Char
c -> [Char
c] String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String] -> [String]
uncap (String -> [String]
lext String
cs)
    Char
c:String
cs | Char -> Bool
isMinorPunct Char
c -> [Char
c] String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> [String]
lext String
cs
    Char
c:String
cs | Char -> Bool
isSpace Char
c ->       String -> [String]
lext String
cs
    Char
_:String
_ -> let (String
w,String
cs) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (\Char
x -> Char -> Bool
isSpace Char
x Bool -> Bool -> Bool
|| Char -> Bool
isPunct Char
x) String
s in String
w String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> [String]
lext String
cs
    String
_ -> [String
s]
  uncap :: [String] -> [String]
uncap [String]
s = case [String]
s of
    String
w:[String]
ws -> String -> String
uncap1 String
wString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[String]
ws
    [String]
_ -> [String]
s

unlexText :: [String] -> String
unlexText :: [String] -> String
unlexText = String -> String
capitInit (String -> String) -> ([String] -> String) -> [String] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String
unlext where
  unlext :: [String] -> String
unlext [String]
s = case [String]
s of
    String
w:[] -> String
w
    String
w:[Char
c]:[] | Char -> Bool
isPunct Char
c -> String
w String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
c]
    String
w:[Char
c]:[String]
cs | Char -> Bool
isMajorPunct Char
c -> String
w String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
c] String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
capitInit ([String] -> String
unlext [String]
cs)
    String
w:[Char
c]:[String]
cs | Char -> Bool
isMinorPunct Char
c -> String
w String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
c] String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++ [String] -> String
unlext [String]
cs
    String
w:[String]
ws -> String
w String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++ [String] -> String
unlext [String]
ws
    [String]
_ -> []

-- | Bind tokens separated by Prelude.BIND, i.e. &+
bindTok :: [String] -> [String]
bindTok :: [String] -> [String]
bindTok [String]
ws = case [String]
ws of
               String
w1:String
"&+":String
w2:[String]
ws -> [String] -> [String]
bindTok ((String
w1String -> String -> String
forall a. [a] -> [a] -> [a]
++String
w2)String -> [String] -> [String]
forall a. a -> [a] -> [a]
:[String]
ws)
               String
"&+":[String]
ws       -> [String] -> [String]
bindTok [String]
ws
               String
"&|":(Char
c:String
cs):[String]
ws-> [String] -> [String]
bindTok ((Char -> Char
toUpper Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
cs) String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
ws)
               String
"&|":[String]
ws       -> [String] -> [String]
bindTok [String]
ws
               String
w:[String]
ws          -> String
wString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[String] -> [String]
bindTok [String]
ws
               []            -> []

-- * Code lexing

-- | Haskell lexer, usable for much code
lexCode :: String -> [String]
lexCode :: String -> [String]
lexCode String
ss = case ReadS String
lex String
ss of
  [(w :: String
w@(Char
_:String
_),String
ws)] -> String
w String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> [String]
lexCode String
ws
  [(String, String)]
_ -> []
  

-- * Ancient Greek lexing

lexTextAGreek :: String -> [String]
lexTextAGreek :: String -> [String]
lexTextAGreek String
s = String -> [String]
lext String
s where
  lext :: String -> [String]
lext String
s = case String
s of
    Char
c:String
cs | Char -> Bool
isAGreekPunct Char
c -> [Char
c] String -> [String] -> [String]
forall a. a -> [a] -> [a]
: (String -> [String]
lext String
cs)
    Char
c:String
cs | Char -> Bool
isSpace Char
c -> String -> [String]
lext String
cs
    Char
_:String
_ -> let (String
w,String
cs) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (\Char
x -> Char -> Bool
isSpace Char
x Bool -> Bool -> Bool
|| Char -> Bool
isAGreekPunct Char
x) String
s 
           in String
w String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> [String]
lext String
cs
    [] -> []

-- Philological greek text may use vowel length indicators. Then '.' is not a sentence 
-- separator, nor is 'v. ' for vowel v. Sentence ends at 'v..' or 'c. ' with non-vowel c.

lexTextAGreek2 :: String -> [String]
lexTextAGreek2 :: String -> [String]
lexTextAGreek2 String
s = String -> [String]
lext String
s where
  lext :: String -> [String]
lext String
s = case String
s of
    Char
c:String
cs | Char -> Bool
isAGreekPunct Char
c -> [Char
c] String -> [String] -> [String]
forall a. a -> [a] -> [a]
: (String -> [String]
lext String
cs)
    Char
c:String
cs | Char -> Bool
isSpace Char
c -> String -> [String]
lext String
cs
    Char
_:String
_ -> let (String
w,String
cs) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (\Char
x -> Char -> Bool
isSpace Char
x Bool -> Bool -> Bool
|| Char -> Bool
isAGreekPunct Char
x) String
s 
           in case String
cs of 
                Char
'.':Char
'.':Char
d:String
ds | Char -> Bool
isSpace Char
d 
                  -> (String
wString -> String -> String
forall a. [a] -> [a] -> [a]
++[Char
'.']) String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> [String]
lext (Char
'.'Char -> String -> String
forall a. a -> [a] -> [a]
:Char
dChar -> String -> String
forall a. a -> [a] -> [a]
:String
ds)
                Char
'.':Char
d:String
ds | Char -> Bool
isAGreekPunct Char
d Bool -> Bool -> Bool
|| Char -> Bool
isSpace Char
d 
                  -> (String
wString -> String -> String
forall a. [a] -> [a] -> [a]
++[Char
'.']) String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> [String]
lext (Char
dChar -> String -> String
forall a. a -> [a] -> [a]
:String
ds)
                Char
'.':Char
d:String
ds | Bool -> Bool
not (Char -> Bool
isSpace Char
d) 
                  -> case String -> [String]
lext (Char
dChar -> String -> String
forall a. a -> [a] -> [a]
:String
ds) of
                       String
e:[String]
es -> (String
wString -> String -> String
forall a. [a] -> [a] -> [a]
++[Char
'.']String -> String -> String
forall a. [a] -> [a] -> [a]
++String
e) String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
es
                       [String]
es -> (String
wString -> String -> String
forall a. [a] -> [a] -> [a]
++[Char
'.']) String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
es 
                Char
'.':[] -> (String
wString -> String -> String
forall a. [a] -> [a] -> [a]
++[Char
'.']) String -> [String] -> [String]
forall a. a -> [a] -> [a]
: []
                String
_ -> String
w String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> [String]
lext String
cs      
    [] -> []

unlexTextAGreek :: [String] -> String
unlexTextAGreek :: [String] -> String
unlexTextAGreek = [String] -> String
unlext where
  unlext :: [String] -> String
unlext [String]
s = case [String]
s of
    String
w:[] -> String
w
    String
w:[Char
c]:[] | Char -> Bool
isAGreekPunct Char
c -> String
w String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
c]
    String
w:[Char
c]:[String]
cs | Char -> Bool
isAGreekPunct Char
c -> String
w String -> String -> String
forall a. [a] -> [a] -> [a]
++ [Char
c] String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++ [String] -> String
unlext [String]
cs
    String
w:[String]
ws -> String
w String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++ [String] -> String
unlext [String]
ws
    [] -> []

isAGreekPunct :: Char -> Bool
isAGreekPunct = (Char -> String -> Bool) -> String -> Char -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem String
".,;··"  -- colon: first version · not in charset,
                                   -- second version · = 00B7 standard code point

-- * Text lexing and unlexing for Ancient Greek: 
--   1. no capitalization of initial word, 
--   2. grave/acute accent switch on final syllables of words not followed by punctuation, 
--   3. accent move from/to support word to/from following clitic words (iterated).

lexAGreek :: String -> [String]
lexAGreek :: String -> [String]
lexAGreek = [String] -> [String]
fromAGreek ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lexTextAGreek

lexAGreek2 :: String -> [String]
lexAGreek2 :: String -> [String]
lexAGreek2 = [String] -> [String]
fromAGreek ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lexTextAGreek2

unlexAGreek :: [String] -> String
unlexAGreek :: [String] -> String
unlexAGreek = [String] -> String
unlexTextAGreek ([String] -> String)
-> ([String] -> [String]) -> [String] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> [String]
toAGreek

-- Note: unlexAGreek does not glue punctuation with the previous word, so that short
-- vowel indication (like a.) differs from sentence end (a .). 

-- | normalize = change grave accent on sentence internal words to acute, 
-- and shift inherited acutes to the following enclitic (where they are
-- visible only as shown in the list of enclitics above)

normalize :: String -> String
normalize :: String -> String
normalize = ([String] -> String
unlexTextAGreek ([String] -> String) -> (String -> [String]) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> [String]
fromAGreek ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lexTextAGreek) 

fromAGreek :: [String] -> [String]
fromAGreek :: [String] -> [String]
fromAGreek [String]
s = case [String]
s of
  String
w:[]:[String]
vs -> String
wString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[]String -> [String] -> [String]
forall a. a -> [a] -> [a]
:([String] -> [String]
fromAGreek [String]
vs)
  String
w:(String
v:[String]
vs) | Char -> Bool
isAGreekPunct (String -> Char
forall a. [a] -> a
head String
v) -> String
wString -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
vString -> [String] -> [String]
forall a. a -> [a] -> [a]
:([String] -> [String]
fromAGreek [String]
vs)
  String
w:String
v:[String]
vs | String -> Bool
wasEnclitic String
v Bool -> Bool -> Bool
&& String -> Bool
wasEnclitic String
w ->
    String -> String
getEnclitic String
w String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String] -> [String]
fromAGreek (String
vString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[String]
vs)
  String
w:String
v:[String]
vs | String -> Bool
wasEnclitic String
v Bool -> Bool -> Bool
&& String -> Bool
wasProclitic String
w ->  -- "ei)' tines*"
    String -> String
getProclitic String
w String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> String
getEnclitic String
v String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String] -> [String]
fromAGreek [String]
vs
  String
w:String
v:[String]
vs | String -> Bool
wasEnclitic String
v Bool -> Bool -> Bool
&& (String -> Bool
hasEndCircum String
w Bool -> Bool -> Bool
|| 
    (String -> Bool
hasEndAcute String
w Bool -> Bool -> Bool
&& String -> Bool
hasSingleAccent String
w)) ->  
    String
w String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> String
getEnclitic String
v String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String] -> [String]
fromAGreek [String]
vs        -- ok "sofoi' tines*"
  String
w:String
v:[String]
vs | String -> Bool
wasEnclitic String
v Bool -> Bool -> Bool
&& String -> Bool
hasPrefinalAcute String
w ->
    String
w String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> String
getEnclitic String
v String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String] -> [String]
fromAGreek [String]
vs
  String
w:String
v:[String]
vs | String -> Bool
wasEnclitic String
v Bool -> Bool -> Bool
&& String -> Bool
hasEndAcute String
w ->  -- ok "a)'nvrwpoi' tines*"
    String -> String
dropLastAccent String
w String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> String
getEnclitic String
v String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String] -> [String]
fromAGreek [String]
vs
  String
w:String
v:[String]
vs | String -> Bool
wasEnclitic String
w ->
    String -> String
getEnclitic String
w String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String] -> [String]
fromAGreek (String
vString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[String]
vs)
  String
w:[String]
ws -> (String -> String
toAcute String
w) String -> [String] -> [String]
forall a. a -> [a] -> [a]
: ([String] -> [String]
fromAGreek [String]
ws)
  [String]
ws -> [String]
ws 

-- | de-normalize = change acute accent of end syllables in sentence internal 
--  (non-enclitic) words to grave accent, and move accents of enclitics to the 
--  previous word to produce ordinary ancient greek

denormalize :: String -> String
denormalize :: String -> String
denormalize = ([String] -> String
unlexTextAGreek ([String] -> String) -> (String -> [String]) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> [String]
toAGreek ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lexTextAGreek) 

toAGreek :: [String] -> [String]
toAGreek :: [String] -> [String]
toAGreek [String]
s = case [String]
s of
  String
w:[]:[String]
vs -> String
wString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[]String -> [String] -> [String]
forall a. a -> [a] -> [a]
:([String] -> [String]
toAGreek [String]
vs)
  String
w:String
v:[String]
vs | Char -> Bool
isAGreekPunct (String -> Char
forall a. [a] -> a
head String
v) -> String
wString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[]String -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
vString -> [String] -> [String]
forall a. a -> [a] -> [a]
:([String] -> [String]
toAGreek [String]
vs)  -- w:[] for following -to_ancientgreek
  String
w:String
v:[String]
vs | String -> Bool
isEnclitic String
v Bool -> Bool -> Bool
&& String -> Bool
isEnclitic String
w -> 
    String -> String
addAcute String
w String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String] -> [String]
toAGreek (String -> String
dropAccent String
vString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[String]
vs)    -- BR 11 Anm.2
  String
w:String
v:[String]
vs | String -> Bool
isEnclitic String
v Bool -> Bool -> Bool
&& String -> Bool
isProclitic String
w ->    -- BR 11 a.beta
    String -> String
addAcute String
wString -> [String] -> [String]
forall a. a -> [a] -> [a]
: ([String] -> [String]
toAGreek (String -> String
dropAccent String
vString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[String]
vs))
  String
w:String
v:[String]
vs | String -> Bool
isEnclitic String
v Bool -> Bool -> Bool
&& (String -> Bool
hasEndCircum String
w Bool -> Bool -> Bool
|| String -> Bool
hasEndAcute String
w) -> 
    String
wString -> [String] -> [String]
forall a. a -> [a] -> [a]
:([String] -> [String]
toAGreek (String -> String
dropAccent String
vString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[String]
vs))             -- BR 11 a.alpha,beta
  String
w:String
v:[String]
vs | String -> Bool
isEnclitic String
v Bool -> Bool -> Bool
&& String -> Bool
hasPrefinalAcute String
w -> 
    String
wString -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
vString -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String] -> [String]
toAGreek [String]
vs   -- bisyllabic v keeps its accent  BR 11 b.
  String
w:String
v:[String]
vs | String -> Bool
isEnclitic String
v -> 
    (String -> String
addAcute String
w)String -> [String] -> [String]
forall a. a -> [a] -> [a]
:([String] -> [String]
toAGreek (String -> String
dropAccent String
vString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[String]
vs))  -- BR 11 a.gamma
  String
w:String
v:[String]
vs | String -> Bool
isEnclitic String
w -> String
wString -> [String] -> [String]
forall a. a -> [a] -> [a]
:([String] -> [String]
toAGreek (String
vString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[String]
vs))
  String
w:[String]
ws -> (String -> String
toGrave String
w) String -> [String] -> [String]
forall a. a -> [a] -> [a]
: ([String] -> [String]
toAGreek [String]
ws)
  [String]
ws -> [String]
ws 

-- | Change accent on the final syllable of a word

toGrave :: String -> String 
toGrave :: String -> String
toGrave = String -> String
forall a. [a] -> [a]
reverse (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
grave (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. [a] -> [a]
reverse where
  grave :: String -> String
grave String
s = case String
s of
    Char
'\'':String
cs -> Char
'`'Char -> String -> String
forall a. a -> [a] -> [a]
:String
cs 
    Char
c:String
cs | Char -> Bool
isAGreekVowel Char
c -> Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
cs 
    Char
c:String
cs -> Char
cChar -> String -> String
forall a. a -> [a] -> [a]
: String -> String
grave String
cs
    String
_ -> String
s

toAcute :: String -> String 
toAcute :: String -> String
toAcute = String -> String
forall a. [a] -> [a]
reverse (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
acute (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. [a] -> [a]
reverse where
  acute :: String -> String
acute String
s = case String
s of
    Char
'`':String
cs -> Char
'\''Char -> String -> String
forall a. a -> [a] -> [a]
:String
cs
    Char
c:String
cs | Char -> Bool
isAGreekVowel Char
c -> Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
cs 
    Char
c:String
cs -> Char
cChar -> String -> String
forall a. a -> [a] -> [a]
: String -> String
acute String
cs
    String
_ -> String
s

isAGreekVowel :: Char -> Bool
isAGreekVowel = (Char -> String -> Bool) -> String -> Char -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem String
"aeioyhw"

-- | Accent moves for enclitics and proclitics (atona)

enclitics :: [String]
enclitics = [
  String
"moy",String
"moi",String
"me",     -- personal pronouns
  String
"soy",String
"soi",String
"se",
  String
"oy(",String
"oi(",String
"e(",
  String
"tis*",String
"ti",String
"tina'",  -- indefinite pronoun 
  String
"tino's*",String
"tini'",
  String
"tine's*",String
"tina's*",
  String
"tinw~n",String
"tisi'",String
"tisi'n",
  String
"poy",String
"poi",          -- indefinite adverbs
  String
"pove'n",String
"pws*",
  String
"ph|",String
"pote'",
  String
"ge",String
"te",String
"toi",      -- particles
  String
"nyn",String
"per",String
"pw" 
   -- suffix -"de"
   -- praes.indik. of fhmi', ei)mi' (except fh's*, ei)~)
  ] -- and more, BR 11

proclitics :: [String]
proclitics = [
  String
"o(",String
"h(",String
"oi(",String
"ai(",     -- articles
  String
"e)n",String
"ei)s*",String
"e)x",String
"e)k", -- prepositions
  String
"ei)",String
"w(s*",              -- conjunctions
  String
"oy)",String
"oy)k",String
"oy)c"        -- negation
  ]

isEnclitic :: String -> Bool
isEnclitic = (String -> [String] -> Bool) -> [String] -> String -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem [String]
enclitics
isProclitic :: String -> Bool
isProclitic = (String -> [String] -> Bool) -> [String] -> String -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem [String]
proclitics

-- Check if a word is an enclitic or accented enclitic and extract the enclitic

wasEnclitic :: String -> Bool
wasEnclitic = let unaccented :: [String]
unaccented = ((String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (String -> Bool) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Bool
hasAccent) [String]
enclitics) 
                    [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ ((String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map String -> String
dropAccent ((String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter String -> Bool
hasAccent [String]
enclitics))
                  accented :: [String]
accented = ((String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter String -> Bool
hasAccent [String]
enclitics) 
                    [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ (String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map String -> String
addAcute ((String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (String -> Bool) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Bool
hasAccent) [String]
enclitics) 
              in (String -> [String] -> Bool) -> [String] -> String -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem ([String]
accented [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String]
unaccented)

wasProclitic :: String -> Bool
wasProclitic = (String -> [String] -> Bool) -> [String] -> String -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem ((String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map String -> String
addAcute [String]
proclitics)

getEnclitic :: String -> String
getEnclitic = 
  let pairs :: [(String, String)]
pairs = [String] -> [String] -> [(String, String)]
forall a b. [a] -> [b] -> [(a, b)]
zip ([String]
enclitics [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ ((String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map String -> String
dropAccent ((String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter String -> Bool
hasAccent [String]
enclitics))
                   [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ ((String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map String -> String
addAcute ((String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (String -> Bool) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Bool
hasAccent) [String]
enclitics)))
                  ([String]
enclitics [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ ((String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter String -> Bool
hasAccent [String]
enclitics)
                   [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ ((String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (String -> Bool) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Bool
hasAccent) [String]
enclitics))
      find :: String -> Maybe String
find = \String
v -> String -> [(String, String)] -> Maybe String
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
v [(String, String)]
pairs
  in \String
v -> case (String -> Maybe String
find String
v) of 
    Just String
x -> String
x 
    Maybe String
_ -> String
v
getProclitic :: String -> String
getProclitic = 
  let pairs :: [(String, String)]
pairs = [String] -> [String] -> [(String, String)]
forall a b. [a] -> [b] -> [(a, b)]
zip ((String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map String -> String
addAcute [String]
proclitics) [String]
proclitics 
      find :: String -> Maybe String
find = \String
v -> String -> [(String, String)] -> Maybe String
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
v [(String, String)]
pairs
  in \String
v -> case (String -> Maybe String
find String
v) of 
    Just String
x -> String
x 
    Maybe String
_ -> String
v

-- | Accent manipulation

dropAccent :: String -> String
dropAccent = String -> String
forall a. [a] -> [a]
reverse (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
drop (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. [a] -> [a]
reverse where 
  drop :: String -> String
drop String
s = case String
s of
    [] -> []
    Char
'\'':String
cs -> String
cs
    Char
'`':String
cs -> String
cs
    Char
'~':String
cs -> String
cs
    Char
c:String
cs -> Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String -> String
drop String
cs

dropLastAccent :: String -> String
dropLastAccent = String -> String
forall a. [a] -> [a]
reverse (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
drop (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. [a] -> [a]
reverse where 
  drop :: String -> String
drop String
s = case String
s of
    [] -> []
    Char
'\'':String
cs -> String
cs
    Char
'`':String
cs -> String
cs
    Char
'~':String
cs -> String
cs
    Char
c:String
cs -> Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String -> String
drop String
cs

addAcute :: String -> String
addAcute :: String -> String
addAcute = String -> String
forall a. [a] -> [a]
reverse (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
acu (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. [a] -> [a]
reverse where
  acu :: String -> String
acu String
w = case String
w of 
    Char
c:String
cs | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'' -> Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
cs
    Char
c:String
cs | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'(' -> Char
'\''Char -> String -> String
forall a. a -> [a] -> [a]
:Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
cs
    Char
c:String
cs | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
')' -> Char
'\''Char -> String -> String
forall a. a -> [a] -> [a]
:Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
cs
    Char
c:String
cs | Char -> Bool
isAGreekVowel Char
c -> Char
'\''Char -> String -> String
forall a. a -> [a] -> [a]
:Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
cs
    Char
c:String
cs -> Char
c Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
acu String
cs
    String
_ -> String
w

-- | Accent checking on end syllables

hasEndAcute :: String -> Bool
hasEndAcute = String -> Bool
find (String -> Bool) -> (String -> String) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. [a] -> [a]
reverse where
  find :: String -> Bool
find String
s = case String
s of
    [] -> Bool
False
    Char
'\'':String
cs -> Bool
True
    Char
'`':String
cs -> Bool
False
    Char
'~':String
cs -> Bool
False
    Char
c:String
cs | Char -> Bool
isAGreekVowel Char
c -> Bool
False
    Char
_:String
cs -> String -> Bool
find String
cs

hasEndCircum :: String -> Bool
hasEndCircum = String -> Bool
find (String -> Bool) -> (String -> String) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. [a] -> [a]
reverse where
  find :: String -> Bool
find String
s = case String
s of
    [] -> Bool
False
    Char
'\'':String
cs -> Bool
False
    Char
'`':String
cs -> Bool
False
    Char
'~':String
cs -> Bool
True
    Char
c:String
cs | Char -> Bool
isAGreekVowel Char
c -> Bool
False
    Char
_:String
cs -> String -> Bool
find String
cs

hasPrefinalAcute :: String -> Bool
hasPrefinalAcute = String -> Bool
find (String -> Bool) -> (String -> String) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. [a] -> [a]
reverse where
  find :: String -> Bool
find String
s = case String
s of
    [] -> Bool
False
    Char
'\'':String
cs -> Bool
False  -- final acute
    Char
'`':String
cs -> Bool
False
    Char
'~':String
cs -> Bool
False
    Char
c:Char
d:String
cs | Char -> Bool
isAGreekVowel Char
c Bool -> Bool -> Bool
&& Char -> Bool
isAGreekVowel Char
d -> String -> Bool
findNext String
cs
    Char
c:String
cs | Char -> Bool
isAGreekVowel Char
c -> String -> Bool
findNext String
cs
    Char
_:String
cs -> String -> Bool
find String
cs where
  findNext :: String -> Bool
findNext String
s = case String
s of 
    [] -> Bool
False
    Char
'\'':String
cs -> Bool
True  -- prefinal acute
    Char
'`':String
cs -> Bool
False
    Char
'~':String
cs -> Bool
False
    Char
c:String
cs | Char -> Bool
isAGreekVowel Char
c -> Bool
False
    Char
_:String
cs -> String -> Bool
findNext String
cs where
    
hasSingleAccent :: String -> Bool
hasSingleAccent String
v = 
  String -> Bool
hasAccent String
v Bool -> Bool -> Bool
&& Bool -> Bool
not (String -> Bool
hasAccent (String -> String
dropLastAccent String
v))

hasAccent :: String -> Bool
hasAccent String
v = case String
v of 
  [] -> Bool
False
  Char
c:String
cs -> Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
c [Char
'\'',Char
'`',Char
'~'] Bool -> Bool -> Bool
|| String -> Bool
hasAccent String
cs

{- Tests: 

-- denormalization. Examples in BR 11 work: 
-}
enclitics_expls :: [String]
enclitics_expls = -- normalized
  String
"sofw~n tis*"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
"sofw~n tine's*"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
"sof~n tinw~n"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:  -- a.alpha
  String
"sofo's tis*"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
"sofoi' tine's*"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:                 -- a.beta
  String
"ei) tis*"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
"ei) tine's*"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:
  String
"a)'nvrwpos* tis*"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
"a)'nvrwpoi tine's*"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:        -- a.gamma
  String
"doy~los* tis*"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
"doy~loi tine's*"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:
  String
"lo'gos* tis*"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
"lo'goi tine's*"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:String
"lo'gwn tinw~n"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:  -- b.
  String
"ei) poy tis* tina' i)'doi"String -> [String] -> [String]
forall a. a -> [a] -> [a]
:                    -- Anm. 2.
  [] 


unlexCode :: [String] -> String
unlexCode :: [String] -> String
unlexCode [String]
s = case [String]
s of
  String
w:[] -> String
w
  [Char
c]:[String]
cs | Char -> Bool
isParen Char
c -> [Char
c] String -> String -> String
forall a. [a] -> [a] -> [a]
++ [String] -> String
unlexCode [String]
cs
  String
w:cs :: [String]
cs@([Char
c]:[String]
_) | Char -> Bool
isClosing Char
c -> String
w String -> String -> String
forall a. [a] -> [a] -> [a]
++ [String] -> String
unlexCode [String]
cs
  String
w:[String]
ws -> String
w String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++ [String] -> String
unlexCode [String]
ws
  [String]
_ -> []


-- | LaTeX lexer in the math mode: \ should not be separated from the next word

lexLatexCode :: String -> [String]
lexLatexCode :: String -> [String]
lexLatexCode = [String] -> [String]
restoreBackslash ([String] -> [String])
-> (String -> [String]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lexCode where --- quick hack: postprocess Haskell's lex
  restoreBackslash :: [String] -> [String]
restoreBackslash [String]
ws = case [String]
ws of
    String
"\\":String
w:[String]
ww -> (String
"\\" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
w) String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String] -> [String]
restoreBackslash [String]
ww
    String
w:[String]
ww -> String
wString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[String] -> [String]
restoreBackslash [String]
ww
    [String]
_ -> [String]
ws

-- * Mixed lexing

-- | LaTeX style lexer, with "math" environment using Code between $...$
lexMixed :: (String -> Bool) -> String -> [String]
lexMixed :: (String -> Bool) -> String -> [String]
lexMixed String -> Bool
good = [[String]] -> [String]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[String]] -> [String])
-> (String -> [[String]]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> String -> String -> [[String]]
alternate Bool
False [] where
  alternate :: Bool -> String -> String -> [[String]]
alternate Bool
env String
t String
s = case String
s of
    Char
'$':String
cs -> Bool -> String -> [String]
lex Bool
env (String -> String
forall a. [a] -> [a]
reverse String
t) [String] -> [[String]] -> [[String]]
forall a. a -> [a] -> [a]
: [String
"$"] [String] -> [[String]] -> [[String]]
forall a. a -> [a] -> [a]
: Bool -> String -> String -> [[String]]
alternate (Bool -> Bool
not Bool
env) [] String
cs
    Char
'\\':Char
c:String
cs | Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
c String
"()[]" -> Bool -> String -> [String]
lex Bool
env (String -> String
forall a. [a] -> [a]
reverse String
t) [String] -> [[String]] -> [[String]]
forall a. a -> [a] -> [a]
: [[Char
'\\',Char
c]] [String] -> [[String]] -> [[String]]
forall a. a -> [a] -> [a]
: Bool -> String -> String -> [[String]]
alternate (Bool -> Bool
not Bool
env) [] String
cs
    Char
c:String
cs -> Bool -> String -> String -> [[String]]
alternate Bool
env (Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
t) String
cs
    String
_ -> [Bool -> String -> [String]
lex Bool
env (String -> String
forall a. [a] -> [a]
reverse String
t)]
  lex :: Bool -> String -> [String]
lex Bool
env = if Bool
env then String -> [String]
lexLatexCode else (String -> Bool) -> String -> [String]
lexText String -> Bool
good

unlexMixed :: (String -> Bool) -> [String] -> String
unlexMixed :: (String -> Bool) -> [String] -> String
unlexMixed String -> Bool
good = String -> String
capitInit (String -> String) -> ([String] -> String) -> [String] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([String] -> String)
-> ([String] -> [String]) -> [String] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> [String] -> [String]
alternate Bool
False where
  alternate :: Bool -> [String] -> [String]
alternate Bool
env [String]
s = case [String]
s of
    String
_:[String]
_ -> case (String -> Bool) -> [String] -> ([String], [String])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break ((String -> [String] -> Bool) -> [String] -> String -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem [String
"$",String
"\\[",String
"\\]",String
"\\(",String
"\\)"]) [String]
s of
      ([String]
t,[])  -> Bool -> [String] -> String
unlex Bool
env [String]
t String -> [String] -> [String]
forall a. a -> [a] -> [a]
: []
      ([String]
t,String
c:[String]
m) -> Bool -> [String] -> String
unlex Bool
env [String]
t String -> [String] -> [String]
forall a. a -> [a] -> [a]
: Bool -> String -> [String] -> String
sep Bool
env String
c [String]
m String -> [String] -> [String]
forall a. a -> [a] -> [a]
: Bool -> [String] -> [String]
alternate (Bool -> Bool
not Bool
env) [String]
m
    [String]
_ -> []
  unlex :: Bool -> [String] -> String
unlex Bool
env = if Bool
env then [String] -> String
unlexCode else ((String -> Bool) -> String -> String
uncapitInit String -> Bool
good (String -> String) -> ([String] -> String) -> [String] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String
unlexText)
  sep :: Bool -> String -> [String] -> String
sep Bool
env String
c [String]
m = case ([String]
m,Bool
env) of
    ([Char
p]:[String]
_,Bool
True) | Char -> Bool
isPunct Char
p -> String
c   -- closing $ glued to next punct 
    ([String]
_,  Bool
True) -> String
c String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" "   -- closing $ otherwise separated by space from what follows
    ([String], Bool)
_ -> String
" " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
c   -- put space before opening $

-- * Additional lexing uitilties

-- | Capitalize first letter
capitInit :: String -> String
capitInit String
s = case String
s of
  Char
c:String
cs -> Char -> Char
toUpper Char
c Char -> String -> String
forall a. a -> [a] -> [a]
: String
cs
  String
_ -> String
s

-- | Uncapitalize first letter
uncapitInit :: (String -> Bool) -> String -> String
uncapitInit String -> Bool
good String
s = 
  case String
s of
    Char
c:String
cs | Bool -> Bool
not (String -> Bool
good String
s) -> Char -> Char
toLower Char
c Char -> String -> String
forall a. a -> [a] -> [a]
: String
cs
    String
_                   -> String
s

-- | Unquote each string wrapped in double quotes
unquote :: [String] -> [String]
unquote = (String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map String -> String
unq where 
  unq :: String -> String
unq String
s = case String
s of
    Char
'"':cs :: String
cs@(Char
_:String
_) | String -> Char
forall a. [a] -> a
last String
cs Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"' -> String -> String
forall a. [a] -> [a]
init String
cs
    String
_ -> String
s

isPunct :: Char -> Bool
isPunct = (Char -> String -> Bool) -> String -> Char -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem String
".?!,:;"
isMajorPunct :: Char -> Bool
isMajorPunct = (Char -> String -> Bool) -> String -> Char -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem String
".?!"
isMinorPunct :: Char -> Bool
isMinorPunct = (Char -> String -> Bool) -> String -> Char -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem String
",:;"
isParen :: Char -> Bool
isParen = (Char -> String -> Bool) -> String -> Char -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem String
"()[]{}"
isClosing :: Char -> Bool
isClosing = (Char -> String -> Bool) -> String -> Char -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem String
")]}"