{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ViewPatterns #-}
module Brassica.MDF
(
MDF(..)
, MDFLanguage(..)
, fieldLangs
, parseMDFRaw
, parseMDFWithTokenisation
, errorBundlePretty
, componentiseMDF
, componentiseMDFWordsOnly
, duplicateEtymologies
) where
import Control.Category ((>>>))
import Data.Char (isSpace)
import Data.Void (Void)
import qualified Data.Map as M
import Text.Megaparsec
import Text.Megaparsec.Char
import Brassica.SoundChange.Tokenise
import Brassica.SoundChange.Types (Grapheme, PWord)
import Data.Maybe (fromMaybe)
newtype MDF v = MDF { forall v. MDF v -> [(String, String, Either String v)]
unMDF :: [(String, String, Either String v)] }
deriving (Int -> MDF v -> ShowS
forall v. Show v => Int -> MDF v -> ShowS
forall v. Show v => [MDF v] -> ShowS
forall v. Show v => MDF v -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [MDF v] -> ShowS
$cshowList :: forall v. Show v => [MDF v] -> ShowS
show :: MDF v -> String
$cshow :: forall v. Show v => MDF v -> String
showsPrec :: Int -> MDF v -> ShowS
$cshowsPrec :: forall v. Show v => Int -> MDF v -> ShowS
Show, forall a b. a -> MDF b -> MDF a
forall a b. (a -> b) -> MDF a -> MDF b
forall (f :: * -> *).
(forall a b. (a -> b) -> f a -> f b)
-> (forall a b. a -> f b -> f a) -> Functor f
<$ :: forall a b. a -> MDF b -> MDF a
$c<$ :: forall a b. a -> MDF b -> MDF a
fmap :: forall a b. (a -> b) -> MDF a -> MDF b
$cfmap :: forall a b. (a -> b) -> MDF a -> MDF b
Functor)
type Parser = Parsec Void String
sc :: Parser String
sc :: Parser String
sc = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall a. a -> Maybe a -> a
fromMaybe String
"") forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P (forall a. a -> Maybe a
Just String
"white space") Char -> Bool
isSpace
parseToSlash :: Parser String
parseToSlash :: Parser String
parseToSlash = forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhileP (forall a. a -> Maybe a
Just String
"field value") (forall a. Eq a => a -> a -> Bool
/= Char
'\\')
entry :: Parser v -> Parser (String, String, Either String v)
entry :: forall v. Parser v -> Parser (String, String, Either String v)
entry Parser v
pv = do
Token String
_ <- forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'\\'
String
marker <- forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P (forall a. a -> Maybe a
Just String
"field name") (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace)
String
s <- Parser String
sc
Either String v
value <- case forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup String
marker Map String MDFLanguage
fieldLangs of
Just MDFLanguage
Vernacular -> forall a b. b -> Either a b
Right forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser v
pv
Maybe MDFLanguage
_ -> forall a b. a -> Either a b
Left forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser String
parseToSlash
forall (f :: * -> *) a. Applicative f => a -> f a
pure (String
marker, String
s, Either String v
value)
parseMDFRaw :: String -> Either (ParseErrorBundle String Void) (MDF String)
parseMDFRaw :: String -> Either (ParseErrorBundle String Void) (MDF String)
parseMDFRaw = forall e s a.
Parsec e s a -> String -> s -> Either (ParseErrorBundle s e) a
runParser (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall v. [(String, String, Either String v)] -> MDF v
MDF forall a b. (a -> b) -> a -> b
$ Parser String
sc forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many (forall v. Parser v -> Parser (String, String, Either String v)
entry Parser String
parseToSlash) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e s (m :: * -> *). MonadParsec e s m => m ()
eof) String
""
parseMDFWithTokenisation
:: [Grapheme]
-> String
-> Either (ParseErrorBundle String Void) (MDF [Component PWord])
parseMDFWithTokenisation :: [String]
-> String
-> Either (ParseErrorBundle String Void) (MDF [Component [String]])
parseMDFWithTokenisation (forall a. [[a]] -> [[a]]
sortByDescendingLength -> [String]
gs) =
forall e s a.
Parsec e s a -> String -> s -> Either (ParseErrorBundle s e) a
runParser (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall v. [(String, String, Either String v)] -> MDF v
MDF forall a b. (a -> b) -> a -> b
$ Parser String
sc forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParsecT
Void
String
Identity
[(String, String, Either String [Component [String]])]
p forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e s (m :: * -> *). MonadParsec e s m => m ()
eof) String
""
where
p :: ParsecT
Void
String
Identity
[(String, String, Either String [Component [String]])]
p = forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many forall a b. (a -> b) -> a -> b
$ forall v. Parser v -> Parser (String, String, Either String v)
entry forall a b. (a -> b) -> a -> b
$ forall a.
ParsecT Void String Identity a
-> ParsecT Void String Identity [Component a]
componentsParser forall a b. (a -> b) -> a -> b
$ String -> [String] -> ParsecT Void String Identity [String]
wordParser String
"\\" [String]
gs
componentiseMDF :: MDF [Component a] -> [Component a]
componentiseMDF :: forall a. MDF [Component a] -> [Component a]
componentiseMDF = forall v. MDF v -> [(String, String, Either String v)]
unMDF forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap \case
(String
m, String
s, Left String
v) -> [forall a. String -> Component a
Separator (Char
'\\'forall a. a -> [a] -> [a]
:String
m forall a. [a] -> [a] -> [a]
++ String
s forall a. [a] -> [a] -> [a]
++ String
v)]
(String
m, String
s, Right [Component a]
v) -> forall a. String -> Component a
Separator (Char
'\\'forall a. a -> [a] -> [a]
:String
m forall a. [a] -> [a] -> [a]
++ String
s) forall a. a -> [a] -> [a]
: [Component a]
v
componentiseMDFWordsOnly :: MDF [Component a] -> [Component a]
componentiseMDFWordsOnly :: forall a. MDF [Component a] -> [Component a]
componentiseMDFWordsOnly = forall v. MDF v -> [(String, String, Either String v)]
unMDF forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap \case
(String
_, String
_, Right [Component a]
v) -> [Component a]
v
(String, String, Either String [Component a])
_ -> []
duplicateEtymologies
:: (v -> String)
-> MDF v -> MDF v
duplicateEtymologies :: forall v. (v -> String) -> MDF v -> MDF v
duplicateEtymologies v -> String
typeset = forall v. [(String, String, Either String v)] -> MDF v
MDF forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe v
-> Maybe String
-> [(String, String, Either String v)]
-> [(String, String, Either String v)]
go forall a. Maybe a
Nothing forall a. Maybe a
Nothing forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall v. MDF v -> [(String, String, Either String v)]
unMDF
where
mkEt :: Maybe v -> Maybe String -> [(String, String, Either String b)]
mkEt Maybe v
word Maybe String
gloss = forall {b}.
[(String, String, Either String b)]
-> [(String, String, Either String b)]
word' forall {b}. [(String, String, Either String b)]
gloss'
where
word' :: [(String, String, Either String b)]
-> [(String, String, Either String b)]
word' = case Maybe v
word of
Just v
et -> ((String
"et", String
" ", forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ v -> String
typeset v
et) forall a. a -> [a] -> [a]
:)
Maybe v
Nothing -> forall a. a -> a
id
gloss' :: [(String, String, Either String b)]
gloss' = case Maybe String
gloss of
Just String
eg -> [(String
"eg", String
" ", forall a b. a -> Either a b
Left String
eg)]
Maybe String
Nothing -> []
go :: Maybe v
-> Maybe String
-> [(String, String, Either String v)]
-> [(String, String, Either String v)]
go Maybe v
word Maybe String
gloss [] = forall {b}.
Maybe v -> Maybe String -> [(String, String, Either String b)]
mkEt Maybe v
word Maybe String
gloss
go Maybe v
word Maybe String
_ (f :: (String, String, Either String v)
f@(String
"ge", String
_, Left String
gloss'):[(String, String, Either String v)]
fs)
= (String, String, Either String v)
f forall a. a -> [a] -> [a]
: Maybe v
-> Maybe String
-> [(String, String, Either String v)]
-> [(String, String, Either String v)]
go Maybe v
word (forall a. a -> Maybe a
Just String
gloss') [(String, String, Either String v)]
fs
go Maybe v
word Maybe String
gloss (f :: (String, String, Either String v)
f@(String
m, String
_, Right v
word'):[(String, String, Either String v)]
fs)
| String
m forall a. Eq a => a -> a -> Bool
== String
"lx" Bool -> Bool -> Bool
|| String
m forall a. Eq a => a -> a -> Bool
== String
"se"
= forall {b}.
Maybe v -> Maybe String -> [(String, String, Either String b)]
mkEt Maybe v
word Maybe String
gloss forall a. [a] -> [a] -> [a]
++ (String, String, Either String v)
f forall a. a -> [a] -> [a]
: Maybe v
-> Maybe String
-> [(String, String, Either String v)]
-> [(String, String, Either String v)]
go (forall a. a -> Maybe a
Just v
word') forall a. Maybe a
Nothing [(String, String, Either String v)]
fs
go Maybe v
word Maybe String
gloss (f :: (String, String, Either String v)
f@(String
"dt", String
_, Either String v
_):[(String, String, Either String v)]
fs)
= forall {b}.
Maybe v -> Maybe String -> [(String, String, Either String b)]
mkEt Maybe v
word Maybe String
gloss forall a. [a] -> [a] -> [a]
++ (String, String, Either String v)
f forall a. a -> [a] -> [a]
: Maybe v
-> Maybe String
-> [(String, String, Either String v)]
-> [(String, String, Either String v)]
go forall a. Maybe a
Nothing forall a. Maybe a
Nothing [(String, String, Either String v)]
fs
go Maybe v
word Maybe String
gloss ((String, String, Either String v)
f:[(String, String, Either String v)]
fs) = (String, String, Either String v)
f forall a. a -> [a] -> [a]
: Maybe v
-> Maybe String
-> [(String, String, Either String v)]
-> [(String, String, Either String v)]
go Maybe v
word Maybe String
gloss [(String, String, Either String v)]
fs
data MDFLanguage = English | National | Regional | Vernacular | Other
deriving (MDFLanguage -> MDFLanguage -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: MDFLanguage -> MDFLanguage -> Bool
$c/= :: MDFLanguage -> MDFLanguage -> Bool
== :: MDFLanguage -> MDFLanguage -> Bool
$c== :: MDFLanguage -> MDFLanguage -> Bool
Eq, Int -> MDFLanguage -> ShowS
[MDFLanguage] -> ShowS
MDFLanguage -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [MDFLanguage] -> ShowS
$cshowList :: [MDFLanguage] -> ShowS
show :: MDFLanguage -> String
$cshow :: MDFLanguage -> String
showsPrec :: Int -> MDFLanguage -> ShowS
$cshowsPrec :: Int -> MDFLanguage -> ShowS
Show)
fieldLangs :: M.Map String MDFLanguage
fieldLangs :: Map String MDFLanguage
fieldLangs = forall k a. Ord k => [(k, a)] -> Map k a
M.fromList
[ (String
"1d" , MDFLanguage
Vernacular) , (String
"1e" , MDFLanguage
Vernacular) , (String
"1i" , MDFLanguage
Vernacular)
, (String
"1p" , MDFLanguage
Vernacular) , (String
"1s" , MDFLanguage
Vernacular) , (String
"2d" , MDFLanguage
Vernacular)
, (String
"2p" , MDFLanguage
Vernacular) , (String
"2s" , MDFLanguage
Vernacular) , (String
"3d" , MDFLanguage
Vernacular)
, (String
"3p" , MDFLanguage
Vernacular) , (String
"3s" , MDFLanguage
Vernacular) , (String
"4d" , MDFLanguage
Vernacular)
, (String
"4p" , MDFLanguage
Vernacular) , (String
"4s" , MDFLanguage
Vernacular) , (String
"a" , MDFLanguage
Vernacular)
, (String
"an" , MDFLanguage
Vernacular) , (String
"bb" , MDFLanguage
English) , (String
"bw" , MDFLanguage
English)
, (String
"ce" , MDFLanguage
English) , (String
"cf" , MDFLanguage
Vernacular) , (String
"cn" , MDFLanguage
National)
, (String
"cr" , MDFLanguage
National) , (String
"de" , MDFLanguage
English) , (String
"dn" , MDFLanguage
National)
, (String
"dr" , MDFLanguage
Regional) , (String
"dt" , MDFLanguage
Other) , (String
"dv" , MDFLanguage
Vernacular)
, (String
"ec" , MDFLanguage
English) , (String
"ee" , MDFLanguage
English) , (String
"eg" , MDFLanguage
English)
, (String
"en" , MDFLanguage
National) , (String
"er" , MDFLanguage
Regional) , (String
"es" , MDFLanguage
English)
, (String
"et" , MDFLanguage
Other)
, (String
"ev" , MDFLanguage
Vernacular) , (String
"ge" , MDFLanguage
English)
, (String
"gn" , MDFLanguage
National) , (String
"gr" , MDFLanguage
Regional) , (String
"gv" , MDFLanguage
Vernacular)
, (String
"hm" , MDFLanguage
English) , (String
"is" , MDFLanguage
English) , (String
"lc" , MDFLanguage
Vernacular)
, (String
"le" , MDFLanguage
English) , (String
"lf" , MDFLanguage
English) , (String
"ln" , MDFLanguage
National)
, (String
"lr" , MDFLanguage
Regional) , (String
"lt" , MDFLanguage
English) , (String
"lv" , MDFLanguage
Vernacular)
, (String
"lx" , MDFLanguage
Vernacular) , (String
"mn" , MDFLanguage
Vernacular) , (String
"mr" , MDFLanguage
Vernacular)
, (String
"na" , MDFLanguage
English) , (String
"nd" , MDFLanguage
English) , (String
"ng" , MDFLanguage
English)
, (String
"np" , MDFLanguage
English) , (String
"nq" , MDFLanguage
English) , (String
"ns" , MDFLanguage
English)
, (String
"nt" , MDFLanguage
English) , (String
"oe" , MDFLanguage
English) , (String
"on" , MDFLanguage
National)
, (String
"or" , MDFLanguage
Regional) , (String
"ov" , MDFLanguage
Vernacular) , (String
"pc" , MDFLanguage
English)
, (String
"pd" , MDFLanguage
English) , (String
"pde", MDFLanguage
English) , (String
"pdl", MDFLanguage
English)
, (String
"pdn", MDFLanguage
National) , (String
"pdr", MDFLanguage
Regional) , (String
"pdv", MDFLanguage
Vernacular)
, (String
"ph" , MDFLanguage
Other) , (String
"pl" , MDFLanguage
Vernacular) , (String
"pn" , MDFLanguage
National)
, (String
"ps" , MDFLanguage
English) , (String
"rd" , MDFLanguage
Vernacular) , (String
"re" , MDFLanguage
English)
, (String
"rf" , MDFLanguage
English) , (String
"rn" , MDFLanguage
National) , (String
"rr" , MDFLanguage
Regional)
, (String
"sc" , MDFLanguage
English) , (String
"sd" , MDFLanguage
English) , (String
"se" , MDFLanguage
Vernacular)
, (String
"sg" , MDFLanguage
Vernacular) , (String
"sn" , MDFLanguage
English) , (String
"so" , MDFLanguage
English)
, (String
"st" , MDFLanguage
English) , (String
"sy" , MDFLanguage
Vernacular) , (String
"tb" , MDFLanguage
English)
, (String
"th" , MDFLanguage
Vernacular) , (String
"u" , MDFLanguage
Vernacular) , (String
"ue" , MDFLanguage
English)
, (String
"un" , MDFLanguage
National) , (String
"ur" , MDFLanguage
Regional) , (String
"uv" , MDFLanguage
Vernacular)
, (String
"va" , MDFLanguage
Vernacular) , (String
"ve" , MDFLanguage
English) , (String
"vn" , MDFLanguage
National)
, (String
"vr" , MDFLanguage
Regional) , (String
"we" , MDFLanguage
English) , (String
"wn" , MDFLanguage
National)
, (String
"wr" , MDFLanguage
Regional) , (String
"xe" , MDFLanguage
English) , (String
"xn" , MDFLanguage
National)
, (String
"xr" , MDFLanguage
Regional) , (String
"xv" , MDFLanguage
Vernacular)
]