{-# LANGUAGE NamedFieldPuns #-}

-- |
-- Module      : Brassica.SFM.MDF
-- Copyright   : See LICENSE file
-- License     : BSD3
-- Maintainer  : Brad Neimann
--
-- This module contains types and functions for working with the MDF
-- dictionary format. For more on the MDF format, refer to e.g.
-- [Coward & Grimes (2000)](http://downloads.sil.org/legacy/shoebox/MDF_2000.pdf).
module Brassica.SFM.MDF where

import Brassica.SFM.SFM

import qualified Data.Map as M
import Brassica.SoundChange.Tokenise
import Brassica.SoundChange.Types (PWord)
import Text.Megaparsec (State(..), PosState (..), ParseErrorBundle, runParser')
import Text.Megaparsec.State (initialPosState)
import Data.Void (Void)
import Data.Char (isSpace)
import Data.List (dropWhileEnd)

-- | The designated language of an MDF field.
data MDFLanguage = English | National | Regional | Vernacular | Other
    deriving (MDFLanguage -> MDFLanguage -> Bool
(MDFLanguage -> MDFLanguage -> Bool)
-> (MDFLanguage -> MDFLanguage -> Bool) -> Eq MDFLanguage
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: MDFLanguage -> MDFLanguage -> Bool
== :: MDFLanguage -> MDFLanguage -> Bool
$c/= :: MDFLanguage -> MDFLanguage -> Bool
/= :: MDFLanguage -> MDFLanguage -> Bool
Eq, Int -> MDFLanguage -> ShowS
[MDFLanguage] -> ShowS
MDFLanguage -> String
(Int -> MDFLanguage -> ShowS)
-> (MDFLanguage -> String)
-> ([MDFLanguage] -> ShowS)
-> Show MDFLanguage
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> MDFLanguage -> ShowS
showsPrec :: Int -> MDFLanguage -> ShowS
$cshow :: MDFLanguage -> String
show :: MDFLanguage -> String
$cshowList :: [MDFLanguage] -> ShowS
showList :: [MDFLanguage] -> ShowS
Show)

-- | A 'M.Map' from the most common field markers to the language of
-- their values.
--
-- (Note: This is currently hardcoded in the source code, based on the
-- values in the MDF definitions from SIL Toolbox. The exception is
-- @\et@, which is assigned as 'Other' rather than
-- 'Vernacular'. There’s probably a more principled way of defining
-- this, but hardcoding should suffice for now.)
fieldLangs :: M.Map String MDFLanguage
fieldLangs :: Map String MDFLanguage
fieldLangs = [(String, MDFLanguage)] -> Map String MDFLanguage
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList
    [ (String
"1d" , MDFLanguage
Vernacular) , (String
"1e" , MDFLanguage
Vernacular) , (String
"1i" , MDFLanguage
Vernacular)
    , (String
"1p" , MDFLanguage
Vernacular) , (String
"1s" , MDFLanguage
Vernacular) , (String
"2d" , MDFLanguage
Vernacular)
    , (String
"2p" , MDFLanguage
Vernacular) , (String
"2s" , MDFLanguage
Vernacular) , (String
"3d" , MDFLanguage
Vernacular)
    , (String
"3p" , MDFLanguage
Vernacular) , (String
"3s" , MDFLanguage
Vernacular) , (String
"4d" , MDFLanguage
Vernacular)
    , (String
"4p" , MDFLanguage
Vernacular) , (String
"4s" , MDFLanguage
Vernacular) , (String
"a"  , MDFLanguage
Vernacular)
    , (String
"an" , MDFLanguage
Vernacular) , (String
"bb" , MDFLanguage
English)    , (String
"bw" , MDFLanguage
English)
    , (String
"ce" , MDFLanguage
English)    , (String
"cf" , MDFLanguage
Vernacular) , (String
"cn" , MDFLanguage
National)
    , (String
"cr" , MDFLanguage
National)   , (String
"de" , MDFLanguage
English)    , (String
"dn" , MDFLanguage
National)
    , (String
"dr" , MDFLanguage
Regional)   , (String
"dt" , MDFLanguage
Other)      , (String
"dv" , MDFLanguage
Vernacular)
    , (String
"ec" , MDFLanguage
English)    , (String
"ee" , MDFLanguage
English)    , (String
"eg" , MDFLanguage
English)
    , (String
"en" , MDFLanguage
National)   , (String
"er" , MDFLanguage
Regional)   , (String
"es" , MDFLanguage
English)
    , (String
"et" , MDFLanguage
Other)  {- defined as vernacular in SIL Toolbox, but by
                         definition it's really a different language -}
    , (String
"ev" , MDFLanguage
Vernacular) , (String
"ge" , MDFLanguage
English)
    , (String
"gn" , MDFLanguage
National)   , (String
"gr" , MDFLanguage
Regional)   , (String
"gv" , MDFLanguage
Vernacular)
    , (String
"hm" , MDFLanguage
English)    , (String
"is" , MDFLanguage
English)    , (String
"lc" , MDFLanguage
Vernacular)
    , (String
"le" , MDFLanguage
English)    , (String
"lf" , MDFLanguage
English)    , (String
"ln" , MDFLanguage
National)
    , (String
"lr" , MDFLanguage
Regional)   , (String
"lt" , MDFLanguage
English)    , (String
"lv" , MDFLanguage
Vernacular)
    , (String
"lx" , MDFLanguage
Vernacular) , (String
"mn" , MDFLanguage
Vernacular) , (String
"mr" , MDFLanguage
Vernacular)
    , (String
"na" , MDFLanguage
English)    , (String
"nd" , MDFLanguage
English)    , (String
"ng" , MDFLanguage
English)
    , (String
"np" , MDFLanguage
English)    , (String
"nq" , MDFLanguage
English)    , (String
"ns" , MDFLanguage
English)
    , (String
"nt" , MDFLanguage
English)    , (String
"oe" , MDFLanguage
English)    , (String
"on" , MDFLanguage
National)
    , (String
"or" , MDFLanguage
Regional)   , (String
"ov" , MDFLanguage
Vernacular) , (String
"pc" , MDFLanguage
English)
    , (String
"pd" , MDFLanguage
English)    , (String
"pde", MDFLanguage
English)    , (String
"pdl", MDFLanguage
English)
    , (String
"pdn", MDFLanguage
National)   , (String
"pdr", MDFLanguage
Regional)   , (String
"pdv", MDFLanguage
Vernacular)
    , (String
"ph" , MDFLanguage
Other)      , (String
"pl" , MDFLanguage
Vernacular) , (String
"pn" , MDFLanguage
National)
    , (String
"ps" , MDFLanguage
English)    , (String
"rd" , MDFLanguage
Vernacular) , (String
"re" , MDFLanguage
English)
    , (String
"rf" , MDFLanguage
English)    , (String
"rn" , MDFLanguage
National)   , (String
"rr" , MDFLanguage
Regional)
    , (String
"sc" , MDFLanguage
English)    , (String
"sd" , MDFLanguage
English)    , (String
"se" , MDFLanguage
Vernacular)
    , (String
"sg" , MDFLanguage
Vernacular) , (String
"sn" , MDFLanguage
English)    , (String
"so" , MDFLanguage
English)
    , (String
"st" , MDFLanguage
English)    , (String
"sy" , MDFLanguage
Vernacular) , (String
"tb" , MDFLanguage
English)
    , (String
"th" , MDFLanguage
Vernacular) , (String
"u"  , MDFLanguage
Vernacular) , (String
"ue" , MDFLanguage
English)
    , (String
"un" , MDFLanguage
National)   , (String
"ur" , MDFLanguage
Regional)   , (String
"uv" , MDFLanguage
Vernacular)
    , (String
"va" , MDFLanguage
Vernacular) , (String
"ve" , MDFLanguage
English)    , (String
"vn" , MDFLanguage
National)
    , (String
"vr" , MDFLanguage
Regional)   , (String
"we" , MDFLanguage
English)    , (String
"wn" , MDFLanguage
National)
    , (String
"wr" , MDFLanguage
Regional)   , (String
"xe" , MDFLanguage
English)    , (String
"xn" , MDFLanguage
National)
    , (String
"xr" , MDFLanguage
Regional)   , (String
"xv" , MDFLanguage
Vernacular)
    ]

-- | Standard MDF hierarchy, with @\lx@ > @\se@ > @\ps@ > @\sn@.
-- Intended for use with 'toTree'.
mdfHierarchy :: Hierarchy
mdfHierarchy :: Hierarchy
mdfHierarchy = [(String, String)] -> Hierarchy
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList
    [ (String
"1d", String
"ps"), (String
"1e", String
"ps"), (String
"1i", String
"ps"), (String
"1p", String
"ps"), (String
"1s", String
"ps")
    , (String
"2d", String
"ps"), (String
"2p", String
"ps"), (String
"2s", String
"ps"), (String
"3d", String
"ps"), (String
"3p", String
"ps")
    , (String
"3s", String
"ps"), (String
"4d", String
"ps"), (String
"4p", String
"ps"), (String
"4s", String
"ps"), (String
"a", String
"lx")
    , (String
"an", String
"sn"), (String
"bb", String
"sn"), (String
"bw", String
"se"), (String
"ce", String
"cf"), (String
"cf", String
"sn")
    , (String
"cn", String
"cf"), (String
"cr", String
"cf"), (String
"de", String
"sn"), (String
"dn", String
"sn"), (String
"dr", String
"sn")
    , (String
"dt", String
"lx"), (String
"dv", String
"sn"), (String
"ec", String
"et"), (String
"ee", String
"sn"), (String
"eg", String
"et")
    , (String
"en", String
"sn"), (String
"er", String
"sn"), (String
"es", String
"et"), (String
"et", String
"se"), (String
"ev", String
"sn")
    , (String
"ge", String
"sn"), (String
"gn", String
"sn"), (String
"gr", String
"sn"), (String
"gv", String
"sn"), (String
"hm", String
"lx")
    , (String
"is", String
"sn"), (String
"lc", String
"lx"), (String
"le", String
"lv"), (String
"lf", String
"sn"), (String
"ln", String
"lv")
    , (String
"lr", String
"lv"), (String
"lt", String
"sn"), (String
"lv", String
"lf"), (String
"mn", String
"se"), (String
"mr", String
"se")
    , (String
"na", String
"sn"), (String
"nd", String
"sn"), (String
"ng", String
"sn"), (String
"np", String
"sn"), (String
"nq", String
"sn")
    , (String
"ns", String
"sn"), (String
"nt", String
"sn"), (String
"oe", String
"sn"), (String
"on", String
"sn"), (String
"or", String
"sn")
    , (String
"ov", String
"sn"), (String
"pc", String
"sn"), (String
"pd", String
"ps"), (String
"pde", String
"pdl")
    , (String
"pdl", String
"pd"), (String
"pdn", String
"pdl"), (String
"pdr", String
"pdl"), (String
"pdv", String
"pdl")
    , (String
"ph", String
"se"), (String
"pl", String
"ps"), (String
"pn", String
"ps"), (String
"ps", String
"se"), (String
"rd", String
"ps")
    , (String
"re", String
"sn"), (String
"rf", String
"sn"), (String
"rn", String
"sn"), (String
"rr", String
"sn"), (String
"sc", String
"sn")
    , (String
"sd", String
"sn"), (String
"se", String
"lx"), (String
"sg", String
"ps"), (String
"sn", String
"ps"), (String
"so", String
"sn")
    , (String
"st", String
"lx"), (String
"sy", String
"sn"), (String
"tb", String
"sn"), (String
"th", String
"sn"), (String
"u", String
"lx")
    , (String
"ue", String
"sn"), (String
"un", String
"sn"), (String
"ur", String
"sn"), (String
"uv", String
"sn"), (String
"va", String
"sn")
    , (String
"ve", String
"va"), (String
"vn", String
"va"), (String
"vr", String
"va"), (String
"we", String
"sn"), (String
"wn", String
"sn")
    , (String
"wr", String
"sn"), (String
"xe", String
"xv"), (String
"xn", String
"xv"), (String
"xr", String
"xv"), (String
"xv", String
"rf")
    ]

-- | Alternate MDF hierarchy, with @\lx@ > @\sn@ > @\se@ > @\ps@.
-- Intended for use with 'toTree'.
mdfAlternateHierarchy :: Hierarchy
mdfAlternateHierarchy :: Hierarchy
mdfAlternateHierarchy = [(String, String)] -> Hierarchy
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList
    [ (String
"1d", String
"ps"), (String
"1e", String
"ps"), (String
"1i", String
"ps"), (String
"1p", String
"ps"), (String
"1s", String
"ps")
    , (String
"2d", String
"ps"), (String
"2p", String
"ps"), (String
"2s", String
"ps"), (String
"3d", String
"ps"), (String
"3p", String
"ps")
    , (String
"3s", String
"ps"), (String
"4d", String
"ps"), (String
"4p", String
"ps"), (String
"4s", String
"ps")
    , (String
"an", String
"ps"), (String
"bb", String
"ps"), (String
"bw", String
"se"), (String
"ce", String
"cf"), (String
"cf", String
"ps")
    , (String
"cn", String
"cf"), (String
"cr", String
"cf"), (String
"de", String
"ps"), (String
"dn", String
"ps"), (String
"dr", String
"ps")
    , (String
"dt", String
"lx"), (String
"dv", String
"ps"), (String
"ec", String
"et"), (String
"ee", String
"ps"), (String
"eg", String
"et")
    , (String
"en", String
"ps"), (String
"er", String
"ps"), (String
"es", String
"et"), (String
"et", String
"se"), (String
"ev", String
"ps")
    , (String
"ge", String
"ps"), (String
"gn", String
"ps"), (String
"gr", String
"ps"), (String
"gv", String
"ps"), (String
"hm", String
"lx")
    , (String
"is", String
"ps"), (String
"lc", String
"lx"), (String
"le", String
"lv"), (String
"lf", String
"ps"), (String
"ln", String
"lv")
    , (String
"lr", String
"lv"), (String
"lt", String
"ps"), (String
"lv", String
"lf"), (String
"mn", String
"se"), (String
"mr", String
"se")
    , (String
"na", String
"ps"), (String
"nd", String
"ps"), (String
"ng", String
"ps"), (String
"np", String
"ps"), (String
"nq", String
"ps")
    , (String
"ns", String
"ps"), (String
"nt", String
"ps"), (String
"oe", String
"ps"), (String
"on", String
"ps"), (String
"or", String
"ps")
    , (String
"ov", String
"ps"), (String
"pc", String
"ps"), (String
"pd", String
"ps"), (String
"pde", String
"pdl")
    , (String
"pdl", String
"pd"), (String
"pdn", String
"pdl"), (String
"pdr", String
"pdl"), (String
"pdv", String
"pdl")
    , (String
"ph", String
"se"), (String
"pl", String
"ps"), (String
"pn", String
"ps"), (String
"ps", String
"se"), (String
"rd", String
"ps")
    , (String
"re", String
"ps"), (String
"rf", String
"ps"), (String
"rn", String
"ps"), (String
"rr", String
"ps"), (String
"sc", String
"ps")
    , (String
"sd", String
"ps"), (String
"se", String
"sn"), (String
"sg", String
"ps"), (String
"sn", String
"lx"), (String
"so", String
"ps")
    , (String
"st", String
"lx"), (String
"sy", String
"ps"), (String
"tb", String
"ps"), (String
"th", String
"ps")
    , (String
"ue", String
"ps"), (String
"un", String
"ps"), (String
"ur", String
"ps"), (String
"uv", String
"ps"), (String
"va", String
"se")
    , (String
"ve", String
"va"), (String
"vn", String
"va"), (String
"vr", String
"va"), (String
"we", String
"ps"), (String
"wn", String
"ps")
    , (String
"wr", String
"ps"), (String
"xe", String
"xv"), (String
"xn", String
"xv"), (String
"xr", String
"xv"), (String
"xv", String
"rf")
    ]

-- | Convert an 'SFM' document to a list of 'Component's representing
-- the same textual content. 'Vernacular' field values are tokenised as
-- if using 'tokeniseWords'; everything else is treated as a
-- 'Separator', so that it is not disturbed by operations such as rule
-- application or rendering to text.
--
-- (This is a simple wrapper around 'tokeniseField'.)
tokeniseMDF
    :: [String]  -- ^ List of available multigraphs (as with 'tokeniseWord')
    -> SFM -> Either (ParseErrorBundle String Void) [Component PWord]
tokeniseMDF :: [String]
-> SFM
-> Either (ParseErrorBundle String Void) [Component [String]]
tokeniseMDF [String]
gs = ([[Component [String]]] -> [Component [String]])
-> Either (ParseErrorBundle String Void) [[Component [String]]]
-> Either (ParseErrorBundle String Void) [Component [String]]
forall a b.
(a -> b)
-> Either (ParseErrorBundle String Void) a
-> Either (ParseErrorBundle String Void) b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [[Component [String]]] -> [Component [String]]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (Either (ParseErrorBundle String Void) [[Component [String]]]
 -> Either (ParseErrorBundle String Void) [Component [String]])
-> (SFM
    -> Either (ParseErrorBundle String Void) [[Component [String]]])
-> SFM
-> Either (ParseErrorBundle String Void) [Component [String]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Field
 -> Either (ParseErrorBundle String Void) [Component [String]])
-> SFM
-> Either (ParseErrorBundle String Void) [[Component [String]]]
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
forall (f :: * -> *) a b.
Applicative f =>
(a -> f b) -> [a] -> f [b]
traverse ([String]
-> Field
-> Either (ParseErrorBundle String Void) [Component [String]]
tokeniseField [String]
gs)

-- | Like 'tokeniseMDF', but for a single 'Field' rather than a whole
-- SFM file.
tokeniseField :: [String] -> Field -> Either (ParseErrorBundle String Void) [Component PWord]
tokeniseField :: [String]
-> Field
-> Either (ParseErrorBundle String Void) [Component [String]]
tokeniseField [String]
gs Field
f = case String -> Map String MDFLanguage -> Maybe MDFLanguage
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup (Field -> String
fieldMarker Field
f) Map String MDFLanguage
fieldLangs of
    Just MDFLanguage
Vernacular ->
        -- initialise megaparsec state with position starting at given
        -- field
        let ps :: PosState String
ps = String -> String -> PosState String
forall s. String -> s -> PosState s
initialPosState String
"" (Field -> String
fieldValue Field
f)
            s :: State String e
s = State
                { stateInput :: String
stateInput = Field -> String
fieldValue Field
f
                , stateOffset :: Int
stateOffset = Int
0
                , statePosState :: PosState String
statePosState = case Field -> Maybe SourcePos
fieldSourcePos Field
f of
                    Maybe SourcePos
Nothing -> PosState String
ps
                    Just SourcePos
sp -> PosState String
ps { pstateSourcePos = sp }
                , stateParseErrors :: [ParseError String e]
stateParseErrors = []
                }
        in case Parsec Void String [Component [String]]
-> State String Void
-> (State String Void,
    Either (ParseErrorBundle String Void) [Component [String]])
forall e s a.
Parsec e s a
-> State s e -> (State s e, Either (ParseErrorBundle s e) a)
runParser' (ParsecT Void String Identity [String]
-> Parsec Void String [Component [String]]
forall a.
ParsecT Void String Identity a
-> ParsecT Void String Identity [Component a]
componentsParser (ParsecT Void String Identity [String]
 -> Parsec Void String [Component [String]])
-> ParsecT Void String Identity [String]
-> Parsec Void String [Component [String]]
forall a b. (a -> b) -> a -> b
$ String -> [String] -> ParsecT Void String Identity [String]
wordParser String
"[" [String]
gs) State String Void
forall {e}. State String e
s of
            (State String Void
_, Right [Component [String]]
cs) -> [Component [String]]
-> Either (ParseErrorBundle String Void) [Component [String]]
forall a b. b -> Either a b
Right ([Component [String]]
 -> Either (ParseErrorBundle String Void) [Component [String]])
-> [Component [String]]
-> Either (ParseErrorBundle String Void) [Component [String]]
forall a b. (a -> b) -> a -> b
$ String -> Component [String]
forall a. String -> Component a
Separator (Char
'\\' Char -> ShowS
forall a. a -> [a] -> [a]
: Field -> String
fieldMarker Field
f String -> ShowS
forall a. [a] -> [a] -> [a]
++ Field -> String
fieldWhitespace Field
f) Component [String] -> [Component [String]] -> [Component [String]]
forall a. a -> [a] -> [a]
: [Component [String]]
cs
            (State String Void
_, Left ParseErrorBundle String Void
err) -> ParseErrorBundle String Void
-> Either (ParseErrorBundle String Void) [Component [String]]
forall a b. a -> Either a b
Left ParseErrorBundle String Void
err

    Maybe MDFLanguage
_ -> [Component [String]]
-> Either (ParseErrorBundle String Void) [Component [String]]
forall a b. b -> Either a b
Right [String -> Component [String]
forall a. String -> Component a
Separator (String -> Component [String]) -> String -> Component [String]
forall a b. (a -> b) -> a -> b
$ Char
'\\' Char -> ShowS
forall a. a -> [a] -> [a]
: Field -> String
fieldMarker Field
f String -> ShowS
forall a. [a] -> [a] -> [a]
++ Field -> String
fieldWhitespace Field
f String -> ShowS
forall a. [a] -> [a] -> [a]
++ Field -> String
fieldValue Field
f]

-- | Add etymological fields to an MDF file by duplicating the values in
-- @\lx@, @\se@ and @\ge@ fields. e.g.:
--
-- > \lx kapa
-- > \ps n
-- > \ge parent
-- > \se sakapa
-- > \ge father
--
-- Would become:
--
-- > \lx kapa
-- > \ps n
-- > \ge parent
-- > \et kapa
-- > \eg parent
-- > \se sakapa
-- > \ge father
-- > \et sakapa
-- > \eg father
--
-- This can be helpful when applying sound changes to an MDF file: the
-- vernacular words can be copied as etymologies, and then the sound
-- changes can be applied leaving the etymologies as is.
--
-- Note that the hierarchy must already be resolved before this
-- function can be used, as it depends on the tree structure to know
-- where the etymologies should be placed.
duplicateEtymologies
    :: (String -> String)
    -- ^ Transformation to apply to etymologies, e.g. @('*':)@
    -> SFMTree
    -> SFMTree
duplicateEtymologies :: ShowS -> SFMTree -> SFMTree
duplicateEtymologies ShowS
f = Maybe String -> Maybe String -> SFMTree -> SFMTree
go Maybe String
forall a. Maybe a
Nothing Maybe String
forall a. Maybe a
Nothing
  where
    -- strategy: find each \se (implicit or explicit) with its \ge
    -- and make an \et under it
    go :: Maybe String -> Maybe String -> SFMTree -> SFMTree
go Maybe String
lx Maybe String
gl (Root [SFMTree]
ts) = [SFMTree] -> SFMTree
Root ([SFMTree] -> SFMTree) -> [SFMTree] -> SFMTree
forall a b. (a -> b) -> a -> b
$ Maybe String -> Maybe String -> SFMTree -> SFMTree
go Maybe String
lx Maybe String
gl (SFMTree -> SFMTree) -> [SFMTree] -> [SFMTree]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [SFMTree]
ts
    go Maybe String
_lx Maybe String
gl t :: SFMTree
t@(Filled m :: Field
m@(Field { fieldMarker :: Field -> String
fieldMarker=String
"lx", String
fieldValue :: Field -> String
fieldValue :: String
fieldValue }) [SFMTree]
ts) =
        let lx :: Maybe String
lx = String -> Maybe String
forall a. a -> Maybe a
Just String
fieldValue
            gl' :: Maybe String
gl' = case (Field -> Maybe String) -> SFMTree -> [String]
forall a. (Field -> Maybe a) -> SFMTree -> [a]
searchField Field -> Maybe String
isGloss SFMTree
t of
                String
gl'':[String]
_ -> String -> Maybe String
forall a. a -> Maybe a
Just String
gl''
                [String]
_ -> Maybe String
gl
        in Field -> [SFMTree] -> SFMTree
Filled Field
m ([SFMTree] -> SFMTree) -> [SFMTree] -> SFMTree
forall a b. (a -> b) -> a -> b
$ Maybe String -> Maybe String -> SFMTree -> SFMTree
go Maybe String
lx Maybe String
gl' (SFMTree -> SFMTree) -> [SFMTree] -> [SFMTree]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [SFMTree]
ts
    go Maybe String
_lx Maybe String
gl t :: SFMTree
t@(Filled m :: Field
m@(Field { fieldMarker :: Field -> String
fieldMarker=String
"se", String
fieldValue :: Field -> String
fieldValue :: String
fieldValue }) [SFMTree]
ts) =
        let lx :: Maybe String
lx = String -> Maybe String
forall a. a -> Maybe a
Just String
fieldValue
            gl' :: Maybe String
gl' = case (Field -> Maybe String) -> SFMTree -> [String]
forall a. (Field -> Maybe a) -> SFMTree -> [a]
searchField Field -> Maybe String
isGloss SFMTree
t of
                String
gl'':[String]
_ -> String -> Maybe String
forall a. a -> Maybe a
Just String
gl''
                [String]
_ -> Maybe String
gl
        in Field -> [SFMTree] -> SFMTree
Filled Field
m ([SFMTree] -> SFMTree) -> [SFMTree] -> SFMTree
forall a b. (a -> b) -> a -> b
$ [SFMTree]
ts [SFMTree] -> [SFMTree] -> [SFMTree]
forall a. [a] -> [a] -> [a]
++ Maybe String -> Maybe String -> [SFMTree]
mkEt Maybe String
lx Maybe String
gl'
    go Maybe String
lx Maybe String
gl (Filled Field
m [SFMTree]
ts) = Field -> [SFMTree] -> SFMTree
Filled Field
m ([SFMTree] -> SFMTree) -> [SFMTree] -> SFMTree
forall a b. (a -> b) -> a -> b
$ Maybe String -> Maybe String -> SFMTree -> SFMTree
go Maybe String
lx Maybe String
gl (SFMTree -> SFMTree) -> [SFMTree] -> [SFMTree]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [SFMTree]
ts
    go Maybe String
lx Maybe String
gl (Missing String
"se" [SFMTree]
ts) = String -> [SFMTree] -> SFMTree
Missing String
"se" ([SFMTree] -> SFMTree) -> [SFMTree] -> SFMTree
forall a b. (a -> b) -> a -> b
$ [SFMTree]
ts [SFMTree] -> [SFMTree] -> [SFMTree]
forall a. [a] -> [a] -> [a]
++ Maybe String -> Maybe String -> [SFMTree]
mkEt Maybe String
lx Maybe String
gl
    go Maybe String
lx Maybe String
gl (Missing String
m [SFMTree]
ts) = String -> [SFMTree] -> SFMTree
Missing String
m ([SFMTree] -> SFMTree) -> [SFMTree] -> SFMTree
forall a b. (a -> b) -> a -> b
$ Maybe String -> Maybe String -> SFMTree -> SFMTree
go Maybe String
lx Maybe String
gl (SFMTree -> SFMTree) -> [SFMTree] -> [SFMTree]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [SFMTree]
ts

    isGloss :: Field -> Maybe String
isGloss Field{String
fieldMarker :: Field -> String
fieldMarker :: String
fieldMarker,String
fieldValue :: Field -> String
fieldValue :: String
fieldValue}
        | String
fieldMarker String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"ge" = String -> Maybe String
forall a. a -> Maybe a
Just String
fieldValue
        | Bool
otherwise = Maybe String
forall a. Maybe a
Nothing

    mkEt :: Maybe String -> Maybe String -> [SFMTree]
    mkEt :: Maybe String -> Maybe String -> [SFMTree]
mkEt Maybe String
Nothing Maybe String
_ = []  -- can't make etymology without lexeme
    mkEt (Just String
lx) Maybe String
gl = SFMTree -> [SFMTree]
forall a. a -> [a]
forall (f :: * -> *) a. Applicative f => a -> f a
pure (SFMTree -> [SFMTree]) -> SFMTree -> [SFMTree]
forall a b. (a -> b) -> a -> b
$
        Field -> [SFMTree] -> SFMTree
Filled Field
            { fieldMarker :: String
fieldMarker = String
"et"
            , fieldWhitespace :: String
fieldWhitespace = String
" "
            , fieldSourcePos :: Maybe SourcePos
fieldSourcePos = Maybe SourcePos
forall a. Maybe a
Nothing
            , fieldValue :: String
fieldValue = ShowS
ensureNewline ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ ShowS
f ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ ShowS
trim String
lx
            }
        ([SFMTree] -> SFMTree) -> [SFMTree] -> SFMTree
forall a b. (a -> b) -> a -> b
$ case Maybe String
gl of
              Maybe String
Nothing -> []
              Just String
gl' ->
                  [ Field -> [SFMTree] -> SFMTree
Filled Field
                      { fieldMarker :: String
fieldMarker = String
"eg"
                      , fieldWhitespace :: String
fieldWhitespace = String
" "
                      , fieldSourcePos :: Maybe SourcePos
fieldSourcePos = Maybe SourcePos
forall a. Maybe a
Nothing
                      , fieldValue :: String
fieldValue = ShowS
ensureNewline String
gl'
                      } []
                  ]

    trim :: ShowS
trim = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhileEnd Char -> Bool
isSpace

    ensureNewline :: ShowS
ensureNewline String
s
        | String -> Char
forall a. HasCallStack => [a] -> a
last String
s Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n' = String
s
        | Bool
otherwise = String
s String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"\n"