{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE LambdaCase #-}

-- |

-- Module      : Brassica.SFM.SFM

-- Copyright   : See LICENSE file

-- License     : BSD3

-- Maintainer  : Brad Neimann

--

-- This module implements basic support for the SIL Standard Format

-- Marker (SFM) format, used by dictionary software such as

-- [FieldWorks](https://software.sil.org/fieldworks/). This format forms

-- the basis of standards such as Multi-Dictionary Formatter (MDF),

-- implemented here in @Brassica.SFM.MDF@.

module Brassica.SFM.SFM
       ( -- * Linear SFM documents

         Field(..)
       , SFM
       , parseSFM
       , exactPrintField
       , exactPrintSFM
       , stripSourcePos
         -- * Hierarchies

       , Hierarchy
       , SFMTree(..)
       , toTree
       , fromTree
       , mapField
       , searchField
       ) where

import Data.Char (isSpace)
import Data.Maybe (fromMaybe)
import Data.Void (Void)

import Text.Megaparsec
import Text.Megaparsec.Char

import qualified Data.Map as M

-- | A single field of an SFM file.

data Field = Field
    { Field -> String
fieldMarker :: String
    -- ^ The field marker, ommitting the initial backslash

    , Field -> String
fieldWhitespace :: String
    -- ^ Whitespace after the field marker

    , Field -> Maybe SourcePos
fieldSourcePos :: Maybe SourcePos
    -- ^ Optionally, a Megaparsec 'SourcePos' marking the start of the value

    -- (to enable further parsing)

    , Field -> String
fieldValue :: String
    -- ^ The value of the field, including all whitespace until the next marker

    } deriving (Int -> Field -> ShowS
[Field] -> ShowS
Field -> String
(Int -> Field -> ShowS)
-> (Field -> String) -> ([Field] -> ShowS) -> Show Field
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Field -> ShowS
showsPrec :: Int -> Field -> ShowS
$cshow :: Field -> String
show :: Field -> String
$cshowList :: [Field] -> ShowS
showList :: [Field] -> ShowS
Show)

-- | An SFM file, being a list of fields.

type SFM = [Field]

-- | Set the 'fieldSourcePos' of a 'Field' to 'Nothing'. Useful for

-- making debug output shorter.

stripSourcePos :: Field -> Field
stripSourcePos :: Field -> Field
stripSourcePos Field
f = Field
f { fieldSourcePos = Nothing }

type Parser = Parsec Void String

sc :: Parser String
sc :: Parser String
sc = (Maybe String -> String)
-> ParsecT Void String Identity (Maybe String) -> Parser String
forall a b.
(a -> b)
-> ParsecT Void String Identity a -> ParsecT Void String Identity b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (String -> Maybe String -> String
forall a. a -> Maybe a -> a
fromMaybe String
"") (ParsecT Void String Identity (Maybe String) -> Parser String)
-> ParsecT Void String Identity (Maybe String) -> Parser String
forall a b. (a -> b) -> a -> b
$ Parser String -> ParsecT Void String Identity (Maybe String)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Parser String -> ParsecT Void String Identity (Maybe String))
-> Parser String -> ParsecT Void String Identity (Maybe String)
forall a b. (a -> b) -> a -> b
$
    Maybe String
-> (Token String -> Bool)
-> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P (String -> Maybe String
forall a. a -> Maybe a
Just String
"white space") (Bool -> Bool -> Bool
(&&) (Bool -> Bool -> Bool)
-> (Token String -> Bool) -> Token String -> Bool -> Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Bool
Token String -> Bool
isSpace (Token String -> Bool -> Bool)
-> (Token String -> Bool) -> Token String -> Bool
forall a b.
(Token String -> a -> b)
-> (Token String -> a) -> Token String -> b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\n'))

-- | Parse until the next backslash at the beginning of a line.

parseFieldValue :: Parser String
parseFieldValue :: Parser String
parseFieldValue = do
    String
val <- Maybe String
-> (Token String -> Bool)
-> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhileP Maybe String
forall a. Maybe a
Nothing (Token String -> Token String -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
Token String
'\n')
    ParsecT Void String Identity Char
-> ParsecT
     Void String Identity (Either (ParseError String Void) Char)
forall a.
ParsecT Void String Identity a
-> ParsecT Void String Identity (Either (ParseError String Void) a)
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> m (Either (ParseError s e) a)
observing (Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token String
'\n') ParsecT Void String Identity (Either (ParseError String Void) Char)
-> (Either (ParseError String Void) Char -> Parser String)
-> Parser String
forall a b.
ParsecT Void String Identity a
-> (a -> ParsecT Void String Identity b)
-> ParsecT Void String Identity b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
        Left ParseError String Void
_ -> String
val String -> ParsecT Void String Identity () -> Parser String
forall a b.
a
-> ParsecT Void String Identity b -> ParsecT Void String Identity a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ParsecT Void String Identity ()
forall e s (m :: * -> *). MonadParsec e s m => m ()
eof
        Right Char
_ -> do
            let val' :: String
val' = String
valString -> ShowS
forall a. [a] -> [a] -> [a]
++String
"\n"
            -- parse more lines if no following backslash

            (ParsecT Void String Identity Char
-> ParsecT Void String Identity ()
forall a.
ParsecT Void String Identity a -> ParsecT Void String Identity ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m ()
notFollowedBy (Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token String
'\\') ParsecT Void String Identity () -> Parser String -> Parser String
forall a b.
ParsecT Void String Identity a
-> ParsecT Void String Identity b -> ParsecT Void String Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ((String
val'String -> ShowS
forall a. [a] -> [a] -> [a]
++) ShowS -> Parser String -> Parser String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser String
parseFieldValue))
                Parser String -> Parser String -> Parser String
forall a.
ParsecT Void String Identity a
-> ParsecT Void String Identity a -> ParsecT Void String Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> String -> Parser String
forall a. a -> ParsecT Void String Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure String
val'

entry :: Parser (String, String, SourcePos, String)
entry :: Parser (String, String, SourcePos, String)
entry = do
    Char
_ <- Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token String
'\\'
    String
marker <- Maybe String
-> (Token String -> Bool)
-> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P (String -> Maybe String
forall a. a -> Maybe a
Just String
"field name") (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace)
    String
s <- Parser String
sc
    SourcePos
ps <- ParsecT Void String Identity SourcePos
forall s e (m :: * -> *).
(TraversableStream s, MonadParsec e s m) =>
m SourcePos
getSourcePos
    String
value <- Parser String
parseFieldValue
    (String, String, SourcePos, String)
-> Parser (String, String, SourcePos, String)
forall a. a -> ParsecT Void String Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (String
marker, String
s, SourcePos
ps, String
value)

-- | Parse an SFM file to an 'SFM' value.

parseSFM
    :: String  -- ^ Name of source file

    -> String  -- ^ Input SFM data to parse

    -> Either (ParseErrorBundle String Void) SFM
parseSFM :: String -> String -> Either (ParseErrorBundle String Void) [Field]
parseSFM = Parsec Void String [Field]
-> String
-> String
-> Either (ParseErrorBundle String Void) [Field]
forall e s a.
Parsec e s a -> String -> s -> Either (ParseErrorBundle s e) a
runParser (Parser String
sc Parser String
-> Parsec Void String [Field] -> Parsec Void String [Field]
forall a b.
ParsecT Void String Identity a
-> ParsecT Void String Identity b -> ParsecT Void String Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParsecT Void String Identity Field -> Parsec Void String [Field]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many ((String, String, SourcePos, String) -> Field
toField ((String, String, SourcePos, String) -> Field)
-> Parser (String, String, SourcePos, String)
-> ParsecT Void String Identity Field
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser (String, String, SourcePos, String)
entry) Parsec Void String [Field]
-> ParsecT Void String Identity () -> Parsec Void String [Field]
forall a b.
ParsecT Void String Identity a
-> ParsecT Void String Identity b -> ParsecT Void String Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void String Identity ()
forall e s (m :: * -> *). MonadParsec e s m => m ()
eof)
  where
    toField :: (String, String, SourcePos, String) -> Field
toField (String
f, String
s, SourcePos
p, String
v) = String -> String -> Maybe SourcePos -> String -> Field
Field String
f String
s (SourcePos -> Maybe SourcePos
forall a. a -> Maybe a
Just SourcePos
p) String
v

-- | Print a single field as 'String'.

exactPrintField :: Field -> String
exactPrintField :: Field -> String
exactPrintField Field
f = Char
'\\' Char -> ShowS
forall a. a -> [a] -> [a]
: Field -> String
fieldMarker Field
f String -> ShowS
forall a. [a] -> [a] -> [a]
++ Field -> String
fieldWhitespace Field
f String -> ShowS
forall a. [a] -> [a] -> [a]
++ Field -> String
fieldValue Field
f

-- | Given an 'SFM', reconstruct the original file. A trivial wrapper

-- around 'exactPrintField'.

exactPrintSFM :: SFM -> String
exactPrintSFM :: [Field] -> String
exactPrintSFM = (Field -> String) -> [Field] -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Field -> String
exactPrintField

-- | Rose tree describing a hierarchical SFM document.

data SFMTree
    = Root [SFMTree]             -- ^ Root node

    | Filled Field [SFMTree]     -- ^ A 'Field' with zero or more children.

    | Missing String [SFMTree]
    -- ^ A missing level of the hierarchy: a marker which is inferred

    -- from the presence of its children.

    deriving (Int -> SFMTree -> ShowS
[SFMTree] -> ShowS
SFMTree -> String
(Int -> SFMTree -> ShowS)
-> (SFMTree -> String) -> ([SFMTree] -> ShowS) -> Show SFMTree
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> SFMTree -> ShowS
showsPrec :: Int -> SFMTree -> ShowS
$cshow :: SFMTree -> String
show :: SFMTree -> String
$cshowList :: [SFMTree] -> ShowS
showList :: [SFMTree] -> ShowS
Show)

-- | The hierarchy underlying an SFM document, defined as a map from

-- field names to their parents. Fields which are absent from the map

-- are treated as roots.

type Hierarchy = M.Map String String

-- | Returns the full hierarchy of a marker, starting with its

-- immediate parent and finishing with the root.

hierarchyFor :: Hierarchy -> String -> [String]
hierarchyFor :: Hierarchy -> String -> [String]
hierarchyFor Hierarchy
h = String -> [String]
go
  where
    go :: String -> [String]
    go :: String -> [String]
go String
m = case String -> Hierarchy -> Maybe String
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup String
m Hierarchy
h of
        Just String
m' -> String
m' String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> [String]
go String
m'
        Maybe String
Nothing -> []

(<+:>) :: SFMTree -> SFMTree -> SFMTree
(Root [SFMTree]
s) <+:> :: SFMTree -> SFMTree -> SFMTree
<+:> SFMTree
t = [SFMTree] -> SFMTree
Root ([SFMTree]
s [SFMTree] -> [SFMTree] -> [SFMTree]
forall a. [a] -> [a] -> [a]
++ [SFMTree
t])
(Filled Field
f [SFMTree]
s) <+:> SFMTree
t = Field -> [SFMTree] -> SFMTree
Filled Field
f ([SFMTree]
s [SFMTree] -> [SFMTree] -> [SFMTree]
forall a. [a] -> [a] -> [a]
++ [SFMTree
t])
(Missing String
f [SFMTree]
s) <+:> SFMTree
t = String -> [SFMTree] -> SFMTree
Missing String
f ([SFMTree]
s [SFMTree] -> [SFMTree] -> [SFMTree]
forall a. [a] -> [a] -> [a]
++ [SFMTree
t])

-- | Generate a tree structure from an 'SFM' document according to the

-- given 'Hierarchy'. Fields are converted to 'Filled' nodes,

-- containing as many following nodes as possible, until the next node

-- which is at the same level of the hierarchy or lower. 'Missing'

-- nodes are created for any missing levels of the hierarchy.

toTree :: Hierarchy -> SFM -> SFMTree
toTree :: Hierarchy -> [Field] -> SFMTree
toTree Hierarchy
h = (SFMTree, [Field]) -> SFMTree
forall a b. (a, b) -> a
fst ((SFMTree, [Field]) -> SFMTree)
-> ([Field] -> (SFMTree, [Field])) -> [Field] -> SFMTree
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SFMTree -> [Field] -> (SFMTree, [Field])
go ([SFMTree] -> SFMTree
Root [])
  where
    go :: SFMTree -> SFM -> (SFMTree, SFM)
    go :: SFMTree -> [Field] -> (SFMTree, [Field])
go SFMTree
t [] = (SFMTree
t, [])
    go s :: SFMTree
s@(Root [SFMTree]
_) (Field
f:[Field]
fs) =
        let (SFMTree
subtree, [Field]
fs') = SFMTree -> [Field] -> (SFMTree, [Field])
go (Field -> [SFMTree] -> SFMTree
Filled Field
f []) [Field]
fs
        in SFMTree -> [Field] -> (SFMTree, [Field])
go (SFMTree
s SFMTree -> SFMTree -> SFMTree
<+:> SFMTree
subtree) [Field]
fs'
    go SFMTree
s (Field
f:[Field]
fs) =
        let parentMarker :: String
parentMarker = case SFMTree
s of
                Filled (Field{fieldMarker :: Field -> String
fieldMarker=String
m}) [SFMTree]
_ -> String
m
                Missing String
m [SFMTree]
_ -> String
m
            hierarchy :: [String]
hierarchy = Hierarchy -> String -> [String]
hierarchyFor Hierarchy
h (Field -> String
fieldMarker Field
f)
        in case (String -> Bool) -> [String] -> ([String], [String])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (String -> String -> Bool
forall a. Eq a => a -> a -> Bool
==String
parentMarker) [String]
hierarchy of
            -- this marker is unrelated to parentMarker, need to

            -- rewind back up the tree and try again

            ([String]
_, []) -> (SFMTree
s, Field
fField -> [Field] -> [Field]
forall a. a -> [a] -> [a]
:[Field]
fs)

            -- otherwise this marker belongs somewhere under

            -- parentMarker


            -- if it is an immediate child, add the subtree directly

            ([], [String]
_) ->
                let (SFMTree
subtree, [Field]
fs') = SFMTree -> [Field] -> (SFMTree, [Field])
go (Field -> [SFMTree] -> SFMTree
Filled Field
f []) [Field]
fs
                in SFMTree -> [Field] -> (SFMTree, [Field])
go (SFMTree
s SFMTree -> SFMTree -> SFMTree
<+:> SFMTree
subtree) [Field]
fs'

            -- otherwise, recurse into the hierarchy

            ([String]
ms, [String]
_) ->
                -- NB. 'last ms' is the /highest/ missing level! Then

                -- the recursive call infers the next-highest level,

                -- and so on until all the levels are created. This

                -- could be sped up by creating all the levels at once,

                -- but it's not worth it for now.

                let (SFMTree
subtree, [Field]
fs') = SFMTree -> [Field] -> (SFMTree, [Field])
go (String -> [SFMTree] -> SFMTree
Missing ([String] -> String
forall a. HasCallStack => [a] -> a
last [String]
ms) []) (Field
fField -> [Field] -> [Field]
forall a. a -> [a] -> [a]
:[Field]
fs)
                in SFMTree -> [Field] -> (SFMTree, [Field])
go (SFMTree
s SFMTree -> SFMTree -> SFMTree
<+:> SFMTree
subtree) [Field]
fs'

-- | Inverse of 'toTree': convert an 'SFMTree' back into a linear

-- 'SFM' document.

fromTree :: SFMTree -> SFM
fromTree :: SFMTree -> [Field]
fromTree (Root [SFMTree]
s) = (SFMTree -> [Field]) -> [SFMTree] -> [Field]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap SFMTree -> [Field]
fromTree [SFMTree]
s
fromTree (Filled Field
f [SFMTree]
s) = Field
f Field -> [Field] -> [Field]
forall a. a -> [a] -> [a]
: (SFMTree -> [Field]) -> [SFMTree] -> [Field]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap SFMTree -> [Field]
fromTree [SFMTree]
s
fromTree (Missing String
_ [SFMTree]
s) = (SFMTree -> [Field]) -> [SFMTree] -> [Field]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap SFMTree -> [Field]
fromTree [SFMTree]
s

-- | Map a function over all the 'Field's in an 'SFMTree'.

mapField :: (Field -> Field) -> SFMTree -> SFMTree
mapField :: (Field -> Field) -> SFMTree -> SFMTree
mapField Field -> Field
g (Root [SFMTree]
s) = [SFMTree] -> SFMTree
Root ([SFMTree] -> SFMTree) -> [SFMTree] -> SFMTree
forall a b. (a -> b) -> a -> b
$ (Field -> Field) -> SFMTree -> SFMTree
mapField Field -> Field
g (SFMTree -> SFMTree) -> [SFMTree] -> [SFMTree]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [SFMTree]
s
mapField Field -> Field
g (Filled Field
f [SFMTree]
s) = Field -> [SFMTree] -> SFMTree
Filled (Field -> Field
g Field
f) ([SFMTree] -> SFMTree) -> [SFMTree] -> SFMTree
forall a b. (a -> b) -> a -> b
$ (Field -> Field) -> SFMTree -> SFMTree
mapField Field -> Field
g (SFMTree -> SFMTree) -> [SFMTree] -> [SFMTree]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [SFMTree]
s
mapField Field -> Field
g (Missing String
m [SFMTree]
s) = String -> [SFMTree] -> SFMTree
Missing String
m ([SFMTree] -> SFMTree) -> [SFMTree] -> SFMTree
forall a b. (a -> b) -> a -> b
$ (Field -> Field) -> SFMTree -> SFMTree
mapField Field -> Field
g (SFMTree -> SFMTree) -> [SFMTree] -> [SFMTree]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [SFMTree]
s

-- | Depth-first search for fields under an 'SFMTree' which satisfy

-- the given predicate.

searchField :: (Field -> Maybe a) -> SFMTree -> [a]
searchField :: forall a. (Field -> Maybe a) -> SFMTree -> [a]
searchField Field -> Maybe a
p (Root [SFMTree]
ts) = (Field -> Maybe a) -> SFMTree -> [a]
forall a. (Field -> Maybe a) -> SFMTree -> [a]
searchField Field -> Maybe a
p (SFMTree -> [a]) -> [SFMTree] -> [a]
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< [SFMTree]
ts
searchField Field -> Maybe a
p (Filled Field
f [SFMTree]
ts)
    | Just a
a <- Field -> Maybe a
p Field
f = a
a a -> [a] -> [a]
forall a. a -> [a] -> [a]
: ((Field -> Maybe a) -> SFMTree -> [a]
forall a. (Field -> Maybe a) -> SFMTree -> [a]
searchField Field -> Maybe a
p (SFMTree -> [a]) -> [SFMTree] -> [a]
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< [SFMTree]
ts)
    | Bool
otherwise     =      (Field -> Maybe a) -> SFMTree -> [a]
forall a. (Field -> Maybe a) -> SFMTree -> [a]
searchField Field -> Maybe a
p (SFMTree -> [a]) -> [SFMTree] -> [a]
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< [SFMTree]
ts
searchField Field -> Maybe a
p (Missing String
_ [SFMTree]
ts) = (Field -> Maybe a) -> SFMTree -> [a]
forall a. (Field -> Maybe a) -> SFMTree -> [a]
searchField Field -> Maybe a
p (SFMTree -> [a]) -> [SFMTree] -> [a]
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< [SFMTree]
ts