-- | The module provides several abstractions for representing external
-- data formats.  Concraft will be able to work with any format which
-- implements those abstractions.

module NLP.Concraft.Format
( Tag
, Word (..)
, Sent (..)
, Doc (..)
) where

import Prelude hiding (words, unwords)
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified NLP.Concraft.Morphosyntax as M

-- | Textual representation of a morphosyntactic tag.
-- This is a plain alias for strict 'T.Text'.
type Tag = T.Text

-- | Handler for words of an external representation @w@.
data Word w = Word
    { extract :: w -> M.Word Tag
      -- ^ Extract the information relevant for tagging from a word,
      -- as an 'M.Word' over 'Tag's.
    , select  :: M.WMap Tag -> w -> w
      -- ^ Select the set of morphosyntactic interpretations: given an
      -- 'M.WMap' over 'Tag's and a word, produce the updated word.
    }

-- | Handler for sentences of type @s@ composed of words of type @w@.
data Sent s w = Sent
    { parseSent   :: s -> [w]
      -- ^ Split a sentence into a list of its words.
    , mergeSent   :: [w] -> s -> s
      -- ^ Merge a list of words with a sentence, yielding a sentence.
    , wordHandler :: Word w
      -- ^ Handler for the individual words.
    }

-- | Document format: a document is a structure @f@ of sentences @s@,
-- which in turn consist of words @w@.
data Doc f s w = Doc
    { parseDoc    :: L.Text -> f s
      -- ^ Parse lazy text into a functor with sentence elements.
    , showDoc     :: f s -> L.Text
      -- ^ Show the textual representation of a document.
    , sentHandler :: Sent s w
      -- ^ Handler for the individual sentences.
    }