{-# LANGUAGE OverloadedStrings #-} -- | Printing utilities for the LMF dictionary format. module NLP.Polh.LMF.Show ( showPolh , showLexEntry ) where import Data.Monoid (Monoid, mappend, mconcat) import Data.List (intersperse) import Data.Maybe (maybeToList) import qualified Data.Text as T import qualified Data.Text.Lazy as L import qualified Data.Text.Lazy.Builder as L import Text.XML.PolySoup (escapeXml) import NLP.Polh.Types -- | An infix synonym for 'mappend'. {-# INLINE (<>) #-} (<>) :: Monoid m => m -> m -> m (<>) = mappend -- | Indentation parameter. indentSize :: Int indentSize = 2 identPref :: L.Builder identPref = L.fromLazyText (L.replicate (fromIntegral indentSize) " ") {-# INLINE ident #-} ident :: L.Builder -> L.Builder ident = (identPref <>) prolog :: [L.Builder] prolog = [ "" , "" , " " , " " , " " , " " ] epilog :: [L.Builder] epilog = [ " " , "" ] -- | Show the entire dictionary as a lazy text in the LMF format. showPolh :: Polh -> L.Text showPolh = L.toLazyText . mconcat . map (<> "\n") . embed . concatMap buildLexEntry where embed body = prolog ++ map (ident.ident) body ++ epilog -- | Show lexical entry using the LMF format. showLexEntry :: LexEntry -> L.Text showLexEntry = L.toLazyText . mconcat . map (<> "\n") . buildLexEntry buildElem :: L.Builder -> [L.Builder] -> L.Builder -> [L.Builder] buildElem beg body end = beg : map ident body ++ [end] -- | Each output line is represented as a builder. We use separate builders -- for separate lines because we want to easilly indent the output text. buildLexEntry :: LexEntry -> [L.Builder] buildLexEntry lx = buildElem beg body end where beg = " L.fromText (lexId lx) <> "\">" end = "" body = map (buildFeat "lineRef") (maybeToList $ lineRef lx) ++ map (buildFeat "status") (maybeToList $ status lx) ++ map (buildFeat "partOfSpeech") (pos lx) ++ buildLemma (lemma lx) ++ concatMap buildForm (forms lx) ++ concatMap buildRelForm (related lx) ++ buildComps (components lx) ++ concatMap buildSyn (syntactic lx) ++ concatMap buildSense (senses lx) buildLemma :: Lemma -> [L.Builder] buildLemma base = buildElem beg body end where beg = "" end = "" body = concatMap (buildRepr "FormRepresentation") (repr base) buildForm :: WordForm -> [L.Builder] buildForm form = buildElem beg body end where beg = "" end = "" body = concatMap (buildRepr "FormRepresentation") (repr form) buildRelForm :: RelForm -> [L.Builder] buildRelForm form = buildElem beg body end where beg = " L.fromText (relTo form) <> "\">" end = "" body = concatMap (buildRepr "FormRepresentation") (repr form) buildComps :: [T.Text] -> [L.Builder] buildComps [] = [] buildComps xs = buildElem beg body end where beg = "" end = "" body = map comp xs comp x = " L.fromText x <> "\"/>" buildSyn :: SynBehaviour -> [L.Builder] buildSyn syn = buildElem beg body end where ids = mconcat . intersperse " " . map L.fromText $ synSenseIds syn beg = " ids <> "\">" end = "" body = concatMap (buildRepr "TextRepresentation") (repr syn) buildSense :: Sense -> [L.Builder] buildSense sense = buildElem beg body end where beg = case senseId sense of Just x -> " L.fromText x <> "\">" Nothing -> "" end = "" body = map (buildFeat "style") (style sense) ++ concatMap buildDef (defs sense) ++ concatMap buildCxt (cxts sense) buildDef :: Definition -> [L.Builder] buildDef def = buildElem beg body end where beg = "" end = "" body = concatMap (buildRepr "TextRepresentation") (repr def) buildCxt :: Context -> [L.Builder] buildCxt cxt = buildElem beg body end where beg = "" end = "" body = concatMap (buildRepr "TextRepresentation") (repr cxt) buildRepr :: L.Builder -> Repr -> [L.Builder] buildRepr tag rp = buildElem beg body end where beg = "<" <> tag <> ">" end = " tag <> ">" body = [ buildFeat "writtenForm" . escapeXml $ writtenForm rp , buildFeat "language" (language rp) ] ++ source source = case sourceID rp of Just x -> [buildFeat "sourceID" x] Nothing -> [] buildFeat :: L.Builder -> T.Text -> L.Builder buildFeat att val = " att <> "\" val=\"" <> L.fromText val <> "\"/>"