{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {- Copyright (C) 2006-2018 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Text.Pandoc.Writers.Docbook Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable Conversion of 'Pandoc' documents to Docbook XML. -} module Text.Pandoc.Writers.TEI (writeTEI) where import Prelude import Data.Char (toLower) import Data.List (isPrefixOf, stripPrefix) import Data.Text (Text) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (languages, languagesByExtension) import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Pretty import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate') import Text.Pandoc.Writers.Shared import Text.Pandoc.XML -- | Convert list of authors to a docbook section authorToTEI :: PandocMonad m => WriterOptions -> [Inline] -> m B.Inlines authorToTEI opts name' = do name <- render Nothing <$> inlinesToTEI opts name' let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing return $ B.rawInline "tei" $ render colwidth $ inTagsSimple "author" (text $ escapeStringForXML name) -- | Convert Pandoc document to string in Docbook format. writeTEI :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeTEI opts (Pandoc meta blocks) = do let elements = hierarchicalize blocks colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing render' :: Doc -> Text render' = render colwidth startLvl = case writerTopLevelDivision opts of TopLevelPart -> -1 TopLevelChapter -> 0 TopLevelSection -> 1 TopLevelDefault -> 1 auths' <- mapM (authorToTEI opts) $ docAuthors meta let meta' = B.setMeta "author" auths' meta metadata <- metaToJSON opts (fmap (render' . vcat) . mapM (elementToTEI opts startLvl) . hierarchicalize) (fmap render' . inlinesToTEI opts) meta' main <- (render' . vcat) <$> mapM (elementToTEI opts startLvl) elements let context = defField "body" main $ defField "mathml" (case writerHTMLMathMethod opts of MathML -> True _ -> False) metadata case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context -- | Convert an Element to TEI. elementToTEI :: PandocMonad m => WriterOptions -> Int -> Element -> m Doc elementToTEI opts _ (Blk block) = blockToTEI opts block elementToTEI opts lvl (Sec _ _num attr title elements) = do -- TEI doesn't allow sections with no content, so insert some if needed let elements' = if null elements then [Blk (Para [])] else elements -- level numbering correspond to LaTeX internals divType = case lvl of n | n == -1 -> "part" | n == 0 -> "chapter" | n >= 1 && n <= 5 -> "level" ++ show n | otherwise -> "section" contents <- vcat <$> mapM (elementToTEI opts (lvl + 1)) elements' titleContents <- inlinesToTEI opts title return $ inTags True "div" (("type", divType) : idFromAttr opts attr) $ inTagsSimple "head" titleContents $$ contents -- | Convert a list of Pandoc blocks to TEI. blocksToTEI :: PandocMonad m => WriterOptions -> [Block] -> m Doc blocksToTEI opts bs = vcat <$> mapM (blockToTEI opts) bs -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block plainToPara (Plain x) = Para x plainToPara x = x -- | Convert a list of pairs of terms and definitions into a TEI -- list with labels and items. deflistItemsToTEI :: PandocMonad m => WriterOptions -> [([Inline],[[Block]])] -> m Doc deflistItemsToTEI opts items = vcat <$> mapM (uncurry (deflistItemToTEI opts)) items -- | Convert a term and a list of blocks into a TEI varlistentry. deflistItemToTEI :: PandocMonad m => WriterOptions -> [Inline] -> [[Block]] -> m Doc deflistItemToTEI opts term defs = do term' <- inlinesToTEI opts term defs' <- blocksToTEI opts $ concatMap (map plainToPara) defs return $ inTagsIndented "label" term' $$ inTagsIndented "item" defs' -- | Convert a list of lists of blocks to a list of TEI list items. listItemsToTEI :: PandocMonad m => WriterOptions -> [[Block]] -> m Doc listItemsToTEI opts items = vcat <$> mapM (listItemToTEI opts) items -- | Convert a list of blocks into a TEI list item. listItemToTEI :: PandocMonad m => WriterOptions -> [Block] -> m Doc listItemToTEI opts item = inTagsIndented "item" <$> blocksToTEI opts (map plainToPara item) imageToTEI :: PandocMonad m => WriterOptions -> Attr -> String -> m Doc imageToTEI opts attr src = return $ selfClosingTag "graphic" $ ("url", src) : idFromAttr opts attr ++ dims where dims = go Width "width" ++ go Height "height" go dir dstr = case dimension dir attr of Just a -> [(dstr, show a)] Nothing -> [] -- | Convert a Pandoc block element to TEI. blockToTEI :: PandocMonad m => WriterOptions -> Block -> m Doc blockToTEI _ Null = return empty -- Add ids to paragraphs in divs with ids - this is needed for -- pandoc-citeproc to get link anchors in bibliographies: blockToTEI opts (Div attr [Para lst]) = do let attribs = idFromAttr opts attr inTags False "p" attribs <$> inlinesToTEI opts lst blockToTEI opts (Div _ bs) = blocksToTEI opts $ map plainToPara bs blockToTEI _ h@Header{} = do -- should not occur after hierarchicalize, except inside lists/blockquotes report $ BlockNotRendered h return empty -- For TEI simple, text must be within containing block element, so -- we use treat as Para to ensure that Plain text ends up contained by -- something: blockToTEI opts (Plain lst) = blockToTEI opts $ Para lst -- title beginning with fig: indicates that the image is a figure --blockToTEI opts (Para [Image attr txt (src,'f':'i':'g':':':_)]) = -- let alt = inlinesToTEI opts txt -- capt = if null txt -- then empty -- else inTagsSimple "title" alt -- in inTagsIndented "figure" $ -- capt $$ -- (inTagsIndented "mediaobject" $ -- (inTagsIndented "imageobject" -- (imageToTEI opts attr src)) $$ -- inTagsSimple "textobject" (inTagsSimple "phrase" alt)) blockToTEI opts (Para lst) = inTags False "p" [] <$> inlinesToTEI opts lst blockToTEI opts (LineBlock lns) = blockToTEI opts $ linesToPara lns blockToTEI opts (BlockQuote blocks) = inTagsIndented "quote" <$> blocksToTEI opts blocks blockToTEI _ (CodeBlock (_,classes,_) str) = return $ text ("") <> cr <> flush (text (escapeStringForXML str) <> cr <> text "") where lang = if null langs then "" else escapeStringForXML (head langs) isLang l = map toLower l `elem` map (map toLower) languages langsFrom s = if isLang s then [s] else languagesByExtension . map toLower $ s langs = concatMap langsFrom classes blockToTEI opts (BulletList lst) = do let attribs = [("type", "unordered")] inTags True "list" attribs <$> listItemsToTEI opts lst blockToTEI _ (OrderedList _ []) = return empty blockToTEI opts (OrderedList (start, numstyle, _) (first:rest)) = do let attribs = case numstyle of DefaultStyle -> [] Decimal -> [("type", "ordered:arabic")] Example -> [("type", "ordered:arabic")] UpperAlpha -> [("type", "ordered:upperalpha")] LowerAlpha -> [("type", "ordered:loweralpha")] UpperRoman -> [("type", "ordered:upperroman")] LowerRoman -> [("type", "ordered:lowerroman")] items <- if start == 1 then listItemsToTEI opts (first:rest) else do fi <- blocksToTEI opts $ map plainToPara first re <- listItemsToTEI opts rest return $ inTags True "item" [("n",show start)] fi $$ re return $ inTags True "list" attribs items blockToTEI opts (DefinitionList lst) = do let attribs = [("type", "definition")] inTags True "list" attribs <$> deflistItemsToTEI opts lst blockToTEI _ b@(RawBlock f str) | f == "tei" = return $ text str -- raw TEI block (should such a thing exist). | otherwise = do report $ BlockNotRendered b return empty blockToTEI _ HorizontalRule = return $ selfClosingTag "milestone" [("unit","undefined") ,("type","separator") ,("rendition","line")] -- | TEI Tables -- TEI Simple's tables are composed of cells and rows; other -- table info in the AST is here lossily discard. blockToTEI opts (Table _ _ _ headers rows) = do headers' <- tableHeadersToTEI opts headers rows' <- mapM (tableRowToTEI opts) rows return $ inTags True "table" [] $ headers' $$ vcat rows' tableRowToTEI :: PandocMonad m => WriterOptions -> [[Block]] -> m Doc tableRowToTEI opts cols = (inTagsIndented "row" . vcat) <$> mapM (tableItemToTEI opts) cols tableHeadersToTEI :: PandocMonad m => WriterOptions -> [[Block]] -> m Doc tableHeadersToTEI opts cols = (inTags True "row" [("role","label")] . vcat) <$> mapM (tableItemToTEI opts) cols tableItemToTEI :: PandocMonad m => WriterOptions -> [Block] -> m Doc tableItemToTEI opts item = (inTags False "cell" [] . vcat) <$> mapM (blockToTEI opts) item -- | Convert a list of inline elements to TEI. inlinesToTEI :: PandocMonad m => WriterOptions -> [Inline] -> m Doc inlinesToTEI opts lst = hcat <$> mapM (inlineToTEI opts) lst -- | Convert an inline element to TEI. inlineToTEI :: PandocMonad m => WriterOptions -> Inline -> m Doc inlineToTEI _ (Str str) = return $ text $ escapeStringForXML str inlineToTEI opts (Emph lst) = inTags False "hi" [("rendition","simple:italic")] <$> inlinesToTEI opts lst inlineToTEI opts (Strong lst) = inTags False "hi" [("rendition", "simple:bold")] <$> inlinesToTEI opts lst inlineToTEI opts (Strikeout lst) = inTags False "hi" [("rendition", "simple:strikethrough")] <$> inlinesToTEI opts lst inlineToTEI opts (Superscript lst) = inTags False "hi" [("rendition", "simple:superscript")] <$> inlinesToTEI opts lst inlineToTEI opts (Subscript lst) = inTags False "hi" [("rendition", "simple:subscript")] <$> inlinesToTEI opts lst inlineToTEI opts (SmallCaps lst) = inTags False "hi" [("rendition", "simple:smallcaps")] <$> inlinesToTEI opts lst inlineToTEI opts (Quoted _ lst) = inTagsSimple "quote" <$> inlinesToTEI opts lst inlineToTEI opts (Cite _ lst) = inlinesToTEI opts lst inlineToTEI opts (Span _ ils) = inlinesToTEI opts ils inlineToTEI _ (Code _ str) = return $ inTags False "seg" [("type","code")] $ text (escapeStringForXML str) -- Distinguish display from inline math by wrapping the former in a "figure." inlineToTEI _ (Math t str) = return $ case t of InlineMath -> inTags False "formula" [("notation","TeX")] $ text str DisplayMath -> inTags True "figure" [("type","math")] $ inTags False "formula" [("notation","TeX")] $ text str inlineToTEI _ il@(RawInline f x) | f == "tei" = return $ text x | otherwise = empty <$ report (InlineNotRendered il) inlineToTEI _ LineBreak = return $ selfClosingTag "lb" [] inlineToTEI _ Space = return space -- because we use \n for LineBreak, we can't do soft breaks: inlineToTEI _ SoftBreak = return space inlineToTEI opts (Link attr txt (src, _)) | Just email <- stripPrefix "mailto:" src = do let emailLink = text $ escapeStringForXML email case txt of [Str s] | escapeURI s == email -> return emailLink _ -> do linktext <- inlinesToTEI opts txt return $ linktext <+> char '(' <> emailLink <> char ')' | otherwise = (if "#" `isPrefixOf` src then inTags False "ref" $ ("target", drop 1 src) : idFromAttr opts attr else inTags False "ref" $ ("target", src) : idFromAttr opts attr ) <$> inlinesToTEI opts txt inlineToTEI opts (Image attr description (src, tit)) = do let titleDoc = if null tit then empty else inTags False "figDesc" [] (text $ escapeStringForXML tit) imageDesc <- if null description then return empty else inTags False "head" [] <$> inlinesToTEI opts description img <- imageToTEI opts attr src return $ inTagsIndented "figure" $ imageDesc $$ img $$ titleDoc inlineToTEI opts (Note contents) = inTagsIndented "note" <$> blocksToTEI opts contents idFromAttr :: WriterOptions -> Attr -> [(String, String)] idFromAttr opts (id',_,_) = if null id' then [] else [("xml:id", writerIdentifierPrefix opts ++ id')]