{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Readers.CommonMark Copyright : Copyright (C) 2015-2022 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable Conversion of CommonMark-formatted plain text to 'Pandoc' document. CommonMark is a strongly specified variant of Markdown: http://commonmark.org. -} module Text.Pandoc.Readers.CommonMark (readCommonMark) where import Commonmark import Commonmark.Extensions import Commonmark.Pandoc import Data.Text (Text) import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Builder as B import Text.Pandoc.Options import Text.Pandoc.Error import Text.Pandoc.Readers.Metadata (yamlMetaBlock) import Control.Monad.Except import Data.Functor.Identity (runIdentity) import Data.Typeable import Text.Pandoc.Parsing (runParserT, getInput, getPosition, runF, defaultParserState, option, many1, anyChar, Sources(..), ToSources(..), ParserT, Future, sourceName, sourceLine, incSourceLine) import Text.Pandoc.Walk (walk) import qualified Data.Text as T import qualified Data.Attoparsec.Text as A import Control.Applicative ((<|>)) -- | Parse a CommonMark formatted string into a 'Pandoc' structure. readCommonMark :: (PandocMonad m, ToSources a) => ReaderOptions -> a -> m Pandoc readCommonMark opts s | isEnabled Ext_yaml_metadata_block opts = do let sources = toSources s let firstSourceName = case unSources sources of ((pos,_):_) -> sourceName pos _ -> "" let toks = concatMap sourceToToks (unSources sources) res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts) pos <- getPosition rest <- getInput let rest' = case rest of -- update position of first source (#7863): Sources ((_,t):xs) -> Sources ((pos,t):xs) _ -> rest return (meta, rest')) defaultParserState firstSourceName sources case res of Left _ -> readCommonMarkBody opts sources toks Right (meta, rest) -> do -- strip off metadata section and parse body let body = concatMap sourceToToks (unSources rest) Pandoc _ bs <- readCommonMarkBody opts sources body return $ Pandoc (runF meta defaultParserState) bs | otherwise = do let sources = toSources s let toks = concatMap sourceToToks (unSources sources) readCommonMarkBody opts sources toks sourceToToks :: (SourcePos, Text) -> [Tok] sourceToToks (pos, s) = map adjust $ tokenize (sourceName pos) s where adjust = case sourceLine pos of 1 -> id n -> \tok -> tok{ tokPos = incSourceLine (tokPos tok) (n - 1) } metaValueParser :: Monad m => ReaderOptions -> ParserT Sources st m (Future st MetaValue) metaValueParser opts = do inp <- option "" $ T.pack <$> many1 anyChar let toks = concatMap sourceToToks (unSources (toSources inp)) case runIdentity (parseCommonmarkWith (specFor opts) toks) of Left _ -> mzero Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc readCommonMarkBody opts s toks = (if readerStripComments opts then walk stripBlockComments . walk stripInlineComments else id) <$> if isEnabled Ext_sourcepos opts then case runIdentity (parseCommonmarkWith (specFor opts) toks) of Left err -> throwError $ PandocParsecError s err Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls else case runIdentity (parseCommonmarkWith (specFor opts) toks) of Left err -> throwError $ PandocParsecError s err Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls stripBlockComments :: Block -> Block stripBlockComments (RawBlock (B.Format "html") s) = RawBlock (B.Format "html") (removeComments s) stripBlockComments x = x stripInlineComments :: Inline -> Inline stripInlineComments (RawInline (B.Format "html") s) = RawInline (B.Format "html") (removeComments s) stripInlineComments x = x removeComments :: Text -> Text removeComments s = either (const s) id $ A.parseOnly pRemoveComments s where pRemoveComments = mconcat <$> A.many' ("" <$ (A.string "