{-# LANGUAGE RankNTypes #-} {-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE BangPatterns #-} module Commonmark.Blocks ( mkBlockParser , defaultBlockSpecs , BlockStartResult(..) , BlockSpec(..) , BlockData(..) , defBlockData , BlockNode , BPState(..) , BlockParser , LinkInfo(..) , defaultFinalizer , runInlineParser , addNodeToStack , collapseNodeStack , getBlockText , removeIndent , bspec , endOfBlock , interruptsParagraph , linkReferenceDef , renderChildren , reverseSubforests , getParentListType -- * BlockSpecs , docSpec , indentedCodeSpec , fencedCodeSpec , blockQuoteSpec , atxHeadingSpec , setextHeadingSpec , thematicBreakSpec , listItemSpec , bulletListMarker , orderedListMarker , rawHtmlSpec , attributeSpec , paraSpec , plainSpec ) where import Commonmark.Tag import Commonmark.TokParsers import Commonmark.ReferenceMap import Commonmark.Inlines (pEscaped, pLinkDestination, pLinkLabel, pLinkTitle) import Commonmark.Entity (unEntity) import Commonmark.Tokens import Commonmark.Types import Control.Monad (foldM, guard, mzero, void, unless, when) import Control.Monad.Trans.Class (lift) import Data.Foldable (foldrM) import Unicode.Char (isAsciiUpper, isDigit) import Unicode.Char.General.Compat (isSpace) import Data.Dynamic import Data.Text (Text) import qualified Data.Map.Strict as M import qualified Data.Text as T import qualified Data.Text.Read as TR import Data.Tree import Text.Parsec mkBlockParser :: (Monad m, IsBlock il bl) => [BlockSpec m il bl] -- ^ Defines block syntax -> [BlockParser m il bl bl] -- ^ Parsers to run at end -> (ReferenceMap -> [Tok] -> m (Either ParseError il)) -- ^ Inline parser -> [BlockParser m il bl Attributes] -- ^ attribute parsers -> [Tok] -- ^ Tokenized commonmark input -> m (Either ParseError bl) -- ^ Result or error mkBlockParser specs finalParsers ilParser attrParsers ts = runParserT (do case ts of (t:_) -> setPosition (tokPos t) [] -> return () processLines specs finalParsers) BPState{ referenceMap = emptyReferenceMap , inlineParser = ilParser , nodeStack = [Node (defBlockData docSpec) []] , blockMatched = False , maybeLazy = True , maybeBlank = True , counters = M.empty , failurePositions = M.empty , attributeParsers = attrParsers , nextAttributes = mempty } "source" (length ts `seq` ts) -- we evaluate length ts to make sure the list is -- fully evaluated; this helps performance. note that -- we can't use deepseq because there's no instance for SourcePos. processLines :: (Monad m, IsBlock il bl) => [BlockSpec m il bl] -> [BlockParser m il bl bl] -- ^ Parsers to run at end -> BlockParser m il bl bl processLines specs finalParsers = {-# SCC processLines #-} do let go = eof <|> (processLine specs >> go) in go tree <- getState >>= collapseNodeStack . nodeStack updateState $ \st -> st{ nodeStack = [reverseSubforests tree] } endContent <- mconcat <$> sequence finalParsers tree':_ <- nodeStack <$> getState body <- blockConstructor (blockSpec (rootLabel tree')) tree' return $! body <> endContent reverseSubforests :: Tree a -> Tree a reverseSubforests (Node x cs) = Node x $ map reverseSubforests $ reverse cs processLine :: (Monad m, IsBlock il bl) => [BlockSpec m il bl] -> BlockParser m il bl () processLine specs = do -- check block continuations for each node in stack st' <- getState putState $ st'{ blockMatched = True , maybeLazy = True , maybeBlank = True , failurePositions = M.empty } (matched, unmatched) <- foldrM checkContinue ([],[]) (nodeStack st') -- if not everything matched, and last unmatched is paragraph, -- then we may have a lazy paragraph continuation updateState $ \st -> st{ maybeLazy = maybeLazy st && case unmatched of m:_ -> blockParagraph (bspec m) _ -> False } -- close unmatched blocks if null unmatched then updateState $ \st -> st{ nodeStack = matched } -- this update is needed or we lose startpos information else case matched of [] -> error "no blocks matched" m:ms -> do m' <- collapseNodeStack (unmatched ++ [m]) updateState $ \st -> st{ nodeStack = m':ms } restBlank <- option False $ True <$ lookAhead blankLine {-# SCC block_starts #-} unless restBlank $ (do skipMany1 (doBlockStarts specs) optional (try (blockStart paraSpec))) <|> (do getState >>= guard . maybeLazy -- lazy line sp <- getPosition updateState $ \st -> st{ nodeStack = map (addStartPos sp) (unmatched ++ matched) }) <|> void (try (blockStart paraSpec)) <|> return () (cur:rest) <- nodeStack <$> getState -- add line contents let curdata = rootLabel cur when (blockParagraph (bspec cur)) $ skipMany spaceTok pos <- getPosition toks <- {-# SCC restOfLine #-} restOfLine updateState $ \st -> st{ nodeStack = cur{ rootLabel = if blockContainsLines (bspec cur) then curdata{ blockLines = toks : blockLines curdata } else if maybeBlank st && restBlank then curdata{ blockBlanks = sourceLine pos : blockBlanks curdata } else curdata } : rest } -- showNodeStack addStartPos :: SourcePos -> BlockNode m il bl -> BlockNode m il bl addStartPos sp (Node bd cs) = Node bd{ blockStartPos = sp : blockStartPos bd } cs doBlockStarts :: Monad m => [BlockSpec m il bl] -> BlockParser m il bl () doBlockStarts specs = do st' <- getState initPos <- getPosition let failurePosMap = failurePositions st' let specs' = foldr (\spec sps -> case M.lookup (blockType spec) failurePosMap of Just pos' | initPos < pos' -> sps _ -> spec:sps) [] specs go initPos specs' where go _ [] = mzero go initPos (spec:otherSpecs) = try (do pst <- getParserState res <- blockStart spec case res of BlockStartMatch -> return () BlockStartNoMatchBefore pos -> do setParserState pst unless (pos == initPos) $ updateState $ \st -> st{ failurePositions = M.insert (blockType spec) pos (failurePositions st) } go initPos otherSpecs) <|> go initPos otherSpecs checkContinue :: Monad m => BlockNode m il bl -> ([BlockNode m il bl],[BlockNode m il bl]) -> BlockParser m il bl ([BlockNode m il bl],[BlockNode m il bl]) checkContinue nd (matched, unmatched) = do ismatched <- blockMatched <$> getState if ismatched then {-# SCC blockContinues #-} (do (startpos, Node bdata children) <- blockContinue (bspec nd) nd matched' <- blockMatched <$> getState -- if blockContinue set blockMatched to False, it's -- because of characters on the line closing the block, -- so it's not to be counted as blank: unless matched' $ updateState $ \st -> st{ maybeBlank = False, maybeLazy = False } let new = Node bdata{ blockStartPos = startpos : blockStartPos bdata } children return $! if matched' then (new:matched, unmatched) else (matched, new:unmatched)) <|> (matched, nd:unmatched) <$ updateState (\st -> st{ blockMatched = False }) else return (matched, nd:unmatched) {- --- for debugging showNodeStack :: Monad m => BlockParser m il bl a showNodeStack = do ns <- nodeStack <$> getState trace (unlines ("NODESTACK:" : map showNode ns)) (return $! ()) return undefined where showNode (Node bdata children) = unlines [ "-----" , show (blockSpec bdata) , show (blockStartPos bdata) , show (length children) ] -} data BlockStartResult = BlockStartMatch | BlockStartNoMatchBefore !SourcePos deriving (Show, Eq) -- | Defines a block-level element type. data BlockSpec m il bl = BlockSpec { blockType :: !Text -- ^ Descriptive name of block type , blockStart :: BlockParser m il bl BlockStartResult -- ^ Parses beginning -- of block. The parser should verify any -- preconditions, parse the opening of the block, -- and add the new block to the block stack using -- 'addNodeToStack', returning 'BlockStartMatch' on -- success. If the match fails, the parser can -- either fail or return 'BlockStartNoMatchBefore' and a -- 'SourcePos' before which the parser is known -- not to succeed (this will be stored in -- 'failurePositions' for the line, to ensure -- that future matches won't be attempted until -- after that position). , blockCanContain :: BlockSpec m il bl -> Bool -- ^ Returns True if -- this kind of block can contain the specified -- block type. , blockContainsLines :: !Bool -- ^ True if this kind of block -- can contain text lines. , blockParagraph :: !Bool -- ^ True if this kind of block -- is paragraph. , blockContinue :: BlockNode m il bl -> BlockParser m il bl (SourcePos, BlockNode m il bl) -- ^ Parser that checks to see if the current -- block (the 'BlockNode') can be kept open. -- If it fails, the block will be closed, unless -- we have a lazy paragraph continuation within -- the block. , blockConstructor :: BlockNode m il bl -> BlockParser m il bl bl -- ^ Renders the node into its target format, -- possibly after rendering inline content. , blockFinalize :: BlockNode m il bl -> BlockNode m il bl -> BlockParser m il bl (BlockNode m il bl) -- ^ Runs when the block is closed, but prior -- to rendering. The first parameter is the -- child, the second the parent. } instance Show (BlockSpec m il bl) where show bs = "" defaultBlockSpecs :: (Monad m, IsBlock il bl) => [BlockSpec m il bl] defaultBlockSpecs = [ indentedCodeSpec , fencedCodeSpec , blockQuoteSpec , atxHeadingSpec , setextHeadingSpec , thematicBreakSpec , listItemSpec (bulletListMarker <|> orderedListMarker) , rawHtmlSpec , attributeSpec ] defaultFinalizer :: Monad m => BlockNode m il bl -> BlockNode m il bl -> BlockParser m il bl (BlockNode m il bl) defaultFinalizer !child !parent = do -- ensure that 'counters' carries information about all -- the block identifiers used, so that auto_identifiers works properly. case lookup "id" (blockAttributes (rootLabel child)) of Nothing -> return () Just !ident -> updateState $ \st -> st{ counters = M.insert ("identifier:" <> ident) (toDyn (0 :: Int)) (counters st) } return $! parent{ subForest = child : subForest parent } data BlockData m il bl = BlockData { blockSpec :: BlockSpec m il bl , blockLines :: [[Tok]] -- in reverse order , blockStartPos :: [SourcePos] -- in reverse order , blockData :: !Dynamic , blockBlanks :: [Int] -- non-content blank lines in block , blockAttributes :: !Attributes } deriving Show defBlockData :: BlockSpec m il bl -> BlockData m il bl defBlockData spec = BlockData { blockSpec = spec , blockLines = [] , blockStartPos = [] , blockData = toDyn () , blockBlanks = [] , blockAttributes = mempty } type BlockNode m il bl = Tree (BlockData m il bl) data BPState m il bl = BPState { referenceMap :: !ReferenceMap , inlineParser :: ReferenceMap -> [Tok] -> m (Either ParseError il) , nodeStack :: [BlockNode m il bl] -- reverse order, head is tip , blockMatched :: !Bool , maybeLazy :: !Bool , maybeBlank :: !Bool , counters :: M.Map Text Dynamic , failurePositions :: M.Map Text SourcePos -- record known positions -- where parsers fail to avoid repetition , attributeParsers :: [ParsecT [Tok] (BPState m il bl) m Attributes] , nextAttributes :: !Attributes } type BlockParser m il bl = ParsecT [Tok] (BPState m il bl) m data ListData = ListData { listType :: !ListType , listSpacing :: !ListSpacing } deriving (Show, Eq) data ListItemData = ListItemData { listItemType :: !ListType , listItemIndent :: !Int , listItemBlanksInside :: !Bool , listItemBlanksAtEnd :: !Bool } deriving (Show, Eq) -- | Get type of the enclosing List block. If the parent isn't -- a List block, return Nothing. getParentListType :: Monad m => BlockParser m il bl (Maybe ListType) getParentListType = do (cur:_) <- nodeStack <$> getState if blockType (bspec cur) == "List" then do let ListData lt _ = fromDyn (blockData (rootLabel cur)) (ListData (BulletList '*') TightList) return $ Just lt else return Nothing runInlineParser :: Monad m => [Tok] -> BlockParser m il bl il runInlineParser toks = {-# SCC runInlineParser #-} do refmap <- referenceMap <$> getState ilParser <- inlineParser <$> getState res <- lift $ ilParser refmap toks case res of Right ils -> return $! ils Left err -> mkPT (\_ -> return (Empty (return (Error err)))) -- pass up ParseError addRange :: (Monad m, IsBlock il bl) => BlockNode m il bl -> bl -> bl addRange (Node b _) = ranged (SourceRange (go . reverse $ map (\pos -> (pos, setSourceColumn (incSourceLine pos 1) 1)) (blockStartPos b))) where go [] = [] go ((!startpos1, !endpos1):(!startpos2, !endpos2):rest) | startpos1 == startpos2 , endpos1 == endpos2 = go ((startpos1, endpos2):rest) | endpos1 == startpos2 = go ((startpos1, endpos2):rest) go (x:xs) = x : go xs -- Add a new node to the block stack. If current tip can contain -- it, add it there; otherwise, close the tip and repeat til we get -- to a block that can contain this node. addNodeToStack :: Monad m => BlockNode m bl il -> BlockParser m bl il () addNodeToStack node = do (cur:rest) <- nodeStack <$> getState guard $ blockParagraph (bspec cur) || not (blockContainsLines (bspec cur)) if blockCanContain (bspec cur) (bspec node) then do nextAttr <- nextAttributes <$> getState let node' = if null nextAttr then node else let rl = rootLabel node in node{ rootLabel = rl{ blockAttributes = nextAttr }} updateState $ \st -> st{ nextAttributes = mempty , nodeStack = node' : cur : rest , maybeLazy = False } else case rest of (x:xs) -> do stack <- (:xs) <$> collapseNodeStack [cur,x] updateState $ \st -> st{ nodeStack = stack } addNodeToStack node _ -> mzero interruptsParagraph :: Monad m => BlockParser m bl il Bool interruptsParagraph = do (cur:_) <- nodeStack <$> getState return $! blockParagraph (bspec cur) renderChildren :: (Monad m, IsBlock il bl) => BlockNode m il bl -> BlockParser m il bl [bl] renderChildren node = mapM renderC $ subForest node where renderC n = do let attrs = blockAttributes (rootLabel n) (if null attrs then id else addAttributes attrs) . addRange n <$> blockConstructor (blockSpec (rootLabel n)) n docSpec :: (Monad m, IsBlock il bl, Monoid bl) => BlockSpec m il bl docSpec = BlockSpec { blockType = "Doc" , blockStart = mzero , blockCanContain = const True , blockContainsLines = False , blockParagraph = False , blockContinue = \n -> (,n) <$> getPosition , blockConstructor = fmap mconcat . renderChildren , blockFinalize = defaultFinalizer } refLinkDefSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl refLinkDefSpec = BlockSpec { blockType = "ReferenceLinkDefinition" , blockStart = mzero , blockCanContain = const False , blockContainsLines = False , blockParagraph = False , blockContinue = const mzero , blockConstructor = \node -> do let linkdefs = fromDyn (blockData (rootLabel node)) undefined :: [((SourceRange, Text), LinkInfo)] return $! mconcat $ map (\((range, lab), linkinfo) -> ranged range (addAttributes (linkAttributes linkinfo) (referenceLinkDefinition lab (linkDestination linkinfo, linkTitle linkinfo)))) linkdefs , blockFinalize = defaultFinalizer } -- Parse reference links from beginning of block text; -- update reference map and block text; return maybe altered node -- (if it still contains lines) and maybe ref link node. extractReferenceLinks :: (Monad m, IsBlock il bl) => BlockNode m il bl -> BlockParser m il bl (Maybe (BlockNode m il bl), Maybe (BlockNode m il bl)) extractReferenceLinks node = do st <- getState res <- lift $ runParserT ((,) <$> ((lookAhead anyTok >>= setPosition . tokPos) >> many1 (linkReferenceDef (choice $ attributeParsers st))) <*> getInput) st "" (getBlockText node) case res of Left _ -> return (Just node, Nothing) Right (linkdefs, toks') -> do mapM_ (\((_,lab),linkinfo) -> updateState $ \s -> s{ referenceMap = insertReference lab linkinfo (referenceMap s) }) linkdefs let isRefPos = case toks' of (t:_) -> (< tokPos t) _ -> const False let node' = if null toks' then Nothing else Just node{ rootLabel = (rootLabel node){ blockLines = [toks'], blockStartPos = dropWhile isRefPos (blockStartPos (rootLabel node)) } } let refnode = node{ rootLabel = (rootLabel node){ blockLines = takeWhile (any (isRefPos . tokPos)) (blockLines (rootLabel node)) , blockStartPos = takeWhile isRefPos (blockStartPos (rootLabel node)) , blockData = toDyn linkdefs , blockSpec = refLinkDefSpec }} return (node', Just refnode) attributeSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl attributeSpec = BlockSpec { blockType = "Attribute" , blockStart = do attrParsers <- attributeParsers <$> getState guard $ not (null attrParsers) interruptsParagraph >>= guard . not nonindentSpaces pos <- getPosition attrs <- choice attrParsers skipWhile (hasType Spaces) lookAhead (void lineEnd <|> eof) addNodeToStack $ Node (defBlockData attributeSpec){ blockData = toDyn attrs, blockStartPos = [pos] } [] return BlockStartMatch , blockCanContain = const False , blockContainsLines = False , blockParagraph = False , blockContinue = \n -> do attrParsers <- attributeParsers <$> getState guard $ not (null attrParsers) nonindentSpaces pos <- getPosition attrs <- choice attrParsers skipWhile (hasType Spaces) lookAhead (void lineEnd <|> eof) let oldattrs = fromDyn (blockData (rootLabel n)) mempty :: Attributes let attrs' = oldattrs <> attrs return (pos, n{ rootLabel = (rootLabel n){ blockData = toDyn attrs' }}) , blockConstructor = \_ -> return $! mempty , blockFinalize = \node parent -> do let attrs = fromDyn (blockData (rootLabel node)) mempty :: Attributes updateState $ \st -> st{ nextAttributes = attrs } defaultFinalizer node parent } paraSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl paraSpec = BlockSpec { blockType = "Paragraph" , blockStart = do interruptsParagraph >>= guard . not skipWhile (hasType Spaces) pos <- getPosition notFollowedBy lineEnd addNodeToStack $ Node (defBlockData paraSpec){ blockStartPos = [pos] } [] return BlockStartMatch , blockCanContain = const False , blockContainsLines = True , blockParagraph = True , blockContinue = \n -> lookAhead $ try $ do skipWhile (hasType Spaces) pos <- getPosition notFollowedBy lineEnd return $! (pos, n) , blockConstructor = \node -> paragraph <$> runInlineParser (getBlockText node) , blockFinalize = \child parent -> do (mbchild, mbrefdefs) <- extractReferenceLinks child case (mbchild, mbrefdefs) of (_, Nothing) -> defaultFinalizer child parent (Nothing, Just refnode) -> return $! parent{ subForest = refnode : subForest parent } (Just child', Just refnode) -> return $! parent{ subForest = child' : refnode : subForest parent } } plainSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl plainSpec = paraSpec{ blockConstructor = \node -> plain <$> runInlineParser (getBlockText node) } linkReferenceDef :: Monad m => ParsecT [Tok] s m Attributes -> ParsecT [Tok] s m ((SourceRange, Text), LinkInfo) linkReferenceDef attrParser = try $ do startpos <- getPosition lab <- pLinkLabel guard $ not $ T.all isSpace lab symbol ':' optional whitespace linkpos <- getPosition dest <- pLinkDestination (title, attrs) <- option (mempty, mempty) $ try $ do tit <- option mempty $ try (whitespace *> pLinkTitle) skipWhile (hasType Spaces) as <- option mempty attrParser skipWhile (hasType Spaces) lookAhead (void lineEnd <|> eof) return (tit, as) endpos <- getPosition void lineEnd <|> eof return ((SourceRange [(startpos, endpos)], lab), LinkInfo{ linkDestination = unEntity dest , linkTitle = unEntity title , linkAttributes = attrs , linkPos = Just linkpos }) atxHeadingSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl atxHeadingSpec = BlockSpec { blockType = "ATXHeading" , blockStart = do nonindentSpaces pos <- getPosition hashes <- many1 (symbol '#') let level = length hashes guard $ level <= 6 (spaceTok *> skipMany spaceTok) <|> void (lookAhead lineEnd) <|> lookAhead eof raw <- many (satisfyTok (not . hasType LineEnd)) -- trim off closing ### let removeClosingHash (_ :: Int) [] = [] removeClosingHash 0 (Tok Spaces _ _ : xs) = removeClosingHash 0 xs removeClosingHash _ (Tok (Symbol '#') _ _ : Tok (Symbol '\\') _ _ : _) = reverse raw removeClosingHash _ (Tok (Symbol '#') _ _ : xs) = removeClosingHash 1 xs removeClosingHash 1 (Tok Spaces _ _ : xs) = xs removeClosingHash 1 (x:_) | tokType x /= Symbol '#' = reverse raw removeClosingHash _ xs = xs let raw' = reverse . removeClosingHash 0 . reverse $ raw addNodeToStack $ Node (defBlockData atxHeadingSpec){ blockLines = [raw'], blockData = toDyn level, blockStartPos = [pos] } [] return BlockStartMatch , blockCanContain = const False , blockContainsLines = False , blockParagraph = False , blockContinue = const mzero , blockConstructor = \node -> do let level = fromDyn (blockData (rootLabel node)) 1 ils <- runInlineParser (getBlockText node) return $! heading level ils , blockFinalize = \node@(Node cdata children) parent -> do let oldAttr = blockAttributes cdata let toks = getBlockText node (newtoks, attr) <- parseFinalAttributes True toks <|> (return (toks, mempty)) defaultFinalizer (Node cdata{ blockAttributes = oldAttr <> attr , blockLines = [newtoks] } children) parent } setextHeadingSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl setextHeadingSpec = BlockSpec { blockType = "SetextHeading" , blockStart = do (cur:rest) <- nodeStack <$> getState guard $ blockParagraph (bspec cur) nonindentSpaces pos <- getPosition level <- (2 :: Int) <$ skipMany1 (symbol '-') <|> (1 :: Int) <$ skipMany1 (symbol '=') skipWhile (hasType Spaces) lookAhead (eof <|> void lineEnd) -- process any reference links, make sure there's some -- content left (mbcur, mbrefdefs) <- extractReferenceLinks cur updateState $ \st -> st{ nodeStack = case mbrefdefs of Nothing -> rest Just rd -> case rest of (x:xs) -> x{ subForest = rd : subForest x }:xs [] -> [rd] } case mbcur of Nothing -> mzero -- should not happen Just cur' -> do -- replace cur with new setext heading node addNodeToStack $ Node (rootLabel cur'){ blockSpec = setextHeadingSpec, blockData = toDyn level, blockStartPos = blockStartPos (rootLabel cur') ++ [pos] } [] return BlockStartMatch , blockCanContain = const False , blockContainsLines = True , blockParagraph = False , blockContinue = const mzero , blockConstructor = \node -> do let level = fromDyn (blockData (rootLabel node)) 1 ils <- runInlineParser (getBlockText node) return $! heading level ils , blockFinalize = \node@(Node cdata children) parent -> do let oldAttr = blockAttributes cdata let toks = getBlockText node (newtoks, attr) <- parseFinalAttributes True toks <|> (return (toks, mempty)) defaultFinalizer (Node cdata{ blockAttributes = oldAttr <> attr , blockLines = [newtoks] } children) parent } parseFinalAttributes :: Monad m => Bool -> [Tok] -> BlockParser m il bl ([Tok], Attributes) parseFinalAttributes requireWhitespace ts = do attrParsers <- attributeParsers <$> getState let pAttr' = try $ (if requireWhitespace then () <$ whitespace else optional whitespace) *> choice attrParsers <* optional whitespace <* eof st <- getState res <- lift $ runParserT ((,) <$> many (notFollowedBy pAttr' >> anyTok) <*> option [] pAttr') st "heading contents" ts case res of Left _ -> mzero Right (xs, ys) -> return (xs, ys) blockQuoteSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl blockQuoteSpec = BlockSpec { blockType = "BlockQuote" , blockStart = do nonindentSpaces pos <- getPosition _ <- symbol '>' _ <- option 0 (gobbleSpaces 1) addNodeToStack $ Node (defBlockData blockQuoteSpec){ blockStartPos = [pos] } [] return BlockStartMatch , blockCanContain = const True , blockContainsLines = False , blockParagraph = False , blockContinue = \n -> try $ do nonindentSpaces pos <- getPosition _ <- symbol '>' _ <- gobbleUpToSpaces 1 return (pos, n) , blockConstructor = fmap (blockQuote . mconcat) . renderChildren , blockFinalize = defaultFinalizer } listItemSpec :: (Monad m, IsBlock il bl) => BlockParser m il bl ListType -> BlockSpec m il bl listItemSpec parseListMarker = BlockSpec { blockType = "ListItem" , blockStart = do (pos, lidata) <- itemStart parseListMarker let linode = Node (defBlockData $ listItemSpec parseListMarker){ blockData = toDyn lidata, blockStartPos = [pos] } [] let listdata = ListData{ listType = listItemType lidata , listSpacing = TightList } -- spacing gets set in finalize let listnode = Node (defBlockData listSpec){ blockData = toDyn listdata, blockStartPos = [pos] } [] -- list can only interrupt paragraph if bullet -- list or ordered list w/ startnum == 1, -- and not followed by blank (cur:_) <- nodeStack <$> getState when (blockParagraph (bspec cur)) $ do guard $ case listType listdata of BulletList _ -> True OrderedList 1 Decimal _ -> True _ -> False notFollowedBy blankLine let curdata = fromDyn (blockData (rootLabel cur)) (ListData (BulletList '*') TightList) let isSingleRomanDigit n = n == 1 || n == 5 || n == 10 || n == 50 || n == 100 || n == 500 || n == 1000 let matchesOrderedListStyle (OrderedList _s1 e1 d1) (OrderedList s2 e2 d2) = d1 == d2 && -- roman can match alphabetic if single-digit: case (e1, e2) of (LowerAlpha, LowerRoman) -> isSingleRomanDigit s2 (UpperAlpha, UpperRoman) -> isSingleRomanDigit s2 (LowerRoman, LowerAlpha) -> isSingleRomanDigit s2 (UpperRoman, UpperAlpha) -> isSingleRomanDigit s2 _ -> e1 == e2 matchesOrderedListStyle _ _ = False let matchesList (BulletList c) (BulletList d) = c == d matchesList x@OrderedList{} y@OrderedList{} = matchesOrderedListStyle x y matchesList _ _ = False case blockType (bspec cur) of "List" | listType curdata `matchesList` listItemType lidata -> addNodeToStack linode _ -> addNodeToStack listnode >> addNodeToStack linode return BlockStartMatch , blockCanContain = const True , blockContainsLines = False , blockParagraph = False , blockContinue = \node@(Node ndata children) -> do let lidata = fromDyn (blockData ndata) (ListItemData (BulletList '*') 0 False False) -- a marker followed by two blanks is just an empty item: guard $ null (blockBlanks ndata) || not (null children) pos <- getPosition gobbleSpaces (listItemIndent lidata) <|> 0 <$ lookAhead blankLine return (pos, node) , blockConstructor = fmap mconcat . renderChildren , blockFinalize = \(Node cdata children) parent -> do let lidata = fromDyn (blockData cdata) (ListItemData (BulletList '*') 0 False False) let allblanks = concat $ blockBlanks cdata : map (blockBlanks . rootLabel) (filter ((== "List") . blockType . blockSpec . rootLabel) children) curline <- sourceLine <$> getPosition let blanksAtEnd = case allblanks of (l:_) -> l >= curline - 1 _ -> False let blanksInside = case length (removeConsecutive allblanks) of n | n > 1 -> True | n == 1 -> not blanksAtEnd | otherwise -> False let lidata' = toDyn $ lidata{ listItemBlanksInside = blanksInside , listItemBlanksAtEnd = blanksAtEnd } defaultFinalizer (Node cdata{ blockData = lidata' } children) parent } itemStart :: Monad m => BlockParser m il bl ListType -> BlockParser m il bl (SourcePos, ListItemData) itemStart parseListMarker = do beforecol <- sourceColumn <$> getPosition gobbleUpToSpaces 3 pos <- getPosition ty <- parseListMarker aftercol <- sourceColumn <$> getPosition lookAhead whitespace numspaces <- try (gobbleUpToSpaces 4 <* notFollowedBy whitespace) <|> gobbleSpaces 1 <|> 1 <$ lookAhead lineEnd return (pos, ListItemData{ listItemType = ty , listItemIndent = (aftercol - beforecol) + numspaces , listItemBlanksInside = False , listItemBlanksAtEnd = False }) bulletListMarker :: Monad m => BlockParser m il bl ListType bulletListMarker = do Tok (Symbol c) _ _ <- symbol '-' <|> symbol '*' <|> symbol '+' return $! BulletList c orderedListMarker :: Monad m => BlockParser m il bl ListType orderedListMarker = do Tok WordChars _ ds <- satisfyWord (\t -> T.all isDigit t && T.length t < 10) (start :: Int) <- either fail (return . fst) (TR.decimal ds) delimtype <- Period <$ symbol '.' <|> OneParen <$ symbol ')' return $! OrderedList start Decimal delimtype listSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl listSpec = BlockSpec { blockType = "List" , blockStart = mzero , blockCanContain = \sp -> blockType sp == "ListItem" , blockContainsLines = False , blockParagraph = False , blockContinue = \n -> (,n) <$> getPosition , blockConstructor = \node -> do let ListData lt ls = fromDyn (blockData (rootLabel node)) (ListData (BulletList '*') TightList) list lt ls <$> renderChildren node , blockFinalize = \(Node cdata children) parent -> do let ListData lt _ = fromDyn (blockData cdata) (ListData (BulletList '*') TightList) let getListItemData (Node d _) = fromDyn (blockData d) (ListItemData (BulletList '*') 0 False False) let childrenData = map getListItemData children let ls = case childrenData of c:cs | any listItemBlanksInside (c:cs) || (not (null cs) && any listItemBlanksAtEnd cs) -> LooseList _ -> TightList blockBlanks' <- case childrenData of c:_ | listItemBlanksAtEnd c -> do curline <- sourceLine <$> getPosition return $! curline - 1 : blockBlanks cdata _ -> return $! blockBlanks cdata let ldata' = toDyn (ListData lt ls) -- need to transform paragraphs on tight lists let totight (Node nd cs) | blockType (blockSpec nd) == "Paragraph" = Node nd{ blockSpec = plainSpec } cs | otherwise = Node nd cs let childrenToTight (Node nd cs) = Node nd (map totight cs) let children' = if ls == TightList then map childrenToTight children else children defaultFinalizer (Node cdata{ blockData = ldata' , blockBlanks = blockBlanks' } children') parent } thematicBreakSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl thematicBreakSpec = BlockSpec { blockType = "ThematicBreak" , blockStart = do nonindentSpaces pos <- getPosition Tok (Symbol c) _ _ <- symbol '-' <|> symbol '_' <|> symbol '*' skipWhile (hasType Spaces) let tbchar = symbol c <* skipWhile (hasType Spaces) count 2 tbchar skipMany tbchar (do lookAhead lineEnd addNodeToStack (Node (defBlockData thematicBreakSpec){ blockStartPos = [pos] } []) return BlockStartMatch) <|> (BlockStartNoMatchBefore <$> getPosition) , blockCanContain = const False , blockContainsLines = False , blockParagraph = False , blockContinue = const mzero , blockConstructor = \_ -> return thematicBreak , blockFinalize = defaultFinalizer } indentedCodeSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl indentedCodeSpec = BlockSpec { blockType = "IndentedCode" , blockStart = do interruptsParagraph >>= guard . not getState >>= guard . not . maybeLazy _ <- gobbleSpaces 4 pos <- getPosition notFollowedBy blankLine addNodeToStack $ Node (defBlockData indentedCodeSpec){ blockStartPos = [pos] } [] return BlockStartMatch , blockCanContain = const False , blockContainsLines = True , blockParagraph = False , blockContinue = \node -> do void (gobbleSpaces 4) <|> try (skipWhile (hasType Spaces) <* lookAhead lineEnd) pos <- getPosition return (pos, node) , blockConstructor = \node -> return $! codeBlock mempty (untokenize (getBlockText node)) , blockFinalize = \(Node cdata children) parent -> do -- strip off blank lines at end: let blanks = takeWhile isblankLine $ blockLines cdata let numblanks = length blanks let cdata' = cdata{ blockLines = drop numblanks $ blockLines cdata , blockStartPos = drop numblanks $ blockStartPos cdata } defaultFinalizer (Node cdata' children) parent } isblankLine :: [Tok] -> Bool isblankLine [] = True isblankLine [Tok LineEnd _ _] = True isblankLine (Tok Spaces _ _ : xs) = isblankLine xs isblankLine _ = False fencedCodeSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl fencedCodeSpec = BlockSpec { blockType = "FencedCode" , blockStart = do prepos <- getPosition nonindentSpaces pos <- getPosition let indentspaces = sourceColumn pos - sourceColumn prepos (c, ticks) <- (('`',) <$> many1 (symbol '`')) <|> (('~',) <$> many1 (symbol '~')) let fencelength = length ticks guard $ fencelength >= 3 skipWhile (hasType Spaces) let infoTok = noneOfToks (LineEnd : [Symbol '`' | c == '`']) info <- T.strip . unEntity <$> many (pEscaped <|> infoTok) lookAhead $ void lineEnd <|> eof let infotoks = tokenize "info string" info (content, attrs) <- parseFinalAttributes False infotoks <|> (return (infotoks, mempty)) addNodeToStack $ Node (defBlockData fencedCodeSpec){ blockData = toDyn (c, fencelength, indentspaces, untokenize content, attrs), blockStartPos = [pos] } [] return BlockStartMatch , blockCanContain = const False , blockContainsLines = True , blockParagraph = False , blockContinue = \node -> try (do let ((c, fencelength, _, _, _) :: (Char, Int, Int, Text, Attributes)) = fromDyn (blockData (rootLabel node)) ('`', 3, 0, mempty, mempty) nonindentSpaces pos <- getPosition ts <- many1 (symbol c) guard $ length ts >= fencelength skipWhile (hasType Spaces) lookAhead $ void lineEnd <|> eof endOfBlock return $! (pos, node)) <|> (do let ((_, _, indentspaces, _, _) :: (Char, Int, Int, Text, Attributes)) = fromDyn (blockData (rootLabel node)) ('`', 3, 0, mempty, mempty) pos <- getPosition _ <- gobbleUpToSpaces indentspaces return (pos, node)) , blockConstructor = \node -> do let ((_, _, _, info, attrs) :: (Char, Int, Int, Text, Attributes)) = fromDyn (blockData (rootLabel node)) ('`', 3, 0, mempty, mempty) let codetext = untokenize $ drop 1 (getBlockText node) return $! if null attrs then codeBlock info codetext else addAttributes attrs $ codeBlock info codetext , blockFinalize = defaultFinalizer } rawHtmlSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl rawHtmlSpec = BlockSpec { blockType = "RawHTML" , blockStart = do pos <- getPosition (rawHtmlType, toks) <- withRaw $ do nonindentSpaces symbol '<' ty <- choice $ map (\n -> n <$ startCond n) [1..7] -- some blocks can end on same line finished <- option False $ do guard (ty /= 6 && ty /= 7) endCond ty return True when (ty == 7) $ do -- type 7 blocks can't interrupt a paragraph (n:_) <- nodeStack <$> getState guard $ not $ blockParagraph (bspec n) skipWhile (not . hasType LineEnd) -- we use 0 as a code to indicate that the block is closed return $! if finished then 0 else ty addNodeToStack $ Node (defBlockData rawHtmlSpec){ blockData = toDyn rawHtmlType, blockLines = [toks], blockStartPos = [pos] } [] return BlockStartMatch , blockCanContain = const False , blockContainsLines = True , blockParagraph = False , blockContinue = \node@(Node ndata children) -> try $ do pos <- getPosition case fromDyn (blockData (rootLabel node)) (0 :: Int) of 0 -> mzero -- 0 means that the block start already closed 6 -> (pos, node) <$ notFollowedBy blankLine 7 -> (pos, node) <$ notFollowedBy blankLine n -> (do pos' <- getPosition lookAhead (endCond n) endOfBlock toks <- many (satisfyTok (not . hasType LineEnd)) le <- option [] $ (:[]) <$> lookAhead lineEnd return $! (pos', Node ndata{ blockData = toDyn (0 :: Int) , blockLines = (toks ++ le) : blockLines ndata } children)) <|> (return (pos, node)) , blockConstructor = \node -> return $! rawBlock (Format "html") (untokenize (getBlockText node)) , blockFinalize = defaultFinalizer } ---------------- for raw html: startCond :: Monad m => Int -> BlockParser m il bl () startCond 1 = void $ try $ do satisfyWord (isOneOfCI ["script","pre","style","textarea"]) spaceTok <|> symbol '>' <|> lookAhead lineEnd startCond 2 = void $ try $ do symbol '!' symbol '-' symbol '-' startCond 3 = void $ symbol '?' startCond 4 = void $ try $ do symbol '!' satisfyWord (\t -> case T.uncons t of Just (c, _) -> isAsciiUpper c _ -> False) startCond 5 = void $ try $ do symbol '!' symbol '[' satisfyWord (== "CDATA") symbol '[' startCond 6 = void $ try $ do optional (symbol '/') satisfyWord (isOneOfCI ["address", "article", "aside", "base", "basefont", "blockquote", "body", "caption", "center", "col", "colgroup", "dd", "details", "dialog", "dir", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hr", "html", "iframe", "legend", "li", "link", "main", "menu", "menuitem", "nav", "noframes", "ol", "optgroup", "option", "p", "param", "section", "source", "summary", "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "track", "ul"]) spaceTok <|> lookAhead lineEnd <|> symbol '>' <|> (symbol '/' >> symbol '>') startCond 7 = void $ try $ do toks <- htmlOpenTag <|> htmlClosingTag guard $ not $ any (hasType LineEnd) toks skipWhile (hasType Spaces) lookAhead lineEnd startCond n = fail $ "Unknown HTML block type " ++ show n endCond :: Monad m => Int -> BlockParser m il bl () endCond 1 = try $ do let closer = try $ do symbol '<' symbol '/' satisfyWord (isOneOfCI ["script","pre","style","textarea"]) symbol '>' skipManyTill (satisfyTok (not . hasType LineEnd)) closer endCond 2 = try $ do let closer = try $ symbol '-' >> symbol '-' >> symbol '>' skipManyTill (satisfyTok (not . hasType LineEnd)) closer endCond 3 = try $ do let closer = try $ symbol '?' >> symbol '>' skipManyTill (satisfyTok (not . hasType LineEnd)) closer endCond 4 = try $ skipManyTill (satisfyTok (not . hasType LineEnd)) (symbol '>') endCond 5 = try $ do let closer = try $ symbol ']' >> symbol ']' >> symbol '>' skipManyTill (satisfyTok (not . hasType LineEnd)) closer endCond 6 = void blankLine endCond 7 = void blankLine endCond n = fail $ "Unknown HTML block type " ++ show n -------------------------------- getBlockText :: BlockNode m il bl -> [Tok] getBlockText = concat . reverse . blockLines . rootLabel removeIndent :: [Tok] -> [Tok] removeIndent = dropWhile (hasType Spaces) removeConsecutive :: [Int] -> [Int] removeConsecutive (x:y:zs) | x == y + 1 = removeConsecutive (y:zs) removeConsecutive xs = xs ------------------------------------------------------------------------- collapseNodeStack :: [BlockNode m il bl] -> BlockParser m il bl (BlockNode m il bl) collapseNodeStack [] = error "Empty node stack!" -- should not happen collapseNodeStack (n:ns) = foldM go n ns where go child parent = if blockCanContain (bspec parent) (bspec child) then blockFinalize (bspec child) child parent else error $ "collapseNodeStack: " ++ T.unpack (blockType (bspec parent)) ++ " cannot contain " ++ T.unpack (blockType (bspec child)) bspec :: BlockNode m il bl -> BlockSpec m il bl bspec = blockSpec . rootLabel endOfBlock :: Monad m => BlockParser m il bl () endOfBlock = updateState $ \st -> st{ blockMatched = False }