{- SPDX-FileCopyrightText: 2018-2019 Serokell - - SPDX-License-Identifier: MPL-2.0 -} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | Markdown documents markdownScanner. module Xrefcheck.Scanners.Markdown ( markdownScanner , markdownSupport , parseFileInfo ) where import CMarkGFM (Node (..), NodeType (..), PosInfo (..), commonmarkToNode) import Control.Lens ((%=)) import Control.Monad.Trans.Except (Except, runExcept, throwE) import qualified Data.ByteString.Lazy as BSL import Data.Char (isSpace) import Data.Default (Default (..)) import qualified Data.Text as T import qualified Data.Text.Lazy as LT import Fmt (Buildable (..), blockListF, nameF, (+|), (|+)) import Xrefcheck.Core import Xrefcheck.Scan instance Buildable Node where build (Node _mpos ty subs) = nameF (show ty) $ blockListF subs toPosition :: Maybe PosInfo -> Position toPosition = Position . \case Nothing -> Nothing Just PosInfo{..} | startLine == endLine -> Just $ startLine |+ ":" +| startColumn |+ "-" +| endColumn |+ "" | otherwise -> Just $ " " +| startLine |+ ":" +| startColumn |+ " - " +| endLine |+ ":" +| endColumn |+ "" nodeFlatten :: Node -> [NodeType] nodeFlatten (Node _pos ty subs) = ty : concatMap nodeFlatten subs nodeExtractText :: Node -> Text nodeExtractText = mconcat . map extractText . nodeFlatten where extractText = \case TEXT t -> t CODE t -> t _ -> "" data IgnoreMode = Link | Paragraph | File deriving Eq nodeExtractInfo :: Node -> Except Text FileInfo nodeExtractInfo (Node _ _ docNodes) = if checkIgnoreFile docNodes then return def else finaliseFileInfo <$> extractionResult where extractionResult :: Except Text FileInfo extractionResult = execStateT (loop docNodes Nothing) def loop :: [Node] -> Maybe IgnoreMode -> StateT FileInfo (Except Text) () loop [] _ = pass loop (node@(Node pos ty subs) : nodes) toIgnore | toIgnore == Just File = returnError toIgnore "" pos | toIgnore == Just Link = do let (Node startPos _ _) = maybe defNode id $ safeHead subs let mNext = case ty of PARAGRAPH -> afterIgnoredLink subs <> Just nodes TEXT txt | null (dropWhile isSpace $ T.unpack txt) -> afterIgnoredLink nodes SOFTBREAK -> afterIgnoredLink nodes _ -> afterIgnoredLink (node : nodes) case mNext of Just next -> loop next Nothing Nothing -> returnError toIgnore "" startPos | toIgnore == Just Paragraph = case ty of PARAGRAPH -> loop nodes Nothing _ -> returnError toIgnore (prettyType ty) pos | otherwise = case ty of HTML_BLOCK _ -> processHtmlNode node pos nodes toIgnore HEADING lvl -> do let aType = HeaderAnchor lvl let aName = headerToAnchor $ nodeExtractText node let aPos = toPosition pos fiAnchors %= (Anchor{..} :) loop nodes toIgnore HTML_INLINE htmlText -> do let mName = T.stripSuffix "\">" =<< T.stripPrefix " do let aType = HandAnchor aPos = toPosition pos fiAnchors %= (Anchor{..} :) processHtmlNode node pos nodes toIgnore LINK url _ -> do let rName = nodeExtractText node rPos = toPosition pos link = if null url then rName else url let (rLink, rAnchor) = case T.splitOn "#" link of [t] -> (t, Nothing) t : ts -> (t, Just $ T.intercalate "#" ts) [] -> error "impossible" fiReferences %= (Reference{..} :) loop nodes toIgnore _ -> loop (subs ++ nodes) toIgnore defNode :: Node defNode = Node Nothing DOCUMENT [] -- hard-coded default Node getCommentContent :: Node -> Maybe Text getCommentContent node = do txt <- getHTMLText node T.stripSuffix "-->" =<< T.stripPrefix "