{- SPDX-FileCopyrightText: 2018-2019 Serokell - - SPDX-License-Identifier: MPL-2.0 -} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | Markdown documents markdownScanner. module Xrefcheck.Scanners.Markdown ( markdownScanner , markdownSupport ) where import CMarkGFM (Node (..), NodeType (..), PosInfo (..), commonmarkToNode) import Control.Lens ((%=)) import qualified Data.ByteString.Lazy as BSL import Data.Default (Default (..)) import qualified Data.Text as T import qualified Data.Text.Lazy as LT import Fmt (Buildable (..), blockListF, nameF, (+|), (|+)) import GHC.Conc (par) import Xrefcheck.Core import Xrefcheck.Scan instance Buildable Node where build (Node _mpos ty subs) = nameF (show ty) $ blockListF subs toPosition :: Maybe PosInfo -> Position toPosition = Position . \case Nothing -> Nothing Just PosInfo{..} | startLine == endLine -> Just $ startLine |+ ":" +| startColumn |+ "-" +| endColumn |+ "" | otherwise -> Just $ " " +| startLine |+ ":" +| startColumn |+ " - " +| endLine |+ ":" +| endColumn |+ "" nodeFlatten :: Node -> [NodeType] nodeFlatten (Node _pos ty subs) = ty : concatMap nodeFlatten subs nodeExtractText :: Node -> Text nodeExtractText = mconcat . map extractText . nodeFlatten where extractText = \case TEXT t -> t CODE t -> t _ -> "" nodeExtractInfo :: Node -> ExceptT Text Identity FileInfo nodeExtractInfo docNode = fmap finaliseFileInfo $ execStateT (loop docNode) def where loop node@(Node pos ty subs) = case ty of DOCUMENT -> mapM_ loop subs PARAGRAPH -> mapM_ loop subs HEADING lvl -> let text = nodeExtractText node aType = HeaderAnchor lvl aName = headerToAnchor text aPos = toPosition pos in fiAnchors %= (Anchor{..} :) LIST _ -> mapM_ loop subs ITEM -> mapM_ loop subs HTML_INLINE htmlText -> do let mName = T.stripSuffix "\">" =<< T.stripPrefix " do let aType = HandAnchor aPos = toPosition pos fiAnchors %= (Anchor{..} :) LINK url _ -> do let rName = nodeExtractText node rPos = toPosition pos link = if null url then rName else url let (rLink, rAnchor) = case T.splitOn "#" link of [t] -> (t, Nothing) t : ts -> (t, Just $ T.intercalate "#" ts) [] -> error "impossible" fiReferences %= (Reference{..} :) _ -> pass parseFileInfo :: FilePath -> LT.Text -> FileInfo parseFileInfo path input = let outcome = runIdentity . runExceptT $ nodeExtractInfo $ commonmarkToNode [] [] $ toStrict input in case outcome of Left err -> error $ "Failed to parse file " <> show path <> ": " <> show err Right res -> res markdownScanner :: ScanAction markdownScanner path = liftIO $ do res <- parseFileInfo path . decodeUtf8 <$> BSL.readFile path force res `par` return res markdownSupport :: ([Extension], ScanAction) markdownSupport = ([".md"], markdownScanner)