{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE StrictData #-} {- | Module : Text.Pandoc.Transforms Copyright : © 2006-2024 John MacFarlane License : GPL-2.0-or-later Maintainer : John MacFarlane <jgm@berkeley@edu> Transformation of a Pandoc document post-parsing -} module Text.Pandoc.Transforms ( Transform , applyTransforms , adjustLinksAndIds , eastAsianLineBreakFilter , filterIpynbOutput , headerShift ) where import Data.List (sortOn) import Data.Text (Text) import Network.URI (unEscapeString) import Text.DocLayout (charWidth) import Text.Pandoc.Definition ( Pandoc (..), Attr, Block (..), Format (..), Inline (..) ) import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.Options (Extensions) import Text.Pandoc.Shared (stringify, textToIdentifier) import Text.Pandoc.Walk (walk) import qualified Data.Text as T import qualified Text.Pandoc.Builder as B -- | Transformation of a Pandoc document post-parsing type Transform = Pandoc -> Pandoc -- | Apply a list of transforms to a document, in order. applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc applyTransforms transforms d = return $ foldr ($) d transforms -- | Prefixes identifiers with a string derived from the filepath of -- @thisfile@; fixes links to targets in @allfiles@ accordingly. adjustLinksAndIds :: Extensions -- ^ defines how IDs are generated -> Text -- ^ thisfile -> [Text] -- ^ allfiles -> Transform adjustLinksAndIds exts thisfile allfiles | length allfiles > 1 = walk fixInline . walk fixBlock | otherwise = id where -- fix ids in blocks fixBlock :: Block -> Block fixBlock (CodeBlock attr t) = CodeBlock (fixAttrs attr) t fixBlock (Header lev attr ils) = Header lev (fixAttrs attr) ils fixBlock (Table attr cap cols th tbs tf) = Table (fixAttrs attr) cap cols th tbs tf fixBlock (Div attr bs) = Div (fixAttrs attr) bs fixBlock x = x -- fix ids and links in inlines fixInline :: Inline -> Inline fixInline (Code attr t) = Code (fixAttrs attr) t fixInline (Link attr ils (url,tit)) = Link (fixAttrs attr) ils (fixURL url,tit) fixInline (Image attr ils (url,tit)) = Image (fixAttrs attr) ils (fixURL url,tit) fixInline (Span attr ils) = Span (fixAttrs attr) ils fixInline x = x -- add thisfile as prefix of identifier fixAttrs :: Attr -> Attr fixAttrs (i,cs,kvs) | T.null i = (i,cs,kvs) | otherwise = (T.intercalate "__" (filter (not . T.null) [toIdent thisfile, i]), cs, kvs) -- turns a filepath into an identifier toIdent :: Text -> Text toIdent = textToIdentifier exts . T.intercalate "__" . T.split (\c -> c == '/' || c == '\\') -- if URL begins with file from allfiles, convert to -- an internal link with the appropriate identifier fixURL :: Text -> Text fixURL u = let (a,b) = T.break (== '#') $ T.pack . unEscapeString . T.unpack $ u filepart = if T.null a then toIdent thisfile else toIdent a fragpart = T.dropWhile (== '#') b in if T.null a || a `elem` allfiles then "#" <> T.intercalate "__" (filter (not . T.null) [filepart, fragpart]) else u -- | Process ipynb output cells. If mode is Nothing, -- remove all output. If mode is Just format, select -- best output for the format. If format is not ipynb, -- strip out ANSI escape sequences from CodeBlocks (see #5633). filterIpynbOutput :: Maybe Format -> Pandoc -> Pandoc filterIpynbOutput mode = walk go where go (Div (ident, "output":os, kvs) bs) = case mode of Nothing -> Div (ident, "output":os, kvs) [] -- "best" for ipynb includes all formats: Just fmt | fmt == Format "ipynb" -> Div (ident, "output":os, kvs) bs | otherwise -> Div (ident, "output":os, kvs) $ walk removeANSI $ take 1 $ sortOn rank bs where rank (RawBlock (Format "html") _) | fmt == Format "html" = 1 :: Int | fmt == Format "markdown" = 3 | otherwise = 4 rank (RawBlock (Format "latex") _) | fmt == Format "latex" = 1 | fmt == Format "markdown" = 3 | otherwise = 4 rank (RawBlock f _) | fmt == f = 1 | otherwise = 4 rank (Para [Image{}]) = 2 rank _ = 3 removeANSI (CodeBlock attr code) = CodeBlock attr (removeANSIEscapes code) removeANSI x = x removeANSIEscapes t | Just cs <- T.stripPrefix "\x1b[" t = removeANSIEscapes $ T.drop 1 $ T.dropWhile (/='m') cs | Just (c, cs) <- T.uncons t = T.cons c $ removeANSIEscapes cs | otherwise = "" go x = x -- | Remove soft breaks between East Asian characters. eastAsianLineBreakFilter :: Pandoc -> Pandoc eastAsianLineBreakFilter = bottomUp go where go (x:SoftBreak:y:zs) | Just (_, b) <- T.unsnoc $ stringify x , Just (c, _) <- T.uncons $ stringify y , charWidth b == 2 , charWidth c == 2 = x:y:zs | otherwise = x:SoftBreak:y:zs go xs = xs -- | Shift header levels up or down. headerShift :: Int -> Pandoc -> Pandoc headerShift n (Pandoc meta (Header m _ ils : bs)) | n < 0 , m + n == 0 = headerShift n $ B.setTitle (B.fromList ils) $ Pandoc meta bs -- for this case, see #10459: headerShift n (Pandoc meta (Div attr@(_,"section":_,_) (Header m _ ils : as) : bs)) | n < 0 , m + n == 0 = headerShift n $ B.setTitle (B.fromList ils) $ Pandoc meta (Div attr as : bs) headerShift n (Pandoc meta bs) = Pandoc meta (walk shift bs) where shift :: Block -> Block shift (Header level attr inner) | level + n > 0 = Header (level + n) attr inner | otherwise = Para inner shift x = x