{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module PandocToMarkdown
( pandocToMarkdown,
NotSafe (..),
Rectangle (..),
rectanglerize,
combineRectangles,
buildRow,
widthOf,
heightOf,
tableToMarkdown,
)
where
import Control.DeepSeq (NFData)
import Core.System.Base
import Core.Text
import Data.Foldable (foldl')
import Data.List (intersperse)
import qualified Data.Text as T (Text, null)
import GHC.Generics (Generic)
import Text.Pandoc
( Alignment (..),
Attr,
Block (..),
Format (..),
Inline (..),
ListAttributes,
MathType (..),
Pandoc (..),
QuoteType (..),
TableCell,
)
import Text.Pandoc.Shared (orderedListMarkers)
__WIDTH__ :: Int
__WIDTH__ = 78
pandocToMarkdown :: Pandoc -> Rope
pandocToMarkdown (Pandoc _ blocks) =
blocksToMarkdown __WIDTH__ blocks
blocksToMarkdown :: Int -> [Block] -> Rope
blocksToMarkdown _ [] = emptyRope
blocksToMarkdown margin (block1 : blocks) =
convertBlock margin block1
<> foldl'
(\text block -> text <> "\n" <> convertBlock margin block)
emptyRope
blocks
convertBlock :: Int -> Block -> Rope
convertBlock margin block =
let msg = "Unfinished block: " ++ show block -- FIXME
in case block of
Plain inlines -> plaintextToMarkdown margin inlines
Para inlines -> paragraphToMarkdown margin inlines
Header level _ inlines -> headingToMarkdown level inlines
Null -> emptyRope
RawBlock (Format "tex") string -> intoRope string <> "\n"
RawBlock (Format "html") string -> intoRope string <> "\n"
RawBlock _ _ -> error msg
CodeBlock attr string -> codeToMarkdown attr string
LineBlock list -> poemToMarkdown list
BlockQuote blocks -> quoteToMarkdown margin blocks
BulletList blockss -> bulletlistToMarkdown margin blockss
OrderedList attrs blockss -> orderedlistToMarkdown margin attrs blockss
DefinitionList blockss -> definitionlistToMarkdown margin blockss
HorizontalRule -> "---\n"
Table caption alignments relatives headers rows -> tableToMarkdown caption alignments relatives headers rows
Div attr blocks -> divToMarkdown margin attr blocks
{-
This does **not** emit a newline at the end. The intersperse happening in
`blocksToMarkdown` will terminate the line, but you won't get a blank line
between blocks as is the convention everywhere else (this was critical when
lists were nested in tight lists).
-}
plaintextToMarkdown :: Int -> [Inline] -> Rope
plaintextToMarkdown margin inlines =
wrap' margin (inlinesToMarkdown inlines)
{-
Everything was great until we had to figure out how to deal with line
breaks aka
, represented in Markdown by [' ',' ']. We do this by
replacing the line break Inline with \x2028. This character, U+2028 LS, is
the Line Separator character. It's one of those symbols up in General
Punctuation that no one ever uses. So we use it as a sentinel internally
here; first we break on those, and then we wrap the results.
-}
paragraphToMarkdown :: Int -> [Inline] -> Rope
paragraphToMarkdown margin inlines =
wrap' margin (inlinesToMarkdown inlines) <> "\n"
wrap' :: Int -> Rope -> Rope
wrap' margin =
mconcat . intersperse " \n" . fmap (wrap margin) . breakPieces isLineSeparator
where
isLineSeparator = (== '\x2028')
headingToMarkdown :: Int -> [Inline] -> Rope
headingToMarkdown level inlines =
let text = inlinesToMarkdown inlines
in case level of
1 -> text <> "\n" <> underline '=' text <> "\n"
2 -> text <> "\n" <> underline '-' text <> "\n"
n -> intoRope (replicate n '#') <> " " <> text <> "\n"
codeToMarkdown :: Attr -> T.Text -> Rope
codeToMarkdown attr literal =
let body = intoRope literal
lang = fencedAttributesToMarkdown attr
in "```" <> lang <> "\n"
<> body
<> "\n"
<> "```"
<> "\n"
poemToMarkdown :: [[Inline]] -> Rope
poemToMarkdown list =
mconcat (intersperse "\n" (fmap prefix list)) <> "\n"
where
prefix inlines = "| " <> inlinesToMarkdown inlines
quoteToMarkdown :: Int -> [Block] -> Rope
quoteToMarkdown margin blocks =
foldl' (\text block -> text <> prefix block) emptyRope blocks
where
prefix :: Block -> Rope
prefix = foldl' (\text line -> text <> "> " <> line <> "\n") emptyRope . rows
rows :: Block -> [Rope]
rows = breakLines . convertBlock (margin - 2)
bulletlistToMarkdown :: Int -> [[Block]] -> Rope
bulletlistToMarkdown = listToMarkdown (repeat "- ")
orderedlistToMarkdown :: Int -> ListAttributes -> [[Block]] -> Rope
orderedlistToMarkdown margin (num, style, delim) blockss =
listToMarkdown (intoMarkers (num, style, delim)) margin blockss
where
intoMarkers = fmap pad . fmap intoRope . orderedListMarkers
pad text = text <> if widthRope text > 2 then " " else " "
definitionlistToMarkdown :: Int -> [([Inline], [[Block]])] -> Rope
definitionlistToMarkdown margin definitions =
case definitions of
[] -> emptyRope
(definition1 : definitionN) ->
handleDefinition definition1
<> foldl'
(\text definition -> text <> "\n" <> handleDefinition definition)
emptyRope
definitionN
where
handleDefinition :: ([Inline], [[Block]]) -> Rope
handleDefinition (term, blockss) =
inlinesToMarkdown term <> "\n\n" <> listToMarkdown (repeat ": ") margin blockss
listToMarkdown :: [Rope] -> Int -> [[Block]] -> Rope
listToMarkdown markers margin items =
case pairs of
[] -> emptyRope
((marker1, blocks1) : pairsN) ->
listitem marker1 blocks1
<> foldl'
(\text (markerN, blocksN) -> text <> spacer blocksN <> listitem markerN blocksN)
emptyRope
pairsN
where
pairs = zip markers items
listitem :: Rope -> [Block] -> Rope
listitem _ [] = emptyRope
listitem marker blocks = indent marker blocks
{-
Tricky. Tight lists are represented by Plain, whereas more widely spaced
lists are represented by Para. A complex block (specifically a nested
list!) will handle its own spacing. This seems fragile.
-}
spacer :: [Block] -> Rope
spacer [] = emptyRope
spacer (block : _) = case block of
Plain _ -> emptyRope
Para _ -> "\n"
_ -> emptyRope -- ie nested list
indent :: Rope -> [Block] -> Rope
indent marker =
snd . foldl' (f marker) (True, emptyRope) . breakLines . blocksToMarkdown (margin - 4)
f :: Rope -> (Bool, Rope) -> Rope -> (Bool, Rope)
f marker (first, text) line
| nullRope line =
(False, text <> "\n") -- don't indent lines that should be blank
| otherwise =
if first
then (False, text <> marker <> line <> "\n")
else (False, text <> " " <> line <> "\n")
{-
In Pandoc flavoured Markdown,