{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module PandocToMarkdown (
pandocToMarkdown,
NotSafe (..),
tableToMarkdown,
) where
import Core.System.Base
import Core.Text
import Data.Foldable (foldl')
import Data.List (intersperse)
import qualified Data.Text as T (Text, null)
import Text.Pandoc (
Alignment (..),
Attr,
Block (..),
Caption (..),
Cell (..),
ColSpan (..),
ColSpec,
ColWidth (..),
Format (..),
Inline (..),
ListAttributes,
MathType (..),
Pandoc (..),
QuoteType (..),
Row (..),
RowSpan (..),
TableBody (..),
TableFoot (..),
TableHead (..),
)
import Text.Pandoc.Shared (orderedListMarkers)
__WIDTH__ :: Int
__WIDTH__ = 78
pandocToMarkdown :: Pandoc -> Rope
pandocToMarkdown (Pandoc _ blocks) =
blocksToMarkdown __WIDTH__ blocks
blocksToMarkdown :: Int -> [Block] -> Rope
blocksToMarkdown _ [] = emptyRope
blocksToMarkdown margin (block1 : blocks) =
convertBlock margin block1
<> foldl'
(\text block -> text <> "\n" <> convertBlock margin block)
emptyRope
blocks
convertBlock :: Int -> Block -> Rope
convertBlock margin block =
let msg = "Unfinished block: " ++ show block -- FIXME
in case block of
Plain inlines -> plaintextToMarkdown margin inlines
Para inlines -> paragraphToMarkdown margin inlines
Header level _ inlines -> headingToMarkdown level inlines
Null -> emptyRope
RawBlock (Format "tex") string -> intoRope string <> "\n"
RawBlock (Format "html") string -> intoRope string <> "\n"
RawBlock _ _ -> error msg
CodeBlock attr string -> codeToMarkdown attr string
LineBlock list -> poemToMarkdown list
BlockQuote blocks -> quoteToMarkdown margin blocks
BulletList blockss -> bulletlistToMarkdown margin blockss
OrderedList attrs blockss -> orderedlistToMarkdown margin attrs blockss
DefinitionList blockss -> definitionlistToMarkdown margin blockss
HorizontalRule -> "---\n"
Table attr caption alignments header rows footer -> tableToMarkdown attr caption alignments header rows footer
Div attr blocks -> divToMarkdown margin attr blocks
{-
This does **not** emit a newline at the end. The intersperse happening in
`blocksToMarkdown` will terminate the line, but you won't get a blank line
between blocks as is the convention everywhere else (this was critical when
lists were nested in tight lists).
-}
plaintextToMarkdown :: Int -> [Inline] -> Rope
plaintextToMarkdown margin inlines =
wrap' margin (inlinesToMarkdown inlines)
{-
Everything was great until we had to figure out how to deal with line
breaks aka
, represented in Markdown by [' ',' ']. We do this by
replacing the line break Inline with \x2028. This character, U+2028 LS, is
the Line Separator character. It's one of those symbols up in General
Punctuation that no one ever uses. So we use it as a sentinel internally
here; first we break on those, and then we wrap the results.
-}
paragraphToMarkdown :: Int -> [Inline] -> Rope
paragraphToMarkdown margin inlines =
wrap' margin (inlinesToMarkdown inlines) <> "\n"
wrap' :: Int -> Rope -> Rope
wrap' margin =
mconcat . intersperse " \n" . fmap (wrap margin) . breakPieces isLineSeparator
where
isLineSeparator = (== '\x2028')
headingToMarkdown :: Int -> [Inline] -> Rope
headingToMarkdown level inlines =
let text = inlinesToMarkdown inlines
in case level of
1 -> text <> "\n" <> underline '=' text <> "\n"
2 -> text <> "\n" <> underline '-' text <> "\n"
n -> intoRope (replicate n '#') <> " " <> text <> "\n"
codeToMarkdown :: Attr -> T.Text -> Rope
codeToMarkdown attr literal =
let body = intoRope literal
lang = fencedAttributesToMarkdown attr
in "```" <> lang <> "\n"
<> body
<> "\n"
<> "```"
<> "\n"
poemToMarkdown :: [[Inline]] -> Rope
poemToMarkdown list =
mconcat (intersperse "\n" (fmap prefix list)) <> "\n"
where
prefix inlines = "| " <> inlinesToMarkdown inlines
quoteToMarkdown :: Int -> [Block] -> Rope
quoteToMarkdown margin blocks =
foldl' (\text block -> text <> prefix block) emptyRope blocks
where
prefix :: Block -> Rope
prefix = foldl' (\text line -> text <> "> " <> line <> "\n") emptyRope . rows
rows :: Block -> [Rope]
rows = breakLines . convertBlock (margin - 2)
bulletlistToMarkdown :: Int -> [[Block]] -> Rope
bulletlistToMarkdown = listToMarkdown (repeat "- ")
orderedlistToMarkdown :: Int -> ListAttributes -> [[Block]] -> Rope
orderedlistToMarkdown margin (num, style, delim) blockss =
listToMarkdown (intoMarkers (num, style, delim)) margin blockss
where
intoMarkers = fmap pad . fmap intoRope . orderedListMarkers
pad text = text <> if widthRope text > 2 then " " else " "
definitionlistToMarkdown :: Int -> [([Inline], [[Block]])] -> Rope
definitionlistToMarkdown margin definitions =
case definitions of
[] -> emptyRope
(definition1 : definitionN) ->
handleDefinition definition1
<> foldl'
(\text definition -> text <> "\n" <> handleDefinition definition)
emptyRope
definitionN
where
handleDefinition :: ([Inline], [[Block]]) -> Rope
handleDefinition (term, blockss) =
inlinesToMarkdown term <> "\n\n" <> listToMarkdown (repeat ": ") margin blockss
listToMarkdown :: [Rope] -> Int -> [[Block]] -> Rope
listToMarkdown markers margin items =
case pairs of
[] -> emptyRope
((marker1, blocks1) : pairsN) ->
listitem marker1 blocks1
<> foldl'
(\text (markerN, blocksN) -> text <> spacer blocksN <> listitem markerN blocksN)
emptyRope
pairsN
where
pairs = zip markers items
listitem :: Rope -> [Block] -> Rope
listitem _ [] = emptyRope
listitem marker blocks = indent marker blocks
{-
Tricky. Tight lists are represented by Plain, whereas more widely spaced
lists are represented by Para. A complex block (specifically a nested
list!) will handle its own spacing. This seems fragile.
-}
spacer :: [Block] -> Rope
spacer [] = emptyRope
spacer (block : _) = case block of
Plain _ -> emptyRope
Para _ -> "\n"
_ -> emptyRope -- ie nested list
indent :: Rope -> [Block] -> Rope
indent marker =
snd . foldl' (f marker) (True, emptyRope) . breakLines . blocksToMarkdown (margin - 4)
f :: Rope -> (Bool, Rope) -> Rope -> (Bool, Rope)
f marker (first, text) line
| nullRope line =
(False, text <> "\n") -- don't indent lines that should be blank
| otherwise =
if first
then (False, text <> marker <> line <> "\n")
else (False, text <> " " <> line <> "\n")
{-
In Pandoc flavoured Markdown,