module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.ParserCombinators.Parsec
import Control.Monad ( when, unless )
import Data.List ( findIndex, intercalate, transpose, sort )
import qualified Data.Map as M
import Text.Printf ( printf )
readRST :: ParserState
-> String
-> Pandoc
readRST state s = (readWith parseRST) state (s ++ "\n\n")
bulletListMarkers :: [Char]
bulletListMarkers = "*+-"
underlineChars :: [Char]
underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
specialChars :: [Char]
specialChars = "\\`|*_<>$:[-"
isHeader :: Int -> Block -> Bool
isHeader n (Header x _) = x == n
isHeader _ _ = False
promoteHeaders :: Int -> [Block] -> [Block]
promoteHeaders num ((Header level text):rest) =
(Header (level num) text):(promoteHeaders num rest)
promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
promoteHeaders _ [] = []
titleTransform :: [Block]
-> ([Block], [Inline])
titleTransform ((Header 1 head1):(Header 2 head2):rest) |
not (any (isHeader 1) rest || any (isHeader 2) rest) =
(promoteHeaders 2 rest, head1 ++ [Str ":", Space] ++ head2)
titleTransform ((Header 1 head1):rest) |
not (any (isHeader 1) rest) =
(promoteHeaders 1 rest, head1)
titleTransform blocks = (blocks, [])
parseRST :: GenParser Char ParserState Pandoc
parseRST = do
startPos <- getPosition
docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
setInput docMinusKeys
setPosition startPos
blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
state <- getState
let (blocks'', title) = if stateStandalone state
then titleTransform blocks'
else (blocks', [])
let authors = stateAuthors state
let date = stateDate state
let title' = if (null title) then (stateTitle state) else title
return $ Pandoc (Meta title' authors date) blocks''
parseBlocks :: GenParser Char ParserState [Block]
parseBlocks = manyTill block eof
block :: GenParser Char ParserState Block
block = choice [ codeBlock
, rawHtmlBlock
, rawLaTeXBlock
, fieldList
, blockQuote
, imageBlock
, customCodeBlock
, unknownDirective
, header
, hrule
, lineBlock
, table
, list
, lhsCodeBlock
, para
, plain
, nullBlock ] <?> "block"
fieldListItem :: String -> GenParser Char st ([Char], [Char])
fieldListItem indent = try $ do
string indent
char ':'
name <- many1 alphaNum
string ": "
skipSpaces
first <- manyTill anyChar newline
rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >>
indentedBlock
return (name, intercalate " " (first:(lines rest)))
fieldList :: GenParser Char ParserState Block
fieldList = try $ do
indent <- lookAhead $ many (oneOf " \t")
items <- many1 $ fieldListItem indent
blanklines
let authors = case lookup "Authors" items of
Just auth -> [auth]
Nothing -> map snd (filter (\(x,_) -> x == "Author") items)
unless (null authors) $ do
authors' <- mapM (parseFromString (many inline)) authors
updateState $ \st -> st {stateAuthors = map normalizeSpaces authors'}
case (lookup "Date" items) of
Just dat -> do
dat' <- parseFromString (many inline) dat
updateState $ \st -> st{ stateDate = normalizeSpaces dat' }
Nothing -> return ()
case (lookup "Title" items) of
Just tit -> parseFromString (many inline) tit >>=
\t -> updateState $ \st -> st {stateTitle = t}
Nothing -> return ()
let remaining = filter (\(x,_) -> (x /= "Authors") && (x /= "Author") &&
(x /= "Date") && (x /= "Title")) items
if null remaining
then return Null
else do terms <- mapM (return . (:[]) . Str . fst) remaining
defs <- mapM (parseFromString (many block) . snd)
remaining
return $ DefinitionList $ zip terms $ map (:[]) defs
lineBlockLine :: GenParser Char ParserState [Inline]
lineBlockLine = try $ do
string "| "
white <- many (oneOf " \t")
line <- many $ (notFollowedBy newline >> inline) <|> (try $ endline >>~ char ' ')
optional endline
return $ normalizeSpaces $ (if null white then [] else [Str white]) ++ line
lineBlock :: GenParser Char ParserState Block
lineBlock = try $ do
lines' <- many1 lineBlockLine
blanklines
return $ Para (intercalate [LineBreak] lines')
para :: GenParser Char ParserState Block
para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
codeBlockStart :: GenParser Char st Char
codeBlockStart = string "::" >> blankline >> blankline
paraBeforeCodeBlock :: GenParser Char ParserState Block
paraBeforeCodeBlock = try $ do
result <- many1 (notFollowedBy' codeBlockStart >> inline)
lookAhead (string "::")
return $ Para $ if last result == Space
then normalizeSpaces result
else (normalizeSpaces result) ++ [Str ":"]
paraNormal :: GenParser Char ParserState Block
paraNormal = try $ do
result <- many1 inline
newline
blanklines
return $ Para $ normalizeSpaces result
plain :: GenParser Char ParserState Block
plain = many1 inline >>= return . Plain . normalizeSpaces
imageBlock :: GenParser Char st Block
imageBlock = try $ do
string ".. image:: "
src <- manyTill anyChar newline
fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t")
many1 $ fieldListItem indent
optional blanklines
case lookup "alt" fields of
Just alt -> return $ Plain [Image [Str alt] (src, alt)]
Nothing -> return $ Plain [Image [Str "image"] (src, "")]
header :: GenParser Char ParserState Block
header = doubleHeader <|> singleHeader <?> "header"
doubleHeader :: GenParser Char ParserState Block
doubleHeader = try $ do
c <- oneOf underlineChars
rest <- many (char c)
let lenTop = length (c:rest)
skipSpaces
newline
txt <- many1 (notFollowedBy blankline >> inline)
pos <- getPosition
let len = (sourceColumn pos) 1
if (len > lenTop) then fail "title longer than border" else return ()
blankline
count lenTop (char c)
blanklines
state <- getState
let headerTable = stateHeaderTable state
let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
Just ind -> (headerTable, ind + 1)
Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
setState (state { stateHeaderTable = headerTable' })
return $ Header level (normalizeSpaces txt)
singleHeader :: GenParser Char ParserState Block
singleHeader = try $ do
notFollowedBy' whitespace
txt <- many1 (do {notFollowedBy blankline; inline})
pos <- getPosition
let len = (sourceColumn pos) 1
blankline
c <- oneOf underlineChars
count (len 1) (char c)
many (char c)
blanklines
state <- getState
let headerTable = stateHeaderTable state
let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
Just ind -> (headerTable, ind + 1)
Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
setState (state { stateHeaderTable = headerTable' })
return $ Header level (normalizeSpaces txt)
hrule :: GenParser Char st Block
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
skipMany (char chr)
blankline
blanklines
return HorizontalRule
indentedLine :: String -> GenParser Char st [Char]
indentedLine indents = try $ do
string indents
result <- manyTill anyChar newline
return $ result ++ "\n"
indentedBlock :: GenParser Char st [Char]
indentedBlock = do
indents <- lookAhead $ many1 (oneOf " \t")
lns <- many $ choice $ [ indentedLine indents,
try $ do b <- blanklines
l <- indentedLine indents
return (b ++ l) ]
optional blanklines
return $ concat lns
codeBlock :: GenParser Char st Block
codeBlock = try $ do
codeBlockStart
result <- indentedBlock
return $ CodeBlock ("",[],[]) $ stripTrailingNewlines result
customCodeBlock :: GenParser Char st Block
customCodeBlock = try $ do
string ".. code-block:: "
language <- manyTill anyChar newline
blanklines
result <- indentedBlock
return $ CodeBlock ("", ["sourceCode", language], []) $ stripTrailingNewlines result
lhsCodeBlock :: GenParser Char ParserState Block
lhsCodeBlock = try $ do
failUnlessLHS
optional codeBlockStart
pos <- getPosition
when (sourceColumn pos /= 1) $ fail "Not in first column"
lns <- many1 birdTrackLine
let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
then map (drop 1) lns
else lns
blanklines
return $ CodeBlock ("", ["sourceCode", "literate", "haskell"], []) $ intercalate "\n" lns'
birdTrackLine :: GenParser Char st [Char]
birdTrackLine = do
char '>'
manyTill anyChar newline
rawHtmlBlock :: GenParser Char st Block
rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
indentedBlock >>= return . RawHtml
rawLaTeXBlock :: GenParser Char st Block
rawLaTeXBlock = try $ do
string ".. raw:: latex"
blanklines
result <- indentedBlock
return $ Para [(TeX result)]
blockQuote :: GenParser Char ParserState Block
blockQuote = do
raw <- indentedBlock
contents <- parseFromString parseBlocks $ raw ++ "\n\n"
return $ BlockQuote contents
list :: GenParser Char ParserState Block
list = choice [ bulletList, orderedList, definitionList ] <?> "list"
definitionListItem :: GenParser Char ParserState ([Inline], [[Block]])
definitionListItem = try $ do
notFollowedBy (try $ char '.' >> char '.')
term <- many1Till inline endline
raw <- indentedBlock
contents <- parseFromString parseBlocks $ raw ++ "\n\n"
return (normalizeSpaces term, [contents])
definitionList :: GenParser Char ParserState Block
definitionList = many1 definitionListItem >>= return . DefinitionList
bulletListStart :: GenParser Char st Int
bulletListStart = try $ do
notFollowedBy' hrule
marker <- oneOf bulletListMarkers
white <- many1 spaceChar
return $ length (marker:white)
orderedListStart :: ListNumberStyle
-> ListNumberDelim
-> GenParser Char ParserState Int
orderedListStart style delim = try $ do
(_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
white <- many1 spaceChar
return $ markerLen + length white
listLine :: Int -> GenParser Char ParserState [Char]
listLine markerLength = try $ do
notFollowedBy blankline
indentWith markerLength
line <- manyTill anyChar newline
return $ line ++ "\n"
indentWith :: Int -> GenParser Char ParserState [Char]
indentWith num = do
state <- getState
let tabStop = stateTabStop state
if (num < tabStop)
then count num (char ' ')
else choice [ try (count num (char ' ')),
(try (char '\t' >> count (num tabStop) (char ' '))) ]
rawListItem :: GenParser Char ParserState Int
-> GenParser Char ParserState (Int, [Char])
rawListItem start = try $ do
markerLength <- start
firstLine <- manyTill anyChar newline
restLines <- many (listLine markerLength)
return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
listContinuation :: Int -> GenParser Char ParserState [Char]
listContinuation markerLength = try $ do
blanks <- many1 blankline
result <- many1 (listLine markerLength)
return $ blanks ++ concat result
listItem :: GenParser Char ParserState Int
-> GenParser Char ParserState [Block]
listItem start = try $ do
(markerLength, first) <- rawListItem start
rest <- many (listContinuation markerLength)
blanks <- choice [ try (many blankline >>~ lookAhead start),
many1 blankline ]
state <- getState
let oldContext = stateParserContext state
setState $ state {stateParserContext = ListItemState}
parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks
updateState (\st -> st {stateParserContext = oldContext})
return parsed
orderedList :: GenParser Char ParserState Block
orderedList = try $ do
(start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
items <- many1 (listItem (orderedListStart style delim))
let items' = compactify items
return $ OrderedList (start, style, delim) items'
bulletList :: GenParser Char ParserState Block
bulletList = many1 (listItem bulletListStart) >>=
return . BulletList . compactify
unknownDirective :: GenParser Char st Block
unknownDirective = try $ do
string ".."
notFollowedBy (noneOf " \t\n")
manyTill anyChar newline
many $ blanklines <|> (oneOf " \t" >> manyTill anyChar newline)
return Null
quotedReferenceName :: GenParser Char ParserState [Inline]
quotedReferenceName = try $ do
char '`' >> notFollowedBy (char '`')
label' <- many1Till inline (char '`')
return label'
unquotedReferenceName :: GenParser Char ParserState [Inline]
unquotedReferenceName = try $ do
label' <- many1Till inline (lookAhead $ char ':')
return label'
isolated :: Char -> GenParser Char st Char
isolated ch = try $ char ch >>~ notFollowedBy (char ch)
simpleReferenceName :: GenParser Char st [Inline]
simpleReferenceName = do
raw <- many1 (alphaNum <|> isolated '-' <|> isolated '.' <|>
(try $ char '_' >>~ lookAhead alphaNum))
return [Str raw]
referenceName :: GenParser Char ParserState [Inline]
referenceName = quotedReferenceName <|>
(try $ simpleReferenceName >>~ lookAhead (char ':')) <|>
unquotedReferenceName
referenceKey :: GenParser Char ParserState [Char]
referenceKey = do
startPos <- getPosition
(key, target) <- choice [imageKey, anonymousKey, regularKey]
st <- getState
let oldkeys = stateKeys st
updateState $ \s -> s { stateKeys = M.insert key target oldkeys }
optional blanklines
endPos <- getPosition
return $ replicate (sourceLine endPos sourceLine startPos) '\n'
targetURI :: GenParser Char st [Char]
targetURI = do
skipSpaces
optional newline
contents <- many1 (try (many spaceChar >> newline >>
many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
blanklines
return $ escapeURI $ removeLeadingTrailingSpace $ contents
imageKey :: GenParser Char ParserState (Key, Target)
imageKey = try $ do
string ".. |"
ref <- manyTill inline (char '|')
skipSpaces
string "image::"
src <- targetURI
return (Key (normalizeSpaces ref), (src, ""))
anonymousKey :: GenParser Char st (Key, Target)
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
pos <- getPosition
return (Key [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, ""))
regularKey :: GenParser Char ParserState (Key, Target)
regularKey = try $ do
string ".. _"
ref <- referenceName
char ':'
src <- targetURI
return (Key (normalizeSpaces ref), (src, ""))
dashedLine :: Char -> GenParser Char st (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
return (length dashes, length $ dashes ++ sp)
simpleDashedLines :: Char -> GenParser Char st [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
simpleTableSep :: Char -> GenParser Char ParserState Char
simpleTableSep ch = try $ simpleDashedLines ch >> newline
simpleTableFooter :: GenParser Char ParserState [Char]
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
simpleTableRawLine :: [Int] -> GenParser Char ParserState [String]
simpleTableRawLine indices = do
line <- many1Till anyChar newline
return (simpleTableSplitLine indices line)
simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]]
simpleTableRow indices = do
notFollowedBy' simpleTableFooter
firstLine <- simpleTableRawLine indices
colLines <- return []
let cols = map unlines . transpose $ firstLine : colLines
mapM (parseFromString (many plain)) cols
simpleTableSplitLine :: [Int] -> String -> [String]
simpleTableSplitLine indices line =
map removeLeadingTrailingSpace
$ tail $ splitByIndices (init indices) line
simpleTableHeader :: Bool
-> GenParser Char ParserState ([[Block]], [Alignment], [Int])
simpleTableHeader headless = try $ do
optional blanklines
rawContent <- if headless
then return ""
else simpleTableSep '=' >> anyLine
dashes <- simpleDashedLines '='
newline
let lines' = map snd dashes
let indices = scanl (+) 0 lines'
let aligns = replicate (length lines') AlignDefault
let rawHeads = if headless
then replicate (length dashes) ""
else simpleTableSplitLine indices rawContent
heads <- mapM (parseFromString (many plain)) $
map removeLeadingTrailingSpace rawHeads
return (heads, aligns, indices)
simpleTable :: Bool
-> GenParser Char ParserState Block
simpleTable headless = do
Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return [])
return $ Table c a (replicate (length a) 0) h l
where
sep = return ()
gridTable :: Bool
-> GenParser Char ParserState Block
gridTable = gridTableWith block (return [])
table :: GenParser Char ParserState Block
table = gridTable False <|> simpleTable False <|>
gridTable True <|> simpleTable True <?> "table"
inline :: GenParser Char ParserState Inline
inline = choice [ link
, str
, whitespace
, endline
, strong
, emph
, code
, image
, hyphens
, superscript
, subscript
, escapedChar
, symbol ] <?> "inline"
hyphens :: GenParser Char ParserState Inline
hyphens = do
result <- many1 (char '-')
option Space endline
return $ Str result
escapedChar :: GenParser Char st Inline
escapedChar = escaped anyChar
symbol :: GenParser Char ParserState Inline
symbol = do
result <- oneOf specialChars
return $ Str [result]
code :: GenParser Char ParserState Inline
code = try $ do
string "``"
result <- manyTill anyChar (try (string "``"))
return $ Code $ removeLeadingTrailingSpace $ intercalate " " $ lines result
emph :: GenParser Char ParserState Inline
emph = enclosed (char '*') (char '*') inline >>=
return . Emph . normalizeSpaces
strong :: GenParser Char ParserState Inline
strong = enclosed (string "**") (try $ string "**") inline >>=
return . Strong . normalizeSpaces
interpreted :: [Char] -> GenParser Char st [Inline]
interpreted role = try $ do
optional $ try $ string "\\ "
result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
return [Str result]
superscript :: GenParser Char ParserState Inline
superscript = interpreted "sup" >>= (return . Superscript)
subscript :: GenParser Char ParserState Inline
subscript = interpreted "sub" >>= (return . Subscript)
whitespace :: GenParser Char ParserState Inline
whitespace = many1 spaceChar >> return Space <?> "whitespace"
str :: GenParser Char ParserState Inline
str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
endline :: GenParser Char ParserState Inline
endline = try $ do
newline
notFollowedBy blankline
st <- getState
if (stateParserContext st) == ListItemState
then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
notFollowedBy' bulletListStart
else return ()
return Space
link :: GenParser Char ParserState Inline
link = choice [explicitLink, referenceLink, autoLink] <?> "link"
explicitLink :: GenParser Char ParserState Inline
explicitLink = try $ do
char '`'
notFollowedBy (char '`')
label' <- manyTill (notFollowedBy (char '`') >> inline)
(try (spaces >> char '<'))
src <- manyTill (noneOf ">\n") (char '>')
skipSpaces
string "`_"
return $ Link (normalizeSpaces label')
(escapeURI $ removeLeadingTrailingSpace src, "")
referenceLink :: GenParser Char ParserState Inline
referenceLink = try $ do
label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
state <- getState
let keyTable = stateKeys state
let isAnonKey (Key [Str ('_':_)]) = True
isAnonKey _ = False
key <- option (Key label') $
do char '_'
let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
if null anonKeys
then pzero
else return (head anonKeys)
(src,tit) <- case lookupKeySrc keyTable key of
Nothing -> fail "no corresponding key"
Just target -> return target
when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
return $ Link (normalizeSpaces label') (src, tit)
autoURI :: GenParser Char ParserState Inline
autoURI = do
(orig, src) <- uri
return $ Link [Str orig] (src, "")
autoEmail :: GenParser Char ParserState Inline
autoEmail = do
(orig, src) <- emailAddress
return $ Link [Str orig] (src, "")
autoLink :: GenParser Char ParserState Inline
autoLink = autoURI <|> autoEmail
image :: GenParser Char ParserState Inline
image = try $ do
char '|'
ref <- manyTill inline (char '|')
state <- getState
let keyTable = stateKeys state
(src,tit) <- case lookupKeySrc keyTable (Key ref) of
Nothing -> fail "no corresponding key"
Just target -> return target
return $ Image (normalizeSpaces ref) (src, tit)