module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.ParserCombinators.Parsec
import Data.List ( findIndex, delete, intercalate )
-- | Parse a reStructuredText string into a 'Pandoc' document, starting
-- from the given parser state.  Two newline characters are appended to
-- the input before it is handed to 'parseRST'.
readRST :: ParserState -> String -> Pandoc
readRST state s = readWith parseRST state padded
  where
    padded = s ++ "\n\n"
-- | Characters accepted by 'bulletListStart' as a bullet list marker.
bulletListMarkers :: [Char]
bulletListMarkers = ['*', '+', '-']
-- | Characters that may be repeated to form a header underline/overline
-- or a horizontal rule (see 'doubleHeader', 'singleHeader' and 'hrule').
--
-- reStructuredText allows every non-alphanumeric printable 7-bit ASCII
-- character here, so @%@ is part of the set.
underlineChars :: [Char]
underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
-- | Characters that never appear inside a plain 'str' run; when no other
-- inline parser claims them they are emitted one at a time by 'symbol'.
specialChars :: [Char]
specialChars = ['\\', '`', '|', '*', '_', '<', '>', '$', ':', '[', '-']
-- | 'True' exactly when the block is a 'Header' whose level equals @n@.
isHeader :: Int -> Block -> Bool
isHeader n blk =
  case blk of
    Header lvl _ -> lvl == n
    _            -> False
-- | Promote every top-level 'Header' in the list by @num@ levels, i.e.
-- subtract @num@ from its level.  All other blocks are passed through
-- unchanged; nested blocks are not inspected.
promoteHeaders :: Int -> [Block] -> [Block]
promoteHeaders num = map promote
  where
    -- The level must be reduced by @num@ (the subtraction operator was
    -- missing, which applied an 'Int' as if it were a function).
    promote (Header level text) = Header (level - num) text
    promote other               = other
-- | If the document starts with a unique level-1 header (optionally
-- followed by a unique level-2 header), remove it from the body, promote
-- the remaining headers accordingly, and return its text as the title.
--
-- * Level-1 followed by level-2, with no further level-1 or level-2
--   headers: both are removed, the rest is promoted by 2, and the title
--   is @head1 ++ [Str ":", Space] ++ head2@.
-- * Level-1 only, with no further level-1 headers: it is removed and the
--   rest is promoted by 1.
-- * Otherwise the blocks are returned untouched with an empty title.
titleTransform :: [Block]              -- ^ document body
               -> ([Block], [Inline])  -- ^ (modified body, title)
titleTransform blocks@(Header 1 head1 : Header 2 head2 : rest)
  | any (isHeader 1) rest || any (isHeader 2) rest = (blocks, [])
  | otherwise =
      (promoteHeaders 2 rest, head1 ++ [Str ":", Space] ++ head2)
titleTransform blocks@(Header 1 head1 : rest)
  | any (isHeader 1) rest = (blocks, [])
  | otherwise             = (promoteHeaders 1 rest, head1)
titleTransform blocks = (blocks, [])
-- | Top-level document parser.  Works in two passes over the input:
--
-- 1. Scan the whole input with 'referenceKey' / 'lineClump', recording
--    reference keys in the state; the concatenated results become the
--    new input and the position is reset to where it started.
-- 2. Parse that input into blocks, drop 'Null' blocks, and (when
--    'stateStandalone' is set) apply 'titleTransform'.
--
-- The title from 'titleTransform' is used if non-empty; otherwise
-- 'stateTitle' is used.  Authors and date come from the parser state.
parseRST :: GenParser Char ParserState Pandoc
parseRST = do
  -- first pass: collect the reference keys
  startPos <- getPosition
  chunks <- manyTill (referenceKey <|> lineClump) eof
  setInput (concat chunks)
  setPosition startPos
  -- 'referenceKey' prepends each key, so reverse the accumulated list
  updateState $ \s -> s { stateKeys = reverse (stateKeys s) }
  -- second pass: parse the blocks themselves
  parsed <- parseBlocks
  finalState <- getState
  let body = filter (/= Null) parsed
      (body', docTitle) =
        if stateStandalone finalState
          then titleTransform body
          else (body, [])
      title' = if null docTitle then stateTitle finalState else docTitle
      meta = Meta title' (stateAuthors finalState) (stateDate finalState)
  return $ Pandoc meta body'
-- | Parse blocks repeatedly until end of input.
parseBlocks :: GenParser Char ParserState [Block]
parseBlocks = block `manyTill` eof
-- | Parse a single block element.  The alternatives are tried in the
-- order listed, so earlier parsers take precedence over later ones.
block :: GenParser Char ParserState Block
block = choice blockParsers <?> "block"
  where
    blockParsers =
      [ codeBlock
      , rawHtmlBlock
      , rawLaTeXBlock
      , fieldList
      , blockQuote
      , imageBlock
      , unknownDirective
      , header
      , hrule
      , list
      , lineBlock
      , para
      , plain
      , nullBlock
      ]
-- | Parse one field-list entry of the form @:name: value@ that starts
-- with exactly the given indentation string.  If the following line has
-- the same indentation plus at least one further space or tab, an
-- indented block is consumed as the continuation of the value.
--
-- Returns the field name and the value, with the first line and the
-- continuation lines joined by single spaces.
fieldListItem :: String -> GenParser Char st ([Char], [Char])
fieldListItem indent = try $ do
  string indent
  char ':'
  name <- many1 alphaNum
  string ": "
  skipSpaces
  first <- manyTill anyChar newline
  rest <- option "" $ try $ do
    lookAhead (string indent >> oneOf " \t")
    indentedBlock
  return (name, unwords (first : lines rest))
-- | Parse a field list.  The fields @Authors@ / @Author@, @Date@ and
-- @Title@ are stored in the parser state ('stateAuthors', 'stateDate',
-- 'stateTitle'); any other fields are returned as a 'DefinitionList'.
-- If only metadata fields are present the result is 'Null'.
fieldList :: GenParser Char ParserState Block
fieldList = try $ do
  indent <- lookAhead $ many (oneOf " \t")
  items <- many1 $ fieldListItem indent
  blanklines
  -- A single "Authors" field takes priority over individual "Author"s.
  let authors = maybe [val | (key, val) <- items, key == "Author"]
                      (: [])
                      (lookup "Authors" items)
  case authors of
    [] -> return ()
    _  -> updateState $ \st -> st { stateAuthors = authors }
  maybe (return ())
        (\dat -> updateState $ \st -> st { stateDate = dat })
        (lookup "Date" items)
  -- The title text is itself parsed as a sequence of inlines.
  maybe (return ())
        (\tit -> parseFromString (many inline) tit >>=
                   \t -> updateState $ \st -> st { stateTitle = t })
        (lookup "Title" items)
  let metaFields = ["Authors", "Author", "Date", "Title"]
      remaining = [item | item@(key, _) <- items, key `notElem` metaFields]
  case remaining of
    [] -> return Null
    _  -> do
      defs <- mapM (parseFromString (many block) . snd) remaining
      let terms = [[Str key] | (key, _) <- remaining]
      return $ DefinitionList (zip terms defs)
-- | Parse one line of a line block: the marker @"| "@, optional further
-- leading spaces/tabs (kept verbatim as a 'Str'), and inlines up to the
-- newline.  A 'LineBreak' is appended to the result.
lineBlockLine :: GenParser Char ParserState [Inline]
lineBlockLine = try $ do
  string "| "
  leading <- many (oneOf " \t")
  content <- manyTill inline newline
  let prefix = [Str leading | not (null leading)]
  return (prefix ++ content ++ [LineBreak])
-- | Parse a line block: one or more 'lineBlockLine's followed by blank
-- lines, collected into a single 'Para'.
lineBlock :: GenParser Char ParserState Block
lineBlock = try $
  fmap (Para . concat) (many1 lineBlockLine >>~ blanklines)
-- | Parse a paragraph: either one that introduces a code block with a
-- trailing @::@, or an ordinary one.
para :: GenParser Char ParserState Block
para = choice [paraBeforeCodeBlock, paraNormal] <?> "paragraph"
-- | Parse the marker that introduces a code block: @::@ followed by two
-- 'blankline's.  The result is that of the second 'blankline'.
codeBlockStart :: GenParser Char st Char
codeBlockStart = do
  string "::"
  blankline
  blankline
-- | Parse a paragraph that ends in a @::@ code-block marker.  The marker
-- itself is left in the input (only looked ahead at).  If the text
-- before the marker ends in a space the result is just that text;
-- otherwise a single @":"@ is appended.
paraBeforeCodeBlock :: GenParser Char ParserState Block
paraBeforeCodeBlock = try $ do
  inlines <- many1 $ do
    notFollowedBy' codeBlockStart
    inline
  lookAhead (string "::")
  let cleaned = normalizeSpaces inlines
      -- 'many1' guarantees a non-empty list, so 'last' is safe here
      suffix
        | last inlines == Space = []
        | otherwise             = [Str ":"]
  return $ Para (cleaned ++ suffix)
-- | Parse an ordinary paragraph: one or more inlines, a newline, and
-- then blank lines.
paraNormal :: GenParser Char ParserState Block
paraNormal = try $
  fmap (Para . normalizeSpaces) (many1 inline >>~ (newline >> blanklines))
-- | Parse one or more inlines into a 'Plain' block (no terminating blank
-- line is required).
plain :: GenParser Char ParserState Block
plain = do
  inlines <- many1 inline
  return $ Plain (normalizeSpaces inlines)
-- | Parse an @.. image::@ directive.  The rest of the first line is the
-- image source; optional field-list items that follow (e.g. @:alt:@)
-- are read as options.  If an @alt@ field is present it is used both as
-- the image's alternate text and as its title; otherwise the alternate
-- text is @"image"@ and the title is empty.
imageBlock :: GenParser Char st Block
imageBlock = try $ do
  string ".. image:: "
  src <- manyTill anyChar newline
  fields <- option [] $ do
    -- Indentation is made of spaces and tabs.  (This used to read
    -- " /t", which treated '/' and 't' as indentation and ignored tabs.)
    indent <- lookAhead $ many (oneOf " \t")
    many1 $ fieldListItem indent
  optional blanklines
  return $ case lookup "alt" fields of
    Just alt -> Plain [Image [Str alt] (src, alt)]
    Nothing  -> Plain [Image [Str "image"] (src, "")]
-- | Parse a section header, trying the over- and underlined form first
-- and the underlined-only form second.
header :: GenParser Char ParserState Block
header = choice [doubleHeader, singleHeader] <?> "header"
-- | Parse a header with lines of the same character above and below the
-- title text.  The parse fails if the title is longer than the top
-- border.  The header level is the 1-based position of
-- @DoubleHeader c@ in 'stateHeaderTable'; a style not seen before is
-- appended to the table.
doubleHeader :: GenParser Char ParserState Block
doubleHeader = try $ do
  c <- oneOf underlineChars
  rest <- many (char c)
  let lenTop = length (c:rest)
  skipSpaces
  newline
  txt <- many1 (notFollowedBy blankline >> inline)
  pos <- getPosition
  -- Columns are 1-based, so the title width is the column minus one.
  -- (The subtraction operator was missing here.)
  let len = sourceColumn pos - 1
  if len > lenTop then fail "title longer than border" else return ()
  blankline
  -- the bottom border must be at least as long as the top one
  count lenTop (char c)
  blanklines
  state <- getState
  let headerTable = stateHeaderTable state
      (headerTable', level) =
        case findIndex (== DoubleHeader c) headerTable of
          Just ind -> (headerTable, ind + 1)
          Nothing  -> (headerTable ++ [DoubleHeader c], length headerTable + 1)
  setState (state { stateHeaderTable = headerTable' })
  return $ Header level (normalizeSpaces txt)
-- | Parse a header consisting of a title line followed by an underline
-- made of a single repeated character.  The title must not start with
-- whitespace and the underline must be at least as long as the title.
-- The header level is the 1-based position of @SingleHeader c@ in
-- 'stateHeaderTable'; a style not seen before is appended to the table.
singleHeader :: GenParser Char ParserState Block
singleHeader = try $ do
  notFollowedBy' whitespace
  txt <- many1 (do {notFollowedBy blankline; inline})
  pos <- getPosition
  -- Columns are 1-based, so the title width is the column minus one.
  -- (The subtraction operator was missing here.)
  let len = sourceColumn pos - 1
  blankline
  c <- oneOf underlineChars
  -- One underline character is already consumed; require @len - 1@ more
  -- so that the underline is at least as long as the title.
  count (len - 1) (char c)
  many (char c)
  blanklines
  state <- getState
  let headerTable = stateHeaderTable state
      (headerTable', level) =
        case findIndex (== SingleHeader c) headerTable of
          Just ind -> (headerTable, ind + 1)
          Nothing  -> (headerTable ++ [SingleHeader c], length headerTable + 1)
  setState (state { stateHeaderTable = headerTable' })
  return $ Header level (normalizeSpaces txt)
-- | Parse a horizontal rule: at least four repetitions of one character
-- from 'underlineChars', then a blank rest-of-line and further blank
-- lines.
hrule :: GenParser Char st Block
hrule = try $ do
  ruleChar <- oneOf underlineChars
  count 3 (char ruleChar) >> skipMany (char ruleChar)
  blankline >> blanklines
  return HorizontalRule
-- | Parse one line that begins with the given indentation string.  The
-- indentation is dropped; the rest of the line is returned with a
-- trailing newline.
indentedLine :: String -> GenParser Char st [Char]
indentedLine indents = try $
  fmap (++ "\n") (string indents >> manyTill anyChar newline)
-- | Parse a block of lines that all share the indentation of the first
-- line (which must start with at least one space or tab).  Blank lines
-- inside the block are kept as long as another indented line follows
-- them; trailing blank lines are consumed and discarded.  The common
-- indentation is stripped from the returned text.
indentedBlock :: GenParser Char st [Char]
indentedBlock = do
  prefix <- lookAhead $ many1 (oneOf " \t")
  let plainLine = indentedLine prefix
      afterBlanks = try $ do
        gap <- blanklines
        ln <- indentedLine prefix
        return (gap ++ ln)
  chunks <- many $ choice [plainLine, afterBlanks]
  optional blanklines
  return $ concat chunks
-- | Parse a literal block: the 'codeBlockStart' marker followed by an
-- indented block.  Trailing newlines are stripped and the attributes
-- are empty.
codeBlock :: GenParser Char st Block
codeBlock = try $
  fmap (CodeBlock ("", [], []) . stripTrailingNewlines)
       (codeBlockStart >> indentedBlock)
-- | Parse a @.. raw:: html@ directive; the indented block that follows
-- is returned verbatim as 'RawHtml'.
rawHtmlBlock :: GenParser Char st Block
rawHtmlBlock = try $ do
  string ".. raw:: html"
  blanklines
  raw <- indentedBlock
  return (RawHtml raw)
-- | Parse a @.. raw:: latex@ directive; the indented block that follows
-- is wrapped as a 'TeX' inline inside a 'Para'.
rawLaTeXBlock :: GenParser Char st Block
rawLaTeXBlock = try $
  string ".. raw:: latex" >> blanklines >> indentedBlock >>= \tex ->
    return (Para [TeX tex])
-- | Parse a block quote: an indented block whose de-indented text (with
-- two newlines appended) is parsed recursively as blocks.
blockQuote :: GenParser Char ParserState Block
blockQuote =
  fmap BlockQuote
       (indentedBlock >>= parseFromString parseBlocks . (++ "\n\n"))
-- | Parse a list: bullet, ordered, or definition list, tried in that
-- order.
list :: GenParser Char ParserState Block
list = (bulletList <|> orderedList <|> definitionList) <?> "list"
-- | Parse one definition-list item: a term (inlines up to an 'endline')
-- followed by an indented block, which is parsed recursively as the
-- definition.  Lines starting with @..@ are rejected.
definitionListItem :: GenParser Char ParserState ([Inline], [Block])
definitionListItem = try $ do
  notFollowedBy (try (char '.' >> char '.'))
  termInlines <- many1Till inline endline
  body <- indentedBlock >>= \raw ->
            parseFromString parseBlocks (raw ++ "\n\n")
  return (normalizeSpaces termInlines, body)
-- | Parse one or more definition-list items into a 'DefinitionList'.
definitionList :: GenParser Char ParserState Block
definitionList = do
  items <- many1 definitionListItem
  return (DefinitionList items)
-- | Parse the start of a bullet list item (a marker character followed
-- by at least one space character) and return the total width of the
-- marker plus the whitespace.  Fails if the line is a horizontal rule.
bulletListStart :: GenParser Char st Int
bulletListStart = try $ do
  notFollowedBy' hrule
  oneOf bulletListMarkers
  padding <- many1 spaceChar
  -- one column for the marker itself
  return (1 + length padding)
-- | Parse the start of an ordered list item with the given numbering
-- style and delimiter, and return the horizontal displacement of the
-- marker (as reported by 'withHorizDisplacement') plus the number of
-- space characters that follow it.
orderedListStart :: ListNumberStyle
                 -> ListNumberDelim
                 -> GenParser Char st Int
orderedListStart style delim = try $ do
  markerLen <- fmap snd $
                 withHorizDisplacement (orderedListMarker style delim)
  padding <- many1 spaceChar
  return (markerLen + length padding)
-- | Parse a non-blank continuation line of a list item, indented by
-- @markerLength@ (see 'indentWith').  The indentation is dropped and
-- the line is returned with a trailing newline.
listLine :: Int -> GenParser Char ParserState [Char]
listLine markerLength = try $
  notFollowedBy blankline >>
  indentWith markerLength >>
  fmap (++ "\n") (manyTill anyChar newline)
-- | Parse indentation equivalent to @num@ columns.  If @num@ is smaller
-- than the tab stop ('stateTabStop') only @num@ spaces are accepted;
-- otherwise either @num@ spaces or a tab followed by @num - tabStop@
-- spaces is accepted.
indentWith :: Int -> GenParser Char ParserState [Char]
indentWith num = do
  tabStop <- fmap stateTabStop getState
  if num < tabStop
    then count num (char ' ')
    else choice
           [ try (count num (char ' '))
             -- A tab stands for @tabStop@ columns, so only the
             -- remainder is needed as spaces.  (The subtraction
             -- operator was missing here.)
           , try (char '\t' >> count (num - tabStop) (char ' '))
           ]
-- | Parse the raw text of a list item: the list marker (via @start@),
-- the rest of the first line, and any directly following lines that are
-- indented by the marker width.  Returns the marker width and the text
-- with the indentation removed.
rawListItem :: GenParser Char ParserState Int
            -> GenParser Char ParserState (Int, [Char])
rawListItem start = try $ do
  markerLength <- start
  firstLine <- manyTill anyChar newline
  restLines <- many (listLine markerLength)
  return (markerLength, concat ((firstLine ++ "\n") : restLines))
-- | Parse a continuation of a list item: one or more blank lines
-- followed by one or more lines indented by the marker width.  The
-- blank lines are kept in the returned text.
listContinuation :: Int -> GenParser Char ParserState [Char]
listContinuation markerLength = try $ do
  gap <- many1 blankline
  body <- many1 (listLine markerLength)
  return $ concat (gap : body)
-- | Parse a complete list item whose marker is recognised by @start@,
-- and return its contents parsed as blocks.  While the contents are
-- being parsed 'stateParserContext' is set to 'ListItemState'; the
-- previous context is restored afterwards.
listItem :: GenParser Char ParserState Int
         -> GenParser Char ParserState [Block]
listItem start = try $ do
  (markerLength, firstChunk) <- rawListItem start
  continuations <- many (listContinuation markerLength)
  -- Either zero or more blank lines directly before the next item, or
  -- at least one blank line terminating the list.
  trailing <- choice [ try (many blankline >>~ lookAhead start)
                     , many1 blankline ]
  savedContext <- fmap stateParserContext getState
  updateState $ \st -> st { stateParserContext = ListItemState }
  let itemText = concat (firstChunk : continuations) ++ trailing
  parsed <- parseFromString parseBlocks itemText
  updateState $ \st -> st { stateParserContext = savedContext }
  return parsed
-- | Parse an ordered list.  The start number, style and delimiter are
-- taken (by look-ahead) from the first marker; all items must use the
-- same style and delimiter.
orderedList :: GenParser Char ParserState Block
orderedList = try $ do
  attrs@(_, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
  fmap (OrderedList attrs . compactify)
       (many1 (listItem (orderedListStart style delim)))
-- | Parse a bullet list: one or more items introduced by
-- 'bulletListStart', passed through 'compactify'.
bulletList :: GenParser Char ParserState Block
bulletList = do
  items <- many1 (listItem bulletListStart)
  return $ BulletList (compactify items)
-- | Skip a directive or comment that no other parser handles: a line
-- starting with @..@ (followed by whitespace or a newline), together
-- with all following blank lines and lines that start with a space or
-- tab.  Produces 'Null'.
unknownDirective :: GenParser Char st Block
unknownDirective = try $ do
  string ".."
  notFollowedBy (noneOf " \t\n")
  restOfLine
  many (blanklines <|> indentedRest)
  return Null
  where
    restOfLine   = manyTill anyChar newline
    indentedRest = oneOf " \t" >> restOfLine
-- | Parse a reference name enclosed in single backquotes.  The opening
-- backquote must not be followed by a second one.
quotedReferenceName :: GenParser Char ParserState [Inline]
quotedReferenceName = try $
  char '`' >> notFollowedBy (char '`') >> many1Till inline (char '`')
-- | Parse an unquoted reference name: one or more inlines up to (but
-- not including) the next colon.
unquotedReferenceName :: GenParser Char ParserState [Inline]
unquotedReferenceName = try $ many1Till inline (lookAhead (char ':'))
-- | Parse the given character, but only if it is not immediately
-- followed by another occurrence of the same character.
isolated :: Char -> GenParser Char st Char
isolated ch = try $ do
  c <- char ch
  notFollowedBy (char ch)
  return c
-- | Parse a simple (unquoted, single-word) reference name made of
-- alphanumerics, isolated hyphens and periods, and underscores that are
-- directly followed by an alphanumeric.  Returned as a single 'Str'.
simpleReferenceName :: GenParser Char st [Inline]
simpleReferenceName = fmap (\raw -> [Str raw]) (many1 nameChar)
  where
    nameChar = alphaNum <|> isolated '-' <|> isolated '.' <|> innerUnderscore
    innerUnderscore = try (char '_' >>~ lookAhead alphaNum)
-- | Parse a reference name in a key definition: quoted, simple
-- (immediately followed by a colon), or arbitrary inlines up to a
-- colon, tried in that order.
referenceName :: GenParser Char ParserState [Inline]
referenceName = choice
  [ quotedReferenceName
  , try (simpleReferenceName >>~ lookAhead (char ':'))
  , unquotedReferenceName
  ]
-- | Parse a reference key definition (image, anonymous or regular),
-- prepend it to 'stateKeys', and return one newline for every source
-- line that was consumed, so that line numbers stay the same when the
-- key is removed from the input.
referenceKey :: GenParser Char ParserState [Char]
referenceKey = do
  startPos <- getPosition
  key <- choice [imageKey, anonymousKey, regularKey]
  updateState $ \s -> s { stateKeys = key : stateKeys s }
  optional blanklines
  endPos <- getPosition
  -- Number of lines consumed = end line minus start line.  (The
  -- subtraction operator was missing here.)
  return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-- | Parse the target of a reference key.  Leading spaces and one
-- optional newline are skipped.  The target may continue on following
-- lines that start with whitespace; each such line break (with its
-- surrounding whitespace) is dropped so the pieces are joined directly.
-- Trailing blank lines are consumed.
targetURI :: GenParser Char st [Char]
targetURI = do
  skipSpaces
  optional newline
  many1 (wrapped <|> noneOf "\n") >>~ blanklines
  where
    -- a line break followed by an indented continuation; yields the
    -- first non-whitespace character of the continuation line
    wrapped = try $ do
      many spaceChar
      newline
      many1 spaceChar
      noneOf " \t\n"
-- | Parse a substitution definition for an image, of the form
-- @.. |name| image:: target@.  Returns the name and the target (with an
-- empty title).
imageKey :: GenParser Char ParserState ([Inline], (String, [Char]))
imageKey = try $ do
  name <- string ".. |" >> manyTill inline (char '|')
  target <- skipSpaces >> string "image::" >> targetURI
  return (normalizeSpaces name, (removeLeadingTrailingSpace target, ""))
-- | Parse an anonymous hyperlink target, introduced by @.. __:@ or
-- @__@.  The key is always @[Str "_"]@ and the title is empty.
anonymousKey :: GenParser Char st ([Inline], (String, [Char]))
anonymousKey = try $
  oneOfStrings [".. __:", "__"] >> targetURI >>= \target ->
    return ([Str "_"], (removeLeadingTrailingSpace target, ""))
-- | Parse a named hyperlink target of the form @.. _name: target@.
-- Returns the name and the target (with an empty title).
regularKey :: GenParser Char ParserState ([Inline], (String, [Char]))
regularKey = try $ do
  name <- string ".. _" >> referenceName
  target <- char ':' >> targetURI
  return (normalizeSpaces name, (removeLeadingTrailingSpace target, ""))
-- | Parse a single inline element.  The alternatives are tried in the
-- order listed, so earlier parsers take precedence over later ones.
inline :: GenParser Char ParserState Inline
inline = choice inlineParsers <?> "inline"
  where
    inlineParsers =
      [ link
      , str
      , whitespace
      , endline
      , strong
      , emph
      , code
      , image
      , hyphens
      , superscript
      , subscript
      , escapedChar
      , symbol
      ]
-- | Parse a run of one or more hyphens as a single 'Str'.  An 'endline'
-- directly after the hyphens is consumed and discarded.
hyphens :: GenParser Char ParserState Inline
hyphens = fmap Str (many1 (char '-') >>~ option Space endline)
-- | Parse an escaped character by applying 'escaped' to 'anyChar'.
-- NOTE(review): 'escaped' is defined outside this file; presumably it
-- expects a backslash before the character -- confirm against its
-- definition.
escapedChar :: GenParser Char st Inline
escapedChar = escaped anyChar
-- | Parse a single character from 'specialChars' as a one-character
-- 'Str'.
symbol :: GenParser Char ParserState Inline
symbol = oneOf specialChars >>= \c -> return (Str [c])
-- | Parse inline literal text delimited by double backquotes.  Line
-- breaks inside the literal are replaced by single spaces and the
-- result is passed through 'removeLeadingTrailingSpace'.
code :: GenParser Char ParserState Inline
code = try $ do
  string "``"
  raw <- manyTill anyChar (try (string "``"))
  let flattened = unwords (lines raw)
  return $ Code (removeLeadingTrailingSpace flattened)
-- | Parse emphasised text: inlines enclosed in single asterisks.
emph :: GenParser Char ParserState Inline
emph = do
  contents <- enclosed (char '*') (char '*') inline
  return $ Emph (normalizeSpaces contents)
-- | Parse strongly emphasised text: inlines enclosed in double
-- asterisks.
strong :: GenParser Char ParserState Inline
strong = do
  contents <- enclosed (string "**") (try $ string "**") inline
  return $ Strong (normalizeSpaces contents)
-- | Parse interpreted text with the given role, i.e. @:role:`text`@,
-- and return the text as a single 'Str'.  An escaped space (@"\\ "@)
-- before the construct is skipped.  After the closing backquote there
-- must be an escaped space (consumed), a space, tab or newline (not
-- consumed), or end of input.
interpreted :: [Char] -> GenParser Char st [Inline]
interpreted role = try $ do
  optional (try escapedSpace)
  body <- enclosed (string opener) (char '`') anyChar
  try escapedSpace
    <|> lookAhead (count 1 (oneOf " \t\n"))
    <|> (eof >> return "")
  return [Str body]
  where
    opener       = ':' : role ++ ":`"
    escapedSpace = string "\\ "
-- | Parse superscript text, written as interpreted text with role
-- @sup@.
superscript :: GenParser Char ParserState Inline
superscript = fmap Superscript (interpreted "sup")
-- | Parse subscript text, written as interpreted text with role @sub@.
subscript :: GenParser Char ParserState Inline
subscript = fmap Subscript (interpreted "sub")
-- | Parse one or more space characters as a single 'Space'.
whitespace :: GenParser Char ParserState Inline
whitespace = spaceRun <?> "whitespace"
  where
    spaceRun = do
      many1 spaceChar
      return Space
-- | Parse a run of ordinary characters: anything except the characters
-- in 'specialChars', tab, newline and space.
str :: GenParser Char ParserState Inline
str = fmap Str (many1 (noneOf excluded))
  where
    excluded = specialChars ++ "\t\n "
-- | Parse a newline inside a paragraph as a 'Space'.  The newline must
-- not be followed by a blank line.  Inside a list item
-- ('ListItemState') it must also not be followed by the start of an
-- ordered or bullet list item.
endline :: GenParser Char ParserState Inline
endline = try $ do
  newline
  notFollowedBy blankline
  context <- fmap stateParserContext getState
  if context == ListItemState
    then do
      notFollowedBy (anyOrderedListMarker >> spaceChar)
      notFollowedBy' bulletListStart
    else return ()
  return Space
-- | Parse a link: explicit, reference, or automatic, tried in that
-- order.
link :: GenParser Char ParserState Inline
link = (explicitLink <|> referenceLink <|> autoLink) <?> "link"
-- | Parse a link with an embedded target, of the form
-- @`label <target>`_@.  The target may not contain @>@, newlines or
-- spaces; the title of the resulting 'Link' is empty.
explicitLink :: GenParser Char ParserState Inline
explicitLink = try $ do
  char '`' >> notFollowedBy (char '`')
  linkText <- manyTill (notFollowedBy (char '`') >> inline)
                       (try (spaces >> char '<'))
  target <- manyTill (noneOf ">\n ") (char '>')
  skipSpaces >> string "`_"
  return $ Link (normalizeSpaces linkText)
                (removeLeadingTrailingSpace target, "")
-- | Parse a reference link: a quoted or simple reference name followed
-- by @_@ (named reference) or @__@ (anonymous reference).  The target
-- is looked up in 'stateKeys'; the parse fails if no key matches.  When
-- an anonymous reference is resolved, the matching anonymous key is
-- removed from the table.
referenceLink :: GenParser Char ParserState Inline
referenceLink = try $ do
  label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
  -- a second underscore marks an anonymous reference
  key <- option label' (char '_' >> return anonymous)
  state <- getState
  let keyTable = stateKeys state
  src <- maybe (fail "no corresponding key") return
               (lookupKeySrc keyTable key)
  let remainingKeys
        | key == anonymous = delete (anonymous, src) keyTable
        | otherwise        = keyTable
  setState $ state { stateKeys = remainingKeys }
  return $ Link (normalizeSpaces label') src
  where
    anonymous = [Str "_"]
-- | Parse a bare URI as a link whose label and target are both the URI
-- text.
autoURI :: GenParser Char ParserState Inline
autoURI = fmap (\src -> Link [Str src] (src, "")) uri
-- | Parse a bare e-mail address as a link whose label is the address
-- and whose target is the address prefixed with @mailto:@.
autoEmail :: GenParser Char ParserState Inline
autoEmail = fmap (\src -> Link [Str src] ("mailto:" ++ src, "")) emailAddress
-- | Parse an automatic link: a bare URI or, failing that, a bare e-mail
-- address.
autoLink :: GenParser Char ParserState Inline
autoLink = choice [autoURI, autoEmail]
-- | Parse a substitution reference of the form @|name|@ and resolve it
-- to an 'Image' using 'stateKeys'.  The parse fails if no key matches
-- the name.
image :: GenParser Char ParserState Inline
image = try $ do
  char '|'
  ref <- manyTill inline (char '|')
  keyTable <- fmap stateKeys getState
  target <- maybe (fail "no corresponding key") return
                  (lookupKeySrc keyTable ref)
  return $ Image (normalizeSpaces ref) target