module Text.Pandoc.Readers.Textile ( readTextile) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.ParserCombinators.Parsec
import Text.HTML.TagSoup.Match
import Data.Char ( digitToInt, isUpper )
import Control.Monad ( guard, liftM )
import Control.Applicative ((<$>), (*>), (<*))
-- | Parse a Textile document held in a 'String' into a 'Pandoc' value.
--
-- The supplied parser state is used with @stateOldDashes@ forced to 'True',
-- and two newlines are appended to the input before 'parseTextile' runs.
readTextile :: ParserState -- ^ Initial parser state
            -> String      -- ^ Textile source text
            -> Pandoc
readTextile state s = readWith parseTextile textileState paddedInput
  where
    textileState = state { stateOldDashes = True }
    paddedInput  = s ++ "\n\n"
-- | Top-level document parser; it makes two passes over the input.
--
-- The first pass runs 'noteBlock' or 'lineClump' repeatedly up to end of
-- input; the concatenation of their string results becomes the new input.
-- The second pass restarts at the saved position and parses that input
-- into blocks.
parseTextile :: GenParser Char ParserState Pandoc
parseTextile = do
  -- Raw passthrough and smart punctuation are switched on unconditionally.
  updateState $ \st -> st { stateParseRaw = True, stateSmart = True }
  many blankline
  origin <- getPosition
  -- First pass over the whole document.
  firstPass <- manyTill (noteBlock <|> lineClump) eof
  setInput (concat firstPass)
  setPosition origin
  -- 'noteBlock' prepends each note it finds, so the list is reversed here.
  updateState $ \st -> st { stateNotes = reverse (stateNotes st) }
  -- Second pass: the actual block structure.
  body <- parseBlocks
  return $ Pandoc (Meta [] [] []) body
-- | Parse the marker that opens a footnote definition: optional leading
-- spaces, the literal @fn@, one or more digits, and a terminating @.@.
-- Returns the digits as the note reference.
--
-- At least one digit is required. With zero digits allowed, a paragraph
-- beginning with @fn.@ would be captured by 'noteBlock' under an empty
-- reference, which the 'note' parser (it demands @many1 digit@) can never
-- look up, so that text would silently vanish from the output.
noteMarker :: GenParser Char ParserState [Char]
noteMarker = skipMany spaceChar >> string "fn" >> many1Till digit (char '.')
-- | Parse one footnote definition and record it in the parser state.
--
-- The note's reference and raw text (with a trailing newline added) are
-- prepended to @stateNotes@. The result is a string of newlines, one for
-- each source line between the start and end positions of the note, so
-- that the caller can substitute it for the consumed text.
noteBlock :: GenParser Char ParserState [Char]
noteBlock = try $ do
  startPos <- getPosition
  ref <- noteMarker
  optional blankline
  -- The note body runs until blank lines or the start of another note.
  contents <- liftM unlines $ many1Till anyLine (blanklines <|> noteBlock)
  endPos <- getPosition
  let newnote = (ref, contents ++ "\n")
  updateState $ \s -> s { stateNotes = newnote : stateNotes s }
  -- Number of lines consumed = end line minus start line.
  return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-- | Parse a sequence of blocks up to the end of input.
parseBlocks :: GenParser Char ParserState [Block]
parseBlocks = block `manyTill` eof
-- | Block-level parsers, listed in the order in which 'block' tries them.
-- Tables and paragraphs are wrapped with 'maybeExplicitBlock', so they may
-- optionally be introduced by an explicit @table.@ or @p.@ prefix.
blockParsers :: [GenParser Char ParserState Block]
blockParsers = [ codeBlock
               , header
               , blockQuote
               , hrule
               , anyList
               , rawHtmlBlock
               , rawLaTeXBlock'
               , maybeExplicitBlock "table" table
               , maybeExplicitBlock "p" para
               , nullBlock ]
-- | Parse a single block by trying each entry of 'blockParsers' in turn;
-- failures are reported under the label @block@.
block :: GenParser Char ParserState Block
block = choice blockParsers <?> "block"
-- | A code block, written either with the @bc.@ prefix or inside a
-- @pre@ HTML element.
codeBlock :: GenParser Char ParserState Block
codeBlock = choice [codeBlockBc, codeBlockPre]
-- | Code block introduced by the literal prefix @bc. @; its lines run up to
-- the next blank line(s) and are joined with 'unlines'.
codeBlockBc :: GenParser Char ParserState Block
codeBlockBc = try $ do
  string "bc. "
  codeLines <- anyLine `manyTill` blanklines
  return (CodeBlock ("",[],[]) (unlines codeLines))
-- | Code block delimited by an opening @pre@ tag (without attributes) and
-- a closing @pre@ tag followed by a block break. One leading and one
-- trailing newline, if present, are removed from the contents.
codeBlockPre :: GenParser Char ParserState Block
codeBlockPre = try $ do
  htmlTag (tagOpen (=="pre") null)
  body <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak)
  return $ CodeBlock ("",[],[]) (dropTrailingNewline (dropLeadingNewline body))
  where
    -- Strip a single newline from the front, if there is one.
    dropLeadingNewline ('\n':rest) = rest
    dropLeadingNewline txt         = txt
    -- Strip a single newline from the back, if there is one.
    dropTrailingNewline txt = case reverse txt of
      '\n':rest -> reverse rest
      _         -> txt
-- | Header of the form @hN.@ with N from 1 to 6, optional attributes
-- before the dot, and inline content running up to the next block break.
header :: GenParser Char ParserState Block
header = try $ do
  char 'h'
  levelChar <- oneOf "123456"
  optional attributes
  char '.'
  whitespace
  title <- manyTill inline blockBreak
  return $ Header (digitToInt levelChar) (normalizeSpaces title)
-- | Block quote: @bq@, optional attributes, a dot and whitespace, followed
-- by a single paragraph which becomes the quote's only block.
blockQuote :: GenParser Char ParserState Block
blockQuote = try $ do
  string "bq"
  optional attributes
  char '.'
  whitespace
  quoted <- para
  return (BlockQuote [quoted])
-- | Horizontal rule: at least three occurrences of the same character
-- (@-@ or @*@), optionally separated by spaces, filling the rest of the
-- line. Any blank lines that follow are consumed as well.
hrule :: GenParser Char st Block
hrule = try $ do
  skipSpaces
  ruleChar <- oneOf "-*"
  -- Two further occurrences of the same character are mandatory.
  let nextRuleChar = skipSpaces >> char ruleChar
  count 2 nextRuleChar
  skipMany (spaceChar <|> char ruleChar)
  newline
  optional blanklines
  return HorizontalRule
-- | A top-level list (depth 1) of any kind, which must be followed by
-- blank lines.
anyList :: GenParser Char ParserState Block
anyList = try $ do
  list <- anyListAtDepth 1
  blanklines
  return list
-- | A bullet list or ordered list at the given nesting depth, or a
-- definition list (which takes no depth).
anyListAtDepth :: Int -> GenParser Char ParserState Block
anyListAtDepth depth =
      bulletListAtDepth depth
  <|> orderedListAtDepth depth
  <|> definitionList
-- | A bullet list: one or more bullet items at the given depth.
bulletListAtDepth :: Int -> GenParser Char ParserState Block
bulletListAtDepth depth = try $ do
  items <- many1 (bulletListItemAtDepth depth)
  return (BulletList items)
-- | A bullet list item at the given depth; the item marker is @*@.
bulletListItemAtDepth :: Int -> GenParser Char ParserState [Block]
bulletListItemAtDepth depth = genericListItemAtDepth '*' depth
-- | An ordered list: one or more ordered items at the given depth. The
-- list always starts at 1 with default numbering style and delimiter.
orderedListAtDepth :: Int -> GenParser Char ParserState Block
orderedListAtDepth depth =
  try $ OrderedList (1, DefaultStyle, DefaultDelim)
          <$> many1 (orderedListItemAtDepth depth)
-- | An ordered list item at the given depth; the item marker is @#@.
orderedListItemAtDepth :: Int -> GenParser Char ParserState [Block]
orderedListItemAtDepth depth = genericListItemAtDepth '#' depth
-- | A list item at a given depth. The marker character is repeated
-- @depth@ times, optionally followed by attributes, then whitespace and
-- one line of inlines. A nested list one level deeper may follow and is
-- appended after the item's 'Plain' content.
genericListItemAtDepth :: Char -> Int -> GenParser Char ParserState [Block]
genericListItemAtDepth c depth = try $ do
  count depth (char c)
  optional attributes
  whitespace
  content <- inlines
  nested <- option [] $ do
    sub <- anyListAtDepth (depth + 1)
    return [sub]
  return (Plain content : nested)
-- | A definition list: one or more definition list items.
definitionList :: GenParser Char ParserState Block
definitionList = try $ do
  items <- many1 definitionListItem
  return (DefinitionList items)
-- | One definition list item: @- term := definition@.
--
-- The definition is either the rest of the line (wrapped in 'Plain'), or a
-- multi-line body terminated by @=:@ at the end of a line, which is
-- re-parsed as blocks.
definitionListItem :: GenParser Char ParserState ([Inline], [[Block]])
definitionListItem = try $ do
  string "- "
  term <- many1Till inline (try (whitespace >> string ":="))
  defs <- inlineDef <|> multilineDef
  return (term, defs)
  where
    -- Definition given on the same line as the term.
    inlineDef :: GenParser Char ParserState [[Block]]
    inlineDef = try $ do
      whitespace
      body <- inlines
      return [[Plain body]]
    -- Definition spanning several lines, closed by "=:" plus newline.
    multilineDef :: GenParser Char ParserState [[Block]]
    multilineDef = try $ do
      optional whitespace
      newline
      raw <- many1Till anyChar (try (string "=:" >> newline))
      blocks <- parseFromString parseBlocks (raw ++ "\n\n")
      return [blocks]
-- | End of a block: either a newline followed by blank lines (consumed),
-- or an upcoming raw HTML block (only looked at, not consumed).
blockBreak :: GenParser Char ParserState ()
blockBreak = blankGap <|> upcomingHtml
  where
    blankGap     = try $ newline >> blanklines >> return ()
    upcomingHtml = lookAhead rawHtmlBlock >> return ()
-- | A block-level HTML tag, kept as a raw @html@ block. Blank lines after
-- the tag are skipped.
rawHtmlBlock :: GenParser Char ParserState Block
rawHtmlBlock = try $ do
  rawTag <- snd <$> htmlTag isBlockTag
  optional blanklines
  return (RawBlock "html" rawTag)
-- | A raw LaTeX block, kept as a raw @latex@ block. 'failIfStrict' runs
-- first; trailing whitespace after the LaTeX is consumed.
rawLaTeXBlock' :: GenParser Char ParserState Block
rawLaTeXBlock' = do
  failIfStrict
  tex <- rawLaTeXBlock
  spaces
  return (RawBlock "latex" tex)
-- | A paragraph: inlines up to the next block break, passed through
-- 'normalizeSpaces'.
para :: GenParser Char ParserState Block
para = try $ do
  content <- manyTill inline blockBreak
  return $ Para (normalizeSpaces content)
-- | A table cell: a non-empty run of characters other than @|@ and
-- newline, re-parsed as inlines and wrapped in a single 'Plain' block.
tableCell :: GenParser Char ParserState TableCell
tableCell = wrap <$> (many1 (noneOf "|\n") >>= parseFromString (many1 inline))
  where
    wrap content = [Plain (normalizeSpaces content)]
-- | A table row: a leading @|@, one or more cells each terminated by @|@,
-- then a newline.
tableRow :: GenParser Char ParserState [TableCell]
tableRow = try $ do
  char '|'
  cells <- endBy1 tableCell (char '|')
  newline
  return cells
-- | One or more consecutive table rows.
tableRows :: GenParser Char ParserState [[TableCell]]
tableRows = many1 tableRow
-- | A table header row: cells each introduced by @|_.@, with a closing
-- @|@ and a newline at the end of the line.
tableHeaders :: GenParser Char ParserState [TableCell]
tableHeaders = try $ do
  separator
  cells <- sepBy1 tableCell separator
  char '|'
  newline
  return cells
  where
    separator = try (string "|_.")
-- | A table: an optional header row, one or more body rows, then blank
-- lines. The column count is the larger of the header width and the width
-- of the first body row; every column gets default alignment and a
-- relative width of 0.0. The caption is empty.
table :: GenParser Char ParserState Block
table = try $ do
  headers <- option [] tableHeaders
  rows <- tableRows
  blanklines
  let firstRowWidth = case rows of
        (r:_) -> length r
        []    -> 0  -- not reachable: 'tableRows' yields at least one row
      nbOfCols = max (length headers) firstRowWidth
      aligns   = replicate nbOfCols AlignDefault
      widths   = replicate nbOfCols 0.0
  return $ Table [] aligns widths headers rows
-- | Run a block parser, optionally preceded by an explicit block prefix:
-- the given name, optional attributes, a dot, and then either whitespace
-- or a line ending.
maybeExplicitBlock :: String  -- ^ block tag name, e.g. @p@ or @table@
                   -> GenParser Char ParserState Block -- ^ block parser
                   -> GenParser Char ParserState Block
maybeExplicitBlock name blk = try $ optional explicitPrefix >> blk
  where
    explicitPrefix = try $ do
      string name
      optional attributes
      char '.'
      try whitespace <|> endline
-- | Parse a single inline element by trying each entry of 'inlineParsers'
-- in turn; failures are reported under the label @inline@.
inline :: GenParser Char ParserState Inline
inline = choice inlineParsers <?> "inline"
-- | Inline elements up to (and consuming) the next newline.
inlines :: GenParser Char ParserState [Inline]
inlines = inline `manyTill` newline
-- | Inline parsers, listed in the order in which 'inline' tries them.
inlineParsers :: [GenParser Char ParserState Inline]
inlineParsers = [ autoLink
                , str
                , whitespace
                , endline
                , code
                , escapedInline
                , htmlSpan
                , rawHtmlInline
                , rawLaTeXInline'
                , note
                  -- inline markup wrapped in square brackets
                , try $ (char '[' *> inlineMarkup <* char ']')
                , inlineMarkup
                , link
                , image
                , mark
                , smartPunctuation inline
                  -- single boundary character that nothing above claimed
                , symbol
                ]
-- | Inline markup delimited by a repeated border. Two-character borders
-- are tried before the single-character ones.
inlineMarkup :: GenParser Char ParserState Inline
inlineMarkup = choice
  [ doubled "??" (Cite [])
  , doubled "**" Strong
  , doubled "__" Emph
  , single  '*'  Strong
  , single  '_'  Emph
  , single  '+'  Emph
  , single  '-'  Strikeout
  , single  '^'  Superscript
  , single  '~'  Subscript
  ]
  where
    -- Markup whose border is a fixed string.
    doubled delim = simpleInline (string delim)
    -- Markup whose border is a single character.
    single delim = simpleInline (char delim)
-- | A trademark, registered or copyright mark written in parentheses.
mark :: GenParser Char st Inline
mark = try $ do
  char '('
  choice [try tm, try reg, copy]
-- | Rest of a registered mark after the opening parenthesis: @R)@ or
-- @r)@, producing the character with code 174.
reg :: GenParser Char st Inline
reg = oneOf "Rr" >> char ')' >> return (Str "\174")
-- | Rest of a trademark after the opening parenthesis: @TM)@ in any
-- letter case, producing the character with code 8482.
tm :: GenParser Char st Inline
tm = oneOf "Tt" >> oneOf "Mm" >> char ')' >> return (Str "\8482")
-- | Rest of a copyright mark after the opening parenthesis: @C)@ or
-- @c)@, producing the character with code 169.
copy :: GenParser Char st Inline
copy = oneOf "Cc" >> char ')' >> return (Str "\169")
-- | A footnote reference such as @[1]@. The digits are looked up in
-- @stateNotes@; when found, the stored raw text is parsed as blocks and
-- wrapped in 'Note'. An unknown reference makes the parser fail.
note :: GenParser Char ParserState Inline
note = try $ do
  char '['
  ref <- many1 digit
  char ']'
  st <- getState
  maybe (fail "note not found")
        (liftM Note . parseFromString parseBlocks)
        (lookup ref (stateNotes st))
-- | Characters treated as markup. They are word boundaries (see
-- 'wordBoundaries'), but 'hyphenedWords' still accepts one inside a word
-- when it is directly followed by a non-boundary character.
markupChars :: [Char]
markupChars = "\\[]*#_@~-+^|%="
-- | Whitespace and punctuation characters that end a word; combined with
-- 'markupChars' to form 'wordBoundaries'.
stringBreakers :: [Char]
stringBreakers = " \t\n('-.,:!?;\"<>"
-- | Every character that may not start a word: the markup characters
-- followed by the string breakers.
wordBoundaries :: [Char]
wordBoundaries = markupChars ++ stringBreakers
-- | A word, possibly made of several parts joined by single hyphens.
--
-- Each part starts with a non-boundary character and continues with
-- non-boundary characters, or with markup characters that are directly
-- followed by a non-boundary character.
hyphenedWords :: GenParser Char ParserState String
hyphenedWords = try $ do
  firstChar <- noneOf wordBoundaries
  restChars <- many (plainChar <|> embeddedMarkup)
  let wd = firstChar : restChars
  -- Optional continuation: a hyphen followed by another word.
  option wd $ try $ do
    char '-'
    more <- hyphenedWords
    return (wd ++ "-" ++ more)
  where
    plainChar = noneOf wordBoundaries
    -- A markup character counts as part of the word only when the next
    -- character is an ordinary word character.
    embeddedMarkup = try $ do
      m <- oneOf markupChars
      lookAhead (noneOf wordBoundaries)
      return m
-- | A string of text. When the word is entirely upper case and directly
-- followed by a parenthesised group, the two are merged into one string
-- of the form @WORD (group)@. Calls 'updateLastStrPos' after the word
-- has been read.
str :: GenParser Char ParserState Inline
str = do
  baseStr <- hyphenedWords
  fullStr <- option baseStr (try (withAcronym baseStr))
  updateLastStrPos
  return (Str fullStr)
  where
    -- Upper-case word followed by its expansion in parentheses.
    withAcronym word = do
      guard (all isUpper word)
      acro <- enclosed (char '(') (char ')') anyChar
      return (word ++ " (" ++ acro ++ ")")
-- | A span written as @%@, a mandatory attribute group, then text up to
-- the closing @%@. The attributes are discarded and the text is returned
-- as a plain 'Str'.
htmlSpan :: GenParser Char ParserState Inline
htmlSpan = try $ do
  char '%'
  attributes
  content <- manyTill anyChar (char '%')
  return (Str content)
-- | One or more space characters, collapsed into a single 'Space'.
whitespace :: GenParser Char ParserState Inline
whitespace = (many1 spaceChar >> return Space) <?> "whitespace"
-- | A newline that is not followed by a blank line; it becomes a hard
-- 'LineBreak'.
endline :: GenParser Char ParserState Inline
endline = try $ newline >> notFollowedBy blankline >> return LineBreak
-- | An inline-level HTML tag, kept as a raw @html@ inline.
rawHtmlInline :: GenParser Char ParserState Inline
rawHtmlInline = do
  (_, rawTag) <- htmlTag isInlineTag
  return (RawInline "html" rawTag)
-- | Raw inline LaTeX; 'failIfStrict' runs first.
rawLaTeXInline' :: GenParser Char ParserState Inline
rawLaTeXInline' = try (failIfStrict >> rawLaTeXInline)
-- | A link of the form @"label":url@. The URL extends up to, but not
-- including, the first whitespace character, or one of @.;,:@ that is
-- itself followed by whitespace. The link title is left empty.
link :: GenParser Char ParserState Inline
link = try $ do
  name <- surrounded (char '"') inline
  char ':'
  url <- manyTill anyChar (lookAhead urlEnd)
  return $ Link name (url, "")
  where
    urlEnd = space <|> try (oneOf ".;,:" >> (space <|> newline))
-- | A bare URI or e-mail address, turned into a link whose label is the
-- original text.
autoLink :: GenParser Char ParserState Inline
autoLink = toLink <$> (try uri <|> try emailAddress)
  where
    toLink (orig, src) = Link [Str orig] (src, "")
-- | An image of the form @!src!@ or @!src(alt)!@. The alternative text,
-- when given, is used both as the image description and as its title.
image :: GenParser Char ParserState Inline
image = try $ do
  char '!'
  notFollowedBy space
  src <- manyTill anyChar (lookAhead (oneOf "!("))
  alt <- option "" $ try $ char '(' >> manyTill anyChar (char ')')
  char '!'
  return $ Image [Str alt] (src, alt)
-- | Text escaped from Textile processing, either with @==@ delimiters or
-- with a @notextile@ element.
escapedInline :: GenParser Char ParserState Inline
escapedInline = choice [escapedEqs, escapedTag]
-- | Verbatim text enclosed between @==@ and @==@.
escapedEqs :: GenParser Char ParserState Inline
escapedEqs = try $ do
  string "=="
  verbatim <- manyTill anyChar (try (string "=="))
  return (Str verbatim)
-- | Verbatim text enclosed between @\<notextile\>@ and @\<\/notextile\>@.
escapedTag :: GenParser Char ParserState Inline
escapedTag = try $ do
  string "<notextile>"
  verbatim <- manyTill anyChar (try (string "</notextile>"))
  return (Str verbatim)
-- | A single word-boundary character, returned as a one-character 'Str'.
symbol :: GenParser Char ParserState Inline
symbol = do
  c <- oneOf wordBoundaries
  return (Str [c])
-- | Inline code, written either between @\@@ signs or inside a @tt@
-- element.
code :: GenParser Char ParserState Inline
code = choice [code1, code2]
-- | Inline code delimited by @\@@ on both sides.
code1 :: GenParser Char ParserState Inline
code1 = do
  verbatim <- surrounded (char '@') anyChar
  return (Code nullAttr verbatim)
-- | Inline code delimited by an opening @tt@ tag (without attributes) and
-- a closing @tt@ tag.
--
-- The whole parser is wrapped in 'try'. If the closing tag is missing,
-- 'manyTill' has already consumed input by the time it fails; without
-- backtracking, that failure would stop 'choice' and '<|>' from trying
-- the remaining inline parsers on the same text.
code2 :: GenParser Char ParserState Inline
code2 = try $ do
  htmlTag (tagOpen (=="tt") null)
  Code nullAttr <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt"))
-- | An attribute group: text enclosed in parentheses, braces or square
-- brackets. Returns the enclosed text.
attributes :: GenParser Char ParserState String
attributes = choice [ bracketed '(' ')'
                    , bracketed '{' '}'
                    , bracketed '[' ']' ]
  where
    bracketed open close = enclosed (char open) (char close) anyChar
-- | Parse content enclosed by the same border parser on both sides; the
-- closing border is attempted with 'try'.
surrounded :: GenParser Char st t   -- ^ border parser (start and end)
           -> GenParser Char st a   -- ^ content parser
           -> GenParser Char st [a]
surrounded border content = enclosed border (try border) content
-- | Inline markup surrounded by a border on both sides. Each enclosed
-- inline may be preceded by an attribute group, which is discarded. The
-- collected inlines go through 'normalizeSpaces' and are then wrapped
-- with the given constructor.
simpleInline :: GenParser Char ParserState t   -- ^ surrounding border
             -> ([Inline] -> Inline)           -- ^ inline constructor
             -> GenParser Char ParserState Inline
simpleInline border construct = do
  body <- surrounded border inlineWithAttribute
  return (construct (normalizeSpaces body))
  where
    inlineWithAttribute = try (optional attributes) >> inline
-- | Wrap a value in a one-element list.
singleton :: a -> [a]
singleton = (: [])