{-# LANGUAGE CPP #-}
{-# LANGUAGE ExplicitForAll #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Parsing ( take1WhileP,
takeP,
countChar,
textStr,
anyLine,
anyLineNewline,
indentWith,
manyChar,
many1Char,
manyTillChar,
many1TillChar,
many1Till,
manyUntil,
manyUntilChar,
sepBy1',
notFollowedBy',
oneOfStrings,
oneOfStringsCI,
spaceChar,
nonspaceChar,
skipSpaces,
blankline,
blanklines,
gobbleSpaces,
gobbleAtMostSpaces,
enclosed,
stringAnyCase,
parseFromString,
parseFromString',
lineClump,
charsInBalanced,
romanNumeral,
emailAddress,
uri,
mathInline,
mathDisplay,
withHorizDisplacement,
withRaw,
escaped,
characterReference,
upperRoman,
lowerRoman,
decimal,
lowerAlpha,
upperAlpha,
anyOrderedListMarker,
orderedListMarker,
charRef,
lineBlockLines,
tableWith,
widthsFromIndices,
gridTableWith,
gridTableWith',
readWith,
readWithM,
testStringWith,
guardEnabled,
guardDisabled,
updateLastStrPos,
notAfterString,
logMessage,
reportLogMessages,
ParserState (..),
HasReaderOptions (..),
HasIdentifierList (..),
HasMacros (..),
HasLogMessages (..),
HasLastStrPosition (..),
HasIncludeFiles (..),
defaultParserState,
HeaderType (..),
ParserContext (..),
QuoteContext (..),
HasQuoteContext (..),
NoteTable,
NoteTable',
KeyTable,
SubstTable,
Key (..),
toKey,
registerHeader,
smartPunctuation,
singleQuoteStart,
singleQuoteEnd,
doubleQuoteStart,
doubleQuoteEnd,
ellipses,
apostrophe,
dash,
nested,
citeKey,
Parser,
ParserT,
F,
Future(..),
runF,
askF,
asksF,
returnF,
trimInlinesF,
token,
(<+?>),
extractIdClass,
insertIncludedFile,
insertIncludedFileF,
Stream,
runParser,
runParserT,
parse,
tokenPrim,
anyToken,
getInput,
setInput,
unexpected,
char,
letter,
digit,
alphaNum,
skipMany,
skipMany1,
spaces,
space,
anyChar,
satisfy,
newline,
string,
count,
eof,
noneOf,
oneOf,
lookAhead,
notFollowedBy,
many,
many1,
manyTill,
(<|>),
(<?>),
choice,
try,
sepBy,
sepBy1,
sepEndBy,
sepEndBy1,
endBy,
endBy1,
option,
optional,
optionMaybe,
getState,
setState,
updateState,
SourcePos,
getPosition,
setPosition,
sourceColumn,
sourceLine,
setSourceColumn,
setSourceLine,
incSourceColumn,
incSourceLine,
newPos,
initialPos,
Line,
Column,
ParseError
)
where
import Control.Monad.Identity
import Control.Monad.Reader
import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper,
isPunctuation, isSpace, ord, toLower, toUpper)
import Data.Default
import Data.Functor (($>))
import Data.List (intercalate, transpose)
import qualified Data.Map as M
import Data.Maybe (mapMaybe, fromMaybe)
import qualified Data.Set as Set
import Data.String
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Asciify (toAsciiChar)
import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report)
import Text.Pandoc.Definition
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Readers.LaTeX.Types (Macro)
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.Pandoc.XML (fromEntities)
import Text.Parsec hiding (token)
import Text.Parsec.Pos (initialPos, newPos, updatePosString)
import Control.Monad.Except
import Text.Pandoc.Error
-- | Alias for Parsec's non-transformer parser type 'Parsec'.
type Parser t s = Parsec t s
-- | Alias for Parsec's parser transformer 'ParsecT'.
type ParserT = ParsecT
-- | A value of type @a@ that can only be produced once an environment of
-- type @s@ is supplied; a newtype wrapper around @'Reader' s a@.
newtype Future s a = Future { runDelayed :: Reader s a }
  deriving (Monad, Applicative, Functor)
-- | A 'Future' whose environment is the 'ParserState'.
type F = Future ParserState
-- | Run a 'Future' by supplying the environment it is waiting for.
runF :: Future s a -> s -> a
runF future = runReader (runDelayed future)
-- | A 'Future' that yields the whole environment unchanged.
askF :: Future s s
askF = Future (asks id)
-- | A 'Future' that yields the result of applying the given projection to
-- the environment.
asksF :: (s -> a) -> Future s a
asksF = Future . asks
-- | Wrap a plain value in a constant 'Future' and return it in the
-- surrounding monad.
returnF :: Monad m => a -> m (Future s a)
returnF x = return (return x)
-- | Apply 'trimInlines' to the eventual result of a 'Future'.
trimInlinesF :: Future s Inlines -> Future s Inlines
trimInlinesF future = trimInlines <$> future
-- | Futures combine by combining their eventual results.
instance Semigroup a => Semigroup (Future s a) where
  left <> right = (<>) <$> left <*> right
-- | The empty 'Future' is the constant future holding 'mempty'.
instance (Semigroup a, Monoid a) => Monoid (Future s a) where
  mempty = pure mempty
  mappend left right = left <> right
-- | Run a character parser exactly @n@ times (via 'count') and pack the
-- results into a 'Text'.
countChar :: (Stream s m Char, Monad m)
          => Int
          -> ParsecT s st m Char
          -> ParsecT s st m Text
countChar n p = T.pack <$> count n p
-- | Like 'string', but takes and returns 'Text': the literal is matched
-- after unpacking it, and the original 'Text' is returned on success.
textStr :: Stream s m Char => Text -> ParsecT s u m Text
textStr t = t <$ string (T.unpack t)
-- | Parse one or more characters satisfying the predicate and return them
-- as 'Text'.  The first character is consumed with 'satisfy'; the remainder
-- is split off the input directly with 'T.span', after which the input and
-- source position are updated by hand.
take1WhileP :: Monad m
            => (Char -> Bool)
            -> ParserT Text st m Text
take1WhileP f = do
  firstChar <- satisfy f
  remaining <- getInput
  startPos <- getPosition
  let (matched, leftover) = T.span f remaining
      -- Tabs and newlines move the position irregularly, so only when the
      -- predicate can accept them is the matched text walked character by
      -- character; otherwise the column is simply advanced by its length.
      endPos
        | f '\t' || f '\n' = updatePosString startPos (T.unpack matched)
        | otherwise = incSourceColumn startPos (T.length matched)
  setInput leftover
  setPosition endPos
  return (T.cons firstChar matched)
-- | Take up to @n@ characters of input ('T.splitAt' returns fewer when the
-- input is shorter).  Fails when @n@ is not positive or the input is empty.
takeP :: Monad m => Int -> ParserT Text st m Text
takeP n = do
  guard (n > 0)
  input <- getInput
  startPos <- getPosition
  let (taken, remainder) = T.splitAt n input
  -- 'anyChar' makes the parser fail on empty input and registers input
  -- consumption with Parsec; the real input is installed right after.
  _ <- anyChar
  setInput remainder
  setPosition (updatePosString startPos (T.unpack taken))
  return taken
-- | Parse the rest of the current line, consuming the terminating newline
-- but not including it in the result.  Fails when no newline remains in the
-- input.
anyLine :: Monad m => ParserT Text st m Text
anyLine = do
  input <- getInput
  startPos <- getPosition
  let (line, afterLine) = T.break (== '\n') input
  case T.uncons afterLine of
    Just ('\n', rest) -> do
      -- Consume one character through Parsec so that input consumption is
      -- registered, then install the real remaining input and position.
      _ <- anyChar
      setInput rest
      setPosition (incSourceLine (setSourceColumn startPos 1) 1)
      return line
    _ -> mzero
-- | Like 'anyLine', but the returned text ends with a newline character.
anyLineNewline :: Monad m => ParserT Text st m Text
anyLineNewline = fmap (`T.snoc` '\n') anyLine
-- | Parse indentation worth @num@ spaces.  When @num@ is at least the
-- configured tab stop, a tab may stand in for one tab stop's worth of
-- spaces, with the remainder parsed recursively.
indentWith :: Stream s m Char
           => HasReaderOptions st
           => Int -> ParserT s st m Text
indentWith num = do
  tabStop <- getOption readerTabStop
  let spacesOnly = countChar num (char ' ')
      tabThenRest = char '\t' >> indentWith (num - tabStop)
  if num < tabStop
    then spacesOnly
    else choice [try spacesOnly, try tabThenRest]
-- | Like 'many', but packs the parsed characters into a 'Text'.
manyChar :: Stream s m t
         => ParserT s st m Char
         -> ParserT s st m Text
manyChar p = T.pack <$> many p
-- | Like 'many1', but packs the parsed characters into a 'Text'.
many1Char :: Stream s m t
          => ParserT s st m Char
          -> ParserT s st m Text
many1Char p = T.pack <$> many1 p
-- | Like 'manyTill', but packs the parsed characters into a 'Text'.
manyTillChar :: Stream s m t
             => ParserT s st m Char
             -> ParserT s st m a
             -> ParserT s st m Text
manyTillChar p end = T.pack <$> manyTill p end
-- | Like 'manyTill', but requires at least one item: fails if the end
-- parser would succeed immediately.
many1Till :: (Show end, Stream s m t)
          => ParserT s st m a
          -> ParserT s st m end
          -> ParserT s st m [a]
many1Till p end = do
  notFollowedBy' end
  (:) <$> p <*> manyTill p end
-- | Like 'many1Till', but packs the parsed characters into a 'Text'.
many1TillChar :: (Show end, Stream s m t)
              => ParserT s st m Char
              -> ParserT s st m end
              -> ParserT s st m Text
many1TillChar p end = T.pack <$> many1Till p end
-- | Repeatedly apply @p@ until @end@ succeeds, returning both the collected
-- results of @p@ and the result of @end@.  At each step @end@ is tried
-- first.
manyUntil :: ParserT s u m a
          -> ParserT s u m b
          -> ParserT s u m ([a], b)
manyUntil p end = loop
  where
    loop = finish <|> step
    finish = (\e -> ([], e)) <$> end
    step = do
      x <- p
      (xs, e) <- loop
      return (x : xs, e)
-- | Like 'manyUntil', but packs the collected characters into a 'Text'.
manyUntilChar :: ParserT s u m Char
              -> ParserT s u m b
              -> ParserT s u m (Text, b)
manyUntilChar p end =
  fmap (\(chars, e) -> (T.pack chars, e)) (manyUntil p end)
-- | A variant of 'sepBy1' in which each \"separator then item\" pair is
-- wrapped in 'try', so a separator not followed by an item is left
-- unconsumed.
sepBy1' :: ParsecT s u m a
        -> ParsecT s u m sep
        -> ParsecT s u m [a]
sepBy1' p sep = do
  firstItem <- p
  moreItems <- many (try (sep >> p))
  return (firstItem : moreItems)
-- | A more general form of 'notFollowedBy' that works with any parser whose
-- result can be shown.  Succeeds, consuming nothing, exactly when @p@
-- fails; otherwise fails with an \"unexpected\" message built from @p@'s
-- result.
notFollowedBy' :: (Show b, Stream s m a) => ParserT s st m b -> ParserT s st m ()
notFollowedBy' p =
  -- The inner alternation yields the action to run next: a failure if @p@
  -- matched, a no-op otherwise; 'join' then runs it.
  try . join $ (unexpected . show <$> try p) <|> return (return ())
-- | 'Text' front end for 'oneOfStrings''': unpacks the candidates, runs the
-- 'String'-based matcher, and packs the result.
oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
oneOfStrings' f candidates =
  T.pack <$> oneOfStrings'' f (map T.unpack candidates)
-- | Match one of the candidate strings, comparing characters with the
-- supplied relation.  One input character is read at a time and the
-- candidate set is narrowed to those whose next character matches; a longer
-- match is preferred over stopping at a candidate that has just been
-- exhausted.  The returned string consists of the input characters
-- actually read.
oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
oneOfStrings'' _ [] = Prelude.fail "no strings"
oneOfStrings'' matches strs = try $ do
  c <- anyChar
  let remainders = [rest | (x:rest) <- strs, x `matches` c]
      notFound = Prelude.fail "not found"
  if null remainders
    then notFound
    else ((c :) <$> oneOfStrings'' matches remainders)
           <|> (if any null remainders then return [c] else notFound)
-- | Parse any one of the given strings, comparing characters exactly.
oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text
oneOfStrings candidates = oneOfStrings' (==) candidates
-- | Parse any one of the given strings, comparing characters
-- case-insensitively.
oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text
oneOfStringsCI = oneOfStrings' sameIgnoringCase
  where
    sameIgnoringCase a b = lowered a == lowered b
    -- ASCII characters are handled arithmetically; 'toLower' is only
    -- called for non-ASCII characters.
    lowered c
      | isAsciiUpper c = chr (ord c + 32)
      | isAscii c = c
      | otherwise = toLower c
-- | Parse a single space or tab.
spaceChar :: Stream s m Char => ParserT s st m Char
spaceChar = oneOf " \t"
-- | Parse any character other than tab, newline, space or carriage return.
nonspaceChar :: Stream s m Char => ParserT s st m Char
nonspaceChar = noneOf "\t\n \r"
-- | Skip zero or more spaces or tabs.
skipSpaces :: Stream s m Char => ParserT s st m ()
skipSpaces = skipMany (satisfy isBlank)
  where
    isBlank c = c == ' ' || c == '\t'
-- | Skip spaces and tabs up to and including a newline; returns the newline
-- character.  Backtracks if no newline follows.
blankline :: Stream s m Char => ParserT s st m Char
blankline = try (skipSpaces *> newline)
-- | Parse one or more blank lines; returns one newline character per blank
-- line parsed.
blanklines :: Stream s m Char => ParserT s st m Text
blanklines = fmap T.pack (many1 blankline)
-- | Consume exactly @n@ columns of indentation, where a tab counts as a
-- tab stop's worth of spaces (see 'eatOneSpaceOfTab').  Backtracks if fewer
-- are available.  Calls 'error' when @n@ is negative.
gobbleSpaces :: (HasReaderOptions st, Monad m)
             => Int -> ParserT Text st m ()
gobbleSpaces n
  | n < 0 = error "gobbleSpaces called with negative number"
  | n == 0 = return ()
  | otherwise =
      try ((char ' ' <|> eatOneSpaceOfTab) >> gobbleSpaces (n - 1))
-- | Consume a tab as if it were a single space: the tab is removed from the
-- input and replaced by @tabstop - 1@ spaces pushed back in front of the
-- remaining input.  Returns a space character.
eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char
eatOneSpaceOfTab = do
  void (char '\t')
  width <- getOption readerTabStop
  let padding = T.replicate (width - 1) " "
  getInput >>= setInput . (padding <>)
  return ' '
-- | Consume up to @n@ columns of indentation (tabs are handled as in
-- 'gobbleSpaces') and return how many were consumed.  Calls 'error' when
-- @n@ is negative.
gobbleAtMostSpaces :: (HasReaderOptions st, Monad m)
                   => Int -> ParserT Text st m Int
gobbleAtMostSpaces n
  | n < 0 = error "gobbleAtMostSpaces called with negative number"
  | n == 0 = return 0
  | otherwise = option 0 $ do
      _ <- char ' ' <|> eatOneSpaceOfTab
      consumed <- gobbleAtMostSpaces (n - 1)
      return (consumed + 1)
-- | Parse one or more items between a start and an end parser.  The start
-- must not be followed by whitespace ('space').  Backtracks on failure.
enclosed :: (Show end, Stream s m Char) => ParserT s st m t
         -> ParserT s st m end
         -> ParserT s st m a
         -> ParserT s st m [a]
enclosed start end parser = try $ do
  _ <- start
  notFollowedBy space
  many1Till parser end
-- | Parse the given text, accepting each character in either case.
-- Returns the characters as they appeared in the input.
stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text
stringAnyCase t = T.pack <$> stringAnyCase' (T.unpack t)
-- | 'String' worker for 'stringAnyCase': each character is matched against
-- its upper-case form first, then its lower-case form.
stringAnyCase' :: Stream s m Char => String -> ParserT s st m String
stringAnyCase' [] = string ""
stringAnyCase' (x:xs) =
  (:) <$> (char (toUpper x) <|> char (toLower x)) <*> stringAnyCase' xs
-- | Run a parser on the given text instead of the current input.  The
-- position is reset to the start of a source named @\"chunk\"@ for the
-- duration; the parser must consume the whole text apart from trailing
-- whitespace.  On success the original input and position are restored.
parseFromString :: (Stream s m Char, IsString s)
                => ParserT s st m r
                -> Text
                -> ParserT s st m r
parseFromString parser str = do
  savedPos <- getPosition
  savedInput <- getInput
  setPosition (initialPos "chunk")
  setInput (fromString (T.unpack str))
  result <- parser <* spaces <* eof
  setInput savedInput
  setPosition savedPos
  return result
-- | Like 'parseFromString', but clears the recorded last-string position
-- while the sub-parser runs and restores the previous value afterwards.
parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u)
                 => ParserT s u m a
                 -> Text
                 -> ParserT s u m a
parseFromString' parser str = do
  savedLastStrPos <- getLastStrPos <$> getState
  updateState (setLastStrPos Nothing)
  parseFromString parser str
    <* updateState (setLastStrPos savedLastStrPos)
-- | Parse either a run of blank lines or a run of one or more consecutive
-- non-blank lines (returned with a newline after each line).
lineClump :: Monad m => ParserT Text st m Text
lineClump = blanklines <|> nonBlankRun
  where
    nonBlankRun = T.unlines <$> many1 (notFollowedBy blankline >> anyLine)
-- | Parse text between balanced @open@ and @close@ delimiters and return
-- the text in between, without the outermost pair.  Nested balanced pairs
-- are kept, delimiters included.  Outside nested pairs, characters are
-- read with the supplied parser, which is never applied at a delimiter.
charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
                -> ParserT s st m Text
charsInBalanced open close parser = try $ do
  _ <- char open
  chunks <- many (plainRun <|> nestedRun)
  _ <- char close
  return (T.concat chunks)
  where
    isDelim c = c == open || c == close
    plainRun = T.pack <$> many1 (notFollowedBy (satisfy isDelim) >> parser)
    nestedRun = do
      inner <- charsInBalanced open close parser
      return (T.singleton open <> inner <> T.singleton close)
-- | Parse a roman numeral and return its value.  The flag selects
-- upper-case (@True@) or lower-case (@False@) letters.  Components are
-- parsed in descending order of magnitude (M, CM, D, CD, C, XC, L, XL, X,
-- IX, V, IV, I) and their values added up.
romanNumeral :: Stream s m Char => Bool
             -> ParserT s st m Int
romanNumeral upperCase = do
  -- Require a roman digit up front, without consuming it.
  lookAhead $ choice [one, five, ten, fifty, hundred, fiveHundred, thousand]
  parts <- sequence
    [ repeated 1000 thousand
    , subtractive 900 hundred thousand
    , single 500 fiveHundred
    , subtractive 400 hundred fiveHundred
    , repeated 100 hundred
    , subtractive 90 ten hundred
    , single 50 fifty
    , subtractive 40 ten fifty
    , repeated 10 ten
    , subtractive 9 one ten
    , single 5 five
    , subtractive 4 one five
    , repeated 1 one
    ]
  let total = sum parts
  if total == 0
    then Prelude.fail "not a roman numeral"
    else return total
  where
    rchar uc = char (if upperCase then uc else toLower uc)
    one = rchar 'I'
    five = rchar 'V'
    ten = rchar 'X'
    fifty = rchar 'L'
    hundred = rchar 'C'
    fiveHundred = rchar 'D'
    thousand = rchar 'M'
    -- Zero or more repetitions of a digit, each worth @unit@.
    repeated unit p = (unit *) . length <$> many p
    -- An optional two-letter subtractive pair such as CM or IV.
    subtractive value small big = option 0 (try (small >> big >> return value))
    -- An optional single occurrence of a digit.
    single value p = option 0 (value <$ p)
-- | Parse an email address.  Returns a pair of the address itself (after
-- 'fromEntities') and the result of 'escapeURI' on the address prefixed
-- with @mailto:@.
emailAddress :: Stream s m Char => ParserT s st m (Text, Text)
emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
  where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom
                            in (full, escapeURI $ "mailto:" <> full)
        -- Both sides of the '@' are dot-separated sequences of words.
        mailbox = intercalate "." <$> (emailWord `sepBy1'` dot)
        domain = intercalate "." <$> (subdomain `sepBy1'` dot)
        dot = char '.'
        subdomain = many1 $ alphaNum <|> innerPunct
        -- Punctuation inside a subdomain is accepted only when it is
        -- followed by neither whitespace nor another punctuation character.
        innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@')
                           <* notFollowedBy space
                           <* notFollowedBy (satisfy isPunctuation))
        -- A mailbox word must start with an alphanumeric character.
        emailWord = do x <- satisfy isAlphaNum
                       xs <- many (satisfy isEmailChar)
                       return (x:xs)
        isEmailChar c = isAlphaNum c || isEmailPunct c
        isEmailPunct c = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;"
-- | Parse one of the URI schemes in 'schemes', ignoring case.
uriScheme :: Stream s m Char => ParserT s st m Text
uriScheme = oneOfStringsCI knownSchemes
  where
    knownSchemes = Set.toList schemes
-- | Parse a URI.  Returns a pair of the URI as written (with entities
-- resolved by 'fromEntities') and the result of 'escapeURI' on it.
uri :: Stream s m Char => ParserT s st m (Text, Text)
uri = try $ do
  scheme <- uriScheme
  char ':'
  -- Reject input where the colon is directly followed by '*', '_' or ']'
  -- (presumably so that markup such as "**Notes:**" is not read as a URI).
  notFollowedBy (oneOf "*_]")
  -- The body is a sequence of chunks; a chunk wrapped in a balanced pair
  -- of parentheses, braces or brackets is accepted together with its
  -- delimiters.
  str <- T.concat <$> many1 (uriChunkBetween '(' ')'
                              <|> uriChunkBetween '{' '}'
                              <|> uriChunkBetween '[' ']'
                              <|> T.pack <$> uriChunk)
  -- Include one trailing slash if present.
  str' <- option str $ char '/' >> return (str <> "/")
  let uri' = scheme <> ":" <> fromEntities str'
  return (uri', escapeURI uri')
  where
    wordChar = alphaNum <|> oneOf "#$%+/@\\_-&="
    percentEscaped = try $ (:) <$> char '%' <*> many1 hexDigit
    entity = try $ pure <$> characterReference
    -- Commas, or any single character other than whitespace, '<' and '>'.
    punct = try $ many1 (char ',') <|> fmap pure (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>'))
    -- Punctuation is accepted only when a word character or percent escape
    -- follows, so it never ends the URI.
    uriChunk = many1 wordChar
           <|> percentEscaped
           <|> entity
           <|> try (punct <* lookAhead (void wordChar <|> void percentEscaped))
    uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk
                                   return (T.pack $ [l] ++ chunk ++ [r])
-- | Parse inline math between the given opening and closing delimiters and
-- return its contents after 'trimMath'.
mathInlineWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
mathInlineWith op cl = try $ do
  textStr op
  -- With a bare "$" opener, the delimiter may not be followed by whitespace.
  when (op == "$") $ notFollowedBy space
  -- Each item is one of: a single ordinary character; a backslash sequence
  -- (with @\text{...}@ read up to its balanced closing brace); or a run of
  -- whitespace, collapsed to one space, that is not followed by '$'.
  words' <- many1Till (countChar 1 (noneOf " \t\n\\")
                   <|> (char '\\' >>
                           (try (string "text" >>
                                 (("\\text" <>) <$> inBalancedBraces 0 ""))
                            <|> (\c -> T.pack ['\\',c]) <$> anyChar))
                   <|> do (blankline <* notFollowedBy' blankline) <|>
                             (oneOf " \t" <* skipMany (oneOf " \t"))
                          notFollowedBy (char '$')
                          return " "
                    ) (try $ textStr cl)
  -- The closing delimiter may not be directly followed by a digit
  -- (presumably to avoid reading currency amounts such as "$5" as math).
  notFollowedBy digit
  return $ trimMath $ T.concat words'
 where
  inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text
  inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack
  -- Arguments: number of currently open braces, and the characters read so
  -- far in reverse order.
  inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String
  -- Nothing read yet: the next character must be an opening brace.
  inBalancedBraces' 0 "" = do
    c <- anyChar
    if c == '{'
       then inBalancedBraces' 1 "{"
       else mzero
  -- All braces closed: return what was read, in input order.
  inBalancedBraces' 0 s = return $ reverse s
  -- The previous character was a backslash: take the next one verbatim.
  inBalancedBraces' numOpen ('\\':xs) = do
    c <- anyChar
    inBalancedBraces' numOpen (c:'\\':xs)
  inBalancedBraces' numOpen xs = do
    c <- anyChar
    case c of
         '}' -> inBalancedBraces' (numOpen - 1) (c:xs)
         '{' -> inBalancedBraces' (numOpen + 1) (c:xs)
         _ -> inBalancedBraces' numOpen (c:xs)
-- | Parse display math between the given opening and closing delimiters
-- and return the raw contents.  Newlines are allowed inside unless
-- followed by a blank line.
mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
mathDisplayWith op cl = try $ do
  _ <- textStr op
  body <- many1Till mathChar closer
  return (T.pack body)
  where
    mathChar = noneOf "\n" <|> (newline <* notFollowedBy' blankline)
    closer = try (textStr cl)
-- | Parse display math using whichever delimiter styles are enabled:
-- @$$...$$@, @\\[...\\]@ or @\\\\[...\\\\]@, tried in that order.
mathDisplay :: (HasReaderOptions st, Stream s m Char)
            => ParserT s st m Text
mathDisplay =
      whenEnabled Ext_tex_math_dollars "$$" "$$"
  <|> whenEnabled Ext_tex_math_single_backslash "\\[" "\\]"
  <|> whenEnabled Ext_tex_math_double_backslash "\\\\[" "\\\\]"
  where
    whenEnabled ext op cl = guardEnabled ext >> mathDisplayWith op cl
-- | Parse inline math using whichever delimiter styles are enabled:
-- @$...$@, @\\(...\\)@ or @\\\\(...\\\\)@, tried in that order.
mathInline :: (HasReaderOptions st , Stream s m Char)
           => ParserT s st m Text
mathInline =
      whenEnabled Ext_tex_math_dollars "$" "$"
  <|> whenEnabled Ext_tex_math_single_backslash "\\(" "\\)"
  <|> whenEnabled Ext_tex_math_double_backslash "\\\\(" "\\\\)"
  where
    whenEnabled ext op cl = guardEnabled ext >> mathInlineWith op cl
-- | Run a parser and also return the difference between the source column
-- after and before it ran.
withHorizDisplacement :: Stream s m Char
                      => ParserT s st m a
                      -> ParserT s st m (a, Int)
withHorizDisplacement parser = do
  startCol <- sourceColumn <$> getPosition
  result <- parser
  endCol <- sourceColumn <$> getPosition
  return (result, endCol - startCol)
-- | Run a parser and also return the raw input text it covered.  The raw
-- text is reconstructed from the input as it was before the parser ran,
-- using the source positions recorded before and after.
withRaw :: Monad m
        => ParsecT Text st m a
        -> ParsecT Text st m (a, Text)
withRaw parser = do
  startPos <- getPosition
  input <- getInput
  result <- parser
  endPos <- getPosition
  let startCol = sourceColumn startPos
      endCol = sourceColumn endPos
      lineSpan = sourceLine endPos - sourceLine startPos
      raw = case take (lineSpan + 1) (T.lines input) of
        [] -> ""
        -- Same line: take the column difference.
        [only] -> T.take (endCol - startCol) only
        -- Several lines: all complete lines, then the part of the last
        -- line before the end column.
        several ->
          T.unlines (init several) <> T.take (endCol - 1) (last several)
  return (result, raw)
-- | Parse a backslash followed by whatever the given parser accepts,
-- returning that parser's result.  Backtracks on failure.
escaped :: Stream s m Char
        => ParserT s st m Char
        -> ParserT s st m Char
escaped parser = try (char '\\' *> parser)
-- | Parse a character reference of the form @&...;@ and return the first
-- character that 'lookupEntity' resolves it to.  Fails when the entity is
-- unknown.
characterReference :: Stream s m Char => ParserT s st m Char
characterReference = try $ do
  _ <- char '&'
  name <- many1Till nonspaceChar (char ';')
  case lookupEntity (normalize name) of
    Just (c : _) -> return c
    _ -> Prelude.fail "entity not found"
  where
    -- Numeric references are looked up without the trailing semicolon (an
    -- upper-case hex marker is lowered first); named references are looked
    -- up with it.
    normalize ('#' : 'X' : digits) = '#' : 'x' : digits
    normalize numeric@('#' : _) = numeric
    normalize named = named ++ ";"
-- | Parse an upper-case roman numeral; returns ('UpperRoman', value).
upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
upperRoman = (,) UpperRoman <$> romanNumeral True
-- | Parse a lower-case roman numeral; returns ('LowerRoman', value).
lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
lowerRoman = (,) LowerRoman <$> romanNumeral False
-- | Parse a decimal number; returns ('Decimal', value).  The value falls
-- back to 1 if 'safeRead' cannot read the digits.
decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
decimal = toAttr <$> many1 digit
  where
    toAttr digits = (Decimal, fromMaybe 1 (safeRead (T.pack digits)))
-- | Parse an @\@@ sign followed by an optional label (alphanumerics, @_@,
-- @-@).  Returns ('Example', n), where n is the current value of
-- 'stateNextExample'; the counter is then incremented and a non-empty
-- label is recorded in 'stateExamples' as mapping to n.
exampleNum :: Stream s m Char
           => ParserT s ParserState m (ListNumberStyle, Int)
exampleNum = do
  _ <- char '@'
  lab <- T.pack <$> many (alphaNum <|> oneOf "_-")
  num <- stateNextExample <$> getState
  updateState $ \s ->
    s { stateNextExample = num + 1
      , stateExamples =
          if T.null lab
            then stateExamples s
            else M.insert lab num (stateExamples s)
      }
  return (Example, num)
-- | Parse a @#@ character; returns ('DefaultStyle', 1).
defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
defaultNum = (DefaultStyle, 1) <$ char '#'
-- | Parse a lower-case ASCII letter; returns ('LowerAlpha', n) where
-- @a@ is 1, @b@ is 2, and so on.
lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
lowerAlpha = toAttr <$> oneOf ['a'..'z']
  where
    toAttr ch = (LowerAlpha, ord ch - ord 'a' + 1)
-- | Parse an upper-case ASCII letter; returns ('UpperAlpha', n) where
-- @A@ is 1, @B@ is 2, and so on.
upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
upperAlpha = toAttr <$> oneOf ['A'..'Z']
  where
    toAttr ch = (UpperAlpha, ord ch - ord 'A' + 1)
-- | Parse a single @i@ or @I@ as the roman numeral one, in the matching
-- case style.
romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
romanOne =
      ((LowerRoman, 1) <$ char 'i')
  <|> ((UpperRoman, 1) <$ char 'I')
-- | Parse an ordered list marker of any supported style and delimiter and
-- return its list attributes.  Every delimiter form is combined with every
-- number form; all number forms are tried for one delimiter before moving
-- on to the next delimiter.
anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
anyOrderedListMarker = choice (delimParsers <*> numParsers)
  where
    delimParsers = [inPeriod, inOneParen, inTwoParens]
    numParsers =
      [ decimal, exampleNum, defaultNum, romanOne
      , lowerAlpha, lowerRoman, upperAlpha, upperRoman ]
-- | Parse a list number followed by a period and return list attributes.
-- The delimiter is 'DefaultDelim' when the number style is 'DefaultStyle',
-- otherwise 'Period'.
inPeriod :: Stream s m Char
         => ParserT s st m (ListNumberStyle, Int)
         -> ParserT s st m ListAttributes
inPeriod num = try $ do
  (style, start) <- num
  _ <- char '.'
  return (start, style, delimFor style)
  where
    delimFor DefaultStyle = DefaultDelim
    delimFor _ = Period
-- | Parse a list number followed by a closing parenthesis and return list
-- attributes with the 'OneParen' delimiter.
inOneParen :: Stream s m Char
           => ParserT s st m (ListNumberStyle, Int)
           -> ParserT s st m ListAttributes
inOneParen num = try (toAttrs <$> (num <* char ')'))
  where
    toAttrs (style, start) = (start, style, OneParen)
-- | Parse a list number enclosed in parentheses and return list attributes
-- with the 'TwoParens' delimiter.
inTwoParens :: Stream s m Char
            => ParserT s st m (ListNumberStyle, Int)
            -> ParserT s st m ListAttributes
inTwoParens num = try (toAttrs <$> between (char '(') (char ')') num)
  where
    toAttrs (style, start) = (start, style, TwoParens)
-- | Parse an ordered list marker with the given number style and delimiter
-- and return its start number.  A @#@ marker ('defaultNum') is always
-- accepted in place of the style-specific number.
orderedListMarker :: Stream s m Char
                  => ListNumberStyle
                  -> ListNumberDelim
                  -> ParserT s ParserState m Int
orderedListMarker style delim = do
  (start, _, _) <- withDelim (defaultNum <|> styleNum)
  return start
  where
    styleNum = case style of
      DefaultStyle -> decimal
      Example -> exampleNum
      Decimal -> decimal
      UpperRoman -> upperRoman
      LowerRoman -> lowerRoman
      UpperAlpha -> upperAlpha
      LowerAlpha -> lowerAlpha
    withDelim = case delim of
      DefaultDelim -> inPeriod
      Period -> inPeriod
      OneParen -> inOneParen
      TwoParens -> inTwoParens
-- | Parse a character reference and return it as a one-character 'Str'.
charRef :: Stream s m Char => ParserT s st m Inline
charRef = fmap (Str . T.singleton) characterReference
-- | Parse one line of a line block: @|@, a space, then the rest of the
-- line.  Each additional leading space or tab becomes a non-breaking space
-- in the result.  Following lines that begin with a space are
-- continuations and are joined to the first with single spaces.
lineBlockLine :: Monad m => ParserT Text st m Text
lineBlockLine = try $ do
  _ <- char '|' *> char ' '
  indent <- T.pack <$> many ('\160' <$ spaceChar)
  notFollowedBy newline
  firstLine <- anyLine
  continuations <- many (try (char ' ' *> anyLine))
  return (indent <> T.unwords (firstLine : continuations))
-- | Parse a line-block line consisting of just @|@ and optional trailing
-- spaces or tabs; returns the newline character.
blankLineBlockLine :: Stream s m Char => ParserT s st m Char
blankLineBlockLine = try (char '|' *> blankline)
-- | Parse one or more line-block lines (an empty one is returned as a
-- newline), then skip any trailing blank lines.
lineBlockLines :: Monad m => ParserT Text st m [Text]
lineBlockLines = try (many1 oneLine <* skipMany blankline)
  where
    oneLine = lineBlockLine <|> (T.singleton <$> blankLineBlockLine)
-- | Parse a table using the supplied header, row, line-separator and
-- footer parsers, and build a table block from the components returned by
-- 'tableWith''.  The table gets an empty caption, a single body and an
-- empty foot; non-positive column widths become 'ColWidthDefault'.
tableWith :: (Stream s m Char, HasReaderOptions st, Monad mf)
          => ParserT s st m (mf [Blocks], [Alignment], [Int])
          -> ([Int] -> ParserT s st m (mf [Blocks]))
          -> ParserT s st m sep
          -> ParserT s st m end
          -> ParserT s st m (mf Blocks)
tableWith headerParser rowParser lineParser footerParser = try $ do
  (aligns, widths, heads, rows) <-
    tableWith' headerParser rowParser lineParser footerParser
  let colSpecs = zip aligns (map toColWidth widths)
      th = fmap (TableHead nullAttr) heads
      tb = fmap (\rs -> [TableBody nullAttr 0 [] rs]) rows
      tf = pure (TableFoot nullAttr [])
  return (B.table B.emptyCaption colSpecs <$> th <*> tb <*> tf)
  where
    toColWidth w = if w > 0 then ColWidth w else ColWidthDefault
-- | The pieces of a parsed table, in order: column alignments, relative
-- column widths, header rows, and body rows.
type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row])
-- | Parse a table using the supplied header, row, line-separator and
-- footer parsers and return its components.  Widths are all zero when the
-- header parser yields no column indices; otherwise they are computed by
-- 'widthsFromIndices' from the @readerColumns@ option.  An empty header
-- yields no header row.
tableWith' :: (Stream s m Char, HasReaderOptions st, Monad mf)
           => ParserT s st m (mf [Blocks], [Alignment], [Int])
           -> ([Int] -> ParserT s st m (mf [Blocks]))
           -> ParserT s st m sep
           -> ParserT s st m end
           -> ParserT s st m (TableComponents mf)
tableWith' headerParser rowParser lineParser footerParser = try $ do
  (heads, aligns, indices) <- headerParser
  rowGroups <- sepEndBy1 (rowParser indices) lineParser
  _ <- footerParser
  numColumns <- getOption readerColumns
  let widths
        | null indices = replicate (length aligns) 0.0
        | otherwise = widthsFromIndices numColumns indices
      mkRow = Row nullAttr . map B.simpleCell
      headerRows cells = [mkRow cells | not (null cells)]
  return ( aligns
         , widths
         , fmap headerRows heads
         , fmap (map mkRow) (sequence rowGroups)
         )
-- | Calculate relative column widths from column end indices.
--
-- The first argument is the number of columns available; the second is the
-- list of indices.  The result holds one fraction per index after the
-- first: each column's length divided by the larger of the total length
-- and the available columns (itself raised to at least the last index).
-- An empty index list yields an empty result.  If the divisor would be
-- zero, every width is reported as 0 rather than NaN.
widthsFromIndices :: Int
                  -> [Int]
                  -> [Double]
widthsFromIndices _ [] = []
widthsFromIndices numColumns' indices = drop 1 (map toFraction lengths)
  where
    -- 'indices' is non-empty in this equation, so 'last' is safe.
    numColumns = max numColumns' (last indices)
    rawLengths = zipWith (-) indices (0 : indices)
    -- If the final column is narrower than the one before it by at most
    -- two, widen it to match (presumably to compensate for inter-column
    -- spacing that the last column lacks).
    lengths = reverse $
      case reverse rawLengths of
        (x : y : zs) | x < y && y - x <= 2 -> y : y : zs
        other -> other
    totLength = sum lengths
    denominator = fromIntegral (max totLength numColumns) :: Double
    toFraction l
      | denominator == 0 = 0
      | otherwise = fromIntegral l / denominator
-- | Parse a grid table and build a table block.  The first argument parses
-- the contents of a cell; the second says whether the table is headerless.
gridTableWith :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
                  Monad mf, IsString s)
              => ParserT s st m (mf Blocks)
              -> Bool
              -> ParserT s st m (mf Blocks)
gridTableWith blocks headless =
  tableWith header row separator gridTableFooter
  where
    header = gridTableHeader headless blocks
    row = gridTableRow blocks
    separator = gridTableSep '-'
-- | Like 'gridTableWith', but returns the table's components instead of a
-- finished table block.
gridTableWith' :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
                   Monad mf, IsString s)
               => ParserT s st m (mf Blocks)
               -> Bool
               -> ParserT s st m (TableComponents mf)
gridTableWith' blocks headless =
  tableWith' header row separator gridTableFooter
  where
    header = gridTableHeader headless blocks
    row = gridTableRow blocks
    separator = gridTableSep '-'
-- | Split a grid-table line into cell texts at the given indices.  The line
-- is right-trimmed first, the last index is not used as a split point, the
-- first piece is discarded, and 'removeFinalBar' is applied to each of the
-- remaining pieces.
gridTableSplitLine :: [Int] -> Text -> [Text]
gridTableSplitLine indices line = map removeFinalBar (tail pieces)
  where
    pieces = splitTextByIndices (init indices) (trimr line)
-- | Parse one column segment of a grid-table border line: an optional
-- leading colon, one or more of the given character, an optional trailing
-- colon, then @+@.  Returns the segment length (colons included) paired
-- with that length plus one, and the alignment implied by the colons.
gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
gridPart ch = do
  leftColon <- colon
  dashCount <- length <$> many1 (char ch)
  rightColon <- colon
  _ <- char '+'
  let width = dashCount + fromEnum leftColon + fromEnum rightColon
      alignment
        | leftColon && rightColon = AlignCenter
        | leftColon = AlignLeft
        | rightColon = AlignRight
        | otherwise = AlignDefault
  return ((width, width + 1), alignment)
  where
    colon = option False (True <$ char ':')
-- | Parse a full grid-table border line: @+@, one or more column segments
-- drawn with the given character, then the end of the line.
gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
gridDashedLines ch = try (char '+' *> many1 (gridPart ch) <* blankline)
-- | Strip trailing @|@ characters, then any trailing spaces and tabs left
-- in front of them.
removeFinalBar :: Text -> Text
removeFinalBar = T.dropWhileEnd isBlank . T.dropWhileEnd (== '|')
  where
    isBlank c = c == ' ' || c == '\t'
-- | Parse a grid-table separator line drawn with the given character;
-- returns a newline character.
gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
gridTableSep ch = try ('\n' <$ gridDashedLines ch)
-- | Parse the header of a grid table and return the parsed header cells,
-- the column alignments, and the column indices.  For a headerless table
-- only the top border is read and the header cells are parsed from empty
-- text; otherwise the header lines and the @=@ border below them are read,
-- and alignments and indices come from that border.
gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
                => Bool
                -> ParserT s st m (mf Blocks)
                -> ParserT s st m (mf [Blocks], [Alignment], [Int])
gridTableHeader headless blocks = try $ do
  optional blanklines
  dashes <- gridDashedLines '-'
  rawContent <- if headless then return (repeat "") else many1 headerLine
  underDashes <- if headless then return dashes else gridDashedLines '='
  -- Both border lines must describe the same number of columns.
  guard (length dashes == length underDashes)
  let indices = scanl (+) 0 (map (snd . fst) underDashes)
      aligns = map snd underDashes
      rawHeads
        | headless = replicate (length underDashes) ""
        | otherwise =
            map (T.unlines . map trim) . transpose $
              map (gridTableSplitLine indices) rawContent
  heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads
  return (heads, aligns, indices)
  where
    headerLine =
      notFollowedBy (gridTableSep '=') >> char '|' >>
        T.pack <$> many1Till anyChar newline
-- | Parse one content line of a grid table (starting with @|@) and split
-- it into raw cell texts at the given indices.
gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text]
gridTableRawLine indices = do
  _ <- char '|'
  gridTableSplitLine indices . T.pack <$> many1Till anyChar newline
-- | Parse one row of a grid table, which may span several content lines.
-- The lines are regrouped by column, one shared leading space is removed
-- per column, and each column's text is parsed with the supplied block
-- parser; each cell is then passed through 'compactify'.
gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
             => ParserT s st m (mf Blocks)
             -> [Int]
             -> ParserT s st m (mf [Blocks])
gridTableRow blocks indices = do
  rawLines <- many1 (gridTableRawLine indices)
  let cellTexts = map toCellText (transpose rawLines)
  parsed <- mapM (parseFromString' blocks) cellTexts
  return (map compactCell <$> sequence parsed)
  where
    toCellText = (<> "\n") . T.unlines . removeOneLeadingSpace
    compactCell bs = case compactify [bs] of
      [] -> mempty
      (x : _) -> x
-- | Drop the first character of every line, but only if every line is
-- either empty or starts with a space; otherwise return the lines
-- unchanged.
removeOneLeadingSpace :: [Text] -> [Text]
removeOneLeadingSpace xs
  | all emptyOrSpaced xs = map (T.drop 1) xs
  | otherwise = xs
  where
    emptyOrSpaced t = maybe True ((== ' ') . fst) (T.uncons t)
-- | Parse the footer of a grid table: optional blank lines.
gridTableFooter :: Stream s m Char => ParserT s st m ()
gridTableFooter = option () (() <$ blanklines)
-- | Run a parser on the given input with the given initial state, using
-- @\"source\"@ as the source name.  A parse failure is returned as a
-- 'PandocParsecError' carrying the input as text.
readWithM :: (Stream s m Char, ToText s)
          => ParserT s st m a
          -> st
          -> s
          -> m (Either PandocError a)
readWithM parser state input =
  mapLeft (PandocParsecError (toText input))
    <$> runParserT parser state "source" input
-- | Pure variant of 'readWithM': runs the parser in the 'Identity' monad.
readWith :: Parser Text st a
         -> st
         -> Text
         -> Either PandocError a
readWith p t = runIdentity . readWithM p t
-- | Run a parser on the given text starting from 'defaultParserState' and
-- print the shown result (or error).
testStringWith :: Show a
               => ParserT Text ParserState Identity a
               -> Text
               -> IO ()
testStringWith parser str =
  UTF8.putStrLn . show $ readWith parser defaultParserState str
-- | State threaded through the parsers.  Only fields whose use is visible
-- in this part of the module are annotated; see 'defaultParserState' for
-- the initial values.
data ParserState = ParserState
    { stateOptions :: ReaderOptions, -- ^ Reader options (see 'HasReaderOptions')
      stateParserContext :: ParserContext,
      stateQuoteContext :: QuoteContext, -- ^ Current quote context (see 'HasQuoteContext')
      stateAllowLinks :: Bool,
      stateAllowLineBreaks :: Bool,
      stateMaxNestingLevel :: Int,
      stateLastStrPos :: Maybe SourcePos, -- ^ Last recorded string position (see 'HasLastStrPosition')
      stateKeys :: KeyTable,
      stateHeaderKeys :: KeyTable,
      stateSubstitutions :: SubstTable,
      stateNotes :: NoteTable,
      stateNotes' :: NoteTable',
      stateNoteRefs :: Set.Set Text,
      stateInNote :: Bool,
      stateNoteNumber :: Int,
      stateMeta :: Meta, -- ^ Metadata updated through the 'HasMeta' instance
      stateMeta' :: F Meta,
      stateCitations :: M.Map Text Text,
      stateHeaderTable :: [HeaderType],
      stateIdentifiers :: Set.Set Text, -- ^ Identifiers seen so far (see 'HasIdentifierList')
      stateNextExample :: Int, -- ^ Number 'exampleNum' assigns to the next example
      stateExamples :: M.Map Text Int, -- ^ Example labels mapped to their numbers
      stateMacros :: M.Map Text Macro, -- ^ Macro table (see 'HasMacros')
      stateRstDefaultRole :: Text,
      stateRstHighlight :: Maybe Text,
      stateRstCustomRoles :: M.Map Text (Text, Maybe Text, Attr),
      stateCaption :: Maybe Inlines,
      stateInHtmlBlock :: Maybe Text,
      stateFencedDivLevel :: Int,
      stateContainers :: [Text], -- ^ Include-file stack, latest first (see 'HasIncludeFiles')
      stateLogMessages :: [LogMessage], -- ^ Log messages, most recent first
      stateMarkdownAttribute :: Bool
    }
-- | The default 'ParserState' is 'defaultParserState'.
instance Default ParserState where
  def = defaultParserState
-- | Metadata operations act on the 'stateMeta' field.
instance HasMeta ParserState where
  setMeta field val st = st { stateMeta = setMeta field val (stateMeta st) }
  deleteMeta field st = st { stateMeta = deleteMeta field (stateMeta st) }
-- | Parser states from which the 'ReaderOptions' can be extracted.
class HasReaderOptions st where
  -- | Extract the reader options from the state.
  extractReaderOptions :: st -> ReaderOptions
  -- | Read one setting from the reader options held in the current parser
  -- state.  The default implementation goes through 'extractReaderOptions'.
  getOption :: (Stream s m t) => (ReaderOptions -> b) -> ParserT s st m b
  getOption f = fmap (f . extractReaderOptions) getState
-- | Reader options live in the 'stateOptions' field.
instance HasReaderOptions ParserState where
  extractReaderOptions = stateOptions
-- | Parser states (over a given underlying monad) that track a
-- 'QuoteContext'.
class HasQuoteContext st m where
  -- | Return the current quote context.
  getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
  -- | Run a parser with the given quote context.
  withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
-- | The quote context lives in the 'stateQuoteContext' field.
-- 'withQuoteContext' installs the new context, runs the parser, and then
-- puts the previous context back, keeping any other state changes the
-- parser made.
instance Monad m => HasQuoteContext ParserState m where
  getQuoteContext = stateQuoteContext <$> getState
  withQuoteContext context parser = do
    previous <- stateQuoteContext <$> getState
    updateState (\s -> s { stateQuoteContext = context })
    result <- parser
    updateState (\s -> s { stateQuoteContext = previous })
    return result
-- | Parser states that keep a set of identifiers.
class HasIdentifierList st where
  -- | Return the identifier set.
  extractIdentifierList :: st -> Set.Set Text
  -- | Apply a function to the identifier set.
  updateIdentifierList :: (Set.Set Text -> Set.Set Text) -> st -> st
-- | Identifiers live in the 'stateIdentifiers' field.
instance HasIdentifierList ParserState where
  extractIdentifierList = stateIdentifiers
  updateIdentifierList f st =
    st { stateIdentifiers = f (stateIdentifiers st) }
-- | Parser states that keep a table of macros.
class HasMacros st where
  -- | Return the macro table.
  extractMacros :: st -> M.Map Text Macro
  -- | Apply a function to the macro table.
  updateMacros :: (M.Map Text Macro -> M.Map Text Macro) -> st -> st
-- | Macros live in the 'stateMacros' field.
instance HasMacros ParserState where
  extractMacros = stateMacros
  updateMacros f st =
    st { stateMacros = f (stateMacros st) }
-- | Parser states that can record an optional source position (see
-- 'updateLastStrPos' and 'notAfterString' for how it is used).
class HasLastStrPosition st where
  -- | Store the position (or clear it with 'Nothing').
  setLastStrPos :: Maybe SourcePos -> st -> st
  -- | Return the stored position, if any.
  getLastStrPos :: st -> Maybe SourcePos
-- | The position lives in the 'stateLastStrPos' field.
instance HasLastStrPosition ParserState where
  setLastStrPos pos st = st { stateLastStrPos = pos }
  getLastStrPos = stateLastStrPos
-- | Parser states that accumulate log messages.
class HasLogMessages st where
  -- | Add a log message to the state.
  addLogMessage :: LogMessage -> st -> st
  -- | Return the accumulated log messages.
  getLogMessages :: st -> [LogMessage]
-- | Messages are prepended to 'stateLogMessages' and reversed on
-- retrieval, so 'getLogMessages' returns them oldest first.
instance HasLogMessages ParserState where
  addLogMessage msg st = st { stateLogMessages = msg : stateLogMessages st }
  getLogMessages = reverse . stateLogMessages
-- | Parser states that keep a list of include files.
class HasIncludeFiles st where
  -- | Return the list of include files.
  getIncludeFiles :: st -> [Text]
  -- | Add an include file to the list.
  addIncludeFile :: Text -> st -> st
  -- | Remove the most recently added include file.
  dropLatestIncludeFile :: st -> st
-- | Include files are kept in 'stateContainers', most recent first.
instance HasIncludeFiles ParserState where
  getIncludeFiles = stateContainers
  addIncludeFile f s = s { stateContainers = f : stateContainers s }
  dropLatestIncludeFile s =
    s { stateContainers = drop 1 (stateContainers s) }
-- | The initial parser state: default reader options, empty tables, sets
-- and lists, links and line breaks allowed, a maximum nesting level of 6,
-- example numbering starting at 1, and @\"title-reference\"@ as the
-- default RST role.
defaultParserState :: ParserState
defaultParserState = ParserState
  { stateOptions = def
  , stateParserContext = NullState
  , stateQuoteContext = NoQuote
  , stateAllowLinks = True
  , stateAllowLineBreaks = True
  , stateMaxNestingLevel = 6
  , stateLastStrPos = Nothing
  , stateKeys = M.empty
  , stateHeaderKeys = M.empty
  , stateSubstitutions = M.empty
  , stateNotes = []
  , stateNotes' = M.empty
  , stateNoteRefs = Set.empty
  , stateInNote = False
  , stateNoteNumber = 0
  , stateMeta = nullMeta
  , stateMeta' = return nullMeta
  , stateCitations = M.empty
  , stateHeaderTable = []
  , stateIdentifiers = Set.empty
  , stateNextExample = 1
  , stateExamples = M.empty
  , stateMacros = M.empty
  , stateRstDefaultRole = "title-reference"
  , stateRstHighlight = Nothing
  , stateRstCustomRoles = M.empty
  , stateCaption = Nothing
  , stateInHtmlBlock = Nothing
  , stateFencedDivLevel = 0
  , stateContainers = []
  , stateLogMessages = []
  , stateMarkdownAttribute = False
  }
-- | Add a message to the log kept in the parser state.
logMessage :: (Stream s m a, HasLogMessages st)
           => LogMessage -> ParserT s st m ()
logMessage = updateState . addLogMessage
-- | Pass every log message held in the parser state to 'report'.
reportLogMessages :: (PandocMonad m, HasLogMessages st) => ParserT s st m ()
reportLogMessages = getState >>= mapM_ report . getLogMessages
-- | Succeed only if the given extension is enabled in the reader options.
guardEnabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
guardEnabled ext = do
  exts <- getOption readerExtensions
  guard (extensionEnabled ext exts)
-- | Succeed only if the given extension is not enabled in the reader
-- options.
guardDisabled :: (Stream s m a, HasReaderOptions st) => Extension -> ParserT s st m ()
guardDisabled ext = do
  exts <- getOption readerExtensions
  guard (not (extensionEnabled ext exts))
-- | Record the current source position as the last string position.
updateLastStrPos :: (Stream s m a, HasLastStrPosition st) => ParserT s st m ()
updateLastStrPos = do
  here <- getPosition
  updateState (setLastStrPos (Just here))
-- | Return @True@ unless the current source position equals the recorded
-- last string position.
notAfterString :: (Stream s m a, HasLastStrPosition st) => ParserT s st m Bool
notAfterString = do
  here <- getPosition
  lastStrPos <- getLastStrPos <$> getState
  return (lastStrPos /= Just here)
-- | A header style, identified by a character.
-- NOTE(review): presumably 'SingleHeader' is a header marked by one line of
-- the character and 'DoubleHeader' by lines above and below — confirm
-- against the readers that use it.
data HeaderType
    = SingleHeader Char
    | DoubleHeader Char
    deriving (Eq, Show)
-- | The context a parser is running in; 'NullState' is the value used in
-- 'defaultParserState'.
data ParserContext
    = ListItemState
    | NullState
    deriving (Eq, Show)
-- | The quotation context a parser is running in; 'NoQuote' is the value
-- used in 'defaultParserState'.
data QuoteContext
    = InSingleQuote
    | InDoubleQuote
    | NoQuote
    deriving (Eq, Show)
-- | Association list of 'Text' pairs; the type of 'stateNotes'.
-- NOTE(review): presumably (note label, raw note contents) -- confirm
-- against the readers that populate it.
type NoteTable = [(Text, Text)]
-- | Map from a 'Text' key to a source position paired with an
-- 'F' 'Blocks' value.
-- NOTE(review): presumably keyed by note label, with the position
-- where the note was defined -- confirm against the readers.
type NoteTable' = M.Map Text (SourcePos, F Blocks)
-- | Newtype wrapper around 'Text', used as the key type of 'KeyTable'
-- and 'SubstTable'.  'toKey' builds a normalized 'Key' from raw text.
newtype Key = Key Text deriving (Show, Read, Eq, Ord)
-- | Build a normalized 'Key' from raw text: strip one pair of
-- enclosing square brackets if both are present, collapse every run
-- of whitespace to a single space (also trimming both ends), and
-- convert to lower case.
toKey :: Text -> Key
toKey raw = Key (T.toLower (T.unwords (T.words (unbracket raw))))
  where
    -- Drop a leading '[' together with a trailing ']'.  If either one
    -- is missing the text is returned unchanged.
    unbracket t =
      case T.uncons t of
        Just ('[', afterOpen)
          | Just (inner, ']') <- T.unsnoc afterOpen -> inner
        _ -> t
-- | Map from a 'Key' to a ('Target', 'Attr') pair.
-- NOTE(review): presumably reference-link definitions -- confirm
-- against the readers that populate 'stateKeys'.
type KeyTable = M.Map Key (Target, Attr)
-- | Map from a 'Key' to 'Inlines'.
-- NOTE(review): presumably substitution definitions -- confirm
-- against the readers that populate 'stateSubstitutions'.
type SubstTable = M.Map Key Inlines
-- | Register a header's identifier in the parser state and return the
-- attribute to use for the header.
--
-- * If the supplied identifier is empty and @Ext_auto_identifiers@ is
--   enabled, an identifier is generated from the header contents with
--   'uniqueIdent'.  When @Ext_ascii_identifiers@ is also enabled the
--   generated identifier is mapped through 'toAsciiChar' (characters
--   for which it returns 'Nothing' are dropped).  Both the generated
--   and the final identifier are added to the identifier list, and
--   the final one is returned.
-- * Otherwise the attribute is returned unchanged.  A non-empty
--   identifier is added to the identifier list; if it was already
--   present, a 'DuplicateIdentifier' message is logged with the
--   current position first.
registerHeader :: (Stream s m a, HasReaderOptions st,
                   HasLogMessages st, HasIdentifierList st)
               => Attr -> Inlines -> ParserT s st m Attr
registerHeader (ident, classes, kvs) header' = do
  knownIds <- extractIdentifierList <$> getState
  exts <- getOption readerExtensions
  let enabled ext = ext `extensionEnabled` exts
      remember newId = updateState (updateIdentifierList (Set.insert newId))
      -- No identifier supplied: derive one from the header text.
      registerGenerated = do
        let generated = uniqueIdent exts (B.toList header') knownIds
            final
              | enabled Ext_ascii_identifiers =
                  T.pack (mapMaybe toAsciiChar (T.unpack generated))
              | otherwise = generated
        remember generated
        remember final
        return (final, classes, kvs)
      -- Identifier supplied (or auto identifiers are off): keep it.
      registerExplicit = do
        unless (T.null ident) $ do
          when (ident `Set.member` knownIds) $
            getPosition >>= logMessage . DuplicateIdentifier ident
          remember ident
        return (ident, classes, kvs)
  if T.null ident && enabled Ext_auto_identifiers
    then registerGenerated
    else registerExplicit
-- | Parse one piece of smart punctuation: a quotation (whose contents
-- are parsed with the supplied inline parser), an apostrophe, a dash
-- or an ellipsis, tried in that order.  Fails immediately unless
-- @Ext_smart@ is enabled.
smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
                 => ParserT s st m Inlines
                 -> ParserT s st m Inlines
smartPunctuation inlineParser =
  guardEnabled Ext_smart
    >> choice [ quoted inlineParser, apostrophe, dash, ellipses ]
-- | Parse an ASCII apostrophe (@'@) or U+2019 and produce a string
-- inline containing U+2019.
apostrophe :: Stream s m Char => ParserT s st m Inlines
apostrophe = B.str "\x2019" <$ (char '\'' <|> char '\8217')
-- | Parse a quotation, trying a double-quoted one first and a
-- single-quoted one second.  The supplied parser is used for the
-- quoted contents.
quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
       => ParserT s st m Inlines
       -> ParserT s st m Inlines
quoted inlineParser = inDouble <|> inSingle
  where
    inDouble = doubleQuoted inlineParser
    inSingle = singleQuoted inlineParser
-- | Parse a single-quoted span: an opening quote ('singleQuoteStart'),
-- then one or more inlines up to 'singleQuoteEnd', parsed in the
-- 'InSingleQuote' context.  The inlines are concatenated and wrapped
-- with 'B.singleQuoted'.  Backtracks completely on failure.
singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
             => ParserT s st m Inlines
             -> ParserT s st m Inlines
singleQuoted inlineParser = try $ do
  singleQuoteStart
  contents <- withQuoteContext InSingleQuote $
                many1Till inlineParser singleQuoteEnd
  return (B.singleQuoted (mconcat contents))
-- | Parse a double-quoted span: an opening quote ('doubleQuoteStart'),
-- then zero or more inlines up to 'doubleQuoteEnd', parsed in the
-- 'InDoubleQuote' context.  The inlines are concatenated and wrapped
-- with 'B.doubleQuoted'.  Backtracks completely on failure.
doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
             => ParserT s st m Inlines
             -> ParserT s st m Inlines
doubleQuoted inlineParser = try $ do
  doubleQuoteStart
  contents <- withQuoteContext InDoubleQuote $
                manyTill inlineParser doubleQuoteEnd
  return (B.doubleQuoted (mconcat contents))
-- | Fail with the message @already inside quotes@ when the current
-- quote context equals the given one; otherwise do nothing.
failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
                     => QuoteContext
                     -> ParserT s st m ()
failIfInQuoteContext context =
  getQuoteContext >>= \current ->
    if current == context
      then Prelude.fail "already inside quotes"
      else return ()
-- | Parse one of the given characters, written either literally or as
-- a character reference (via 'characterReference') that resolves to
-- one of them.  The reference alternative backtracks if the resolved
-- character is not in the list.
charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char
charOrRef cs = oneOf cs <|> try viaReference
  where
    viaReference = do
      c <- characterReference
      guard (c `elem` cs)
      return c
-- | Parse an opening single quote.  Fails when already inside single
-- quotes, when the current position is the recorded last-string
-- position, or when the quote character is followed by a space, tab
-- or newline.
singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
                 => ParserT s st m ()
singleQuoteStart = do
  failIfInQuoteContext InSingleQuote
  -- Reject a quote that sits exactly at the last-string position.
  notAfterString >>= guard
  try (charOrRef "'\8216\145" >> notFollowedBy (oneOf " \t\n"))
-- | Parse a closing single quote: one of the accepted quote
-- characters (literal or as a character reference) that is not
-- immediately followed by an alphanumeric character.  Backtracks on
-- failure.
singleQuoteEnd :: Stream s m Char
               => ParserT s st m ()
singleQuoteEnd =
  try (charOrRef "'\8217\146" >> notFollowedBy alphaNum)
-- | Parse an opening double quote.  Fails when already inside double
-- quotes or when the quote character is followed by a space, tab or
-- newline.
doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
                 => ParserT s st m ()
doubleQuoteStart =
  failIfInQuoteContext InDoubleQuote
    >> try (charOrRef "\"\8220\147" >> notFollowedBy (oneOf " \t\n"))
-- | Parse a closing double quote (literal or as a character
-- reference), discarding the character.
doubleQuoteEnd :: Stream s m Char
               => ParserT s st m ()
doubleQuoteEnd = () <$ charOrRef "\"\8221\148"
-- | Parse three consecutive periods and produce a string inline
-- containing U+2026.  Backtracks if fewer than three periods follow.
ellipses :: Stream s m Char
         => ParserT s st m Inlines
ellipses = B.str "\8230" <$ try (string "...")
-- | Parse a dash and produce a string inline containing U+2013 or
-- U+2014.
--
-- With @Ext_old_dashes@ enabled: @--@ gives U+2014, and a single @-@
-- that is followed by a digit (which is not consumed) gives U+2013.
-- Otherwise: @---@ gives U+2014 and @--@ gives U+2013.
-- Backtracks completely on failure.
dash :: (HasReaderOptions st, Stream s m Char)
     => ParserT s st m Inlines
dash = try $ do
  exts <- getOption readerExtensions
  if extensionEnabled Ext_old_dashes exts
    then char '-' >> (longDash <|> (shortDash <$ lookAhead digit))
    else string "--" >> (longDash <|> return shortDash)
  where
    shortDash = B.str "\8211"
    -- One further hyphen upgrades the result to the long dash.
    longDash = B.str "\8212" <$ char '-'
-- | Run a parser one nesting level deeper.  Fails without running the
-- parser when 'stateMaxNestingLevel' is not positive.  Otherwise the
-- level is decremented while the parser runs and reset to its
-- previous value after the parser succeeds.
nested :: Stream s m a
       => ParserT s ParserState m a
       -> ParserT s ParserState m a
nested p = do
  level <- stateMaxNestingLevel <$> getState
  guard (level > 0)
  setLevel (level - 1)
  result <- p
  setLevel level
  return result
  where
    setLevel n = updateState (\st -> st { stateMaxNestingLevel = n })
-- | Parse a citation key of the form @\@key@ or @-\@key@.
--
-- Returns a pair: 'True' if the key was preceded by @-@, and the key
-- text without the @\@@.  The key starts with an alphanumeric
-- character, @_@ or @*@.  It continues with alphanumerics and @_@;
-- with one of @:.#$%&-+?<>~/@ when followed by an alphanumeric or
-- @_@; or with @:@ or @/@ when followed by @/@.  Fails when the
-- current position is the recorded last-string position.  Backtracks
-- completely on failure.
citeKey :: (Stream s m Char, HasLastStrPosition st)
        => ParserT s st m (Bool, Text)
citeKey = try $ do
  notAfterString >>= guard
  suppressAuthor <- option False (True <$ char '-')
  _ <- char '@'
  firstChar <- alphaNum <|> char '_' <|> char '*'
  rest <- many (regchar <|> internalPunct <|> beforeSlash)
  return (suppressAuthor, T.pack (firstChar : rest))
  where
    -- Characters that may appear anywhere in the key.
    regchar = satisfy (\c -> isAlphaNum c || c == '_')
    -- Punctuation is accepted only when a regular character follows.
    internalPunct = try (oneOf ":.#$%&-+?<>~/" <* lookAhead regchar)
    -- A colon or slash is also accepted when a slash follows.
    beforeSlash = try (oneOf ":/" <* lookAhead (char '/'))
-- | Build a single-token parser on top of 'tokenPrim'.
--
-- The first argument renders a token as 'Text' (unpacked to a
-- 'String' for 'tokenPrim'), the second gives the source position
-- taken from the token itself, and the third decides whether the
-- token is accepted and what value it yields.
token :: (Stream s m t)
      => (t -> Text)
      -> (t -> SourcePos)
      -> (t -> Maybe a)
      -> ParsecT s st m a
token pp pos match = tokenPrim render position match
  where
    render = T.unpack . pp
    -- Only the token is used; the other two arguments are ignored.
    position _ tok _ = pos tok
infixr 5 <+?>
-- | Run the first parser, then try the second one.  If the second
-- fails (backtracking via 'try'), 'mempty' is used in its place.
-- The two results are combined with 'mappend'.
(<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a
a <+?> b = do
  x <- a
  y <- try b <|> return mempty
  return (x `mappend` y)
-- | Promote @id@ and @class@ entries from the key-value part of an
-- attribute into its identifier and class positions.
--
-- * If the key-value list has an @id@ entry, its value replaces the
--   identifier; otherwise the identifier is kept.
-- * If it has a @class@ entry, its value is split on whitespace and
--   replaces the class list; otherwise the class list is kept.
-- * All @id@ and @class@ entries are removed from the returned
--   key-value list.
extractIdClass :: Attr -> Attr
extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
  where
    ident' = fromMaybe ident (lookup "id" kvs)
    cls'   = maybe cls T.words $ lookup "class" kvs
    -- A pair is kept only if its key is neither "id" nor "class".
    -- The previous @||@ test was true for every key, so nothing was
    -- ever removed.
    kvs'   = filter (\(k, _) -> k /= "id" && k /= "class") kvs
-- | Parse the contents of an included file with the given block
-- parser, then restore the previous input and position.
--
-- Arguments: the block parser to run on the file contents; a function
-- turning the file's 'Text' into the parser's input type; the
-- directories handed to 'readFileFromDirs'; and the file path.
--
-- Throws a 'PandocParseError' if the file is already in the list of
-- include files held in the parser state (an include loop).  If the
-- file cannot be loaded, 'CouldNotLoadIncludeFile' is reported and
-- empty contents are parsed instead.
insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st)
                    => ParserT a st m (mf Blocks)
                    -> (Text -> a)
                    -> [FilePath] -> FilePath
                    -> ParserT a st m (mf Blocks)
insertIncludedFile' blocks totoks dirs f = do
  savedPos <- getPosition
  savedInput <- getInput
  let fileKey = T.pack f
      -- Fallback when the file cannot be read: report it, parse nothing.
      missing = report (CouldNotLoadIncludeFile fileKey savedPos) >> return ""
  active <- getIncludeFiles <$> getState
  when (fileKey `elem` active) $
    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show savedPos
  updateState (addIncludeFile fileKey)
  contents <- readFileFromDirs dirs f >>= maybe missing return
  -- Switch the parser to the included file, starting at line 1, column 1.
  setPosition (newPos f 1 1)
  setInput (totoks contents)
  parsed <- blocks
  -- Switch back to where the including document left off.
  setInput savedInput
  setPosition savedPos
  updateState dropLatestIncludeFile
  return parsed
-- | Parse the contents of an included file with a block parser that
-- returns plain 'Blocks'.  Delegates to @insertIncludedFile'@,
-- wrapping the result in 'Identity' on the way in and unwrapping it
-- on the way out.
insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
                   => ParserT [a] st m Blocks
                   -> (Text -> [a])
                   -> [FilePath] -> FilePath
                   -> ParserT [a] st m Blocks
insertIncludedFile blocks totoks dirs f = do
  wrapped <- insertIncludedFile' (fmap Identity blocks) totoks dirs f
  return (runIdentity wrapped)
-- | Parse the contents of an included file with a block parser over
-- 'Text' input that returns a 'Future'.  Delegates to
-- @insertIncludedFile'@ with 'id' as the conversion from file
-- contents to parser input.
insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
                    => ParserT Text st m (Future st Blocks)
                    -> [FilePath] -> FilePath
                    -> ParserT Text st m (Future st Blocks)
insertIncludedFileF p dirs f = insertIncludedFile' p id dirs f