module Text.LaTeX.Base.Parser (
latexParser,
latexBlockParser,
latexAtOnce,
latexDocParser,
isMainDoc
#ifdef _TEST
, specials
#endif
)
where
import Data.Attoparsec.Text hiding (take, takeTill)
import qualified Data.Attoparsec.Text as A (takeTill)
import Data.Char (toLower)
import Data.Monoid
import Data.Text (Text)
import qualified Data.Text as T
import Control.Applicative ((<|>), (<$>))
import Control.Monad (unless)
import Text.LaTeX.Base.Syntax
latexAtOnce :: Text -> Either String LaTeX
latexAtOnce t | T.null t = return TeXEmpty
| otherwise =
case parse latexParser t of
Fail _ _ e -> Left e
Done _ r -> Right r
rx@(Partial _) ->
case feed rx T.empty of
Fail _ _ e -> Left e
Partial _ -> Left "incomplete input"
Done _ r -> Right r
latexParser :: Parser LaTeX
latexParser = blocks
latexBlockParser :: Parser LaTeX
latexBlockParser = block
latexDocParser :: Parser LaTeX
latexDocParser = blockTillDoc
blocks :: Parser LaTeX
blocks = mconcat <$> block `manyTill` endOfInput
blockTillDoc :: Parser LaTeX
blockTillDoc = do
b <- block
if isMainDoc b then return b
else mappend b <$> blockTillDoc
isMainDoc :: LaTeX -> Bool
isMainDoc b = case b of
TeXEnv "document" _ _ -> True
_ -> False
block :: Parser LaTeX
block = choice [text, dolMath, comment, environment, command, text2]
text :: Parser LaTeX
text = do
mbC <- peekChar
case mbC of
Nothing -> return TeXEmpty
Just c | c `elem` "$%\\{]}" -> fail "not text"
| otherwise -> TeXRaw <$> A.takeTill (`elem` "$%\\{]}")
text2 :: Parser LaTeX
text2 = do
_ <- char ']'
t <- try (text <|> return (TeXRaw T.empty))
return $ TeXRaw endlessSq <> t
environment :: Parser LaTeX
environment = choice [anonym, env]
anonym :: Parser LaTeX
anonym = char oBr >>
TeXBraces . mconcat <$> block `manyTill` char eBr
env :: Parser LaTeX
env = do
n <- envName begin
as <- cmdArgs
b <- envBody n
return $ TeXEnv (T.unpack n) as b
envName :: Text -> Parser Text
envName k = do
_ <- string k
_ <- char oBr
n <- A.takeTill (== eBr)
_ <- char eBr
return n
envBody :: Text -> Parser LaTeX
envBody n = mconcat <$> block `manyTill` endenv
where endenv = try $ string (end `T.snoc` oBr <> n
`T.snoc` eBr)
command :: Parser LaTeX
command = do
_ <- char bsl
mbX <- peekChar
case mbX of
Nothing -> return TeXEmpty
Just x -> if isSpecial x
then special
else do
c <- A.takeTill endCmd
as <- cmdArgs
if null as
then return $ TeXCommS (T.unpack c)
else return $ TeXComm (T.unpack c) as
cmdArgs :: Parser [TeXArg]
cmdArgs = try (whitespace >> string emptyArg >> return [FixArg TeXEmpty])
<|> many1 cmdArg
<|> return []
cmdArg :: Parser TeXArg
cmdArg = do
whitespace
c <- char '[' <|> char '{'
let e = case c of
'[' -> endlessSq
'{' -> endlessBr
_ -> error "this cannot happen!"
b <- mconcat <$> block `manyTill` string e
case c of
'[' -> return $ OptArg b
'{' -> return $ FixArg b
_ -> error "this cannot happen!"
whitespace :: Parser ()
whitespace = try (do _ <- char ' '
whitespace)
<|> return ()
special :: Parser LaTeX
special = do
x <- anyChar
case x of
'(' -> math Parentheses endPa
'[' -> math Square endSq
'{' -> lbrace
'}' -> rbrace
'|' -> vert
'\\' -> lbreak
_ -> commS [x]
lbreak :: Parser LaTeX
lbreak = do
y <- try (char oSq <|> char str <|> return ' ')
case y of
'[' -> linebreak False
'*' -> do z <- try (char oSq <|> return ' ')
case z of
'[' -> linebreak True
_ -> return (TeXLineBreak Nothing True)
_ -> return (TeXLineBreak Nothing False)
linebreak :: Bool -> Parser LaTeX
linebreak t = do m <- measure
_ <- char eSq
s <- try (char str <|> return ' ')
return $ TeXLineBreak (Just m) (t || s == str)
measure :: Parser Measure
measure = try (double >>= unit)
<|> CustomMeasure <$> block
unit :: Double -> Parser Measure
unit f = do
u1 <- anyChar
u2 <- anyChar
case map toLower [u1, u2] of
"pt" -> return $ Pt (truncate f)
"mm" -> return $ Mm f
"cm" -> return $ Cm f
"in" -> return $ In f
"ex" -> return $ Ex f
"em" -> return $ Em f
_ -> fail "NaN"
rbrace, lbrace,vert :: Parser LaTeX
lbrace = brace "{"
rbrace = brace "}"
vert = brace "|"
brace :: String -> Parser LaTeX
brace = return . TeXCommS
commS :: String -> Parser LaTeX
commS = return . TeXCommS
dolMath :: Parser LaTeX
dolMath = do
_ <- char dol
b <- mconcat <$> block `manyTill` char dol
return $ TeXMath Dollar b
math :: MathType -> Text -> Parser LaTeX
math t eMath = do
b <- mconcat <$> block `manyTill` try (string eMath)
return $ TeXMath t b
comment :: Parser LaTeX
comment = do
_ <- char per
c <- A.takeTill (== '\n')
e <- atEnd
unless e (char '\n' >>= \_ -> return ())
return $ TeXComment c
isSpecial :: Char -> Bool
isSpecial = (`elem` specials)
begin, end :: Text
begin = T.pack "\\begin"
end = T.pack "\\end"
endCmd :: Char -> Bool
endCmd = flip elem symbols
nul, eol, spc :: Char
nul = '\0'
eol = '\n'
spc = ' '
oBr, eBr, oSq, eSq, oPa, ePa, bsl, dol, per, str :: Char
oBr = '{'
eBr = '}'
oSq = '['
eSq = ']'
oPa = '('
ePa = ')'
bsl = '\\'
dol = '$'
per = '%'
str = '*'
endPa, endSq, endlessBr, endlessSq :: Text
endPa = T.pack "\\)"
endSq = T.pack "\\]"
endlessBr = T.pack "}"
endlessSq = T.pack "]"
emptyArg :: Text
emptyArg = T.pack "{}"
symbols :: String
symbols = [nul, eol, spc, oBr, eBr, eSq, oSq, oPa, ePa, bsl, dol, per]
specials :: String
specials = "'(),.-\"!^$&#{}%~|/:;=[]\\` "