module Text.LaTeX.Base.Parser (
parseLaTeX
, latexParser
, latexBlockParser
, latexAtOnce
#ifdef _TEST
, specials
#endif
) where
import Data.Attoparsec.Text hiding (take, takeTill)
import qualified Data.Attoparsec.Text as A (takeTill)
import Data.Char (toLower)
import Data.Monoid
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import Control.Applicative ((<|>), (<$>),many)
import Control.Monad (unless)
import Text.LaTeX.Base.Syntax
parseLaTeX :: Text -> Either String LaTeX
parseLaTeX t | T.null t = return TeXEmpty
| otherwise =
case parse latexParser t of
Fail _ _ e -> Left e
Done _ r -> Right r
rx@(Partial _) ->
case feed rx T.empty of
Fail _ _ e -> Left e
Partial _ -> Left "incomplete input"
Done _ r -> Right r
latexAtOnce :: Text -> Either String LaTeX
latexAtOnce = parseLaTeX
latexParser :: Parser LaTeX
latexParser = mconcat <$> latexBlockParser `manyTill` endOfInput
latexBlockParser :: Parser LaTeX
latexBlockParser = foldr1 (<|>) [text, dolMath, comment, text2, environment, command]
text :: Parser LaTeX
text = do
mbC <- peekChar
case mbC of
Nothing -> fail "text: Empty input."
Just c | c `elem` "$%\\{]}" -> fail "not text"
| otherwise -> TeXRaw <$> A.takeTill (`elem` "$%\\{]}")
text2 :: Parser LaTeX
text2 = do
_ <- char ']'
t <- try (text <|> return (TeXRaw T.empty))
return $ TeXRaw (T.pack "]") <> t
environment :: Parser LaTeX
environment = anonym <|> env
anonym :: Parser LaTeX
anonym = char '{' >>
TeXBraces . mconcat <$> latexBlockParser `manyTill` char '}'
env :: Parser LaTeX
env = do
_ <- char '\\'
n <- envName "begin"
sps <- many $ char ' '
let lsps = if null sps then mempty else TeXRaw $ T.pack sps
as <- cmdArgs
b <- envBody n
return $ TeXEnv (T.unpack n) (fromMaybe [] as) $
case as of
Just [] -> lsps <> TeXBraces mempty <> b
Nothing -> lsps <> b
_ -> b
envName :: Text -> Parser Text
envName k = do
_ <- string k
_ <- char '{'
n <- A.takeTill (== '}')
_ <- char '}'
return n
envBody :: Text -> Parser LaTeX
envBody n = mconcat <$> (bodyBlock n) `manyTill` endenv
where endenv = try $ string ("\\end") >> skipSpace >> string ("{" <> n <> "}")
bodyBlock :: Text -> Parser LaTeX
bodyBlock n = do
c <- peekChar
case c of
Just _ -> latexBlockParser
_ -> fail $ "Environment '" <> T.unpack n <> "' not finalized."
command :: Parser LaTeX
command = do
_ <- char '\\'
mbX <- peekChar
case mbX of
Nothing -> return TeXEmpty
Just x -> if isSpecial x
then special
else do
c <- A.takeTill endCmd
maybe (TeXCommS $ T.unpack c) (TeXComm $ T.unpack c) <$> cmdArgs
cmdArgs :: Parser (Maybe [TeXArg])
cmdArgs = try (string "{}" >> return (Just []))
<|> fmap Just (many1 cmdArg)
<|> return Nothing
cmdArg :: Parser TeXArg
cmdArg = do
c <- char '[' <|> char '{'
let e = case c of
'[' -> "]"
'{' -> "}"
_ -> error "this cannot happen!"
b <- mconcat <$> latexBlockParser `manyTill` string e
case c of
'[' -> return $ OptArg b
'{' -> return $ FixArg b
_ -> error "this cannot happen!"
special :: Parser LaTeX
special = do
x <- anyChar
case x of
'(' -> math Parentheses "\\)"
'[' -> math Square "\\]"
'{' -> lbrace
'}' -> rbrace
'|' -> vert
'\\' -> lbreak
_ -> commS [x]
lbreak :: Parser LaTeX
lbreak = do
y <- try (char '[' <|> char '*' <|> return ' ')
case y of
'[' -> linebreak False
'*' -> do z <- try (char '[' <|> return ' ')
case z of
'[' -> linebreak True
_ -> return (TeXLineBreak Nothing True)
_ -> return (TeXLineBreak Nothing False)
linebreak :: Bool -> Parser LaTeX
linebreak t = do m <- measure
_ <- char ']'
s <- try (char '*' <|> return ' ')
return $ TeXLineBreak (Just m) (t || s == '*')
measure :: Parser Measure
measure = try (double >>= unit) <|> CustomMeasure <$> latexBlockParser
unit :: Double -> Parser Measure
unit f = do
u1 <- anyChar
u2 <- anyChar
case map toLower [u1, u2] of
"pt" -> return $ Pt f
"mm" -> return $ Mm f
"cm" -> return $ Cm f
"in" -> return $ In f
"ex" -> return $ Ex f
"em" -> return $ Em f
_ -> fail "NaN"
rbrace, lbrace,vert :: Parser LaTeX
lbrace = brace "{"
rbrace = brace "}"
vert = brace "|"
brace :: String -> Parser LaTeX
brace = return . TeXCommS
commS :: String -> Parser LaTeX
commS = return . TeXCommS
dolMath :: Parser LaTeX
dolMath = do
_ <- char '$'
b <- mconcat <$> latexBlockParser `manyTill` char '$'
return $ TeXMath Dollar b
math :: MathType -> Text -> Parser LaTeX
math t eMath = do
b <- mconcat <$> latexBlockParser `manyTill` try (string eMath)
return $ TeXMath t b
comment :: Parser LaTeX
comment = do
_ <- char '%'
c <- A.takeTill (== '\n')
e <- atEnd
unless e (char '\n' >>= \_ -> return ())
return $ TeXComment c
isSpecial :: Char -> Bool
isSpecial = (`elem` specials)
endCmd :: Char -> Bool
endCmd c = notLowercaseAlph && notUppercaseAlph
where c' = fromEnum c
notLowercaseAlph = c' < fromEnum 'a' || c' > fromEnum 'z'
notUppercaseAlph = c' < fromEnum 'A' || c' > fromEnum 'Z'
specials :: String
specials = "'(),.-\"!^$&#{}%~|/:;=[]\\` "