{-# LANGUAGE OverloadedStrings #-} -- | The /LaTeX/ parser. -- -- Use 'parseLaTeX' to parse a 'Text' containing /LaTeX/ code. -- If the 'Text' is in a file, you may want to use 'parseLaTeXFile'. -- Use this module together with "Text.LaTeX.Base.Syntax" to perform -- analysis and transformations of /LaTeX/ code. The parser ('parseLaTeX') -- is related with the renderer ('render') by the following property: -- -- /If @t :: Text@ is a syntactically valid LaTeX block, then:/ -- -- > fmap render (parseLaTeX t) == Right t -- -- This property says two things: -- -- * Given a valid LaTeX input, 'parseLaTeX' returns a 'LaTeX' value. -- * If the parsed value is again rendered, you get the initial input. -- -- In other words, 'parseLaTeX' is a partial function defined over the -- set of valid LaTeX files, and 'render' is its /left/ inverse. -- module Text.LaTeX.Base.Parser ( -- * The parser parseLaTeX , parseLaTeXFile -- * Parsing errors , ParseError , errorPos , errorMessages -- ** Error messages , Message (..) , messageString -- ** Source positions , SourcePos , sourceLine , sourceColumn , sourceName -- * Configuring your parser , ParserConf (..) , defaultParserConf , parseLaTeXWith , parseLaTeXFileWith -- * Parser combinators , Parser , latexParser , latexBlockParser ) where import Text.Parsec hiding ((<|>),many) import Text.Parsec.Error import Data.Char (toLower,digitToInt) import Data.Monoid import Data.Maybe (fromMaybe) import qualified Data.Text as T import Control.Applicative import Control.Monad (unless) import Text.LaTeX.Base.Syntax import Text.LaTeX.Base.Render ------------------------------------------------------------------------ -- Parser configuration ------------------------------------------------------------------------ -- | Configuration for the LaTeX parser. data ParserConf = ParserConf { -- | This is the list of names of the environments such that -- their content will be parsed verbatim. verbatimEnvironments :: [String] } -- | Default parser configuration, used by 'parseLaTeX' and 'parseLaTeXFile'. -- -- Defaults: -- -- > verbatimEnvironments = ["verbatim"] -- defaultParserConf :: ParserConf defaultParserConf = ParserConf { verbatimEnvironments = ["verbatim"] } -- | Parser with 'Text' input and 'ParserConf' environment. type Parser = Parsec Text ParserConf ------------------------------------------------------------------------ -- Parser ------------------------------------------------------------------------ -- | Parse a 'Text' sequence as a 'LaTeX' block. If it fails, it returns -- an error string. parseLaTeX :: Text -> Either ParseError LaTeX parseLaTeX = parseLaTeXWith defaultParserConf parseLaTeXWith :: ParserConf -> Text -> Either ParseError LaTeX parseLaTeXWith conf t | T.null t = return TeXEmpty | otherwise = runParser latexParser conf "parseLaTeX input" t -- | Read a file and parse it as 'LaTeX'. parseLaTeXFile :: FilePath -> IO (Either ParseError LaTeX) parseLaTeXFile = parseLaTeXFileWith defaultParserConf parseLaTeXFileWith :: ParserConf -> FilePath -> IO (Either ParseError LaTeX) parseLaTeXFileWith conf fp = runParser latexParser conf fp <$> readFileTex fp -- | The 'LaTeX' parser. latexParser :: Parser LaTeX latexParser = mconcat <$> latexBlockParser `manyTill` eof -- | Parser of a single 'LaTeX' constructor, no appending blocks. latexBlockParser :: Parser LaTeX latexBlockParser = foldr1 (<|>) [ text "text" , dolMath "inline math ($)" , comment "comment" , text2 "text2" , try environment "environment" , command "command" ] -- Note: text stops on ']'; if the other parsers fail on the rest -- text2 handles it, starting with ']' ------------------------------------------------------------------------ -- Text ------------------------------------------------------------------------ text :: Parser LaTeX text = do mbC <- peekChar let nottext :: [Char] nottext = "$%\\{]}" case mbC of Nothing -> fail "text: Empty input." Just c | c `elem` nottext -> fail "not text" | otherwise -> TeXRaw <$> takeTill (`elem` nottext) ------------------------------------------------------------------------ -- Text without stopping on ']' ------------------------------------------------------------------------ text2 :: Parser LaTeX text2 = do _ <- char ']' t <- try (text <|> return (TeXRaw T.empty)) return $ TeXRaw (T.pack "]") <> t ------------------------------------------------------------------------ -- Environment ------------------------------------------------------------------------ environment :: Parser LaTeX environment = anonym <|> env anonym :: Parser LaTeX anonym = do _ <- char '{' l <- TeXBraces . mconcat <$> many latexBlockParser _ <- char '}' return l env :: Parser LaTeX env = do n <- char '\\' *> envName "begin" sps <- many $ char ' ' let lsps = if null sps then mempty else TeXRaw $ T.pack sps as <- cmdArgs verbatims <- verbatimEnvironments <$> getState if n `elem` verbatims then let endenv = try $ string "\\end" >> spaces >> string ("{" <> n <> "}") in TeXEnv n (fromMaybe [] as) . TeXRaw . T.pack <$> manyTill anyChar endenv else do b <- envBody n return $ TeXEnv n (fromMaybe [] as) $ case as of Just [] -> lsps <> TeXBraces mempty <> b Nothing -> lsps <> b _ -> b envName :: String -> Parser String envName k = do _ <- string k _ <- char '{' n <- takeTill (== '}') _ <- char '}' return $ T.unpack n envBody :: String -> Parser LaTeX envBody n = mconcat <$> bodyBlock n `manyTill` endenv where endenv = try $ string "\\end" >> spaces >> string ("{" <> n <> "}") bodyBlock :: String -> Parser LaTeX bodyBlock n = do c <- peekChar case c of Just _ -> latexBlockParser _ -> fail $ "Environment '" <> n <> "' not finalized." ------------------------------------------------------------------------ -- Command ------------------------------------------------------------------------ command :: Parser LaTeX command = do _ <- char '\\' mbX <- peekChar case mbX of Nothing -> return TeXEmpty Just x -> if isSpecial x then special else do c <- takeTill endCmd maybe (TeXCommS $ T.unpack c) (TeXComm $ T.unpack c) <$> cmdArgs ------------------------------------------------------------------------ -- Command Arguments ------------------------------------------------------------------------ cmdArgs :: Parser (Maybe [TeXArg]) cmdArgs = try (string "{}" >> return (Just [])) <|> fmap Just (try $ many1 cmdArg) <|> return Nothing cmdArg :: Parser TeXArg cmdArg = do c <- char '[' <|> char '{' let e = case c of '[' -> "]" '{' -> "}" _ -> error "this cannot happen!" b <- mconcat <$> manyTill latexBlockParser (string e) case c of '[' -> return $ OptArg b '{' -> return $ FixArg b _ -> error "this cannot happen!" ------------------------------------------------------------------------ -- Special commands (consisting of one char) ------------------------------------------------------------------------ special :: Parser LaTeX special = do x <- anyChar case x of '(' -> math Parentheses "\\)" '[' -> math Square "\\]" '{' -> lbrace '}' -> rbrace '|' -> vert '\\' -> lbreak _ -> commS [x] ------------------------------------------------------------------------ -- Line break ------------------------------------------------------------------------ lbreak :: Parser LaTeX lbreak = do y <- try (char '[' <|> char '*' <|> return ' ') case y of '[' -> linebreak False '*' -> do z <- try (char '[' <|> return ' ') case z of '[' -> linebreak True _ -> return (TeXLineBreak Nothing True) _ -> return (TeXLineBreak Nothing False) linebreak :: Bool -> Parser LaTeX linebreak t = do m <- measure "measure" _ <- char ']' s <- try (char '*' <|> return ' ') return $ TeXLineBreak (Just m) (t || s == '*') measure :: Parser Measure measure = try (floating >>= unit) <|> (CustomMeasure . mconcat) <$> manyTill latexBlockParser (lookAhead $ char ']') unit :: Double -> Parser Measure unit f = do u1 <- anyChar u2 <- anyChar case map toLower [u1, u2] of "pt" -> return $ Pt f "mm" -> return $ Mm f "cm" -> return $ Cm f "in" -> return $ In f "ex" -> return $ Ex f "em" -> return $ Em f _ -> fail "NaN" ------------------------------------------------------------------------ -- Right or left brace or vertical ------------------------------------------------------------------------ rbrace, lbrace,vert :: Parser LaTeX lbrace = brace "{" rbrace = brace "}" vert = brace "|" brace :: String -> Parser LaTeX brace = return . TeXCommS -- The same as commS? commS :: String -> Parser LaTeX commS = return . TeXCommS ------------------------------------------------------------------------ -- Math ------------------------------------------------------------------------ dolMath :: Parser LaTeX dolMath = do _ <- char '$' b <- mconcat <$> latexBlockParser `manyTill` char '$' return $ TeXMath Dollar b math :: MathType -> String -> Parser LaTeX math t eMath = do b <- mconcat <$> latexBlockParser `manyTill` try (string eMath) return $ TeXMath t b ------------------------------------------------------------------------ -- Comment ------------------------------------------------------------------------ comment :: Parser LaTeX comment = do _ <- char '%' c <- takeTill (== '\n') e <- atEnd unless e (char '\n' >>= \_ -> return ()) return $ TeXComment c ------------------------------------------------------------------------ -- Helpers ------------------------------------------------------------------------ isSpecial :: Char -> Bool isSpecial = (`elem` specials) endCmd :: Char -> Bool endCmd c = notLowercaseAlph && notUppercaseAlph where c' = fromEnum c notLowercaseAlph = c' < fromEnum 'a' || c' > fromEnum 'z' notUppercaseAlph = c' < fromEnum 'A' || c' > fromEnum 'Z' specials :: String specials = "'(),.-\"!^$&#{}%~|/:;=[]\\` " peekChar :: Parser (Maybe Char) peekChar = Just <$> (try $ lookAhead anyChar) <|> pure Nothing atEnd :: Parser Bool atEnd = (eof *> pure True) <|> pure False takeTill :: (Char -> Bool) -> Parser Text takeTill p = T.pack <$> many (satisfy (not . p)) -- Parsing doubles -- -- Code for 'floating', 'fractExponent', and 'sign' comes from parsers package: -- -- http://hackage.haskell.org/package/parsers -- floating :: Parser Double floating = decimal <**> fractExponent fractExponent :: Parser (Integer -> Double) fractExponent = (\fract expo n -> (fromInteger n + fract) * expo) <$> fraction <*> option 1.0 exponent' <|> (\expo n -> fromInteger n * expo) <$> exponent' where fraction = foldr op 0.0 <$> (char '.' *> (some digit "fraction")) op d f = (f + fromIntegral (digitToInt d))/10.0 exponent' = ((\f e -> power (f e)) <$ oneOf "eE" <*> sign <*> (decimal "exponent")) "exponent" power e | e < 0 = 1.0/power(-e) | otherwise = fromInteger (10^e) decimal :: Parser Integer decimal = read <$> many1 digit sign :: Parser (Integer -> Integer) sign = negate <$ char '-' <|> id <$ char '+' <|> pure id