{-# LANGUAGE TupleSections #-} {- Copyright (C) 2009 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Functions for parsing a LaTeX formula to a Haskell representation. -} module Text.TeXMath.Readers.TeX (readTeX) where import Data.List (intercalate) import Control.Monad import Data.Char (isDigit, isAscii, isLetter) import qualified Data.Map as M import Text.Parsec hiding (label) import Text.Parsec.Error import Text.Parsec.String import Text.TeXMath.Types import Control.Applicative ((<*), (*>), (<*>), (<$>), (<$), pure) import qualified Text.TeXMath.Shared as S import Text.TeXMath.Readers.TeX.Macros (applyMacros, parseMacroDefinitions) import Text.TeXMath.Unicode.ToTeX (getSymbolType) import Data.Maybe (fromMaybe, fromJust) import Text.TeXMath.Unicode.ToUnicode (toUnicode) type TP = Parser -- The parser expr1 :: TP Exp expr1 = choice [ inbraces , variable , number , text , styled , root , mspace , mathop , phantom , boxed , binary , genfrac , substack , bareSubSup , environment , diacritical , unicode , ensuremath , enclosure , texSymbol ] <* ignorable -- | Parse a formula, returning a list of 'Exp'. readTeX :: String -> Either String [Exp] readTeX inp = let (ms, rest) = parseMacroDefinitions inp in either (Left . showParseError inp) (Right . id) $ parse formula "formula" (applyMacros ms rest) showParseError :: String -> ParseError -> String showParseError inp pe = snippet ++ "\n" ++ caretline ++ showErrorMessages "or" "unknown" "expecting" "unexpected" "eof" (errorMessages pe) where errln = sourceLine (errorPos pe) errcol = sourceColumn (errorPos pe) snipoffset = max 0 (errcol - 20) inplns = lines inp ln = if length inplns >= errln then inplns !! (errln - 1) else "" -- should not happen snippet = take 40 $ drop snipoffset ln caretline = replicate (errcol - snipoffset - 1) ' ' ++ "^" ctrlseq :: String -> TP String ctrlseq s = lexeme $ try $ do result <- string ('\\':s) case s of [c] | not (isLetter c) -> return () _ -> (do pos <- getPosition letter setPosition pos mzero ("non-letter after \\" ++ s)) <|> return () return result ignorable :: TP () ignorable = skipMany (comment <|> label <|> () <$ ctrlseq "nonumber" <|> (skipMany1 space "whitespace")) comment :: TP () comment = char '%' *> skipMany (noneOf "\n") *> optional newline label :: TP () label = ctrlseq "label" *> braces (skipMany (noneOf "}")) unGrouped :: Exp -> [Exp] unGrouped (EGrouped xs) = xs unGrouped x = [x] formula :: TP [Exp] formula = unGrouped <$> (ignorable *> manyExp expr <* eof) expr :: TP Exp expr = do optional (ctrlseq "displaystyle") (a, convertible) <- try (braces operatorname) -- needed because macros add {} <|> ((,False) <$> expr1) <|> operatorname limits <- limitsIndicator subSup limits convertible a <|> superOrSubscripted limits convertible a <|> return a -- | Parser for \operatorname command. -- Returns a tuple of EMathOperator name and Bool depending on the flavor -- of the command: -- -- - True for convertible operator (\operator*) -- -- - False otherwise operatorname :: TP (Exp, Bool) operatorname = do ctrlseq "operatorname" convertible <- (char '*' >> spaces >> return True) <|> return False op <- expToOperatorName <$> texToken maybe mzero (\s -> return (EMathOperator s, convertible)) op -- | Converts identifiers, symbols and numbers to a flat string. -- Returns Nothing if the expression contains anything else. expToOperatorName :: Exp -> Maybe String expToOperatorName e = case e of EGrouped xs -> concat <$> mapM fl xs _ -> fl e where fl f = case f of EIdentifier s -> Just s -- handle special characters ESymbol _ "\x2212" -> Just "-" ESymbol _ "\x2032" -> Just "'" ESymbol _ "\x2033" -> Just "''" ESymbol _ "\x2034" -> Just "'''" ESymbol _ "\x2057" -> Just "''''" ESymbol _ "\x02B9" -> Just "'" ESymbol _ s -> Just s ENumber s -> Just s EStyled sty xs -> concat <$> sequence (map (toStr sty) xs) _ -> Nothing toStr sty (EIdentifier s) = Just $ toUnicode sty s toStr _ (EText sty' s) = Just $ toUnicode sty' s toStr sty (ENumber s) = Just $ toUnicode sty s toStr sty (EMathOperator s) = Just $ toUnicode sty s toStr sty (ESymbol _ s) = Just $ toUnicode sty s toStr _ (ESpace _) = Just $ " " toStr _ (EStyled sty' exps) = concat <$> sequence (map (toStr sty') exps) toStr _ _ = Nothing bareSubSup :: TP Exp bareSubSup = subSup Nothing False (EIdentifier "") <|> superOrSubscripted Nothing False (EIdentifier "") limitsIndicator :: TP (Maybe Bool) limitsIndicator = (ctrlseq "limits" >> return (Just True)) <|> (ctrlseq "nolimits" >> return (Just False)) <|> return Nothing binomCmd :: TP String binomCmd = oneOfCommands (map fst binomCmds) binomCmds :: [(String, Exp -> Exp -> Exp)] binomCmds = [ ("\\choose", \x y -> EDelimited "(" ")" [Right (EFraction NoLineFrac x y)]) , ("\\brack", \x y -> EDelimited "[" "]" [Right (EFraction NoLineFrac x y)]) , ("\\brace", \x y -> EDelimited "{" "}" [Right (EFraction NoLineFrac x y)]) , ("\\bangle", \x y -> EDelimited "\x27E8" "\x27E9" [Right (EFraction NoLineFrac x y)]) ] genfrac :: TP Exp genfrac = do ctrlseq "genfrac" openDelim <- braces $ option '(' ((char '\\' >> oneOf "{}") <|> anyChar) closeDelim <- braces $ option ')' ((char '\\' >> oneOf "{}") <|> anyChar) bar <- False <$ try (braces (string "0pt")) <|> True <$ texToken displayStyle <- True <$ try (braces (char '0')) <|> False <$ texToken x <- texToken y <- texToken let fracType = case (bar, displayStyle) of (False, _) -> NoLineFrac (True, True) -> DisplayFrac _ -> NormalFrac return $ EDelimited [openDelim] [closeDelim] [Right (EFraction fracType x y)] substack :: TP Exp substack = do ctrlseq "substack" formulas <- braces $ ignorable >> (manyExp expr) `sepEndBy` endLine return $ EArray [AlignCenter] $ map (\x -> [[x]]) formulas asGroup :: [Exp] -> Exp asGroup [x] = x asGroup xs = EGrouped xs -- variant of many that is sensitive to \choose and other such commands manyExp' :: Bool -> TP Exp -> TP Exp manyExp' requireNonempty p = do initial <- if requireNonempty then many1 (notFollowedBy binomCmd >> p) else many (notFollowedBy binomCmd >> p) let withCmd :: String -> TP Exp withCmd cmd = case lookup cmd binomCmds of Just f -> f <$> (asGroup <$> pure initial) <*> (asGroup <$> many p) Nothing -> fail $ "Unknown command " ++ cmd (binomCmd >>= withCmd) <|> return (asGroup initial) manyExp :: TP Exp -> TP Exp manyExp = manyExp' False many1Exp :: TP Exp -> TP Exp many1Exp = manyExp' True inbraces :: TP Exp inbraces = braces (manyExp expr) texToken :: TP Exp texToken = texSymbol <|> inbraces <|> inbrackets <|> texChar texChar :: TP Exp texChar = do c <- noneOf "\n\t\r \\{}" <* spaces return $ if isDigit c then ENumber [c] else EIdentifier [c] inbrackets :: TP Exp inbrackets = (brackets $ manyExp $ notFollowedBy (char ']') >> expr) number :: TP Exp number = lexeme $ ENumber <$> try decimalNumber where decimalNumber = do xs <- many digit ys <- option [] $ try (char '.' >> (('.':) <$> many1 digit)) case xs ++ ys of [] -> mzero zs -> return zs enclosure :: TP Exp enclosure = basicEnclosure <|> scaledEnclosure <|> delimited -- Expensive basicEnclosure :: TP Exp basicEnclosure = choice (map (\(s, v) -> symbol s >> return v) enclosures) fence :: String -> TP String fence cmd = do symbol cmd enc <- basicEnclosure <|> (try (symbol ".") >> return (ESymbol Open "")) case enc of ESymbol Open x -> return x ESymbol Close x -> return x _ -> mzero middle :: TP String middle = fence "\\middle" right :: TP String right = fence "\\right" delimited :: TP Exp delimited = do openc <- try $ fence "\\left" contents <- concat <$> many (try $ ((:[]) . Left <$> middle) <|> (map Right . unGrouped <$> many1Exp (notFollowedBy right *> expr))) closec <- right <|> return "" return $ EDelimited openc closec contents scaledEnclosure :: TP Exp scaledEnclosure = do cmd <- oneOfCommands (map fst S.scalers) case S.getScalerValue cmd of Just r -> EScaled r <$> basicEnclosure Nothing -> mzero endLine :: TP Char endLine = try $ do symbol "\\\\" optional inbrackets -- can contain e.g. [1.0in] for a line height, not yet supported optional $ ctrlseq "hline" -- we don't represent the line, but it shouldn't crash parsing return '\n' arrayLine :: TP ArrayLine arrayLine = notFollowedBy (ctrlseq "end" >> return '\n') >> sepBy1 (unGrouped <$> manyExp (notFollowedBy endLine >> expr)) (symbol "&") arrayAlignments :: TP [Alignment] arrayAlignments = try $ do as <- braces (many letter) let letterToAlignment 'l' = AlignLeft letterToAlignment 'c' = AlignCenter letterToAlignment 'r' = AlignRight letterToAlignment _ = AlignDefault return $ map letterToAlignment as environment :: TP Exp environment = do ctrlseq "begin" name <- braces (oneOfStrings (M.keys environments) <* optional (char '*')) spaces case M.lookup name environments of Just env -> do result <- env spaces ctrlseq "end" braces (string name <* optional (char '*')) spaces return result Nothing -> mzero -- should not happen environments :: M.Map String (TP Exp) environments = M.fromList [ ("array", stdarray) , ("eqnarray", eqnarray) , ("align", align) , ("aligned", align) , ("alignat", inbraces *> spaces *> align) , ("alignedat", inbraces *> spaces *> align) , ("flalign", flalign) , ("flaligned", flalign) , ("cases", cases) , ("matrix", matrixWith "" "") , ("smallmatrix", matrixWith "" "") , ("pmatrix", matrixWith "(" ")") , ("bmatrix", matrixWith "[" "]") , ("Bmatrix", matrixWith "{" "}") , ("vmatrix", matrixWith "\x2223" "\x2223") , ("Vmatrix", matrixWith "\x2225" "\x2225") , ("split", align) , ("multline", gather) , ("gather", gather) , ("gathered", gather) ] mbArrayAlignments :: TP (Maybe [Alignment]) mbArrayAlignments = option Nothing $ Just <$> arrayAlignments alignsFromRows :: Alignment -> [ArrayLine] -> [Alignment] alignsFromRows _ [] = [] alignsFromRows defaultAlignment (r:_) = replicate (length r) defaultAlignment matrixWith :: String -> String -> TP Exp matrixWith opendelim closedelim = do mbaligns <- mbArrayAlignments lines' <- sepEndBy1 arrayLine endLine let aligns = fromMaybe (alignsFromRows AlignCenter lines') mbaligns return $ if null opendelim && null closedelim then EArray aligns lines' else EDelimited opendelim closedelim [Right $ EArray aligns lines'] stdarray :: TP Exp stdarray = do mbaligns <- mbArrayAlignments lines' <- sepEndBy1 arrayLine endLine let aligns = fromMaybe (alignsFromRows AlignDefault lines') mbaligns return $ EArray aligns lines' gather :: TP Exp gather = do rows <- sepEndBy arrayLine endLine return $ EArray (alignsFromRows AlignCenter rows) rows eqnarray :: TP Exp eqnarray = do rows <- sepEndBy1 arrayLine endLine let n = maximum $ map length rows return $ EArray (take n $ cycle [AlignRight, AlignCenter, AlignLeft]) rows align :: TP Exp align = do rows <- sepEndBy1 arrayLine endLine let n = maximum $ map length rows return $ EArray (take n $ cycle [AlignRight, AlignLeft]) rows flalign :: TP Exp flalign = do rows <- sepEndBy1 arrayLine endLine let n = maximum $ map length rows return $ EArray (take n $ cycle [AlignLeft, AlignRight]) rows cases :: TP Exp cases = do rs <- sepEndBy1 arrayLine endLine return $ EDelimited "{" "" [Right $ EArray (alignsFromRows AlignDefault rs) rs] variable :: TP Exp variable = do v <- letter spaces return $ EIdentifier [v] isConvertible :: Exp -> Bool isConvertible (EMathOperator x) = x `elem` convertibleOps where convertibleOps = [ "lim","liminf","limsup","inf","sup" , "min","max","Pr","det","gcd" ] isConvertible (ESymbol Rel _) = True isConvertible (ESymbol Bin _) = True isConvertible (ESymbol Op x) = x `elem` convertibleSyms where convertibleSyms = ["\x2211","\x220F","\x22C2", "\x22C3","\x22C0","\x22C1","\x2A05","\x2A06", "\x2210","\x2A01","\x2A02","\x2A00","\x2A04"] isConvertible _ = False -- check if sub/superscripts should always be under and over the expression isUnderover :: Exp -> Bool isUnderover (EOver _ _ (ESymbol Accent "\xFE37")) = True -- \overbrace isUnderover (EOver _ _ (ESymbol Accent "\x23B4")) = True -- \overbracket isUnderover (EUnder _ _ (ESymbol Accent "\xFE38")) = True -- \underbrace isUnderover (EUnder _ _ (ESymbol Accent "\x23B5")) = True -- \underbracket isUnderover (EOver _ _ (ESymbol Accent "\x23DE")) = True -- \overbrace isUnderover (EUnder _ _ (ESymbol Accent "\x23DF")) = True -- \underbrace isUnderover _ = False subSup :: Maybe Bool -> Bool -> Exp -> TP Exp subSup limits convertible a = try $ do let sub1 = symbol "_" >> expr1 let sup1 = symbol "^" >> expr1 (b,c) <- try (do {m <- sub1; n <- sup1; return (m,n)}) <|> (do {n <- sup1; m <- sub1; return (m,n)}) return $ case limits of Just True -> EUnderover False a b c Nothing | convertible || isConvertible a -> EUnderover True a b c | isUnderover a -> EUnderover False a b c _ -> ESubsup a b c superOrSubscripted :: Maybe Bool -> Bool -> Exp -> TP Exp superOrSubscripted limits convertible a = try $ do c <- oneOf "^_" spaces b <- expr case c of '^' -> return $ case limits of Just True -> EOver False a b Nothing | convertible || isConvertible a -> EOver True a b | isUnderover a -> EOver False a b _ -> ESuper a b '_' -> return $ case limits of Just True -> EUnder False a b Nothing | convertible || isConvertible a -> EUnder True a b | isUnderover a -> EUnder False a b _ -> ESub a b _ -> mzero unicode :: TP Exp unicode = lexeme $ do c <- satisfy (not . isAscii) return (ESymbol (getSymbolType c) [c]) ensuremath :: TP Exp ensuremath = ctrlseq "ensuremath" *> inbraces -- Note: cal and scr are treated the same way, as unicode is lacking such two different sets for those. styleOps :: M.Map String ([Exp] -> Exp) styleOps = M.fromList [ ("\\mathrm", EStyled TextNormal) , ("\\mathup", EStyled TextNormal) , ("\\mathbf", EStyled TextBold) , ("\\boldsymbol", EStyled TextBold) , ("\\mathbfup", EStyled TextBold) , ("\\mathit", EStyled TextItalic) , ("\\mathtt", EStyled TextMonospace) , ("\\texttt", EStyled TextMonospace) , ("\\mathsf", EStyled TextSansSerif) , ("\\mathsfup", EStyled TextSansSerif) , ("\\mathbb", EStyled TextDoubleStruck) , ("\\mathcal", EStyled TextScript) , ("\\mathscr", EStyled TextScript) , ("\\mathfrak", EStyled TextFraktur) , ("\\mathbfit", EStyled TextBoldItalic) , ("\\mathbfsfup", EStyled TextSansSerifBold) , ("\\mathbfsfit", EStyled TextSansSerifBoldItalic) , ("\\mathbfscr", EStyled TextBoldScript) , ("\\mathbffrak", EStyled TextBoldFraktur) , ("\\mathbfcal", EStyled TextBoldScript) , ("\\mathsfit", EStyled TextSansSerifItalic) ] diacritical :: TP Exp diacritical = do c <- oneOfCommands (map snd S.diacriticals) case S.getDiacriticalCons c of Just r -> r <$> texToken Nothing -> mzero phantom :: TP Exp phantom = EPhantom <$> (ctrlseq "phantom" *> texToken) boxed :: TP Exp boxed = EBoxed <$> (ctrlseq "boxed" *> texToken) text :: TP Exp text = do c <- oneOfCommands (M.keys textOps) maybe mzero (<$> (bracedText <* spaces)) $ M.lookup c textOps textOps :: M.Map String (String -> Exp) textOps = M.fromList [ ("\\textrm", (EText TextNormal)) , ("\\text", (EText TextNormal)) , ("\\textbf", (EText TextBold)) , ("\\textit", (EText TextItalic)) , ("\\texttt", (EText TextMonospace)) , ("\\textsf", (EText TextSansSerif)) , ("\\mbox", (EText TextNormal)) ] styled :: TP Exp styled = do c <- oneOfCommands (M.keys styleOps) case M.lookup c styleOps of Just f -> do x <- inbraces return $ case x of EGrouped xs -> f xs _ -> f [x] Nothing -> mzero -- note: sqrt can be unary, \sqrt{2}, or binary, \sqrt[3]{2} root :: TP Exp root = do ctrlseq "sqrt" <|> ctrlseq "surd" (ERoot <$> inbrackets <*> texToken) <|> (ESqrt <$> texToken) mspace :: TP Exp mspace = do ctrlseq "mspace" lexeme $ char '{' len <- many1 digit lexeme $ string "mu" lexeme $ char '}' case reads len of ((n,[]):_) -> return $ ESpace (n/18) _ -> mzero mathop :: TP Exp mathop = mathopWith "mathop" Op <|> mathopWith "mathrel" Rel <|> mathopWith "mathbin" Bin <|> mathopWith "mathord" Ord <|> mathopWith "mathopen" Open <|> mathopWith "mathclose" Close <|> mathopWith "mathpunct" Pun mathopWith :: String -> TeXSymbolType -> TP Exp mathopWith name ty = try $ do ctrlseq name e <- expr1 case e of ESymbol _ x -> return $ ESymbol ty x EIdentifier x -> return $ ESymbol ty x x -> return x binary :: TP Exp binary = do c <- oneOfCommands binops a <- texToken b <- texToken case c of "\\overset" -> return $ EOver False b a "\\stackrel" -> return $ EOver False b a "\\underset" -> return $ EUnder False b a "\\frac" -> return $ EFraction NormalFrac a b "\\tfrac" -> return $ EFraction InlineFrac a b "\\dfrac" -> return $ EFraction DisplayFrac a b "\\binom" -> return $ EDelimited "(" ")" [Right (EFraction NoLineFrac a b)] _ -> fail "Unrecognised binary operator" where binops = ["\\overset", "\\stackrel", "\\underset", "\\frac", "\\tfrac", "\\dfrac", "\\binom"] texSymbol :: TP Exp texSymbol = do negated <- (try (ctrlseq "not") >> return True) <|> return False sym <- operator <|> tSymbol if negated then neg sym else return sym oneOfCommands :: [String] -> TP String oneOfCommands cmds = try $ do cmd <- oneOfStrings cmds case cmd of ['\\',c] | not (isLetter c) -> return () _ -> (do pos <- getPosition letter setPosition pos mzero ("non-letter after " ++ cmd)) <|> return () spaces return cmd oneOfStrings' :: (Char -> Char -> Bool) -> [String] -> TP String oneOfStrings' _ [] = mzero oneOfStrings' matches strs = try $ do c <- anyChar let strs' = [xs | (x:xs) <- strs, x `matches` c] case strs' of [] -> mzero _ -> (c:) <$> oneOfStrings' matches strs' <|> if "" `elem` strs' then return [c] else mzero -- | Parses one of a list of strings. If the list contains -- two strings one of which is a prefix of the other, the longer -- string will be matched if possible. oneOfStrings :: [String] -> TP String oneOfStrings strs = oneOfStrings' (==) strs (intercalate ", " $ map show strs) -- | Like '()', but moves position back to the beginning of the parse -- before reporting the error. () :: Monad m => ParsecT s u m a -> String -> ParsecT s u m a () p expected = do pos <- getPosition p <|> (setPosition pos >> mzero expected) infix 0 tSymbol :: TP Exp tSymbol = do sym <- oneOfCommands (M.keys symbols) return $ fromJust (M.lookup sym symbols) operator :: TP Exp operator = do sym <- lexeme (oneOfStrings $ M.keys operators) return $ fromJust (M.lookup sym operators) neg :: Exp -> TP Exp neg (ESymbol Rel x) = ESymbol Rel `fmap` case x of "\x2282" -> return "\x2284" "\x2283" -> return "\x2285" "\x2286" -> return "\x2288" "\x2287" -> return "\x2289" "\x2208" -> return "\x2209" _ -> mzero neg _ = mzero lexeme :: TP a -> TP a lexeme p = p <* ignorable braces :: TP a -> TP a braces p = lexeme $ char '{' *> spaces *> p <* spaces <* char '}' brackets :: TP a -> TP a brackets p = lexeme $ char '[' *> spaces *> p <* spaces <* char ']' symbol :: String -> TP String symbol s = lexeme $ try $ string s enclosures :: [(String, Exp)] enclosures = [ ("(", ESymbol Open "(") , (")", ESymbol Close ")") , ("[", ESymbol Open "[") , ("]", ESymbol Close "]") , ("\\{", ESymbol Open "{") , ("\\}", ESymbol Close "}") , ("\\lbrack", ESymbol Open "[") , ("\\lbrace", ESymbol Open "{") , ("\\rbrack", ESymbol Close "]") , ("\\rbrace", ESymbol Close "}") , ("\\llbracket", ESymbol Open "\x27E6") , ("\\rrbracket", ESymbol Close "\x27E7") , ("\\langle", ESymbol Open "\x27E8") , ("\\rangle", ESymbol Close "\x27E9") , ("\\lfloor", ESymbol Open "\x230A") , ("\\rfloor", ESymbol Close "\x230B") , ("\\lceil", ESymbol Open "\x2308") , ("\\rceil", ESymbol Close "\x2309") , ("|", ESymbol Open "|") , ("|", ESymbol Close "|") , ("\\|", ESymbol Open "\x2225") , ("\\|", ESymbol Close "\x2225") , ("\\lvert", ESymbol Open "\x7C") , ("\\rvert", ESymbol Close "\x7C") , ("\\vert", ESymbol Close "\x7C") , ("\\lVert", ESymbol Open "\x2225") , ("\\rVert", ESymbol Close "\x2225") , ("\\Vert", ESymbol Close "\x2016") , ("\\ulcorner", ESymbol Open "\x231C") , ("\\urcorner", ESymbol Close "\x231D") ] operators :: M.Map String Exp operators = M.fromList [ ("+", ESymbol Bin "+") , ("-", ESymbol Bin "\x2212") , ("*", ESymbol Bin "*") , ("@", ESymbol Ord "@") , (",", ESymbol Pun ",") , (".", ESymbol Ord ".") , (";", ESymbol Pun ";") , (":", ESymbol Rel ":") , ("?", ESymbol Ord "?") , (">", ESymbol Rel ">") , ("<", ESymbol Rel "<") , ("!", ESymbol Ord "!") , ("'", ESymbol Ord "\x2032") , ("''", ESymbol Ord "\x2033") , ("'''", ESymbol Ord "\x2034") , ("''''", ESymbol Ord "\x2057") , ("=", ESymbol Rel "=") , (":=", ESymbol Rel ":=") , ("/", ESymbol Ord "/") , ("~", ESpace (4/18)) ] symbols :: M.Map String Exp symbols = M.fromList [ ("\\$", ESymbol Ord "$") , ("\\%", ESymbol Ord "%") , ("\\&", ESymbol Ord "&") , ("\\_", ESymbol Ord "_") , ("\\#", ESymbol Ord "#") , ("\\^", ESymbol Ord "^") , ("\\mid", ESymbol Bin "\x2223") , ("\\colon", ESymbol Pun ":") , ("\\parallel", ESymbol Rel "\x2225") , ("\\backslash", ESymbol Bin "\x2216") , ("\\setminus", ESymbol Bin "\\") , ("\\times", ESymbol Bin "\x00D7") , ("\\ltimes", ESymbol Bin "\x22C9") , ("\\rtimes", ESymbol Bin "\x22CA") , ("\\alpha", EIdentifier "\x03B1") , ("\\beta", EIdentifier "\x03B2") , ("\\chi", EIdentifier "\x03C7") , ("\\delta", EIdentifier "\x03B4") , ("\\Delta", EIdentifier "\x0394") , ("\\epsilon", EIdentifier "\x03F5") , ("\\varepsilon", EIdentifier "\x03B5") , ("\\eta", EIdentifier "\x03B7") , ("\\gamma", EIdentifier "\x03B3") , ("\\Gamma", EIdentifier "\x0393") , ("\\iota", EIdentifier "\x03B9") , ("\\kappa", EIdentifier "\x03BA") , ("\\lambda", EIdentifier "\x03BB") , ("\\Lambda", EIdentifier "\x039B") , ("\\mu", EIdentifier "\x03BC") , ("\\nu", EIdentifier "\x03BD") , ("\\omega", EIdentifier "\x03C9") , ("\\Omega", EIdentifier "\x03A9") , ("\\phi", EIdentifier "\x03D5") , ("\\varphi", EIdentifier "\x03C6") , ("\\Phi", EIdentifier "\x03A6") , ("\\pi", EIdentifier "\x03C0") , ("\\Pi", EIdentifier "\x03A0") , ("\\psi", EIdentifier "\x03C8") , ("\\Psi", EIdentifier "\x03A8") , ("\\rho", EIdentifier "\x03C1") , ("\\sigma", EIdentifier "\x03C3") , ("\\Sigma", EIdentifier "\x03A3") , ("\\tau", EIdentifier "\x03C4") , ("\\theta", EIdentifier "\x03B8") , ("\\vartheta", EIdentifier "\x03D1") , ("\\Theta", EIdentifier "\x0398") , ("\\upsilon", EIdentifier "\x03C5") , ("\\Upsilon", EIdentifier "\x03A5") , ("\\xi", EIdentifier "\x03BE") , ("\\Xi", EIdentifier "\x039E") , ("\\zeta", EIdentifier "\x03B6") , ("\\pm", ESymbol Bin "\x00B1") , ("\\mp", ESymbol Bin "\x2213") , ("\\triangleleft", ESymbol Bin "\x22B2") , ("\\triangleright", ESymbol Bin "\x22B3") , ("\\cdot", ESymbol Bin "\x22C5") , ("\\star", ESymbol Bin "\x22C6") , ("\\ast", ESymbol Bin "\x002A") , ("\\times", ESymbol Bin "\x00D7") , ("\\div", ESymbol Bin "\x00F7") , ("\\circ", ESymbol Bin "\x2218") , ("\\bullet", ESymbol Bin "\x2022") , ("\\oplus", ESymbol Bin "\x2295") , ("\\ominus", ESymbol Bin "\x2296") , ("\\otimes", ESymbol Bin "\x2297") , ("\\bigcirc", ESymbol Bin "\x25CB") , ("\\oslash", ESymbol Bin "\x2298") , ("\\odot", ESymbol Bin "\x2299") , ("\\land", ESymbol Bin "\x2227") , ("\\wedge", ESymbol Bin "\x2227") , ("\\lor", ESymbol Bin "\x2228") , ("\\vee", ESymbol Bin "\x2228") , ("\\cap", ESymbol Bin "\x2229") , ("\\cup", ESymbol Bin "\x222A") , ("\\sqcap", ESymbol Bin "\x2293") , ("\\sqcup", ESymbol Bin "\x2294") , ("\\uplus", ESymbol Bin "\x228E") , ("\\amalg", ESymbol Bin "\x2210") , ("\\bigtriangleup", ESymbol Bin "\x25B3") , ("\\bigtriangledown", ESymbol Bin "\x25BD") , ("\\dag", ESymbol Bin "\x2020") , ("\\dagger", ESymbol Bin "\x2020") , ("\\ddag", ESymbol Bin "\x2021") , ("\\ddagger", ESymbol Bin "\x2021") , ("\\lhd", ESymbol Bin "\x22B2") , ("\\rhd", ESymbol Bin "\x22B3") , ("\\unlhd", ESymbol Bin "\x22B4") , ("\\unrhd", ESymbol Bin "\x22B5") , ("\\lt", ESymbol Rel "<") , ("\\gt", ESymbol Rel ">") , ("\\ne", ESymbol Rel "\x2260") , ("\\neq", ESymbol Rel "\x2260") , ("\\le", ESymbol Rel "\x2264") , ("\\leq", ESymbol Rel "\x2264") , ("\\leqslant", ESymbol Rel "\x2264") , ("\\ge", ESymbol Rel "\x2265") , ("\\geq", ESymbol Rel "\x2265") , ("\\geqslant", ESymbol Rel "\x2265") , ("\\equiv", ESymbol Rel "\x2261") , ("\\ll", ESymbol Rel "\x226A") , ("\\gg", ESymbol Rel "\x226B") , ("\\doteq", ESymbol Rel "\x2250") , ("\\prec", ESymbol Rel "\x227A") , ("\\succ", ESymbol Rel "\x227B") , ("\\preceq", ESymbol Rel "\x227C") , ("\\succeq", ESymbol Rel "\x227D") , ("\\subset", ESymbol Rel "\x2282") , ("\\supset", ESymbol Rel "\x2283") , ("\\subseteq", ESymbol Rel "\x2286") , ("\\supseteq", ESymbol Rel "\x2287") , ("\\nsubset", ESymbol Rel "\x2284") , ("\\nsupset", ESymbol Rel "\x2285") , ("\\nsubseteq", ESymbol Rel "\x2288") , ("\\nsupseteq", ESymbol Rel "\x2289") , ("\\sqsubset", ESymbol Rel "\x228F") , ("\\sqsupset", ESymbol Rel "\x2290") , ("\\sqsubseteq", ESymbol Rel "\x2291") , ("\\sqsupseteq", ESymbol Rel "\x2292") , ("\\sim", ESymbol Rel "\x223C") , ("\\simeq", ESymbol Rel "\x2243") , ("\\approx", ESymbol Rel "\x2248") , ("\\cong", ESymbol Rel "\x2245") , ("\\Join", ESymbol Rel "\x22C8") , ("\\bowtie", ESymbol Rel "\x22C8") , ("\\in", ESymbol Rel "\x2208") , ("\\ni", ESymbol Rel "\x220B") , ("\\owns", ESymbol Rel "\x220B") , ("\\propto", ESymbol Rel "\x221D") , ("\\vdash", ESymbol Rel "\x22A2") , ("\\dashv", ESymbol Rel "\x22A3") , ("\\models", ESymbol Rel "\x22A8") , ("\\perp", ESymbol Rel "\x22A5") , ("\\smile", ESymbol Rel "\x2323") , ("\\frown", ESymbol Rel "\x2322") , ("\\asymp", ESymbol Rel "\x224D") , ("\\notin", ESymbol Rel "\x2209") , ("\\gets", ESymbol Rel "\x2190") , ("\\leftarrow", ESymbol Rel "\x2190") , ("\\to", ESymbol Rel "\x2192") , ("\\rightarrow", ESymbol Rel "\x2192") , ("\\leftrightarrow", ESymbol Rel "\x2194") , ("\\uparrow", ESymbol Rel "\x2191") , ("\\downarrow", ESymbol Rel "\x2193") , ("\\updownarrow", ESymbol Rel "\x2195") , ("\\Leftarrow", ESymbol Rel "\x21D0") , ("\\Rightarrow", ESymbol Rel "\x21D2") , ("\\Leftrightarrow", ESymbol Rel "\x21D4") , ("\\iff", ESymbol Rel "\x21D4") , ("\\Uparrow", ESymbol Rel "\x21D1") , ("\\Downarrow", ESymbol Rel "\x21D3") , ("\\Updownarrow", ESymbol Rel "\x21D5") , ("\\mapsto", ESymbol Rel "\x21A6") , ("\\longleftarrow", ESymbol Rel "\x2190") , ("\\longrightarrow", ESymbol Rel "\x2192") , ("\\longleftrightarrow", ESymbol Rel "\x2194") , ("\\Longleftarrow", ESymbol Rel "\x21D0") , ("\\Longrightarrow", ESymbol Rel "\x21D2") , ("\\Longleftrightarrow", ESymbol Rel "\x21D4") , ("\\longmapsto", ESymbol Rel "\x21A6") , ("\\sum", ESymbol Op "\x2211") , ("\\prod", ESymbol Op "\x220F") , ("\\bigcap", ESymbol Op "\x22C2") , ("\\bigcup", ESymbol Op "\x22C3") , ("\\bigwedge", ESymbol Op "\x22C0") , ("\\bigvee", ESymbol Op "\x22C1") , ("\\bigsqcap", ESymbol Op "\x2A05") , ("\\bigsqcup", ESymbol Op "\x2A06") , ("\\coprod", ESymbol Op "\x2210") , ("\\bigoplus", ESymbol Op "\x2A01") , ("\\bigotimes", ESymbol Op "\x2A02") , ("\\bigodot", ESymbol Op "\x2A00") , ("\\biguplus", ESymbol Op "\x2A04") , ("\\int", ESymbol Op "\x222B") , ("\\iint", ESymbol Op "\x222C") , ("\\iiint", ESymbol Op "\x222D") , ("\\oint", ESymbol Op "\x222E") , ("\\prime", ESymbol Ord "\x2032") , ("\\dots", ESymbol Ord "\x2026") , ("\\ldots", ESymbol Ord "\x2026") , ("\\cdots", ESymbol Ord "\x22EF") , ("\\vdots", ESymbol Ord "\x22EE") , ("\\ddots", ESymbol Ord "\x22F1") , ("\\forall", ESymbol Op "\x2200") , ("\\exists", ESymbol Op "\x2203") , ("\\Re", ESymbol Ord "\x211C") , ("\\Im", ESymbol Ord "\x2111") , ("\\aleph", ESymbol Ord "\x2135") , ("\\hbar", ESymbol Ord "\x210F") , ("\\ell", ESymbol Ord "\x2113") , ("\\wp", ESymbol Ord "\x2118") , ("\\emptyset", ESymbol Ord "\x2205") , ("\\infty", ESymbol Ord "\x221E") , ("\\partial", ESymbol Ord "\x2202") , ("\\nabla", ESymbol Ord "\x2207") , ("\\triangle", ESymbol Ord "\x25B3") , ("\\therefore", ESymbol Pun "\x2234") , ("\\angle", ESymbol Ord "\x2220") , ("\\diamond", ESymbol Op "\x22C4") , ("\\Diamond", ESymbol Op "\x25C7") , ("\\lozenge", ESymbol Op "\x25CA") , ("\\neg", ESymbol Op "\x00AC") , ("\\lnot", ESymbol Ord "\x00AC") , ("\\bot", ESymbol Ord "\x22A5") , ("\\top", ESymbol Ord "\x22A4") , ("\\square", ESymbol Ord "\x25AB") , ("\\Box", ESymbol Op "\x25A1") , ("\\wr", ESymbol Ord "\x2240") , ("\\!", ESpace (-3/18)) , ("\\,", ESpace (3/18)) , ("\\>", ESpace (4/18)) , ("\\:", ESpace (4/18)) , ("\\;", ESpace (5/18)) , ("\\ ", ESpace (4/18)) , ("\\quad", ESpace 1) , ("\\qquad", ESpace 2) , ("\\arccos", EMathOperator "arccos") , ("\\arcsin", EMathOperator "arcsin") , ("\\arctan", EMathOperator "arctan") , ("\\arg", EMathOperator "arg") , ("\\cos", EMathOperator "cos") , ("\\cosh", EMathOperator "cosh") , ("\\cot", EMathOperator "cot") , ("\\coth", EMathOperator "coth") , ("\\csc", EMathOperator "csc") , ("\\deg", EMathOperator "deg") , ("\\det", EMathOperator "det") , ("\\dim", EMathOperator "dim") , ("\\exp", EMathOperator "exp") , ("\\gcd", EMathOperator "gcd") , ("\\hom", EMathOperator "hom") , ("\\inf", EMathOperator "inf") , ("\\ker", EMathOperator "ker") , ("\\lg", EMathOperator "lg") , ("\\lim", EMathOperator "lim") , ("\\liminf", EMathOperator "liminf") , ("\\limsup", EMathOperator "limsup") , ("\\ln", EMathOperator "ln") , ("\\log", EMathOperator "log") , ("\\max", EMathOperator "max") , ("\\min", EMathOperator "min") , ("\\mod", EMathOperator "mod") , ("\\Pr", EMathOperator "Pr") , ("\\sec", EMathOperator "sec") , ("\\sin", EMathOperator "sin") , ("\\sinh", EMathOperator "sinh") , ("\\sup", EMathOperator "sup") , ("\\tan", EMathOperator "tan") , ("\\tanh", EMathOperator "tanh") ] -- text mode parsing textual :: TP String textual = regular <|> sps <|> ligature <|> textCommand <|> bracedText "text" sps :: TP String sps = " " <$ skipMany1 (oneOf " \t\n") regular :: TP String regular = many1 (noneOf "`'-~${}\\ \t") ligature :: TP String ligature = try ("\x2014" <$ string "---") <|> try ("\x2013" <$ string "--") <|> try (string "-") <|> try ("\x201C" <$ string "``") <|> try ("\x201D" <$ string "''") <|> try ("\x2019" <$ string "'") <|> try ("\x2018" <$ string "`") <|> try ("\xA0" <$ string "~") textCommand :: TP String textCommand = do cmd <- oneOfCommands (M.keys textCommands) case M.lookup cmd textCommands of Nothing -> fail ("Unknown control sequence " ++ cmd) Just c -> c bracedText :: TP String bracedText = do char '{' inner <- concat <$> many textual char '}' return inner tok :: TP Char tok = (try $ char '{' *> spaces *> anyChar <* spaces <* char '}') <|> anyChar textCommands :: M.Map String (TP String) textCommands = M.fromList [ ("\\#", return "#") , ("\\$", return "$") , ("\\%", return "%") , ("\\&", return "&") , ("\\_", return "_") , ("\\{", return "{") , ("\\}", return "}") , ("\\ldots", return "\x2026") , ("\\textasciitilde", return "~") , ("\\textasciicircum", return "^") , ("\\textbackslash", return "\\") , ("\\char", parseC) , ("\\aa", return "å") , ("\\AA", return "Å") , ("\\ss", return "ß") , ("\\o", return "ø") , ("\\O", return "Ø") , ("\\L", return "Ł") , ("\\l", return "ł") , ("\\ae", return "æ") , ("\\AE", return "Æ") , ("\\oe", return "œ") , ("\\OE", return "Œ") , ("\\`", option "`" $ grave <$> tok) , ("\\'", option "'" $ acute <$> tok) , ("\\^", option "^" $ circ <$> tok) , ("\\~", option "~" $ tilde <$> tok) , ("\\\"", option "\"" $ try $ umlaut <$> tok) , ("\\.", option "." $ try $ dot <$> tok) , ("\\=", option "=" $ try $ macron <$> tok) , ("\\c", option "c" $ try $ cedilla <$> tok) , ("\\v", option "v" $ try $ hacek <$> tok) , ("\\u", option "u" $ try $ breve <$> tok) , ("\\ ", return " ") ] parseC :: TP String parseC = try $ char '`' >> count 1 anyChar -- the functions below taken from pandoc: grave :: Char -> String grave 'A' = "À" grave 'E' = "È" grave 'I' = "Ì" grave 'O' = "Ò" grave 'U' = "Ù" grave 'a' = "à" grave 'e' = "è" grave 'i' = "ì" grave 'o' = "ò" grave 'u' = "ù" grave c = [c] acute :: Char -> String acute 'A' = "Á" acute 'E' = "É" acute 'I' = "Í" acute 'O' = "Ó" acute 'U' = "Ú" acute 'Y' = "Ý" acute 'a' = "á" acute 'e' = "é" acute 'i' = "í" acute 'o' = "ó" acute 'u' = "ú" acute 'y' = "ý" acute 'C' = "Ć" acute 'c' = "ć" acute 'L' = "Ĺ" acute 'l' = "ĺ" acute 'N' = "Ń" acute 'n' = "ń" acute 'R' = "Ŕ" acute 'r' = "ŕ" acute 'S' = "Ś" acute 's' = "ś" acute 'Z' = "Ź" acute 'z' = "ź" acute c = [c] circ :: Char -> String circ 'A' = "Â" circ 'E' = "Ê" circ 'I' = "Î" circ 'O' = "Ô" circ 'U' = "Û" circ 'a' = "â" circ 'e' = "ê" circ 'i' = "î" circ 'o' = "ô" circ 'u' = "û" circ 'C' = "Ĉ" circ 'c' = "ĉ" circ 'G' = "Ĝ" circ 'g' = "ĝ" circ 'H' = "Ĥ" circ 'h' = "ĥ" circ 'J' = "Ĵ" circ 'j' = "ĵ" circ 'S' = "Ŝ" circ 's' = "ŝ" circ 'W' = "Ŵ" circ 'w' = "ŵ" circ 'Y' = "Ŷ" circ 'y' = "ŷ" circ c = [c] tilde :: Char -> String tilde 'A' = "Ã" tilde 'a' = "ã" tilde 'O' = "Õ" tilde 'o' = "õ" tilde 'I' = "Ĩ" tilde 'i' = "ĩ" tilde 'U' = "Ũ" tilde 'u' = "ũ" tilde 'N' = "Ñ" tilde 'n' = "ñ" tilde c = [c] umlaut :: Char -> String umlaut 'A' = "Ä" umlaut 'E' = "Ë" umlaut 'I' = "Ï" umlaut 'O' = "Ö" umlaut 'U' = "Ü" umlaut 'a' = "ä" umlaut 'e' = "ë" umlaut 'i' = "ï" umlaut 'o' = "ö" umlaut 'u' = "ü" umlaut c = [c] dot :: Char -> String dot 'C' = "Ċ" dot 'c' = "ċ" dot 'E' = "Ė" dot 'e' = "ė" dot 'G' = "Ġ" dot 'g' = "ġ" dot 'I' = "İ" dot 'Z' = "Ż" dot 'z' = "ż" dot c = [c] macron :: Char -> String macron 'A' = "Ā" macron 'E' = "Ē" macron 'I' = "Ī" macron 'O' = "Ō" macron 'U' = "Ū" macron 'a' = "ā" macron 'e' = "ē" macron 'i' = "ī" macron 'o' = "ō" macron 'u' = "ū" macron c = [c] cedilla :: Char -> String cedilla 'c' = "ç" cedilla 'C' = "Ç" cedilla 's' = "ş" cedilla 'S' = "Ş" cedilla 't' = "ţ" cedilla 'T' = "Ţ" cedilla 'e' = "ȩ" cedilla 'E' = "Ȩ" cedilla 'h' = "ḩ" cedilla 'H' = "Ḩ" cedilla 'o' = "o̧" cedilla 'O' = "O̧" cedilla c = [c] hacek :: Char -> String hacek 'A' = "Ǎ" hacek 'a' = "ǎ" hacek 'C' = "Č" hacek 'c' = "č" hacek 'D' = "Ď" hacek 'd' = "ď" hacek 'E' = "Ě" hacek 'e' = "ě" hacek 'G' = "Ǧ" hacek 'g' = "ǧ" hacek 'H' = "Ȟ" hacek 'h' = "ȟ" hacek 'I' = "Ǐ" hacek 'i' = "ǐ" hacek 'j' = "ǰ" hacek 'K' = "Ǩ" hacek 'k' = "ǩ" hacek 'L' = "Ľ" hacek 'l' = "ľ" hacek 'N' = "Ň" hacek 'n' = "ň" hacek 'O' = "Ǒ" hacek 'o' = "ǒ" hacek 'R' = "Ř" hacek 'r' = "ř" hacek 'S' = "Š" hacek 's' = "š" hacek 'T' = "Ť" hacek 't' = "ť" hacek 'U' = "Ǔ" hacek 'u' = "ǔ" hacek 'Z' = "Ž" hacek 'z' = "ž" hacek c = [c] breve :: Char -> String breve 'A' = "Ă" breve 'a' = "ă" breve 'E' = "Ĕ" breve 'e' = "ĕ" breve 'G' = "Ğ" breve 'g' = "ğ" breve 'I' = "Ĭ" breve 'i' = "ĭ" breve 'O' = "Ŏ" breve 'o' = "ŏ" breve 'U' = "Ŭ" breve 'u' = "ŭ" breve c = [c]