{-# OPTIONS_GHC -fno-warn-orphans #-} module Language.Eq.InputParser.MathML ( mathMlToEqLang , mathMlToEqLang' ) where import Control.Applicative import Language.Eq.Algorithm.Utils import qualified Language.Eq.UnicodeSymbols as Uni import Text.XML.HaXml.Parse import Text.XML.HaXml.Types -- | Type used to reduce the complexity of XML -- tree and favor an easier pattern matching data ReducedXmlTree = Xop String | Xsymb String | Xnum String | Xsqrt ReducedXmlTree | Xfrac ReducedXmlTree ReducedXmlTree | Xsup ReducedXmlTree ReducedXmlTree | XunderOver ReducedXmlTree ReducedXmlTree ReducedXmlTree | Xfenced String String ReducedXmlTree | Xrow [ReducedXmlTree] | Xtable [[ReducedXmlTree]] deriving (Show) mathMlToEqLang' :: String -> String mathMlToEqLang' = either id id . mathMlToEqLang -- | Input XML code encoded in a string -- output a string in Eq Language, ready to -- be parsed by the usual meanings. mathMlToEqLang :: String -> Either String String mathMlToEqLang text = xmlParse' "mathml" text >>= simplifyXml >>= toProgramString toProgramString :: ReducedXmlTree -> Either String String toProgramString tree = (\s -> s "") <$> translate tree simplifyXml :: Document a -> Either String ReducedXmlTree simplifyXml (Document a b (Elem (N "m:math") c lst) l) = simplifyXml (Document a b (Elem (N "math") c lst) l) simplifyXml (Document a b (Elem (QN _ "math") c lst) l) = simplifyXml (Document a b (Elem (N "math") c lst) l) simplifyXml (Document _ _ (Elem (N "math") _ lst) _) = Xrow <$> eitherMap (map simplifyContent lst) simplifyXml _ = error "The xml document has the wrong format" strOfContent :: Content a -> String strOfContent (CString _ txt _) = txt strOfContent _ = error "Xml string waited at this point" elemOfContent :: Content a -> Element a elemOfContent (CElem e _) = e elemOfContent _ = error "Xml element waited at this point" -- | Helper to simplify content simplifyContent :: Content a -> Either String ReducedXmlTree simplifyContent = simplify . elemOfContent eitherMap :: [Either a b] -> Either a [b] eitherMap [] = Right [] eitherMap lst = foldr mapper (Right []) lst where mapper (Left a) _ = Left a mapper _ (Left a) = Left a mapper (Right v) (Right list) = Right (v:list) -- | Really transform an XML file to a simplified tree -- to make a better pattern matching simplify :: Element a -> Either String ReducedXmlTree -- This rule is for mathML generated by microsoft math input -- panel whom got the bad habit of prefixing it by 'm:' simplify (Elem (QN _ x) att cont) = simplify (Elem (N x) att cont) simplify (Elem (N ('m':':':x)) att cont) = simplify (Elem (N x) att cont) simplify (Elem (N "mi") _ [c]) = Right . Xsymb $ strOfContent c simplify (Elem (N "mn") _ [c]) = Right . Xnum $ strOfContent c simplify (Elem (N "mo") _ [c]) = Right . Xop $ strOfContent c simplify (Elem (N "mrow") _ lst) = Xrow <$> eitherMap (map simplifyContent lst) simplify (Elem (N "msqrt") _ lst) = Xsqrt . Xrow <$> eitherMap (map simplifyContent lst) simplify (Elem (N "mfrac") _ [a,b]) = Xfrac <$> simplifyContent a <*> simplifyContent b simplify (Elem (N "msup") _ [a,b]) = Xsup <$> simplifyContent a <*> simplifyContent b simplify (Elem (N "munderover") _ [a,b,c]) = XunderOver <$> simplifyContent a <*> simplifyContent b <*> simplifyContent c simplify (Elem (N "mtable") _ lst) = Xtable <$> lineList where lineList = eitherMap $ map (unrow . elemOfContent) lst unrow (Elem (QN _ n) a b) = unrow (Elem (N n) a b) unrow (Elem (N ('m':':':n)) a b) = unrow (Elem (N n) a b) unrow (Elem (N "mtr") _ cells) = eitherMap $ map (uncell . elemOfContent) cells unrow _ = Left "Ill formed MathML Matrix" uncell (Elem (QN _ n) a b) = uncell (Elem (N n) a b) uncell (Elem (N ('m':':':n)) a b) = uncell (Elem (N n) a b) uncell (Elem (N "mtd") _ cellList) = Xrow <$> eitherMap (map simplifyContent cellList) uncell _ = Left "Ill format MathML Matrix cell" simplify (Elem (N "mfenced") [ (N "open", AttValue [Left openChar]) , (N "close", AttValue [Left closeChar]) ] lst) = Xfenced openChar closeChar . Xrow <$> eitherMap (map simplifyContent lst) simplify (Elem (N "mfenced") attrs _lst) = Left $ show attrs simplify (Elem (N elemName) _ _) = Left $ "Unknown MathMl element : " ++ elemName str :: String -> String -> String str = (++) char :: Char -> String -> String char = (:) uniSymbolTranslation :: [(Int, String)] uniSymbolTranslation = [ (Uni.pi, "pi") , (Uni.infinity, "infinite") ] unicodeTranslation :: [(Int, String)] unicodeTranslation = [ (Uni.logicalAnd, "&&") , (Uni.logicalOr, "||") , (Uni.logicalNot, "not") , (Uni.identicalTo, "==") , (Uni.lessThanOrEqualTo, "<=") , (Uni.greaterThanOrEqualTo, ">=") , (Uni.multiplicationSign , "*") ] vardeclFinder :: [ReducedXmlTree] -> Maybe ([ReducedXmlTree],[ReducedXmlTree], String) vardeclFinder = declFind [] where declFind _ [] = Nothing declFind acc (Xop [op]:next) | fromEnum op == Uni.doubleStruckItalicSmalld = obtainVar acc next declFind acc (Xsymb ['d']:next) = obtainVar acc next declFind acc (Xsymb ['d', var]:next) = Just (reverse acc, next, [var]) declFind acc (Xrow lst:next) = declFind acc (lst ++ next) declFind acc (x:xs) = declFind (x:acc) xs obtainVar _ [] = Nothing obtainVar acc (Xsymb var:next) = Just (reverse acc, next, var) obtainVar acc (Xrow lst:next) = obtainVar acc (lst ++ next) obtainVar _ _ = Nothing -- | Real transformation =) translate :: ReducedXmlTree -> Either String ShowS translate (Xop [s]) = case lookup (fromEnum s) unicodeTranslation of Nothing -> Right $ char s Just v -> Right $ str v translate (Xsymb [s]) = case lookup (fromEnum s) uniSymbolTranslation of Nothing -> Right $ char s Just v -> Right $ str v -- Special case to handle matrix translate (Xfenced op en body@(Xtable _)) | (op == "(" && en == ")") || (op == "[" && en == "]") = translate body translate (Xfenced op en (Xrow [body@(Xtable _)])) | (op == "(" && en == ")") || (op == "[" && en == "]") = translate body translate (Xfenced "(" ")" body) = (\sub -> char '(' . sub . char ')') <$> translate body translate (Xfenced "|" "|" body) = (\sub -> str "abs(" . sub . char ')') <$> translate body translate (Xfenced str1 str2 body) = (\sub -> shows body . str str1 . sub . str str2) <$> translate body translate (Xrow ((XunderOver (Xop [bigop]) lowerBound upperBound):rs)) | fromEnum bigop == Uni.sum = (\ini end what -> str "sum(" . ini . char ',' . end . char ',' . what . char ')') <$> translate lowerBound <*> translate upperBound <*> translate (Xrow rs) | fromEnum bigop == Uni.product = (\ini end what -> str "product(" . ini . char ',' . end . char ',' . what . char ')') <$> translate lowerBound <*> translate upperBound <*> translate (Xrow rs) | fromEnum bigop == Uni.integral = case vardeclFinder rs of Nothing -> Left "Invalid integral definition, cannot be handled" Just (acc,rest,var) -> (\lower upper what rest' -> str "integrate(" . lower . char ',' . upper . char ',' . what . char ',' . str var . char ')' . rest') <$> translate lowerBound <*> translate upperBound <*> translate (Xrow acc) <*> translate (Xrow rest) | otherwise = Left "Unrecognized big operator" translate (XunderOver _ _ _) = Left "Unrecognized operator" translate (Xop s) = Right $ str s translate (Xsymb s) = Right $ str s translate (Xnum s) = Right $ str s translate (Xsqrt subTree) = (\sub -> str "sqrt(" . sub . char ')') <$> translate subTree translate (Xfrac a b) = (\a' b' -> char '(' . a' . str ") / (" . b' . char ')') <$> translate a <*> translate b translate (Xsup a b) = (\a' b' -> char '(' . a' . str ") ^ (" . b' . char ')') <$> translate a <*> translate b translate (Xrow []) = Right id translate (Xrow lst) = concatS <$> eitherMap (map translate lst) translate (Xtable []) = Left "Wrong table format" translate (Xtable lst) = (\elems -> str "matrix( " . shows lineCount . char ',' . shows columncount . char ',' . interspereseS (char ',') elems . char ')') <$> (eitherMap . map translate $ concat lst) where lineCount = length lst columncount = length $ head lst