module Text.LaTeX.Character (
   toUnicodeString,
   fromUnicodeString,
   table,
   ) where

-- import Data.List.HT (multiReplace, )
import Data.Tuple.HT (mapFst, mapSnd, )
import Data.Char (isLetter, chr, ord, )
import qualified Data.Map as Map


{- |
Replace LaTeX macros for special characters
by Unicode characters in a lazy way.
-}
toUnicodeString :: String -> String
toUnicodeString =
   filter (not . flip elem "{}") .
   toUnicodeStringCore
{-
toUnicodeString =
   multiReplace table
-}

toUnicodeStringCore :: String -> String
toUnicodeStringCore "" = ""
toUnicodeStringCore (c:cs) =
   case c of
     '\\' ->
       let getArgument "" = ("", "")
           getArgument (a:argsuffix) =
              case a of
                 {-
                 this does not support nested curly braces,
                 however, I have no argument with braces in my dictionary
                 -}
                 '{' -> mapSnd (drop 1) $ break ('}'==) argsuffix
                 '\\' -> mapFst ('\\':) $ span isLetter argsuffix
                 _ -> ([a], argsuffix)
           translateInvocation = do
              (macro,(arg,rest)) <-
                 case cs of
                    [] -> Nothing
                    b:bs -> Just $
                       if elem b "'`^\"~"
                         then ([b], getArgument bs)
                         else mapSnd getArgument $
                              span isLetter cs
              code <- Map.lookup (macro,arg) toMap
              return (code, rest)
       in  case translateInvocation of
              Just (code,rest) -> code : toUnicodeStringCore rest
              Nothing -> c : toUnicodeStringCore cs
     '$' ->
        let (math, rest) = break ('$'==) cs
        in  parseMathString math ++ toUnicodeStringCore (drop 1 rest)
     _ -> c : toUnicodeStringCore cs

parseMathString :: String -> String
parseMathString "" = ""
parseMathString (c:cs) =
   if c/='\\'
     then c : parseMathString cs
     else let (ident,rest) = span isLetter cs
          in  maybe ('\\':ident) (:[])
                 (Map.lookup ident mathMap) ++
              parseMathString rest

fromUnicodeString :: String -> String
fromUnicodeString =
   concatMap (\c -> Map.findWithDefault [c] c fromMap)
--   multiReplace (map swap table)

{-# DEPRECATED table "use toUnicodeString or fromUnicodeString" #-}
table :: [(String, String)]
table =
   ("\\&",    "&") :
   ("\\~{}",  "~") :
   ("\\\"a",  "ä") :
   ("\\\"o",  "ö") :
   ("\\\"u",  "ü") :
   ("\\\"A",  "Ä") :
   ("\\\"O",  "Ö") :
   ("\\\"U",  "Ü") :
   ("\\ss{}", "ß") :
   ("\\`e",   "è") :
   ("\\'e",   "é") :
   ("\\'a",   "á") :
   ("\\'{\\i}", "í") :
   ("\\'u",   "ú") :
   ("\\'U",   "Ú") :
   ("\\o{}", "ø") :
   ("\\O{}", "Ø") :
   ("\\oe{}", "œ") :
   ("\\OE{}", "Œ") :
   ("\\ae{}", "æ") :
   ("\\AE{}", "Æ") :
   ("\\l{}", "ł") :
   ("\\L{}", "Ł") :
   ("\\c{c}", "ç") :
   ("\\c{C}", "Ç") :
   ("\\~a", "ã") :
   ("\\~A", "Ã") :
   []


fromMap :: Map.Map Char String
fromMap =
   Map.fromList
      (do (base, variants) <- accents
          (accent, code) <- variants
          return (chr code, '\\':accent:'{':base++'}':""))
   `Map.union`
   Map.fromList
      (map (\(ident, code) -> (chr code, "\\"++ident++"{}")) specialChars)
   `Map.union`
   Map.fromList
      -- curly braces around dollars assert that no $$ can occur in the output
      (map (\(ident, code) -> (chr code, "{$\\"++ident++"$}")) mathChars)
   `Map.union`
   Map.fromList
      (map (\c -> (c, '\\':c:[])) escapedChars)


toMap :: Map.Map (String, String) Char
toMap =
   Map.fromList
      (do (base, variants) <- accents
          (accent, code) <- variants
          return (([accent], base), chr code))
   `Map.union`
   Map.fromList
      (map (\(ident, code) -> ((ident, ""), chr code)) specialChars)
   `Map.union`
   Map.fromList
      (map (\c -> (('\\':c:[], ""), c)) escapedChars)


accents :: [(String, [(Char, Int)])]
accents =
   ("A", ('`', 192) : ('\'', 193) : ('^', 194) : ('~', 195) : ('"', 196) : []) :
   ("E", ('`', 200) : ('\'', 201) : ('^', 202) : ('"', 203) : []) :
   ("I", ('`', 204) : ('\'', 205) : ('^', 206) : ('"', 207) : ('~', 296) : []) :
   ("J", ('^', 308) : []) :
   ("N", ('~', 209) : []) :
   ("O", ('`', 210) : ('\'', 211) : ('^', 212) : ('~', 213) : ('"', 214) : []) :
   ("U", ('`', 217) : ('\'', 218) : ('^', 219) : ('"', 220) : []) :
   ("Y", ('"', 223) : ('\'', 221) : []) :
   ("C", ('c', 199) : ('\'', 262) : ('^', 264) : ('.', 266) : ('v', 268) : []) :
   ("S", ('c', 350) : ('\'', 346) : ('^', 348) : ('v', 352) : []) :
   ("a", ('`', 224) : ('\'', 225) : ('^', 227) : ('"', 228) : []) :
   ("e", ('`', 232) : ('\'', 233) : ('^', 234) : ('"', 235) : []) :
   ("i", ('`', 236) : ('\'', 237) : ('^', 238) : ('"', 239) : []) :
   ("\\i", ('`', 236) : ('\'', 237) : ('^', 238) : ('"', 239) : ('~', 297) : []) :
   ("\\j", ('^', 309) : []) :
   ("n", ('~', 241) : []) :
   ("o", ('`', 242) : ('\'', 243) : ('^', 244) : ('~', 245) : ('"', 246) : []) :
   ("u", ('`', 249) : ('\'', 250) : ('^', 251) : ('"', 252) : []) :
   ("y", ('"', 255) : ('\'', 253) : []) :
   ("c", ('c', 231) : ('\'', 263) : ('^', 265) : ('.', 267) : ('v', 269) : []) :
   ("s", ('c', 351) : ('\'', 347) : ('^', 349) : ('"', 223) : ('v', 353) : []) :
   []

specialChars :: [(String, Int)]
specialChars =
   ("cc", 231) :
   ("cC", 199) :
   ("aa", 229) :
   ("AA", 197) :
   ("l",  321) :
   ("L",  322) :
   ("ss", 223) :
   ("3",  223) :
   ("o",  248) :
   ("O",  216) :
   ("ae", 230) :
   ("AE", 198) :
   ("S", 167) :
   ("pounds", 163) :
   ("euro", 8364) :
   ("copyright", 169) :

   ("textbackslash", ord '\\') :
   ("textbar", ord '|') :
   ("textasciitilde", ord '~') :
   ("textasciicircum", ord '^') :
   ("textless", ord '<') :
   ("textgreater", ord '>') :
   ("textdollar", ord '$') :
   ("textexclamdown", 161) :
   ("textquestiondown", 191) :
   ("textquotedblleft", ord '"') :
   []

mathMap :: Map.Map String Char
mathMap =
   Map.fromList (map (\(name,code) -> (name, chr code)) mathChars)

mathChars :: [(String, Int)]
mathChars =
   ("Alpha",      913) :
   ("Beta",       914) :
   ("Gamma",      915) :
   ("Delta",      916) :
   ("Epsilon",    917) :
   ("Zeta",       918) :
   ("Eta",        919) :
   ("Theta",      920) :
   ("Iota",       921) :
   ("Kappa",      922) :
   ("Lambda",     923) :
   ("Mu",         924) :
   ("Nu",         925) :
   ("Xi",         926) :
   ("Omikron",    927) :
   ("Pi",         928) :
   ("Rho",        929) :
   ("Sigma",      931) :
   ("Tau",        932) :
   ("Upsilon",    933) :
   ("Phi",        934) :
   ("Chi",        935) :
   ("Psi",        936) :
   ("Omega",      937) :
   ("alpha",      945) :
   ("beta",       946) :
   ("gamma",      947) :
   ("delta",      948) :
   ("epsilon",    949) :
   ("zeta",       950) :
   ("eta",        951) :
   ("theta",      952) :
   ("iota",       953) :
   ("kappa",      954) :
   ("lambda",     955) :
   ("mu",         956) :
   ("nu",         957) :
   ("xi",         958) :
   ("omikron",    959) :
   ("pi",         960) :
   ("rho",        961) :
   ("sigma",      963) :
   ("tau",        964) :
   ("upsilon",    965) :
   ("phi",        966) :
   ("chi",        967) :
   ("psi",        968) :
   ("omega",      969) :
   []

escapedChars :: [Char]
escapedChars = "$%&_#{}"