{-# LANGUAGE DerivingVia        #-}
{-# LANGUAGE OverloadedStrings  #-}
{-# LANGUAGE StandaloneDeriving #-}
-- | Utilities to handle entity references.
module Data.XML.Parser.Low.Entity
  ( EntityDecoder(..)
  , decodePredefinedEntities
  , decodeHtmlEntities
  ) where

import           Control.Applicative
import           Control.Arrow           (Kleisli (..), (>>>))
import           Control.Monad
import           Data.Function
import           Data.Map                (Map)
import qualified Data.Map                as Map
import           Data.Monoid
import           Data.Text               (Text)
import qualified Data.Text               as Text
import           Text.Parser.Char
import           Text.Parser.Combinators


-- | A function that describes how to expand entity references.
newtype EntityDecoder = EntityDecoder { EntityDecoder -> Text -> Maybe Text
runEntityDecoder :: Text -> Maybe Text }

-- | Can be combined with '(<>)' to try multiple decoders, from left to right.
deriving via (Alt (WrappedArrow (Kleisli Maybe) Text) Text) instance Semigroup EntityDecoder
deriving via (Alt (WrappedArrow (Kleisli Maybe) Text) Text) instance Monoid EntityDecoder


-- | <https://www.w3.org/TR/REC-xml/#sec-predefined-ent>
decodePredefinedEntities :: EntityDecoder
decodePredefinedEntities :: EntityDecoder
decodePredefinedEntities = (Text -> Maybe Text) -> EntityDecoder
EntityDecoder ((Text -> Maybe Text) -> EntityDecoder)
-> (Text -> Maybe Text) -> EntityDecoder
forall a b. (a -> b) -> a -> b
$ \Text
name ->
  Text -> Map Text Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
name Map Text Text
predefinedEntities Maybe Text -> (Maybe Text -> Maybe Text) -> Maybe Text
forall a b. a -> (a -> b) -> b
& Maybe Text -> (Text -> Maybe Text) -> Maybe Text -> Maybe Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Maybe Text
forall (f :: * -> *) a. Alternative f => f a
empty Text -> Maybe Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure

predefinedEntities :: Map Text Text
predefinedEntities :: Map Text Text
predefinedEntities = [(Text, Text)] -> Map Text Text
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList
  [ (Text
"lt", Text
"<")
  , (Text
"gt", Text
">")
  , (Text
"amp", Text
"&")
  , (Text
"quot", Text
"\"")
  , (Text
"apos", Text
"'")
  ]

-- | <https://www.w3.org/TR/xhtml1/dtds.html#h-A2>
decodeHtmlEntities :: EntityDecoder
decodeHtmlEntities :: EntityDecoder
decodeHtmlEntities = (Text -> Maybe Text) -> EntityDecoder
EntityDecoder ((Text -> Maybe Text) -> EntityDecoder)
-> (Text -> Maybe Text) -> EntityDecoder
forall a b. (a -> b) -> a -> b
$ \Text
name ->
  Text -> Map Text Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
name Map Text Text
htmlEntities Maybe Text -> (Maybe Text -> Maybe Text) -> Maybe Text
forall a b. a -> (a -> b) -> b
& Maybe Text -> (Text -> Maybe Text) -> Maybe Text -> Maybe Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Maybe Text
forall (f :: * -> *) a. Alternative f => f a
empty Text -> Maybe Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure

htmlEntities :: Map Text Text
htmlEntities :: Map Text Text
htmlEntities = [(Text, Text)] -> Map Text Text
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList
  [ (Text
"nbsp", Text
"\160")
  , (Text
"iexcl", Text
"\161")
  , (Text
"cent", Text
"\162")
  , (Text
"pound", Text
"\163")
  , (Text
"curren", Text
"\164")
  , (Text
"yen", Text
"\165")
  , (Text
"brvbar", Text
"\166")
  , (Text
"sect", Text
"\167")
  , (Text
"uml", Text
"\168")
  , (Text
"copy", Text
"\169")
  , (Text
"ordf", Text
"\170")
  , (Text
"laquo", Text
"\171")
  , (Text
"not", Text
"\172")
  , (Text
"shy", Text
"\173")
  , (Text
"reg", Text
"\174")
  , (Text
"macr", Text
"\175")
  , (Text
"deg", Text
"\176")
  , (Text
"plusmn", Text
"\177")
  , (Text
"sup2", Text
"\178")
  , (Text
"sup3", Text
"\179")
  , (Text
"acute", Text
"\180")
  , (Text
"micro", Text
"\181")
  , (Text
"para", Text
"\182")
  , (Text
"middot", Text
"\183")
  , (Text
"cedil", Text
"\184")
  , (Text
"sup1", Text
"\185")
  , (Text
"ordm", Text
"\186")
  , (Text
"raquo", Text
"\187")
  , (Text
"frac14", Text
"\188")
  , (Text
"frac12", Text
"\189")
  , (Text
"frac34", Text
"\190")
  , (Text
"iquest", Text
"\191")
  , (Text
"Agrave", Text
"\192")
  , (Text
"Aacute", Text
"\193")
  , (Text
"Acirc", Text
"\194")
  , (Text
"Atilde", Text
"\195")
  , (Text
"Auml", Text
"\196")
  , (Text
"Aring", Text
"\197")
  , (Text
"AElig", Text
"\198")
  , (Text
"Ccedil", Text
"\199")
  , (Text
"Egrave", Text
"\200")
  , (Text
"Eacute", Text
"\201")
  , (Text
"Ecirc", Text
"\202")
  , (Text
"Euml", Text
"\203")
  , (Text
"Igrave", Text
"\204")
  , (Text
"Iacute", Text
"\205")
  , (Text
"Icirc", Text
"\206")
  , (Text
"Iuml", Text
"\207")
  , (Text
"ETH", Text
"\208")
  , (Text
"Ntilde", Text
"\209")
  , (Text
"Ograve", Text
"\210")
  , (Text
"Oacute", Text
"\211")
  , (Text
"Ocirc", Text
"\212")
  , (Text
"Otilde", Text
"\213")
  , (Text
"Ouml", Text
"\214")
  , (Text
"times", Text
"\215")
  , (Text
"Oslash", Text
"\216")
  , (Text
"Ugrave", Text
"\217")
  , (Text
"Uacute", Text
"\218")
  , (Text
"Ucirc", Text
"\219")
  , (Text
"Uuml", Text
"\220")
  , (Text
"Yacute", Text
"\221")
  , (Text
"THORN", Text
"\222")
  , (Text
"szlig", Text
"\223")
  , (Text
"agrave", Text
"\224")
  , (Text
"aacute", Text
"\225")
  , (Text
"acirc", Text
"\226")
  , (Text
"atilde", Text
"\227")
  , (Text
"auml", Text
"\228")
  , (Text
"aring", Text
"\229")
  , (Text
"aelig", Text
"\230")
  , (Text
"ccedil", Text
"\231")
  , (Text
"egrave", Text
"\232")
  , (Text
"eacute", Text
"\233")
  , (Text
"ecirc", Text
"\234")
  , (Text
"euml", Text
"\235")
  , (Text
"igrave", Text
"\236")
  , (Text
"iacute", Text
"\237")
  , (Text
"icirc", Text
"\238")
  , (Text
"iuml", Text
"\239")
  , (Text
"eth", Text
"\240")
  , (Text
"ntilde", Text
"\241")
  , (Text
"ograve", Text
"\242")
  , (Text
"oacute", Text
"\243")
  , (Text
"ocirc", Text
"\244")
  , (Text
"otilde", Text
"\245")
  , (Text
"ouml", Text
"\246")
  , (Text
"divide", Text
"\247")
  , (Text
"oslash", Text
"\248")
  , (Text
"ugrave", Text
"\249")
  , (Text
"uacute", Text
"\250")
  , (Text
"ucirc", Text
"\251")
  , (Text
"uuml", Text
"\252")
  , (Text
"yacute", Text
"\253")
  , (Text
"thorn", Text
"\254")
  , (Text
"yuml", Text
"\255")
  , (Text
"OElig", Text
"\338")
  , (Text
"oelig", Text
"\339")
  , (Text
"Scaron", Text
"\352")
  , (Text
"scaron", Text
"\353")
  , (Text
"Yuml", Text
"\376")
  , (Text
"fnof", Text
"\402")
  , (Text
"circ", Text
"\710")
  , (Text
"tilde", Text
"\732")
  , (Text
"Alpha", Text
"\913")
  , (Text
"Beta", Text
"\914")
  , (Text
"Gamma", Text
"\915")
  , (Text
"Delta", Text
"\916")
  , (Text
"Epsilon", Text
"\917")
  , (Text
"Zeta", Text
"\918")
  , (Text
"Eta", Text
"\919")
  , (Text
"Theta", Text
"\920")
  , (Text
"Iota", Text
"\921")
  , (Text
"Kappa", Text
"\922")
  , (Text
"Lambda", Text
"\923")
  , (Text
"Mu", Text
"\924")
  , (Text
"Nu", Text
"\925")
  , (Text
"Xi", Text
"\926")
  , (Text
"Omicron", Text
"\927")
  , (Text
"Pi", Text
"\928")
  , (Text
"Rho", Text
"\929")
  , (Text
"Sigma", Text
"\931")
  , (Text
"Tau", Text
"\932")
  , (Text
"Upsilon", Text
"\933")
  , (Text
"Phi", Text
"\934")
  , (Text
"Chi", Text
"\935")
  , (Text
"Psi", Text
"\936")
  , (Text
"Omega", Text
"\937")
  , (Text
"alpha", Text
"\945")
  , (Text
"beta", Text
"\946")
  , (Text
"gamma", Text
"\947")
  , (Text
"delta", Text
"\948")
  , (Text
"epsilon", Text
"\949")
  , (Text
"zeta", Text
"\950")
  , (Text
"eta", Text
"\951")
  , (Text
"theta", Text
"\952")
  , (Text
"iota", Text
"\953")
  , (Text
"kappa", Text
"\954")
  , (Text
"lambda", Text
"\955")
  , (Text
"mu", Text
"\956")
  , (Text
"nu", Text
"\957")
  , (Text
"xi", Text
"\958")
  , (Text
"omicron", Text
"\959")
  , (Text
"pi", Text
"\960")
  , (Text
"rho", Text
"\961")
  , (Text
"sigmaf", Text
"\962")
  , (Text
"sigma", Text
"\963")
  , (Text
"tau", Text
"\964")
  , (Text
"upsilon", Text
"\965")
  , (Text
"phi", Text
"\966")
  , (Text
"chi", Text
"\967")
  , (Text
"psi", Text
"\968")
  , (Text
"omega", Text
"\969")
  , (Text
"thetasym", Text
"\977")
  , (Text
"upsih", Text
"\978")
  , (Text
"piv", Text
"\982")
  , (Text
"ensp", Text
"\8194")
  , (Text
"emsp", Text
"\8195")
  , (Text
"thinsp", Text
"\8201")
  , (Text
"zwnj", Text
"\8204")
  , (Text
"zwj", Text
"\8205")
  , (Text
"lrm", Text
"\8206")
  , (Text
"rlm", Text
"\8207")
  , (Text
"ndash", Text
"\8211")
  , (Text
"mdash", Text
"\8212")
  , (Text
"lsquo", Text
"\8216")
  , (Text
"rsquo", Text
"\8217")
  , (Text
"sbquo", Text
"\8218")
  , (Text
"ldquo", Text
"\8220")
  , (Text
"rdquo", Text
"\8221")
  , (Text
"bdquo", Text
"\8222")
  , (Text
"dagger", Text
"\8224")
  , (Text
"Dagger", Text
"\8225")
  , (Text
"bull", Text
"\8226")
  , (Text
"hellip", Text
"\8230")
  , (Text
"permil", Text
"\8240")
  , (Text
"prime", Text
"\8242")
  , (Text
"Prime", Text
"\8243")
  , (Text
"lsaquo", Text
"\8249")
  , (Text
"rsaquo", Text
"\8250")
  , (Text
"oline", Text
"\8254")
  , (Text
"frasl", Text
"\8260")
  , (Text
"euro", Text
"\8364")
  , (Text
"image", Text
"\8465")
  , (Text
"weierp", Text
"\8472")
  , (Text
"real", Text
"\8476")
  , (Text
"trade", Text
"\8482")
  , (Text
"alefsym", Text
"\8501")
  , (Text
"larr", Text
"\8592")
  , (Text
"uarr", Text
"\8593")
  , (Text
"rarr", Text
"\8594")
  , (Text
"darr", Text
"\8595")
  , (Text
"harr", Text
"\8596")
  , (Text
"crarr", Text
"\8629")
  , (Text
"lArr", Text
"\8656")
  , (Text
"uArr", Text
"\8657")
  , (Text
"rArr", Text
"\8658")
  , (Text
"dArr", Text
"\8659")
  , (Text
"hArr", Text
"\8660")
  , (Text
"forall", Text
"\8704")
  , (Text
"part", Text
"\8706")
  , (Text
"exist", Text
"\8707")
  , (Text
"empty", Text
"\8709")
  , (Text
"nabla", Text
"\8711")
  , (Text
"isin", Text
"\8712")
  , (Text
"notin", Text
"\8713")
  , (Text
"ni", Text
"\8715")
  , (Text
"prod", Text
"\8719")
  , (Text
"sum", Text
"\8721")
  , (Text
"minus", Text
"\8722")
  , (Text
"lowast", Text
"\8727")
  , (Text
"radic", Text
"\8730")
  , (Text
"prop", Text
"\8733")
  , (Text
"infin", Text
"\8734")
  , (Text
"ang", Text
"\8736")
  , (Text
"and", Text
"\8743")
  , (Text
"or", Text
"\8744")
  , (Text
"cap", Text
"\8745")
  , (Text
"cup", Text
"\8746")
  , (Text
"int", Text
"\8747")
  , (Text
"there4", Text
"\8756")
  , (Text
"sim", Text
"\8764")
  , (Text
"cong", Text
"\8773")
  , (Text
"asymp", Text
"\8776")
  , (Text
"ne", Text
"\8800")
  , (Text
"equiv", Text
"\8801")
  , (Text
"le", Text
"\8804")
  , (Text
"ge", Text
"\8805")
  , (Text
"sub", Text
"\8834")
  , (Text
"sup", Text
"\8835")
  , (Text
"nsub", Text
"\8836")
  , (Text
"sube", Text
"\8838")
  , (Text
"supe", Text
"\8839")
  , (Text
"oplus", Text
"\8853")
  , (Text
"otimes", Text
"\8855")
  , (Text
"perp", Text
"\8869")
  , (Text
"sdot", Text
"\8901")
  , (Text
"lceil", Text
"\8968")
  , (Text
"rceil", Text
"\8969")
  , (Text
"lfloor", Text
"\8970")
  , (Text
"rfloor", Text
"\8971")
  , (Text
"lang", Text
"\9001")
  , (Text
"rang", Text
"\9002")
  , (Text
"loz", Text
"\9674")
  , (Text
"spades", Text
"\9824")
  , (Text
"clubs", Text
"\9827")
  , (Text
"hearts", Text
"\9829")
  , (Text
"diams", Text
"\9830")
  ]