{-# LANGUAGE LambdaCase        #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns      #-}
module Text.Pandoc.XML ( escapeCharForXML,
                         escapeStringForXML,
                         inTags,
                         selfClosingTag,
                         inTagsSimple,
                         inTagsIndented,
                         toEntities,
                         toHtml5Entities,
                         fromEntities,
                         html4Attributes,
                         html5Attributes,
                         rdfaAttributes ) where
import Data.Char (isAscii, isSpace, ord)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup.Entity (lookupEntity, htmlEntities)
import Text.DocLayout
import Text.Printf (printf)
import qualified Data.Map as M
import Data.String
import qualified Data.Set as Set
escapeCharForXML :: Char -> Text
escapeCharForXML :: Char -> Text
escapeCharForXML Char
x = case Char
x of
                       Char
'&' -> Text
"&"
                       Char
'<' -> Text
"<"
                       Char
'>' -> Text
">"
                       Char
'"' -> Text
"""
                       Char
c   -> Char -> Text
T.singleton Char
c
escapeStringForXML :: Text -> Text
escapeStringForXML :: Text -> Text
escapeStringForXML = (Char -> Text) -> Text -> Text
T.concatMap Char -> Text
escapeCharForXML (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
T.filter Char -> Bool
isLegalXMLChar
  where isLegalXMLChar :: Char -> Bool
isLegalXMLChar Char
c = Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\t' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\r' Bool -> Bool -> Bool
||
                           (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x20' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF') Bool -> Bool -> Bool
||
                           (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xE000' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD') Bool -> Bool -> Bool
||
                           (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x10FFFF')
  
escapeNls :: Text -> Text
escapeNls :: Text -> Text
escapeNls = (Char -> Text) -> Text -> Text
T.concatMap ((Char -> Text) -> Text -> Text) -> (Char -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ \case
  Char
'\n' -> Text
"
"
  Char
c    -> Char -> Text
T.singleton Char
c
attributeList :: (HasChars a, IsString a) => [(Text, Text)] -> Doc a
attributeList :: [(Text, Text)] -> Doc a
attributeList = [Doc a] -> Doc a
forall a. [Doc a] -> Doc a
hcat ([Doc a] -> Doc a)
-> ([(Text, Text)] -> [Doc a]) -> [(Text, Text)] -> Doc a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Text, Text) -> Doc a) -> [(Text, Text)] -> [Doc a]
forall a b. (a -> b) -> [a] -> [b]
map
  (\(Text
a, Text
b) -> String -> Doc a
forall a. HasChars a => String -> Doc a
text (Text -> String
T.unpack (Text -> String) -> Text -> String
forall a b. (a -> b) -> a -> b
$ Text
" " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> Text
escapeStringForXML Text
a Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"=\"" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<>
  Text -> Text
escapeNls (Text -> Text
escapeStringForXML Text
b) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"\""))
inTags :: (HasChars a, IsString a)
      => Bool -> Text -> [(Text, Text)] -> Doc a -> Doc a
inTags :: Bool -> Text -> [(Text, Text)] -> Doc a -> Doc a
inTags Bool
isIndented Text
tagType [(Text, Text)]
attribs Doc a
contents =
  let openTag :: Doc a
openTag = Char -> Doc a
forall a. HasChars a => Char -> Doc a
char Char
'<' Doc a -> Doc a -> Doc a
forall a. Semigroup a => a -> a -> a
<> String -> Doc a
forall a. HasChars a => String -> Doc a
text (Text -> String
T.unpack Text
tagType) Doc a -> Doc a -> Doc a
forall a. Semigroup a => a -> a -> a
<> [(Text, Text)] -> Doc a
forall a. (HasChars a, IsString a) => [(Text, Text)] -> Doc a
attributeList [(Text, Text)]
attribs Doc a -> Doc a -> Doc a
forall a. Semigroup a => a -> a -> a
<>
                Char -> Doc a
forall a. HasChars a => Char -> Doc a
char Char
'>'
      closeTag :: Doc a
closeTag  = String -> Doc a
forall a. HasChars a => String -> Doc a
text String
"</" Doc a -> Doc a -> Doc a
forall a. Semigroup a => a -> a -> a
<> String -> Doc a
forall a. HasChars a => String -> Doc a
text (Text -> String
T.unpack Text
tagType) Doc a -> Doc a -> Doc a
forall a. Semigroup a => a -> a -> a
<> Char -> Doc a
forall a. HasChars a => Char -> Doc a
char Char
'>'
  in  if Bool
isIndented
         then Doc a
openTag Doc a -> Doc a -> Doc a
forall a. Doc a -> Doc a -> Doc a
$$ Int -> Doc a -> Doc a
forall a. IsString a => Int -> Doc a -> Doc a
nest Int
2 Doc a
contents Doc a -> Doc a -> Doc a
forall a. Doc a -> Doc a -> Doc a
$$ Doc a
closeTag
         else Doc a
openTag Doc a -> Doc a -> Doc a
forall a. Semigroup a => a -> a -> a
<> Doc a
contents Doc a -> Doc a -> Doc a
forall a. Semigroup a => a -> a -> a
<> Doc a
closeTag
selfClosingTag :: (HasChars a, IsString a)
               => Text -> [(Text, Text)] -> Doc a
selfClosingTag :: Text -> [(Text, Text)] -> Doc a
selfClosingTag Text
tagType [(Text, Text)]
attribs =
  Char -> Doc a
forall a. HasChars a => Char -> Doc a
char Char
'<' Doc a -> Doc a -> Doc a
forall a. Semigroup a => a -> a -> a
<> String -> Doc a
forall a. HasChars a => String -> Doc a
text (Text -> String
T.unpack Text
tagType) Doc a -> Doc a -> Doc a
forall a. Semigroup a => a -> a -> a
<> [(Text, Text)] -> Doc a
forall a. (HasChars a, IsString a) => [(Text, Text)] -> Doc a
attributeList [(Text, Text)]
attribs Doc a -> Doc a -> Doc a
forall a. Semigroup a => a -> a -> a
<> String -> Doc a
forall a. HasChars a => String -> Doc a
text String
" />"
inTagsSimple :: (HasChars a, IsString a)
             => Text -> Doc a -> Doc a
inTagsSimple :: Text -> Doc a -> Doc a
inTagsSimple Text
tagType = Bool -> Text -> [(Text, Text)] -> Doc a -> Doc a
forall a.
(HasChars a, IsString a) =>
Bool -> Text -> [(Text, Text)] -> Doc a -> Doc a
inTags Bool
False Text
tagType []
inTagsIndented :: (HasChars a, IsString a)
               => Text -> Doc a -> Doc a
inTagsIndented :: Text -> Doc a -> Doc a
inTagsIndented Text
tagType = Bool -> Text -> [(Text, Text)] -> Doc a -> Doc a
forall a.
(HasChars a, IsString a) =>
Bool -> Text -> [(Text, Text)] -> Doc a -> Doc a
inTags Bool
True Text
tagType []
toEntities :: Text -> Text
toEntities :: Text -> Text
toEntities = (Char -> Text) -> Text -> Text
T.concatMap Char -> Text
go
  where go :: Char -> Text
go Char
c | Char -> Bool
isAscii Char
c = Char -> Text
T.singleton Char
c
             | Bool
otherwise = String -> Text
T.pack (String -> Int -> String
forall r. PrintfType r => String -> r
printf String
"&#x%X;" (Char -> Int
ord Char
c))
toHtml5Entities :: Text -> Text
toHtml5Entities :: Text -> Text
toHtml5Entities = (Char -> Text) -> Text -> Text
T.concatMap Char -> Text
go
  where go :: Char -> Text
go Char
c | Char -> Bool
isAscii Char
c = Char -> Text
T.singleton Char
c
             | Bool
otherwise =
                 case Char -> Map Char Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Char
c Map Char Text
html5EntityMap of
                   Just Text
t  -> Char -> Text
T.singleton Char
'&' Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
t Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Char -> Text
T.singleton Char
';'
                   Maybe Text
Nothing -> String -> Text
T.pack (String
"&#" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (Char -> Int
ord Char
c) String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
";")
html5EntityMap :: M.Map Char Text
html5EntityMap :: Map Char Text
html5EntityMap = ((String, String) -> Map Char Text -> Map Char Text)
-> Map Char Text -> [(String, String)] -> Map Char Text
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (String, String) -> Map Char Text -> Map Char Text
forall k. Ord k => (String, [k]) -> Map k Text -> Map k Text
go Map Char Text
forall a. Monoid a => a
mempty [(String, String)]
htmlEntities
  where go :: (String, [k]) -> Map k Text -> Map k Text
go (String
ent, [k]
s) Map k Text
entmap =
         case [k]
s of
           [k
c] -> (Text -> Text -> Text) -> k -> Text -> Map k Text -> Map k Text
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
M.insertWith
                   (\Text
new Text
old -> if Text -> Int
T.length Text
new Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Text -> Int
T.length Text
old
                                   then Text
old
                                   else Text
new) k
c Text
ent' Map k Text
entmap
             where ent' :: Text
ent' = (Char -> Bool) -> Text -> Text
T.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
';') (String -> Text
T.pack String
ent)
           [k]
_   -> Map k Text
entmap
fromEntities :: Text -> Text
fromEntities :: Text -> Text
fromEntities = String -> Text
T.pack (String -> Text) -> (Text -> String) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
fromEntities'
fromEntities' :: Text -> String
fromEntities' :: Text -> String
fromEntities' (Text -> Maybe (Char, Text)
T.uncons -> Just (Char
'&', Text
xs)) =
  case String -> Maybe String
lookupEntity (String -> Maybe String) -> String -> Maybe String
forall a b. (a -> b) -> a -> b
$ Text -> String
T.unpack Text
ent' of
        Just String
c  -> String
c String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Text -> String
fromEntities' Text
rest
        Maybe String
Nothing -> String
"&" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Text -> String
fromEntities' Text
xs
    where (Text
ent, Text
rest) = case (Char -> Bool) -> Text -> (Text, Text)
T.break (\Char
c -> Char -> Bool
isSpace Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
';') Text
xs of
                          (Text
zs,Text -> Maybe (Char, Text)
T.uncons -> Just (Char
';',Text
ys)) -> (Text
zs,Text
ys)
                          (Text
zs, Text
ys) -> (Text
zs,Text
ys)
          ent' :: Text
ent'
            | Just Text
ys <- Text -> Text -> Maybe Text
T.stripPrefix Text
"#X" Text
ent = Text
"#x" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
ys  
            | Just (Char
'#', Text
_) <- Text -> Maybe (Char, Text)
T.uncons Text
ent     = Text
ent
            | Bool
otherwise                         = Text
ent Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
";"
fromEntities' Text
t = case Text -> Maybe (Char, Text)
T.uncons Text
t of
  Just (Char
x, Text
xs) -> Char
x Char -> String -> String
forall a. a -> [a] -> [a]
: Text -> String
fromEntities' Text
xs
  Maybe (Char, Text)
Nothing      -> String
""
html5Attributes :: Set.Set Text
html5Attributes :: Set Text
html5Attributes = [Text] -> Set Text
forall a. Ord a => [a] -> Set a
Set.fromList
  [ Text
"abbr"
  , Text
"accept"
  , Text
"accept-charset"
  , Text
"accesskey"
  , Text
"action"
  , Text
"allow"
  , Text
"allowfullscreen"
  , Text
"allowpaymentrequest"
  , Text
"allowusermedia"
  , Text
"alt"
  , Text
"as"
  , Text
"async"
  , Text
"autocapitalize"
  , Text
"autocomplete"
  , Text
"autofocus"
  , Text
"autoplay"
  , Text
"charset"
  , Text
"checked"
  , Text
"cite"
  , Text
"class"
  , Text
"color"
  , Text
"cols"
  , Text
"colspan"
  , Text
"content"
  , Text
"contenteditable"
  , Text
"controls"
  , Text
"coords"
  , Text
"crossorigin"
  , Text
"data"
  , Text
"datetime"
  , Text
"decoding"
  , Text
"default"
  , Text
"defer"
  , Text
"dir"
  , Text
"dirname"
  , Text
"disabled"
  , Text
"download"
  , Text
"draggable"
  , Text
"enctype"
  , Text
"enterkeyhint"
  , Text
"for"
  , Text
"form"
  , Text
"formaction"
  , Text
"formenctype"
  , Text
"formmethod"
  , Text
"formnovalidate"
  , Text
"formtarget"
  , Text
"headers"
  , Text
"height"
  , Text
"hidden"
  , Text
"high"
  , Text
"href"
  , Text
"hreflang"
  , Text
"http-equiv"
  , Text
"id"
  , Text
"imagesizes"
  , Text
"imagesrcset"
  , Text
"inputmode"
  , Text
"integrity"
  , Text
"is"
  , Text
"ismap"
  , Text
"itemid"
  , Text
"itemprop"
  , Text
"itemref"
  , Text
"itemscope"
  , Text
"itemtype"
  , Text
"kind"
  , Text
"label"
  , Text
"lang"
  , Text
"list"
  , Text
"loading"
  , Text
"loop"
  , Text
"low"
  , Text
"manifest"
  , Text
"max"
  , Text
"maxlength"
  , Text
"media"
  , Text
"method"
  , Text
"min"
  , Text
"minlength"
  , Text
"multiple"
  , Text
"muted"
  , Text
"name"
  , Text
"nomodule"
  , Text
"nonce"
  , Text
"novalidate"
  , Text
"onabort"
  , Text
"onafterprint"
  , Text
"onauxclick"
  , Text
"onbeforeprint"
  , Text
"onbeforeunload"
  , Text
"onblur"
  , Text
"oncancel"
  , Text
"oncanplay"
  , Text
"oncanplaythrough"
  , Text
"onchange"
  , Text
"onclick"
  , Text
"onclose"
  , Text
"oncontextmenu"
  , Text
"oncopy"
  , Text
"oncuechange"
  , Text
"oncut"
  , Text
"ondblclick"
  , Text
"ondrag"
  , Text
"ondragend"
  , Text
"ondragenter"
  , Text
"ondragexit"
  , Text
"ondragleave"
  , Text
"ondragover"
  , Text
"ondragstart"
  , Text
"ondrop"
  , Text
"ondurationchange"
  , Text
"onemptied"
  , Text
"onended"
  , Text
"onerror"
  , Text
"onfocus"
  , Text
"onhashchange"
  , Text
"oninput"
  , Text
"oninvalid"
  , Text
"onkeydown"
  , Text
"onkeypress"
  , Text
"onkeyup"
  , Text
"onlanguagechange"
  , Text
"onload"
  , Text
"onloadeddata"
  , Text
"onloadedmetadata"
  , Text
"onloadend"
  , Text
"onloadstart"
  , Text
"onmessage"
  , Text
"onmessageerror"
  , Text
"onmousedown"
  , Text
"onmouseenter"
  , Text
"onmouseleave"
  , Text
"onmousemove"
  , Text
"onmouseout"
  , Text
"onmouseover"
  , Text
"onmouseup"
  , Text
"onoffline"
  , Text
"ononline"
  , Text
"onpagehide"
  , Text
"onpageshow"
  , Text
"onpaste"
  , Text
"onpause"
  , Text
"onplay"
  , Text
"onplaying"
  , Text
"onpopstate"
  , Text
"onprogress"
  , Text
"onratechange"
  , Text
"onrejectionhandled"
  , Text
"onreset"
  , Text
"onresize"
  , Text
"onscroll"
  , Text
"onsecuritypolicyviolation"
  , Text
"onseeked"
  , Text
"onseeking"
  , Text
"onselect"
  , Text
"onstalled"
  , Text
"onstorage"
  , Text
"onsubmit"
  , Text
"onsuspend"
  , Text
"ontimeupdate"
  , Text
"ontoggle"
  , Text
"onunhandledrejection"
  , Text
"onunload"
  , Text
"onvolumechange"
  , Text
"onwaiting"
  , Text
"onwheel"
  , Text
"open"
  , Text
"optimum"
  , Text
"pattern"
  , Text
"ping"
  , Text
"placeholder"
  , Text
"playsinline"
  , Text
"poster"
  , Text
"preload"
  , Text
"readonly"
  , Text
"referrerpolicy"
  , Text
"rel"
  , Text
"required"
  , Text
"reversed"
  , Text
"role"
  , Text
"rows"
  , Text
"rowspan"
  , Text
"sandbox"
  , Text
"scope"
  , Text
"selected"
  , Text
"shape"
  , Text
"size"
  , Text
"sizes"
  , Text
"slot"
  , Text
"span"
  , Text
"spellcheck"
  , Text
"src"
  , Text
"srcdoc"
  , Text
"srclang"
  , Text
"srcset"
  , Text
"start"
  , Text
"step"
  , Text
"style"
  , Text
"tabindex"
  , Text
"target"
  , Text
"title"
  , Text
"translate"
  , Text
"type"
  , Text
"typemustmatch"
  , Text
"updateviacache"
  , Text
"usemap"
  , Text
"value"
  , Text
"width"
  , Text
"workertype"
  , Text
"wrap"
  ]
rdfaAttributes :: Set.Set Text
rdfaAttributes :: Set Text
rdfaAttributes = [Text] -> Set Text
forall a. Ord a => [a] -> Set a
Set.fromList
  [ Text
"about"
  , Text
"rel"
  , Text
"rev"
  , Text
"src"
  , Text
"href"
  , Text
"resource"
  , Text
"property"
  , Text
"content"
  , Text
"datatype"
  , Text
"typeof"
  , Text
"vocab"
  , Text
"prefix"
  ]
html4Attributes :: Set.Set Text
html4Attributes :: Set Text
html4Attributes = [Text] -> Set Text
forall a. Ord a => [a] -> Set a
Set.fromList
  [ Text
"abbr"
  , Text
"accept"
  , Text
"accept-charset"
  , Text
"accesskey"
  , Text
"action"
  , Text
"align"
  , Text
"alink"
  , Text
"alt"
  , Text
"archive"
  , Text
"axis"
  , Text
"background"
  , Text
"bgcolor"
  , Text
"border"
  , Text
"cellpadding"
  , Text
"cellspacing"
  , Text
"char"
  , Text
"charoff"
  , Text
"charset"
  , Text
"checked"
  , Text
"cite"
  , Text
"class"
  , Text
"classid"
  , Text
"clear"
  , Text
"code"
  , Text
"codebase"
  , Text
"codetype"
  , Text
"color"
  , Text
"cols"
  , Text
"colspan"
  , Text
"compact"
  , Text
"content"
  , Text
"coords"
  , Text
"data"
  , Text
"datetime"
  , Text
"declare"
  , Text
"defer"
  , Text
"dir"
  , Text
"disabled"
  , Text
"enctype"
  , Text
"face"
  , Text
"for"
  , Text
"frame"
  , Text
"frameborder"
  , Text
"headers"
  , Text
"height"
  , Text
"href"
  , Text
"hreflang"
  , Text
"hspace"
  , Text
"http-equiv"
  , Text
"id"
  , Text
"ismap"
  , Text
"label"
  , Text
"lang"
  , Text
"language"
  , Text
"link"
  , Text
"longdesc"
  , Text
"marginheight"
  , Text
"marginwidth"
  , Text
"maxlength"
  , Text
"media"
  , Text
"method"
  , Text
"multiple"
  , Text
"name"
  , Text
"nohref"
  , Text
"noresize"
  , Text
"noshade"
  , Text
"nowrap"
  , Text
"object"
  , Text
"onblur"
  , Text
"onchange"
  , Text
"onclick"
  , Text
"ondblclick"
  , Text
"onfocus"
  , Text
"onkeydown"
  , Text
"onkeypress"
  , Text
"onkeyup"
  , Text
"onload"
  , Text
"onmousedown"
  , Text
"onmousemove"
  , Text
"onmouseout"
  , Text
"onmouseover"
  , Text
"onmouseup"
  , Text
"onreset"
  , Text
"onselect"
  , Text
"onsubmit"
  , Text
"onunload"
  , Text
"profile"
  , Text
"prompt"
  , Text
"readonly"
  , Text
"rel"
  , Text
"rev"
  , Text
"rows"
  , Text
"rowspan"
  , Text
"rules"
  , Text
"scheme"
  , Text
"scope"
  , Text
"scrolling"
  , Text
"selected"
  , Text
"shape"
  , Text
"size"
  , Text
"span"
  , Text
"src"
  , Text
"standby"
  , Text
"start"
  , Text
"style"
  , Text
"summary"
  , Text
"tabindex"
  , Text
"target"
  , Text
"text"
  , Text
"title"
  , Text
"usemap"
  , Text
"valign"
  , Text
"value"
  , Text
"valuetype"
  , Text
"version"
  , Text
"vlink"
  , Text
"vspace"
  , Text
"width"
  ]