{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.XML ( escapeCharForXML,
escapeStringForXML,
escapeNCName,
inTags,
selfClosingTag,
inTagsSimple,
inTagsIndented,
toEntities,
toHtml5Entities,
fromEntities,
lookupEntity,
html4Attributes,
html5Attributes,
rdfaAttributes ) where
import Data.Char (isAscii, isSpace, ord, isLetter, isDigit)
import Data.Text (Text)
import qualified Data.Text as T
import Commonmark.Entity (lookupEntity)
import Text.HTML.TagSoup.Entity (htmlEntities)
import Text.DocLayout
( ($$), char, hcat, nest, text, Doc, HasChars )
import Text.Printf (printf)
import qualified Data.Map as M
import Data.String ( IsString )
import qualified Data.Set as Set
escapeCharForXML :: Char -> Text
escapeCharForXML :: Char -> Text
escapeCharForXML Char
x = case Char
x of
Char
'&' -> Text
"&"
Char
'<' -> Text
"<"
Char
'>' -> Text
">"
Char
'"' -> Text
"""
Char
c -> Char -> Text
T.singleton Char
c
escapeStringForXML :: Text -> Text
escapeStringForXML :: Text -> Text
escapeStringForXML = (Char -> Text) -> Text -> Text
T.concatMap Char -> Text
escapeCharForXML forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
T.filter Char -> Bool
isLegalXMLChar
where isLegalXMLChar :: Char -> Bool
isLegalXMLChar Char
c = Char
c forall a. Eq a => a -> a -> Bool
== Char
'\t' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\n' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\r' Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x20' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xE000' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD') Bool -> Bool -> Bool
||
(Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x10FFFF')
escapeNls :: Text -> Text
escapeNls :: Text -> Text
escapeNls = (Char -> Text) -> Text -> Text
T.concatMap forall a b. (a -> b) -> a -> b
$ \case
Char
'\n' -> Text
" "
Char
c -> Char -> Text
T.singleton Char
c
attributeList :: (HasChars a, IsString a) => [(Text, Text)] -> Doc a
attributeList :: forall a. (HasChars a, IsString a) => [(Text, Text)] -> Doc a
attributeList = forall a. [Doc a] -> Doc a
hcat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map
(\(Text
a, Text
b) -> forall a. HasChars a => String -> Doc a
text (Text -> String
T.unpack forall a b. (a -> b) -> a -> b
$ Text
" " forall a. Semigroup a => a -> a -> a
<> Text -> Text
escapeStringForXML Text
a forall a. Semigroup a => a -> a -> a
<> Text
"=\"" forall a. Semigroup a => a -> a -> a
<>
Text -> Text
escapeNls (Text -> Text
escapeStringForXML Text
b) forall a. Semigroup a => a -> a -> a
<> Text
"\""))
inTags :: (HasChars a, IsString a)
=> Bool -> Text -> [(Text, Text)] -> Doc a -> Doc a
inTags :: forall a.
(HasChars a, IsString a) =>
Bool -> Text -> [(Text, Text)] -> Doc a -> Doc a
inTags Bool
isIndented Text
tagType [(Text, Text)]
attribs Doc a
contents =
let openTag :: Doc a
openTag = forall a. HasChars a => Char -> Doc a
char Char
'<' forall a. Semigroup a => a -> a -> a
<> forall a. HasChars a => String -> Doc a
text (Text -> String
T.unpack Text
tagType) forall a. Semigroup a => a -> a -> a
<> forall a. (HasChars a, IsString a) => [(Text, Text)] -> Doc a
attributeList [(Text, Text)]
attribs forall a. Semigroup a => a -> a -> a
<>
forall a. HasChars a => Char -> Doc a
char Char
'>'
closeTag :: Doc a
closeTag = forall a. HasChars a => String -> Doc a
text String
"</" forall a. Semigroup a => a -> a -> a
<> forall a. HasChars a => String -> Doc a
text (Text -> String
T.unpack Text
tagType) forall a. Semigroup a => a -> a -> a
<> forall a. HasChars a => Char -> Doc a
char Char
'>'
in if Bool
isIndented
then Doc a
openTag forall a. Doc a -> Doc a -> Doc a
$$ forall a. IsString a => Int -> Doc a -> Doc a
nest Int
2 Doc a
contents forall a. Doc a -> Doc a -> Doc a
$$ Doc a
closeTag
else Doc a
openTag forall a. Semigroup a => a -> a -> a
<> Doc a
contents forall a. Semigroup a => a -> a -> a
<> Doc a
closeTag
selfClosingTag :: (HasChars a, IsString a)
=> Text -> [(Text, Text)] -> Doc a
selfClosingTag :: forall a.
(HasChars a, IsString a) =>
Text -> [(Text, Text)] -> Doc a
selfClosingTag Text
tagType [(Text, Text)]
attribs =
forall a. HasChars a => Char -> Doc a
char Char
'<' forall a. Semigroup a => a -> a -> a
<> forall a. HasChars a => String -> Doc a
text (Text -> String
T.unpack Text
tagType) forall a. Semigroup a => a -> a -> a
<> forall a. (HasChars a, IsString a) => [(Text, Text)] -> Doc a
attributeList [(Text, Text)]
attribs forall a. Semigroup a => a -> a -> a
<> forall a. HasChars a => String -> Doc a
text String
" />"
inTagsSimple :: (HasChars a, IsString a)
=> Text -> Doc a -> Doc a
inTagsSimple :: forall a. (HasChars a, IsString a) => Text -> Doc a -> Doc a
inTagsSimple Text
tagType = forall a.
(HasChars a, IsString a) =>
Bool -> Text -> [(Text, Text)] -> Doc a -> Doc a
inTags Bool
False Text
tagType []
inTagsIndented :: (HasChars a, IsString a)
=> Text -> Doc a -> Doc a
inTagsIndented :: forall a. (HasChars a, IsString a) => Text -> Doc a -> Doc a
inTagsIndented Text
tagType = forall a.
(HasChars a, IsString a) =>
Bool -> Text -> [(Text, Text)] -> Doc a -> Doc a
inTags Bool
True Text
tagType []
toEntities :: Text -> Text
toEntities :: Text -> Text
toEntities = (Char -> Text) -> Text -> Text
T.concatMap Char -> Text
go
where go :: Char -> Text
go Char
c | Char -> Bool
isAscii Char
c = Char -> Text
T.singleton Char
c
| Bool
otherwise = String -> Text
T.pack (forall r. PrintfType r => String -> r
printf String
"&#x%X;" (Char -> Int
ord Char
c))
toHtml5Entities :: Text -> Text
toHtml5Entities :: Text -> Text
toHtml5Entities = (Char -> Text) -> Text -> Text
T.concatMap Char -> Text
go
where go :: Char -> Text
go Char
c | Char -> Bool
isAscii Char
c = Char -> Text
T.singleton Char
c
| Bool
otherwise =
case forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Char
c Map Char Text
html5EntityMap of
Just Text
t -> Char -> Text
T.singleton Char
'&' forall a. Semigroup a => a -> a -> a
<> Text
t forall a. Semigroup a => a -> a -> a
<> Char -> Text
T.singleton Char
';'
Maybe Text
Nothing -> String -> Text
T.pack (String
"&#" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (Char -> Int
ord Char
c) forall a. [a] -> [a] -> [a]
++ String
";")
html5EntityMap :: M.Map Char Text
html5EntityMap :: Map Char Text
html5EntityMap = forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr forall {k}. Ord k => (String, [k]) -> Map k Text -> Map k Text
go forall a. Monoid a => a
mempty [(String, String)]
htmlEntities
where go :: (String, [k]) -> Map k Text -> Map k Text
go (String
ent, [k]
s) Map k Text
entmap =
case [k]
s of
[k
c] -> forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
M.insertWith
(\Text
new Text
old -> if Text -> Int
T.length Text
new forall a. Ord a => a -> a -> Bool
> Text -> Int
T.length Text
old
then Text
old
else Text
new) k
c Text
ent' Map k Text
entmap
where ent' :: Text
ent' = (Char -> Bool) -> Text -> Text
T.takeWhile (forall a. Eq a => a -> a -> Bool
/=Char
';') (String -> Text
T.pack String
ent)
[k]
_ -> Map k Text
entmap
escapeNCName :: Text -> Text
escapeNCName :: Text -> Text
escapeNCName Text
t = case Text -> Maybe (Char, Text)
T.uncons Text
t of
Maybe (Char, Text)
Nothing -> Text
T.empty
Just (Char
c, Text
cs) -> Char -> Text
escapeStartChar Char
c forall a. Semigroup a => a -> a -> a
<> (Char -> Text) -> Text -> Text
T.concatMap Char -> Text
escapeNCNameChar Text
cs
where
escapeStartChar :: Char -> Text
escapeStartChar :: Char -> Text
escapeStartChar Char
c = if Char -> Bool
isLetter Char
c Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'_'
then Char -> Text
T.singleton Char
c
else Char -> Text
escapeChar Char
c
escapeNCNameChar :: Char -> Text
escapeNCNameChar :: Char -> Text
escapeNCNameChar Char
c = if Char -> Bool
isNCNameChar Char
c
then Char -> Text
T.singleton Char
c
else Char -> Text
escapeChar Char
c
isNCNameChar :: Char -> Bool
isNCNameChar :: Char -> Bool
isNCNameChar Char
c = Char -> Bool
isLetter Char
c Bool -> Bool -> Bool
|| Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"_-.·" :: String) Bool -> Bool -> Bool
|| Char -> Bool
isDigit Char
c
Bool -> Bool -> Bool
|| Char
'\x0300' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x036f'
Bool -> Bool -> Bool
|| Char
'\x203f' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2040'
escapeChar :: Char -> Text
escapeChar :: Char -> Text
escapeChar = String -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall r. PrintfType r => String -> r
printf String
"U%04X" forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord
fromEntities :: Text -> Text
fromEntities :: Text -> Text
fromEntities Text
t
= let (Text
x, Text
y) = (Char -> Bool) -> Text -> (Text, Text)
T.break (forall a. Eq a => a -> a -> Bool
== Char
'&') Text
t
in if Text -> Bool
T.null Text
y
then Text
t
else Text
x forall a. Semigroup a => a -> a -> a
<>
let (Text
ent, Text
rest) = (Char -> Bool) -> Text -> (Text, Text)
T.break (\Char
c -> Char -> Bool
isSpace Char
c Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
';') Text
y
rest' :: Text
rest' = case Text -> Maybe (Char, Text)
T.uncons Text
rest of
Just (Char
';',Text
ys) -> Text
ys
Maybe (Char, Text)
_ -> Text
rest
ent' :: Text
ent' = Int -> Text -> Text
T.drop Int
1 Text
ent forall a. Semigroup a => a -> a -> a
<> Text
";"
in case Text -> Maybe Text
lookupEntity Text
ent' of
Just Text
c -> Text
c forall a. Semigroup a => a -> a -> a
<> Text -> Text
fromEntities Text
rest'
Maybe Text
Nothing -> Text
ent forall a. Semigroup a => a -> a -> a
<> Text -> Text
fromEntities Text
rest
html5Attributes :: Set.Set Text
html5Attributes :: Set Text
html5Attributes = forall a. Ord a => [a] -> Set a
Set.fromList
[ Text
"abbr"
, Text
"accept"
, Text
"accept-charset"
, Text
"accesskey"
, Text
"action"
, Text
"allow"
, Text
"allowfullscreen"
, Text
"allowpaymentrequest"
, Text
"allowusermedia"
, Text
"alt"
, Text
"as"
, Text
"async"
, Text
"autocapitalize"
, Text
"autocomplete"
, Text
"autofocus"
, Text
"autoplay"
, Text
"charset"
, Text
"checked"
, Text
"cite"
, Text
"class"
, Text
"color"
, Text
"cols"
, Text
"colspan"
, Text
"content"
, Text
"contenteditable"
, Text
"controls"
, Text
"coords"
, Text
"crossorigin"
, Text
"data"
, Text
"datetime"
, Text
"decoding"
, Text
"default"
, Text
"defer"
, Text
"dir"
, Text
"dirname"
, Text
"disabled"
, Text
"download"
, Text
"draggable"
, Text
"enctype"
, Text
"enterkeyhint"
, Text
"for"
, Text
"form"
, Text
"formaction"
, Text
"formenctype"
, Text
"formmethod"
, Text
"formnovalidate"
, Text
"formtarget"
, Text
"headers"
, Text
"height"
, Text
"hidden"
, Text
"high"
, Text
"href"
, Text
"hreflang"
, Text
"http-equiv"
, Text
"id"
, Text
"imagesizes"
, Text
"imagesrcset"
, Text
"inputmode"
, Text
"integrity"
, Text
"is"
, Text
"ismap"
, Text
"itemid"
, Text
"itemprop"
, Text
"itemref"
, Text
"itemscope"
, Text
"itemtype"
, Text
"kind"
, Text
"label"
, Text
"lang"
, Text
"list"
, Text
"loading"
, Text
"loop"
, Text
"low"
, Text
"manifest"
, Text
"max"
, Text
"maxlength"
, Text
"media"
, Text
"method"
, Text
"min"
, Text
"minlength"
, Text
"multiple"
, Text
"muted"
, Text
"name"
, Text
"nomodule"
, Text
"nonce"
, Text
"novalidate"
, Text
"onabort"
, Text
"onafterprint"
, Text
"onauxclick"
, Text
"onbeforeprint"
, Text
"onbeforeunload"
, Text
"onblur"
, Text
"oncancel"
, Text
"oncanplay"
, Text
"oncanplaythrough"
, Text
"onchange"
, Text
"onclick"
, Text
"onclose"
, Text
"oncontextmenu"
, Text
"oncopy"
, Text
"oncuechange"
, Text
"oncut"
, Text
"ondblclick"
, Text
"ondrag"
, Text
"ondragend"
, Text
"ondragenter"
, Text
"ondragexit"
, Text
"ondragleave"
, Text
"ondragover"
, Text
"ondragstart"
, Text
"ondrop"
, Text
"ondurationchange"
, Text
"onemptied"
, Text
"onended"
, Text
"onerror"
, Text
"onfocus"
, Text
"onhashchange"
, Text
"oninput"
, Text
"oninvalid"
, Text
"onkeydown"
, Text
"onkeypress"
, Text
"onkeyup"
, Text
"onlanguagechange"
, Text
"onload"
, Text
"onloadeddata"
, Text
"onloadedmetadata"
, Text
"onloadend"
, Text
"onloadstart"
, Text
"onmessage"
, Text
"onmessageerror"
, Text
"onmousedown"
, Text
"onmouseenter"
, Text
"onmouseleave"
, Text
"onmousemove"
, Text
"onmouseout"
, Text
"onmouseover"
, Text
"onmouseup"
, Text
"onoffline"
, Text
"ononline"
, Text
"onpagehide"
, Text
"onpageshow"
, Text
"onpaste"
, Text
"onpause"
, Text
"onplay"
, Text
"onplaying"
, Text
"onpopstate"
, Text
"onprogress"
, Text
"onratechange"
, Text
"onrejectionhandled"
, Text
"onreset"
, Text
"onresize"
, Text
"onscroll"
, Text
"onsecuritypolicyviolation"
, Text
"onseeked"
, Text
"onseeking"
, Text
"onselect"
, Text
"onstalled"
, Text
"onstorage"
, Text
"onsubmit"
, Text
"onsuspend"
, Text
"ontimeupdate"
, Text
"ontoggle"
, Text
"onunhandledrejection"
, Text
"onunload"
, Text
"onvolumechange"
, Text
"onwaiting"
, Text
"onwheel"
, Text
"open"
, Text
"optimum"
, Text
"pattern"
, Text
"ping"
, Text
"placeholder"
, Text
"playsinline"
, Text
"poster"
, Text
"preload"
, Text
"readonly"
, Text
"referrerpolicy"
, Text
"rel"
, Text
"required"
, Text
"reversed"
, Text
"role"
, Text
"rows"
, Text
"rowspan"
, Text
"sandbox"
, Text
"scope"
, Text
"selected"
, Text
"shape"
, Text
"size"
, Text
"sizes"
, Text
"slot"
, Text
"span"
, Text
"spellcheck"
, Text
"src"
, Text
"srcdoc"
, Text
"srclang"
, Text
"srcset"
, Text
"start"
, Text
"step"
, Text
"style"
, Text
"tabindex"
, Text
"target"
, Text
"title"
, Text
"translate"
, Text
"type"
, Text
"typemustmatch"
, Text
"updateviacache"
, Text
"usemap"
, Text
"value"
, Text
"width"
, Text
"workertype"
, Text
"wrap"
]
rdfaAttributes :: Set.Set Text
rdfaAttributes :: Set Text
rdfaAttributes = forall a. Ord a => [a] -> Set a
Set.fromList
[ Text
"about"
, Text
"rel"
, Text
"rev"
, Text
"src"
, Text
"href"
, Text
"resource"
, Text
"property"
, Text
"content"
, Text
"datatype"
, Text
"typeof"
, Text
"vocab"
, Text
"prefix"
]
html4Attributes :: Set.Set Text
html4Attributes :: Set Text
html4Attributes = forall a. Ord a => [a] -> Set a
Set.fromList
[ Text
"abbr"
, Text
"accept"
, Text
"accept-charset"
, Text
"accesskey"
, Text
"action"
, Text
"align"
, Text
"alink"
, Text
"alt"
, Text
"archive"
, Text
"axis"
, Text
"background"
, Text
"bgcolor"
, Text
"border"
, Text
"cellpadding"
, Text
"cellspacing"
, Text
"char"
, Text
"charoff"
, Text
"charset"
, Text
"checked"
, Text
"cite"
, Text
"class"
, Text
"classid"
, Text
"clear"
, Text
"code"
, Text
"codebase"
, Text
"codetype"
, Text
"color"
, Text
"cols"
, Text
"colspan"
, Text
"compact"
, Text
"content"
, Text
"coords"
, Text
"data"
, Text
"datetime"
, Text
"declare"
, Text
"defer"
, Text
"dir"
, Text
"disabled"
, Text
"enctype"
, Text
"face"
, Text
"for"
, Text
"frame"
, Text
"frameborder"
, Text
"headers"
, Text
"height"
, Text
"href"
, Text
"hreflang"
, Text
"hspace"
, Text
"http-equiv"
, Text
"id"
, Text
"ismap"
, Text
"label"
, Text
"lang"
, Text
"language"
, Text
"link"
, Text
"longdesc"
, Text
"marginheight"
, Text
"marginwidth"
, Text
"maxlength"
, Text
"media"
, Text
"method"
, Text
"multiple"
, Text
"name"
, Text
"nohref"
, Text
"noresize"
, Text
"noshade"
, Text
"nowrap"
, Text
"object"
, Text
"onblur"
, Text
"onchange"
, Text
"onclick"
, Text
"ondblclick"
, Text
"onfocus"
, Text
"onkeydown"
, Text
"onkeypress"
, Text
"onkeyup"
, Text
"onload"
, Text
"onmousedown"
, Text
"onmousemove"
, Text
"onmouseout"
, Text
"onmouseover"
, Text
"onmouseup"
, Text
"onreset"
, Text
"onselect"
, Text
"onsubmit"
, Text
"onunload"
, Text
"profile"
, Text
"prompt"
, Text
"readonly"
, Text
"rel"
, Text
"rev"
, Text
"rows"
, Text
"rowspan"
, Text
"rules"
, Text
"scheme"
, Text
"scope"
, Text
"scrolling"
, Text
"selected"
, Text
"shape"
, Text
"size"
, Text
"span"
, Text
"src"
, Text
"standby"
, Text
"start"
, Text
"style"
, Text
"summary"
, Text
"tabindex"
, Text
"target"
, Text
"text"
, Text
"title"
, Text
"usemap"
, Text
"valign"
, Text
"value"
, Text
"valuetype"
, Text
"version"
, Text
"vlink"
, Text
"vspace"
, Text
"width"
]