module Text.Pandoc.XML ( stripTags,
escapeCharForXML,
escapeStringForXML,
inTags,
selfClosingTag,
inTagsSimple,
inTagsIndented,
toEntities,
fromEntities ) where
import Text.Pandoc.Pretty
import Data.Char (ord, isAscii, isSpace)
import Text.HTML.TagSoup.Entity (lookupEntity)
stripTags :: String -> String
stripTags ('<':xs) =
let (_,rest) = break (=='>') xs
in if null rest
then ""
else stripTags (tail rest)
stripTags (x:xs) = x : stripTags xs
stripTags [] = []
escapeCharForXML :: Char -> String
escapeCharForXML x = case x of
'&' -> "&"
'<' -> "<"
'>' -> ">"
'"' -> """
c -> [c]
escapeStringForXML :: String -> String
escapeStringForXML = concatMap escapeCharForXML
attributeList :: [(String, String)] -> Doc
attributeList = hcat . map
(\(a, b) -> text (' ' : escapeStringForXML a ++ "=\"" ++
escapeStringForXML b ++ "\""))
inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
inTags isIndented tagType attribs contents =
let openTag = char '<' <> text tagType <> attributeList attribs <>
char '>'
closeTag = text "</" <> text tagType <> char '>'
in if isIndented
then openTag $$ nest 2 contents $$ closeTag
else openTag <> contents <> closeTag
selfClosingTag :: String -> [(String, String)] -> Doc
selfClosingTag tagType attribs =
char '<' <> text tagType <> attributeList attribs <> text " />"
inTagsSimple :: String -> Doc -> Doc
inTagsSimple tagType = inTags False tagType []
inTagsIndented :: String -> Doc -> Doc
inTagsIndented tagType = inTags True tagType []
toEntities :: String -> String
toEntities [] = ""
toEntities (c:cs)
| isAscii c = c : toEntities cs
| otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs
fromEntities :: String -> String
fromEntities ('&':xs) =
case lookupEntity ent of
Just c -> c : fromEntities rest
Nothing -> '&' : fromEntities xs
where (ent, rest) = case break (\c -> isSpace c || c == ';') xs of
(zs,';':ys) -> (zs,ys)
_ -> ("",xs)
fromEntities (x:xs) = x : fromEntities xs
fromEntities [] = []