module Text.HTML.SanitizeXSS (sanitizeXSS) where
import Text.HTML.TagSoup
import Data.Set (Set(), member, fromList)
import Data.Char ( toLower )
import Network.URI ( parseURIReference, URI (..),
isAllowedInURI, escapeURIString, uriScheme )
import Codec.Binary.UTF8.String ( encodeString )
sanitizeXSS :: String -> String
sanitizeXSS = renderTagsOptions renderOptions {
optMinimize = \x -> x `elem` ["br","img"]
} . safeTags . parseTags
where
safeTags :: [Tag String] -> [Tag String]
safeTags [] = []
safeTags (t@(TagClose name):tags) | safeTagName name = t:(safeTags tags)
| otherwise = safeTags tags
safeTags (TagOpen name attributes:tags)
| safeTagName name = TagOpen name (filter safeAttribute attributes) : safeTags tags
| otherwise = safeTags tags
safeTags (t:tags) = t:safeTags tags
safeTagName :: String -> Bool
safeTagName tagname = tagname `member` sanitaryTags
safeAttribute :: (String, String) -> Bool
safeAttribute (name, value) = name `member` sanitaryAttributes &&
(name `notElem` ["href","src"] || sanitaryURI value)
sanitaryURI :: String -> Bool
sanitaryURI u =
case parseURIReference (escapeURI u) of
Just p -> (map toLower $ uriScheme p) `member` safeURISchemes
Nothing -> False
escapeURI :: String -> String
escapeURI = escapeURIString isAllowedInURI . encodeString
safeURISchemes :: Set String
safeURISchemes = fromList [ "", "http:", "https:", "ftp:", "mailto:", "file:",
"telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:",
"crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:",
"imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:",
"pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:",
"xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:",
"ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:",
"ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:",
"secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:",
"snews:", "webcal:", "ymsgr:"]
sanitaryTags :: Set String
sanitaryTags = fromList ["a", "abbr", "acronym", "address", "area", "b", "big",
"blockquote", "br", "button", "caption", "center",
"cite", "code", "col", "colgroup", "dd", "del", "dfn",
"dir", "div", "dl", "dt", "em", "fieldset", "font",
"form", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
"i", "img", "input", "ins", "kbd", "label", "legend",
"li", "map", "menu", "ol", "optgroup", "option", "p",
"pre", "q", "s", "samp", "select", "small", "span",
"strike", "strong", "sub", "sup", "table", "tbody",
"td", "textarea", "tfoot", "th", "thead", "tr", "tt",
"u", "ul", "var"]
sanitaryAttributes :: Set String
sanitaryAttributes = fromList ["abbr", "accept", "accept-charset",
"accesskey", "action", "align", "alt", "axis",
"border", "cellpadding", "cellspacing", "char",
"charoff", "charset", "checked", "cite", "class",
"clear", "cols", "colspan", "color", "compact",
"coords", "datetime", "dir", "disabled",
"enctype", "for", "frame", "headers", "height",
"href", "hreflang", "hspace", "id", "ismap",
"label", "lang", "longdesc", "maxlength", "media",
"method", "multiple", "name", "nohref", "noshade",
"nowrap", "prompt", "readonly", "rel", "rev",
"rows", "rowspan", "rules", "scope", "selected",
"shape", "size", "span", "src", "start",
"summary", "tabindex", "target", "title", "type",
"usemap", "valign", "value", "vspace", "width"]