module Text.HTML.SanitizeXSS
    ( sanitize
    , sanitizeBalance
    , sanitizeXSS
    , filterTags
    , safeTags
    ) where

import Text.HTML.TagSoup

import Data.Set (Set(), member, notMember, (\\), fromList)
import Data.Char ( toLower )

import Network.URI ( parseURIReference, URI (..),
                     isAllowedInURI, escapeURIString, uriScheme )
import Codec.Binary.UTF8.String ( encodeString )

import qualified Data.Map as Map

{-
import Debug.Trace
debug :: (Show a) => a -> a
debug a = trace ("DEBUG: " ++ show a) a
  -}


-- | santize the html to prevent XSS attacks. See README.md <http://github.com/gregwebs/haskell-xss-sanitize> for more details
sanitize :: String -> String
sanitize = sanitizeXSS

-- | alias of sanitize function
sanitizeXSS :: String -> String
sanitizeXSS = filterTags safeTags

-- | same as sanitize but makes sure there are no lone closing tags. See README.md <http://github.com/gregwebs/haskell-xss-sanitize> for more details
sanitizeBalance :: String -> String
sanitizeBalance = filterTags (balance Map.empty . safeTags)

-- | insert custom tag filtering. Don't forget to compose your filter with safeTags!
filterTags :: ([Tag String] -> [Tag String]) -> String -> String
filterTags f = renderTagsOptions renderOptions {
    optMinimize = \x -> x `elem` ["br","img"] -- <img><img> converts to <img />, <a/> converts to <a></a>
  } .  f . canonicalizeTags . parseTags

balance :: Map.Map String Int -> [Tag String] -> [Tag String]
balance m [] =
    concatMap go $ Map.toList m
  where
    go (name, i)
        | noClosing name = []
        | otherwise = replicate i $ TagClose name
    noClosing = flip elem ["br", "img"]
balance m (t@(TagClose name):tags) =
    case Map.lookup name m of
        Nothing -> TagOpen name [] : TagClose name : balance m tags
        Just i ->
            let m' = if i == 1
                        then Map.delete name m
                        else Map.insert name (i - 1) m
             in t : balance m' tags
balance m (TagOpen name as : tags) =
    TagOpen name as : balance m' tags
  where
    m' = case Map.lookup name m of
            Nothing -> Map.insert name 1 m
            Just i -> Map.insert name (i + 1) m
balance m (t:ts) = t : balance m ts

-- | Filters out any usafe tags and attributes. Use with filterTags to create a custom filter.
safeTags :: [Tag String] -> [Tag String]
safeTags [] = []
safeTags (t@(TagClose name):tags)
    | safeTagName name = t : safeTags tags
    | otherwise = safeTags tags
safeTags (TagOpen name attributes:tags)
  | safeTagName name = TagOpen name (filter safeAttribute attributes) : safeTags tags
  | otherwise = safeTags tags
safeTags (t:tags) = t:safeTags tags

safeTagName :: String -> Bool
safeTagName tagname = tagname `member` sanitaryTags

safeAttribute :: (String, String) -> Bool
safeAttribute (name, value) = name `member` sanitaryAttributes &&
  (name `notMember` attrValIsUri || sanitaryURI value)
         

-- | Returns @True@ if the specified URI is not a potential security risk.
sanitaryURI :: String -> Bool
sanitaryURI u =
  case parseURIReference (escapeURI u) of
     Just p  -> (null (uriScheme p)) ||
                ((map toLower $ init $ uriScheme p) `member` safeURISchemes)
     Nothing -> False


-- | Escape unicode characters in a URI.  Characters that are
-- already valid in a URI, including % and ?, are left alone.
escapeURI :: String -> String
escapeURI = escapeURIString isAllowedInURI . encodeString

safeURISchemes :: Set String
safeURISchemes = fromList acceptable_protocols

sanitaryTags :: Set String
sanitaryTags = fromList (acceptable_elements ++ mathml_elements ++ svg_elements)
  \\ (fromList svg_allow_local_href) -- extra filtering not implemented

sanitaryAttributes :: Set String
sanitaryAttributes = fromList (acceptable_attributes ++ mathml_attributes ++ svg_attributes)
  \\ (fromList svg_attr_val_allows_ref) -- extra unescaping not implemented

attrValIsUri :: Set String
attrValIsUri = fromList ["href", "src", "cite", "action", "longdesc",
    "xlink:href", "xml:base"]

acceptable_elements :: [String]
acceptable_elements = ["a", "abbr", "acronym", "address", "area",
    "article", "aside", "audio", "b", "big", "blockquote", "br", "button",
    "canvas", "caption", "center", "cite", "code", "col", "colgroup",
    "command", "datagrid", "datalist", "dd", "del", "details", "dfn",
    "dialog", "dir", "div", "dl", "dt", "em", "event-source", "fieldset",
    "figure", "footer", "font", "form", "header", "h1", "h2", "h3", "h4",
    "h5", "h6", "hr", "i", "img", "input", "ins", "keygen", "kbd",
    "label", "legend", "li", "m", "map", "menu", "meter", "multicol",
    "nav", "nextid", "ol", "output", "optgroup", "option", "p", "pre",
    "progress", "q", "s", "samp", "section", "select", "small", "sound",
    "source", "spacer", "span", "strike", "strong", "sub", "sup", "table",
    "tbody", "td", "textarea", "time", "tfoot", "th", "thead", "tr", "tt",
    "u", "ul", "var", "video"]
  
mathml_elements :: [String]
mathml_elements = ["maction", "math", "merror", "mfrac", "mi",
    "mmultiscripts", "mn", "mo", "mover", "mpadded", "mphantom",
    "mprescripts", "mroot", "mrow", "mspace", "msqrt", "mstyle", "msub",
    "msubsup", "msup", "mtable", "mtd", "mtext", "mtr", "munder",
    "munderover", "none"]

-- this should include altGlyph I think
svg_elements :: [String]
svg_elements = ["a", "animate", "animateColor", "animateMotion",
    "animateTransform", "clipPath", "circle", "defs", "desc", "ellipse",
    "font-face", "font-face-name", "font-face-src", "g", "glyph", "hkern",
    "linearGradient", "line", "marker", "metadata", "missing-glyph",
    "mpath", "path", "polygon", "polyline", "radialGradient", "rect",
    "set", "stop", "svg", "switch", "text", "title", "tspan", "use"]
  
acceptable_attributes :: [String]
acceptable_attributes = ["abbr", "accept", "accept-charset", "accesskey",
    "action", "align", "alt", "autocomplete", "autofocus", "axis",
    "background", "balance", "bgcolor", "bgproperties", "border",
    "bordercolor", "bordercolordark", "bordercolorlight", "bottompadding",
    "cellpadding", "cellspacing", "ch", "challenge", "char", "charoff",
    "choff", "charset", "checked", "cite", "class", "clear", "color",
    "cols", "colspan", "compact", "contenteditable", "controls", "coords",
    -- "data", TODO: allow this with further filtering
    "datafld", "datapagesize", "datasrc", "datetime", "default",
    "delay", "dir", "disabled", "draggable", "dynsrc", "enctype", "end",
    "face", "for", "form", "frame", "galleryimg", "gutter", "headers",
    "height", "hidefocus", "hidden", "high", "href", "hreflang", "hspace",
    "icon", "id", "inputmode", "ismap", "keytype", "label", "leftspacing",
    "lang", "list", "longdesc", "loop", "loopcount", "loopend",
    "loopstart", "low", "lowsrc", "max", "maxlength", "media", "method",
    "min", "multiple", "name", "nohref", "noshade", "nowrap", "open",
    "optimum", "pattern", "ping", "point-size", "prompt", "pqg",
    "radiogroup", "readonly", "rel", "repeat-max", "repeat-min",
    "replace", "required", "rev", "rightspacing", "rows", "rowspan",
    "rules", "scope", "selected", "shape", "size", "span", "src", "start",
    "step",
    -- "style", TODO: allow this with further filtering
    "summary", "suppress", "tabindex", "target",
    "template", "title", "toppadding", "type", "unselectable", "usemap",
    "urn", "valign", "value", "variable", "volume", "vspace", "vrml",
    "width", "wrap", "xml:lang"]

acceptable_protocols :: [String]
acceptable_protocols = [ "ed2k", "ftp", "http", "https", "irc",
    "mailto", "news", "gopher", "nntp", "telnet", "webcal",
    "xmpp", "callto", "feed", "urn", "aim", "rsync", "tag",
    "ssh", "sftp", "rtsp", "afs" ]

mathml_attributes :: [String]
mathml_attributes = ["actiontype", "align", "columnalign", "columnalign",
    "columnalign", "columnlines", "columnspacing", "columnspan", "depth",
    "display", "displaystyle", "equalcolumns", "equalrows", "fence",
    "fontstyle", "fontweight", "frame", "height", "linethickness", "lspace",
    "mathbackground", "mathcolor", "mathvariant", "mathvariant", "maxsize",
    "minsize", "other", "rowalign", "rowalign", "rowalign", "rowlines",
    "rowspacing", "rowspan", "rspace", "scriptlevel", "selection",
    "separator", "stretchy", "width", "width", "xlink:href", "xlink:show",
    "xlink:type", "xmlns", "xmlns:xlink"]

svg_attributes :: [String]
svg_attributes = ["accent-height", "accumulate", "additive", "alphabetic",
    "arabic-form", "ascent", "attributeName", "attributeType",
    "baseProfile", "bbox", "begin", "by", "calcMode", "cap-height",
    "class", "clip-path", "color", "color-rendering", "content", "cx",
    "cy", "d", "dx", "dy", "descent", "display", "dur", "end", "fill",
    "fill-opacity", "fill-rule", "font-family", "font-size",
    "font-stretch", "font-style", "font-variant", "font-weight", "from",
    "fx", "fy", "g1", "g2", "glyph-name", "gradientUnits", "hanging",
    "height", "horiz-adv-x", "horiz-origin-x", "id", "ideographic", "k",
    "keyPoints", "keySplines", "keyTimes", "lang", "marker-end",
    "marker-mid", "marker-start", "markerHeight", "markerUnits",
    "markerWidth", "mathematical", "max", "min", "name", "offset",
    "opacity", "orient", "origin", "overline-position",
    "overline-thickness", "panose-1", "path", "pathLength", "points",
    "preserveAspectRatio", "r", "refX", "refY", "repeatCount",
    "repeatDur", "requiredExtensions", "requiredFeatures", "restart",
    "rotate", "rx", "ry", "slope", "stemh", "stemv", "stop-color",
    "stop-opacity", "strikethrough-position", "strikethrough-thickness",
    "stroke", "stroke-dasharray", "stroke-dashoffset", "stroke-linecap",
    "stroke-linejoin", "stroke-miterlimit", "stroke-opacity",
    "stroke-width", "systemLanguage", "target", "text-anchor", "to",
    "transform", "type", "u1", "u2", "underline-position",
    "underline-thickness", "unicode", "unicode-range", "units-per-em",
    "values", "version", "viewBox", "visibility", "width", "widths", "x",
    "x-height", "x1", "x2", "xlink:actuate", "xlink:arcrole",
    "xlink:href", "xlink:role", "xlink:show", "xlink:title", "xlink:type",
    "xml:base", "xml:lang", "xml:space", "xmlns", "xmlns:xlink", "y",
    "y1", "y2", "zoomAndPan"]

-- the values for these need to be escaped
svg_attr_val_allows_ref :: [String]
svg_attr_val_allows_ref = ["clip-path", "color-profile", "cursor", "fill",
    "filter", "marker", "marker-start", "marker-mid", "marker-end",
    "mask", "stroke"]

svg_allow_local_href :: [String]
svg_allow_local_href = ["altGlyph", "animate", "animateColor",
    "animateMotion", "animateTransform", "cursor", "feImage", "filter",
    "linearGradient", "pattern", "radialGradient", "textpath", "tref",
    "set", "use"]

{- style value (css) filtering not implemented
 -
 - this is used for css filtering
allowed_svg_properties = fromList acceptable_svg_properties
acceptable_svg_properties = [ "fill", "fill-opacity", "fill-rule",
    "stroke", "stroke-width", "stroke-linecap", "stroke-linejoin",
    "stroke-opacity"]


allowed_css_properties = fromList acceptable_css_properties
allowed_css_keywords = fromList acceptable_css_keywords
acceptable_css_properties = ["azimuth", "background-color",
    "border-bottom-color", "border-collapse", "border-color",
    "border-left-color", "border-right-color", "border-top-color", "clear",
    "color", "cursor", "direction", "display", "elevation", "float", "font",
    "font-family", "font-size", "font-style", "font-variant", "font-weight",
    "height", "letter-spacing", "line-height", "overflow", "pause",
    "pause-after", "pause-before", "pitch", "pitch-range", "richness",
    "speak", "speak-header", "speak-numeral", "speak-punctuation",
    "speech-rate", "stress", "text-align", "text-decoration", "text-indent",
    "unicode-bidi", "vertical-align", "voice-family", "volume",
    "white-space", "width"]
acceptable_css_keywords = ["auto", "aqua", "black", "block", "blue",
    "bold", "both", "bottom", "brown", "center", "collapse", "dashed",
    "dotted", "fuchsia", "gray", "green", "!important", "italic", "left",
    "lime", "maroon", "medium", "none", "navy", "normal", "nowrap", "olive",
    "pointer", "purple", "red", "right", "solid", "silver", "teal", "top",
    "transparent", "underline", "white", "yellow"]
-}