{-# LANGUAGE OverloadedStrings #-} -- | Sanatize HTML to prevent XSS attacks. -- -- See README.md for more details. module Text.HTML.SanitizeXSS ( -- * Sanitize sanitize , sanitizeBalance , sanitizeXSS -- * Custom filtering , filterTags , safeTags , safeTagsCustom , clearTags , clearTagsCustom , balanceTags -- * Utilities , safeTagName , sanitizeAttribute , sanitaryURI ) where import Text.HTML.SanitizeXSS.Css import Text.HTML.TagSoup import Data.Set (Set(), member, notMember, (\\), fromList, fromAscList) import Data.Char ( toLower ) import Data.Text (Text) import qualified Data.Text as T import Network.URI ( parseURIReference, URI (..), isAllowedInURI, escapeURIString, uriScheme ) import Codec.Binary.UTF8.String ( encodeString ) import Data.Maybe (mapMaybe) -- | Sanitize HTML to prevent XSS attacks. This is equivalent to @filterTags safeTags@. sanitize :: Text -> Text sanitize = sanitizeXSS -- | alias of sanitize function sanitizeXSS :: Text -> Text sanitizeXSS = filterTags (safeTags . clearTags) -- | Sanitize HTML to prevent XSS attacks and also make sure the tags are balanced. -- This is equivalent to @filterTags (balanceTags . safeTags)@. sanitizeBalance :: Text -> Text sanitizeBalance = filterTags (balanceTags . safeTags . clearTags) -- | Filter which makes sure the tags are balanced. Use with 'filterTags' and 'safeTags' to create a custom filter. balanceTags :: [Tag Text] -> [Tag Text] balanceTags = balance [] -- | Parse the given text to a list of tags, apply the given filtering -- function, and render back to HTML. You can insert your own custom -- filtering, but make sure you compose your filtering function with -- 'safeTags' and 'clearTags' or 'safeTagsCustom' and 'clearTagsCustom'. filterTags :: ([Tag Text] -> [Tag Text]) -> Text -> Text filterTags f = renderTagsOptions renderOptions { optEscape = id -- stops &"<> from being escaped which breaks existing HTML entities , optMinimize = \x -> x `member` voidElems -- converts to , converts to } . f . canonicalizeTags . parseTagsOptions (parseOptionsEntities (const Nothing)) voidElems :: Set T.Text voidElems = fromAscList $ T.words $ T.pack "area base br col command embed hr img input keygen link meta param source track wbr" balance :: [Text] -- ^ unclosed tags -> [Tag Text] -> [Tag Text] balance unclosed [] = map TagClose $ filter (`notMember` voidElems) unclosed balance (x:xs) tags'@(TagClose name:tags) | x == name = TagClose name : balance xs tags | x `member` voidElems = balance xs tags' | otherwise = TagOpen name [] : TagClose name : balance (x:xs) tags balance unclosed (TagOpen name as : tags) = TagOpen name as : balance (name : unclosed) tags balance unclosed (t:ts) = t : balance unclosed ts -- | Filters out unsafe tags and sanitizes attributes. Use with -- filterTags to create a custom filter. safeTags :: [Tag Text] -> [Tag Text] safeTags = safeTagsCustom safeTagName sanitizeAttribute -- | Filters out unsafe tags and sanitizes attributes, like -- 'safeTags', but uses custom functions for determining which tags -- are safe and for sanitizing attributes. This allows you to add or -- remove specific tags or attributes on the white list, or to use -- your own white list. -- -- @safeTagsCustom safeTagName sanitizeAttribute@ is equivalent to -- 'safeTags'. -- -- @since 0.3.6 safeTagsCustom :: (Text -> Bool) -- ^ Select safe tags, like -- 'safeTagName' -> ((Text, Text) -> Maybe (Text, Text)) -- ^ Sanitize attributes, -- like 'sanitizeAttribute' -> [Tag Text] -> [Tag Text] safeTagsCustom _ _ [] = [] safeTagsCustom safeName sanitizeAttr (t@(TagClose name):tags) | safeName name = t : safeTagsCustom safeName sanitizeAttr tags | otherwise = safeTagsCustom safeName sanitizeAttr tags safeTagsCustom safeName sanitizeAttr (TagOpen name attributes:tags) | safeName name = TagOpen name (mapMaybe sanitizeAttr attributes) : safeTagsCustom safeName sanitizeAttr tags | otherwise = safeTagsCustom safeName sanitizeAttr tags safeTagsCustom n a (t:tags) = t : safeTagsCustom n a tags -- | Directly removes tags even if they are not closed properly. -- This is importent to clear out both the script and iframe tag -- in sequences like "