{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE DeriveDataTypeable #-} --------------------------------------------------------- -- | -- Module : Web.Encodings -- Copyright : Michael Snoyman -- License : BSD3 -- -- Maintainer : Michael Snoyman -- Stability : Stable -- Portability : portable -- -- Various web encodings. -- --------------------------------------------------------- module Web.Encodings ( -- * Simple encodings. -- ** URL (percentage encoding) encodeUrl , decodeUrl , decodeUrlFailure , DecodeUrlException (..) -- ** HTML (entity encoding) , encodeHtml , decodeHtml -- ** JSON , encodeJson , decodeJson -- * HTTP level encoding. -- ** Query string- pairs of percentage encoding , encodeUrlPairs , decodeUrlPairs , decodeUrlPairsFailure -- ** Post parameters , FileInfo (..) , parseMultipart , parsePost -- ** Specific HTTP headers , decodeCookies , parseCookies , parseHttpAccept -- * Date/time encoding , formatW3 ) where import Numeric (showHex) import Data.List (isPrefixOf, sortBy) import Web.Encodings.MimeHeader import Data.Maybe (fromMaybe) import Data.Time.Clock import System.Locale import Data.Time.Format import Web.Encodings.StringLike (StringLike) import qualified Web.Encodings.StringLike as SL import Control.Failure import Safe import Data.Char (ord, isControl) import Data.Function (on) import Data.Typeable (Typeable) import Control.Exception (Exception) import qualified Data.ByteString as BS -- | Encode all but unreserved characters with percentage encoding. -- -- This function implicitly converts the given value to a UTF-8 bytestream. encodeUrl :: StringLike a => a -> a encodeUrl = SL.concatMapUtf8 encodeUrlChar encodeUrlChar :: Char -> String encodeUrlChar c -- List of unreserved characters per RFC 3986 -- Gleaned from http://en.wikipedia.org/wiki/Percent-encoding | 'A' <= c && c <= 'Z' = [c] | 'a' <= c && c <= 'z' = [c] | '0' <= c && c <= '9' = [c] encodeUrlChar c@'-' = [c] encodeUrlChar c@'_' = [c] encodeUrlChar c@'.' = [c] encodeUrlChar c@'~' = [c] encodeUrlChar ' ' = "+" encodeUrlChar y = let (a, c) = fromEnum y `divMod` 16 b = a `mod` 16 showHex' x -- FIXME just use Numeric version? | x < 10 = toEnum $ x + (fromEnum '0') | x < 16 = toEnum $ x - 10 + (fromEnum 'A') | otherwise = error $ "Invalid argument to showHex: " ++ show x in ['%', showHex' b, showHex' c] -- | Decode percentage encoding. Assumes use of UTF-8 character encoding. -- -- If there are any parse errors, this returns the original input. If you would -- like to be alerted more directly of errors, use 'decodeUrlFailure'. decodeUrl :: StringLike s => s -> s decodeUrl s = fromMaybe s $ decodeUrlFailure s data DecodeUrlException = InvalidPercentEncoding | InvalidUtf8Encoding deriving (Show, Typeable) instance Exception DecodeUrlException -- | Same as 'decodeUrl', but 'failure's on either invalid percent or UTF8 -- encoding. decodeUrlFailure :: (MonadFailure DecodeUrlException m, StringLike s) => s -> m s decodeUrlFailure s = do bs <- decodeUrlFailure' s case SL.unpackUtf8 bs of Nothing -> failure InvalidUtf8Encoding Just x -> return x decodeUrlFailure' :: (MonadFailure DecodeUrlException m, StringLike s) => s -> m BS.ByteString decodeUrlFailure' s = do case SL.uncons s of Nothing -> return SL.empty Just (a, s') -> case a of '%' -> do case SL.uncons s' of Nothing -> failure InvalidPercentEncoding Just (b, s'') -> case SL.uncons s'' of Nothing -> failure InvalidPercentEncoding Just (c, s''') -> case getHex b c of Nothing -> failure InvalidPercentEncoding Just h -> do s'''' <- decodeUrlFailure' s''' return $ h `SL.cons` s'''' '+' -> do s'' <- decodeUrlFailure' s' return $ ' ' `SL.cons` s'' _ -> do s'' <- decodeUrlFailure' s' return $ a `SL.cons` s'' getHex :: Char -> Char -> Maybe Char getHex x y = do x' <- hexVal x y' <- hexVal y return $ toEnum $ x' * 16 + y' -- | Escape special HTML characters. encodeHtml :: StringLike s => s -> s encodeHtml = SL.concatMap encodeHtmlChar encodeHtmlChar :: Char -> String encodeHtmlChar '<' = "<" encodeHtmlChar '>' = ">" encodeHtmlChar '&' = "&" encodeHtmlChar '"' = """ encodeHtmlChar '\'' = "'" encodeHtmlChar c = [c] -- | Decode HTML-encoded content into plain content. -- -- Note: this does not support all HTML entities available. It also swallows -- all failures. decodeHtml :: StringLike s => s -> s decodeHtml s = case SL.uncons s of Nothing -> SL.empty Just ('&', xs) -> fromMaybe ('&' `SL.cons` decodeHtml xs) $ do (before, after) <- SL.breakCharMaybe ';' xs c <- case SL.unpack before of -- this are small enough that unpack is ok "lt" -> return '<' "gt" -> return '>' "amp" -> return '&' "quot" -> return '"' '#' : 'x' : hex -> readHexChar hex '#' : 'X' : hex -> readHexChar hex '#' : dec -> readDecChar dec _ -> Nothing -- just to shut up a warning return $ c `SL.cons` decodeHtml after Just (x, xs) -> x `SL.cons` decodeHtml xs readHexChar :: String -> Maybe Char readHexChar s = helper 0 s where helper i "" = return $ toEnum i helper i (c:cs) = do c' <- hexVal c helper (i * 16 + c') cs hexVal :: Char -> Maybe Int hexVal c | '0' <= c && c <= '9' = Just $ ord c - ord '0' | 'A' <= c && c <= 'F' = Just $ ord c - ord 'A' + 10 | 'a' <= c && c <= 'f' = Just $ ord c - ord 'a' + 10 | otherwise = Nothing readDecChar :: String -> Maybe Char readDecChar s = do i <- readMay s :: Maybe Int return $ toEnum i -- | Convert into key-value pairs. Strips the leading ? if necesary. decodeUrlPairs :: StringLike s => s -> [(s, s)] decodeUrlPairs = map decodeUrlPair . SL.split '&' . SL.dropWhile (== '?') -- | Convert into key-value pairs. Strips the leading ? if necesary. 'failure's -- as necesary for invalid encodings. decodeUrlPairsFailure :: (StringLike s, MonadFailure DecodeUrlException m) => s -> m [(s, s)] decodeUrlPairsFailure = mapM decodeUrlPairFailure . SL.split '&' . SL.dropWhile (== '?') decodeUrlPairFailure :: (StringLike s, MonadFailure DecodeUrlException m) => s -> m (s, s) decodeUrlPairFailure b = do let (x, y) = SL.breakChar '=' b x' <- decodeUrlFailure x y' <- decodeUrlFailure y return (x', y') decodeUrlPair :: StringLike s => s -> (s, s) decodeUrlPair b = let (x, y) = SL.breakChar '=' b in (decodeUrl x, decodeUrl y) -- | Convert a list of key-values pairs into a query string. -- Does not include the question mark at the beginning. encodeUrlPairs :: StringLike s => [(s, s)] -> s encodeUrlPairs = SL.intercalate (SL.pack "&") . map encodeUrlPair encodeUrlPair :: StringLike s => (s, s) -> s encodeUrlPair (x, y) = encodeUrl x `SL.append` ('=' `SL.cons` encodeUrl y) -- | Perform JSON-encoding on a string. Does not wrap in quotation marks. -- Taken from json package by Sigbjorn Finne. encodeJson :: StringLike s => s -> s encodeJson = SL.concatMap encodeJsonChar encodeJsonChar :: Char -> String encodeJsonChar '\b' = "\\b" encodeJsonChar '\f' = "\\f" encodeJsonChar '\n' = "\\n" encodeJsonChar '\r' = "\\r" encodeJsonChar '\t' = "\\t" encodeJsonChar '"' = "\\\"" encodeJsonChar '\\' = "\\\\" encodeJsonChar c | not $ isControl c = [c] | c < '\x10' = '\\' : 'u' : '0' : '0' : '0' : hexxs | c < '\x100' = '\\' : 'u' : '0' : '0' : hexxs | c < '\x1000' = '\\' : 'u' : '0' : hexxs where hexxs = showHex (fromEnum c) "" -- FIXME encodeJsonChar c = [c] decodeJson :: StringLike s => s -> s decodeJson s = case SL.uncons s of Nothing -> SL.empty Just ('\\', xs) -> fromMaybe ('\\' `SL.cons` decodeJson xs) $ do (x, xs') <- SL.uncons xs if x == 'u' then do (a, e) <- SL.uncons xs' (b, f) <- SL.uncons e (c, g) <- SL.uncons f (d, h) <- SL.uncons g res <- readHexChar [a, b, c, d] return $ res `SL.cons` decodeJson h else do c <- case x of 'b' -> return '\b' 'f' -> return '\f' 'n' -> return '\n' 'r' -> return '\r' 't' -> return '\t' '"' -> return '"' '\'' -> return '\'' '\\' -> return '\\' _ -> Nothing return $ c `SL.cons` decodeJson xs' Just (x, xs) -> x `SL.cons` decodeJson xs -- | Information on an uploaded file. data FileInfo s c = FileInfo { fileName :: s , fileContentType :: s , fileContent :: c } instance Show s => Show (FileInfo s a) where show (FileInfo fn ct _) = "FileInfo: " ++ show fn ++ " (" ++ show ct ++ ")" -- | Parse a multipart form into parameters and files. parseMultipart :: StringLike s => String -- ^ boundary -> s -- ^ content -> ([(s, s)], [(s, FileInfo s s)]) parseMultipart boundary content = let pieces = getPieces boundary content getJusts [] = [] getJusts (Nothing:rest) = getJusts rest getJusts ((Just x):rest) = x : getJusts rest getLefts [] = [] getLefts (Left x:rest) = x : getLefts rest getLefts (Right _:rest) = getLefts rest getRights [] = [] getRights (Left _:rest) = getRights rest getRights (Right x:rest) = x : getRights rest pieces' = getJusts $ map parsePiece pieces in (getLefts pieces', getRights pieces') -- | Parse a single segment of a multipart/form-data POST. parsePiece :: (StringLike s, MonadFailure (AttributeNotFound s) m) => s -> m (Either (s, s) (s, FileInfo s s)) parsePiece b = do let (headers', content) = SL.takeUntilBlank b headers = map parseHeader headers' name <- lookupHeaderAttr (SL.pack "Content-Disposition") (SL.pack "name") headers let filename = lookupHeaderAttr (SL.pack "Content-Disposition") (SL.pack "filename") headers let ctype = fromMaybe SL.empty $ lookupHeader (SL.pack "Content-Type") headers -- charset = lookupHeaderAttr "Content-Type" "charset" headers return $ case filename of Nothing -> Left (name, SL.chomp content) Just f -> Right (name, FileInfo f ctype content) -- | Split up a bytestring along the given boundary. getPieces :: StringLike s => String -- ^ boundary -> s -- ^ content -> [s] {- FIXME this would be nice... getPieces b c = let fullBound = ord '-' `BS.cons'` (ord '-' `BS.cons'` b) pieces = fullBound `BS.split` c in filter (/= toLazyByteString "--") $ filter (not . BS.null) $ map chompBS pieces -} getPieces _ c | SL.null c = [] getPieces b c = let fullBound = SL.pack $ '-' `SL.cons` ('-' `SL.cons` b) (next, rest) = SL.breakString fullBound c rest' = checkRest rest rest'' = getPieces b rest' in if SL.null next then rest'' else SL.chomp next : rest'' where br = '\r' bn = '\n' dash = '-' checkRest bs | SL.lengthLT 2 bs = SL.empty | SL.head bs == bn = SL.tail bs | SL.head bs == br && SL.head (SL.tail bs) == bn = SL.tail $ SL.tail bs | SL.head bs == dash && SL.head (SL.tail bs) == dash = SL.empty | otherwise = SL.empty -- FIXME -- | Parse a post request. This function determines the correct decoding -- function to use. parsePost :: StringLike s => String -- ^ content type -> String -- ^ content length -> s -- ^ body of the post -> ([(s, s)], [(s, FileInfo s s)]) parsePost ctype clength body | urlenc `SL.isPrefixOf` ctype = (decodeUrlPairs content, []) | formBound `isPrefixOf` ctype = parseMultipart boundProcessed content | otherwise = ([], []) where len = case reads clength of ((x, _):_) -> x [] -> 0 content = SL.take len body urlenc = "application/x-www-form-urlencoded" formBound = "multipart/form-data; boundary=" boundProcessed = drop (length formBound) ctype {-# DEPRECATED decodeCookies "Please use parseCookies instead" #-} -- | Deprecate alias for 'parseCookies'. decodeCookies :: StringLike s => s -> [(s, s)] decodeCookies = parseCookies -- | Decode the value of an HTTP_COOKIE header into key/value pairs. parseCookies :: StringLike s => s -> [(s, s)] parseCookies s | SL.null s = [] | otherwise = let (first, rest) = SL.break (== ';') s in parseCookie first : parseCookies (SL.dropWhile (== ';') rest) parseCookie :: StringLike s => s -> (s, s) parseCookie s = let (key, value) = SL.break (== '=') s key' = SL.dropWhile (== ' ') key value' = case SL.uncons value of Just ('=', rest) -> rest _ -> value in (key', value') -- | Parse the HTTP accept string to determine supported content types. parseHttpAccept :: StringLike s => s -> [s] parseHttpAccept = map fst . sortBy (rcompare `on` snd) . map grabQ . SL.split ',' rcompare :: Ord a => a -> a -> Ordering rcompare x y = case compare x y of LT -> GT GT -> LT EQ -> EQ grabQ :: StringLike s => s -> (s, Double) grabQ s = let (s', q) = SL.breakChar ';' s (_, q') = SL.breakChar '=' q in (trimWhite s', readQ $ trimWhite q') readQ :: StringLike s => s -> Double readQ = Safe.readDef 1.0 . SL.unpack trimWhite :: StringLike s => s -> s trimWhite = SL.dropWhile (== ' ') -- | Format a 'UTCTime' in W3 format; useful for setting cookies. formatW3 :: UTCTime -> String formatW3 = formatTime defaultTimeLocale "%FT%X-00:00"