{-# LANGUAGE FlexibleContexts #-} --------------------------------------------------------- -- | -- Module : Web.Encodings -- Copyright : Michael Snoyman -- License : BSD3 -- -- Maintainer : Michael Snoyman -- Stability : Stable -- Portability : portable -- -- Various web encodings. -- --------------------------------------------------------- module Web.Encodings ( -- * Simple encodings. -- ** URL (percentage encoding) encodeUrl , decodeUrl -- ** HTML (entity encoding) , encodeHtml , decodeHtml -- ** JSON , encodeJson , decodeJson -- * HTTP level encoding. -- ** Query string- pairs of percentage encoding , encodeUrlPairs , decodeUrlPairs -- ** Post parameters , FileInfo (..) , parseMultipart , parsePost -- ** Cookies , decodeCookies -- * Date/time encoding , formatW3 ) where import Numeric (showHex) import Data.List (isPrefixOf) import Web.Encodings.MimeHeader import Data.Maybe (fromMaybe) import Data.Time.Clock import System.Locale import Data.Time.Format import Web.Encodings.StringLike (StringLike) import qualified Web.Encodings.StringLike as SL import Control.Failure import Safe import Data.Char (ord, isControl) -- | Encode all but unreserved characters with percentage encoding. -- -- Assumes use of UTF-8 character encoding. encodeUrl :: StringLike s => s -> s encodeUrl = SL.concatMap encodeUrlChar encodeUrlChar :: Char -> String encodeUrlChar c -- List of unreserved characters per RFC 3986 -- Gleaned from http://en.wikipedia.org/wiki/Percent-encoding | 'A' <= c && c <= 'Z' = [c] | 'a' <= c && c <= 'z' = [c] | '0' <= c && c <= '9' = [c] encodeUrlChar c@'-' = [c] encodeUrlChar c@'_' = [c] encodeUrlChar c@'.' = [c] encodeUrlChar c@'~' = [c] encodeUrlChar ' ' = "+" encodeUrlChar y = let (a, c) = fromEnum y `divMod` 16 b = a `mod` 16 showHex' x -- FIXME just use Numeric version? | x < 10 = toEnum $ x + (fromEnum '0') | x < 16 = toEnum $ x - 10 + (fromEnum 'A') | otherwise = error $ "Invalid argument to showHex: " ++ show x in ['%', showHex' b, showHex' c] -- | Decode percentage encoding. Assumes use of UTF-8 character encoding. decodeUrl :: StringLike s => s -> s decodeUrl s = fromMaybe s $ do (a, s') <- SL.uncons s case a of '%' -> do (b, s'') <- SL.uncons s' (c, s''') <- SL.uncons s'' return $ getHex b c `SL.cons` decodeUrl s''' '+' -> return $ ' ' `SL.cons` decodeUrl s' _ -> return $ a `SL.cons` decodeUrl s' getHex :: Char -> Char -> Char getHex x y = toEnum $ (fromHex x) * 16 + fromHex y fromHex :: Char -> Int fromHex = fromMaybe 0 . hexVal -- FIXME this fromMaybe is rather bad... -- | Escape special HTML characters. encodeHtml :: StringLike s => s -> s encodeHtml = SL.concatMap encodeHtmlChar encodeHtmlChar :: Char -> String encodeHtmlChar '<' = "<" encodeHtmlChar '>' = ">" encodeHtmlChar '&' = "&" encodeHtmlChar '"' = """ encodeHtmlChar '\'' = "'" encodeHtmlChar c = [c] -- | Decode HTML-encoded content into plain content. -- -- Note: this does not support all HTML entities available. It also swallows -- all failures. decodeHtml :: StringLike s => s -> s decodeHtml s = case SL.uncons s of Nothing -> SL.empty Just ('&', xs) -> fromMaybe ('&' `SL.cons` decodeHtml xs) $ do (before, after) <- SL.breakCharMaybe ';' xs c <- case SL.unpack before of -- this are small enough that unpack is ok "lt" -> return '<' "gt" -> return '>' "amp" -> return '&' "quot" -> return '"' '#' : 'x' : hex -> readHexChar hex '#' : 'X' : hex -> readHexChar hex '#' : dec -> readDecChar dec _ -> Nothing -- just to shut up a warning return $ c `SL.cons` decodeHtml after Just (x, xs) -> x `SL.cons` decodeHtml xs readHexChar :: String -> Maybe Char readHexChar s = helper 0 s where helper i "" = return $ toEnum i helper i (c:cs) = do c' <- hexVal c helper (i * 16 + c') cs hexVal :: Char -> Maybe Int hexVal c | '0' <= c && c <= '9' = Just $ ord c - ord '0' | 'A' <= c && c <= 'F' = Just $ ord c - ord 'A' + 10 | 'a' <= c && c <= 'f' = Just $ ord c - ord 'a' + 10 | otherwise = Nothing readDecChar :: String -> Maybe Char readDecChar s = do i <- readMay s :: Maybe Int return $ toEnum i -- | Convert into key-value pairs. Strips the leading ? if necesary. decodeUrlPairs :: StringLike s => s -> [(s, s)] decodeUrlPairs = map decodeUrlPair . SL.split '&' . SL.dropWhile (== '?') {- decodeUrlPairs s = unsafePerformIO $ do putStrLn $ "Received: " ++ show s let dropped = SL.dropWhile (== '?') s putStrLn $ "Dropped: " ++ show dropped let sp = SL.split '&' dropped putStrLn $ "Split: " ++ show sp let f = filter (not . SL.null) sp putStrLn $ "Filtered: " ++ show f let res = map decodeUrlPair f putStrLn $ "Result: " ++ show res return res -} decodeUrlPair :: StringLike s => s -> (s, s) decodeUrlPair b = let (x, y) = SL.breakChar '=' b in (decodeUrl x, decodeUrl y) -- | Convert a list of key-values pairs into a query string. -- Does not include the question mark at the beginning. encodeUrlPairs :: StringLike s => [(s, s)] -> s encodeUrlPairs = SL.intercalate (SL.pack "&") . map encodeUrlPair encodeUrlPair :: StringLike s => (s, s) -> s encodeUrlPair (x, y) = encodeUrl x `SL.append` ('=' `SL.cons` encodeUrl y) -- | Perform JSON-encoding on a string. Does not wrap in quotation marks. -- Taken from json package by Sigbjorn Finne. encodeJson :: StringLike s => s -> s encodeJson = SL.concatMap encodeJsonChar encodeJsonChar :: Char -> String encodeJsonChar '\b' = "\\b" encodeJsonChar '\f' = "\\f" encodeJsonChar '\n' = "\\n" encodeJsonChar '\r' = "\\r" encodeJsonChar '\t' = "\\t" encodeJsonChar '"' = "\\\"" encodeJsonChar '\\' = "\\\\" encodeJsonChar c | not $ isControl c = [c] | c < '\x10' = '\\' : 'u' : '0' : '0' : '0' : hexxs | c < '\x100' = '\\' : 'u' : '0' : '0' : hexxs | c < '\x1000' = '\\' : 'u' : '0' : hexxs where hexxs = showHex (fromEnum c) "" -- FIXME encodeJsonChar c = [c] decodeJson :: StringLike s => s -> s decodeJson s = case SL.uncons s of Nothing -> SL.empty Just ('\\', xs) -> fromMaybe ('\\' `SL.cons` decodeJson xs) $ do (x, xs') <- SL.uncons xs if x == 'u' then do (a, e) <- SL.uncons xs' (b, f) <- SL.uncons e (c, g) <- SL.uncons f (d, h) <- SL.uncons g res <- readHexChar [a, b, c, d] return $ res `SL.cons` decodeJson h else do c <- case x of 'b' -> return '\b' 'f' -> return '\f' 'n' -> return '\n' 'r' -> return '\r' 't' -> return '\t' '"' -> return '"' '\'' -> return '\'' '\\' -> return '\\' _ -> Nothing return $ c `SL.cons` decodeJson xs' Just (x, xs) -> x `SL.cons` decodeJson xs -- | Information on an uploaded file. data FileInfo s c = FileInfo { fileName :: s , fileContentType :: s , fileContent :: c } instance Show s => Show (FileInfo s a) where show (FileInfo fn ct _) = "FileInfo: " ++ show fn ++ " (" ++ show ct ++ ")" -- | Parse a multipart form into parameters and files. parseMultipart :: StringLike s => String -- ^ boundary -> s -- ^ content -> ([(s, s)], [(s, FileInfo s s)]) parseMultipart boundary content = let pieces = getPieces boundary content getJusts [] = [] getJusts (Nothing:rest) = getJusts rest getJusts ((Just x):rest) = x : getJusts rest getLefts [] = [] getLefts (Left x:rest) = x : getLefts rest getLefts (Right _:rest) = getLefts rest getRights [] = [] getRights (Left _:rest) = getRights rest getRights (Right x:rest) = x : getRights rest pieces' = getJusts $ map parsePiece pieces in (getLefts pieces', getRights pieces') -- | Parse a single segment of a multipart/form-data POST. parsePiece :: (StringLike s, MonadFailure (AttributeNotFound s) m) => s -> m (Either (s, s) (s, FileInfo s s)) parsePiece b = do let (headers', content) = SL.takeUntilBlank b headers = map parseHeader headers' name <- lookupHeaderAttr (SL.pack "Content-Disposition") (SL.pack "name") headers let filename = lookupHeaderAttr (SL.pack "Content-Disposition") (SL.pack "filename") headers let ctype = fromMaybe SL.empty $ lookupHeader (SL.pack "Content-Type") headers -- charset = lookupHeaderAttr "Content-Type" "charset" headers return $ case filename of Nothing -> Left (name, SL.chomp content) Just f -> Right (name, FileInfo f ctype content) -- | Split up a bytestring along the given boundary. getPieces :: StringLike s => String -- ^ boundary -> s -- ^ content -> [s] {- FIXME this would be nice... getPieces b c = let fullBound = ord '-' `BS.cons'` (ord '-' `BS.cons'` b) pieces = fullBound `BS.split` c in filter (/= toLazyByteString "--") $ filter (not . BS.null) $ map chompBS pieces -} getPieces _ c | SL.null c = [] getPieces b c = let fullBound = SL.pack $ '-' `SL.cons` ('-' `SL.cons` b) (next, rest) = SL.breakString fullBound c rest' = checkRest rest rest'' = getPieces b rest' in if SL.null next then rest'' else SL.chomp next : rest'' where br = '\r' bn = '\n' dash = '-' checkRest bs | SL.lengthLT 2 bs = SL.empty | SL.head bs == bn = SL.tail bs | SL.head bs == br && SL.head (SL.tail bs) == bn = SL.tail $ SL.tail bs | SL.head bs == dash && SL.head (SL.tail bs) == dash = SL.empty | otherwise = SL.empty -- FIXME -- | Parse a post request. This function determines the correct decoding -- function to use. parsePost :: StringLike s => String -- ^ content type -> String -- ^ content length -> s -- ^ body of the post -> ([(s, s)], [(s, FileInfo s s)]) parsePost ctype clength body | urlenc `SL.isPrefixOf` ctype = (decodeUrlPairs content, []) | formBound `isPrefixOf` ctype = parseMultipart boundProcessed content | otherwise = ([], []) where len = case reads clength of ((x, _):_) -> x [] -> 0 content = SL.take len body urlenc = "application/x-www-form-urlencoded" formBound = "multipart/form-data; boundary=" boundProcessed = drop (length formBound) ctype -- | Decode the value of an HTTP_COOKIE header into key/value pairs. decodeCookies :: String -> [(String, String)] decodeCookies [] = [] decodeCookies s = let (first, rest) = break (== ';') s in decodeCookie first : decodeCookies (dropWhile (== ';') rest) decodeCookie :: String -> (String, String) decodeCookie s = let (key, value) = break (== '=') s key' = dropWhile (== ' ') key value' = case value of ('=':rest) -> rest x -> x in (key', value') -- | Format a 'UTCTime' in W3 format; useful for setting cookies. formatW3 :: UTCTime -> String formatW3 = formatTime defaultTimeLocale "%FT%X-00:00"