module Snap.Internal.Parsing where
import Blaze.ByteString.Builder
import Control.Applicative
import Control.Arrow (first, second)
import Control.Monad
import Data.Attoparsec.Char8 hiding (Done, many)
import qualified Data.Attoparsec.Char8 as Atto
import Data.Attoparsec.FastSet (FastSet)
import qualified Data.Attoparsec.FastSet as FS
import Data.Bits
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as S
import Data.ByteString.Internal (c2w, w2c)
import qualified Data.ByteString.Lazy.Char8 as L
import Data.ByteString.Nums.Careless.Int (int)
import qualified Data.ByteString.Nums.Careless.Hex as Cvt
import qualified Data.CaseInsensitive as CI
import Data.CaseInsensitive (CI)
import Data.Char hiding (isDigit, isSpace)
import Data.DList (DList)
import qualified Data.DList as DL
import Data.Int
import Data.List (intersperse)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Maybe
import Data.Monoid
import Data.Word
import Prelude hiding (head, take, takeWhile)
import Snap.Internal.Http.Types
fullyParse :: ByteString -> Parser a -> Either String a
fullyParse s p =
case r' of
(Fail _ _ e) -> Left e
(Partial _) -> Left "parse failed"
(Atto.Done _ x) -> Right x
where
r = parse p s
r' = feed r ""
parseNum :: Parser Int64
parseNum = liftM int $ Atto.takeWhile1 Atto.isDigit
sp, digit, letter :: Parser Char
sp = char ' '
digit = satisfy isDigit
letter = satisfy isAlpha
untilEOL :: Parser ByteString
untilEOL = takeWhile notend
where
notend c = not $ c == '\r' || c == '\n'
crlf :: Parser ByteString
crlf = string "\r\n"
generateFS :: (Word8 -> Bool) -> FastSet
generateFS f = FS.fromList $ filter f [0..255]
spaces :: Parser [Char]
spaces = many sp
pSpaces :: Parser ByteString
pSpaces = takeWhile isSpace
fieldChars :: Parser ByteString
fieldChars = takeWhile isFieldChar
where
isFieldChar = flip FS.memberChar fieldCharSet
fieldCharSet :: FastSet
fieldCharSet = generateFS f
where
f d = let c = (toEnum $ fromEnum d)
in (isDigit c) || (isAlpha c) || c == '-' || c == '_'
pHeaders :: Parser [(ByteString, ByteString)]
pHeaders = many header
where
header =
liftA2 (,)
fieldName
(char ':' *> spaces *> contents)
fieldName =
liftA2 S.cons letter fieldChars
contents =
liftA2 S.append
(untilEOL <* crlf)
(continuation <|> pure S.empty)
isLeadingWS w =
elem w wstab
wstab = " \t"
leadingWhiteSpace =
takeWhile1 isLeadingWS
continuation =
liftA2 S.cons
(leadingWhiteSpace *> pure ' ')
contents
pWord :: Parser ByteString
pWord = pQuotedString <|> (takeWhile (/= ';'))
pQuotedString :: Parser ByteString
pQuotedString = q *> quotedText <* q
where
quotedText = (S.concat . reverse) <$> f []
f soFar = do
t <- takeWhile qdtext
let soFar' = t:soFar
choice [ string "\\\"" *> f ("\"" : soFar')
, pure soFar' ]
q = char '\"'
qdtext = matchAll [ isRFCText, (/= '\"'), (/= '\\') ]
isRFCText :: Char -> Bool
isRFCText = not . isControl
matchAll :: [ Char -> Bool ] -> Char -> Bool
matchAll x c = and $ map ($ c) x
pAvPairs :: Parser [(ByteString, ByteString)]
pAvPairs = do
a <- pAvPair
b <- many (pSpaces *> char ';' *> pSpaces *> pAvPair)
return $ a:b
pAvPair :: Parser (ByteString, ByteString)
pAvPair = do
key <- pToken <* pSpaces
val <- liftM trim (option "" $ char '=' *> pSpaces *> pWord)
return (key, val)
pParameter :: Parser (ByteString, ByteString)
pParameter = do
key <- pToken <* pSpaces
val <- liftM trim (char '=' *> pSpaces *> pWord)
return (trim key, val)
trim :: ByteString -> ByteString
trim = snd . S.span isSpace . fst . S.spanEnd isSpace
pValueWithParameters :: Parser (ByteString, [(CI ByteString, ByteString)])
pValueWithParameters = do
value <- liftM trim (pSpaces *> takeWhile (/= ';'))
params <- many pParam
return (value, map (first CI.mk) params)
where
pParam = pSpaces *> char ';' *> pSpaces *> pParameter
pContentTypeWithParameters ::
Parser (ByteString, [(CI ByteString, ByteString)])
pContentTypeWithParameters = do
value <- liftM trim (pSpaces *> takeWhile (not . isSep))
params <- many (pSpaces *> satisfy isSep *> pSpaces *> pParameter)
return (value, map (first CI.mk) params)
where
isSep c = c == ';' || c == ','
pToken :: Parser ByteString
pToken = takeWhile isToken
isToken :: Char -> Bool
isToken c = FS.memberChar c tokenTable
tokenTable :: FastSet
tokenTable = generateFS (f . toEnum . fromEnum)
where
f = matchAll [ isAscii
, not . isControl
, not . isSpace
, not . flip elem [ '(', ')', '<', '>', '@', ',', ';'
, ':', '\\', '\"', '/', '[', ']'
, '?', '=', '{', '}' ]
]
parseToCompletion :: Parser a -> ByteString -> Maybe a
parseToCompletion p s = toResult $ finish r
where
r = parse p s
toResult (Atto.Done _ c) = Just c
toResult _ = Nothing
pUrlEscaped :: Parser ByteString
pUrlEscaped = do
sq <- nextChunk DL.empty
return $ S.concat $ DL.toList sq
where
nextChunk :: DList ByteString -> Parser (DList ByteString)
nextChunk s = (endOfInput *> pure s) <|> do
c <- anyChar
case c of
'+' -> plusSpace s
'%' -> percentEncoded s
_ -> unEncoded c s
percentEncoded :: DList ByteString -> Parser (DList ByteString)
percentEncoded l = do
hx <- take 2
when (S.length hx /= 2 || (not $ S.all isHexDigit hx)) $
fail "bad hex in url"
let code = w2c ((Cvt.hex hx) :: Word8)
nextChunk $ DL.snoc l (S.singleton code)
unEncoded :: Char -> DList ByteString -> Parser (DList ByteString)
unEncoded c l' = do
let l = DL.snoc l' (S.singleton c)
bs <- takeTill (flip elem "%+")
if S.null bs
then nextChunk l
else nextChunk $ DL.snoc l bs
plusSpace :: DList ByteString -> Parser (DList ByteString)
plusSpace l = nextChunk (DL.snoc l (S.singleton ' '))
urlDecode :: ByteString -> Maybe ByteString
urlDecode = parseToCompletion pUrlEscaped
urlEncode :: ByteString -> ByteString
urlEncode = toByteString . urlEncodeBuilder
urlEncodeBuilder :: ByteString -> Builder
urlEncodeBuilder = S.foldl' f mempty
where
f b c =
if c == ' '
then b `mappend` fromWord8 (c2w '+')
else if FS.memberChar c urlEncodeTable
then b `mappend` fromWord8 (c2w c)
else b `mappend` hexd c
urlEncodeTable :: FastSet
urlEncodeTable = generateFS f
where
f w = any ($ c) [ isAlphaNum
, flip elem ['$', '-', '.', '!', '*'
, '\'', '(', ')', ',' ]]
where
c = w2c w
hexd :: Char -> Builder
hexd c0 = fromWord8 (c2w '%') `mappend` fromWord8 hi `mappend` fromWord8 low
where
c = c2w c0
d = c2w . intToDigit
low = d $ fromEnum $ c .&. 0xf
hi = d $ fromEnum $ (c .&. 0xf0) `shiftR` 4
finish :: Atto.Result a -> Atto.Result a
finish (Atto.Partial f) = flip feed "" $ f ""
finish x = x
parseUrlEncoded :: ByteString -> Map ByteString [ByteString]
parseUrlEncoded s = foldr (\(k,v) m -> Map.insertWith' (++) k [v] m)
Map.empty
decoded
where
breakApart = (second (S.drop 1)) . S.break (== '=')
parts :: [(ByteString,ByteString)]
parts = map breakApart $
S.splitWith (\c -> c == '&' || c == ';') s
urldecode = parseToCompletion pUrlEscaped
decodeOne (a,b) = do
a' <- urldecode a
b' <- urldecode b
return (a',b')
decoded = catMaybes $ map decodeOne parts
buildUrlEncoded :: Map ByteString [ByteString] -> Builder
buildUrlEncoded m = mconcat builders
where
builders = intersperse (fromWord8 $ c2w '&') $
concatMap encodeVS $ Map.toList m
encodeVS (k,vs) = map (encodeOne k) vs
encodeOne k v = mconcat [ urlEncodeBuilder k
, fromWord8 $ c2w '='
, urlEncodeBuilder v ]
printUrlEncoded :: Map ByteString [ByteString] -> ByteString
printUrlEncoded = toByteString . buildUrlEncoded
pCookies :: Parser [Cookie]
pCookies = do
kvps <- pAvPairs
return $ map toCookie $ filter (not . S.isPrefixOf "$" . fst) kvps
where
toCookie (nm,val) = Cookie nm val Nothing Nothing Nothing False False
parseCookie :: ByteString -> Maybe [Cookie]
parseCookie = parseToCompletion pCookies
strictize :: L.ByteString -> ByteString
strictize = S.concat . L.toChunks