{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TupleSections #-} module URI.ByteString.Internal where ------------------------------------------------------------------------------- import Blaze.ByteString.Builder (Builder) import qualified Blaze.ByteString.Builder as BB import qualified Blaze.ByteString.Builder.Char.Utf8 as BB import Control.Applicative import Control.Monad import Data.Attoparsec.ByteString import qualified Data.Attoparsec.ByteString as A import Data.Bits import Data.ByteString (ByteString) import qualified Data.ByteString as BS import Data.Char (ord) import Data.Ix import Data.List (delete, intersperse, stripPrefix, (\\)) import Data.Maybe import Data.Monoid import Data.Word import Text.Read (readMaybe) ------------------------------------------------------------------------------- import URI.ByteString.Types ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- -- | Strict URI Parser config. Follows RFC3986 as-specified. Use this -- if you can be certain that your URIs are properly encoded or if you -- want parsing to fail if they deviate from the spec at all. strictURIParserOptions :: URIParserOptions strictURIParserOptions = URIParserOptions { upoValidQueryChar = validForQuery } ------------------------------------------------------------------------------- -- | Lax URI Parser config. Use this if you you want to handle common -- deviations from the spec gracefully. -- -- * Allows non-encoded [ and ] in query string laxURIParserOptions :: URIParserOptions laxURIParserOptions = URIParserOptions { upoValidQueryChar = validForQueryLax } ------------------------------------------------------------------------------- -- | URI Serializer ------------------------------------------------------------------------------- -- | Serialize a URI into a Builder. -- -- Example of serializing + converting to a lazy "Data.ByteString.Lazy.ByteString": -- -- >>> BB.toLazyByteString $ serializeURI $ URI {uriScheme = Scheme {schemeBS = "http"}, uriAuthority = Just (Authority {authorityUserInfo = Nothing, authorityHost = Host {hostBS = "www.example.org"}, authorityPort = Nothing}), uriPath = "/foo", uriQuery = Query {queryPairs = [("bar","baz")]}, uriFragment = Just "quux"} -- "http://www.example.org/foo?bar=baz#quux" serializeURI :: URI -> Builder serializeURI URI {..} = scheme <> BB.fromString ":" <> serializeRelativeRef rr where scheme = bs $ schemeBS uriScheme rr = RelativeRef uriAuthority uriPath uriQuery uriFragment -- | Like 'serializeURI', with conversion into a strict 'ByteString'. serializeURI' :: URI -> ByteString serializeURI' = BB.toByteString . serializeURI -- | Like 'serializeURI', but do not render scheme. serializeRelativeRef :: RelativeRef -> Builder serializeRelativeRef RelativeRef {..} = authority <> path <> query <> fragment where path = mconcat $ intersperse (c8 '/') $ map urlEncodePath segs segs = BS.split slash rrPath authority = maybe mempty serializeAuthority rrAuthority query = serializeQuery rrQuery fragment = maybe mempty (\s -> c8 '#' <> bs s) rrFragment -- | Like 'serializeRelativeRef', with conversion into a strict 'ByteString'. serializeRelativeRef' :: RelativeRef -> ByteString serializeRelativeRef' = BB.toByteString . serializeRelativeRef ------------------------------------------------------------------------------- serializeQuery :: Query -> Builder serializeQuery (Query []) = mempty serializeQuery (Query ps) = c8 '?' <> mconcat (intersperse (c8 '&') (map serializePair ps)) where serializePair (k, v) = urlEncodeQuery k <> c8 '=' <> urlEncodeQuery v ------------------------------------------------------------------------------- serializeAuthority :: Authority -> Builder serializeAuthority Authority {..} = BB.fromString "//" <> userinfo <> bs host <> port where userinfo = maybe mempty serializeUserInfo authorityUserInfo host = hostBS authorityHost port = maybe mempty packPort authorityPort packPort (Port p) = c8 ':' <> BB.fromString (show p) ------------------------------------------------------------------------------- serializeUserInfo :: UserInfo -> Builder serializeUserInfo UserInfo {..} = bs uiUsername <> c8 ':' <> bs uiPassword <> c8 '@' ------------------------------------------------------------------------------- bs :: ByteString -> Builder bs = BB.fromByteString ------------------------------------------------------------------------------- c8 :: Char -> Builder c8 = BB.fromChar ------------------------------------------------------------------------------- -- | Parse a strict ByteString into a URI or an error. -- -- Example: -- -- >>> parseURI strictURIParserOptions "http://www.example.org/foo?bar=baz#quux" -- Right (URI {uriScheme = Scheme {schemeBS = "http"}, uriAuthority = Just (Authority {authorityUserInfo = Nothing, authorityHost = Host {hostBS = "www.example.org"}, authorityPort = Nothing}), uriPath = "/foo", uriQuery = Query {queryPairs = [("bar","baz")]}, uriFragment = Just "quux"}) -- -- >>> parseURI strictURIParserOptions "$$$$://badurl.example.org" -- Left (MalformedScheme NonAlphaLeading) -- -- There are some urls that you'll encounter which defy the spec, such -- as those with square brackets in the query string. If you must be -- able to parse those, you can use "laxURIParserOptions" or specify your own -- -- >>> parseURI strictURIParserOptions "http://www.example.org/foo?bar[]=baz" -- Left MalformedQuery -- -- >>> parseURI laxURIParserOptions "http://www.example.org/foo?bar[]=baz" -- Right (URI {uriScheme = Scheme {schemeBS = "http"}, uriAuthority = Just (Authority {authorityUserInfo = Nothing, authorityHost = Host {hostBS = "www.example.org"}, authorityPort = Nothing}), uriPath = "/foo", uriQuery = Query {queryPairs = [("bar[]","baz")]}, uriFragment = Nothing}) -- -- >>> let myLaxOptions = URIParserOptions { upoValidQueryChar = liftA2 (||) (upoValidQueryChar strictURIParserOptions) (inClass "[]")} -- >>> parseURI myLaxOptions "http://www.example.org/foo?bar[]=baz" -- Right (URI {uriScheme = Scheme {schemeBS = "http"}, uriAuthority = Just (Authority {authorityUserInfo = Nothing, authorityHost = Host {hostBS = "www.example.org"}, authorityPort = Nothing}), uriPath = "/foo", uriQuery = Query {queryPairs = [("bar[]","baz")]}, uriFragment = Nothing}) parseURI :: URIParserOptions -> ByteString -> Either URIParseError URI parseURI opts = parseOnly' OtherError (uriParser' opts) -- | Like 'parseURI', but do not parse scheme. parseRelativeRef :: URIParserOptions -> ByteString -> Either URIParseError RelativeRef parseRelativeRef opts = parseOnly' OtherError (relativeRefParser' opts) ------------------------------------------------------------------------------- -- | Convenience alias for a parser that can return URIParseError type URIParser = Parser' URIParseError ------------------------------------------------------------------------------- -- | Underlying attoparsec parser. Useful for composing with your own parsers. uriParser :: URIParserOptions -> Parser URI uriParser = unParser' . uriParser' ------------------------------------------------------------------------------- -- | Toplevel parser for URIs uriParser' :: URIParserOptions -> URIParser URI uriParser' opts = do scheme <- schemeParser void $ word8 colon `orFailWith` MalformedScheme MissingColon RelativeRef authority path query fragment <- relativeRefParser' opts return $ URI scheme authority path query fragment ------------------------------------------------------------------------------- -- | Underlying attoparsec parser. Useful for composing with your own parsers. relativeRefParser :: URIParserOptions -> Parser RelativeRef relativeRefParser = unParser' . relativeRefParser' ------------------------------------------------------------------------------- -- | Toplevel parser for relative refs relativeRefParser' :: URIParserOptions -> URIParser RelativeRef relativeRefParser' opts = do (authority, path) <- hierPartParser <|> rrPathParser query <- queryParser opts frag <- mFragmentParser case frag of Just _ -> endOfInput `orFailWith` MalformedFragment Nothing -> endOfInput `orFailWith` MalformedQuery return $ RelativeRef authority path query frag ------------------------------------------------------------------------------- -- | Parser for scheme, e.g. "http", "https", etc. schemeParser :: URIParser Scheme schemeParser = do c <- satisfy isAlpha `orFailWith` MalformedScheme NonAlphaLeading rest <- A.takeWhile isSchemeValid `orFailWith` MalformedScheme InvalidChars return $ Scheme $ c `BS.cons` rest where isSchemeValid = inClass $ "-+." ++ alphaNum ------------------------------------------------------------------------------- -- | Hier part immediately follows the schema and encompasses the -- authority and path sections. hierPartParser :: URIParser (Maybe Authority, ByteString) hierPartParser = authWithPathParser <|> pathAbsoluteParser <|> pathRootlessParser <|> pathEmptyParser ------------------------------------------------------------------------------- -- | Relative references have awkward corner cases. See -- 'firstRelRefSegmentParser'. rrPathParser :: URIParser (Maybe Authority, ByteString) rrPathParser = (Nothing,) <$> ((<>) <$> firstRelRefSegmentParser <*> pathParser) ------------------------------------------------------------------------------- -- | See the "authority path-abempty" grammar in the RFC authWithPathParser :: URIParser (Maybe Authority, ByteString) authWithPathParser = string' "//" *> ((,) <$> mAuthorityParser <*> pathParser) ------------------------------------------------------------------------------- -- | See the "path-absolute" grammar in the RFC. Essentially a special -- case of rootless. pathAbsoluteParser :: URIParser (Maybe Authority, ByteString) pathAbsoluteParser = string' "/" *> pathRootlessParser ------------------------------------------------------------------------------- -- | See the "path-rootless" grammar in the RFC. pathRootlessParser :: URIParser (Maybe Authority, ByteString) pathRootlessParser = (,) <$> pure Nothing <*> pathParser1 ------------------------------------------------------------------------------- -- | See the "path-empty" grammar in the RFC. Must not be followed -- with a path-valid char. pathEmptyParser :: URIParser (Maybe Authority, ByteString) pathEmptyParser = do nextChar <- peekWord8 `orFailWith` OtherError "impossible peekWord8 error" case nextChar of Just c -> guard (notInClass pchar c) >> return emptyCase _ -> return emptyCase where emptyCase = (Nothing, mempty) ------------------------------------------------------------------------------- -- | Parser whe mAuthorityParser :: URIParser (Maybe Authority) mAuthorityParser = mParse authorityParser ------------------------------------------------------------------------------- -- | Parses the user info section of a URL (i.e. for HTTP Basic -- Authentication). Note that this will decode any percent-encoded -- data. userInfoParser :: URIParser UserInfo userInfoParser = (uiTokenParser <* word8 atSym) `orFailWith` MalformedUserInfo where atSym = 64 uiTokenParser = do ui <- A.takeWhile1 validForUserInfo let (user, passWithColon) = BS.break (== colon) $ urlDecode' ui let pass = BS.drop 1 passWithColon return $ UserInfo user pass validForUserInfo = inClass $ pctEncoded ++ subDelims ++ (':' : unreserved) ------------------------------------------------------------------------------- -- | Authority consists of host and port authorityParser :: URIParser Authority authorityParser = Authority <$> mParse userInfoParser <*> hostParser <*> mPortParser ------------------------------------------------------------------------------- -- | Parser that can handle IPV6/Future literals, IPV4, and domain names. hostParser :: URIParser Host hostParser = (Host <$> parsers) `orFailWith` MalformedHost where parsers = ipLiteralParser <|> ipV4Parser <|> regNameParser ipLiteralParser = word8 oBracket *> (ipVFutureParser <|> ipV6Parser) <* word8 cBracket ------------------------------------------------------------------------------- -- | Parses IPV6 addresses. See relevant section in RFC. ipV6Parser :: Parser ByteString ipV6Parser = do leading <- h16s elided <- maybe [] (const [""]) <$> optional (string "::") trailing <- many (A.takeWhile (/= colon) <* word8 colon) (finalChunkLen, final) <- finalChunk let len = length (leading ++ trailing) + finalChunkLen when (len > 8) $ fail "Too many digits in IPv6 address" return $ rejoin $ [rejoin leading] ++ elided ++ trailing ++ maybeToList final where finalChunk = fromMaybe (0, Nothing) <$> optional (finalIpV4 <|> finalH16) finalH16 = (1, ) . Just <$> h16 finalIpV4 = (2, ) . Just <$> ipV4Parser rejoin = BS.intercalate ":" h16s = h16 `sepBy` word8 colon h16 = mconcat <$> parseBetween 1 4 (A.takeWhile1 hexDigit) ------------------------------------------------------------------------------- -- | Parses IPVFuture addresses. See relevant section in RFC. ipVFutureParser :: Parser ByteString ipVFutureParser = do _ <- word8 lowercaseV ds <- A.takeWhile1 hexDigit _ <- word8 period rest <- A.takeWhile1 $ inClass $ subDelims ++ ":" ++ unreserved return $ "v" <> ds <> "." <> rest where lowercaseV = 118 ------------------------------------------------------------------------------- -- | Parses a valid IPV4 address ipV4Parser :: Parser ByteString ipV4Parser = mconcat <$> sequence [ decOctet , dot , decOctet , dot , decOctet , dot , decOctet] where decOctet = do s <- A.takeWhile1 isDigit let len = BS.length s guard $ len > 0 && len <= 3 let num = bsToNum s guard $ num >= 1 && num <= 255 return s dot = string "." ------------------------------------------------------------------------------- -- | This corresponds to the hostname, e.g. www.example.org regNameParser :: Parser ByteString regNameParser = urlDecode' <$> A.takeWhile1 (inClass validForRegName) where validForRegName = pctEncoded ++ subDelims ++ unreserved ------------------------------------------------------------------------------- -- | Only parse a port if the colon signifier is there. mPortParser :: URIParser (Maybe Port) mPortParser = word8' colon `thenJust` portParser ------------------------------------------------------------------------------- -- | Parses port number from the hostname. Colon separator must be -- handled elsewhere. portParser :: URIParser Port portParser = (Port . bsToNum <$> A.takeWhile1 isDigit) `orFailWith` MalformedPort ------------------------------------------------------------------------------- -- | Path with any number of segments pathParser :: URIParser ByteString pathParser = pathParser' A.many' ------------------------------------------------------------------------------- -- | Path with at least 1 segment pathParser1 :: URIParser ByteString pathParser1 = pathParser' A.many1' ------------------------------------------------------------------------------- -- | Parses the path section of a url. Note that while this can take -- percent-encoded characters, it does not itself decode them while parsing. pathParser' :: (Parser ByteString -> Parser [ByteString]) -> URIParser ByteString pathParser' repeatParser = (urlDecodeQuery . mconcat <$> repeatParser segmentParser) `orFailWith` MalformedPath where segmentParser = mconcat <$> sequence [string "/", A.takeWhile (inClass pchar)] ------------------------------------------------------------------------------- -- | Parses the first segment of a path section of a relative-path -- reference. See RFC 3986, Section 4.2. -- firstRelRefSegmentParser :: URIParser ByteString firstRelRefSegmentParser :: URIParser ByteString firstRelRefSegmentParser = A.takeWhile (inClass (pchar \\ ":")) `orFailWith` MalformedPath ------------------------------------------------------------------------------- -- | This parser is being a bit pragmatic. The query section in the -- spec does not identify the key/value format used in URIs, but that -- is what most users are expecting to see. One alternative could be -- to just expose the query string as a string and offer functions on -- URI to parse a query string to a Query. queryParser :: URIParserOptions -> URIParser Query queryParser opts = do mc <- peekWord8 `orFailWith` OtherError "impossible peekWord8 error" case mc of Just c | c == question -> skip' 1 *> itemsParser | c == hash -> pure mempty | otherwise -> fail' MalformedPath _ -> pure mempty where itemsParser = Query <$> A.sepBy' (queryItemParser opts) (word8' ampersand) ------------------------------------------------------------------------------- -- | When parsing a single query item string like "foo=bar", turns it -- into a key/value pair as per convention, with the value being -- optional. & separators need to be handled further up. queryItemParser :: URIParserOptions -> URIParser (ByteString, ByteString) queryItemParser opts = do s <- A.takeWhile1 (upoValidQueryChar opts) `orFailWith` MalformedQuery let (k, vWithEquals) = BS.break (== equals) s let v = BS.drop 1 vWithEquals return (urlDecodeQuery k, urlDecodeQuery v) ------------------------------------------------------------------------------- validForQuery :: Word8 -> Bool validForQuery = inClass ('?':'/':delete '&' pchar) ------------------------------------------------------------------------------- validForQueryLax :: Word8 -> Bool validForQueryLax = notInClass "&#" ------------------------------------------------------------------------------- -- | Only parses a fragment if the # signifiier is there mFragmentParser :: URIParser (Maybe ByteString) mFragmentParser = word8' hash `thenJust` fragmentParser ------------------------------------------------------------------------------- -- | The final piece of a uri, e.g. #fragment, minus the #. fragmentParser :: URIParser ByteString fragmentParser = A.takeWhile1 validFragmentWord `orFailWith` MalformedFragment where validFragmentWord = inClass ('?':'/':pchar) ------------------------------------------------------------------------------- -- | Grammar Components ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- hexDigit :: Word8 -> Bool hexDigit = inClass "0-9a-fA-F" ------------------------------------------------------------------------------- isAlpha :: Word8 -> Bool isAlpha = inClass alpha ------------------------------------------------------------------------------- isDigit :: Word8 -> Bool isDigit = inClass digit ------------------------------------------------------------------------------- pchar :: String pchar = pctEncoded ++ subDelims ++ ":@" ++ unreserved ------------------------------------------------------------------------------- -- Very important! When concatenating this to other strings to make larger -- character classes, you must put this at the end because the '-' character -- is treated as a range unless it's at the beginning or end. unreserved :: String unreserved = alphaNum ++ "~._-" ------------------------------------------------------------------------------- unreserved8 :: [Word8] unreserved8 = map ord8 unreserved ------------------------------------------------------------------------------- unreservedPath8 :: [Word8] unreservedPath8 = unreserved8 ++ map ord8 ":@&=+$," ------------------------------------------------------------------------------- ord8 :: Char -> Word8 ord8 = fromIntegral . ord ------------------------------------------------------------------------------- -- | pc-encoded technically is % HEXDIG HEXDIG but that's handled by -- the previous alphaNum constraint. May need to double back with a -- parser to ensure pct-encoded never exceeds 2 hexdigs after pctEncoded :: String pctEncoded = "%" ------------------------------------------------------------------------------- subDelims :: String subDelims = "!$&'()*+,;=" ------------------------------------------------------------------------------- alphaNum :: String alphaNum = alpha ++ digit ------------------------------------------------------------------------------- alpha :: String alpha = "a-zA-Z" ------------------------------------------------------------------------------- digit :: String digit = "0-9" ------------------------------------------------------------------------------- colon :: Word8 colon = 58 ------------------------------------------------------------------------------- oBracket :: Word8 oBracket = 91 ------------------------------------------------------------------------------- cBracket :: Word8 cBracket = 93 ------------------------------------------------------------------------------- equals :: Word8 equals = 61 ------------------------------------------------------------------------------- question :: Word8 question = 63 ------------------------------------------------------------------------------- ampersand :: Word8 ampersand = 38 ------------------------------------------------------------------------------- hash :: Word8 hash = 35 ------------------------------------------------------------------------------- period :: Word8 period = 46 ------------------------------------------------------------------------------- slash :: Word8 slash = 47 ------------------------------------------------------------------------------- -- | ByteString Utilities ------------------------------------------------------------------------------- -- FIXME: theres probably a much better way to do this ------------------------------------------------------------------------------- -- | Convert a bytestring into an int representation. Assumes the -- entire string is comprised of 0-9 digits. bsToNum :: ByteString -> Int bsToNum s = sum $ zipWith (*) (reverse ints) [10 ^ x | x <- [0..] :: [Int]] where w2i w = fromEnum $ w - 48 ints = map w2i . BS.unpack $ s ------------------------------------------------------------------------------- -- | Decoding specifically for the query string, which decodes + as -- space. Shorthand for @urlDecode True@ urlDecodeQuery :: ByteString -> ByteString urlDecodeQuery = urlDecode plusToSpace where plusToSpace = True ------------------------------------------------------------------------------- -- | Decode any part of the URL besides the query, which decodes + as -- space. urlDecode' :: ByteString -> ByteString urlDecode' = urlDecode plusToSpace where plusToSpace = False ------------------------------------------------------------------------------- -- | Parsing with Strongly-Typed Errors ------------------------------------------------------------------------------- -- | A parser with a specific error type. Attoparsec unfortunately -- throws all errors into strings, which cannot be handled well -- programmatically without doing something silly like parsing error -- messages. This wrapper attempts to concentrate these errors into -- one type. newtype Parser' e a = Parser' { unParser' :: Parser a} deriving ( Functor , Applicative , Alternative , Monad , MonadPlus , Monoid) ------------------------------------------------------------------------------- -- | Use with caution. Catch a parser failing and return Nothing. mParse :: Parser' e a -> Parser' e (Maybe a) mParse p = option Nothing (Just <$> p) ------------------------------------------------------------------------------- -- | If the first parser succeeds, discard the result and use the -- second parser (which may fail). If the first parser fails, return -- Nothing. This is used to check a benign precondition that indicates -- the presence of a parsible token, i.e. ? preceeding a query. thenJust :: Parser' e a -> Parser' e b -> Parser' e (Maybe b) thenJust p1 p2 = p1 *> (Just <$> p2) <|> pure Nothing ------------------------------------------------------------------------------- -- | Lift a word8 Parser into a strongly error typed parser. This will -- generate a "stringy" error message if it fails, so you should -- probably be prepared to exit with a nicer error further up. word8' :: Word8 -> Parser' e Word8 word8' = Parser' . word8 ------------------------------------------------------------------------------- -- | Skip exactly 1 character. Fails if the character isn't -- there. Generates a "stringy" error. skip' :: Int -> Parser' e () skip' = Parser' . void . A.take ------------------------------------------------------------------------------- -- | Lifted version of the string token parser. Same caveats about -- "stringy" errors apply. string' :: ByteString -> Parser' e ByteString string' = Parser' . string ------------------------------------------------------------------------------- -- | Combinator for tunnelling more specific error types through the -- attoparsec machinery using read/show. orFailWith :: (Show e, Read e) => Parser a -> e -> Parser' e a orFailWith p e = Parser' p <|> fail' e ------------------------------------------------------------------------------- -- | Should be preferred to fail' fail' :: (Show e, Read e) => e -> Parser' e a fail' = fail . show ------------------------------------------------------------------------------- parseBetween :: (Alternative m, Monad m) => Int -> Int -> m a -> m [a] parseBetween a b f = choice parsers where parsers = map (`count` f) $ reverse $ range (a, b) ------------------------------------------------------------------------------- -- | Stronger-typed variation of parseOnly'. Consumes all input. parseOnly' :: (Read e, Show e) => (String -> e) -- ^ Fallback if we can't parse a failure message for the sake of totality. -> Parser' e a -> ByteString -> Either e a parseOnly' noParse (Parser' p) = fmapL readWithFallback . parseOnly p where readWithFallback s = fromMaybe (noParse s) (readMaybe . stripAttoparsecGarbage $ s) ------------------------------------------------------------------------------- -- | Our pal Control.Monad.fail is how attoparsec propagates -- errors. If you throw an error string with fail (your only choice), -- it will *always* prepend it with "Failed reading: ". At least in -- this version. That may change to something else and break this workaround. stripAttoparsecGarbage :: String -> String stripAttoparsecGarbage = stripPrefix' "Failed reading: " ------------------------------------------------------------------------------- -- | stripPrefix where it is a noop if the prefix doesn't exist. stripPrefix' :: Eq a => [a] -> [a] -> [a] stripPrefix' pfx s = fromMaybe s $ stripPrefix pfx s ------------------------------------------------------------------------------- fmapL :: (a -> b) -> Either a r -> Either b r fmapL f = either (Left . f) Right ------------------------------------------------------------------------------- -- | This function was extracted from the @http-types@ package. The -- license can be found in licenses/http-types/LICENSE urlDecode :: Bool -- ^ Whether to decode '+' to ' ' -> BS.ByteString -> BS.ByteString urlDecode replacePlus z = fst $ BS.unfoldrN (BS.length z) go z where go bs' = case BS.uncons bs' of Nothing -> Nothing Just (43, ws) | replacePlus -> Just (32, ws) -- plus to space Just (37, ws) -> Just $ fromMaybe (37, ws) $ do -- percent (x, xs) <- BS.uncons ws x' <- hexVal x (y, ys) <- BS.uncons xs y' <- hexVal y Just (combine x' y', ys) Just (w, ws) -> Just (w, ws) hexVal w | 48 <= w && w <= 57 = Just $ w - 48 -- 0 - 9 | 65 <= w && w <= 70 = Just $ w - 55 -- A - F | 97 <= w && w <= 102 = Just $ w - 87 -- a - f | otherwise = Nothing combine :: Word8 -> Word8 -> Word8 combine a b = shiftL a 4 .|. b ------------------------------------------------------------------------------- --TODO: keep an eye on perf here. seems like a good use case for a DList. the word8 list could be a set/hashset -- | Percent-encoding for URLs. Specify a list of additional -- unreserved characters to permit. urlEncode :: [Word8] -> ByteString -> Builder urlEncode extraUnreserved = mconcat . map encodeChar . BS.unpack where encodeChar ch | unreserved' ch = BB.fromWord8 ch | otherwise = h2 ch unreserved' ch | ch >= 65 && ch <= 90 = True -- A-Z | ch >= 97 && ch <= 122 = True -- a-z | ch >= 48 && ch <= 57 = True -- 0-9 unreserved' c = c `elem` extraUnreserved h2 v = let (a, b) = v `divMod` 16 in bs $ BS.pack [37, h a, h b] -- percent (%) h i | i < 10 = 48 + i -- zero (0) | otherwise = 65 + i - 10 -- 65: A ------------------------------------------------------------------------------- -- | Encode a ByteString for use in the query section of a URL urlEncodeQuery :: ByteString -> Builder urlEncodeQuery = urlEncode unreserved8 ------------------------------------------------------------------------------- -- | Encode a ByteString for use in the path section of a URL urlEncodePath :: ByteString -> Builder urlEncodePath = urlEncode unreservedPath8