{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} -- | Query strings generally have the following form: @"key1=value1&key2=value2"@ -- -- >>> renderQuery False [("key1", Just "value1"), ("key2", Just "value2")] -- "key1=value1&key2=value2" -- -- But if the value of @key1@ is 'Nothing', it becomes: @key1&key2=value2@ -- -- >>> renderQuery False [("key1", Nothing), ("key2", Just "value2")] -- "key1&key2=value2" -- -- This module also provides type synonyms and functions to handle queries -- that do not allow/expect keys without values ('SimpleQuery'), handle -- queries which have partially escaped characters module Network.HTTP.Types.URI ( -- * Query strings -- ** Query Query, QueryItem, renderQuery, renderQueryBuilder, parseQuery, parseQueryReplacePlus, -- *** Query (Text) QueryText, queryTextToQuery, queryToQueryText, renderQueryText, parseQueryText, -- ** SimpleQuery -- | If values are guaranteed, it might be easier working with 'SimpleQuery'. -- -- This way, you don't have to worry about any 'Maybe's, though when parsing -- a query string and there's no @\'=\'@ after the key in the query item, the -- value will just be an empty 'B.ByteString'. SimpleQuery, SimpleQueryItem, simpleQueryToQuery, renderSimpleQuery, parseSimpleQuery, -- ** PartialEscapeQuery -- | For some values in query items, certain characters must not be percent-encoded, -- for example @\'+\'@ or @\':\'@ in -- -- @q=a+language:haskell+created:2009-01-01..2009-02-01&sort=stars@ -- -- Using specific 'EscapeItem's provides a way to decide which parts of a query string value -- will be URL encoded and which won't. -- -- This is mandatory when searching for @\'+\'@ (@%2B@ being a percent-encoded @\'+\'@): -- -- @q=%2B+language:haskell@ PartialEscapeQuery, PartialEscapeQueryItem, EscapeItem (..), renderQueryPartialEscape, renderQueryBuilderPartialEscape, -- * Path -- ** Segments + Query String extractPath, encodePath, decodePath, -- ** Path Segments encodePathSegments, encodePathSegmentsRelative, decodePathSegments, -- * URL encoding / decoding urlEncode, urlEncodeBuilder, urlDecode, ) where import Control.Arrow (second, (***)) import Data.Bits (shiftL, (.|.)) import qualified Data.ByteString as B import qualified Data.ByteString.Builder as B import qualified Data.ByteString.Lazy as BL import Data.Char (ord) import Data.List (intersperse) import Data.Maybe (fromMaybe) #if __GLASGOW_HASKELL__ < 710 import Data.Monoid #endif import Data.Text (Text) import Data.Text.Encoding (decodeUtf8With, encodeUtf8) import Data.Text.Encoding.Error (lenientDecode) import Data.Word (Word8) -- | An item from the query string, split up into two parts. -- -- The second part should be 'Nothing' if there was no key-value -- separator after the query item name. -- -- @since 0.2.0 type QueryItem = (B.ByteString, Maybe B.ByteString) -- | A sequence of 'QueryItem's. type Query = [QueryItem] -- | Like Query, but with 'Text' instead of 'B.ByteString' (UTF8-encoded). -- -- @since 0.5.2 type QueryText = [(Text, Maybe Text)] -- | Convert 'QueryText' to 'Query'. -- -- @since 0.5.2 queryTextToQuery :: QueryText -> Query queryTextToQuery = map $ encodeUtf8 *** fmap encodeUtf8 -- | Convert 'QueryText' to a 'B.Builder'. -- -- If you want a question mark (@?@) added to the front of the result, use 'True'. -- -- @since 0.5.2 renderQueryText :: Bool -> QueryText -> B.Builder renderQueryText b = renderQueryBuilder b . queryTextToQuery -- | Convert 'Query' to 'QueryText' (leniently decoding the UTF-8). -- -- @since 0.5.2 queryToQueryText :: Query -> QueryText queryToQueryText = map $ go *** fmap go where go = decodeUtf8With lenientDecode -- | Parse a 'QueryText' from a 'B.ByteString'. See 'parseQuery' for details. -- -- @'queryToQueryText' . 'parseQuery'@ -- -- @since 0.5.2 parseQueryText :: B.ByteString -> QueryText parseQueryText = queryToQueryText . parseQuery -- | Simplified query item type without support for parameter-less items. -- -- @since 0.2.0 type SimpleQueryItem = (B.ByteString, B.ByteString) -- | A sequence of 'SimpleQueryItem's. type SimpleQuery = [SimpleQueryItem] -- | Convert 'SimpleQuery' to 'Query'. -- -- @since 0.5 simpleQueryToQuery :: SimpleQuery -> Query simpleQueryToQuery = map (second Just) -- | Renders the given 'Query' into a 'Builder'. -- -- If you want a question mark (@?@) added to the front of the result, use 'True'. -- -- @since 0.5 renderQueryBuilder :: Bool -> Query -> B.Builder renderQueryBuilder _ [] = mempty renderQueryBuilder qmark' (p : ps) = -- FIXME: replace mconcat + map with foldr mconcat $ go (if qmark' then qmark else mempty) p : map (go amp) ps where qmark = B.byteString "?" amp = B.byteString "&" equal = B.byteString "=" go sep (k, mv) = mconcat [ sep , urlEncodeBuilder True k , case mv of Nothing -> mempty Just v -> equal `mappend` urlEncodeBuilder True v ] -- | Renders the given 'Query' into a 'B.ByteString'. -- -- If you want a question mark (@?@) added to the front of the result, use 'True'. -- -- @since 0.2.0 renderQuery :: Bool -> Query -> B.ByteString renderQuery qm = BL.toStrict . B.toLazyByteString . renderQueryBuilder qm -- | Render the given 'SimpleQuery' into a 'ByteString'. -- -- If you want a question mark (@?@) added to the front of the result, use 'True'. -- -- @since 0.2.0 renderSimpleQuery :: Bool -> SimpleQuery -> B.ByteString renderSimpleQuery useQuestionMark = renderQuery useQuestionMark . simpleQueryToQuery -- | Split out the query string into a list of keys and values. A few -- importants points: -- -- * The result returned is still bytestrings, since we perform no character -- decoding here. Most likely, you will want to use UTF-8 decoding, but this is -- left to the user of the library. -- -- * Percent decoding errors are ignored. In particular, @"%Q"@ will be output as -- @"%Q"@. -- -- * It decodes @\'+\'@ characters to @\' \'@ -- -- @since 0.2.0 parseQuery :: B.ByteString -> Query parseQuery = parseQueryReplacePlus True -- | Same functionality as 'parseQuery', but with the option to decode @\'+\'@ characters to @\' \'@ -- or to preserve any @\'+\'@ encountered. -- -- If you want to replace any @\'+\'@ with a space, use 'True'. -- -- @since 0.12.2 parseQueryReplacePlus :: Bool -> B.ByteString -> Query parseQueryReplacePlus replacePlus bs = parseQueryString' $ dropQuestion bs where dropQuestion q = case B.uncons q of Just (63, q') -> q' _ -> q parseQueryString' q | B.null q = [] parseQueryString' q = let (x, xs) = breakDiscard queryStringSeparators q in parsePair x : parseQueryString' xs where parsePair x = let (k, v) = B.break (== 61) x -- equal sign v'' = case B.uncons v of Just (_, v') -> Just $ urlDecode replacePlus v' _ -> Nothing in (urlDecode replacePlus k, v'') queryStringSeparators :: B.ByteString queryStringSeparators = B.pack [38, 59] -- ampersand, semicolon -- | Break the second bytestring at the first occurrence of any bytes from -- the first bytestring, discarding that byte. breakDiscard :: B.ByteString -> B.ByteString -> (B.ByteString, B.ByteString) breakDiscard seps s = let (x, y) = B.break (`B.elem` seps) s in (x, B.drop 1 y) -- | Parse 'SimpleQuery' from a 'ByteString'. -- -- This uses 'parseQuery' under the hood, and will transform -- any 'Nothing' values into an empty 'B.ByteString'. -- -- @since 0.2.0 parseSimpleQuery :: B.ByteString -> SimpleQuery parseSimpleQuery = map (second $ fromMaybe B.empty) . parseQuery ord8 :: Char -> Word8 ord8 = fromIntegral . ord unreservedQS, unreservedPI :: [Word8] unreservedQS = map ord8 "-_.~" -- FIXME: According to RFC 3986, the following are also allowed in path segments: -- "!'()*;" -- -- https://www.rfc-editor.org/rfc/rfc3986#section-3.3 unreservedPI = map ord8 "-_.~:@&=+$," -- | Percent-encoding for URLs. -- -- This will substitute every byte with its percent-encoded equivalent unless: -- -- * The byte is alphanumeric. (i.e. one of @/[A-Za-z0-9]/@) -- -- * The byte is one of the 'Word8' listed in the first argument. urlEncodeBuilder' :: [Word8] -> B.ByteString -> B.Builder urlEncodeBuilder' extraUnreserved = mconcat . map encodeChar . B.unpack where encodeChar ch | unreserved ch = B.word8 ch | otherwise = h2 ch unreserved ch | ch >= 65 && ch <= 90 = True -- A-Z | ch >= 97 && ch <= 122 = True -- a-z | ch >= 48 && ch <= 57 = True -- 0-9 unreserved c = c `elem` extraUnreserved -- must be upper-case h2 v = B.word8 37 `mappend` B.word8 (h a) `mappend` B.word8 (h b) -- 37 = % where (a, b) = v `divMod` 16 h i | i < 10 = 48 + i -- zero (0) | otherwise = 65 + i - 10 -- 65: A -- | Percent-encoding for URLs. -- -- Like 'urlEncode', but only makes the 'B.Builder'. -- -- @since 0.5 urlEncodeBuilder :: Bool -> B.ByteString -> B.Builder urlEncodeBuilder True = urlEncodeBuilder' unreservedQS urlEncodeBuilder False = urlEncodeBuilder' unreservedPI -- | Percent-encoding for URLs. -- -- In short: -- -- * if you're encoding (parts of) a path element, use 'False'. -- -- * if you're encoding (parts of) a query string, use 'True'. -- -- === __In-depth explanation__ -- -- This will substitute every byte with its percent-encoded equivalent unless: -- -- * The byte is alphanumeric. (i.e. @A-Z@, @a-z@, or @0-9@) -- -- * The byte is either a dash @\'-\'@, an underscore @\'_\'@, a dot @\'.\'@, or a tilde @\'~\'@ -- -- * If 'False' is used, the following will also /not/ be percent-encoded: -- -- * colon @\':\'@, at sign @\'\@\'@, ampersand @\'&\'@, equals sign @\'=\'@, plus sign @\'+\'@, dollar sign @\'$\'@ or a comma @\',\'@ -- -- @since 0.2.0 urlEncode :: Bool -> B.ByteString -> B.ByteString urlEncode q = BL.toStrict . B.toLazyByteString . urlEncodeBuilder q -- | Percent-decoding. -- -- If you want to replace any @\'+\'@ with a space, use 'True'. -- -- @since 0.2.0 urlDecode :: Bool -> B.ByteString -> B.ByteString urlDecode replacePlus z = fst $ B.unfoldrN (B.length z) go z where go bs = case B.uncons bs of Nothing -> Nothing -- plus to space Just (43, ws) | replacePlus -> Just (32, ws) -- percent Just (37, ws) -> Just $ fromMaybe (37, ws) $ do (x, xs) <- B.uncons ws x' <- hexVal x (y, ys) <- B.uncons xs y' <- hexVal y Just (combine x' y', ys) Just (w, ws) -> Just (w, ws) hexVal w | 48 <= w && w <= 57 = Just $ w - 48 -- 0 - 9 | 65 <= w && w <= 70 = Just $ w - 55 -- A - F | 97 <= w && w <= 102 = Just $ w - 87 -- a - f | otherwise = Nothing combine :: Word8 -> Word8 -> Word8 combine a b = shiftL a 4 .|. b -- | Encodes a list of path segments into a valid URL fragment. -- -- This function takes the following three steps: -- -- * UTF-8 encodes the characters. -- -- * Prepends each segment with a slash. -- -- * Performs percent-encoding on all characters that are __not__: -- -- * alphanumeric (i.e. @A-Z@ and @a-z@) -- -- * digits (i.e. @0-9@) -- -- * a dash @\'-\'@, an underscore @\'_\'@, a dot @\'.\'@, or a tilde @\'~\'@ -- -- For example: -- -- >>> encodePathSegments ["foo", "bar1", "~baz"] -- "/foo/bar1/~baz" -- -- >>> encodePathSegments ["foo bar", "baz/bin"] -- "/foo%20bar/baz%2Fbin" -- -- >>> encodePathSegments ["שלום"] -- "/%D7%A9%D7%9C%D7%95%D7%9D" -- -- Huge thanks to /Jeremy Shaw/ who created the original implementation of this -- function in web-routes and did such thorough research to determine all -- correct escaping procedures. -- -- @since 0.5 encodePathSegments :: [Text] -> B.Builder encodePathSegments = foldr (\x -> mappend (B.byteString "/" `mappend` encodePathSegment x)) mempty -- | Like 'encodePathSegments', but without the initial slash. -- -- @since 0.6.10 encodePathSegmentsRelative :: [Text] -> B.Builder encodePathSegmentsRelative xs = mconcat $ intersperse (B.byteString "/") (map encodePathSegment xs) encodePathSegment :: Text -> B.Builder encodePathSegment = urlEncodeBuilder False . encodeUtf8 -- | Parse a list of path segments from a valid URL fragment. -- -- Will also decode any percent-encoded characters. -- -- @since 0.5 decodePathSegments :: B.ByteString -> [Text] decodePathSegments "" = [] decodePathSegments "/" = [] decodePathSegments a = go $ drop1Slash a where drop1Slash bs = case B.uncons bs of Just (47, bs') -> bs' -- 47 == / _ -> bs go bs = let (x, y) = B.break (== 47) bs in decodePathSegment x : if B.null y then [] else go $ B.drop 1 y decodePathSegment :: B.ByteString -> Text decodePathSegment = decodeUtf8With lenientDecode . urlDecode False -- | Extract whole path (path segments + query) from a -- [RFC 2616 Request-URI](http://tools.ietf.org/html/rfc2616#section-5.1.2). -- -- Though a more accurate description of this function's behaviour is that -- it removes the domain/origin if the string starts with an HTTP protocol. -- (i.e. @http://@ or @https://@) -- -- This function will not change anything when given any other 'B.ByteString'. -- (except return a root path @\"\/\"@ if given an empty string) -- -- >>> extractPath "/path" -- "/path" -- -- >>> extractPath "http://example.com:8080/path" -- "/path" -- -- >>> extractPath "http://example.com" -- "/" -- -- >>> extractPath "" -- "/" -- -- >>> extractPath "www.google.com/some/path" -- "www.google.com/some/path" -- -- @since 0.8.5 extractPath :: B.ByteString -> B.ByteString extractPath = ensureNonEmpty . extract where extract path | "http://" `B.isPrefixOf` path = (snd . breakOnSlash . B.drop 7) path | "https://" `B.isPrefixOf` path = (snd . breakOnSlash . B.drop 8) path | otherwise = path breakOnSlash = B.break (== 47) ensureNonEmpty "" = "/" ensureNonEmpty p = p -- | Encode a whole path (path segments + query). -- -- @since 0.5 encodePath :: [Text] -> Query -> B.Builder encodePath x [] = encodePathSegments x encodePath x y = encodePathSegments x `mappend` renderQueryBuilder True y -- | Decode a whole path (path segments + query). -- -- @since 0.5 decodePath :: B.ByteString -> ([Text], Query) decodePath b = let (x, y) = B.break (== 63) b -- question mark in (decodePathSegments x, parseQuery y) ----------------------------------------------------------------------------------------- -- | Section of a query item value that decides whether to use -- regular URL encoding (using @'urlEncode True'@) with 'QE', -- or to not encode /anything/ with 'QN'. -- -- @since 0.12.1 data EscapeItem = -- | will be URL encoded QE B.ByteString | -- | will NOT /at all/ be URL encoded QN B.ByteString deriving (Show, Eq, Ord) -- | Partially escaped query item. -- -- The key will always be encoded using @'urlEncode True'@, -- but the value will be encoded depending on which 'EscapeItem's are used. -- -- @since 0.12.1 type PartialEscapeQueryItem = (B.ByteString, [EscapeItem]) -- | Query with some characters that should not be escaped. -- -- General form: @a=b&c=d:e+f&g=h@ -- -- @since 0.12.1 type PartialEscapeQuery = [PartialEscapeQueryItem] -- | Convert 'PartialEscapeQuery' to 'ByteString'. -- -- If you want a question mark (@?@) added to the front of the result, use 'True'. -- -- >>> renderQueryPartialEscape True [("a", [QN "x:z + ", QE (encodeUtf8 "They said: \"שלום\"")])] -- "?a=x:z + They%20said%3A%20%22%D7%A9%D7%9C%D7%95%D7%9D%22" -- -- @since 0.12.1 renderQueryPartialEscape :: Bool -> PartialEscapeQuery -> B.ByteString renderQueryPartialEscape qm = BL.toStrict . B.toLazyByteString . renderQueryBuilderPartialEscape qm -- | Convert a 'PartialEscapeQuery' to a 'B.Builder'. -- -- If you want a question mark (@?@) added to the front of the result, use 'True'. -- -- @since 0.12.1 renderQueryBuilderPartialEscape :: Bool -> PartialEscapeQuery -> B.Builder renderQueryBuilderPartialEscape _ [] = mempty -- FIXME replace mconcat + map with foldr renderQueryBuilderPartialEscape qmark' (p : ps) = mconcat $ go (if qmark' then qmark else mempty) p : map (go amp) ps where qmark = B.byteString "?" amp = B.byteString "&" equal = B.byteString "=" go sep (k, mv) = mconcat [ sep , urlEncodeBuilder True k , case mv of [] -> mempty vs -> equal `mappend` mconcat (map encode vs) ] encode (QE v) = urlEncodeBuilder True v encode (QN v) = B.byteString v