-- -- HTTP client for use with io-streams -- -- Copyright © 2012-2021 Athae Eredh Siniath and Others -- -- The code in this file, and the program it is a part of, is -- made available to you by its authors as open source software: -- you can redistribute it and/or modify it under the terms of -- the BSD licence. -- -- Significant portions of this file were written while studying -- the HTTP request parser implementation in the Snap Framework; -- snap-core's src/Snap/Internal/Parsing.hs and snap-server's -- src/Snap/Internal/Http/Parser.hs, and various utility functions -- have been cloned from there. -- {-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_HADDOCK hide, not-home #-} module Network.Http.ResponseParser ( readResponseHeader, readResponseBody, UnexpectedCompression (..), -- for testing readDecimal, ) where import Prelude hiding (take, takeWhile) import Control.Exception (Exception, throwIO) import Control.Monad (void) import Control.Monad.IO.Class (liftIO) import Data.Attoparsec.ByteString.Char8 import Data.Bits (Bits (..)) import Data.ByteString (ByteString) import qualified Data.ByteString.Char8 as S import Data.CaseInsensitive (mk) import Data.Char (ord) import Data.Int (Int64) import Data.Typeable (Typeable) import System.IO.Streams (Generator, InputStream) import qualified System.IO.Streams as Streams import qualified System.IO.Streams.Attoparsec as Streams #if !MIN_VERSION_base(4,8,0) import Control.Applicative #endif import Network.Http.Internal import Network.Http.Utilities {- The chunk size coming down from the server is somewhat arbitrary; it's really just an indication of how many bytes need to be read before the next size marker or end marker - neither of which has anything to do with streaming on our side. Instead, we'll feed bytes into our InputStream at an appropriate intermediate size. -} __BITE_SIZE__ :: Int __BITE_SIZE__ = 32 * 1024 {- Process the reply from the server up to the end of the headers as deliniated by a blank line. -} readResponseHeader :: InputStream ByteString -> IO Response readResponseHeader i = do (sc, sm) <- Streams.parseFromStream parseStatusLine i hs <- readHeaderFields i let h = buildHeaders hs let te = case lookupHeader h "Transfer-Encoding" of Just x' -> if mk x' == "chunked" then Chunked else None Nothing -> None let ce = case lookupHeader h "Content-Encoding" of Just x' -> if mk x' == "gzip" then Gzip else Identity Nothing -> Identity let nm = case lookupHeader h "Content-Length" of Just x' -> Just (readDecimal x' :: Int64) Nothing -> case sc of 204 -> Just 0 304 -> Just 0 100 -> Just 0 _ -> Nothing return Response { pStatusCode = sc , pStatusMsg = sm , pTransferEncoding = te , pContentEncoding = ce , pContentLength = nm , pHeaders = h } parseStatusLine :: Parser (Int, ByteString) parseStatusLine = do sc <- string "HTTP/1." *> satisfy version *> char ' ' *> decimal <* char ' ' sm <- takeTill (== '\r') <* crlf return (sc, sm) where version c = c == '1' || c == '0' crlf :: Parser ByteString crlf = string "\r\n" --------------------------------------------------------------------- {- Switch on the encoding and compression headers, wrapping the raw InputStream to present the entity body's actual bytes. -} readResponseBody :: Response -> InputStream ByteString -> IO (InputStream ByteString) readResponseBody p i1 = do i2 <- case t of None -> case l of Just n -> readFixedLengthBody i1 n Nothing -> readUnlimitedBody i1 Chunked -> readChunkedBody i1 i3 <- case c of Identity -> return i2 Gzip -> readCompressedBody i2 Deflate -> throwIO (UnexpectedCompression $ show c) return i3 where t = pTransferEncoding p c = pContentEncoding p l = pContentLength p readDecimal :: (Enum α, Num α, Bits α) => ByteString -> α readDecimal str' = S.foldl' f 0 x' where f !cnt !i = cnt * 10 + digitToInt i x' = head $ S.words str' {-# INLINE digitToInt #-} digitToInt :: (Enum α, Num α, Bits α) => Char -> α digitToInt c | c >= '0' && c <= '9' = toEnum $! ord c - ord '0' | otherwise = error $ "'" ++ [c] ++ "' is not an ascii digit" {-# INLINE readDecimal #-} data UnexpectedCompression = UnexpectedCompression String deriving (Typeable, Show) instance Exception UnexpectedCompression --------------------------------------------------------------------- {- Process a response body in chunked transfer encoding, taking the resultant bytes and reproducing them as an InputStream -} readChunkedBody :: InputStream ByteString -> IO (InputStream ByteString) readChunkedBody i1 = do i2 <- Streams.fromGenerator (consumeChunks i1) return i2 {- For a response body in chunked transfer encoding, iterate over the individual chunks, reading the size parameter, then looping over that chunk in bites of at most __BYTE_SIZE__, yielding them to the receiveResponse InputStream accordingly. -} consumeChunks :: InputStream ByteString -> Generator ByteString () consumeChunks i1 = do !n <- parseSize if n > 0 then do -- read one or more bites, then loop to next chunk go n skipCRLF consumeChunks i1 else do -- skip "trailers" and consume final CRLF skipEnd where go 0 = return () go !n = do (!x', !r) <- liftIO $ readN n i1 Streams.yield x' go r parseSize = do n <- liftIO $ Streams.parseFromStream transferChunkSize i1 return n skipEnd = do liftIO $ do _ <- readHeaderFields i1 return () skipCRLF = do liftIO $ do _ <- Streams.parseFromStream crlf i1 return () {- Read the specified number of bytes up to a maximum of __BITE_SIZE__, returning a resultant ByteString and the number of bytes remaining. -} readN :: Int -> InputStream ByteString -> IO (ByteString, Int) readN n i1 = do !x' <- Streams.readExactly p i1 return (x', r) where !d = n - size !p = if d > 0 then size else n !r = if d > 0 then d else 0 size = __BITE_SIZE__ transferChunkSize :: Parser (Int) transferChunkSize = do !n <- hexadecimal void (takeTill (== '\r')) void crlf return n --------------------------------------------------------------------- {- This has the rather crucial side effect of terminating the stream after the requested number of bytes. Otherwise, code handling responses waits on more input until an HTTP timeout occurs. -} readFixedLengthBody :: InputStream ByteString -> Int64 -> IO (InputStream ByteString) readFixedLengthBody i1 n = do i2 <- Streams.takeBytes n i1 return i2 {- On the other hand, there is the (predominently HTTP/1.0) case where there is no content length sent and no chunking, with the result that only the connection closing marks the end of the response body. -} readUnlimitedBody :: InputStream ByteString -> IO (InputStream ByteString) readUnlimitedBody i1 = do return i1 --------------------------------------------------------------------- readCompressedBody :: InputStream ByteString -> IO (InputStream ByteString) readCompressedBody i1 = do i2 <- Streams.gunzip i1 return i2