{-# LANGUAGE OverloadedStrings, BangPatterns, RecordWildCards, ViewPatterns,
             DoAndIfThenElse, PatternGuards, ScopedTypeVariables,
             TupleSections #-}
{-# OPTIONS_GHC -fwarn-incomplete-patterns -fwarn-unused-imports #-}
-- | HTTP downloader tailored for web-crawler needs.
--
--  * Handles all possible http-client exceptions and returns
--    human readable error messages.
--
--  * Handles some web server bugs (returning @deflate@ data instead of @gzip@,
--    invalid @gzip@ encoding).
--
--  * Uses OpenSSL instead of @tls@ package (since @tls@ doesn't handle all sites and works slower than OpenSSL).
--
--  * Ignores invalid SSL sertificates.
--
--  * Receives data in 32k chunks internally to reduce memory fragmentation
--    on many parallel downloads.
--
--  * Download timeout.
--
--  * Total download size limit.
--
--  * Returns HTTP headers for subsequent redownloads
--    and handles @Not modified@ results.
--
--  * Can be used with external DNS resolver (hsdns-cache for example).
--
--  * Keep-alive connections pool (thanks to http-client).
--
--  Typical workflow in crawler:
--
--  @
--  withDnsCache $ \ c -> withDownloader $ \ d -> do
--  ... -- got URL from queue
--  ra <- resolveA c $ hostNameFromUrl url
--  case ra of
--      Left err -> ... -- uh oh, bad host
--      Right ha -> do
--          ... -- crawler politeness stuff (rate limits, queues)
--          dr <- download d url (Just ha) opts
--          case dr of
--              DROK dat redownloadOptions ->
--                  ... -- analyze data, save redownloadOpts for next download
--              DRRedirect .. -> ...
--              DRNotModified -> ...
--              DRError e -> ...
--  @
--
-- It's highly recommended to use
-- <http://hackage.haskell.org/package/concurrent-dns-cache>
-- (preferably with single resolver pointing to locally running BIND)
-- for DNS resolution since @getAddrInfo@ used in @http-client@ can be
-- buggy and ineffective when it needs to resolve many hosts per second for
-- a long time.
--
module Network.HTTP.Conduit.Downloader
    ( -- * Download operations
      urlGetContents, urlGetContentsPost
    , download, post, downloadG, rawDownload
    , DownloadResult(..), RawDownloadResult(..), DownloadOptions

      -- * Downloader
    , DownloaderSettings(..)
    , Downloader, withDownloader, withDownloaderSettings, newDownloader

      -- * Utils
    , postRequest
    ) where

import qualified Data.Text as T
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Internal as B
import Control.Monad
import qualified Control.Exception as E
import Data.Default as C
import Data.String
import Data.Char
import Data.Maybe
import Data.List
import Foreign
import qualified Network.Socket as NS

import qualified OpenSSL as SSL
import qualified OpenSSL.Session as SSL
import qualified Network.HTTP.Types as N
import qualified Network.HTTP.Client as C
import qualified Network.HTTP.Client.Internal as C
import qualified Network.HTTP.Client.OpenSSL as C
import Codec.Compression.Zlib.Raw as Deflate
import Network.URI
import System.IO.Unsafe
import Data.Time.Format
import Data.Time.Clock
import Data.Time.Clock.POSIX
import System.Timeout

-- | Result of 'download' operation.
data DownloadResult
    = DROK       B.ByteString DownloadOptions
      -- ^ Successful download with data and options for next download.
    | DRRedirect String
      -- ^ Redirect URL
    | DRError    String
      -- ^ Error
    | DRNotModified
      -- ^ HTTP 304 Not Modified
    deriving (Int -> DownloadResult -> ShowS
[DownloadResult] -> ShowS
DownloadResult -> String
(Int -> DownloadResult -> ShowS)
-> (DownloadResult -> String)
-> ([DownloadResult] -> ShowS)
-> Show DownloadResult
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DownloadResult] -> ShowS
$cshowList :: [DownloadResult] -> ShowS
show :: DownloadResult -> String
$cshow :: DownloadResult -> String
showsPrec :: Int -> DownloadResult -> ShowS
$cshowsPrec :: Int -> DownloadResult -> ShowS
Show, ReadPrec [DownloadResult]
ReadPrec DownloadResult
Int -> ReadS DownloadResult
ReadS [DownloadResult]
(Int -> ReadS DownloadResult)
-> ReadS [DownloadResult]
-> ReadPrec DownloadResult
-> ReadPrec [DownloadResult]
-> Read DownloadResult
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [DownloadResult]
$creadListPrec :: ReadPrec [DownloadResult]
readPrec :: ReadPrec DownloadResult
$creadPrec :: ReadPrec DownloadResult
readList :: ReadS [DownloadResult]
$creadList :: ReadS [DownloadResult]
readsPrec :: Int -> ReadS DownloadResult
$creadsPrec :: Int -> ReadS DownloadResult
Read, DownloadResult -> DownloadResult -> Bool
(DownloadResult -> DownloadResult -> Bool)
-> (DownloadResult -> DownloadResult -> Bool) -> Eq DownloadResult
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DownloadResult -> DownloadResult -> Bool
$c/= :: DownloadResult -> DownloadResult -> Bool
== :: DownloadResult -> DownloadResult -> Bool
$c== :: DownloadResult -> DownloadResult -> Bool
Eq)

-- | Result of 'rawDownload' operation.
data RawDownloadResult
    = RawDownloadResult
      { RawDownloadResult -> Status
rdrStatus :: N.Status
      , RawDownloadResult -> HttpVersion
rdrHttpVersion :: N.HttpVersion
      , RawDownloadResult -> ResponseHeaders
rdrHeaders :: N.ResponseHeaders
      , RawDownloadResult -> ByteString
rdrBody :: B.ByteString
      , RawDownloadResult -> CookieJar
rdrCookieJar :: C.CookieJar
      }
    deriving Int -> RawDownloadResult -> ShowS
[RawDownloadResult] -> ShowS
RawDownloadResult -> String
(Int -> RawDownloadResult -> ShowS)
-> (RawDownloadResult -> String)
-> ([RawDownloadResult] -> ShowS)
-> Show RawDownloadResult
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RawDownloadResult] -> ShowS
$cshowList :: [RawDownloadResult] -> ShowS
show :: RawDownloadResult -> String
$cshow :: RawDownloadResult -> String
showsPrec :: Int -> RawDownloadResult -> ShowS
$cshowsPrec :: Int -> RawDownloadResult -> ShowS
Show

-- | @If-None-Match@ and/or @If-Modified-Since@ headers.
type DownloadOptions = [String]

-- | Settings used in downloader.
data DownloaderSettings
    = DownloaderSettings
      { DownloaderSettings -> ByteString
dsUserAgent :: B.ByteString
        -- ^ User agent string. Default: @\"Mozilla\/5.0 (compatible; HttpConduitDownloader\/1.0; +http:\/\/hackage.haskell.org\/package\/http-conduit-downloader)\"@.
        --
        -- Be a good crawler. Provide your User-Agent please.
      , DownloaderSettings -> Int
dsTimeout :: Int
        -- ^ Download timeout. Default: 30 seconds.
      , DownloaderSettings -> ManagerSettings
dsManagerSettings :: C.ManagerSettings
        -- ^ Conduit 'Manager' settings.
        -- Default: ManagerSettings with SSL certificate checks removed.
      , DownloaderSettings -> Int
dsMaxDownloadSize :: Int
        -- ^ Download size limit in bytes. Default: 10MB.
      }
-- http://wiki.apache.org/nutch/OptimizingCrawls
-- use 10 seconds as default timeout (too small).

instance Default DownloaderSettings where
    def :: DownloaderSettings
def =
        DownloaderSettings :: ByteString -> Int -> ManagerSettings -> Int -> DownloaderSettings
DownloaderSettings
        { dsUserAgent :: ByteString
dsUserAgent = ByteString
"Mozilla/5.0 (compatible; HttpConduitDownloader/1.0; +http://hackage.haskell.org/package/http-conduit-downloader)"
        , dsTimeout :: Int
dsTimeout = Int
30
        , dsManagerSettings :: ManagerSettings
dsManagerSettings =
            (IO SSLContext -> ManagerSettings
C.opensslManagerSettings (IO SSLContext -> ManagerSettings)
-> IO SSLContext -> ManagerSettings
forall a b. (a -> b) -> a -> b
$ SSLContext -> IO SSLContext
forall (m :: * -> *) a. Monad m => a -> m a
return SSLContext
globalSSLContext)
            { managerProxyInsecure :: ProxyOverride
C.managerProxyInsecure = ProxyOverride
C.proxyFromRequest
            , managerProxySecure :: ProxyOverride
C.managerProxySecure = ProxyOverride
C.proxyFromRequest
            }
        , dsMaxDownloadSize :: Int
dsMaxDownloadSize = Int
10Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
1024Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
1024
        }

-- tls package doesn't handle some sites:
-- https://github.com/vincenthz/hs-tls/issues/53
-- plus tls is about 2 times slower than HsOpenSSL
-- using OpenSSL instead

globalSSLContext :: SSL.SSLContext
globalSSLContext :: SSLContext
globalSSLContext = IO SSLContext -> SSLContext
forall a. IO a -> a
unsafePerformIO (IO SSLContext -> SSLContext) -> IO SSLContext -> SSLContext
forall a b. (a -> b) -> a -> b
$ do
    SSLContext
ctx <- IO SSLContext
SSL.context
--     SSL.contextSetCiphers ctx "DEFAULT"
--     SSL.contextSetVerificationMode ctx SSL.VerifyNone
--     SSL.contextAddOption ctx SSL.SSL_OP_NO_SSLv3
--     SSL.contextAddOption ctx SSL.SSL_OP_ALL
    SSLContext -> IO SSLContext
forall (m :: * -> *) a. Monad m => a -> m a
return SSLContext
ctx
{-# NOINLINE globalSSLContext #-}

-- | Keeps http-client 'Manager' and 'DownloaderSettings'.
data Downloader
    = Downloader
      { Downloader -> Manager
manager :: C.Manager
      , Downloader -> DownloaderSettings
settings :: DownloaderSettings
      }

-- | Create a 'Downloader' with settings.
newDownloader :: DownloaderSettings -> IO Downloader
newDownloader :: DownloaderSettings -> IO Downloader
newDownloader DownloaderSettings
s = do
    IO () -> IO ()
forall a. IO a -> IO a
SSL.withOpenSSL (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return () -- init in case it wasn't initialized yet
    Manager
m <- ManagerSettings -> IO Manager
C.newManager (ManagerSettings -> IO Manager) -> ManagerSettings -> IO Manager
forall a b. (a -> b) -> a -> b
$ DownloaderSettings -> ManagerSettings
dsManagerSettings DownloaderSettings
s
    Downloader -> IO Downloader
forall (m :: * -> *) a. Monad m => a -> m a
return (Downloader -> IO Downloader) -> Downloader -> IO Downloader
forall a b. (a -> b) -> a -> b
$ Manager -> DownloaderSettings -> Downloader
Downloader Manager
m DownloaderSettings
s

-- | Create a new 'Downloader', use it in the provided function,
-- and then release it.
withDownloader :: (Downloader -> IO a) -> IO a
withDownloader :: (Downloader -> IO a) -> IO a
withDownloader = DownloaderSettings -> (Downloader -> IO a) -> IO a
forall a. DownloaderSettings -> (Downloader -> IO a) -> IO a
withDownloaderSettings DownloaderSettings
forall a. Default a => a
def

-- | Create a new 'Downloader' with provided settings,
-- use it in the provided function, and then release it.
withDownloaderSettings :: DownloaderSettings -> (Downloader -> IO a) -> IO a
withDownloaderSettings :: DownloaderSettings -> (Downloader -> IO a) -> IO a
withDownloaderSettings DownloaderSettings
s Downloader -> IO a
f = Downloader -> IO a
f (Downloader -> IO a) -> IO Downloader -> IO a
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DownloaderSettings -> IO Downloader
newDownloader DownloaderSettings
s

parseUrl :: String -> Either E.SomeException C.Request
parseUrl :: String -> Either SomeException Request
parseUrl = String -> Either SomeException Request
forall (m :: * -> *). MonadThrow m => String -> m Request
C.parseRequest (String -> Either SomeException Request)
-> ShowS -> String -> Either SomeException Request
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'#')

-- | Perform download
download  ::    Downloader
             -> String -- ^ URL
             -> Maybe NS.HostAddress -- ^ Optional resolved 'HostAddress'
             -> DownloadOptions
             -> IO DownloadResult
download :: Downloader
-> String
-> Maybe HostAddress
-> DownloadOptions
-> IO DownloadResult
download = (Request -> IO Request)
-> Downloader
-> String
-> Maybe HostAddress
-> DownloadOptions
-> IO DownloadResult
downloadG Request -> IO Request
forall (m :: * -> *) a. Monad m => a -> m a
return

-- | Perform HTTP POST.
post :: Downloader -> String -> Maybe NS.HostAddress -> B.ByteString
     -> IO DownloadResult
post :: Downloader
-> String -> Maybe HostAddress -> ByteString -> IO DownloadResult
post Downloader
d String
url Maybe HostAddress
ha ByteString
dat =
    (Request -> IO Request)
-> Downloader
-> String
-> Maybe HostAddress
-> DownloadOptions
-> IO DownloadResult
downloadG (Request -> IO Request
forall (m :: * -> *) a. Monad m => a -> m a
return (Request -> IO Request)
-> (Request -> Request) -> Request -> IO Request
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Request -> Request
postRequest ByteString
dat) Downloader
d String
url Maybe HostAddress
ha []

-- | Make HTTP POST request.
postRequest :: B.ByteString -> C.Request -> C.Request
postRequest :: ByteString -> Request -> Request
postRequest ByteString
dat Request
rq =
    Request
rq { method :: ByteString
C.method = ByteString
N.methodPost
       , requestBody :: RequestBody
C.requestBody = ByteString -> RequestBody
C.RequestBodyBS ByteString
dat }

-- | Generic version of 'download'
-- with ability to modify http-client 'Request'.
downloadG ::    (C.Request -> IO C.Request)
                -- ^ Function to modify 'Request'
                -- (e.g. sign or make 'postRequest')
             -> Downloader
             -> String -- ^ URL
             -> Maybe NS.HostAddress -- ^ Optional resolved 'HostAddress'
             -> DownloadOptions
             -> IO (DownloadResult)
downloadG :: (Request -> IO Request)
-> Downloader
-> String
-> Maybe HostAddress
-> DownloadOptions
-> IO DownloadResult
downloadG Request -> IO Request
f Downloader
d String
u Maybe HostAddress
h DownloadOptions
o = ((DownloadResult, Maybe RawDownloadResult) -> DownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
-> IO DownloadResult
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (DownloadResult, Maybe RawDownloadResult) -> DownloadResult
forall a b. (a, b) -> a
fst (IO (DownloadResult, Maybe RawDownloadResult) -> IO DownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
-> IO DownloadResult
forall a b. (a -> b) -> a -> b
$ (Request -> IO Request)
-> Downloader
-> String
-> Maybe HostAddress
-> DownloadOptions
-> IO (DownloadResult, Maybe RawDownloadResult)
rawDownload Request -> IO Request
f Downloader
d String
u Maybe HostAddress
h DownloadOptions
o

-- | Even more generic version of 'download', which returns 'RawDownloadResult'.
-- 'RawDownloadResult' is optional since it can not be determined on timeouts
-- and connection errors.
rawDownload ::  (C.Request -> IO C.Request)
                -- ^ Function to modify 'Request'
                -- (e.g. sign or make 'postRequest')
             -> Downloader
             -> String -- ^ URL
             -> Maybe NS.HostAddress -- ^ Optional resolved 'HostAddress'
             -> DownloadOptions
             -> IO (DownloadResult, Maybe RawDownloadResult)
rawDownload :: (Request -> IO Request)
-> Downloader
-> String
-> Maybe HostAddress
-> DownloadOptions
-> IO (DownloadResult, Maybe RawDownloadResult)
rawDownload Request -> IO Request
f (Downloader {Manager
DownloaderSettings
settings :: DownloaderSettings
manager :: Manager
settings :: Downloader -> DownloaderSettings
manager :: Downloader -> Manager
..}) String
url Maybe HostAddress
hostAddress DownloadOptions
opts =
  case String -> Either SomeException Request
parseUrl String
url of
    Left SomeException
e ->
        (DownloadResult -> (DownloadResult, Maybe RawDownloadResult))
-> IO DownloadResult
-> IO (DownloadResult, Maybe RawDownloadResult)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (, Maybe RawDownloadResult
forall a. Maybe a
Nothing) (IO DownloadResult -> IO (DownloadResult, Maybe RawDownloadResult))
-> IO DownloadResult
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a b. (a -> b) -> a -> b
$
        IO DownloadResult
-> (HttpException -> IO DownloadResult)
-> Maybe HttpException
-> IO DownloadResult
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (DownloadResult -> IO DownloadResult
forall (m :: * -> *) a. Monad m => a -> m a
return (DownloadResult -> IO DownloadResult)
-> DownloadResult -> IO DownloadResult
forall a b. (a -> b) -> a -> b
$ String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ SomeException -> String
forall a. Show a => a -> String
show SomeException
e) (String -> HttpException -> IO DownloadResult
forall (m :: * -> *).
Monad m =>
String -> HttpException -> m DownloadResult
httpExceptionToDR String
url)
              (SomeException -> Maybe HttpException
forall e. Exception e => SomeException -> Maybe e
E.fromException SomeException
e)
    Right Request
rq -> do
        let dl :: Request -> Bool -> IO (DownloadResult, Maybe RawDownloadResult)
dl Request
req Bool
firstTime = do
                UTCTime
t0 <- IO UTCTime
getCurrentTime
                (DownloadResult, Maybe RawDownloadResult)
r <- (HttpException -> IO (DownloadResult, Maybe RawDownloadResult))
-> IO (DownloadResult, Maybe RawDownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
forall e a. Exception e => (e -> IO a) -> IO a -> IO a
E.handle ((DownloadResult -> (DownloadResult, Maybe RawDownloadResult))
-> IO DownloadResult
-> IO (DownloadResult, Maybe RawDownloadResult)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (, Maybe RawDownloadResult
forall a. Maybe a
Nothing) (IO DownloadResult -> IO (DownloadResult, Maybe RawDownloadResult))
-> (HttpException -> IO DownloadResult)
-> HttpException
-> IO (DownloadResult, Maybe RawDownloadResult)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> HttpException -> IO DownloadResult
forall (m :: * -> *).
Monad m =>
String -> HttpException -> m DownloadResult
httpExceptionToDR String
url) (IO (DownloadResult, Maybe RawDownloadResult)
 -> IO (DownloadResult, Maybe RawDownloadResult))
-> IO (DownloadResult, Maybe RawDownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a b. (a -> b) -> a -> b
$
                    Request
-> Manager
-> (Response BodyReader
    -> IO (DownloadResult, Maybe RawDownloadResult))
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a.
Request -> Manager -> (Response BodyReader -> IO a) -> IO a
C.withResponse Request
req Manager
manager ((Response BodyReader
  -> IO (DownloadResult, Maybe RawDownloadResult))
 -> IO (DownloadResult, Maybe RawDownloadResult))
-> (Response BodyReader
    -> IO (DownloadResult, Maybe RawDownloadResult))
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a b. (a -> b) -> a -> b
$ \ Response BodyReader
r -> do
                    let s :: Status
s = Response BodyReader -> Status
forall body. Response body -> Status
C.responseStatus Response BodyReader
r
                        h :: ResponseHeaders
h = Response BodyReader -> ResponseHeaders
forall body. Response body -> ResponseHeaders
C.responseHeaders Response BodyReader
r
                        rdr :: ByteString -> RawDownloadResult
rdr ByteString
d =
                            RawDownloadResult :: Status
-> HttpVersion
-> ResponseHeaders
-> ByteString
-> CookieJar
-> RawDownloadResult
RawDownloadResult
                            { rdrStatus :: Status
rdrStatus = Status
s
                            , rdrHttpVersion :: HttpVersion
rdrHttpVersion = Response BodyReader -> HttpVersion
forall body. Response body -> HttpVersion
C.responseVersion Response BodyReader
r
                            , rdrHeaders :: ResponseHeaders
rdrHeaders = ResponseHeaders
h
                            , rdrBody :: ByteString
rdrBody = ByteString
d
                            , rdrCookieJar :: CookieJar
rdrCookieJar = Response BodyReader -> CookieJar
forall body. Response body -> CookieJar
C.responseCookieJar Response BodyReader
r
                            }
                        readLen :: ByteString -> Int
readLen = (Int -> Char -> Int) -> Int -> ByteString -> Int
forall a. (a -> Char -> a) -> a -> ByteString -> a
B.foldl' (\ Int
a Char
d -> Int
a Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
10 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
ord Char
d Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0') Int
0
                    Maybe (Maybe ByteString)
mbb <- case HeaderName -> ResponseHeaders -> Maybe ByteString
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup HeaderName
"Content-Length" ResponseHeaders
h of
                        Just ByteString
l
                            | (Char -> Bool) -> ByteString -> Bool
B.all (\ Char
c -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9') ByteString
l
                              Bool -> Bool -> Bool
&& Bool -> Bool
not (ByteString -> Bool
B.null ByteString
l)
                              Bool -> Bool -> Bool
&& ByteString -> Int
readLen ByteString
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> DownloaderSettings -> Int
dsMaxDownloadSize DownloaderSettings
settings
                            -> do
                               -- liftIO $ putStrLn "Content-Length too large"
                               Maybe (Maybe ByteString) -> IO (Maybe (Maybe ByteString))
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (Maybe ByteString)
forall a. Maybe a
Nothing
                               -- no reason to download body
                        Maybe ByteString
_ -> do
                            UTCTime
t1 <- IO UTCTime
getCurrentTime
                            let timeSpentMicro :: NominalDiffTime
timeSpentMicro = UTCTime -> UTCTime -> NominalDiffTime
diffUTCTime UTCTime
t1 UTCTime
t0 NominalDiffTime -> NominalDiffTime -> NominalDiffTime
forall a. Num a => a -> a -> a
* NominalDiffTime
1000000
                                remainingTime :: Int
remainingTime =
                                    NominalDiffTime -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round (NominalDiffTime -> Int) -> NominalDiffTime -> Int
forall a b. (a -> b) -> a -> b
$ Int -> NominalDiffTime
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
to NominalDiffTime -> NominalDiffTime -> NominalDiffTime
forall a. Num a => a -> a -> a
- NominalDiffTime
timeSpentMicro
                            if Int
remainingTime Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 then
                                Maybe (Maybe ByteString) -> IO (Maybe (Maybe ByteString))
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (Maybe ByteString)
forall a. Maybe a
Nothing
                            else
                                Int -> IO (Maybe ByteString) -> IO (Maybe (Maybe ByteString))
forall a. Int -> IO a -> IO (Maybe a)
timeout Int
remainingTime
                                (IO (Maybe ByteString) -> IO (Maybe (Maybe ByteString)))
-> IO (Maybe ByteString) -> IO (Maybe (Maybe ByteString))
forall a b. (a -> b) -> a -> b
$ BodyReader -> Int -> IO (Maybe ByteString)
sinkByteString (BodyReader -> BodyReader
C.brRead (BodyReader -> BodyReader) -> BodyReader -> BodyReader
forall a b. (a -> b) -> a -> b
$ Response BodyReader -> BodyReader
forall body. Response body -> body
C.responseBody Response BodyReader
r)
                                    (DownloaderSettings -> Int
dsMaxDownloadSize DownloaderSettings
settings)
                    case Maybe (Maybe ByteString)
mbb of
                        Maybe (Maybe ByteString)
Nothing ->
                            (DownloadResult, Maybe RawDownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> DownloadResult
DRError String
"Timeout", RawDownloadResult -> Maybe RawDownloadResult
forall a. a -> Maybe a
Just (RawDownloadResult -> Maybe RawDownloadResult)
-> RawDownloadResult -> Maybe RawDownloadResult
forall a b. (a -> b) -> a -> b
$ ByteString -> RawDownloadResult
rdr ByteString
"")
                        Just (Just ByteString
b) -> do
                            let d :: ByteString
d = ResponseHeaders -> ByteString -> ByteString
forall a.
(Eq a, IsString a) =>
[(a, ByteString)] -> ByteString -> ByteString
tryDeflate ResponseHeaders
h ByteString
b
                            UTCTime
curTime <- IO UTCTime
getCurrentTime
                            (DownloadResult, Maybe RawDownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
forall (m :: * -> *) a. Monad m => a -> m a
return
                                (UTCTime
-> String
-> Status
-> ResponseHeaders
-> ByteString
-> DownloadResult
makeDownloadResultC UTCTime
curTime String
url Status
s ResponseHeaders
h ByteString
d
                                , RawDownloadResult -> Maybe RawDownloadResult
forall a. a -> Maybe a
Just (RawDownloadResult -> Maybe RawDownloadResult)
-> RawDownloadResult -> Maybe RawDownloadResult
forall a b. (a -> b) -> a -> b
$ ByteString -> RawDownloadResult
rdr ByteString
d)
                        Just Maybe ByteString
Nothing ->
                            (DownloadResult, Maybe RawDownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> DownloadResult
DRError String
"Too much data", RawDownloadResult -> Maybe RawDownloadResult
forall a. a -> Maybe a
Just (RawDownloadResult -> Maybe RawDownloadResult)
-> RawDownloadResult -> Maybe RawDownloadResult
forall a b. (a -> b) -> a -> b
$ ByteString -> RawDownloadResult
rdr ByteString
"")
                case (DownloadResult, Maybe RawDownloadResult)
r of
                    (DRError String
e, Maybe RawDownloadResult
_)
                        | String
"ZlibException" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` String
e Bool -> Bool -> Bool
&& Bool
firstTime ->
                            -- some sites return junk instead of gzip data.
                            -- retrying without compression
                            Request -> Bool -> IO (DownloadResult, Maybe RawDownloadResult)
dl (Request -> Request
disableCompression Request
req) Bool
False
                    (DownloadResult, Maybe RawDownloadResult)
_ ->
                        (DownloadResult, Maybe RawDownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
forall (m :: * -> *) a. Monad m => a -> m a
return (DownloadResult, Maybe RawDownloadResult)
r
            disableCompression :: Request -> Request
disableCompression Request
req =
                Request
req { requestHeaders :: ResponseHeaders
C.requestHeaders =
                          (HeaderName
"Accept-Encoding", ByteString
"") (HeaderName, ByteString) -> ResponseHeaders -> ResponseHeaders
forall a. a -> [a] -> [a]
: Request -> ResponseHeaders
C.requestHeaders Request
req }
            rq1 :: Request
rq1 = Request
rq { requestHeaders :: ResponseHeaders
C.requestHeaders =
                               [(HeaderName
"Accept", ByteString
"*/*")
                               ,(HeaderName
"User-Agent", DownloaderSettings -> ByteString
dsUserAgent DownloaderSettings
settings)
                               ]
                               ResponseHeaders -> ResponseHeaders -> ResponseHeaders
forall a. [a] -> [a] -> [a]
++ (String -> (HeaderName, ByteString))
-> DownloadOptions -> ResponseHeaders
forall a b. (a -> b) -> [a] -> [b]
map String -> (HeaderName, ByteString)
toHeader DownloadOptions
opts
                               ResponseHeaders -> ResponseHeaders -> ResponseHeaders
forall a. [a] -> [a] -> [a]
++ Request -> ResponseHeaders
C.requestHeaders Request
rq
                     , redirectCount :: Int
C.redirectCount = Int
0
                     , responseTimeout :: ResponseTimeout
C.responseTimeout = Int -> ResponseTimeout
C.responseTimeoutMicro Int
to
                       -- it's only connection + headers timeout,
                       -- response body needs additional timeout
                     , hostAddress :: Maybe HostAddress
C.hostAddress = Maybe HostAddress
hostAddress
                     , checkResponse :: Request -> Response BodyReader -> IO ()
C.checkResponse = \ Request
_ Response BodyReader
_ -> () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                     }
            to :: Int
to = DownloaderSettings -> Int
dsTimeout DownloaderSettings
settings Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
1000000
        Request
req <- Request -> IO Request
f Request
rq1
        Request -> Bool -> IO (DownloadResult, Maybe RawDownloadResult)
dl Request
req Bool
True
    where toHeader :: String -> N.Header
          toHeader :: String -> (HeaderName, ByteString)
toHeader String
h = let (String
a,String
b) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':') String
h in
                       (String -> HeaderName
forall a. IsString a => String -> a
fromString String
a, String -> ByteString
forall a. IsString a => String -> a
fromString (ShowS
forall a. [a] -> [a]
tail String
b))
          tryDeflate :: [(a, ByteString)] -> ByteString -> ByteString
tryDeflate [(a, ByteString)]
headers ByteString
b
              | Just ByteString
d <- a -> [(a, ByteString)] -> Maybe ByteString
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup a
"Content-Encoding" [(a, ByteString)]
headers
              , (Char -> Char) -> ByteString -> ByteString
B.map Char -> Char
toLower ByteString
d ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
"deflate"
                  = ByteString -> ByteString
BL.toStrict (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
Deflate.decompress (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
BL.fromStrict ByteString
b
              | Bool
otherwise = ByteString
b

httpExceptionToDR :: Monad m => String -> C.HttpException -> m DownloadResult
httpExceptionToDR :: String -> HttpException -> m DownloadResult
httpExceptionToDR String
url HttpException
exn = DownloadResult -> m DownloadResult
forall (m :: * -> *) a. Monad m => a -> m a
return (DownloadResult -> m DownloadResult)
-> DownloadResult -> m DownloadResult
forall a b. (a -> b) -> a -> b
$ case HttpException
exn of
    C.HttpExceptionRequest Request
_ HttpExceptionContent
ec -> String -> HttpExceptionContent -> DownloadResult
httpExceptionContentToDR String
url HttpExceptionContent
ec
    C.InvalidUrlException String
_ String
e
        | String
e String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"Invalid URL" -> String -> DownloadResult
DRError String
e
        | Bool
otherwise -> String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ String
"Invalid URL: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
e

httpExceptionContentToDR :: String -> C.HttpExceptionContent -> DownloadResult
httpExceptionContentToDR :: String -> HttpExceptionContent -> DownloadResult
httpExceptionContentToDR String
url HttpExceptionContent
ec = case HttpExceptionContent
ec of
    C.StatusCodeException Response ()
r ByteString
b ->
      UTCTime
-> String
-> Status
-> ResponseHeaders
-> ByteString
-> DownloadResult
makeDownloadResultC (NominalDiffTime -> UTCTime
posixSecondsToUTCTime NominalDiffTime
0) String
url
      (Response () -> Status
forall body. Response body -> Status
C.responseStatus Response ()
r) (Response () -> ResponseHeaders
forall body. Response body -> ResponseHeaders
C.responseHeaders Response ()
r) ByteString
b
    C.TooManyRedirects [Response ByteString]
_ -> String -> DownloadResult
DRError String
"Too many redirects"
    HttpExceptionContent
C.OverlongHeaders -> String -> DownloadResult
DRError String
"Overlong HTTP headers"
    HttpExceptionContent
C.ResponseTimeout -> String -> DownloadResult
DRError String
"Response timeout"
    HttpExceptionContent
C.ConnectionTimeout -> String -> DownloadResult
DRError String
"Connection timeout"
    C.ConnectionFailure SomeException
e -> String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ String
"Connection failed: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ SomeException -> String
forall a. Show a => a -> String
show SomeException
e
    C.InvalidStatusLine ByteString
l -> String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ String
"Invalid HTTP status line:\n" String -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> String
B.unpack ByteString
l
    C.InvalidHeader ByteString
h -> String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ String
"Invalid HTTP header:\n" String -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> String
B.unpack ByteString
h
    C.InvalidRequestHeader ByteString
h -> String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ String
"Invalid HTTP request header:\n" String -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> String
B.unpack ByteString
h
    C.InternalException SomeException
e
        | Just (ConnectionAbruptlyTerminated
_ :: SSL.ConnectionAbruptlyTerminated) <- SomeException -> Maybe ConnectionAbruptlyTerminated
forall e. Exception e => SomeException -> Maybe e
E.fromException SomeException
e ->
            String -> DownloadResult
DRError String
"Connection abruptly terminated"
        | Just (SSL.ProtocolError String
pe) <- SomeException -> Maybe ProtocolError
forall e. Exception e => SomeException -> Maybe e
E.fromException SomeException
e ->
            String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ String
"SSL protocol error: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
pe
        | Bool
otherwise -> String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ SomeException -> String
forall a. Show a => a -> String
show SomeException
e
    C.ProxyConnectException ByteString
_ Int
_ Status
s ->
        String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ String
"Proxy CONNECT failed: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Status -> String
httpStatusString Status
s
    HttpExceptionContent
C.NoResponseDataReceived -> String -> DownloadResult
DRError String
"No response data received"
    HttpExceptionContent
C.TlsNotSupported -> String -> DownloadResult
DRError String
"TLS not supported"
    C.WrongRequestBodyStreamSize Word64
e Word64
a ->
        String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ String
"The request body provided did not match the expected size "
        String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word64 -> Word64 -> String
forall a a. (Show a, Show a) => a -> a -> String
ea Word64
e Word64
a
    C.ResponseBodyTooShort Word64
e Word64
a -> String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ String
"Response body too short " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word64 -> Word64 -> String
forall a a. (Show a, Show a) => a -> a -> String
ea Word64
e Word64
a
    HttpExceptionContent
C.InvalidChunkHeaders -> String -> DownloadResult
DRError String
"Invalid chunk headers"
    HttpExceptionContent
C.IncompleteHeaders -> String -> DownloadResult
DRError String
"Incomplete headers"
    C.InvalidDestinationHost ByteString
_ -> String -> DownloadResult
DRError String
"Invalid destination host"
    C.HttpZlibException ZlibException
e -> String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ ZlibException -> String
forall a. Show a => a -> String
show ZlibException
e
    C.InvalidProxyEnvironmentVariable Text
n Text
v ->
        String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ String
"Invalid proxy environment variable "
        String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
forall a. Show a => a -> String
show Text
n String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"=" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
forall a. Show a => a -> String
show Text
v
    C.InvalidProxySettings Text
s -> String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ String
"Invalid proxy settings:\n" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
s
    HttpExceptionContent
C.ConnectionClosed -> String -> DownloadResult
DRError String
"Connection closed"
    where ea :: a -> a -> String
ea a
expected a
actual =
              String
"(expected " String -> ShowS
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
expected String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" bytes, actual is "
              String -> ShowS
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
actual String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" bytes)"

bufSize :: Int
bufSize :: Int
bufSize = Int
32 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
1024 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
overhead -- Copied from Data.ByteString.Lazy.
    where overhead :: Int
overhead = Int
2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int -> Int
forall a. Storable a => a -> Int
sizeOf (Int
forall a. HasCallStack => a
undefined :: Int)

newBuf :: IO B.ByteString
newBuf :: BodyReader
newBuf = do
    ForeignPtr Word8
fp <- Int -> IO (ForeignPtr Word8)
forall a. Int -> IO (ForeignPtr a)
B.mallocByteString Int
bufSize
    ByteString -> BodyReader
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> BodyReader) -> ByteString -> BodyReader
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> Int -> Int -> ByteString
B.PS ForeignPtr Word8
fp Int
0 Int
0

addBs :: [B.ByteString] -> B.ByteString -> B.ByteString
      -> IO ([B.ByteString], B.ByteString)
addBs :: [ByteString]
-> ByteString -> ByteString -> IO ([ByteString], ByteString)
addBs [ByteString]
acc (B.PS ForeignPtr Word8
bfp Int
_ Int
bl) (B.PS ForeignPtr Word8
sfp Int
offs Int
sl) = do
    let cpSize :: Int
cpSize = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Int
bufSize Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
bl) Int
sl
        bl' :: Int
bl' = Int
bl Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
cpSize
    ForeignPtr Word8 -> (Ptr Word8 -> IO ()) -> IO ()
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
bfp ((Ptr Word8 -> IO ()) -> IO ()) -> (Ptr Word8 -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \ Ptr Word8
dst -> ForeignPtr Word8 -> (Ptr Word8 -> IO ()) -> IO ()
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
sfp ((Ptr Word8 -> IO ()) -> IO ()) -> (Ptr Word8 -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \ Ptr Word8
src ->
        Ptr Word8 -> Ptr Word8 -> Int -> IO ()
B.memcpy (Ptr Word8
dst Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
bl) (Ptr Word8
src Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
offs) (Int -> Int
forall a. Enum a => Int -> a
toEnum Int
cpSize)
    if Int
bl' Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
bufSize then do
        ByteString
buf' <- BodyReader
newBuf
--        print ("filled", cpSize)
        [ByteString]
-> ByteString -> ByteString -> IO ([ByteString], ByteString)
addBs (ForeignPtr Word8 -> Int -> Int -> ByteString
B.PS ForeignPtr Word8
bfp Int
0 Int
bufSize ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
acc) ByteString
buf'
              (ForeignPtr Word8 -> Int -> Int -> ByteString
B.PS ForeignPtr Word8
sfp (Int
offs Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
cpSize) (Int
sl Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
cpSize))
    else do
--        print ("ok", cpSize, bl')
        ([ByteString], ByteString) -> IO ([ByteString], ByteString)
forall (m :: * -> *) a. Monad m => a -> m a
return ([ByteString]
acc, ForeignPtr Word8 -> Int -> Int -> ByteString
B.PS ForeignPtr Word8
bfp Int
0 Int
bl')

-- | Sink data using 32k buffers to reduce memory fragmentation.
-- Returns 'Nothing' if downloaded too much data.
sinkByteString :: IO B.ByteString -> Int -> IO (Maybe B.ByteString)
sinkByteString :: BodyReader -> Int -> IO (Maybe ByteString)
sinkByteString BodyReader
readChunk Int
limit = do
    ByteString
buf <- BodyReader
newBuf
    Int -> [ByteString] -> ByteString -> IO (Maybe ByteString)
go Int
0 [] ByteString
buf
    where go :: Int -> [ByteString] -> ByteString -> IO (Maybe ByteString)
go Int
len [ByteString]
acc ByteString
buf = do
              ByteString
inp <- BodyReader
readChunk
              if ByteString -> Bool
B.null ByteString
inp then
                  Maybe ByteString -> IO (Maybe ByteString)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe ByteString -> IO (Maybe ByteString))
-> Maybe ByteString -> IO (Maybe ByteString)
forall a b. (a -> b) -> a -> b
$ ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> [ByteString]
forall a. [a] -> [a]
reverse (ByteString
bufByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
acc)
              else do
                  ([ByteString]
acc', ByteString
buf') <- [ByteString]
-> ByteString -> ByteString -> IO ([ByteString], ByteString)
addBs [ByteString]
acc ByteString
buf ByteString
inp
                  let len' :: Int
len' = Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
B.length ByteString
inp
                  if Int
len' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
limit then
                      Maybe ByteString -> IO (Maybe ByteString)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe ByteString
forall a. Maybe a
Nothing
                  else
                      Int -> [ByteString] -> ByteString -> IO (Maybe ByteString)
go Int
len' [ByteString]
acc' ByteString
buf'

makeDownloadResultC :: UTCTime -> String -> N.Status -> N.ResponseHeaders
                    -> B.ByteString -> DownloadResult
makeDownloadResultC :: UTCTime
-> String
-> Status
-> ResponseHeaders
-> ByteString
-> DownloadResult
makeDownloadResultC UTCTime
curTime String
url Status
c ResponseHeaders
headers ByteString
b = do
    if Status -> Int
N.statusCode Status
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
304 then
        DownloadResult
DRNotModified
    else if Status -> Int
N.statusCode Status
c Int -> [Int] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`
          [ Int
300 -- Multiple choices
          , Int
301 -- Moved permanently
          , Int
302 -- Found
          , Int
303 -- See other
          , Int
307 -- Temporary redirect
          , Int
308 -- Permanent redirect
          ] then
        case HeaderName -> ResponseHeaders -> Maybe ByteString
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup HeaderName
"location" ResponseHeaders
headers of
            Just (ByteString -> String
B.unpack -> String
loc) ->
                String -> DownloadResult
redirect (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$
                    ShowS
relUri ((Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'#') ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ') String
loc)
                    --  ^ Location can be relative and contain #fragment
            Maybe ByteString
_ ->
                String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ String
"Redirect status, but no Location field\n"
                    String -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> String
B.unpack (Status -> ByteString
N.statusMessage Status
c) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"\n"
                    String -> ShowS
forall a. [a] -> [a] -> [a]
++ DownloadOptions -> String
unlines (((HeaderName, ByteString) -> String)
-> ResponseHeaders -> DownloadOptions
forall a b. (a -> b) -> [a] -> [b]
map (HeaderName, ByteString) -> String
forall a. Show a => a -> String
show ResponseHeaders
headers)
    else if Status -> Int
N.statusCode Status
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
300 then
        String -> DownloadResult
DRError (String -> DownloadResult) -> String -> DownloadResult
forall a b. (a -> b) -> a -> b
$ Status -> String
httpStatusString Status
c
    else
        ByteString -> DownloadOptions -> DownloadResult
DROK ByteString
b (DownloadOptions -> ResponseHeaders -> DownloadOptions
forall a.
(Eq a, IsString a) =>
DownloadOptions -> [(a, ByteString)] -> DownloadOptions
redownloadOpts [] ResponseHeaders
headers)
    where redirect :: String -> DownloadResult
redirect String
r
--              | r == url = DRError $ "HTTP redirect to the same url?"
              | Bool
otherwise = String -> DownloadResult
DRRedirect String
r
          redownloadOpts :: DownloadOptions -> [(a, ByteString)] -> DownloadOptions
redownloadOpts DownloadOptions
acc [] = DownloadOptions -> DownloadOptions
forall a. [a] -> [a]
reverse DownloadOptions
acc
          redownloadOpts DownloadOptions
_ ((a
"Pragma", (Char -> Char) -> ByteString -> ByteString
B.map Char -> Char
toLower -> ByteString
tag) : [(a, ByteString)]
_)
              | ByteString
"no-cache" ByteString -> ByteString -> Bool
`B.isInfixOf` ByteString
tag = []
          redownloadOpts DownloadOptions
_ ((a
"Cache-Control", (Char -> Char) -> ByteString -> ByteString
B.map Char -> Char
toLower -> ByteString
tag) : [(a, ByteString)]
_)
              | (ByteString -> Bool) -> [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (ByteString -> ByteString -> Bool
`B.isInfixOf` ByteString
tag)
                [ByteString
"no-cache", ByteString
"no-store", ByteString
"must-revalidate", ByteString
"max-age=0"] = []
          redownloadOpts DownloadOptions
acc ((a
"Expires", ByteString
time):[(a, ByteString)]
xs)
              | String
ts <- ByteString -> String
B.unpack ByteString
time
              , Just UTCTime
t <- String -> Maybe UTCTime
parseHttpTime String
ts
              , UTCTime
t UTCTime -> UTCTime -> Bool
forall a. Ord a => a -> a -> Bool
> UTCTime
curTime =
                   DownloadOptions -> [(a, ByteString)] -> DownloadOptions
redownloadOpts DownloadOptions
acc [(a, ByteString)]
xs
              | Bool
otherwise = [] -- expires is non-valid or in the past
          redownloadOpts DownloadOptions
acc ((a
"ETag", ByteString
tag):[(a, ByteString)]
xs) =
              DownloadOptions -> [(a, ByteString)] -> DownloadOptions
redownloadOpts ((String
"If-None-Match: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> String
B.unpack ByteString
tag) String -> DownloadOptions -> DownloadOptions
forall a. a -> [a] -> [a]
: DownloadOptions
acc) [(a, ByteString)]
xs
          redownloadOpts DownloadOptions
acc ((a
"Last-Modified", ByteString
time):[(a, ByteString)]
xs)
              | String
ts <- ByteString -> String
B.unpack ByteString
time
              , Just UTCTime
t <- String -> Maybe UTCTime
parseHttpTime String
ts
              , UTCTime
t UTCTime -> UTCTime -> Bool
forall a. Ord a => a -> a -> Bool
<= UTCTime
curTime = -- use only valid timestamps
              DownloadOptions -> [(a, ByteString)] -> DownloadOptions
redownloadOpts ((String
"If-Modified-Since: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> String
B.unpack ByteString
time) String -> DownloadOptions -> DownloadOptions
forall a. a -> [a] -> [a]
: DownloadOptions
acc) [(a, ByteString)]
xs
          redownloadOpts DownloadOptions
acc ((a, ByteString)
_:[(a, ByteString)]
xs) = DownloadOptions -> [(a, ByteString)] -> DownloadOptions
redownloadOpts DownloadOptions
acc [(a, ByteString)]
xs
          fixNonAscii :: ShowS
fixNonAscii =
              (Char -> Bool) -> ShowS
escapeURIString
                  (\ Char
x -> Char -> Int
ord Char
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7f Bool -> Bool -> Bool
&& Char
x Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` (String
" []{}|\"" :: String)) ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
              ShowS
trimString
          relUri :: ShowS
relUri (ShowS
fixNonAscii -> String
r) =
              String -> Maybe String -> String
forall a. a -> Maybe a -> a
fromMaybe String
r (Maybe String -> String) -> Maybe String -> String
forall a b. (a -> b) -> a -> b
$
              (URI -> String) -> Maybe URI -> Maybe String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ String
"") (ShowS -> String) -> (URI -> ShowS) -> URI -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS -> URI -> ShowS
uriToString ShowS
forall a. a -> a
id) (Maybe URI -> Maybe String) -> Maybe URI -> Maybe String
forall a b. (a -> b) -> a -> b
$
              (URI -> URI -> URI) -> Maybe URI -> Maybe URI -> Maybe URI
forall (m :: * -> *) a1 a2 r.
Monad m =>
(a1 -> a2 -> r) -> m a1 -> m a2 -> m r
liftM2 URI -> URI -> URI
relativeTo
                  (String -> Maybe URI
parseURIReference String
r)
                  (String -> Maybe URI
parseURI (String -> Maybe URI) -> String -> Maybe URI
forall a b. (a -> b) -> a -> b
$ ShowS
fixNonAscii String
url)

httpStatusString :: N.Status -> [Char]
httpStatusString :: Status -> String
httpStatusString Status
c =
    String
"HTTP " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (Status -> Int
N.statusCode Status
c) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> String
B.unpack (Status -> ByteString
N.statusMessage Status
c)

tryParseTime :: [String] -> String -> Maybe UTCTime
tryParseTime :: DownloadOptions -> String -> Maybe UTCTime
tryParseTime DownloadOptions
formats String
string =
    (Maybe UTCTime -> Maybe UTCTime -> Maybe UTCTime)
-> Maybe UTCTime -> [Maybe UTCTime] -> Maybe UTCTime
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Maybe UTCTime -> Maybe UTCTime -> Maybe UTCTime
forall (m :: * -> *) a. MonadPlus m => m a -> m a -> m a
mplus Maybe UTCTime
forall a. Maybe a
Nothing ([Maybe UTCTime] -> Maybe UTCTime)
-> [Maybe UTCTime] -> Maybe UTCTime
forall a b. (a -> b) -> a -> b
$
    (String -> Maybe UTCTime) -> DownloadOptions -> [Maybe UTCTime]
forall a b. (a -> b) -> [a] -> [b]
map (\ String
fmt -> Bool -> TimeLocale -> String -> String -> Maybe UTCTime
forall (m :: * -> *) t.
(MonadFail m, ParseTime t) =>
Bool -> TimeLocale -> String -> String -> m t
parseTimeM Bool
True TimeLocale
defaultTimeLocale String
fmt (ShowS
trimString String
string))
        DownloadOptions
formats

trimString :: String -> String
trimString :: ShowS
trimString = ShowS
forall a. [a] -> [a]
reverse ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
forall a. [a] -> [a]
reverse ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace

parseHttpTime :: String -> Maybe UTCTime
parseHttpTime :: String -> Maybe UTCTime
parseHttpTime =
    DownloadOptions -> String -> Maybe UTCTime
tryParseTime
    [String
"%a, %e %b %Y %k:%M:%S %Z" -- Sun, 06 Nov 1994 08:49:37 GMT
    ,String
"%A, %e-%b-%y %k:%M:%S %Z" -- Sunday, 06-Nov-94 08:49:37 GMT
    ,String
"%a %b %e %k:%M:%S %Y"     -- Sun Nov  6 08:49:37 1994
    ]

globalDownloader :: Downloader
globalDownloader :: Downloader
globalDownloader = IO Downloader -> Downloader
forall a. IO a -> a
unsafePerformIO (IO Downloader -> Downloader) -> IO Downloader -> Downloader
forall a b. (a -> b) -> a -> b
$ DownloaderSettings -> IO Downloader
newDownloader DownloaderSettings
forall a. Default a => a
def
{-# NOINLINE globalDownloader #-}

-- | Download single URL with default 'DownloaderSettings'.
-- Fails if result is not 'DROK'.
urlGetContents :: String -> IO B.ByteString
urlGetContents :: String -> BodyReader
urlGetContents String
url = do
    DownloadResult
r <- Downloader
-> String
-> Maybe HostAddress
-> DownloadOptions
-> IO DownloadResult
download Downloader
globalDownloader String
url Maybe HostAddress
forall a. Maybe a
Nothing []
    case DownloadResult
r of
        DROK ByteString
c DownloadOptions
_ -> ByteString -> BodyReader
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
c
        DownloadResult
e -> String -> BodyReader
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> BodyReader) -> String -> BodyReader
forall a b. (a -> b) -> a -> b
$ String
"urlGetContents " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> String
show String
url String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
": " String -> ShowS
forall a. [a] -> [a] -> [a]
++ DownloadResult -> String
forall a. Show a => a -> String
show DownloadResult
e

-- | Post data and download single URL with default 'DownloaderSettings'.
-- Fails if result is not 'DROK'.
urlGetContentsPost :: String -> B.ByteString -> IO B.ByteString
urlGetContentsPost :: String -> ByteString -> BodyReader
urlGetContentsPost String
url ByteString
dat = do
    DownloadResult
r <- Downloader
-> String -> Maybe HostAddress -> ByteString -> IO DownloadResult
post Downloader
globalDownloader String
url Maybe HostAddress
forall a. Maybe a
Nothing ByteString
dat
    case DownloadResult
r of
        DROK ByteString
c DownloadOptions
_ -> ByteString -> BodyReader
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
c
        DownloadResult
e -> String -> BodyReader
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> BodyReader) -> String -> BodyReader
forall a b. (a -> b) -> a -> b
$ String
"urlGetContentsPost " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> String
show String
url String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
": " String -> ShowS
forall a. [a] -> [a] -> [a]
++ DownloadResult -> String
forall a. Show a => a -> String
show DownloadResult
e