module Network.HTTP.Conduit.Downloader
(
urlGetContents, urlGetContentsPost
, download, post, downloadG
, DownloadResult(..), DownloadOptions
, DownloaderSettings(..)
, Downloader, withDownloader, withDownloaderSettings, newDownloader
, postRequest, sinkByteString
) where
import Control.Monad.Trans
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Internal as B
import Control.Monad
import qualified Control.Exception as E
import Data.Default as C
import Data.String
import Data.Char
import Data.Maybe
import Data.List
import Foreign
import qualified Network.Socket as NS
import qualified Network.TLS as TLS
import qualified Network.HTTP.Types as N
import qualified Network.HTTP.Conduit as C
import qualified Network.Connection as NC
import Network.HTTP.Client.Internal (makeConnection, Connection)
import qualified Control.Monad.Trans.Resource as C
import qualified Data.Conduit as C
import System.Timeout.Lifted
import Codec.Compression.Zlib.Raw as Deflate
import Network.URI
import Data.Time.Format
import System.Locale
import Data.Time.Clock
import Data.Time.Clock.POSIX
import System.IO
data DownloadResult
= DROK B.ByteString DownloadOptions
| DRRedirect String
| DRError String
| DRNotModified
deriving (Show, Read, Eq)
type DownloadOptions = [String]
data DownloaderSettings
= DownloaderSettings
{ dsUserAgent :: B.ByteString
, dsTimeout :: Int
, dsManagerSettings :: C.ManagerSettings
, dsMaxDownloadSize :: Int
}
instance Default DownloaderSettings where
def =
DownloaderSettings
{ dsUserAgent = "Mozilla/5.0 (compatible; HttpConduitDownloader/1.0; +http://hackage.haskell.org/package/http-conduit-downloader)"
, dsTimeout = 30
, dsManagerSettings =
(C.mkManagerSettings tls Nothing)
{ C.managerTlsConnection =
getTlsConnection (Just tls)
}
, dsMaxDownloadSize = 10*1024*1024
}
where tls = NC.TLSSettingsSimple True False False
getTlsConnection :: Maybe NC.TLSSettings
-> IO (Maybe NS.HostAddress -> String -> Int -> IO Connection)
getTlsConnection tls = do
context <- NC.initConnectionContext
return $ \ mbha host port -> do
cf <- case mbha of
Nothing -> return $ NC.connectTo context
Just ha -> do
handle <- openSocketHandle ha port
return $ NC.connectFromHandle context handle
conn <- cf $ NC.ConnectionParams
{ NC.connectionHostname = host
, NC.connectionPort = fromIntegral port
, NC.connectionUseSecure = tls
, NC.connectionUseSocks = Nothing
}
convertConnection conn
where
convertConnection conn = makeConnection
(NC.connectionGetChunk conn)
(NC.connectionPut conn)
(NC.connectionClose conn `E.catch` \(_ :: E.IOException) -> return ())
openSocketHandle :: NS.HostAddress
-> Int
-> IO Handle
openSocketHandle ha port = do
let addr = NS.AddrInfo
{ NS.addrFlags = []
, NS.addrFamily = NS.AF_INET
, NS.addrSocketType = NS.Stream
, NS.addrProtocol = 6
, NS.addrAddress = NS.SockAddrInet (toEnum port) ha
, NS.addrCanonName = Nothing
}
E.bracketOnError
(NS.socket (NS.addrFamily addr) (NS.addrSocketType addr)
(NS.addrProtocol addr))
(NS.sClose)
(\sock -> do
NS.setSocketOption sock NS.NoDelay 1
NS.connect sock (NS.addrAddress addr)
NS.socketToHandle sock ReadWriteMode)
data Downloader
= Downloader
{ manager :: C.Manager
, settings :: DownloaderSettings
}
newDownloader :: DownloaderSettings -> IO Downloader
newDownloader s = do
m <- C.newManager $ dsManagerSettings s
return $ Downloader m s
withDownloader :: (Downloader -> IO a) -> IO a
withDownloader = withDownloaderSettings def
withDownloaderSettings :: DownloaderSettings -> (Downloader -> IO a) -> IO a
withDownloaderSettings s f = C.runResourceT $ do
(_, m) <- C.allocate (C.newManager $ dsManagerSettings s) C.closeManager
liftIO $ f (Downloader m s)
parseUrl :: String -> Either C.HttpException C.Request
parseUrl = C.parseUrl . takeWhile (/= '#')
download :: Downloader
-> String
-> Maybe NS.HostAddress
-> DownloadOptions
-> IO DownloadResult
download = downloadG return
post :: Downloader -> String -> Maybe NS.HostAddress -> B.ByteString
-> IO DownloadResult
post d url ha dat =
downloadG (return . postRequest dat) d url ha []
postRequest :: B.ByteString -> C.Request -> C.Request
postRequest dat rq =
rq { C.method = N.methodPost
, C.requestBody = C.RequestBodyBS dat }
downloadG ::
(C.Request -> C.ResourceT IO C.Request)
-> Downloader
-> String
-> Maybe NS.HostAddress
-> DownloadOptions
-> IO DownloadResult
downloadG f (Downloader {..}) url hostAddress opts =
case parseUrl url of
Left e -> httpExceptionToDR url e
Right rq -> do
let rq1 = rq { C.requestHeaders =
[("Accept", "*/*")
,("User-Agent", dsUserAgent settings)
]
++ map toHeader opts
++ C.requestHeaders rq
, C.redirectCount = 0
, C.responseTimeout = Nothing
, C.hostAddress = hostAddress
}
req <- C.runResourceT $ f rq1
let dl firstTime = do
r <- C.runResourceT (timeout (dsTimeout settings * 1000000) $ do
r <- C.http req manager
mbb <- C.responseBody r C.$$+-
sinkByteString (dsMaxDownloadSize settings)
case mbb of
Just b -> do
let c = C.responseStatus r
h = C.responseHeaders r
d = tryDeflate h b
curTime <- liftIO $ getCurrentTime
return $ makeDownloadResultC curTime url c h d
Nothing -> return $ DRError "Too much data")
`E.catch`
(fmap Just . httpExceptionToDR url)
`E.catch`
(return . Just . handshakeFailed)
`E.catch`
(return . Just . someException)
case r of
Just (DRError e)
| ("EOF reached" `isSuffixOf` e ||
e == "Invalid HTTP status line:\n"
) && firstTime ->
dl False
_ ->
return $ fromMaybe (DRError "Timeout") r
dl True
where toHeader :: String -> N.Header
toHeader h = let (a,b) = break (== ':') h in
(fromString a, fromString (tail b))
handshakeFailed (TLS.Terminated _ e tlsError) =
DRError $ "SSL terminated:\n" ++ show tlsError
handshakeFailed (TLS.HandshakeFailed tlsError) =
DRError $ "SSL handshake failed:\n" ++ show tlsError
handshakeFailed TLS.ConnectionNotEstablished =
DRError $ "SSL connection not established"
someException :: E.SomeException -> DownloadResult
someException e = case show e of
"<<timeout>>" -> DRError "Timeout"
s -> DRError s
tryDeflate headers b
| Just d <- lookup "Content-Encoding" headers
, B.map toLower d == "deflate"
= B.concat $ BL.toChunks $ Deflate.decompress $
BL.fromChunks [b]
| otherwise = b
httpExceptionToDR :: Monad m => String -> C.HttpException -> m DownloadResult
httpExceptionToDR url exn = return $ case exn of
C.StatusCodeException c h _ ->
makeDownloadResultC
(posixSecondsToUTCTime 0) url c h ""
C.InvalidUrlException _ e -> DRError $ "Invalid URL: " ++ e
C.TooManyRedirects _ -> DRError "Too many redirects"
C.UnparseableRedirect _ -> DRError "Unparseable redirect"
C.TooManyRetries -> DRError "Too many retries"
C.HttpParserException e -> DRError $ "HTTP parser error: " ++ e
C.HandshakeFailed -> DRError "Handshake failed"
C.OverlongHeaders -> DRError "Overlong HTTP headers"
C.ResponseTimeout -> DRError "Timeout"
C.FailedConnectionException _host _port -> DRError "Connection failed"
C.ExpectedBlankAfter100Continue -> DRError "Expected blank after 100 (Continue)"
C.InvalidStatusLine l -> DRError $ "Invalid HTTP status line:\n" ++ B.unpack l
C.NoResponseDataReceived -> DRError "No response data received"
C.TlsException e -> DRError $ "TLS exception:\n" ++ show e
C.InvalidHeader h -> DRError $ "Invalid HTTP header:\n" ++ B.unpack h
C.InternalIOException e ->
case show e of
"<<timeout>>" -> DRError "Timeout"
s -> DRError s
C.ProxyConnectException {..} -> DRError "Can't connect to proxy"
C.ResponseBodyTooShort _ _ -> DRError "Response body too short"
C.InvalidChunkHeaders -> DRError "Invalid chunk headers"
C.TlsNotSupported -> DRError "TLS not supported"
C.IncompleteHeaders -> DRError "Incomplete headers"
bufSize :: Int
bufSize = 32 * 1024 overhead
where overhead = 2 * sizeOf (undefined :: Int)
newBuf :: IO B.ByteString
newBuf = do
fp <- B.mallocByteString bufSize
return $ B.PS fp 0 0
addBs :: [B.ByteString] -> B.ByteString -> B.ByteString
-> IO ([B.ByteString], B.ByteString)
addBs acc (B.PS bfp _ bl) (B.PS sfp offs sl) = do
let cpSize = min (bufSize bl) sl
bl' = bl + cpSize
withForeignPtr bfp $ \ dst -> withForeignPtr sfp $ \ src ->
B.memcpy (dst `plusPtr` bl) (src `plusPtr` offs) (toEnum cpSize)
if bl' == bufSize then do
buf' <- newBuf
addBs (B.PS bfp 0 bufSize : acc) buf'
(B.PS sfp (offs + cpSize) (sl cpSize))
else do
return (acc, B.PS bfp 0 bl')
sinkByteString :: MonadIO m => Int -> C.Sink B.ByteString m (Maybe B.ByteString)
sinkByteString limit = do
buf <- liftIO $ newBuf
go 0 [] buf
where go len acc buf = do
mbinp <- C.await
case mbinp of
Just inp -> do
(acc', buf') <- liftIO $ addBs acc buf inp
let len' = len + B.length inp
if len' > limit then
return Nothing
else
go len' acc' buf'
Nothing -> do
return $ Just $ B.concat $ reverse (buf:acc)
makeDownloadResultC :: UTCTime -> String -> N.Status -> N.ResponseHeaders
-> B.ByteString -> DownloadResult
makeDownloadResultC curTime url c headers b = do
if N.statusCode c == 304 then
DRNotModified
else if N.statusCode c `elem`
[ 300
, 301
, 302
, 303
, 307
] then
case lookup "location" headers of
Just (B.unpack -> loc) ->
redirect $
relUri (takeWhile (/= '#') $ dropWhile (== ' ') loc)
_ ->
DRError $ "Redirect status, but no Location field\n"
++ B.unpack (N.statusMessage c) ++ "\n"
++ unlines (map show headers)
else if N.statusCode c >= 300 then
DRError $ "HTTP " ++ show (N.statusCode c) ++ " "
++ B.unpack (N.statusMessage c)
else
DROK b (redownloadOpts [] headers)
where redirect r
| otherwise = DRRedirect r
redownloadOpts acc [] = reverse acc
redownloadOpts _ (("Pragma", B.map toLower -> tag) : _)
| "no-cache" `B.isInfixOf` tag = []
redownloadOpts _ (("Cache-Control", B.map toLower -> tag) : _)
| any (`B.isInfixOf` tag)
["no-cache", "no-store", "must-revalidate", "max-age=0"] = []
redownloadOpts acc (("Expires", time):xs)
| ts <- B.unpack time
, Just t <- parseHttpTime ts
, t > curTime =
redownloadOpts acc xs
| otherwise = []
redownloadOpts acc (("ETag", tag):xs) =
redownloadOpts (("If-None-Match: " ++ B.unpack tag) : acc) xs
redownloadOpts acc (("Last-Modified", time):xs)
| ts <- B.unpack time
, Just t <- parseHttpTime ts
, t <= curTime =
redownloadOpts (("If-Modified-Since: " ++ B.unpack time) : acc) xs
redownloadOpts acc (_:xs) = redownloadOpts acc xs
relUri r =
fromMaybe r $
fmap (($ "") . uriToString id) $
liftM2 relativeTo
(parseURIReference $ trim r)
(parseURI url)
trim = reverse . dropWhile isSpace . reverse . dropWhile isSpace
tryParseTime :: [String] -> String -> Maybe UTCTime
tryParseTime formats string =
foldr mplus Nothing $
map (\ fmt -> parseTime defaultTimeLocale fmt (trimString string)) formats
where trimString = reverse . dropWhile isSpace . reverse . dropWhile isSpace
parseHttpTime :: String -> Maybe UTCTime
parseHttpTime =
tryParseTime
["%a, %e %b %Y %k:%M:%S %Z"
,"%A, %e-%b-%y %k:%M:%S %Z"
,"%a %b %e %k:%M:%S %Y"
]
urlGetContents :: String -> IO B.ByteString
urlGetContents url = withDownloader $ \ d -> do
r <- download d url Nothing []
case r of
DROK c _ -> return c
e -> fail $ "urlGetContents " ++ show url ++ ": " ++ show e
urlGetContentsPost :: String -> B.ByteString -> IO B.ByteString
urlGetContentsPost url dat = withDownloader $ \ d -> do
r <- post d url Nothing dat
case r of
DROK c _ -> return c
e -> fail $ "urlGetContentsPost " ++ show url ++ ": " ++ show e