-------------------------------------------------------------------- -- | -- Module : Network.Download -- Copyright : (c) Don Stewart -- License : BSD3 -- -- Maintainer: Don Stewart -- Stability : provisional -- Portability: posix -- -- A binding to libdownload, a efficient, high‐level library for -- retrieving files using Uniform Resource Locators (URLs). This -- provides simple, uniform access to file, ftp and http resources. -- Content may be retrieved as a string, bytestring or as parsed tags. -- -- Example: -- -- > s <- openURI "http://haskell.org" -- -------------------------------------------------------------------- module Network.Download ( openURI , openURIString , parseURI ) where import Foreign import Foreign.C.Types import Foreign.C.String import qualified Foreign.Concurrent as C import Control.Monad import Control.Monad.Instances import Data.Word import System.IO import System.IO.Unsafe import Control.Exception import qualified Data.ByteString as S import qualified Data.ByteString.Unsafe as S import qualified Data.ByteString.Internal as S import qualified Data.ByteString.Char8 as Char8 import Text.HTML.TagSoup #include "download.h" ------------------------------------------------------------------------ -- Some API ideas: -- | Download content specified by url (in RFC1738 form), using either -- ftp, http or file protocols, returning the content as a strict -- 'ByteString'. -- -- If the url is malformed, a 'Left' value is returned. -- Similarly, if an error occurs, 'Left' is returned, with a -- protocol-specific error string. -- -- If the file protocol is used, documents will be retrieved from the -- local filesystem. If the ftp scheme is used, the FTP protocol -- (RFC959) is used. If no user name or passoword are provided, -- anonymous login, with user name "anonymous" and password "anonymous@". -- will be attempted. -- -- If the http method is used, HTTP/1.1 will be used. -- -- Examples: -- -- > openURI "http://haskell.org" -- openURI :: String -> IO (Either String S.ByteString) openURI s = case parseURL s of Nothing -> return $ Left $ "Malformed url: "++ s Just url -> do e <- getFile url [] case e of Left err -> return $ Left $ "Failed to connect: " ++ err Right s -> return $ Right s -- | Like openURI, but returns the result as a 'String' -- openURIString :: String -> IO (Either String String) openURIString s = (fmap Char8.unpack) `fmap` openURI s -- | Download the content via openURI, but return it as a list of parsed -- tags using the tagsoup html parser. -- parseURI :: String -> IO (Either String [Tag]) parseURI s = (fmap parseTags) `fmap` openURIString s ------------------------------------------------------------------------ -- Internal: -- A data type, tracked by the garbage collector, that handles -- libdownload's url structures. -- data URL = URL {-# UNPACK #-} !(ForeignPtr URL_) deriving (Eq, Ord, Show) data URL_ = URL_ -- | Takes a URL in the form of a String and splits it into its -- components function according to the Common Internet Scheme Syntax -- detailed in RFC1738. -- parseURL :: String -> Maybe URL parseURL s = unsafePerformIO $ withCString s $ \cstr -> do p <- c_parseURL cstr if p == nullPtr then return Nothing else do fp <- C.newForeignPtr p (c_freeURL p) -- The pointer returned by downloadMakeURL() -- or downloadParseURL() should be freed -- using downloadFreeURL() return . Just $ URL fp -- struct url *downloadParseURL(const char *); foreign import ccall unsafe "downloadParseURL" c_parseURL :: CString -> IO (Ptr URL_) -- void downloadFreeURL(struct url *); foreign import ccall unsafe "downloadFreeURL" c_freeURL :: Ptr URL_ -> IO () ------------------------------------------------------------------------ -- The flags argument is a string of characters which specify -- transfer options. The meaning of the individual flags is -- scheme‐dependent -- data Flag = Passive -- ^ a passive (rather than active) connection will be attempted. | Low -- ^ data sockets will be allocated in the low (or -- default) port range instead of the high port range | Direct -- ^ use a direct connection even if a proxy server is defined deriving (Eq,Show) encodeFlag :: Flag -> Char encodeFlag Passive = 'p' encodeFlag Low = 'l' encodeFlag Direct = 'd' ------------------------------------------------------------------------ -- The flags argument is a string of characters which specify -- transferoptions. The meaning of the individual flags is -- scheme‐dependent, and is detailed in the appropriate section below. -- getFile :: URL -> [Flag] -> IO (Either String S.ByteString) getFile (URL fp) flags = do withForeignPtr fp $ \c_url -> withCString (map encodeFlag flags) $ \c_flags -> bracket (do sp <- c_get c_url c_flags dl <- lastErrorCode if dl /= dlerr_none || sp == nullPtr then return Nothing else return $ Just sp) -- ll other functions return a stream pointer which may be used to -- access the requested document, or NULL if an error occurred. -- (maybe (return ()) c_fclose) $ \sp -> case sp of Nothing -> Left `fmap` lastError Just stream -> load stream -- but if we can stat it, we can be more efficient. where load :: Ptr CFile -> IO (Either String S.ByteString) load stream = do let start = 1024 p <- mallocBytes start i <- c_fread p 1 (fromIntegral start) stream -- I hate C error handling dl_err <- lastErrorCode st_err <- c_ferror stream if dl_err /= dlerr_none || st_err /= 0 then do free p Left `fmap` lastError else if i < fromIntegral start then do p' <- reallocBytes p (fromIntegral i) fp <- newForeignPtr finalizerFree p' return (Right $! S.fromForeignPtr fp 0 (fromIntegral i)) else go stream p start -- duplicated too much code go stream p n = do let n' = 2 * n p' <- reallocBytes p n' i <- c_fread (p' `plusPtr` n) 1 (fromIntegral n) stream dl_err <- lastErrorCode st_err <- c_ferror stream if dl_err /= dlerr_none || st_err /= 0 then do free p Left `fmap` lastError else if i < fromIntegral n then do let i' = n + fromIntegral i p'' <- reallocBytes p' i' fp <- newForeignPtr finalizerFree p'' return (Right $! S.fromForeignPtr fp 0 (fromIntegral i')) else go stream p' n' ------------------------------------------------------------------------ -- size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); -- foreign import ccall unsafe "fread" c_fread :: Ptr Word8 -> CSize -> CSize -> Ptr CFile -> IO CSize -- int fclose(FILE *fp); -- foreign import ccall unsafe "fclose" c_fclose :: Ptr CFile -> IO () -- ignoring CInt -- int ferror(FILE *stream); -- foreign import ccall unsafe "ferror" c_ferror :: Ptr CFile -> IO CInt ------------------------------------------------------------------------ foreign import ccall unsafe "downloadGet" c_get :: Ptr URL_ -> CString -> IO (Ptr CFile) foreign import ccall unsafe "hs_download_utils.h hs_download_last_error" c_lastErrorString :: IO CString lastError :: IO String lastError = peekCString =<< c_lastErrorString -- Wrap this safely... foreign import ccall unsafe "hs_download_utils.h hs_download_last_error_code" lastErrorCode :: IO DLError ------------------------------------------------------------------------ version :: String version = #const_str USER_AGENT_STRING ------------------------------------------------------------------------ type URLLen = Int #{enum URLLen, , url_schemelen = URL_SCHEMELEN , url_userlen = URL_USERLEN , url_pwdlen = URL_PWDLEN } ------------------------------------------------------------------------ newtype Scheme = Scheme { unScheme :: String } deriving (Eq, Show) scheme_ftp, scheme_http, scheme_https, scheme_file :: Scheme scheme_ftp = Scheme #const_str SCHEME_FTP scheme_http = Scheme #const_str SCHEME_HTTP scheme_https = Scheme #const_str SCHEME_HTTPS scheme_file = Scheme #const_str SCHEME_FILE ------------------------------------------------------------------------ newtype DLError = DLError { unDLError :: CInt } deriving (Eq, Show) #{enum DLError, DLError , dlerr_none = DLERR_NONE , dlerr_abort = DLERR_ABORT , dlerr_auth = DLERR_AUTH , dlerr_down = DLERR_DOWN , dlerr_exists = DLERR_EXISTS , dlerr_full = DLERR_FULL , dlerr_info = DLERR_INFO , dlerr_memory = DLERR_MEMORY , dlerr_moved = DLERR_MOVED , dlerr_network = DLERR_NETWORK , dlerr_ok = DLERR_OK , dlerr_proto = DLERR_PROTO , dlerr_resolv = DLERR_RESOLV , dlerr_server = DLERR_SERVER , dlerr_temp = DLERR_TEMP , dlerr_timeout = DLERR_TIMEOUT , dlerr_unavail = DLERR_UNAVAIL , dlerr_unknown = DLERR_UNKNOWN , dlerr_url = DLERR_URL , dlerr_verbose = DLERR_VERBOSE }