-------------------------------------------------------------------------------- -- | -- Module : Network.URL -- Copyright : (c) Galois, Inc. 2007, 2008 -- License : BSD3 -- -- Maintainer : Iavor S. Diatchki -- Stability : Provisional -- Portability : Portable -- -- Provides a convenient way for working with HTTP URLs. -- Based on RFC 1738. -- See also: RFC 3986 module Network.URL ( URL(..), URLType(..), Host(..), Protocol(..) , secure, secure_prot , exportURL, importURL, exportHost , add_param , decString, encString , ok_host, ok_url, ok_param, ok_path , exportParams, importParams ) where import Data.Char (isAlpha, isAscii, isDigit) import Data.List (intersperse) import Data.Word (Word8) import Numeric (readHex, showHex) import qualified Codec.Binary.UTF8.String as UTF8 -- | Contains information about the connection to the host. data Host = Host { protocol :: Protocol , host :: String , port :: Maybe Integer } deriving (Eq,Ord,Show) -- | The type of known protocols. data Protocol = HTTP Bool | FTP Bool | RawProt String deriving (Eq,Ord,Show) -- | Is this a \"secure\" protocol. This works only for known protocols, -- for 'RawProt' values we return 'False'. secure_prot :: Protocol -> Bool secure_prot (HTTP s) = s secure_prot (FTP s) = s secure_prot (RawProt _) = False -- | Does this host use a \"secure\" protocol (e.g., https). secure :: Host -> Bool secure = secure_prot . protocol -- | Different types of URL. data URLType = Absolute Host -- ^ Has a host | HostRelative -- ^ Does not have a host | PathRelative -- ^ Relative to another URL deriving (Eq, Ord, Show) -- | A type for working with URL. -- The parameters are in @application\/x-www-form-urlencoded@ format: -- data URL = URL { url_type :: URLType , url_path :: String , url_params :: [(String,String)] } deriving (Eq,Ord,Show) -- | Add a (key,value) parameter to a URL. add_param :: URL -> (String,String) -> URL add_param url x = url { url_params = x : url_params url } -- | Convert a list of \"bytes\" to a URL. importURL :: String -> Maybe URL importURL cs0 = do (ho,cs5) <- front cs0 (pa,cs6) <- the_path cs5 as <- the_args cs6 return URL { url_type = ho, url_path = pa, url_params = as } where front ('/':cs) = return (HostRelative,cs) front cs = case the_prot cs of Just (pr,cs1) -> do let (ho,cs2) = the_host cs1 (po,cs3) <- the_port cs2 cs4 <- case cs3 of [] -> return [] '/':cs5 -> return cs5 _ -> Nothing return (Absolute Host { protocol = pr , host = ho , port = po }, cs4) _ -> return (PathRelative,cs) the_prot :: String -> Maybe (Protocol, String) the_prot urlStr = case break (':' ==) urlStr of (as@(_:_), ':' : '/' : '/' : bs) -> Just (prot, bs) where prot = case as of "https" -> HTTP True "http" -> HTTP False "ftps" -> FTP True "ftp" -> FTP False _ -> RawProt as _ -> Nothing the_host = span ok_host the_port (':':cs) = case span isDigit cs of ([],_) -> Nothing (xs,ds) -> Just (Just (read xs),ds) the_port cs5 = return (Nothing, cs5) the_path cs = do let (as,bs) = break end_path cs s <- decString False as return (s,bs) where end_path c = c == '#' || c == '?' the_args ('?' : cs) = importParams cs the_args _ = return [] importParams :: String -> Maybe [(String,String)] importParams [] = return [] importParams ds = mapM a_param (breaks ('&'==) ds) where a_param cs = do let (as,bs) = break ('=' ==) cs k <- decString True as v <- case bs of "" -> return "" _:xs -> decString True xs return (k,v) -- | Convert the host part of a URL to a list of \"bytes\". exportHost :: Host -> String exportHost absol = the_prot ++ "://" ++ host absol ++ the_port where the_prot = exportProt (protocol absol) the_port = maybe "" (\x -> ':' : show x) (port absol) -- | Convert the host part of a URL to a list of \"bytes\". -- WARNING: We output \"raw\" protocols as they are. exportProt :: Protocol -> String exportProt prot = case prot of HTTP True -> "https" HTTP False -> "http" FTP True -> "ftps" FTP False -> "ftp" RawProt s -> s -- | Convert a URL to a list of \"bytes\". -- We represent non-ASCII characters using UTF8. exportURL :: URL -> String exportURL url = absol ++ the_path ++ the_params where absol = case url_type url of Absolute hst -> exportHost hst ++ "/" HostRelative -> "/" PathRelative -> "" the_path = encString False ok_path (url_path url) the_params = case url_params url of [] -> "" xs -> '?' : exportParams xs exportParams :: [(String,String)] -> String exportParams ps = concat (intersperse "&" $ map a_param ps) where a_param (k,mv) = encString True ok_param k ++ case mv of "" -> "" v -> '=' : encString True ok_param v -- | Convert a string to bytes by escaping the characters that -- do not satisfy the input predicate. The first argument specifies -- if we should replace spaces with +. encString :: Bool -> (Char -> Bool) -> String -> String encString pl p ys = foldr enc1 [] ys where enc1 ' ' xs | pl = '+' : xs enc1 x xs = if p x then x : xs else encChar x ++ xs -- | %-encode a character. Uses UTF8 to represent characters as bytes. encChar :: Char -> String encChar c = concatMap encByte (UTF8.encode [c]) -- | %-encode a byte. encByte :: Word8 -> String encByte b = '%' : case showHex b "" of d@[_] -> '0' : d d -> d -- | Decode a list of \"bytes\" to a string. -- Performs % and UTF8 decoding. decString :: Bool -> String -> Maybe String decString b = fmap UTF8.decode . decStrBytes b -- Convert a list of \"bytes\" to actual bytes. -- Performs %-decoding. The boolean specifies if we should turn pluses into -- spaces. decStrBytes :: Bool -> String -> Maybe [Word8] decStrBytes _ [] = Just [] decStrBytes p ('%' : cs) = do (n,cs1) <- decByte cs fmap (n:) (decStrBytes p cs1) decStrBytes p (c : cs) = let b = if p && c == '+' then 32 -- space else fromIntegral (fromEnum c) in (b :) `fmap` decStrBytes p cs -- truncates "large bytes". -- | Parse a percent-encoded byte. decByte :: String -> Maybe (Word8,String) decByte (x : y : cs) = case readHex [x,y] of [(n,"")] -> Just (n,cs) _ -> Nothing decByte _ = Nothing -- Classification of characters. -- Note that these only return True for ASCII characters; this is important. -------------------------------------------------------------------------------- ok_host :: Char -> Bool ok_host c = isDigit c || isAlphaASCII c || c == '.' || c == '-' ok_param :: Char -> Bool ok_param c = ok_host c || c `elem` "~;:@$_!*'()," -- | Characters that can appear non % encoded in the path part of the URL ok_path :: Char -> Bool ok_path c = ok_param c || c `elem` "/=&" -- XXX: others? check RFC -- | Characters that do not need to be encoded in URL ok_url :: Char -> Bool ok_url c = isDigit c || isAlphaASCII c || c `elem` ".-;:@$_!*'(),/=&?~+" -- Misc -------------------------------------------------------------------------------- isAlphaASCII :: Char -> Bool isAlphaASCII x = isAscii x && isAlpha x breaks :: (a -> Bool) -> [a] -> [[a]] breaks p xs = case break p xs of (as,[]) -> [as] (as,_:bs) -> as : breaks p bs