{-# LANGUAGE TemplateHaskell #-} module Network.Protocol.Uri ( URI , Scheme , IPv4 , Domain , RegName , Port , Query , Fragment , Hash , UserInfo , PathSegment , Parameters , Path , Host , Authority , encode , decode , mkURI , mkScheme , mkPath , mkAuthority , mkQuery , mkFragment , mkUserinfo , mkHost , mkPort , absolute , segments , domain , regname , ipv4 , userinfo , host , port , relative , scheme , authority , path , query , fragment , parseURI , parseAbsoluteURI , parseAuthority , parsePath , parseHost , pUriReference , pAbsoluteURI , pAuthority , pPath , pHost , parseQueryParams , queryParams , extension , mkPathRelative , mimetype , normalize , jail , (/+) ) where import Control.Applicative import Data.Bits import Data.Char (ord, chr, isDigit, isAlphaNum, intToDigit, isHexDigit) import Data.List (intercalate, isPrefixOf) import Data.Maybe (mapMaybe, fromJust) import Data.Record.Label import Misc.Misc (eitherToMaybe, (@@), bool, pMaybe, split) import Network.Protocol.Mime import System.FilePath.Posix import Text.ParserCombinators.Parsec hiding (many, optional, (<|>)) -------[ data type definition of URIs ]---------------------------------------- type Scheme = String type IPv4 = [Int] -- actually 4-tupel type Domain = [String] type RegName = String type Port = Int type Query = String type Fragment = String type Hash = String type UserInfo = String type PathSegment = String type Parameters = [(String, Maybe String)] data Path = Path { _absolute :: Bool , _segments :: [PathSegment] } deriving Eq data Host = Hostname { __domain :: Domain } | RegName { __regname :: String } | IPv4 { __ipv4 :: IPv4 } deriving Eq data Authority = Authority { __userinfo :: UserInfo , __host :: Host , __port :: Port } deriving Eq data URI = URI { _relative :: Bool , _scheme :: Scheme , _authority :: Authority , __path :: Path , __query :: Query , _fragment :: Fragment } deriving Eq $(mkLabels [''Path, ''Host, ''Authority, ''URI]) _domain :: Label Host Domain _host :: Label Authority Host _ipv4 :: Label Host IPv4 _path :: Label URI Path _port :: Label Authority Port _query :: Label URI Query _regname :: Label Host String _userinfo :: Label Authority UserInfo absolute :: Label Path Bool authority :: Label URI Authority domain :: Label URI Domain fragment :: Label URI Fragment host :: Label URI String ipv4 :: Label URI IPv4 path :: Label URI FilePath port :: Label URI Port query :: Label URI Query regname :: Label URI String relative :: Label URI Bool scheme :: Label URI Scheme segments :: Label Path [PathSegment] userinfo :: Label URI UserInfo -- Public label based on private labels. domain = _domain % _host % authority regname = _regname % _host % authority ipv4 = _ipv4 % _host % authority userinfo = _userinfo % authority port = _port % authority query = Label { lget = decode . lget _query , lset = lset _query . encode } host = Label { lget = show . lget (_host % authority) , lset = lset (_host % authority) . fromJust . parseHost } path = Label { lget = decode . show . lget _path , lset = lset _path . fromJust . parsePath . encode } -- testUri0 :: URI -- testUri0 = fromJust $ parseURI "http://sebas@hs.spugium.net:8080/wiki/first%20section/chapter-2.pdf?additional=vars" -- testUri1 :: URI -- testUri1 = fromJust $ parseURI "http://cs.uu.nl/docs/../docs/vakken/../vakken/" -------[ creating, selection and modifying URIs ]------------------------------ -- pseudo constructors for `empty values' mkURI :: URI mkURI = URI False mkScheme mkAuthority mkPath mkQuery mkFragment mkScheme :: Scheme mkScheme = "" mkPath :: Path mkPath = Path False [] mkAuthority :: Authority mkAuthority = Authority "" mkHost mkPort mkQuery :: Query mkQuery = "" mkFragment :: Fragment mkFragment = "" mkUserinfo :: UserInfo mkUserinfo = "" mkHost :: Host mkHost = Hostname [] mkPort :: Port mkPort = (-1) -------[ path encoding and decoding ]------------------------------------------ encode :: String -> String encode = concatMap encodeChr where encodeChr c | unreserved c || genDelims c || subDelims c = [c] | otherwise = '%' : intToDigit (shiftR (ord c) 4) : intToDigit ((ord c) .&. 0x0F) : [] decode :: String -> String decode [] = [] decode ('%':d:e:ds) | isHexDigit d && isHexDigit e = f d e : decode ds where f a b = chr $ (ord a-ord '0') * 16 + (ord b - ord '0') decode (d:ds) = d : decode ds -------[ show instance for URIs ]---------------------------------------------- -- TODO: cleanup and test this mess: QuickCheck property for testing: -- parse $ show u == u /\ show $ parse u' = u' -- TODO: ShowS instance Show Path where show (Path a s) = (if a then "/" else "") ++ (intercalate "/" s) instance Show Host where show (Hostname d) = if null d then "" else intercalate "." d show (IPv4 a) = intercalate "." $ map show a show (RegName r) = r instance Show Authority where show (Authority u h p) = (if hst h then "//" else "") ++ (if null u then "" else u ++ "@") ++ show h ++ (if p == -1 then "" else ":" ++ show p) where hst (Hostname d) = not $ null d hst (RegName d) = not $ null d hst _ = True instance Show URI where show (URI r s a p q f) = (if not r then (if null s then "" else s ++ ":") else "") ++ (show a) ++ (show p) ++ (if null q then "" else "?" ++ q) ++ (if null f then "" else "#" ++ f) -------[ global URI parse interface ]------------------------------------------ -- Top level parsers. parseURI :: String -> Maybe URI parseURI = eitherToMaybe . parse pUriReference "" parseAbsoluteURI :: String -> Maybe URI parseAbsoluteURI = eitherToMaybe . parse pAbsoluteURI "" parseAuthority :: String -> Maybe Authority parseAuthority = eitherToMaybe . parse pAuthority "" parsePath :: String -> Maybe Path parsePath = eitherToMaybe . parse pPath "" parseHost :: String -> Maybe Host parseHost = eitherToMaybe . parse pHost "" -------[ parsing URIs according to rfc2396 ]----------------------------------- -- D.2. Modifications pAlpha, pDigit, pAlphanum :: GenParser Char st Char pAlpha = letter pDigit = digit pAlphanum = alphaNum -- 2.3. Unreserved Characters unreserved :: Char -> Bool unreserved c = isAlphaNum c || elem c "-._~" pUnreserved :: GenParser Char st Char pUnreserved = pAlphanum <|> oneOf "-._~" -- 2.2. Reserved Characters genDelims :: Char -> Bool genDelims = flip elem ":/?#[]@" subDelims :: Char -> Bool subDelims = flip elem "!$&'()*+,;=" {- pReserved :: GenParser Char st Char pReserved = pGenDelims <|> pSubDelims pGenDelims :: GenParser Char st Char pGenDelims = oneOf ":/?#[]@" -} pSubDelims :: GenParser Char st Char pSubDelims = oneOf "!$&'()*+,;=" -- 2.1. Percent-Encoding pPctEncoded :: GenParser Char st String pPctEncoded = (:) <$> char '%' <*> pHex pHex :: GenParser Char st String pHex = (\a b -> a:b:[]) <$> hexDigit <*> hexDigit -- 3. Syntax Components -- With the hier-part integrated. pUri :: GenParser Char st URI pUri = (\a (b,c) d e -> URI False a b c d e) <$> (pScheme <* string ":") <*> (ap <|> p) <*> option "" (string "?" *> pQuery) <*> option "" (string "#" *> pFragment) where ap = (,) <$> (string "//" *> pAuthority) <*> pPathAbempty p = ((,) mkAuthority) <$> (pPathAbsolute <|> pPathRootless {-<|> pPathEmpty-}) -- 3.1. Scheme pScheme :: GenParser Char st Scheme pScheme = (:) <$> pAlpha <*> many (pAlphanum <|> oneOf "+_.") -- 3.2. Authority pAuthority :: GenParser Char st Authority pAuthority = Authority <$> option mkUserinfo (try (pUserinfo <* string "@")) <*> pHost <*> option mkPort (string ":" *> pPort) -- 3.2.1. User Information pUserinfo :: GenParser Char st String pUserinfo = concat <$> many ( (pure <$> pUnreserved) <|> ( pPctEncoded) <|> (pure <$> pSubDelims) <|> (pure <$> oneOf ":") ) -- 3.2.2. Host pHost :: GenParser Char st Host pHost = diff <$> pRegName -- <|> RegName <$> pRegName where diff a = maybe (RegName a) sep (pHostname @@ a) sep a = bool (Hostname a) (ipreg a) $ hst a ipreg a = bool (IPv4 $ map read a) (RegName $ intercalate "." a) $ ip a hst = not . all isDigit . head . dropWhile null . reverse ip a = length a == 4 && length (mapMaybe (pDecOctet @@) a) == 4 {- pfff, ipv6 is sooo not gonna make it.. pIPLiteral = "[" ( IPv6address <|> IPvFuture ) "]" pIPvFuture = "v" 1*HEXDIG "." 1*( unreserved <|> subDelims <|> ":" ) pIPv6address = 6( h16 ":" ) ls32 <|> "::" 5( h16 ":" ) ls32 <|> [ h16 ] "::" 4( h16 ":" ) ls32 <|> [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 <|> [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 <|> [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 <|> [ *4( h16 ":" ) h16 ] "::" ls32 <|> [ *5( h16 ":" ) h16 ] "::" h16 <|> [ *6( h16 ":" ) h16 ] "::" pH16 = 1*4HEXDIG pLs32 = ( h16 ":" h16 ) <|> IPv4address -} {- pIPv4address :: GenParser Char st [Int] pIPv4address = (:) <$> pDecOctet <*> (count 3 $ char '.' *> pDecOctet) -} pDecOctet :: GenParser Char st Int pDecOctet = read <$> choice [ try ((\a b c -> [a,b,c]) <$> char '2' <*> char '5' <*> oneOf "012345") , try ((\a b c -> [a,b,c]) <$> char '2' <*> oneOf "01234" <*> digit) , try ((\a b c -> [a,b,c]) <$> char '1' <*> digit <*> digit) , try ((\a b -> [a,b]) <$> digit <*> digit) , (pure <$> digit) ] pRegName :: GenParser Char st String pRegName = concat <$> many1 ( (pure <$> pUnreserved) <|> pPctEncoded <|> (pure <$> pSubDelims)) -- Not actually part of the rfc3986, but comptability with the rfc2396. -- This information can be useful, so why throw away. pHostname :: GenParser Char st Domain pHostname = sepBy (option "" pDomainlabel) (string ".") pDomainlabel :: GenParser Char st String pDomainlabel = intercalate "-" <$> sepBy1 (some pAlphanum) (string "-") -- 3.2.3. Port pPort :: GenParser Char st Port pPort = read <$> some pDigit -- 3.4. Query pQuery :: GenParser Char st String pQuery = concat <$> many (pPchar <|> pure <$> oneOf "/?") -- 3.5. Fragment pFragment :: GenParser Char st String pFragment = concat <$> many (pPchar <|> pure <$> oneOf "/?" ) -- 3.3. Path pPath, pPathAbempty, pPathAbsolute, pPathNoscheme, pPathRootless, pPathEmpty :: GenParser Char st Path pPath = try pPathAbsolute -- begins with "/" but not "//" <|> try pPathNoscheme -- begins with a nonColon segment <|> try pPathRootless -- begins with a segment <|> pPathEmpty -- zero characters pPathAbempty = (Path True) <$> _pSlashSegments pPathAbsolute = (char '/' *>) $ (Path True) <$> (option [] $ (:) <$> pSegmentNz <*> _pSlashSegments) pPathNoscheme = (Path False) <$> ((:) <$> pSegmentNzNc <*> _pSlashSegments) pPathRootless = (Path False) <$> ((:) <$> pSegmentNz <*> _pSlashSegments) pPathEmpty = (Path False []) <$ string "" pSegment, pSegmentNz, pSegmentNzNc :: GenParser Char st String pSegment = concat <$> many pPchar pSegmentNz = concat <$> some pPchar pSegmentNzNc = concat <$> some ( (pure <$> pUnreserved) <|> pPctEncoded <|> (pure <$> pSubDelims) <|> (pure <$> oneOf "@" )) _pSlashSegments :: GenParser Char st [PathSegment] _pSlashSegments = (many $ (:) <$> char '/' *> pSegment) pPchar :: GenParser Char st String pPchar = choice [ pure <$> pUnreserved , pPctEncoded , pure <$> pSubDelims , pure <$> oneOf ":@" ] -- 4.1. URI Reference pUriReference :: GenParser Char st URI pUriReference = try pAbsoluteURI <|> pRelativeRef -- 4.2. Relative Reference -- With the relative-part integrated. pRelativeRef :: GenParser Char st URI pRelativeRef = ($) <$> (try pRelativePart <|> ((URI True mkScheme mkAuthority) <$> (pPathAbsolute <|> pPathRootless <|> pPathEmpty))) <*> option "" (string "?" *> pQuery) <*> option "" (string "#" *> pFragment) pRelativePart :: GenParser Char st (Query -> Fragment -> URI) pRelativePart = (URI True mkScheme) <$> (string "//" *> pAuthority) <*> pPathAbempty -- 4.3. Absolute URI pAbsoluteURI :: GenParser Char st URI pAbsoluteURI = pUri -------[ parsing query parameters ]-------------------------------------------- -- Parse a pre-decoded query string into key value pairs parameters. pQueryParams :: GenParser Char st Parameters pQueryParams = filter (not . null . fst) <$> sepBy ((,) <$> many (noneOf "=&") <*> pMaybe (char '=' *> (translateParam <$> many (noneOf "&")))) (char '&') parseQueryParams :: String -> Maybe Parameters parseQueryParams = eitherToMaybe . parse pQueryParams "" queryParams :: URI -> Parameters queryParams = maybe [] id . parseQueryParams . lget query -- Translate special characters in a parameter. translateParam :: String -> String translateParam [] = [] translateParam ('+':xs) = ' ' : translateParam xs translateParam (x:xs) = x : translateParam xs -------[ filename path utilities ]--------------------------------------------- mkPathRelative :: FilePath -> FilePath mkPathRelative = dropWhile (=='/') -- Generate a label for the filename extension. extension :: Label FilePath (Maybe String) extension = Label {lget = getExt, lset = setExt} where splt p = (\(a,b) -> (reverse a, reverse b)) $ break (=='.') $ reverse p isExt e p = '/' `elem` e || not ('.' `elem` p) getExt p = let (u, v) = splt p in if isExt u v then Nothing else Just u setExt e p = let (u, v) = splt p in (if isExt u v then p else init v) ++ maybe "" ('.':) e -- Try to guess the 'correct' mime type for the input file based on the file -- extension. mimetype :: FilePath -> Maybe String mimetype p = lget extension p >>= mime -- Normalize a path by removing or merging all dot or dot-dot segments and -- double slashes. Todo: cleanup, remove fixp. normalize :: FilePath -> FilePath normalize [] = [] normalize p = fixAbs absolut $ intercalate "/" $ norm where fixAbs True = ('/':) fixAbs False = id absolut = head p == '/' norm = fixp False $ split '/' p fixp False xs = let ys = merge xs in fixp (xs == ys) ys fixp True xs = xs merge ("":xs) = merge xs merge (".":xs) = merge xs merge ("..":"..":xs) = ".." : ".." : merge xs merge (_:"..":xs) = merge xs merge (x:xs) = x : merge xs merge xs = xs jail :: FilePath -> FilePath -> Maybe FilePath jail jailDir p = let nj = normalize jailDir np = normalize p in if nj `isPrefixOf` np -- && not (".." `isPrefixOf` np) then Just np else Nothing (/+) :: FilePath -> FilePath -> FilePath a /+ b = normalize (a ++ "/" ++ b)