module URL where --import Fudgets (ctrace,Host(..),Port(..)) import Control.Applicative((<|>)) import Utils2(unmix,mix) -- ** Types -- | This is the type for URLs (Universal Resource Locators). -- The functions 'url2str' and 'ParseURL.parseURL' convert between strings -- and the 'URL' type. data URL = URL (Maybe Proto) (Maybe Host) (Maybe Port) FilePath (Maybe Fragment) deriving (Eq,Ord,Show,Read) type Proto = String type Host = String type Port = Int type Fragment = String -- ** Show it url2str (URL proto host port path fragment) = protostr++hoststr++portstr++path++fragmentstr where protostr = opt proto (++":") hoststr = opt host ("//"++) portstr = opt port ((':':).show) fragmentstr = opt fragment ('#':) opt m f = maybe "" f m -- ** Relative URLs relativeURL path = URL Nothing Nothing Nothing path Nothing fragmentURL fragment = URL Nothing Nothing Nothing "" (Just fragment) -- ** Disregarding the fragment part docURL (URL proto host port path fragment) = URL proto host port path Nothing sameDoc url1 url2 = docURL url1 == docURL url2 -- ** Selectors fragment (URL _ _ _ _ fr) = fr urlHost (URL _ host _ _ _) = host urlPath (URL _ _ _ path _) = path -- ** Combining URLs --joinURL parent child = {-ctrace "joinURL" (parent,child,res)-} res -- where res = joinURL' parent child -- | Joining URLs joinURL (URL proto host port ppath pfragment) child = case child of URL (Just "data") _ _ _ _ -> child URL (Just "news") _ _ _ _ -> child URL (Just "mailto") _ _ _ _ -> child URL (Just "file") _ _ _ _ -> child URL (Just "javascript") _ _ _ _ -> child URL cproto chost cport cpath cfragment -> URL (cproto <|> proto) (chost <|> host) (if chost==Nothing then cport <|> port else cport) (joinpath' ppath cpath) cfragment where joinpath' ppath cpath= case (cpath,cfragment) of ("",Just _) -> ppath _ -> joinpath ppath cpath where joinpath ppath cpath = case cpath of '/':_ -> cpath '?':_ -> (fst.break (=='?')) ppath++cpath _ -> compresspath (parent ppath++cpath) where parent = reverse . snd . break (=='/') . reverse compresspath = flip mix "/" . rmdotdot . rmdot . unmix '/' rmdot (".":ns@(_:_)) = rmdot ns rmdot [n,"."] = [n,""] rmdot (n:ns) = rmdotq n:rmdot ns rmdot [] = [] rmdotq ('.':'?':n) = '?':n rmdotq n = n {- -- Fails to simplify a/b/../../c into c rmdotdot ("..":ns) = "..":rmdotdot ns -- leading ".." can not be removed rmdotdot (n:"..":ns) = rmdotdot ns -- n/=".." thanks to previous line rmdotdot (n:ns) = n:rmdotdot ns rmdotdot [] = [] -} rmdotdot = rmdotdot' [] rmdotdot' ps [] = reverse ps rmdotdot' (p:ps) ("..":ns) | p `notElem` [".",".."] = rmdotdot' ps ns rmdotdot' ps (n:ns) = rmdotdot' (n:ps) ns