module Network.Shpider.URL ( module Network.URL , isSameDomain , mkAbsoluteUrl , isAbsoluteUrl , isMailto , isHttp , getDomain , getFolder ) where import Network.URL import Network.Shpider.State import Text.Regex.Posix import Network.Shpider.TextUtils -- | is the second url on the same domain as the first? Note: this will return False if either URL is invalid. isSameDomain :: String -> String -> Bool isSameDomain urlYourOn urlYourNot = let url1 = importURL urlYourOn url2 = importURL urlYourNot in maybe False ( \ t1 -> maybe False ( \ t2 -> let urlType1 = url_type t1 urlType2 = url_type t2 isRelative ut = ( ut == PathRelative ) || ( ut == HostRelative ) ut1Relative = isRelative urlType1 ut2Relative = isRelative urlType2 utsSame = urlType1 == urlType2 in ( ut1Relative || ut2Relative || utsSame ) ) url2 ) url1 -- | Assumes the given URL is relative to `currentPage`. mkAbsoluteUrl :: String -> Shpider ( Maybe String ) mkAbsoluteUrl uncleanUrl = do if not $ isMailto uncleanUrl then do shpider <- get let cleanUrlStr = escapeSpaces uncleanUrl maybeUrl = importURL cleanUrlStr currentAddr = addr $ currentPage shpider maybe ( return Nothing ) ( \ url -> case url_type url of PathRelative -> return $ Just $ getFolder currentAddr ++ urlStr HostRelative -> return $ Just $ getDomain currentAddr ++ urlStr _ -> return $ Just urlStr ) maybeUrl else return Nothing where urlStr = escapeSpaces uncleanUrl -- | True if the url is absolute isAbsoluteUrl :: String -> Bool isAbsoluteUrl urlStr = case importURL urlStr of Just url -> case url_type url of Absolute _ -> True _ -> False _ -> False -- | is the given string of form \"mailto:person.com\"? isMailto :: String -> Bool isMailto = flip (=~) "mailto:.*" -- | is the url a http url? isHttp :: String -> Bool isHttp = flip (=~) "(http://|https://).*" -- | Get the protocol and domain from a URL eg -- -- @ -- getDomain \"widdle:\/\/owqueer.co.uk\/strangeanticsofsailors\/jimmy\" -- -- \"widdle:\/\/owqueer.co.uk\" -- @ getDomain :: String -> String getDomain = flip (=~) "[^:]+://[^/]+" -- | Get the whole url up to and including the current folder of the present document. -- -- @ -- getFolder \"widdle:\/\/owqueer.co.uk\/strangeanticsofsailors\/jimmy\" -- -- \"widdle:\/\/owqueer.co.uk\/strangeanticsofsailors\/\" -- @ getFolder :: String -> String getFolder url = let ms = url =~ "[^/]*/[^/]*" l = length ms in if l > 0 then ( concat $ take ( l - 1 ) $ map head ms ) ++ "/" else ""