-- This file is part of Diohsc -- Copyright (C) 2020 Martin Bays -- -- This program is free software: you can redistribute it and/or modify -- it under the terms of version 3 of the GNU General Public License as -- published by the Free Software Foundation, or any later version. -- -- You should have received a copy of the GNU General Public License -- along with this program. If not, see http://www.gnu.org/licenses/. {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE Safe #-} module URI ( URI , URIRef , escapeIRI , escapePathString , escapeQuery , escapeQueryPart , nullUri , parseAbsoluteUri , parseUriAsAbsolute , parseUriReference , pathSegments , relativeFrom , relativeTo , setQuery , stripUri , unescapeUriString , uriFragment , uriPath , uriPort , uriRegName , uriScheme , uriQuery ) where import Control.Monad (mplus, (<=<)) import Data.Char (toLower) import Data.List (dropWhileEnd) import Data.Maybe (isNothing) import Safe (readMay) import qualified Network.URI as NU defaultScheme :: String defaultScheme = "gemini" defaultPort :: Int defaultPort = 1965 -- | Represents a normalised absolute URI with scheme and port defaults as above. -- We use "Uri" rather than "URI" in camelcase, -- because I prefer to think of it as a word rather than an acronym. -- Still use "URI" if it's the first/only word of the identifier. newtype URI = URI {uriUri :: NU.URI} deriving (Eq,Ord) instance Show URI where show (URI uri) = show uri uriPath, uriQuery, uriFragment :: URI -> String uriPath = NU.uriPath . uriUri uriQuery = NU.uriQuery . uriUri uriFragment = NU.uriFragment . uriUri -- | strips trailing ':' uriScheme :: URI -> String uriScheme = init . NU.uriScheme . uriUri pathSegments :: URI -> [String] pathSegments (URI uri) = NU.pathSegments uri nullUri :: URI nullUri = URI NU.nullURI -- | URI reference. May be absolute. Not normalised. newtype URIRef = URIRef NU.URI deriving (Eq,Ord) instance Show URIRef where show (URIRef uri) = show uri normaliseUri :: NU.URI -> URI normaliseUri uri = URI $ uri { NU.uriPath = (\p -> if null p then "/" else p) . NU.normalizePathSegments . NU.normalizeEscape $ NU.uriPath uri , NU.uriFragment = "" , NU.uriScheme = if null $ NU.uriScheme uri then defaultScheme else toLower <$> NU.uriScheme uri , NU.uriAuthority = (\auth -> auth { NU.uriPort = if NU.uriPort auth == ':' : show defaultPort then "" else NU.uriPort auth , NU.uriRegName = toLower <$> NU.uriRegName auth , NU.uriUserInfo = "" }) <$> NU.uriAuthority uri , NU.uriQuery = NU.normalizeEscape $ NU.uriQuery uri } parseAbsoluteUri :: String -> Maybe URI parseAbsoluteUri = (normaliseUri <$>) . NU.parseURI parseUriAsAbsolute :: String -> Maybe URI parseUriAsAbsolute s = parseAbsoluteUri s `mplus` parseAbsoluteUri (defaultScheme ++ "://" ++ s) parseUriReference :: String -> Maybe URIRef parseUriReference = (URIRef <$>) . NU.parseURIReference setQuery :: String -> URI -> URI setQuery q (URI uri) = URI $ uri { NU.uriQuery = q } stripUri :: URI -> URI stripUri (URI uri) = URI $ uri { NU.uriPath = dropWhileEnd (== '/') $ NU.uriPath uri, NU.uriQuery = "" } relativeTo :: URIRef -> URI -> URI relativeTo (URIRef ref) (URI uri) = normaliseUri $ NU.relativeTo ref uri -- | lift NU.relativeFrom, but set scheme when the result is absolute, -- and avoid initial slash where possible, and prefer "." to "" and ".." to "../" relativeFrom :: URI -> URI -> URIRef relativeFrom (URI uri1) (URI uri2) = URIRef . fixDots . stripSlash . setScheme $ NU.relativeFrom uri1 uri2 where setScheme ref | isNothing (NU.uriAuthority ref) = ref | otherwise = ref { NU.uriScheme = NU.uriScheme uri1 } stripSlash ref | '/':path' <- NU.uriPath ref , not $ null path' , ref' <- ref { NU.uriPath = path' } , NU.relativeTo ref' uri2 == uri1 = ref' | otherwise = ref fixDots ref = case NU.uriPath ref of "" | ref' <- ref { NU.uriPath = "." } , NU.relativeTo ref' uri2 == uri1 -> ref' "../" -> ref { NU.uriPath = ".." } _ -> ref uriRegName :: URI -> Maybe String uriRegName = (NU.uriRegName <$>) . NU.uriAuthority . uriUri uriPort :: URI -> Maybe Int uriPort = (readPort . NU.uriPort) <=< (NU.uriAuthority . uriUri) where readPort (':':n) = readMay n readPort _ = Nothing escapePathString :: String -> String escapePathString = NU.escapeURIString (\c -> NU.isUnreserved c || c == '/') unescapeUriString :: String -> String unescapeUriString = NU.unEscapeString -- | unreserved / sub-delims / ":" / "@" / "/" / "?" isUnescapedInQuery :: Char -> Bool isUnescapedInQuery c = NU.isUnescapedInURI c && c `notElem` ("#[]"::String) escapeQuery :: String -> String escapeQuery = NU.escapeURIString isUnescapedInQuery . withEscapes where withEscapes "" = "" withEscapes ('\\':'x':h1:h2:s) | Just c <- readMay $ "'\\x" <> [h1,h2,'\''] = c:withEscapes s withEscapes ('\\':'e':s) = '\ESC':withEscapes s withEscapes ('\\':'r':s) = '\r':withEscapes s withEscapes ('\\':'n':s) = '\n':withEscapes s withEscapes ('\\':'t':s) = '\t':withEscapes s withEscapes ('\\':c:s) = c:withEscapes s withEscapes (c:s) = c:withEscapes s -- |escape the query part of an unparsed uri string escapeQueryPart :: String -> String escapeQueryPart s | (s','?':q) <- break (== '?') s = s' ++ '?' : escapeQuery q | otherwise = s -- |conversion of IRI to URI according to Step 2 in Section 3.1 in RFC3987 -- (for now at least, we apply this also to the regname rather than -- punycoding) escapeIRI :: String -> String escapeIRI = NU.escapeURIString (not . escape) where -- |ucschar or iprivate in RFC3987 escape :: Char -> Bool escape c = let i = fromEnum c in i >= 0xA0 && i <= 0xD7FF || i >= 0xE000 && i <= 0xF8FF || i >= 0xF900 && i <= 0xFDCF || i >= 0xFDF0 && i <= 0xFFEF || i >= 0x10000 && i <= 0x1FFFD || i >= 0x20000 && i <= 0x2FFFD || i >= 0x30000 && i <= 0x3FFFD || i >= 0x40000 && i <= 0x4FFFD || i >= 0x50000 && i <= 0x5FFFD || i >= 0x60000 && i <= 0x6FFFD || i >= 0x70000 && i <= 0x7FFFD || i >= 0x80000 && i <= 0x8FFFD || i >= 0x90000 && i <= 0x9FFFD || i >= 0xA0000 && i <= 0xAFFFD || i >= 0xB0000 && i <= 0xBFFFD || i >= 0xC0000 && i <= 0xCFFFD || i >= 0xD0000 && i <= 0xDFFFD || i >= 0xE1000 && i <= 0xEFFFD || i >= 0xF0000 && i <= 0xFFFFD || i >= 0x100000 && i <= 0x10FFFD