{-| References: - @ URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] hier-part = "//" authority path-abempty / path-absolute / path-rootless / path-empty URI-reference = URI / relative-ref absolute-URI = scheme ":" hier-part [ "?" query ] relative-ref = relative-part [ "?" query ] [ "#" fragment ] relative-part = "//" authority path-abempty / path-absolute / path-noscheme / path-empty scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) authority = [ userinfo "@" ] host [ ":" port ] userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) host = IP-literal / IPv4address / reg-name port = *DIGIT IP-literal = "[" ( IPv6address / IPvFuture ) "]" IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) IPv6address = 6( h16 ":" ) ls32 / "::" 5( h16 ":" ) ls32 / [ h16 ] "::" 4( h16 ":" ) ls32 / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 / [ *4( h16 ":" ) h16 ] "::" ls32 / [ *5( h16 ":" ) h16 ] "::" h16 / [ *6( h16 ":" ) h16 ] "::" h16 = 1*4HEXDIG ls32 = ( h16 ":" h16 ) / IPv4address IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet dec-octet = DIGIT ; 0-9 / %x31-39 DIGIT ; 10-99 / "1" 2DIGIT ; 100-199 / "2" %x30-34 DIGIT ; 200-249 / "25" %x30-35 ; 250-255 reg-name = *( unreserved / pct-encoded / sub-delims ) path = path-abempty ; begins with "/" or is empty / path-absolute ; begins with "/" but not "//" / path-noscheme ; begins with a non-colon segment / path-rootless ; begins with a segment / path-empty ; zero characters path-abempty = *( "/" segment ) path-absolute = "/" [ segment-nz *( "/" segment ) ] path-noscheme = segment-nz-nc *( "/" segment ) path-rootless = segment-nz *( "/" segment ) path-empty = 0 segment = *pchar segment-nz = 1*pchar segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) ; non-zero-length segment without any colon ":" pchar = unreserved / pct-encoded / sub-delims / ":" / "@" query = *( pchar / "/" / "?" ) fragment = *( pchar / "/" / "?" ) pct-encoded = "%" HEXDIG HEXDIG unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" reserved = gen-delims / sub-delims gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" @ -} module Iri.CodePointPredicates.Rfc3986 where import Iri.Prelude hiding ((|||), (&&&), inRange, Predicate) import Iri.CodePointPredicates.Core import qualified Data.Vector as A {-| Reference: @ unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" @ -} unreserved :: Predicate unreserved = asciiAlphanumeric ||| oneOfChars "-._~" subDelims :: Predicate subDelims = oneOfChars "!$&'()*+,;=" {-# NOINLINE scheme #-} scheme :: Predicate scheme = cached $ asciiAlphanumeric ||| oneOfChars "+.-" {-# NOINLINE domainLabel #-} domainLabel :: Predicate domainLabel = cached $ asciiAlphanumeric ||| oneOfChars "-_~" {- pchar = unreserved / pct-encoded / sub-delims / ":" / "@" -} {-# NOINLINE unencodedPathSegment #-} unencodedPathSegment :: Predicate unencodedPathSegment = cached $ unreserved ||| subDelims ||| oneOfChars ":@" {-| Reference: @ query = *( pchar / "/" / "?" ) @ Notice that we've added the "|" char, because some real life URIs seem to contain it. Also we've excluded the '+' char, because it gets decoded as a space char. -} {-# NOINLINE unencodedQuery #-} unencodedQuery :: Predicate unencodedQuery = cached $ (unencodedPathSegment ||| oneOfChars "/?|") &&& (/= 43) unencodedFragment :: Predicate unencodedFragment = unencodedQuery {-# NOINLINE unencodedQueryComponent #-} unencodedQueryComponent :: Predicate unencodedQueryComponent = cached $ unencodedQuery &&& not . oneOfChars "=&;" {-# NOINLINE unencodedUserInfoComponent #-} unencodedUserInfoComponent :: Predicate unencodedUserInfoComponent = cached $ unreserved ||| subDelims