{-| References: - @ The following rules are different from those in [RFC3986]: IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ] ihier-part = "//" iauthority ipath-abempty / ipath-absolute / ipath-rootless / ipath-empty IRI-reference = IRI / irelative-ref absolute-IRI = scheme ":" ihier-part [ "?" iquery ] irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ] irelative-part = "//" iauthority ipath-abempty / ipath-absolute / ipath-noscheme / ipath-empty iauthority = [ iuserinfo "@" ] ihost [ ":" port ] iuserinfo = *( iunreserved / pct-encoded / sub-delims / ":" ) ihost = IP-literal / IPv4address / ireg-name ireg-name = *( iunreserved / pct-encoded / sub-delims ) ipath = ipath-abempty ; begins with "/" or is empty / ipath-absolute ; begins with "/" but not "//" / ipath-noscheme ; begins with a non-colon segment / ipath-rootless ; begins with a segment / ipath-empty ; zero characters ipath-abempty = *( "/" isegment ) ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ] ipath-noscheme = isegment-nz-nc *( "/" isegment ) ipath-rootless = isegment-nz *( "/" isegment ) ipath-empty = 0 isegment = *ipchar isegment-nz = 1*ipchar isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims / "@" ) ; non-zero-length segment without any colon ":" ipchar = iunreserved / pct-encoded / sub-delims / ":" / "@" iquery = *( ipchar / iprivate / "/" / "?" ) ifragment = *( ipchar / "/" / "?" ) iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD / %xD0000-DFFFD / %xE1000-EFFFD iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD Some productions are ambiguous. The "first-match-wins" (a.k.a. "greedy") algorithm applies. For details, see [RFC3986]. The following rules are the same as those in [RFC3986]: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) port = *DIGIT IP-literal = "[" ( IPv6address / IPvFuture ) "]" IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) IPv6address = 6( h16 ":" ) ls32 / "::" 5( h16 ":" ) ls32 / [ h16 ] "::" 4( h16 ":" ) ls32 / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 / [ *4( h16 ":" ) h16 ] "::" ls32 / [ *5( h16 ":" ) h16 ] "::" h16 / [ *6( h16 ":" ) h16 ] "::" h16 = 1*4HEXDIG ls32 = ( h16 ":" h16 ) / IPv4address IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet dec-octet = DIGIT ; 0-9 / %x31-39 DIGIT ; 10-99 / "1" 2DIGIT ; 100-199 / "2" %x30-34 DIGIT ; 200-249 / "25" %x30-35 ; 250-255 pct-encoded = "%" HEXDIG HEXDIG unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" reserved = gen-delims / sub-delims gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" This syntax does not support IPv6 scoped addressing zone identifiers. @ -} module Iri.CodePointPredicates.Rfc3987 where import Iri.Prelude hiding ((|||), (&&&), inRange, Predicate) import Iri.CodePointPredicates.Core import qualified Iri.CodePointPredicates.Rfc3986 as A scheme :: Predicate scheme = A.scheme unencodedUserInfoComponent :: Predicate unencodedUserInfoComponent = unreserved ||| A.subDelims {- ireg-name = *( iunreserved / pct-encoded / sub-delims ) -} unencodedRegName :: Predicate unencodedRegName = (unreserved ||| A.subDelims) &&& ((/=) 46) {- ipchar = iunreserved / pct-encoded / sub-delims / ":" / "@" -} unencodedPathSegment :: Predicate unencodedPathSegment = unreserved ||| A.subDelims ||| oneOfChars ":@" {-| Reference: @ iquery = *( ipchar / iprivate / "/" / "?" ) @ Notice that we've added the "|" char, because some real life URIs seem to contain it. Also we've excluded the '+' char, because it gets decoded as a space char. -} unencodedQuery :: Predicate unencodedQuery = (unencodedPathSegment ||| private ||| oneOfChars "/?|") &&& (/= 43) {-| Notice that we've added the "|" char, because some real life URIs seem to contain it. Also we've excluded the '+' char, because it gets decoded as a space char. -} unencodedFragment :: Predicate unencodedFragment = (unencodedPathSegment ||| oneOfChars "/?|") &&& (/= 43) {- iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar -} unreserved :: Predicate unreserved = A.unreserved ||| ucs {- ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD / %xD0000-DFFFD / %xE1000-EFFFD -} ucs :: Predicate ucs x = x >= 0xA0 && x <= 0xD7FF || x >= 0xF900 && x <= 0xFDCF || x >= 0xFDF0 && x <= 0xFFEF || x >= 0x10000 && x <= 0x1FFFD || x >= 0x20000 && x <= 0x2FFFD || x >= 0x30000 && x <= 0x3FFFD || x >= 0x40000 && x <= 0x4FFFD || x >= 0x50000 && x <= 0x5FFFD || x >= 0x60000 && x <= 0x6FFFD || x >= 0x70000 && x <= 0x7FFFD || x >= 0x80000 && x <= 0x8FFFD || x >= 0x90000 && x <= 0x9FFFD || x >= 0xA0000 && x <= 0xAFFFD || x >= 0xB0000 && x <= 0xBFFFD || x >= 0xC0000 && x <= 0xCFFFD || x >= 0xD0000 && x <= 0xDFFFD || x >= 0xE1000 && x <= 0xEFFFD {- iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD -} private :: Predicate private x = x >= 0xE000 && x <= 0xF8FF || x >= 0xF0000 && x <= 0xFFFFD || x >= 0x100000 && x <= 0x10FFFD