{-|
References:
- <https://www.ietf.org/rfc/rfc3986 RFC3986: Uniform Resource Identifier (URI): Generic Syntax>

@
URI           = scheme ":" hier-part [ "?" query ] [ "#" fragment ]

hier-part     = "//" authority path-abempty
              / path-absolute
              / path-rootless
              / path-empty

URI-reference = URI / relative-ref

absolute-URI  = scheme ":" hier-part [ "?" query ]

relative-ref  = relative-part [ "?" query ] [ "#" fragment ]

relative-part = "//" authority path-abempty
              / path-absolute
              / path-noscheme
              / path-empty

scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )

authority     = [ userinfo "@" ] host [ ":" port ]
userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
host          = IP-literal / IPv4address / reg-name
port          = *DIGIT

IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"

IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )

IPv6address   =                            6( h16 ":" ) ls32
              /                       "::" 5( h16 ":" ) ls32
              / [               h16 ] "::" 4( h16 ":" ) ls32
              / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
              / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
              / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
              / [ *4( h16 ":" ) h16 ] "::"              ls32
              / [ *5( h16 ":" ) h16 ] "::"              h16
              / [ *6( h16 ":" ) h16 ] "::"

h16           = 1*4HEXDIG
ls32          = ( h16 ":" h16 ) / IPv4address
IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
dec-octet     = DIGIT                 ; 0-9
              / %x31-39 DIGIT         ; 10-99
              / "1" 2DIGIT            ; 100-199
              / "2" %x30-34 DIGIT     ; 200-249
              / "25" %x30-35          ; 250-255

reg-name      = *( unreserved / pct-encoded / sub-delims )

path          = path-abempty    ; begins with "/" or is empty
              / path-absolute   ; begins with "/" but not "//"
              / path-noscheme   ; begins with a non-colon segment
              / path-rootless   ; begins with a segment
              / path-empty      ; zero characters

path-abempty  = *( "/" segment )
path-absolute = "/" [ segment-nz *( "/" segment ) ]
path-noscheme = segment-nz-nc *( "/" segment )
path-rootless = segment-nz *( "/" segment )
path-empty    = 0<pchar>

segment       = *pchar
segment-nz    = 1*pchar
segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
              ; non-zero-length segment without any colon ":"

pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"

query         = *( pchar / "/" / "?" )

fragment      = *( pchar / "/" / "?" )

pct-encoded   = "%" HEXDIG HEXDIG

unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
reserved      = gen-delims / sub-delims
gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
              / "*" / "+" / "," / ";" / "="
@

-}
module Iri.CodePointPredicates.Rfc3986
where

import Iri.Prelude hiding ((|||), (&&&), inRange, Predicate)
import Iri.CodePointPredicates.Core

{-|
Reference:

@
unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
@
-}
unreserved :: Predicate
unreserved =
  asciiAlphanumeric ||| oneOfChars "-._~"

subDelims :: Predicate
subDelims =
  oneOfChars "!$&'()*+,;="

{-# NOINLINE scheme #-}
scheme :: Predicate
scheme =
  cached $
  asciiAlphanumeric ||| oneOfChars "+.-"

{-# NOINLINE domainLabel #-}
domainLabel :: Predicate
domainLabel =
  cached $
  asciiAlphanumeric ||| oneOfChars "-_~"

{-
pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
-}
{-# NOINLINE unencodedPathSegment #-}
unencodedPathSegment :: Predicate
unencodedPathSegment =
  cached $
  unreserved ||| subDelims ||| oneOfChars ":@"

{-|
Reference:

@
query         = *( pchar / "/" / "?" )
@

Notice that we've added the "|" char, because some real life URIs seem to contain it.
Also we've excluded the '+' char, because it gets decoded as a space char.
-}
{-# NOINLINE unencodedQuery #-}
unencodedQuery :: Predicate
unencodedQuery =
  cached $
  (unencodedPathSegment ||| oneOfChars "/?|[]\"'") &&& (/= 43)

unencodedFragment :: Predicate
unencodedFragment =
  unencodedQuery

{-# NOINLINE unencodedQueryComponent #-}
unencodedQueryComponent :: Predicate
unencodedQueryComponent =
  cached $
  unencodedQuery &&& not . oneOfChars "=&;"

{-# NOINLINE unencodedUserInfoComponent #-}
unencodedUserInfoComponent :: Predicate
unencodedUserInfoComponent =
  cached $
  unreserved ||| subDelims