module Iri.Parsing.Attoparsec.Text
(
  iri,
  httpIri,
)
where

import Iri.Prelude
import Iri.Data
import Data.Attoparsec.Text hiding (try)
import qualified Data.ByteString as K
import qualified Data.Text as T
import qualified Data.Text.Encoding as B
import qualified Data.Text.Encoding.Error as L
import qualified Data.Vector as S
import qualified VectorBuilder.MonadPlus as E
import qualified Iri.CodePointPredicates.Rfc3987 as C
import qualified Iri.MonadPlus as R
import qualified Text.Builder as J
import qualified Net.IPv4 as M
import qualified Net.IPv6 as N


{-# INLINE labeled #-}
labeled :: String -> Parser a -> Parser a
labeled label parser =
  parser <?> label

{-|
Parser of a well-formed IRI conforming to the RFC3987 standard into IRI.
Performs URL-decoding.
-}
{-# INLINABLE iri #-}
iri :: Parser Iri
iri =
  labeled "IRI" $ do
    parsedScheme <- scheme
    char ':'
    parsedHierarchy <- hierarchy
    parsedQuery <- query
    parsedFragment <- fragment
    return (Iri parsedScheme parsedHierarchy parsedQuery parsedFragment)

{-|
Same as 'iri', but optimized specifially for the case of HTTP IRIs.
-}
{-# INLINABLE httpIri #-}
httpIri :: Parser HttpIri
httpIri =
  labeled "HTTP IRI" $ do
    asciiCI "http"
    secure <- satisfy (\ x -> x == 's' || x == 'S') $> True <|> pure False
    string "://"
    parsedHost <- host
    parsedPort <- PresentPort <$> (char ':' *> port) <|> pure MissingPort
    parsedPath <- ((char '/') *> path) <|> pure (Path mempty)
    parsedQuery <- query
    parsedFragment <- fragment
    return (HttpIri (Security secure) parsedHost parsedPort parsedPath parsedQuery parsedFragment)

{-# INLINE hierarchy #-}
hierarchy :: Parser Hierarchy
hierarchy =
  do
    slashPresent <- (char '/') $> True <|> pure False
    if slashPresent
      then do
        slashPresent <- (char '/') $> True <|> pure False
        if slashPresent
          then authorisedHierarchyBody AuthorisedHierarchy
          else AbsoluteHierarchy <$> path
      else RelativeHierarchy <$> path

{-# INLINE authorisedHierarchyBody #-}
authorisedHierarchyBody :: (Authority -> Path -> body) -> Parser body
authorisedHierarchyBody body =
  do
    parsedUserInfo <- (presentUserInfo PresentUserInfo <* char '@') <|> pure MissingUserInfo
    parsedHost <- host
    parsedPort <- PresentPort <$> (char ':' *> port) <|> pure MissingPort
    parsedPath <- ((char '/') *> path) <|> pure (Path mempty)
    return (body (Authority parsedUserInfo parsedHost parsedPort) parsedPath)

{-# INLINE scheme #-}
scheme :: Parser Scheme
scheme =
  labeled "Scheme" $
  fmap (Scheme . B.encodeUtf8) (takeWhile1 (C.scheme . ord))

{-# INLINABLE presentUserInfo #-}
presentUserInfo :: (User -> Password -> a) -> Parser a
presentUserInfo result =
  labeled "User info" $
  do
    user <- User <$> urlEncodedComponent (C.unencodedUserInfoComponent . ord)
    passwordFollows <- True <$ char ':' <|> pure False
    if passwordFollows
      then do
        password <- PresentPassword <$> urlEncodedComponent (C.unencodedUserInfoComponent . ord)
        return (result user password)
      else return (result user MissingPassword)

{-# INLINE host #-}
host :: Parser Host
host =
  labeled "Host" $
  IpV6Host <$> N.parser <|>
  IpV4Host <$> M.parser <|>
  NamedHost <$> domainName

{-# INLINE domainName #-}
domainName :: Parser RegName
domainName =
  fmap RegName (E.sepBy1 domainLabel (char '.'))

{-|
Domain label with Punycode decoding applied.
-}
{-# INLINE domainLabel #-}
domainLabel :: Parser DomainLabel
domainLabel =
  labeled "Domain label" $
  DomainLabel <$> takeWhile1 (C.unencodedRegName . ord)

{-# INLINE port #-}
port :: Parser Word16
port =
  decimal

{-# INLINE path #-}
path :: Parser Path
path =
  do
    segments <- E.sepBy pathSegment (char '/')
    if segmentsAreEmpty segments
      then return (Path mempty)
      else return (Path segments)
  where
    segmentsAreEmpty segments =
      S.length segments == 1 &&
      (case S.unsafeHead segments of PathSegment headSegmentText -> T.null headSegmentText)

{-# INLINE pathSegment #-}
pathSegment :: Parser PathSegment
pathSegment =
  fmap PathSegment (urlEncodedComponent (C.unencodedPathSegment . ord))

{-# INLINABLE urlEncodedComponent #-}
urlEncodedComponent :: (Char -> Bool) -> Parser Text
urlEncodedComponent unencodedCharPredicate =
  labeled "URL-encoded component" $
  fmap J.run $
  R.foldl mappend mempty $
  (J.text <$> takeWhile1 unencodedCharPredicate) <|> urlEncodedSequence

{-# INLINABLE urlEncodedSequence #-}
urlEncodedSequence :: Parser J.Builder
urlEncodedSequence =
  labeled "URL-encoded sequence" $ do
    start <- progress (mempty, mempty, B.streamDecodeUtf8) =<< urlEncodedByte
    R.foldlM progress (start) urlEncodedByte >>= finish
  where
    progress (!builder, _ :: ByteString, decode) byte =
      case unsafeDupablePerformIO (try (evaluate (decode (K.singleton byte)))) of
        Right (B.Some decodedChunk undecodedBytes newDecode) ->
          return (builder <> J.text decodedChunk, undecodedBytes, newDecode)
        Left (L.DecodeError error _) ->
          fail (showString "UTF8 decoding: " error)
    finish (builder, undecodedBytes, _) =
      if K.null undecodedBytes
        then return builder
        else fail (showString "UTF8 decoding: Bytes remaining: " (show undecodedBytes))

{-# INLINE urlEncodedByte #-}
urlEncodedByte :: Parser Word8
urlEncodedByte =
  do
    char '%' 
    digit1 <- fromIntegral <$> hexadecimalDigit
    digit2 <- fromIntegral <$> hexadecimalDigit
    return (shiftL digit1 4 .|. digit2)

{-# INLINE hexadecimalDigit #-}
hexadecimalDigit :: Parser Int
hexadecimalDigit =
  do
    c <- anyChar
    let x = ord c
    if x >= 48 && x < 58
      then return (x - 48)
      else if x >= 65 && x < 71
        then return (x - 55)
        else if x >= 97 && x < 103
          then return (x - 97)
          else fail ("Not a hexadecimal digit: " <> show c)

{-# INLINABLE query #-}
query :: Parser Query
query =
  labeled "Query" $
  (char '?' *> queryBody) <|> pure (Query mempty)

{-|
The stuff after the question mark.
-}
{-# INLINABLE queryBody #-}
queryBody :: Parser Query
queryBody =
  fmap Query (urlEncodedComponent (C.unencodedQuery . ord))

{-# INLINABLE fragment #-}
fragment :: Parser Fragment
fragment =
  labeled "Fragment" $
  (char '#' *> (Fragment <$> urlEncodedComponent (C.unencodedFragment . ord))) <|>
  pure (Fragment mempty)