{- SPDX-FileCopyrightText: 2023 Serokell <https://serokell.io>
 -
 - SPDX-License-Identifier: MPL-2.0
 -}

{-# LANGUAGE ExistentialQuantification #-}

module Xrefcheck.Data.URI
  ( UriParseError (..)
  , parseUri
  ) where

import Universum

import Control.Exception.Safe (handleJust)
import Control.Monad.Except (throwError)
import Text.URI (ParseExceptionBs, URI, mkURIBs)
import URI.ByteString qualified as URIBS

data UriParseError
  = UPEInvalid URIBS.URIParseError
  | UPEConversion ParseExceptionBs
  deriving stock (Int -> UriParseError -> ShowS
[UriParseError] -> ShowS
UriParseError -> String
(Int -> UriParseError -> ShowS)
-> (UriParseError -> String)
-> ([UriParseError] -> ShowS)
-> Show UriParseError
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> UriParseError -> ShowS
showsPrec :: Int -> UriParseError -> ShowS
$cshow :: UriParseError -> String
show :: UriParseError -> String
$cshowList :: [UriParseError] -> ShowS
showList :: [UriParseError] -> ShowS
Show, UriParseError -> UriParseError -> Bool
(UriParseError -> UriParseError -> Bool)
-> (UriParseError -> UriParseError -> Bool) -> Eq UriParseError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: UriParseError -> UriParseError -> Bool
== :: UriParseError -> UriParseError -> Bool
$c/= :: UriParseError -> UriParseError -> Bool
/= :: UriParseError -> UriParseError -> Bool
Eq)

data AnyURIRef = forall a. AnyURIRef (URIBS.URIRef a)

serializeAnyURIRef :: AnyURIRef -> ByteString
serializeAnyURIRef :: AnyURIRef -> ByteString
serializeAnyURIRef (AnyURIRef URIRef a
uri) = URIRef a -> ByteString
forall a. URIRef a -> ByteString
URIBS.serializeURIRef' URIRef a
uri

-- | Parse URI according to RFC 3986 extended by allowing non-encoded
-- `[` and `]` in query string.
--
-- The first parameter indicates whether the parsing should admit relative
-- URIs or not.
parseUri :: Bool -> Text -> ExceptT UriParseError IO URI
parseUri :: Bool -> Text -> ExceptT UriParseError IO URI
parseUri Bool
canBeRelative Text
link = do
  -- There exist two main standards of URL parsing: RFC 3986 and the Web
  -- Hypertext Application Technology Working Group's URL standard. Ideally,
  -- we want to be able to parse the URLs in accordance with the latter
  -- standard, because it provides a much less ambiguous set of rules for
  -- percent-encoding special characters, and is essentially a living
  -- standard that gets updated constantly.
  --
  -- We have chosen the 'uri-bytestring' library for URI parsing because
  -- of the 'laxURIParseOptions' parsing configuration. 'mkURI' from
  -- the 'modern-uri' library parses URIs in accordance with RFC 3986 and does
  -- not provide a means of parsing customization, which contrasts with
  -- 'parseURI' that accepts a 'URIParserOptions'. One of the predefined
  -- configurations of this type is 'strictURIParserOptions', which follows
  -- RFC 3986, and the other -- 'laxURIParseOptions' -- allows brackets
  -- in the queries, which draws us closer to the WHATWG URL standard.
  --
  -- The 'modern-uri' package can parse an URI deciding if it is absolute or
  -- relative depending on the success or failure of the scheme parsing. By
  -- contrast, in 'uri-bytestring' it has to be decided beforehand, resulting in
  -- different URI types.
  AnyURIRef
uri <- case URIParserOptions
-> ByteString -> Either URIParseError (URIRef Absolute)
URIBS.parseURI URIParserOptions
URIBS.laxURIParserOptions (Text -> ByteString
forall a b. ConvertUtf8 a b => a -> b
encodeUtf8 Text
link) of
    Left (URIBS.MalformedScheme SchemaError
_) | Bool
canBeRelative ->
      URIParserOptions
-> ByteString -> Either URIParseError (URIRef Relative)
URIBS.parseRelativeRef URIParserOptions
URIBS.laxURIParserOptions (Text -> ByteString
forall a b. ConvertUtf8 a b => a -> b
encodeUtf8 Text
link)
        Either URIParseError (URIRef Relative)
-> (Either URIParseError (URIRef Relative)
    -> ExceptT UriParseError IO AnyURIRef)
-> ExceptT UriParseError IO AnyURIRef
forall a b. a -> (a -> b) -> b
& (URIParseError -> ExceptT UriParseError IO AnyURIRef)
-> (URIRef Relative -> ExceptT UriParseError IO AnyURIRef)
-> Either URIParseError (URIRef Relative)
-> ExceptT UriParseError IO AnyURIRef
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (UriParseError -> ExceptT UriParseError IO AnyURIRef
forall a. UriParseError -> ExceptT UriParseError IO a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (UriParseError -> ExceptT UriParseError IO AnyURIRef)
-> (URIParseError -> UriParseError)
-> URIParseError
-> ExceptT UriParseError IO AnyURIRef
forall b c a. (b -> c) -> (a -> b) -> a -> c
. URIParseError -> UriParseError
UPEInvalid) (AnyURIRef -> ExceptT UriParseError IO AnyURIRef
forall a. a -> ExceptT UriParseError IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (AnyURIRef -> ExceptT UriParseError IO AnyURIRef)
-> (URIRef Relative -> AnyURIRef)
-> URIRef Relative
-> ExceptT UriParseError IO AnyURIRef
forall b c a. (b -> c) -> (a -> b) -> a -> c
. URIRef Relative -> AnyURIRef
forall a. URIRef a -> AnyURIRef
AnyURIRef)
    Left URIParseError
err -> UriParseError -> ExceptT UriParseError IO AnyURIRef
forall a. UriParseError -> ExceptT UriParseError IO a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (UriParseError -> ExceptT UriParseError IO AnyURIRef)
-> UriParseError -> ExceptT UriParseError IO AnyURIRef
forall a b. (a -> b) -> a -> b
$ URIParseError -> UriParseError
UPEInvalid URIParseError
err
    Right URIRef Absolute
uri -> AnyURIRef -> ExceptT UriParseError IO AnyURIRef
forall a. a -> ExceptT UriParseError IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (AnyURIRef -> ExceptT UriParseError IO AnyURIRef)
-> AnyURIRef -> ExceptT UriParseError IO AnyURIRef
forall a b. (a -> b) -> a -> b
$ URIRef Absolute -> AnyURIRef
forall a. URIRef a -> AnyURIRef
AnyURIRef URIRef Absolute
uri

  -- We stick to our infrastructure by continuing to operate on the datatypes
  -- from 'modern-uri', which are used in the 'req' library. First we
  -- serialize our URI parsed with 'parseURI' so it becomes a 'ByteString'
  -- with all the necessary special characters *percent-encoded*, and then
  -- call 'mkURIBs'.
  ByteString -> ExceptT UriParseError IO URI
forall (m :: * -> *). MonadThrow m => ByteString -> m URI
mkURIBs (AnyURIRef -> ByteString
serializeAnyURIRef AnyURIRef
uri)
    -- Ideally, this exception should never be thrown, as the URI
    -- already *percent-encoded* with 'parseURI' from 'uri-bytestring'
    -- and 'mkURIBs' is only used to convert to 'URI' type from
    -- 'modern-uri' package.
    ExceptT UriParseError IO URI
-> (ExceptT UriParseError IO URI -> ExceptT UriParseError IO URI)
-> ExceptT UriParseError IO URI
forall a b. a -> (a -> b) -> b
& (SomeException -> Maybe ParseExceptionBs)
-> (ParseExceptionBs -> ExceptT UriParseError IO URI)
-> ExceptT UriParseError IO URI
-> ExceptT UriParseError IO URI
forall (m :: * -> *) e b a.
(HasCallStack, MonadCatch m, Exception e) =>
(e -> Maybe b) -> (b -> m a) -> m a -> m a
handleJust SomeException -> Maybe ParseExceptionBs
forall e. Exception e => SomeException -> Maybe e
fromException (UriParseError -> ExceptT UriParseError IO URI
forall a. UriParseError -> ExceptT UriParseError IO a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (UriParseError -> ExceptT UriParseError IO URI)
-> (ParseExceptionBs -> UriParseError)
-> ParseExceptionBs
-> ExceptT UriParseError IO URI
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ParseExceptionBs -> UriParseError
UPEConversion)