{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}

module Text.Regex.Pcre2.Internal where

import           Control.Applicative        (Alternative(..))
import           Control.Exception          hiding (TypeError)
import           Control.Monad.State.Strict
import           Data.Either                (partitionEithers)
import           Data.Function              ((&))
import           Data.Functor               ((<&>))
import           Data.Functor.Const         (Const(..))
import           Data.Functor.Identity      (Identity(..))
import           Data.IORef
import           Data.IntMap.Strict         (IntMap)
import qualified Data.IntMap.Strict         as IM
import           Data.List                  (foldl', intercalate)
import           Data.List.NonEmpty         (NonEmpty(..))
import qualified Data.List.NonEmpty         as NE
import           Data.Maybe                 (fromJust, fromMaybe)
import           Data.Monoid                (Alt(..), Any(..), First(..))
import           Data.Proxy                 (Proxy(..))
import           Data.Text                  (Text)
import qualified Data.Text                  as Text
import qualified Data.Text.Foreign          as Text
import           Data.Type.Bool             (type (||), If)
import           Data.Type.Equality         (type (==))
import           Data.Typeable              (cast)
import           Foreign
import           Foreign.C.Types
import qualified Foreign.Concurrent         as Conc
import           GHC.TypeLits               hiding (Text)
import qualified GHC.TypeLits               as TypeLits
import           System.IO.Unsafe           (unsafePerformIO)
import           Text.Regex.Pcre2.Foreign

-- * General utilities

-- | There is no @nullForeignPtr@ to pass to `withForeignPtr`, so we have to
-- fake it with a `Maybe`.
withForeignOrNullPtr :: Maybe (ForeignPtr a) -> (Ptr a -> IO b) -> IO b
withForeignOrNullPtr Nothing   = \action -> action nullPtr
withForeignOrNullPtr (Just fp) = withForeignPtr fp

-- | Helper so we never leak untracked 'Ptr's.
mkForeignPtr :: (Ptr a -> IO ()) -> IO (Ptr a) -> IO (ForeignPtr a)
mkForeignPtr finalize create = create >>= Conc.newForeignPtr <*> finalize

-- | Helper so we never leak untracked 'FunPtr's.
mkFunPtr :: ForeignPtr b -> IO (FunPtr a) -> IO (FunPtr a)
mkFunPtr anchor create = do
    fPtr <- create
    Conc.addForeignPtrFinalizer anchor $ freeHaskellFunPtr fPtr
    return fPtr

safeLast :: [a] -> Maybe a
safeLast [] = Nothing
safeLast xs = Just $ last xs

bitOr :: (Foldable t, Num a, Bits a) => t a -> a
bitOr = foldl' (.|.) 0

-- | Equivalent to @flip fix@.
--
-- Used to express a recursive function of one argument that is called only once
-- on an initial value:
--
-- > let go x = ... in go x0
--
-- as:
--
-- > fix1 x0 $ \go x -> ...
fix1 :: a -> ((a -> b) -> a -> b) -> b
fix1 x f = fix f x

-- | Like `fix1`, but for a function of two arguments.  Currently unused.
fix2 :: a -> b -> ((a -> b -> c) -> a -> b -> c) -> c
fix2 x y f = fix f x y

-- | Like `fix1`, but for a function of three arguments.  Currently unused.
fix3 :: a -> b -> c -> ((a -> b -> c -> d) -> a -> b -> c -> d) -> d
fix3 x y z f = fix f x y z

-- ** Fast @Text@ slicing

data SliceRange = SliceRange
    {-# UNPACK #-} !Text.I16
    {-# UNPACK #-} !Text.I16

-- | Zero-copy slice a 'Text'.  An unset capture is represented by a
-- `pcre2_UNSET` range and is interpreted in this library as `Text.empty`.
thinSlice :: Text -> SliceRange -> Text
thinSlice text (SliceRange off offEnd)
    | off == fromIntegral pcre2_UNSET = Text.empty
    | otherwise                       = text
        & Text.dropWord16 off
        & Text.takeWord16 (offEnd - off)

-- | Slice a 'Text', copying if it\'s less than half of the original.
slice :: Text -> SliceRange -> Text
slice text sliceRange =
    let substring = thinSlice text sliceRange
    in if Text.length substring > Text.length text `div` 2
        then substring
        else Text.copy substring

-- | Probably unnecessary, but unrestricted 'castPtr' feels dangerous.
class CastCUs a b | a -> b where
    castCUs :: Ptr a -> Ptr b
    castCUs = castPtr
    {-# INLINE castCUs #-}
instance CastCUs CUShort Word16
instance CastCUs Word16 CUShort

-- ** Lens types and utilities
--
-- $LensTypesAndUtilities
-- The combinators\' types are excessively polymorphic\/polykinded\/ugly because
-- they have no accompanying signatures (not worth the trouble).

type Lens'      s a = forall f. (Functor f)     => (a -> f a) -> s -> f s
type Traversal' s a = forall f. (Applicative f) => (a -> f a) -> s -> f s

preview l = getFirst . getConst . l (Const . First . Just)
view l = getConst . l Const
to k f = Const . getConst . f . k
has l = getAny . getConst . l (\_ -> Const $ Any True)

-- toAlternativeOf :: (Alternative f) => Getting (Alt f a) s a -> s -> f a
toAlternativeOf l = getAlt . view (l . to (Alt . pure))

_headNE :: Lens' (NonEmpty a) a
_headNE f (x :| xs) = f x <&> \x' -> x' :| xs

-- * Assembling inputs into @Matcher@s and @Subber@s

-- | A matching function where all inputs and auxilliary data structures have
-- been \"compiled\".  It takes a subject and produces a raw result code and
-- match data, to be passed to further validation and inspection.
type Matcher = Text -> IO (CInt, MatchData)

-- | A substitution function.  It takes a subject and produces the number of
-- substitutions performed (0 or 1, or more in the presence of `SubGlobal`)
-- along with the transformed subject.  Currently the number is unused.
type Subber = Text -> IO (CInt, Text)

-- ** Options

-- | A `Monoid` representing nearly every facility PCRE2 presents for tweaking
-- the behavior of regex compilation and execution.
--
-- All library functions that take options have the suffix @Opt@ in their names;
-- for each of them, there\'s also a non-@Opt@ convenience function that simply
-- has the (unexported) `mempty` option.  For many uses, options won\'t be
-- needed.
--
-- Some options can be enabled by special character sequences in the pattern as
-- an alternative to specifying them as an `Option`.  See `Caseless` for
-- example.
--
-- Documentation is scant here.  For more complete, accurate information,
-- including discussions of corner cases arising from specific combinations of
-- options and pattern items, please see the [C API
-- documentation](https://pcre.org/current/doc/html/pcre2api.html).
data Option
    = NoOptions -- ^ `mempty`
    | TwoOptions Option Option -- ^ `<>`

    | AllowEmptyClass -- ^ Make @[]@ not match anything, rather than counting
    -- the @]@ as the first character of the class.
    | AltBsux -- ^ Like `AltBsuxLegacy`, except with ECMAScript 6 hex literal
    -- feature for @\\u@.
    | AltBsuxLegacy -- ^ Behave like ECMAScript 5 for @\\U@, @\\u@, and @\\x@.
    -- See 'AltBsux'.
    | AltCircumflex -- ^ Match a @^@ after a newline at the end of the subject.
    -- Only relevant in multiline mode.
    | AltVerbNames -- ^ Enable backslash escapes in verb names.  E.g.,
    -- @(*MARK:L\\(O\\)L)@.
    | Anchored -- ^ Equivalent to beginning pattern with @^@.
    | AutoCallout -- ^ Run callout for every pattern item.  Only relevant if a
    -- callout is set.
    | BadEscapeIsLiteral -- ^ Do not throw an error for unrecognized or
    -- malformed escapes.  /\"This is a dangerous option.\"/
    | Bsr Bsr -- ^ Override what @\\R@ matches (default given by `defaultBsr`).
    | Caseless -- ^ Case-insensitive match.  Equivalent to @(?i)@.
    | DepthLimit Word32 -- ^ Override maximum depth of nested backtracking
    -- (default given by `defaultDepthLimit`).  Equivalent to
    -- @(*LIMIT_DEPTH=@/number/@)@.
    | DollarEndOnly -- ^ Don\'t match @$@ with a newline at the end of the
    -- subject.
    | DotAll -- ^ A dot also matches a (single-character) newline.  Equivalent
    -- to @(?s)@.
    | DupNames -- ^ Allow non-unique capture names.
    | EndAnchored -- ^ More or less like ending pattern with @$@.
    | EscapedCrIsLf -- ^ Interpret @\\r@ as @\\n@.
    | Extended -- ^ In the pattern, ignore whitespace, and enable comments
    -- starting with @#@.  Equivalent to @(?x)@.
    | ExtendedMore -- ^ Like `Extended` but also ignore spaces and tabs within
    -- @[]@.
    | FirstLine -- ^ The match must begin in the first line of the subject.
    | HeapLimit Word32 -- ^ Override maximum heap memory (in kibibytes) used to
    -- hold backtracking information (default given by `defaultHeapLimit`).
    -- Equivalent to @(*LIMIT_HEAP=@/number/@)@.
    | Literal -- ^ Treat the pattern as a literal string.
    | MatchLimit Word32 -- ^ Override maximum value of the main matching loop\'s
    -- internal counter (default given by `defaultMatchLimit`), as a simple CPU
    -- throttle.  Equivalent to @(*LIMIT_MATCH=@/number/@)@.
    | MatchLine -- ^ Only match complete lines.  Equivalent to bracketing the
    -- pattern with @^(?:@/pattern/@)$@.
    | MatchUnsetBackRef -- ^ A backreference to an unset capture group matches
    -- an empty string.
    | MatchWord -- ^ Only match subjects that have word boundaries at the
    -- beginning and end.  Equivalent to bracketing the pattern with
    -- @\\b(?:@/pattern/@)\\b@.
    | MaxPatternLength Word64 -- ^ Default is `maxBound`.
    | Multiline -- ^ @^@ and @$@ mean \"beginning\/end of a line\" rather than
    -- \"beginning\/end of the subject\".  Equivalent to @(?m)@.
    | NeverBackslashC -- ^ Do not allow the unsafe @\\C@ sequence.
    | NeverUcp -- ^ Don\'t count Unicode characters in some character classes
    -- such as @\\d@.  Overrides @(*UCP)@.
    | Newline Newline -- ^ Override what a newline is (default given by
    -- `defaultNewline`).  Equivalent to @(*CRLF)@ or similar.
    | NoAutoCapture -- ^ Disable numbered capturing parentheses.
    | NoAutoPossess -- ^ Turn off some optimizations, possibly resulting in some
    -- callouts not being called.
    | NoDotStarAnchor -- ^ Turn off an optimization involving @.*@, possibly
    -- resulting in some callouts not being called.
    | NoStartOptimize -- ^ Turn off some optimizations normally performed at the
    -- beginning of a pattern.
    | NotBol -- ^ First character of subject is not the __b__eginning __o__f
    -- __l__ine.  Only affects @^@.
    | NotEmpty -- ^ The 0th capture doesn\'t match if it would be empty.
    | NotEmptyAtStart -- ^ The 0th capture doesn\'t match if it would be empty
    -- and at the beginning of the subject.
    | NotEol -- ^ End of subject is not the __e__nd __o__f __l__ine.  Only
    -- affects @$@.
    | OffsetLimit Word64 -- ^ Limit how far an unanchored search can advance in
    -- the subject.
    | ParensLimit Word32 -- ^ Override max depth of nested parentheses (default
    -- given by `defaultParensLimit`).
    | PartialHard -- ^ If the subject ends without finding a complete match,
    -- stop trying alternatives and signal a partial match immediately.
    -- Currently we do this by throwing a `Pcre2Exception` but we should do
    -- better.
    | PartialSoft -- ^ If the subject ends and all alternatives have been tried,
    -- but no complete match is found, signal a partial match.  Currently we do
    -- this by throwing a `Pcre2Exception` but we should do better.
    | SubGlobal -- ^ /Affects `subOpt`./  Replace all, rather than just the
    -- first.
    | SubLiteral -- ^ /Affects `subOpt`./  Treat the replacement as a literal
    -- string.
    | SubReplacementOnly -- ^ /Affects `subOpt`./  Return just the rendered
    -- replacement instead of it within the subject.  With `SubGlobal`, all
    -- results are concatenated.
    | SubUnknownUnset -- ^ /Affects `subOpt`./  References in the replacement
    -- to non-existent captures don\'t error but are treated as unset.
    | SubUnsetEmpty -- ^ /Affects `subOpt`./  References in the replacement
    -- to unset captures don\'t error but are treated as empty.
    | Ucp -- ^ Count Unicode characters in some character classes such as @\\d@.
    -- Incompatible with `NeverUcp`.
    | Ungreedy -- ^ Invert the effect of @?@.  Without it, quantifiers are
    -- non-greedy; with it, they are greedy.  Equivalent to @(?U)@.
    | UnsafeCallout (CalloutInfo -> IO CalloutResult) -- ^ Run the given callout
    -- on every capture.  Multiples of this option before the rightmost are
    -- ignored.
    --
    -- /NOTE: The callout is run via `unsafePerformIO` within pure code!/
    | UnsafeCompileRecGuard (Int -> IO Bool) -- ^ Run the given guard on every
    -- new descent into a level of parentheses, passing the current depth as
    -- argument.  Returning @False@ aborts pattern compilation with an
    -- exception.  Multiples of this option before the rightmost are ignored.
    --
    -- /NOTE: Currently (PCRE2 version 10\.35) patterns seem to be parsed in/
    -- /two passes, both times triggering the recursion guard.  Also, it is/
    -- /triggered at the beginning of the pattern, passing 0.  None of this is/
    -- /documented; expect the unexpected in the presence of side effects!/
    --
    -- /NOTE: The guard is run via `unsafePerformIO` within pure code!/
    | UnsafeSubCallout (SubCalloutInfo -> IO SubCalloutResult) -- ^ Run the
    -- given callout on every substitution.  This is at most once unless
    -- `SubGlobal` is set.  Multiples of this option before the rightmost are
    -- ignored.
    --
    -- /NOTE: The callout is run via `unsafePerformIO` within pure code!/
    | Utf -- ^ Treat both the pattern and subject as UTF rather than fixed-width
    -- 16-bit code units.

instance Semigroup Option where
    (<>) = TwoOptions

instance Monoid Option where
    mempty = NoOptions

-- | What @\\R@, __b__ack__s__lash __R__, can mean.
data Bsr
    = BsrUnicode -- ^ any Unicode line ending sequence
    | BsrAnyCrlf -- ^ @\\r@, @\\n@, or @\\r\\n@
    deriving (Eq, Show)

-- | C to Haskell.
bsrFromC :: CUInt -> Bsr
bsrFromC x
    | x == pcre2_BSR_UNICODE = BsrUnicode
    | x == pcre2_BSR_ANYCRLF = BsrAnyCrlf
    | otherwise              = error $ "bsrFromC: bad value " ++ show x

-- | Haskell to C.
bsrToC :: Bsr -> CUInt
bsrToC BsrUnicode = pcre2_BSR_UNICODE
bsrToC BsrAnyCrlf = pcre2_BSR_ANYCRLF

-- | What\'s considered a newline.
data Newline
    = NewlineCr      -- ^ @\\r@ only
    | NewlineLf      -- ^ @\\n@ only
    | NewlineCrlf    -- ^ @\\r\\n@ only
    | NewlineAny     -- ^ any Unicode line ending sequence
    | NewlineAnyCrlf -- ^ any of the above
    | NewlineNul     -- ^ binary zero
    deriving (Eq, Show)

-- | C to Haskell.
newlineFromC :: CUInt -> Newline
newlineFromC x
    | x == pcre2_NEWLINE_CR      = NewlineCr
    | x == pcre2_NEWLINE_LF      = NewlineLf
    | x == pcre2_NEWLINE_CRLF    = NewlineCrlf
    | x == pcre2_NEWLINE_ANY     = NewlineAny
    | x == pcre2_NEWLINE_ANYCRLF = NewlineAnyCrlf
    | x == pcre2_NEWLINE_NUL     = NewlineNul
    | otherwise                  = error $ "newlineFromC: bad value " ++ show x

-- | Haskell to C.
newlineToC :: Newline -> CUInt
newlineToC NewlineCr      = pcre2_NEWLINE_CR
newlineToC NewlineLf      = pcre2_NEWLINE_LF
newlineToC NewlineCrlf    = pcre2_NEWLINE_CRLF
newlineToC NewlineAny     = pcre2_NEWLINE_ANY
newlineToC NewlineAnyCrlf = pcre2_NEWLINE_ANYCRLF
newlineToC NewlineNul     = pcre2_NEWLINE_NUL

-- | Input for user-defined callouts.
data CalloutInfo
    = CalloutInfo {
        -- | The index of which capture we\'re on.  Note that callouts are never
        -- run for the 0th capture.
        calloutIndex :: CalloutIndex,
        -- | The captures that have been set so far.
        calloutCaptures :: NonEmpty (Maybe Text),
        -- | The original subject.
        calloutSubject :: Text,
        -- | The name of the most recently passed @(*MARK)@, @(*PRUNE)@, or
        -- @(*THEN)@, if any.
        calloutMark :: Maybe Text,
        -- | Is this the first callout after the start of matching?
        calloutIsFirst :: Bool,
        -- | Has a backtrack occurred since the previous callout, or the
        -- beginning of matching if no previous callouts?
        calloutBacktracked :: Bool}
    deriving (Show, Eq)

-- | What caused the callout.
data CalloutIndex
    = CalloutNumber Int -- ^ Numbered capture.
    | CalloutName Text -- ^ Named capture.
    | CalloutAuto Int Int -- ^ The item located at this half-open range of
    -- offsets within the pattern.  See `AutoCallout`.
    deriving (Show, Eq)

-- | Callout functions return one of these values, which dictates what happens
-- next in the match.
data CalloutResult
    = CalloutProceed -- ^ Keep going.
    | CalloutNoMatchHere -- ^ Fail the current capture, but not the whole match.
    -- For example, backtracking may occur.
    | CalloutNoMatch -- ^ Fail the whole match.
    deriving (Show, Eq)

-- | Input for user-defined substitution callouts.
data SubCalloutInfo
    = SubCalloutInfo {
        -- | The 1-based index of which substitution we\'re on.  Only goes past
        -- 1 during global substitutions.
        subCalloutSubsCount :: Int,
        -- | The captures that have been set so far.
        subCalloutCaptures :: NonEmpty (Maybe Text),
        -- | The original subject.
        subCalloutSubject :: Text,
        -- | The replacement.
        subCalloutReplacement :: Text}
    deriving (Show, Eq)

-- | Substitution callout functions return one of these values, which dictates
-- what happens next in the substitution.
data SubCalloutResult
    = SubCalloutAccept -- ^ Succeed, and keep going if in global mode.
    | SubCalloutSkip -- ^ Do not perform this substitution, but keep going if in
    -- global mode.
    | SubCalloutAbort -- ^ Do not perform this or any subsequent substitutions.
    deriving (Show, Eq)

-- ** Lower-level representation of options and C data

type CompileContext = ForeignPtr Pcre2_compile_context
type Code           = ForeignPtr Pcre2_code
type MatchContext   = ForeignPtr Pcre2_match_context
type MatchData      = ForeignPtr Pcre2_match_data

-- | An `Option` can result in multiple \"plans\".
applyOption :: Option -> [AppliedOption]
applyOption = \case
    NoOptions            -> []
    TwoOptions opt0 opt1 -> applyOption opt0 ++ applyOption opt1

    -- CompileOption
    Anchored          -> [CompileOption pcre2_ANCHORED]
    AllowEmptyClass   -> [CompileOption pcre2_ALLOW_EMPTY_CLASS]
    AltBsuxLegacy     -> [CompileOption pcre2_ALT_BSUX]
    AltCircumflex     -> [CompileOption pcre2_ALT_CIRCUMFLEX]
    AltVerbNames      -> [CompileOption pcre2_ALT_VERBNAMES]
    AutoCallout       -> [CompileOption pcre2_AUTO_CALLOUT]
    Caseless          -> [CompileOption pcre2_CASELESS]
    DollarEndOnly     -> [CompileOption pcre2_DOLLAR_ENDONLY]
    DotAll            -> [CompileOption pcre2_DOTALL]
    DupNames          -> [CompileOption pcre2_DUPNAMES]
    EndAnchored       -> [CompileOption pcre2_ENDANCHORED]
    Extended          -> [CompileOption pcre2_EXTENDED]
    ExtendedMore      -> [CompileOption pcre2_EXTENDED_MORE]
    FirstLine         -> [CompileOption pcre2_FIRSTLINE]
    Literal           -> [CompileOption pcre2_LITERAL]
    MatchUnsetBackRef -> [CompileOption pcre2_MATCH_UNSET_BACKREF]
    Multiline         -> [CompileOption pcre2_MULTILINE]
    NeverBackslashC   -> [CompileOption pcre2_NEVER_BACKSLASH_C]
    NeverUcp          -> [CompileOption pcre2_NEVER_UCP]
    NoAutoCapture     -> [CompileOption pcre2_NO_AUTO_CAPTURE]
    NoAutoPossess     -> [CompileOption pcre2_NO_AUTO_POSSESS]
    NoDotStarAnchor   -> [CompileOption pcre2_NO_DOTSTAR_ANCHOR]
    NoStartOptimize   -> [CompileOption pcre2_NO_START_OPTIMIZE]
    Ucp               -> [CompileOption pcre2_UCP]
    Ungreedy          -> [CompileOption pcre2_UNGREEDY]
    Utf               -> [CompileOption pcre2_UTF]

    -- ExtraCompileOption
    AltBsux            -> [CompileExtraOption pcre2_EXTRA_ALT_BSUX]
    BadEscapeIsLiteral -> [CompileExtraOption pcre2_EXTRA_BAD_ESCAPE_IS_LITERAL]
    EscapedCrIsLf      -> [CompileExtraOption pcre2_EXTRA_ESCAPED_CR_IS_LF]
    MatchLine          -> [CompileExtraOption pcre2_EXTRA_MATCH_LINE]
    MatchWord          -> [CompileExtraOption pcre2_EXTRA_MATCH_WORD]

    -- CompileContextOption
    Bsr bsr -> unary
        CompileContextOption pcre2_set_bsr (bsrToC bsr)
    MaxPatternLength len -> unary
        CompileContextOption pcre2_set_max_pattern_length (fromIntegral len)
    Newline newline -> unary
        CompileContextOption pcre2_set_newline (newlineToC newline)
    ParensLimit limit -> unary
        CompileContextOption pcre2_set_parens_nest_limit (fromIntegral limit)

    -- CompileRecGuardOption
    UnsafeCompileRecGuard f -> [CompileRecGuardOption f]

    -- MatchOption
    NotBol             -> [MatchOption pcre2_NOTBOL]
    NotEol             -> [MatchOption pcre2_NOTEOL]
    NotEmpty           -> [MatchOption pcre2_NOTEMPTY]
    NotEmptyAtStart    -> [MatchOption pcre2_NOTEMPTY_ATSTART]
    PartialHard        -> [MatchOption pcre2_PARTIAL_HARD]
    PartialSoft        -> [MatchOption pcre2_PARTIAL_SOFT]
    SubReplacementOnly -> [MatchOption pcre2_SUBSTITUTE_REPLACEMENT_ONLY]
    SubGlobal          -> [MatchOption pcre2_SUBSTITUTE_GLOBAL]
    SubLiteral         -> [MatchOption pcre2_SUBSTITUTE_LITERAL]
    SubUnknownUnset    -> [MatchOption pcre2_SUBSTITUTE_UNKNOWN_UNSET]
    SubUnsetEmpty      -> [MatchOption pcre2_SUBSTITUTE_UNSET_EMPTY]

    -- CalloutOption
    UnsafeCallout f -> [CalloutOption f]

    -- SubCalloutOption
    UnsafeSubCallout f -> [SubCalloutOption f]

    -- MatchContextOption
    OffsetLimit limit -> CompileOption pcre2_USE_OFFSET_LIMIT : unary
        MatchContextOption pcre2_set_offset_limit (fromIntegral limit)
    HeapLimit limit -> unary
        MatchContextOption pcre2_set_heap_limit (fromIntegral limit)
    MatchLimit limit -> unary
        MatchContextOption pcre2_set_match_limit (fromIntegral limit)
    DepthLimit limit -> unary
        MatchContextOption pcre2_set_depth_limit (fromIntegral limit)

    where
    unary ctor f x = (: []) $ ctor $ \ctx -> withForeignPtr ctx $ \ctxPtr ->
        f ctxPtr x >>= check (== 0)

-- | Intermediate representation of options expressing what effect they\'ll have
-- on which stage of regex compilation\/execution.  Also provide fake @Prism'@s.
data AppliedOption
    = CompileOption CUInt
    | CompileExtraOption CUInt
    | CompileContextOption (CompileContext -> IO ())
    | CompileRecGuardOption (Int -> IO Bool)
    | MatchOption CUInt
    | CalloutOption (CalloutInfo -> IO CalloutResult)
    | SubCalloutOption (SubCalloutInfo -> IO SubCalloutResult)
    | MatchContextOption (MatchContext -> IO ())

_CompileOption f =
    \case CompileOption x -> CompileOption <$> f x; o -> pure o
_CompileExtraOption f =
    \case CompileExtraOption x -> CompileExtraOption <$> f x; o -> pure o
_CompileContextOption f =
    \case CompileContextOption x -> CompileContextOption <$> f x; o -> pure o
_CompileRecGuardOption f =
    \case CompileRecGuardOption x -> CompileRecGuardOption <$> f x; o -> pure o
_MatchOption f =
    \case MatchOption x -> MatchOption <$> f x; o -> pure o
_CalloutOption f =
    \case CalloutOption x -> CalloutOption <$> f x; o -> pure o
_SubCalloutOption f =
    \case SubCalloutOption x -> SubCalloutOption <$> f x; o -> pure o
_MatchContextOption f =
    \case MatchContextOption x -> MatchContextOption <$> f x; o -> pure o

-- ** Extracting options at the right times
--
-- $ExtractingOptionsAtTheRightTimes
-- We interleave the extraction of options with the manipulation of foreign data
-- en route to the target `Matcher` or `Subber`.

-- | A `Monad` modeling both option extraction and foreign effects.
type ExtractOpts = StateT [AppliedOption] IO

-- | Use a fake @Prism'@ to extract a category of options.
extractOptsOf :: Traversal' AppliedOption a -> ExtractOpts [a]
extractOptsOf traversal = state $ partitionEithers . map discrim where
    discrim opt = maybe (Right opt) Left $ preview traversal opt

-- | Prepare to compile a `Code`.
extractCompileEnv :: ExtractOpts CompileEnv
extractCompileEnv = do
    ctxUpds <- extractOptsOf _CompileContextOption
    xtraOpts <- bitOr <$> extractOptsOf _CompileExtraOption
    recGuard <- safeLast <$> extractOptsOf _CompileRecGuardOption

    compileEnvCtx <- sequence $ do
        guard $ not $ null ctxUpds && xtraOpts == 0 && null recGuard
        Just $ liftIO $ do
            ctx <- mkForeignPtr pcre2_compile_context_free $
                pcre2_compile_context_create nullPtr

            forM_ ctxUpds $ \update -> update ctx
            when (xtraOpts /= 0) $ withForeignPtr ctx $ \ctxPtr ->
                pcre2_set_compile_extra_options ctxPtr xtraOpts >>= check (== 0)

            return ctx

    compileEnvERef <- sequence $ do
        ctx <- compileEnvCtx
        f <- recGuard
        Just $ liftIO $ do
            eRef <- newIORef Nothing
            fPtr <- mkFunPtr ctx $ mkRecursionGuard $ \depth _ -> do
                resultOrE <- try $ do
                    f <- evaluate f
                    result <- f $ fromIntegral depth
                    evaluate result
                case resultOrE of
                    Right success -> return $ if success then 0 else 1
                    Left e        -> writeIORef eRef (Just e) >> return 1

            withForeignPtr ctx $ \ctxPtr -> do
                result <- pcre2_set_compile_recursion_guard ctxPtr fPtr nullPtr
                check (== 0) result

            return eRef

    return $ CompileEnv {..}

-- | Inputs to `Code` compilation besides the pattern.
data CompileEnv = CompileEnv {
    compileEnvCtx :: Maybe CompileContext,
    -- | A register for catching exceptions thrown in recursion guards, if
    -- needed.
    compileEnvERef :: Maybe (IORef (Maybe SomeException))}

-- | Compile a `Code`.
extractCode :: Text -> CompileEnv -> ExtractOpts Code
extractCode patt (CompileEnv {..}) = do
    opts <- bitOr <$> extractOptsOf _CompileOption

    liftIO $ mkForeignPtr pcre2_code_free $
        Text.useAsPtr patt $ \pattPtr pattCUs ->
        alloca $ \errorCodePtr ->
        alloca $ \errorOffPtr ->
        withForeignOrNullPtr compileEnvCtx $ \ctxPtr -> do
            codePtr <- pcre2_compile
                (castCUs pattPtr)
                (fromIntegral pattCUs)
                opts
                errorCodePtr
                errorOffPtr
                ctxPtr
            when (codePtr == nullPtr) $ do
                -- Re-throw exception (if any) from recursion guard (if any)
                forM_ compileEnvERef $ readIORef >=> mapM_ throwIO
                -- Otherwise throw PCRE2 error
                errorCode <- peek errorCodePtr
                offCUs <- peek errorOffPtr
                throwIO $ Pcre2CompileException errorCode patt offCUs

            return codePtr

-- | `Code` and auxiliary compiled data used in preparation for a match or
-- substitution.  This remains constant for the lifetime of a `Matcher` or
-- `Subber`.
data MatchEnv = MatchEnv {
    matchEnvCode       :: Code,
    matchEnvOpts       :: CUInt,
    matchEnvCtx        :: Maybe MatchContext,
    matchEnvCallout    :: Maybe (CalloutInfo -> IO CalloutResult),
    matchEnvSubCallout :: Maybe (SubCalloutInfo -> IO SubCalloutResult)}

-- | Prepare a matching function.
extractMatchEnv :: Code -> ExtractOpts MatchEnv
extractMatchEnv matchEnvCode = do
    matchEnvOpts <- bitOr <$> extractOptsOf _MatchOption

    matchEnvCtx <- extractOptsOf _MatchContextOption >>= \case
        []      -> return Nothing
        ctxUpds -> liftIO $ Just <$> do
            ctx <- mkForeignPtr pcre2_match_context_free $
                pcre2_match_context_create nullPtr

            forM_ ctxUpds $ \update -> update ctx

            return ctx

    matchEnvCallout <- safeLast <$> extractOptsOf _CalloutOption
    matchEnvSubCallout <- safeLast <$> extractOptsOf _SubCalloutOption

    return $ MatchEnv {..}

-- | Helper for @assemble*@ functions.  Basically, extract all options and help
-- produce a function that takes a `Text` subject and does something.
assembleSubjFun
    :: (MatchEnv -> Text -> IO a)
    -> Option
    -> Text
    -> IO (Text -> IO a)
assembleSubjFun mkSubjFun option patt =
    runStateT extractAll (applyOption option) <&> \case
        (subjFun, []) -> subjFun
        _             -> error "BUG! Options not fully extracted"

    where
    extractAll = do
        compileEnv <- extractCompileEnv
        code <- extractCode patt compileEnv
        matchEnv <- extractMatchEnv code

        return $ mkSubjFun matchEnv

-- | Produce a `Matcher`.
assembleMatcher
    :: Option
    -> Text       -- ^ pattern
    -> IO Matcher
assembleMatcher = assembleSubjFun $ \matchEnv@(MatchEnv {..}) ->
    let matchTempEnvWithSubj = mkMatchTempEnv matchEnv
    in \subject -> withForeignPtr matchEnvCode $ \codePtr -> do
        matchData <- mkForeignPtr pcre2_match_data_free $
            pcre2_match_data_create_from_pattern codePtr nullPtr

        MatchTempEnv {..} <- matchTempEnvWithSubj subject
        result <-
            Text.useAsPtr subject $ \subjPtr subjCUs ->
            withForeignOrNullPtr matchTempEnvCtx $ \ctxPtr ->
            withForeignPtr matchData $ \matchDataPtr -> pcre2_match
                codePtr
                (castCUs subjPtr)
                (fromIntegral subjCUs)
                0
                matchEnvOpts
                matchDataPtr
                ctxPtr
        maybeRethrow matchTempEnvRef

        return (result, matchData)

-- | A `Subber` works by first writing results to a reasonably-sized buffer.  If
-- we run out of room, PCRE2 allows us to simulate the rest of the substitution
-- without writing anything, in order to calculate how big the buffer actually
-- has to be.  In this event, we rerun the substitution with a new,
-- exactly-sized buffer.
--
-- One potential issue arising from two attempts is running effectful callouts
-- twice.  We mitigate this by skipping callouts the second time:
--
-- * all capture callouts, since they had run during the simulation, and
--
-- * those substitution callouts that had run the first time.
--
-- Therefore, the first time, log the substitution callout indexes that had run
-- along with their results, and replay the log the second time, returning those
-- same results without re-incurring effects.
assembleSubber
    :: Text      -- ^ replacement
    -> Option
    -> Text      -- ^ pattern
    -> IO Subber
assembleSubber replacement =
    assembleSubjFun $ \firstMatchEnv@(MatchEnv {..}) ->
    -- Subber
    \subject ->
    withForeignPtr matchEnvCode $ \codePtr ->
    Text.useAsPtr subject $ \subjPtr subjCUs ->
    Text.useAsPtr replacement $ \replPtr replCUs ->
    alloca $ \outLenPtr -> do
        let checkAndGetOutput :: CInt -> PCRE2_SPTR -> IO (CInt, Text)
            checkAndGetOutput 0      _         = return (0, subject)
            checkAndGetOutput result outBufPtr = do
                check (> 0) result
                outLen <- peek outLenPtr
                out <- Text.fromPtr (castCUs outBufPtr) (fromIntegral outLen)
                return (result, out)

            run :: CUInt -> Ptr Pcre2_match_context -> PCRE2_SPTR -> IO CInt
            run curOpts ctxPtr outBufPtr = pcre2_substitute
                codePtr
                (castCUs subjPtr)
                (fromIntegral subjCUs)
                0
                (matchEnvOpts .|. curOpts)
                nullPtr
                ctxPtr
                (castCUs replPtr)
                (fromIntegral replCUs)
                outBufPtr
                outLenPtr

            -- Guess the size of the output to be <= 2x that of the subject.
            initOutLen = Text.length subject * 2

        poke outLenPtr $ fromIntegral initOutLen
        MatchTempEnv {..} <- mkMatchTempEnv firstMatchEnv subject
        firstAttempt <- withForeignOrNullPtr matchTempEnvCtx $ \ctxPtr ->
            allocaArray initOutLen $ \outBufPtr -> do
                result <- run pcre2_SUBSTITUTE_OVERFLOW_LENGTH ctxPtr outBufPtr
                maybeRethrow matchTempEnvRef
                if result == pcre2_ERROR_NOMEMORY
                    then Left <$> case matchTempEnvRef of
                        Nothing  -> return IM.empty
                        Just ref -> calloutStateSubsLog <$> readIORef ref
                    else Right <$> checkAndGetOutput result outBufPtr

        case firstAttempt of
            Right resultAndOutput -> return resultAndOutput
            Left subsLog          -> do
                -- The output was bigger than we guessed.  Try again.
                computedOutLen <- fromIntegral <$> peek outLenPtr
                let finalMatchEnv = firstMatchEnv {
                        -- Do not run capture callouts again.
                        matchEnvCallout = Nothing,
                        -- Do not run any substitution callouts run previously.
                        matchEnvSubCallout = fastFwd <$> matchEnvSubCallout}
                    fastFwd f = \info ->
                        case subsLog IM.!? subCalloutSubsCount info of
                            Just result -> return result
                            Nothing     -> f info
                MatchTempEnv {..} <- mkMatchTempEnv finalMatchEnv subject
                withForeignOrNullPtr matchTempEnvCtx $ \ctxPtr ->
                    allocaArray computedOutLen $ \outBufPtr -> do
                        result <- run 0 ctxPtr outBufPtr
                        maybeRethrow matchTempEnvRef
                        checkAndGetOutput result outBufPtr

-- | Generate per-call data for @pcre2_match()@ etc., to accommodate callouts.
--
-- We need to save and inspect state that occurs in potentially concurrent
-- matches.  This means a new state ref for each match, which means a new
-- `FunPtr` to close on it, which means a new match context to set it to.
mkMatchTempEnv
    :: MatchEnv
    -> Text -- ^ Callout info requires access to the original subject.
    -> IO MatchTempEnv
mkMatchTempEnv (MatchEnv {..})
    | null matchEnvCallout && null matchEnvSubCallout = \_ -> return noCallouts
    | otherwise                                       = \subject -> do
        calloutStateRef <- newIORef $ CalloutState {
            calloutStateException = Nothing,
            calloutStateSubsLog   = IM.empty}
        ctx <- mkForeignPtr pcre2_match_context_free ctxPtrForCallouts

        -- Install C function pointers in the @pcre2_match_context@.  When
        -- dereferenced and called, they will force the user-supplied Haskell
        -- callout functions and their results, catching any exceptions and
        -- saving them.

        -- Install callout, if any
        forM_ matchEnvCallout $ \f -> do
            fPtr <- mkFunPtr ctx $ mkCallout $ \blockPtr _ -> do
                info <- getCalloutInfo subject blockPtr
                resultOrE <- try $ do
                    f <- evaluate f
                    result <- f info
                    evaluate result
                case resultOrE of
                    Right result -> return $ case result of
                        CalloutProceed     -> 0
                        CalloutNoMatchHere -> 1
                        CalloutNoMatch     -> pcre2_ERROR_NOMATCH
                    Left e -> do
                        modifyIORef' calloutStateRef $ \cst -> cst {
                            calloutStateException = Just e}
                        return pcre2_ERROR_CALLOUT
            withForeignPtr ctx $ \ctxPtr ->
                pcre2_set_callout ctxPtr fPtr nullPtr >>= check (== 0)

        -- Install substitution callout, if any
        forM_ matchEnvSubCallout $ \f -> do
            fPtr <- mkFunPtr ctx $ mkCallout $ \blockPtr _ -> do
                info <- getSubCalloutInfo subject blockPtr
                resultOrE <- try $ do
                    f <- evaluate f
                    result <- f info
                    evaluate result
                case resultOrE of
                    Right result -> do
                        modifyIORef' calloutStateRef $ \cst -> cst {
                            calloutStateSubsLog = IM.insert
                                (subCalloutSubsCount info)
                                result
                                (calloutStateSubsLog cst)}
                        return $ case result of
                            SubCalloutAccept ->  0
                            SubCalloutSkip   ->  1
                            SubCalloutAbort  -> -1
                    Left e -> do
                        modifyIORef' calloutStateRef $ \cst -> cst {
                            calloutStateException = Just e}
                        return (-1)
            withForeignPtr ctx $ \ctxPtr -> do
                result <- pcre2_set_substitute_callout ctxPtr fPtr nullPtr
                check (== 0) result

        return $ MatchTempEnv {
            matchTempEnvCtx = Just ctx,
            matchTempEnvRef = Just calloutStateRef}

    where
    noCallouts = MatchTempEnv {
        matchTempEnvCtx = matchEnvCtx,
        matchTempEnvRef = Nothing}
    ctxPtrForCallouts = case matchEnvCtx of
        -- No pre-existing match context, so create one afresh.
        Nothing -> pcre2_match_context_create nullPtr
        -- Pre-existing match context, so copy it.
        Just ctx -> withForeignPtr ctx pcre2_match_context_copy

-- | Per-call data for @pcre2_match()@ etc.
data MatchTempEnv = MatchTempEnv {
    matchTempEnvCtx :: Maybe MatchContext,
    matchTempEnvRef :: Maybe (IORef CalloutState)}

-- | Data computed during callouts that will be stashed in an IORef and
-- inspected after @pcre2_match()@ or similar completes.
data CalloutState = CalloutState {
    calloutStateException :: Maybe SomeException,
    calloutStateSubsLog   :: IntMap SubCalloutResult}

-- | FFI wrapper.
foreign import ccall "wrapper" mkRecursionGuard
    :: (CUInt -> Ptr a -> IO CInt)
    -> IO (FunPtr (CUInt -> Ptr a -> IO CInt))

-- | FFI wrapper.
foreign import ccall "wrapper" mkCallout
    :: (Ptr block -> Ptr a -> IO CInt)
    -> IO (FunPtr (Ptr block -> Ptr a -> IO CInt))

-- | Within a callout, marshal the original subject and @pcre2_callout_block@
-- data to Haskell and present to the user function.  Ensure no pointers are
-- leaked!
getCalloutInfo :: Text -> Ptr Pcre2_callout_block -> IO CalloutInfo
getCalloutInfo subject blockPtr = do
    calloutIndex <- do
        str <- pcre2_callout_block_callout_string blockPtr
        if str == nullPtr
            then pcre2_callout_block_callout_number blockPtr >>= \case
                -- Auto callout
                255 -> liftM2 CalloutAuto pattPos itemLen where
                    pattPos = intVia pcre2_callout_block_pattern_position
                    itemLen = intVia pcre2_callout_block_next_item_length
                    intVia getter = fromIntegral <$> getter blockPtr
                -- Numerical callout
                number -> return $ CalloutNumber $ fromIntegral number
            else do
                -- String callout
                len <- pcre2_callout_block_callout_string_length blockPtr
                CalloutName <$> Text.fromPtr (castCUs str) (fromIntegral len)

    let calloutSubject = subject

    calloutCaptures <- do
        ovecPtr <- pcre2_callout_block_offset_vector blockPtr
        top <- pcre2_callout_block_capture_top blockPtr
        forM (0 :| [1 .. fromIntegral top - 1]) $ \n -> do
            start <- peekElemOff ovecPtr $ n * 2
            if start == pcre2_UNSET
                then return Nothing
                else Just <$> do
                    end <- peekElemOff ovecPtr $ n * 2 + 1
                    return $ slice calloutSubject $ SliceRange
                        (fromIntegral start)
                        (fromIntegral end)

    calloutMark <- do
        ptr <- pcre2_callout_block_mark blockPtr
        if ptr == nullPtr
            then return Nothing
            else Just <$> do
                -- TODO Replace this with a more obviously best way to slurp a
                -- zero-terminated region of memory into a `Text`, given
                -- whatever the pcre2callout spec means by "zero-terminated".
                len <- fix1 0 $ \go off -> peekElemOff ptr off >>= \case
                    0 -> return off
                    _ -> go $ off + 1
                Text.fromPtr (castCUs ptr) (fromIntegral len)

    flags <- pcre2_callout_block_callout_flags blockPtr
    let calloutIsFirst = flags .&. pcre2_CALLOUT_STARTMATCH /= 0
        calloutBacktracked = flags .&. pcre2_CALLOUT_BACKTRACK /= 0

    return $ CalloutInfo {..}

-- | Within a substitution callout, marshal the original subject and
-- @pcre2_substitute_callout_block@ data to Haskell and present to the user
-- function.  Ensure no pointers are leaked!
getSubCalloutInfo
    :: Text -> Ptr Pcre2_substitute_callout_block -> IO SubCalloutInfo
getSubCalloutInfo subCalloutSubject blockPtr = do
    subCalloutSubsCount <-
        fromIntegral <$> pcre2_substitute_callout_block_subscount blockPtr

    subCalloutCaptures <- do
        ovecPtr <- pcre2_substitute_callout_block_ovector blockPtr
        ovecCount <- pcre2_substitute_callout_block_oveccount blockPtr
        forM (0 :| [1 .. fromIntegral ovecCount - 1]) $ \n -> do
            start <- peekElemOff ovecPtr $ n * 2
            if start == pcre2_UNSET
                then return Nothing
                else Just <$> do
                    end <- peekElemOff ovecPtr $ n * 2 + 1
                    return $ slice subCalloutSubject $ SliceRange
                        (fromIntegral start)
                        (fromIntegral end)

    subCalloutReplacement <- do
        outPtr <- pcre2_substitute_callout_block_output blockPtr
        offsetsPtr <- pcre2_substitute_callout_block_output_offsets blockPtr
        [start, end] <- forM [0, 1] $ peekElemOff offsetsPtr
        Text.fromPtr
            (castCUs $ advancePtr outPtr $ fromIntegral start)
            (fromIntegral $ end - start)

    return $ SubCalloutInfo {..}

-- | If there was a callout and it threw an exception, rethrow it.
maybeRethrow :: Maybe (IORef CalloutState) -> IO ()
maybeRethrow = mapM_ $ readIORef >=> mapM_ throwIO . calloutStateException

-- * Packaging @Matcher@s and @Subber@s as public API functions

-- | The most general form of a matching function, which can also be used as a
-- @Setter'@ to perform substitutions at the Haskell level.
--
-- When any capture is forced, all captures are forced in order to GC C data
-- more promptly.  When we need /some/ captures but not /all/ captures, we can
-- pass in a whitelist of the numbers of captures we want, thereby avoiding
-- unnecessary allocations.
--
-- Internal only!  Users should not (have to) know about `Matcher`.
_capturesInternal :: Matcher -> [Int] -> Traversal' Text (NonEmpty Text)
_capturesInternal matcher whitelist f subject
    | result == pcre2_ERROR_NOMATCH = pure subject
    | result <= 0                   = throw $ Pcre2Exception result
    | otherwise                     = f cs <&> \cs' ->
        -- Swag foldl-as-foldr to create only as many segments as we need to
        -- stitch back together and no more.
        let triples = ovecEntries `NE.zip` cs `NE.zip` cs'
        in Text.concat $ foldr mkSegments termSegments triples 0

    where

    (result, matchData) = unsafePerformIO $ matcher subject
    cs = unsafePerformIO $ forM ovecEntries $ evaluate . slice subject
    ovecEntries = unsafePerformIO $ getOvecEntriesAt whitelist matchData

    mkSegments ((SliceRange off offEnd, c), c') r prevOffEnd
        | off == fromIntegral pcre2_UNSET || c == c' =
            -- This substring is unset or unchanged.  Keep going without making
            -- cuts.
            r prevOffEnd
        | otherwise =
            -- Emit the subject up until here, and the new substring, and keep
            -- going, remembering where we are now.
            thinSlice subject (SliceRange prevOffEnd off) : c' : r offEnd
    termSegments off =
        let offEnd = fromIntegral $ Text.length subject
        -- If the terminal segment is empty, omit it altogether.  That way,
        -- `Text.concat` can just return the subject without copying anything in
        -- cases where no substring is changed.
        in [thinSlice subject (SliceRange off offEnd) | off /= offEnd]

-- | Strictly read specifically indexed captures' offsets from match results.
-- Truncate indexes when they go out of bounds, to support infinite lists.
getOvecEntriesAt :: [Int] -> MatchData -> IO (NonEmpty SliceRange)
getOvecEntriesAt ns matchData = withForeignPtr matchData $ \matchDataPtr -> do
    count <- fromIntegral <$> pcre2_get_ovector_count matchDataPtr

    case NE.nonEmpty $ takeWhile (< count) ns of
        Nothing -> error "BUG! Tried to get empty list of captures"
        Just ns -> do
            ovecPtr <- pcre2_get_ovector_pointer matchDataPtr

            let peekOvec :: Int -> IO Text.I16
                peekOvec = fmap fromIntegral . peekElemOff ovecPtr

            forM ns $ \n -> liftM2 SliceRange
                (peekOvec $ n * 2)
                (peekOvec $ n * 2 + 1)

-- | Helper to create non-Template Haskell API functions.  They all take options
-- and a pattern, and then do something via a 'Matcher'.
withMatcher :: (Matcher -> a) -> Option -> Text -> a
withMatcher f option patt = f $ unsafePerformIO $ assembleMatcher option patt

-- | Match a pattern to a subject and return a list of captures, or @[]@ if no
-- match.
captures :: Text -> Text -> [Text]
captures = capturesOpt mempty

-- | @capturesOpt mempty = captures@
capturesOpt :: Option -> Text -> Text -> [Text]
capturesOpt option patt = view $ _capturesOpt option patt . to NE.toList

-- | Match a pattern to a subject and return a non-empty list of captures in an
-- `Alternative`, or `empty` if no match.  The non-empty list constructor `:|`
-- serves as a cue to differentiate the 0th capture from the others:
--
-- > let parseDate = capturesA "(\\d{4})-(\\d{2})-(\\d{2})"
-- > in case parseDate "submitted 2020-10-20" of
-- >     Just (date :| [y, m, d]) -> ...
-- >     Nothing                  -> putStrLn "didn't match"
capturesA :: (Alternative f) => Text -> Text -> f (NonEmpty Text)
capturesA = capturesOptA mempty

-- | @capturesOptA mempty = capturesA@
capturesOptA :: (Alternative f) => Option -> Text -> Text -> f (NonEmpty Text)
capturesOptA option patt = toAlternativeOf $ _capturesOpt option patt

-- | Does the pattern match the subject?
matches :: Text -> Text -> Bool
matches = matchesOpt mempty

-- | @matchesOpt mempty = matches@
matchesOpt :: Option -> Text -> Text -> Bool
matchesOpt = withMatcher $ \matcher -> has $ _capturesInternal matcher []

-- | Match a pattern to a subject and return the portion that matched in an
-- @Alternative@, or `empty` if no match.
match :: (Alternative f) => Text -> Text -> f Text
match = matchOpt mempty

-- | @matchOpt mempty = match@
matchOpt :: (Alternative f) => Option -> Text -> Text -> f Text
matchOpt option patt = toAlternativeOf $ _matchOpt option patt

-- | Perform at most one substitution.  See
-- [the docs](https://pcre.org/current/doc/html/pcre2api.html#SEC36) for the
-- special syntax of /replacement/.
--
-- >>> sub "(\\w+) calling the (\\w+)" "$2 calling the $1" "the pot calling the kettle black"
-- "the kettle calling the pot black"
sub
    :: Text -- ^ pattern
    -> Text -- ^ replacement
    -> Text -- ^ subject
    -> Text -- ^ result
sub = subOpt mempty

-- | Perform substitutions globally.
--
-- >>> gsub "a" "o" "apples and bananas"
-- "opples ond bononos"
gsub :: Text -> Text -> Text -> Text
gsub = subOpt SubGlobal

-- | @
-- subOpt mempty = sub
-- subOpt SubGlobal = gsub
-- @
subOpt :: Option -> Text -> Text -> Text -> Text
subOpt option patt replacement = snd . unsafePerformIO . subber where
    subber = unsafePerformIO $ assembleSubber replacement option patt

-- | Given a pattern, produce an affine traversal (0 or 1 targets) that focuses
-- from a subject to a potential non-empty list of captures.
--
-- Substitution works in the following way:  If a capture is set such that the
-- new `Text` is not equal to the old one, a substitution occurs, otherwise it
-- doesn\'t.  This matters in cases where a capture encloses another
-- capture&#x2014;notably, /all/ parenthesized captures are enclosed by the 0th.
--
-- >>> threeAndMiddle = _captures ". (.) ."
-- >>> "A A A" & threeAndMiddle .~ "A A A" :| ["B"]
-- "A B A"
-- >>> "A A A" & threeAndMiddle .~ "A B A" :| ["A"]
-- "A B A"
--
-- Changing multiple overlapping captures won\'t do what you want and is
-- unsupported.
--
-- Changing an unset capture is unsupported because the PCRE2 match API does not
-- give location info about it.  Currently we ignore all such attempts.  (Native
-- substitution functions like `sub` do not have this limitation.  See also
-- `SubUnknownUnset` and `SubUnsetEmpty`.)
--
-- If the list becomes longer for some reason, the extra elements are ignored.
-- If it\'s shortened, the absent elements are considered to be unchanged.
--
-- It's recommended that the list be modified capture-wise,
-- using [@ix@](https://hackage.haskell.org/package/microlens/docs/Lens-Micro.html#v:ix).
--
-- > let madlibs = _captures "(\\w+) my (\\w+)"
-- >
-- > print $ "Well bust my buttons!" &~ do
-- >     zoom madlibs $ do
-- >         ix 1 . _head .= 'd'
-- >         ix 2 %= Text.reverse
-- >     _last .= '?'
-- >
-- > -- "Well dust my snottub?"
_captures :: Text -> Traversal' Text (NonEmpty Text)
_captures = _capturesOpt mempty

-- | @_capturesOpt mempty = _captures@
_capturesOpt :: Option -> Text -> Traversal' Text (NonEmpty Text)
_capturesOpt = withMatcher $ \matcher -> _capturesInternal matcher [0 ..]

-- | Given a pattern, produce an affine traversal (0 or 1 targets) that focuses
-- from a subject to the portion of it that matches.
--
-- Equivalent to @\\patt -> `_captures` patt . ix 0@, but more efficient.
_match :: Text -> Traversal' Text Text
_match = _matchOpt mempty

-- | @_matchOpt mempty = _match@
_matchOpt :: Option -> Text -> Traversal' Text Text
_matchOpt = withMatcher $ \matcher -> _capturesInternal matcher [0] . _headNE

-- * Support for Template Haskell compile-time regex analysis

-- | From options and pattern, determine parenthesized patterns\' names in
-- order.
predictCaptureNames :: Option -> Text -> IO [Maybe Text]
predictCaptureNames option patt = do
    code <- evalStateT
        (extractCompileEnv >>= extractCode patt)
        (applyOption option)

    withForeignPtr code getCaptureNames

-- | NOTE: The 0th capture is always named @Nothing@.
getCaptureNames :: Ptr Pcre2_code -> IO [Maybe Text]
getCaptureNames codePtr = do
    nameCount <- getCodeInfo @CUInt codePtr pcre2_INFO_NAMECOUNT
    nameEntrySize <- getCodeInfo @CUInt codePtr pcre2_INFO_NAMEENTRYSIZE
    nameTable <- getCodeInfo @PCRE2_SPTR codePtr pcre2_INFO_NAMETABLE

    -- Can't do [0 .. nameCount - 1] because it underflows when nameCount == 0
    let indexes = takeWhile (< nameCount) [0 ..]
    names <- fmap IM.fromList $ forM indexes $ \i -> do
        let entryPtr = nameTable `advancePtr` fromIntegral (i * nameEntrySize)
            groupNamePtr = entryPtr `advancePtr` 1
        groupNumber <- peek entryPtr
        groupNameLen <- lengthArray0 0 groupNamePtr
        groupName <- Text.fromPtr
            (castCUs groupNamePtr)
            (fromIntegral groupNameLen)
        return (fromIntegral groupNumber, groupName)

    hiCaptNum <- getCodeInfo @CUInt codePtr pcre2_INFO_CAPTURECOUNT

    return $ map (names IM.!?) [0 .. fromIntegral hiCaptNum]

-- | Low-level access to compiled pattern info, per the docs.
getCodeInfo :: (Storable a) => Ptr Pcre2_code -> CUInt -> IO a
getCodeInfo codePtr what = alloca $ \wherePtr -> do
    pcre2_pattern_info codePtr what wherePtr >>= check (== 0)
    peek wherePtr

-- | A wrapper around a list of captures that carries additional type-level
-- information about the number and names of those captures.
--
-- This type is only intended to be created by
-- `Text.Regex.Pcre2.regex`\/`Text.Regex.Pcre2._regex` and consumed by
-- `Text.Regex.Pcre2.capture`\/`Text.Regex.Pcre2._capture`, relying on type
-- inference.  Specifying the @info@ explicitly in a type signature is not
-- supported&#x2014;the definition of `CapturesInfo` is not part of the public
-- API and may change without warning.
--
-- After obtaining `Captures` it\'s recommended to immediately consume them and
-- transform them into application-level data, to avoid leaking the types to top
-- level and having to write signatures.  In times of need, \"@Captures _@\" may
-- be written with the help of @{-\# LANGUAGE PartialTypeSignatures \#-}@.
newtype Captures (info :: CapturesInfo) = Captures (NonEmpty Text)

-- | The kind of `Captures`\'s @info@.
type CapturesInfo = (Nat, [(Symbol, Nat)])

-- | Look up the number of a capture at compile time, either by number or by
-- name.  Throw a helpful 'TypeError' if the index doesn\'t exist.
type family CaptNum (i :: k) (info :: CapturesInfo) :: Nat where
    CaptNum (num :: Nat) '(hi, _) =
        If (CmpNat num 0 == 'LT || CmpNat num hi == 'GT)
            -- then
            (TypeError (TypeLits.Text "No capture numbered " :<>: ShowType num))
            -- else
            num

    CaptNum (name :: Symbol) '(_, '(name, num) ': _) = num
    CaptNum (name :: Symbol) '(hi, _ ': kvs) = 1 + CaptNum name '(hi, kvs)
    CaptNum (name :: Symbol) _ = TypeError
        (TypeLits.Text "No capture named " :<>: ShowType name)

    CaptNum _ _ = TypeError
        (TypeLits.Text "Capture index must be a number (Nat) or name (Symbol)")

-- | Safely lookup a capture in a `Captures` result obtained from a Template
-- Haskell-generated matching function.
--
-- The ugly type signature may be interpreted like this:  /Given some capture/
-- /group index @i@ and some @info@ about a regex, ensure that index exists and/
-- /is resolved to the number @num@ at compile time.  Then, at runtime, get a/
-- /capture group from a list of captures./
--
-- In practice the variable @i@ is specified by type application and the other
-- variables are inferred.
--
-- > capture @3
-- > capture @"bar"
--
-- Specifying a nonexistant number or name will result in a type error.
capture :: forall i info num. (CaptNum i info ~ num, KnownNat num) =>
    Captures info -> Text
capture = view $ _capture @i

-- | Like `capture` but focus from a `Captures` to a capture.
_capture :: forall i info num. (CaptNum i info ~ num, KnownNat num) =>
    Lens' (Captures info) Text
_capture f (Captures cs) =
    let (ls, c : rs) = NE.splitAt (fromInteger $ natVal @num Proxy) cs
    in f c <&> \c' -> Captures $ NE.fromList $ ls ++ c' : rs

-- * Exceptions

-- | The root of the PCRE2 exception hierarchy.
data SomePcre2Exception = forall e. (Exception e) => SomePcre2Exception e
instance Show SomePcre2Exception where
    show (SomePcre2Exception e) = show e
instance Exception SomePcre2Exception

-- | Vanilla PCRE2 exceptions with messages generated by the underlying C
-- library.
newtype Pcre2Exception = Pcre2Exception CInt
instance Show Pcre2Exception where
    show (Pcre2Exception x) = "pcre2: " ++ Text.unpack (getErrorMessage x)
instance Exception Pcre2Exception where
    toException = toException . SomePcre2Exception
    fromException = fromException >=> \(SomePcre2Exception e) -> cast e

-- | PCRE2 compile exceptions.  Along with a message stating the cause, we show
-- the pattern with a cursor pointing at where the error is (if not after the
-- last character).
data Pcre2CompileException = Pcre2CompileException !CInt !Text !PCRE2_SIZE
instance Show Pcre2CompileException where
    show (Pcre2CompileException x patt offset) = intercalate "\n" $ [
        "pcre2_compile: " ++ Text.unpack (getErrorMessage x),
        replicate tab ' ' ++ Text.unpack patt] ++
        [replicate (tab + offset') ' ' ++ "^" | offset' < Text.length patt]
        where
        tab = 20
        offset' = fromIntegral offset
instance Exception Pcre2CompileException where
    toException = toException . SomePcre2Exception
    fromException = fromException >=> \(SomePcre2Exception e) -> cast e

-- | Built-in message corresponding to the integer error code.
getErrorMessage :: CInt -> Text
getErrorMessage errorCode = unsafePerformIO $ do
    let bufCUs = 120
    allocaBytes (bufCUs * 2) $ \bufPtr -> do
        cus <- pcre2_get_error_message errorCode bufPtr (fromIntegral bufCUs)
        Text.fromPtr (castCUs bufPtr) (fromIntegral cus)

-- | Most PCRE2 C functions return an @int@ indicating a possible error.  Test
-- it against a predicate, and throw an exception upon failure.
check :: (CInt -> Bool) -> CInt -> IO ()
check p x = unless (p x) $ throwIO $ Pcre2Exception x

-- * PCRE2 compile-time config

-- | Helper for getting PCRE2 compile-time config integers.
getConfigNumeric :: CUInt -> CUInt
getConfigNumeric what = unsafePerformIO $ alloca $ \ptr -> do
    pcre2_config what ptr
    peek ptr

-- | Helper for getting PCRE2 compile-time config strings.
getConfigString :: CUInt -> Maybe Text
getConfigString what = unsafePerformIO $ do
    len <- pcre2_config what nullPtr
    if len == pcre2_ERROR_BADOPTION
        then return Nothing
        -- FIXME Do we really need "+ 1" here?
        else allocaBytes (fromIntegral (len + 1) * 2) $ \ptr -> do
            pcre2_config what ptr
            Just <$> Text.fromPtr ptr (fromIntegral len - 1)

-- | See `Bsr`.
defaultBsr :: Bsr
defaultBsr = bsrFromC $ getConfigNumeric pcre2_CONFIG_BSR

-- | Which code widths PCRE2 is compiled to operate on.  Can be any combination
-- of 8, 16, and 32.  Should be @[16]@ but provided here for completeness.
compiledWidths :: [Int]
compiledWidths =
    let bitmap = getConfigNumeric pcre2_CONFIG_COMPILED_WIDTHS
    in [w | (b, w) <- [(1, 8), (2, 16), (4, 32)], b .&. bitmap /= 0]

-- | See `DepthLimit`.
defaultDepthLimit :: Int
defaultDepthLimit = fromIntegral $ getConfigNumeric pcre2_CONFIG_DEPTHLIMIT

-- | See `HeapLimit`.
defaultHeapLimit :: Int
defaultHeapLimit = fromIntegral $ getConfigNumeric pcre2_CONFIG_HEAPLIMIT

-- | Was PCRE2 built with JIT support?
supportsJit :: Bool
supportsJit = getConfigNumeric pcre2_CONFIG_JIT == 1

-- | A nice description of the CPU architecture JIT support is compiled for, if
-- any.
jitTarget :: Maybe Text
jitTarget = getConfigString pcre2_CONFIG_JITTARGET

-- | Number of bytes used for internal linkage in compiled regexes.
linkSize :: Int
linkSize = fromIntegral $ getConfigNumeric pcre2_CONFIG_LINKSIZE

-- | See `MatchLimit`.
defaultMatchLimit :: Int
defaultMatchLimit = fromIntegral $ getConfigNumeric pcre2_CONFIG_MATCHLIMIT

-- | See `Newline`.
defaultNewline :: Newline
defaultNewline = newlineFromC $ getConfigNumeric pcre2_CONFIG_NEWLINE

-- | See `NeverBackslashC`.
defaultIsNeverBackslashC :: Bool
defaultIsNeverBackslashC = getConfigNumeric pcre2_CONFIG_NEVER_BACKSLASH_C == 1

-- | See `ParensLimit`.
defaultParensLimit :: Int
defaultParensLimit = fromIntegral $ getConfigNumeric pcre2_CONFIG_PARENSLIMIT

-- | Size in bytes of PCRE2\'s built-in character processing tables.
defaultTablesLength :: Int
defaultTablesLength = fromIntegral $ getConfigNumeric pcre2_CONFIG_TABLES_LENGTH

-- | Unicode version string such as @8.0.0@, if Unicode is supported at all.
unicodeVersion :: Maybe Text
unicodeVersion = case getConfigString pcre2_CONFIG_UNICODE_VERSION of
    Just v | Text.unpack v == "Unicode not supported" -> Nothing
    maybeV                                            -> maybeV

-- | Was PCRE2 built with Unicode support?
supportsUnicode :: Bool
supportsUnicode = getConfigNumeric pcre2_CONFIG_UNICODE == 1

-- | Version of the built-in C library.  The versioning scheme is that PCRE
-- legacy is 8.x and PCRE2 is 10.x, so this should be @10.@/something/.
pcreVersion :: Text
pcreVersion = case getConfigString pcre2_CONFIG_VERSION of
    Just v  -> v
    Nothing -> error "pcreVersion: unable to get string"