{-# OPTIONS_GHC -Wno-incomplete-patterns #-}

module Codec.Borsh.Incremental.Decoder (
    -- * Definition
    Decoder(..)
    -- * Operations supported by any decoder
  , liftDecoder
  , decodeLittleEndian
  , decodeLargeToken
  , decodeIncremental
  , decodeIncremental_
    -- * Running
  , DecodeResult(..)
  , deserialiseByteString
  ) where

import Control.Applicative
import Control.Monad
import Control.Monad.Fail
import Control.Monad.ST
import Data.Word

import qualified Data.ByteString      as S
import qualified Data.ByteString.Lazy as L

import Codec.Borsh.Incremental.Located
import Codec.Borsh.Incremental.Monad
import Codec.Borsh.Internal.Util.ByteString
import Codec.Borsh.Internal.Util.ByteSwap

{-------------------------------------------------------------------------------
  Definition
-------------------------------------------------------------------------------}

-- | Decoder
--
-- A decoder describes how to match against a single chunk of the input.
--
-- For decoders for primitive types, use 'Codec.Borsh.FromBorsh' instances.
newtype Decoder s a = Decoder {
      matchChunk :: LocatedChunk -> ST s (LocatedChunk, DecodeResult s a)
    }

{-------------------------------------------------------------------------------
  Operations supported by the 'Decoder' monad
-------------------------------------------------------------------------------}

-- | Lift an 'ST' operation into the 'Decoder' monad.
liftDecoder :: ST s a -> Decoder s a
liftDecoder sa = Decoder $ \chunk -> (chunk, ) . DecodeDone <$> sa

-- | Decode a value encoded in little endian byte order (e.g. as mandated by the
-- [Borsh](https://borsh.io) spec).
decodeLittleEndian :: forall s a. ByteSwap a => Decoder s a
decodeLittleEndian = Decoder aux
  where
    aux :: LocatedChunk -> ST s (LocatedChunk, DecodeResult s a)
    aux chunk@(L bs off) =
        case peekByteString bs of
          Just (x, sizeX, bs') ->
            return (L bs' (off + sizeX), DecodeDone x)
          Nothing ->
            return (chunk, DecodeNeedsData decodeLittleEndian)

-- | Large token of known length that spans multiple chunks
--
-- This is NOT incremental: all chunks will be read into memory before the
-- result is returned. Primarily useful for large types that are not
-- easily split into (valid) chunks, such as UTF8-encoded text (if were
-- wanted to split that, we'd have to split it at UTF8 boundaries).
decodeLargeToken ::
     Word32 -- ^ Number of bytes to decode
  -> Decoder s L.ByteString
decodeLargeToken n = Decoder $ \chunk ->
    return (chunk, DecodeLargeToken n return)

-- | Incremental interface
--
-- When decoding large objects such as lists, we do not want to bring all
-- required chunks into memory before decoding the list. Instead, we want to
-- decode the list elements as we go. In this case, 'DecodeIncremental' can
-- be used to repeatedly decode a value using decoder for the elements.
--
-- NOTE: This interface is incremental in the sense that the /input chunks/
-- are read one at a time. It is /NOT/ incremental in the generated /output/.
decodeIncremental ::
     Word32      -- ^ Number of elements in the sequence to decode
  -> Decoder s a -- ^ 'Decoder' to run for the individual elements
  -> Decoder s [a]
decodeIncremental n d = Decoder $ \chunk ->
    return (chunk, DecodeIncremental n d return)

-- | Variation on 'decoreIncremental', where we do not accumulate results
--
-- This is useful for example for datatypes that we can update imperatively
-- (using the 'ST' monad), such as mutable arrays. It could also be used to skip
-- over unused parts of the input.
decodeIncremental_ ::
     Word32       -- ^ Number of elements in the sequence to decode
  -> Decoder s () -- ^ 'Decoder' to run for the individual elements
  -> Decoder s ()
decodeIncremental_ n d = Decoder $ \chunk ->
    return (chunk, DecodeIncremental_ n d $ return ())

{-------------------------------------------------------------------------------
  Results
-------------------------------------------------------------------------------}

data DecodeResult s a where
  -- | The decoder terminated successfully: we can stop decoding
  DecodeDone :: a -> DecodeResult s a

  -- | The decoder failed: we should abort
  DecodeFail :: String -> DecodeResult s a

  -- | The decoder needs more data before it can continue
  --
  -- NOTE: The decoder that is waiting for more data may not be (and typically
  -- will not be) the decoder we started with in 'matchChunk': in the typical
  -- case, a bunch of values will have been decoded successfully before we get
  -- to a (continuation) decoder that requires data beyond the current chunk.
  DecodeNeedsData :: Decoder s a -> DecodeResult s a

  -- | Large token of known length that spans multiple chunks
  --
  -- See 'decodeLargeToken' for discussion.
  --
  -- The continuation will be called with a lazy bytestring of precisely the
  -- requested length (provided enough input is available, of course), along
  -- with the remaining input token to be provided to the continuation decoder.
  DecodeLargeToken ::
       Word32  -- ^ Required number of bytes
    -> (L.ByteString -> Decoder s a)
    -> DecodeResult s a

  -- | Incremental interface
  --
  -- See 'decodeIncremental' for discussion.
  DecodeIncremental ::
       Word32               -- ^ How often to repeat the smaller decoder
    -> Decoder s a          -- ^ Decoder to repeat
    -> ([a] -> Decoder s b) -- ^ Process all elements
    -> DecodeResult s b

  -- | Variation on 'DecodeIncremental', where we do not accumulate results
  --
  -- See 'decodeIncremental_' for discussion.
  DecodeIncremental_ ::
       Word32        -- ^ How often to repeat the smaller decoder
    -> Decoder s ()  -- ^ Decoder to repeat (imperatively handling each element)
    -> Decoder s a   -- ^ Continuation
    -> DecodeResult s a

{-------------------------------------------------------------------------------
  Monad instance
-------------------------------------------------------------------------------}

instance Functor (Decoder s) where
  fmap = liftA

instance Applicative (Decoder s) where
  pure x = Decoder $ \chunk -> return (chunk, DecodeDone x)
  (<*>)  = ap

instance Monad (Decoder s) where
  return  = pure
  x >>= f = Decoder $ \chunk -> do
      (chunk', result) <- matchChunk x chunk
      case result of
        DecodeDone a ->
          matchChunk (f a) chunk'
        DecodeFail e ->
          return (chunk', DecodeFail e)
        DecodeNeedsData d ->
          return (chunk', DecodeNeedsData (d >>= f))
        DecodeLargeToken reqLen k ->
          return (chunk', DecodeLargeToken reqLen (k >=> f))
        DecodeIncremental count d k ->
          return (chunk', DecodeIncremental count d (k >=> f))
        DecodeIncremental_ count d k ->
          return (chunk', DecodeIncremental_ count d (k >>= f))

instance MonadFail (Decoder s) where
  fail e = Decoder $ \chunk -> return (chunk, DecodeFail e)

{-------------------------------------------------------------------------------
  Running decoders
-------------------------------------------------------------------------------}

-- | Top-level entry point
--
-- We start without any input at all (and depending on the specific decoder,
-- we may never need any).
runDecoder :: Decoder s a -> Incr s (LocatedChunk, a)
runDecoder = runWith $ L S.empty 0

-- | Run decoder against specified chunk
runWith :: LocatedChunk -> Decoder s a -> Incr s (LocatedChunk, a)
runWith chunk d = uncurry processResult =<< liftIncr (matchChunk d chunk)

{-------------------------------------------------------------------------------
  Processing the result of a decoder
-------------------------------------------------------------------------------}

-- | Process decoder result
processResult :: LocatedChunk -> DecodeResult s a -> Incr s (LocatedChunk, a)
processResult chunk = \case
    DecodeDone x -> return    (chunk, x)
    DecodeFail e -> decodeFail chunk  e

    DecodeNeedsData      d   -> processNeedsData      d   chunk
    DecodeLargeToken   n   k -> processLargeToken   n   k chunk
    DecodeIncremental  n d k -> processIncremental  n d k chunk
    DecodeIncremental_ n d k -> processIncremental_ n d k chunk

processNeedsData ::
     Decoder s a
  -> Located S.ByteString
  -> Incr s (LocatedChunk, a)
processNeedsData d chunk@(L bs off) = needChunk >>= \case
    Nothing   -> decodeFail chunk "end of input"
    Just next -> runWith (L (bs <> next) off) d

-- | Auxiliary to 'processResult': process token that spans multple chunks
--
-- Precondition: if the accumulated length exceeds the required length, we must
-- be able to split the mostly added chunk to make up for the difference.
processLargeToken :: forall s a.
     Word32                         -- ^ Required total size
  -> (L.ByteString -> Decoder s a)  -- ^ Continuation
  -> LocatedChunk                   -- ^ Current chunk
  -> Incr s (LocatedChunk, a)
processLargeToken reqLen k = go . toLocatedChunks
  where
    go :: LocatedChunks -> Incr s (LocatedChunk, a)
    go acc =
        case splitChunks reqLen acc of
          Nothing -> needChunk >>= \case
            Nothing   -> decodeFail (fromLocatedChunks acc) "end of input"
            Just next -> go (addChunk next acc)
          Just (large, left) ->
            uncurry processResult =<< liftIncr (matchChunk (k large) left)

-- | Auxiliary to 'processResult': incremental decoding
processIncremental :: forall s a b.
     Word32                -- ^ Number of elements required
  -> Decoder s a           -- ^ Decoder to repeat
  -> ([a] -> Decoder s b)  -- ^ Continuation once we processed all elements
  -> LocatedChunk          -- ^ Current chunk
  -> Incr s (LocatedChunk, b)
processIncremental count d k = go [] count
  where
    go :: [a] -> Word32 -> LocatedChunk -> Incr s (LocatedChunk, b)
    go acc 0 chunk = do result <- liftIncr (matchChunk (k (reverse acc)) chunk)
                        uncurry processResult result
    go acc n chunk = do (chunk', a) <- runWith chunk d
                        go (a:acc) (n - 1) chunk'

-- | Imperative version of 'processIncremental'
--
-- See 'DecodeIncremental_' for discussion.
processIncremental_ :: forall s a.
     Word32        -- ^ Number of elements required
  -> Decoder s ()  -- ^ Decoder to repeat
  -> Decoder s a   -- ^ Continuation once we processed all elements
  -> LocatedChunk  -- ^ Current chunk
  -> Incr s (LocatedChunk, a)
processIncremental_ count d k = go count
  where
    go :: Word32 -> LocatedChunk -> Incr s (LocatedChunk, a)
    go 0 chunk = do result <- liftIncr (matchChunk k chunk)
                    uncurry processResult result
    go n chunk = do (chunk', ()) <- runWith chunk d
                    go (n - 1) chunk'

{-------------------------------------------------------------------------------
  Top-level API
-------------------------------------------------------------------------------}

-- | Run decoder
deserialiseByteString ::
     (forall s. Decoder s a)
  -- ^ Decoder
  -> L.ByteString
  -- ^ Input
  -> Either DeserialiseFailure (L.ByteString, ByteOffset, a)
  -- ^ Left-over input, offset of the left-over input relative to the start,
  -- and the result.
deserialiseByteString d = runIDecode (runIncr (runDecoder d))