{-# LANGUAGE ConstraintKinds     #-}
{-# LANGUAGE FlexibleInstances   #-}
{-# LANGUAGE MultiWayIf          #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- | Avro encoding and decoding routines.
--
-- This library provides a high level interface for encoding (and decoding)
-- Haskell values in Apache's Avro serialization format.
--
-- The goal is to match Aeson's API whenever reasonable,
-- so that user experience with one effectively translates to the other.
--
-- Avro RPC is not currently supported.
--
-- == Library Structure
--
-- The library structure includes:
--
--   * This module, "Data.Avro", providing a high-level interface via
--     classes of 'FromAvro' and 'ToAvro' for decoding and encoding values.
--
--   * "Data.Avro.Schema": Defines the type for Avro schemas and its JSON
--      encoding/decoding.
--
--   * "Data.Avro.Encode" and "Data.Avro.Decode": More
--     efficient conversion capable of avoiding the intermediate representation.
--     Also, the implementation of the en/decoding of the intermediate
--     representation.
--
--   * "Data.Avro.Decode.Lazy": Lazy/Streaming decoding for Avro containers.
--
--   * "Data.Avro.Deconflict": translate decoded data from an
--     encoder schema to the (potentially different) decoder's schema.
module Data.Avro
  ( -- * Schema
    Schema

    -- * Encoding and decoding
  , Result(..), badValue
  , encode
  , decode

  , (.:)
  , (.=), record, fixed

    -- * Working with containers
    -- ** Decoding containers
  , decodeWithSchema
  , decodeContainer
  , decodeContainerWithSchema
  , decodeContainerBytes

    -- ** Encoding containers
  , encodeContainer
  , encodeContainer'
  , encodeContainerWithSync
  , encodeContainerWithSync'

  -- * Classes and instances
  , FromAvro(..)
  , ToAvro(..)
  , HasAvroSchema(..)
  , schemaOf

  -- * Misc
  , Avro
  ) where

import           Control.Arrow         (first)
import qualified Data.Avro.Decode      as D
import qualified Data.Avro.Decode.Lazy as DL
import           Data.Avro.Deconflict  as C
import qualified Data.Avro.Encode      as E
import           Data.Avro.Schema      as S
import           Data.Avro.Types       as T
import qualified Data.Binary.Get       as G
import qualified Data.Binary.Put       as P
import qualified Data.ByteString       as B
import           Data.ByteString.Lazy  (ByteString)
import qualified Data.ByteString.Lazy  as BL
import           Data.Foldable         (toList)
import qualified Data.HashMap.Strict   as HashMap
import           Data.Int
import           Data.List.NonEmpty    (NonEmpty (..))
import qualified Data.Map              as Map
import           Data.Monoid           ((<>))
import           Data.Tagged
import           Data.Text             (Text)
import qualified Data.Text             as Text
import qualified Data.Text.Lazy        as TL
import qualified Data.Vector           as V
import           Data.Word
import           Prelude               as P

import Data.Avro.Codec         (Codec, deflateCodec, nullCodec)
import Data.Avro.FromAvro
import Data.Avro.HasAvroSchema
import Data.Avro.ToAvro

-- | Constraint synonym for types that can be both decoded from Avro
-- ('FromAvro') and encoded to Avro ('ToAvro').
type Avro a = (FromAvro a, ToAvro a)

-- | Decode a lazy bytestring, using the 'Schema' associated with the
-- requested return type.
--
-- A failure reported by the binary decoder is returned as 'Error';
-- otherwise the decoded value is converted with 'fromAvro'.
decode :: forall a. FromAvro a => ByteString -> Result a
decode bytes = either Error fromAvro (D.decodeAvro ownSchema bytes)
  where
    -- The schema is selected purely by the result type @a@.
    ownSchema = untag (schema :: Tagged a Type)

-- | Decode a lazy bytestring using an explicitly supplied schema.
--
-- A failure reported by the binary decoder is returned as 'Error';
-- otherwise the decoded value is converted with 'fromAvro'.
decodeWithSchema :: FromAvro a => Schema -> ByteString -> Result a
decodeWithSchema sch = either Error fromAvro . D.decodeAvro sch

-- | Decode a container, de-conflicting the writer schema against the
-- reader schema associated with the return type.
--
-- This delegates to 'decodeContainerWithSchema'; as there, exceptions are
-- thrown instead of returning a 'Result', to allow this function to be
-- read lazily (to be done in some later version).
decodeContainer :: forall a. FromAvro a => ByteString -> [[a]]
decodeContainer = decodeContainerWithSchema readerSchema
  where
    -- The reader schema is selected purely by the element type @a@.
    readerSchema = untag (schema :: Tagged a Schema)

-- | Decode a container and de-conflict the writer schema with a given
-- reader schema.
--
-- Named types are expanded in both the writer schema (read from the
-- container) and the supplied reader schema; every decoded value is then
-- de-conflicted with 'C.deconflictNoResolve' and converted with 'fromAvro'.
--
-- Exceptions are thrown (via 'error') instead of returning a 'Result' to
-- allow this function to be read lazily (to be done in some later version).
-- 'error' is called when:
--
--   * the container itself cannot be decoded;
--
--   * a value cannot be de-conflicted between writer and reader schema;
--
--   * 'fromAvro' reports an 'Error' for a de-conflicted value.
--
-- The per-value conversions are applied with a lazy 'map', so a failure for
-- an individual value surfaces only when that element is evaluated.
decodeContainerWithSchema :: FromAvro a => Schema -> ByteString -> [[a]]
decodeContainerWithSchema readerSchema bs =
  case D.decodeContainer bs of
    Left containerErr -> error containerErr
    Right (writerSchema, blocks) ->
      let
        writerSchema' = S.expandNamedTypes writerSchema
        readerSchema' = S.expandNamedTypes readerSchema
        -- Keep a space between the fixed message and the underlying error so
        -- the two do not run together in the exception text.
        deconflictFailure e =
          error $ "Could not deconflict reader and writer schema. " <> e
        decodeValue raw =
          case C.deconflictNoResolve writerSchema' readerSchema' raw of
            Left e  -> deconflictFailure e
            Right v -> case fromAvro v of
                         Success converted -> converted
                         Error convErr     -> error convErr
      in P.map (P.map decodeValue) blocks

-- | Encode a value to a lazy 'BL.ByteString': the value is first converted
-- with 'toAvro' and the result is serialized with 'E.encodeAvro'.
encode :: ToAvro a => a -> BL.ByteString
encode val = E.encodeAvro (toAvro val)

-- | Encode chunks of objects into a container with 'nullCodec', using
-- 16 random bytes for the synchronization markers.
--
-- See 'encodeContainer'' to choose a different 'Codec'.
encodeContainer :: forall a. ToAvro a => [[a]] -> IO BL.ByteString
encodeContainer chunks = encodeContainer' nullCodec chunks

-- | Encode chunks of objects into a container using the given 'Codec'.
--
-- The container schema is the one associated with the element type @a@;
-- every object is converted with 'toAvro' before being handed to
-- 'E.encodeContainer'.
encodeContainer' :: forall a. ToAvro a => Codec -> [[a]] -> IO BL.ByteString
encodeContainer' codec chunks =
  E.encodeContainer codec elemSchema (map (map toAvro) chunks)
  where
    elemSchema = untag (schema :: Tagged a Schema)

-- | Encode chunks of objects into a container with 'nullCodec', using the
-- provided four 'Word64' values as the synchronization marker.
--
-- See 'encodeContainerWithSync'' to choose a different 'Codec'.
encodeContainerWithSync :: forall a. ToAvro a => (Word64,Word64,Word64,Word64) -> [[a]] -> BL.ByteString
encodeContainerWithSync syncWords = encodeContainerWithSync' nullCodec syncWords

-- | Encode chunks of objects into a container using the given 'Codec' and
-- the provided four 'Word64' values as the synchronization marker.
--
-- The marker bytes are the four words serialized in order, each in
-- little-endian byte order. The container schema is the one associated
-- with the element type @a@, and every object is converted with 'toAvro'.
--
-- NOTE(review): four 'Word64' values serialize to 32 bytes, whereas the
-- Avro container format describes a 16-byte sync marker; confirm how
-- 'E.encodeContainerWithSync' treats the marker it is given.
encodeContainerWithSync' :: forall a. ToAvro a => Codec -> (Word64,Word64,Word64,Word64) -> [[a]] -> BL.ByteString
encodeContainerWithSync' codec (w0, w1, w2, w3) =
  E.encodeContainerWithSync codec elemSchema marker . map (map toAvro)
  where
    elemSchema = untag (schema :: Tagged a Schema)
    marker     = P.runPut (mapM_ P.putWord64le [w0, w1, w2, w3])

-- | Like 'decodeContainer', but returns the Avro-encoded bytes of each
-- object in the container instead of a decoded Haskell value.
--
-- This is particularly useful when slicing up containers into one or more
-- smaller files: by extracting the original bytestrings it is possible to
-- avoid re-encoding data.
--
-- Calls 'error' if the container cannot be decoded.
decodeContainerBytes :: ByteString -> [[ByteString]]
decodeContainerBytes bs =
  case D.decodeContainerWith rawObjectBytes bs of
    Left e            -> error $ "Could not decode container: " <> e
    Right (_, chunks) -> chunks
  where
    -- Measure how many bytes one object of the given schema occupies by
    -- decoding it inside 'G.lookAhead' (which does not consume input),
    -- then read exactly that many bytes as the object's raw encoding.
    rawObjectBytes sch = do
      before <- G.bytesRead
      after  <- G.lookAhead (D.getAvroOf sch >> G.bytesRead)
      G.getLazyByteString (after - before)

-- | Build a 'T.Record' value of the given type from any 'Foldable'
-- collection of @('Text', value)@ pairs.
--
-- The pairs are collected with 'HashMap.fromList'.
record :: Foldable f => Type -> f (Text,T.Value Type) -> T.Value Type
record ty fields = T.Record ty (HashMap.fromList (toList fields))

-- | Build a 'T.Fixed' value of the given type from a strict
-- 'B.ByteString'.
fixed :: Type -> B.ByteString -> T.Value Type
fixed ty bytes = T.Fixed ty bytes
-- @enumToAvro val@ will generate an Avro encoded value of enum suitable
-- for serialization ('encode').
-- enumToAvro :: (Show a, Enum a, Bounded a, Generic a) => a -> T.Value Type
-- enumToAvro e = T.Enum ty (show e)
--  where
--   ty = S.Enum nm Nothing [] Nothing (map (Text.pack . show) [minBound..maxBound])
--   nm = datatypeName g
--   g  = from e -- GHC generics