{-# LANGUAGE BangPatterns #-}

{- | Builders for encoding data with Apache Avro. Most functions in this
module are just aliases for other functions. Avro uses zig-zag LEB128
for all integral types.
-}
module Data.Bytes.Builder.Avro
  ( int
  , int32
  , int64
  , word16
  , word32
  , word128
  , bytes
  , chunks
  , text

    -- * Maps
  , map2
  ) where

import Data.Bytes (Bytes)
import Data.Bytes.Builder (Builder)
import Data.Bytes.Chunks (Chunks)
import Data.Int
import Data.Text (Text)
import Data.WideWord (Word128)
import Data.Word

import qualified Data.Bytes as Bytes
import qualified Data.Bytes.Builder as B
import qualified Data.Bytes.Chunks as Chunks
import qualified Data.Bytes.Text.Utf8 as Utf8

{- | Encode a 32-bit signed integer. This is an alias for
'B.int32LEB128', which provides the zig-zag LEB128 encoding that Avro
uses for integral types.
-}
int32 :: Int32 -> Builder
int32 = B.int32LEB128

{- | Encode a 64-bit signed integer. This is an alias for
'B.int64LEB128', which provides the zig-zag LEB128 encoding that Avro
uses for integral types.
-}
int64 :: Int64 -> Builder
int64 = B.int64LEB128

{- | Encode a machine-sized signed integer. This is an alias for
'B.intLEB128'. It is also what 'bytes' and 'chunks' use to write their
length prefix.
-}
int :: Int -> Builder
int = B.intLEB128

{- | Note: This results in a zigzag encoded number. Avro does not have
unsigned types.

The argument is first widened to 'Int32' with 'fromIntegral' (every
'Word16' fits, so the value is preserved) and then encoded with
'B.int32LEB128'.
-}
word16 :: Word16 -> Builder
word16 = B.int32LEB128 . fromIntegral

{- | Note: This results in a zigzag encoded number. Avro does not have
unsigned types.

The argument is first widened to 'Int64' with 'fromIntegral' (every
'Word32' fits, so the value is preserved) and then encoded with
'B.int64LEB128'.
-}
word32 :: Word32 -> Builder
word32 = B.int64LEB128 . fromIntegral

{- | Note: This results in a @fixed@ encoded value of length 16. In the
schema, the type must be @{"type": "fixed", "name": "...", "size": 16}@.
A big-endian encoding is used.

This is an alias for 'B.word128BE'; no length prefix is written.
-}
word128 :: Word128 -> Builder
word128 = B.word128BE

{- | Encode a byte sequence with a length prefix: the length from
'Bytes.length' is written with 'int', followed by the bytes themselves
via 'B.bytes'. The argument is forced before the builder is constructed.
-}
bytes :: Bytes -> Builder
bytes !b = int (Bytes.length b) <> B.bytes b

{- | Encode chunked bytes with a length prefix: the length from
'Chunks.length' is written with 'int', followed by the chunk contents
via 'B.chunks'. The argument is forced before the builder is
constructed.
-}
chunks :: Chunks -> Builder
chunks !b = int (Chunks.length b) <> B.chunks b

{- | Encode text by converting it with 'Utf8.fromText' and then encoding
the resulting bytes with 'bytes', so the output is a length prefix
followed by the converted bytes.
-}
text :: Text -> Builder
text = bytes . Utf8.fromText

{- | Encode a map with exactly two key-value pairs. The keys are text.
This is commonly used to encode the header in an avro file, which has
a map with two keys: @avro.schema@ and @avro.codec@.

The output is a single block: the byte @0x04@ (the zig-zag encoding of
the pair count 2), then each key encoded with 'text' immediately
followed by its already-encoded value, then the byte @0x00@ (a zero
count, which ends the map).
-}
map2 ::
  -- | First key
  Text ->
  -- | First value (already encoded)
  Builder ->
  -- | Second key
  Text ->
  -- | Second value (already encoded)
  Builder ->
  Builder
{-# INLINE map2 #-}
map2 k1 v1 k2 v2 =
  B.word8 0x04 -- block count: zig-zag encoding of 2
    <> text k1
    <> v1
    <> text k2
    <> v2
    <> B.word8 0x00 -- zero count terminates the map