{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- | Avro supports a JSON representation of Avro objects alongside the
-- Avro binary format. An Avro schema can be used to generate and
-- validate JSON representations of Avro objects.
--
-- The JSON format is the same format as used for default values in
-- schemas except unions are encoded differently. Non-union values are
-- encoded as follows:
--
-- +--------------+----------+----------+
-- |Avro Type     |JSON Type |Example   |
-- +==============+==========+==========+
-- |null          |null      |null      |
-- +--------------+----------+----------+
-- |boolean       |boolean   |true      |
-- +--------------+----------+----------+
-- |int, long     |integer   |1         |
-- +--------------+----------+----------+
-- |float, double |number    |1.1       |
-- +--------------+----------+----------+
-- |bytes         |string    |"\u00FF"  |
-- +--------------+----------+----------+
-- |string        |string    |"foo"     |
-- +--------------+----------+----------+
-- |record        |object    |{"a":1}   |
-- +--------------+----------+----------+
-- |enum          |string    |"FOO"     |
-- +--------------+----------+----------+
-- |array         |array     |[1]       |
-- +--------------+----------+----------+
-- |map           |object    |{"a":1}   |
-- +--------------+----------+----------+
-- |fixed         |string    |"\u00FF"  |
-- +--------------+----------+----------+
--
-- (Table from the Avro 1.8.2 specification:
-- <https://avro.apache.org/docs/1.8.2/spec.html#schema_record>)
--
-- Bytes and fixed are encoded as JSON strings where each byte is
-- translated into the corresponding Unicode codepoint between 0–255,
-- which includes non-printable characters. Note that this encoding
-- happens at the Unicode code-point level, meaning it is independent
-- of text encoding. (JSON is, by definition, encoded in UTF8.)
--
-- Unions are encoded as an object with a single field that specifies
-- the "branch" of the union. If the branch is a primitive type like
-- @"string"@, the name of the primitive type is used:
--
-- @
-- { "string" : "foo" }
-- @
--
-- For named types (record, enum and fixed), the name of the type is
-- used:
--
-- @
-- { "MyRecord" : { ... } }
-- @
module Data.Avro.JSON where

import qualified Data.Aeson           as Aeson
import qualified Data.Aeson.Key       as K
import qualified Data.Aeson.KeyMap    as KM
import           Data.ByteString.Lazy (ByteString)
import qualified Data.Foldable        as Foldable
import           Data.HashMap.Strict  ((!))
import qualified Data.HashMap.Strict  as HashMap
import           Data.List.NonEmpty   (NonEmpty (..))
import qualified Data.List.NonEmpty   as NE
import           Data.Tagged
import qualified Data.Text            as Text

import qualified Data.Avro.HasAvroSchema as Schema
import           Data.Avro.Schema.Schema (DefaultValue (..), Result (..), Schema, parseAvroJSON)
import qualified Data.Avro.Schema.Schema as Schema
import qualified Data.Vector             as V

decodeAvroJSON :: Schema -> Aeson.Value -> Result DefaultValue
decodeAvroJSON :: Schema -> Value -> Result DefaultValue
decodeAvroJSON Schema
schema Value
json =
  (Schema -> Value -> Result DefaultValue)
-> (TypeName -> Maybe Schema)
-> Schema
-> Value
-> Result DefaultValue
parseAvroJSON Schema -> Value -> Result DefaultValue
union TypeName -> Maybe Schema
env Schema
schema Value
json
  where
    env :: TypeName -> Maybe Schema
env =
      (TypeName -> Maybe Schema) -> Schema -> TypeName -> Maybe Schema
forall (m :: * -> *).
Applicative m =>
(TypeName -> m Schema) -> Schema -> TypeName -> m Schema
Schema.buildTypeEnvironment TypeName -> Maybe Schema
forall (m :: * -> *) a a. (MonadFail m, Show a) => a -> m a
missing Schema
schema
    missing :: a -> m a
missing a
name =
      String -> m a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
"Type " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> a -> String
forall a. Show a => a -> String
show a
name String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" not in schema")

    union :: Schema -> Value -> Result DefaultValue
union (Schema.Union Vector Schema
schemas) Value
Aeson.Null
      | Schema
Schema.Null Schema -> Vector Schema -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` Vector Schema
schemas =
          DefaultValue -> Result DefaultValue
forall (f :: * -> *) a. Applicative f => a -> f a
pure (DefaultValue -> Result DefaultValue)
-> DefaultValue -> Result DefaultValue
forall a b. (a -> b) -> a -> b
$ Vector Schema -> Schema -> DefaultValue -> DefaultValue
Schema.DUnion Vector Schema
schemas Schema
Schema.Null DefaultValue
Schema.DNull
      | Bool
otherwise                  =
          String -> Result DefaultValue
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Null not in union."
    union (Schema.Union Vector Schema
schemas) (Aeson.Object Object
obj)
      | Object -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null Object
obj =
          String -> Result DefaultValue
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Invalid encoding of union: empty object ({})."
      | Object -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Object
obj Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1 =
          String -> Result DefaultValue
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Invalid encoding of union: object with too many fields."
      | Bool
otherwise      =
          let
            canonicalize :: Text -> Text
canonicalize Text
name
              | Text -> Bool
forall a. (Eq a, IsString a) => a -> Bool
isBuiltIn Text
name = Text
name
              | Bool
otherwise      = TypeName -> Text
Schema.renderFullname (TypeName -> Text) -> TypeName -> Text
forall a b. (a -> b) -> a -> b
$ Text -> TypeName
Schema.parseFullname Text
name
            branch :: Text
branch =
              Key -> Text
K.toText (Key -> Text) -> Key -> Text
forall a b. (a -> b) -> a -> b
$ [Key] -> Key
forall a. [a] -> a
head (Object -> [Key]
forall v. KeyMap v -> [Key]
KM.keys Object
obj)
            names :: HashMap Text Schema
names =
              [(Text, Schema)] -> HashMap Text Schema
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HashMap.fromList [(Schema -> Text
Schema.typeName Schema
t, Schema
t) | Schema
t <- Vector Schema -> [Schema]
forall (t :: * -> *) a. Foldable t => t a -> [a]
Foldable.toList Vector Schema
schemas]
          in case Text -> HashMap Text Schema -> Maybe Schema
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HashMap.lookup (Text -> Text
canonicalize Text
branch) HashMap Text Schema
names of
            Just Schema
t  -> do
              DefaultValue
nested <- (Schema -> Value -> Result DefaultValue)
-> (TypeName -> Maybe Schema)
-> Schema
-> Value
-> Result DefaultValue
parseAvroJSON Schema -> Value -> Result DefaultValue
union TypeName -> Maybe Schema
env Schema
t (Value -> Result DefaultValue) -> Value -> Result DefaultValue
forall a b. (a -> b) -> a -> b
$ case Key -> Object -> Maybe Value
forall v. Key -> KeyMap v -> Maybe v
KM.lookup (Text -> Key
K.fromText Text
branch) Object
obj of
                Just Value
val -> Value
val
                Maybe Value
Nothing -> String -> Value
forall a. HasCallStack => String -> a
error String
"impossible"
              DefaultValue -> Result DefaultValue
forall (m :: * -> *) a. Monad m => a -> m a
return (Vector Schema -> Schema -> DefaultValue -> DefaultValue
Schema.DUnion Vector Schema
schemas Schema
t DefaultValue
nested)
            Maybe Schema
Nothing -> String -> Result DefaultValue
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
"Type '" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Text -> String
Text.unpack Text
branch String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"' not in union: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Vector Schema -> String
forall a. Show a => a -> String
show Vector Schema
schemas)
    union Schema.Union{} Value
_ =
      String -> Result DefaultValue
forall a. String -> Result a
Schema.Error String
"Invalid JSON representation for union: has to be a JSON object with exactly one field."
    union Schema
_ Value
_ =
      String -> Result DefaultValue
forall a. HasCallStack => String -> a
error String
"Impossible: function given non-union schema."

    isBuiltIn :: a -> Bool
isBuiltIn a
name = a
name a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ a
"null", a
"boolean", a
"int", a
"long", a
"float"
                                 , a
"double", a
"bytes", a
"string", a
"array", a
"map" ]

-- -- | Convert a 'Aeson.Value' into a type that has an Avro schema. The
-- -- schema is used to validate the JSON and will return an 'Error' if
-- -- the JSON object is not encoded correctly or does not match the schema.
-- fromJSON :: forall a. (FromAvro a) => Aeson.Value -> Result a
-- fromJSON json = do
--   value <- decodeAvroJSON schema json
--   fromAvro value
--   where
--     schema = untag (Schema.schema :: Tagged a Schema)

-- -- | Parse a 'ByteString' as JSON and convert it to a type with an
-- -- Avro schema. Will return 'Error' if the input is not valid JSON or
-- -- the JSON does not convert with the specified schema.
-- parseJSON :: forall a. (FromAvro a) => ByteString -> Result a
-- parseJSON input = case Aeson.eitherDecode input of
--   Left msg    -> Error msg
--   Right value -> fromJSON value

-- -- | Convert an object with an Avro schema to JSON using that schema.
-- --
-- -- We always need the schema to /encode/ to JSON because representing
-- -- unions requires using the names of named types.
-- toJSON :: forall a. (ToAvro a) => a -> Aeson.Value
-- toJSON = Aeson.toJSON . toAvro