{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Data.Avro.Schema
(
Schema, Type(..)
, Field(..), Order(..)
, TypeName(..)
, mkEnum, mkUnion
, validateSchema
, typeName
, buildTypeEnvironment
, Result(..)
) where
import Prelude as P
import Control.Applicative
import Control.Monad.Except
import Control.Monad.State.Strict
import qualified Control.Monad.Fail as MF
import qualified Data.Aeson as A
import Data.Aeson ((.=),object,(.:?),(.:),(.!=),FromJSON(..),ToJSON(..))
import Data.Aeson.Types (Parser,typeMismatch)
import qualified Data.ByteString.Base16 as Base16
import qualified Data.HashMap.Strict as HashMap
import Data.Hashable
import qualified Data.List as L
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.List.NonEmpty as NE
import Data.Maybe (catMaybes, fromMaybe)
import Data.Monoid ((<>), First(..))
import qualified Data.Set as S
import Data.String
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding as T
import qualified Data.Vector as V
import qualified Data.Avro.Types as Ty
import qualified Data.IntMap as IM
import Data.Int
import Text.Show.Functions
type Schema = Type
data Type
=
Null
| Boolean
| Int | Long
| Float | Double
| Bytes | String
| Array { item :: Type }
| Map { values :: Type }
| NamedType TypeName
| Record { name :: TypeName
, namespace :: Maybe Text
, aliases :: [TypeName]
, doc :: Maybe Text
, order :: Maybe Order
, fields :: [Field]
}
| Enum { name :: TypeName
, namespace :: Maybe Text
, aliases :: [TypeName]
, doc :: Maybe Text
, symbols :: [Text]
, symbolLookup :: Int64 -> Maybe Text
}
| Union { options :: NonEmpty Type
, unionLookup :: Int64 -> Maybe Type
}
| Fixed { name :: TypeName
, namespace :: Maybe Text
, aliases :: [TypeName]
, size :: Int
}
deriving (Show)
instance Eq Type where
Null == Null = True
Boolean == Boolean = True
Int == Int = True
Long == Long = True
Float == Float = True
Double == Double = True
Bytes == Bytes = True
String == String = True
Array ty == Array ty2 = ty == ty2
Map ty == Map ty2 = ty == ty2
NamedType t == NamedType t2 = t == t2
Record _ _ _ _ _ fs == Record _ _ _ _ _ fs2 = fs == fs2
Enum _ _ _ _ s _ == Enum _ _ _ _ s2 _ = s == s2
Union a _ == Union b _ = a == b
Fixed _ _ _ s == Fixed _ _ _ s2 = s == s2
_ == _ = False
mkEnum :: TypeName -> [TypeName] -> Maybe Text -> Maybe Text -> [Text] -> Type
mkEnum n as ns d ss = Enum n ns as d ss (\i -> IM.lookup (fromIntegral i) mp)
where
mp = IM.fromList (zip [0..] ss)
mkUnion :: NonEmpty Type -> Type
mkUnion os = Union os (\i -> IM.lookup (fromIntegral i) mp)
where mp = IM.fromList (zip [0..] $ NE.toList os)
newtype TypeName = TN { unTN :: T.Text }
deriving (Eq, Ord)
instance Show TypeName where
show (TN s) = show s
instance Monoid TypeName where
mempty = TN mempty
mappend (TN a) (TN b) = TN (a <> b)
instance IsString TypeName where
fromString = TN . fromString
instance Hashable TypeName where
hashWithSalt s (TN t) = hashWithSalt (hashWithSalt s ("AvroTypeName" :: Text)) t
typeName :: Type -> Text
typeName bt =
case bt of
Null -> "null"
Boolean -> "boolean"
Int -> "int"
Long -> "long"
Float -> "float"
Double -> "double"
Bytes -> "bytes"
String -> "string"
Array _ -> "array"
Map _ -> "map"
NamedType (TN t) -> t
Union (x:|_) _ -> typeName x
_ -> unTN $ name bt
data Field = Field { fldName :: Text
, fldAliases :: [Text]
, fldDoc :: Maybe Text
, fldOrder :: Maybe Order
, fldType :: Type
, fldDefault :: Maybe (Ty.Value Type)
}
deriving (Eq, Show)
data Order = Ascending | Descending | Ignore
deriving (Eq, Ord, Show)
instance FromJSON Type where
parseJSON (A.String s) =
case s of
"null" -> return Null
"boolean" -> return Boolean
"int" -> return Int
"long" -> return Long
"float" -> return Float
"double" -> return Double
"bytes" -> return Bytes
"string" -> return String
somename -> return (NamedType (TN somename))
parseJSON (A.Object o) =
do ty <- o .: ("type" :: Text)
case ty of
"map" -> Map <$> o .: ("values" :: Text)
"array" -> Array <$> o .: ("items" :: Text)
"record" ->
Record <$> o .: ("name" :: Text)
<*> o .:? ("namespace" :: Text)
<*> o .:? ("aliases" :: Text) .!= []
<*> o .:? ("doc" :: Text)
<*> o .:? ("order" :: Text) .!= Just Ascending
<*> o .: ("fields" :: Text)
"enum" ->
mkEnum <$> o .: ("name" :: Text)
<*> o .:? ("aliases" :: Text) .!= []
<*> o .:? ("namespace" :: Text)
<*> o .:? ("doc" :: Text)
<*> o .: ("symbols" :: Text)
"fixed" ->
Fixed <$> o .: ("name" :: Text)
<*> o .:? ("namespace" :: Text)
<*> o .:? ("aliases" :: Text) .!= []
<*> o .: ("size" :: Text)
s -> fail $ "Unrecognized object type: " <> s
parseJSON (A.Array arr) | V.length arr > 0 =
mkUnion . NE.fromList <$> mapM parseJSON (V.toList arr)
parseJSON foo = typeMismatch "Invalid JSON for Avro Schema" foo
instance ToJSON Type where
toJSON bt =
case bt of
Null -> A.String "null"
Boolean -> A.String "boolean"
Int -> A.String "int"
Long -> A.String "long"
Float -> A.String "float"
Double -> A.String "double"
Bytes -> A.String "bytes"
String -> A.String "string"
Array tn -> object [ "type" .= ("array" :: Text), "items" .= tn ]
Map tn -> object [ "type" .= ("map" :: Text), "values" .= tn ]
NamedType (TN tn) -> A.String tn
Record {..} ->
let opts = catMaybes
[ ("order" .=) <$> order
, ("namespace" .=) <$> namespace
, ("doc" .=) <$> doc
]
in object $ opts ++
[ "type" .= ("record" :: Text)
, "name" .= name
, "aliases" .= aliases
, "fields" .= fields
]
Enum {..} ->
let opts = catMaybes
[ ("namespace" .=) <$> namespace
, ("doc" .=) <$> doc
]
in object $ opts ++
[ "type" .= ("enum" :: Text)
, "name" .= name
, "aliases" .= aliases
, "symbols" .= symbols
]
Union {..} -> A.Array $ V.fromList $ P.map toJSON (NE.toList options)
Fixed {..} ->
let opts = catMaybes
[ ("namespace" .=) <$> namespace ]
in object $ opts ++
[ "type" .= ("fixed" :: Text)
, "name" .= name
, "aliases" .= aliases
, "size" .= size
]
instance ToJSON TypeName where
toJSON (TN t) = A.String t
instance FromJSON TypeName where
parseJSON (A.String s) = return (TN s)
parseJSON j = typeMismatch "TypeName" j
instance FromJSON Field where
parseJSON (A.Object o) =
do nm <- o .: "name"
doc <- o .:? "doc"
ty <- o .: "type"
let err = fail "Haskell Avro bindings does not support default for aliased or recursive types at this time."
defM <- o .:? "default"
def <- case parseAvroJSON err ty <$> defM of
Just (Success x) -> return (Just x)
Just (Error e) -> fail e
Nothing -> return Nothing
od <- o .:? ("order" :: Text) .!= Just Ascending
al <- o .:? ("aliases" :: Text) .!= []
return $ Field nm al doc od ty def
parseJSON j = typeMismatch "Field " j
instance ToJSON Field where
toJSON Field {..} =
let opts = catMaybes
[ ("order" .=) <$> fldOrder
, ("doc" .=) <$> fldDoc
, ("default" .=) <$> fldDefault
]
in object $ opts ++
[ "name" .= fldName
, "type" .= fldType
, "aliases" .= fldAliases
]
instance ToJSON (Ty.Value Type) where
toJSON av =
case av of
Ty.Null -> A.Null
Ty.Boolean b -> A.Bool b
Ty.Int i -> A.Number (fromIntegral i)
Ty.Long i -> A.Number (fromIntegral i)
Ty.Float f -> A.Number (realToFrac f)
Ty.Double d -> A.Number (realToFrac d)
Ty.Bytes bs -> A.String ("\\u" <> T.decodeUtf8 (Base16.encode bs))
Ty.String t -> A.String t
Ty.Array vec -> A.Array (V.map toJSON vec)
Ty.Map mp -> A.Object (HashMap.map toJSON mp)
Ty.Record _ flds -> A.Object (HashMap.map toJSON flds)
Ty.Union _ _ Ty.Null -> A.Null
Ty.Union _ ty val -> object [ typeName ty .= val ]
Ty.Fixed _ bs -> A.String ("\\u" <> T.decodeUtf8 (Base16.encode bs))
Ty.Enum _ _ txt -> A.String txt
data Result a = Success a | Error String
deriving (Eq,Ord,Show)
instance Monad Result where
return = pure
Success a >>= k = k a
Error e >>= _ = Error e
fail = MF.fail
instance Functor Result where
fmap f (Success x) = Success (f x)
fmap _ (Error e) = Error e
instance MF.MonadFail Result where
fail = Error
instance MonadError String Result where
throwError = fail
catchError a@(Success _) _ = a
catchError (Error e) k = k e
instance Applicative Result where
pure = Success
(<*>) = ap
instance Alternative Result where
empty = mzero
(<|>) = mplus
instance MonadPlus Result where
mzero = fail "mzero"
mplus a@(Success _) _ = a
mplus _ b = b
instance Monoid (Result a) where
mempty = fail "Empty Result"
mappend = mplus
instance Foldable Result where
foldMap _ (Error _) = mempty
foldMap f (Success y) = f y
foldr _ z (Error _) = z
foldr f z (Success y) = f y z
instance Traversable Result where
traverse _ (Error err) = pure (Error err)
traverse f (Success v) = Success <$> f v
parseAvroJSON :: (Text -> Maybe Type) -> Type -> A.Value -> Result (Ty.Value Type)
parseAvroJSON env (NamedType (TN tn)) av =
case env tn of
Nothing -> fail $ "Could not resolve type name for " <> show tn
Just t -> parseAvroJSON env t av
parseAvroJSON env ty av =
case av of
A.String s ->
case ty of
String -> return $ Ty.String s
Enum {..} ->
if s `elem` symbols
then return $ Ty.Enum ty (maybe (error "IMPOSSIBLE BUG") id $ lookup s (zip symbols [0..])) s
else fail $ "JSON string is not one of the expected symbols for enum '" <> show name <> "': " <> T.unpack s
Union tys _ -> do
f <- tryAllTypes env tys av
maybe (fail $ "No match for String in union '" <> show (typeName ty) <> "'.") pure f
_ -> avroTypeMismatch ty "string"
A.Bool b -> case ty of
Boolean -> return $ Ty.Boolean b
_ -> avroTypeMismatch ty "boolean"
A.Number i ->
case ty of
Int -> return $ Ty.Int (floor i)
Long -> return $ Ty.Long (floor i)
Float -> return $ Ty.Float (realToFrac i)
Double -> return $ Ty.Double (realToFrac i)
Union tys _ -> do
f <- tryAllTypes env tys av
maybe (fail $ "No match for Number in union '" <> show (typeName ty) <> "'.") pure f
_ -> avroTypeMismatch ty "number"
A.Array vec ->
case ty of
Array t -> Ty.Array <$> V.mapM (parseAvroJSON env t) vec
Union tys _ -> do
f <- tryAllTypes env tys av
maybe (fail $ "No match for Array in union '" <> show (typeName ty) <> "'.") pure f
_ -> avroTypeMismatch ty "array"
A.Object obj ->
case ty of
Map mTy -> Ty.Map <$> mapM (parseAvroJSON env mTy) obj
Record {..} ->
do let lkAndParse f =
case HashMap.lookup (fldName f) obj of
Nothing -> case fldDefault f of
Just v -> return v
Nothing -> fail $ "Decode failure: No record field '" <> T.unpack (fldName f) <> "' and no default in schema."
Just v -> parseAvroJSON env (fldType f) v
Ty.Record ty . HashMap.fromList <$> mapM (\f -> (fldName f,) <$> lkAndParse f) fields
Union tys _ -> do
f <- tryAllTypes env tys av
maybe (fail $ "No match for given record in union '" <> show (typeName ty) <> "'.") pure f
_ -> avroTypeMismatch ty "object"
A.Null -> case ty of
Null -> return Ty.Null
Union us _ | Null `elem` NE.toList us -> return $ Ty.Union us Null Ty.Null
_ -> avroTypeMismatch ty "null"
tryAllTypes :: (Text -> Maybe Type) -> NonEmpty Type -> A.Value -> Result (Maybe (Ty.Value Type))
tryAllTypes env tys av =
getFirst <$> foldMap (\t -> First . Just <$> parseAvroJSON env t av) (NE.toList tys)
`catchError` (\_ -> return mempty)
avroTypeMismatch :: Type -> Text -> Result a
avroTypeMismatch expected actual =
fail $ "Could not resolve type '" <> T.unpack actual <> "' with expected type: " <> show expected
instance ToJSON Order where
toJSON o =
case o of
Ascending -> A.String "ascending"
Descending -> A.String "descending"
Ignore -> A.String "ignore"
instance FromJSON Order where
parseJSON (A.String s) =
case s of
"ascending" -> return Ascending
"descending" -> return Descending
"ignore" -> return Ignore
_ -> fail $ "Unknown string for order: " <> T.unpack s
parseJSON j = typeMismatch "Order" j
validateSchema :: Schema -> Parser ()
validateSchema _sch = return ()
buildTypeEnvironment :: Applicative m => (TypeName -> m Type) -> Type -> TypeName -> m Type
buildTypeEnvironment failure from =
\forTy -> case HashMap.lookup forTy mp of
Nothing -> failure forTy
Just res -> pure res
where
mp = HashMap.fromList $ go from
go :: Type -> [(TypeName,Type)]
go ty =
let mk :: TypeName -> [TypeName] -> Maybe Text -> [(TypeName,Type)]
mk n as ns =
let unqual = n:as
qual = maybe [] (\x -> P.map (mappend (TN x <> ".")) unqual) ns
in zip (unqual ++ qual) (repeat ty)
in case ty of
Record {..} -> mk name aliases namespace ++ concatMap (go . fldType) fields
Enum {..} -> mk name aliases namespace
Union {..} -> concatMap go options
Fixed {..} -> mk name aliases namespace
Array {..} -> go item
_ -> []