{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
module Parquet.ThriftTypes where
import Data.ByteString
import qualified Data.Generics.Product.Fields as GL
import qualified Data.Generics.Product.Positions as GL
import Data.Int
import Data.Text
import GHC.Generics
import GHC.TypeLits (AppendSymbol, Symbol)
import Control.Lens
import Pinch
import Data.Binary (Binary)
-- 'Binary' instances for the imported 'Field' and 'Enumeration' wrappers.
-- Neither type is defined in this module, so these are orphan instances.
-- No methods are given, so the class's default implementations are used.
-- NOTE(review): presumably those defaults are the Generic-based ones from
-- "Data.Binary", which would need both wrappers to be 'Generic' -- confirm.
instance Binary a => Binary (Field k a)
instance Binary (Enumeration k)
-- | Field-less marker type; it is pinched as a struct with no fields.
data StringType = StringType
  deriving (Eq, Show, Generic, Binary)

instance Pinchable StringType where
  type Tag StringType = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure StringType)
-- | Field-less marker type; it is pinched as a struct with no fields.
data UUIDType = UUIDType
  deriving (Eq, Show, Generic, Binary)

instance Pinchable UUIDType where
  type Tag UUIDType = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure UUIDType)
-- | Field-less marker type; it is pinched as a struct with no fields.
data MapType = MapType
  deriving (Eq, Show, Generic, Binary)

instance Pinchable MapType where
  type Tag MapType = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure MapType)
-- | Field-less marker type; it is pinched as a struct with no fields.
data ListType = ListType
  deriving (Eq, Show, Generic, Binary)

instance Pinchable ListType where
  type Tag ListType = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure ListType)
-- | Field-less marker type; it is pinched as a struct with no fields.
data EnumType = EnumType
  deriving (Eq, Show, Generic, Binary)

instance Pinchable EnumType where
  type Tag EnumType = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure EnumType)
-- | Field-less marker type; it is pinched as a struct with no fields.
data DateType = DateType
  deriving (Eq, Show, Generic, Binary)

instance Pinchable DateType where
  type Tag DateType = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure DateType)
-- | Type-level name of a data type, read from the datatype metadata of its
-- generic representation.
--
-- The first equation matches a representation that is already applied to
-- @()@ and yields the name stored in its @MetaData@. The second equation
-- sends any other type through 'Rep' and tries again.
type family TypeName ty :: Symbol where
  TypeName (M1 D ('MetaData tyName _ _ _) rep ()) = tyName
  TypeName ty = TypeName (Rep ty ())
-- | Lens that first focuses on position @pos@ of a structure and then on
-- position 1 of the value found there.
--
-- @pos@ appears only in the constraints, so callers pick it with a type
-- application.
pinchPos
  :: forall pos s t a1 b1 a2 b2
   . ( GL.HasPosition 1 a1 b1 a2 b2
     , GL.HasPosition pos s t a1 b1
     )
  => Lens s t a2 b2
pinchPos focus = GL.position @pos (GL.position @1 focus)
-- | Lens that focuses on a record field by its short name and then on
-- position 1 of the value stored in that field.
--
-- The actual selector name is computed at the type level as
-- @"_" ++ TypeName s ++ "_" ++ field@, so callers supply only the @field@
-- suffix (with a type application).
pinchField
  :: forall field s i r field_name
   . ( field_name
         ~ ("_" `AppendSymbol` TypeName s `AppendSymbol` "_" `AppendSymbol` field)
     , GL.HasPosition 1 i i r r
     , GL.HasField field_name s s i i
     )
  => Lens s s r r
pinchField focus = GL.field @field_name (GL.position @1 focus)
-- | Record with two 'Int32' fields, tagged with field ids 1 and 2.
-- The 'Pinchable' instance is derived from the 'Generic' representation.
data DecimalType = DecimalType
  { _DecimalType_scale     :: Field 1 Int32
  , _DecimalType_precision :: Field 2 Int32
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record pairing a 'Bool' flag (field id 1) with a 'TimeUnit' (field id 2).
-- The 'Pinchable' instance is derived from the 'Generic' representation.
data TimestampType = TimestampType
  { _TimestampType_isAdjustedToUTC :: Field 1 Bool
  , _TimestampType_unit            :: Field 2 TimeUnit
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record pairing a 'Bool' flag (field id 1) with a 'TimeUnit' (field id 2).
-- It has the same shape as 'TimestampType' but is a distinct type.
data TimeType = TimeType
  { _TimeType_isAdjustedToUTC :: Field 1 Bool
  , _TimeType_unit            :: Field 2 TimeUnit
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Field-less marker type used as a 'TimeUnit' alternative; it is pinched
-- as a struct with no fields.
data MilliSeconds = MilliSeconds
  deriving (Eq, Show, Generic, Binary)

instance Pinchable MilliSeconds where
  type Tag MilliSeconds = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure MilliSeconds)
-- | Field-less marker type used as a 'TimeUnit' alternative; it is pinched
-- as a struct with no fields.
data MicroSeconds = MicroSeconds
  deriving (Eq, Show, Generic, Binary)

instance Pinchable MicroSeconds where
  type Tag MicroSeconds = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure MicroSeconds)
-- | Field-less marker type used as a 'TimeUnit' alternative; it is pinched
-- as a struct with no fields.
data NanoSeconds = NanoSeconds
  deriving (Eq, Show, Generic, Binary)

instance Pinchable NanoSeconds where
  type Tag NanoSeconds = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure NanoSeconds)
-- | Choice between three marker types, one per constructor, tagged with
-- field ids 1 to 3.
--
-- 'Pinchable' is obtained through the deriving clause; with @DeriveAnyClass@
-- this yields the same method-less instance as a standalone empty
-- declaration would.
data TimeUnit
  = TimeUnitMILLIS (Field 1 MilliSeconds)
  | TimeUnitMICROS (Field 2 MicroSeconds)
  | TimeUnitNANOS  (Field 3 NanoSeconds)
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record with an 'Int8' bit width (field id 1) and a 'Bool' signedness
-- flag (field id 2).
data IntType = IntType
  { _IntType_bitWidth :: Field 1 Int8
  , _IntType_isSigned :: Field 2 Bool
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Field-less marker type; it is pinched as a struct with no fields.
data NullType = NullType
  deriving (Eq, Show, Generic, Binary)

instance Pinchable NullType where
  type Tag NullType = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure NullType)
-- | Field-less marker type; it is pinched as a struct with no fields.
data JsonType = JsonType
  deriving (Eq, Show, Generic, Binary)

instance Pinchable JsonType where
  type Tag JsonType = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure JsonType)
-- | Field-less marker type; it is pinched as a struct with no fields.
data BsonType = BsonType
  deriving (Eq, Show, Generic, Binary)

instance Pinchable BsonType where
  type Tag BsonType = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure BsonType)
-- | Choice between the logical type descriptors defined in this module.
--
-- Each constructor wraps exactly one payload tagged with its own field id.
-- The ids run from 1 to 14; id 9 is not assigned to any constructor.
data LogicalType
  = LogicalTypeSTRING    (Field 1 StringType)
  | LogicalTypeMAP       (Field 2 MapType)
  | LogicalTypeLIST      (Field 3 ListType)
  | LogicalTypeENUM      (Field 4 EnumType)
  | LogicalTypeDECIMAL   (Field 5 DecimalType)
  | LogicalTypeDATE      (Field 6 DateType)
  | LogicalTypeTIME      (Field 7 TimeType)
  | LogicalTypeTIMESTAMP (Field 8 TimestampType)
  | LogicalTypeINTEGER   (Field 10 IntType)
  | LogicalTypeUNKNOWN   (Field 11 NullType)
  | LogicalTypeJSON      (Field 12 JsonType)
  | LogicalTypeBSON      (Field 13 BsonType)
  | LogicalTypeUUID      (Field 14 UUIDType)
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Enumeration of converted types; every constructor carries its wire
-- value as the type-level index of 'Enumeration'.
--
-- The wire values cover 0 through 21. Value 4 (@ENUM@ in the Parquet Thrift
-- definition) previously had no constructor, so a value of 4 could not be
-- represented; it is now covered by 'ENUM'.
data ConvertedType
  = UTF8 (Enumeration 0)
  | MAP (Enumeration 1)
  | MAP_KEY_VALUE (Enumeration 2)
  | LIST (Enumeration 3)
  | DECIMAL (Enumeration 5)
  | DATE (Enumeration 6)
  | TIME_MILLIS (Enumeration 7)
  | TIME_MICROS (Enumeration 8)
  | TIMESTAMP_MILLIS (Enumeration 9)
  | TIMESTAMP_MICROS (Enumeration 10)
  | UINT_8 (Enumeration 11)
  | UINT_16 (Enumeration 12)
  | UINT_32 (Enumeration 13)
  | UINT_64 (Enumeration 14)
  | INT_8 (Enumeration 15)
  | INT_16 (Enumeration 16)
  | INT_32 (Enumeration 17)
  | INT_64 (Enumeration 18)
  | JSON (Enumeration 19)
  | BSON (Enumeration 20)
  | INTERVAL (Enumeration 21)
  -- Wire value 4. Listed last rather than in numeric order so that the
  -- positions of the pre-existing constructors, which the Generic-derived
  -- instances may depend on, stay exactly where they were.
  | ENUM (Enumeration 4)
  deriving (Show, Eq, Generic, Pinchable, Binary)
-- | Enumeration of eight type tags; every constructor carries its wire value
-- (0 through 7) as the type-level index of 'Enumeration'.
data Type
  = BOOLEAN              (Enumeration 0)
  | INT32                (Enumeration 1)
  | INT64                (Enumeration 2)
  | INT96                (Enumeration 3)
  | FLOAT                (Enumeration 4)
  | DOUBLE               (Enumeration 5)
  | BYTE_ARRAY           (Enumeration 6)
  | FIXED_LEN_BYTE_ARRAY (Enumeration 7)
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Enumeration of three repetition tags with wire values 0, 1 and 2.
data FieldRepetitionType
  = REQUIRED (Enumeration 0)
  | OPTIONAL (Enumeration 1)
  | REPEATED (Enumeration 2)
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record describing one schema element, with field ids 1 through 10.
--
-- Only the name (field id 4) is a plain value; every other field is wrapped
-- in 'Maybe'.
data SchemaElement = SchemaElement
  { _SchemaElement_type            :: Field 1 (Maybe Type)
  , _SchemaElement_type_length     :: Field 2 (Maybe Int32)
  , _SchemaElement_repetition_type :: Field 3 (Maybe FieldRepetitionType)
  , _SchemaElement_name            :: Field 4 Text
  , _SchemaElement_num_children    :: Field 5 (Maybe Int32)
  , _SchemaElement_converted_type  :: Field 6 (Maybe ConvertedType)
  , _SchemaElement_scale           :: Field 7 (Maybe Int32)
  , _SchemaElement_precision       :: Field 8 (Maybe Int32)
  , _SchemaElement_field_id        :: Field 9 (Maybe Int32)
  , _SchemaElement_logicalType     :: Field 10 (Maybe LogicalType)
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Enumeration of encodings; every constructor carries its wire value as
-- the type-level index of 'Enumeration'.
--
-- The values used are 0 and 2 through 8; value 1 has no constructor.
data Encoding
  = PLAIN                   (Enumeration 0)
  | PLAIN_DICTIONARY        (Enumeration 2)
  | RLE                     (Enumeration 3)
  | BIT_PACKED              (Enumeration 4)
  | DELTA_BINARY_PACKED     (Enumeration 5)
  | DELTA_LENGTH_BYTE_ARRAY (Enumeration 6)
  | DELTA_BYTE_ARRAY        (Enumeration 7)
  | RLE_DICTIONARY          (Enumeration 8)
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Enumeration of seven compression codecs with wire values 0 through 6.
data CompressionCodec
  = UNCOMPRESSED (Enumeration 0)
  | SNAPPY       (Enumeration 1)
  | GZIP         (Enumeration 2)
  | LZO          (Enumeration 3)
  | BROTLI       (Enumeration 4)
  | LZ4          (Enumeration 5)
  | ZSTD         (Enumeration 6)
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record of six statistics fields (field ids 1 through 6), all wrapped in
-- 'Maybe': four 'ByteString' values and two 'Int64' counts.
data Statistics = Statistics
  { _Statistics_max            :: Field 1 (Maybe ByteString)
  , _Statistics_min            :: Field 2 (Maybe ByteString)
  , _Statistics_null_count     :: Field 3 (Maybe Int64)
  , _Statistics_distinct_count :: Field 4 (Maybe Int64)
  , _Statistics_max_value      :: Field 5 (Maybe ByteString)
  , _Statistics_min_value      :: Field 6 (Maybe ByteString)
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record combining a 'PageType' (field id 1), an 'Encoding' (field id 2)
-- and an 'Int32' count (field id 3).
data PageEncodingStats = PageEncodingStats
  { _PageEncodingStats_page_type :: Field 1 PageType
  , _PageEncodingStats_encoding  :: Field 2 Encoding
  , _PageEncodingStats_count     :: Field 3 Int32
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Enumeration of four page kinds with wire values 0 through 3.
data PageType
  = DATA_PAGE       (Enumeration 0)
  | INDEX_PAGE      (Enumeration 1)
  | DICTIONARY_PAGE (Enumeration 2)
  | DATA_PAGE_V2    (Enumeration 3)
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record with an 'Int32' column index (field id 1) and two 'Bool' flags
-- (field ids 2 and 3).
data SortingColumn = SortingColumn
  { _SortingColumn_column_idx  :: Field 1 Int32
  , _SortingColumn_descending  :: Field 2 Bool
  , _SortingColumn_nulls_first :: Field 3 Bool
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record of three 'Maybe'-wrapped fields (field ids 1 through 3): two
-- 'ByteString' values and one 'Bool'.
data AesGcmV1 = AesGcmV1
  { _AesGcmV1_aad_prefix        :: Field 1 (Maybe ByteString)
  , _AesGcmV1_aad_file_unique   :: Field 2 (Maybe ByteString)
  , _AesGcmV1_supply_aad_prefix :: Field 3 (Maybe Bool)
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record of three 'Maybe'-wrapped fields (field ids 1 through 3): two
-- 'ByteString' values and one 'Bool'. It has the same shape as 'AesGcmV1'
-- but is a distinct type.
data AesGcmCtrV1 = AesGcmCtrV1
  { _AesGcmCtrV1_aad_prefix        :: Field 1 (Maybe ByteString)
  , _AesGcmCtrV1_aad_file_unique   :: Field 2 (Maybe ByteString)
  , _AesGcmCtrV1_supply_aad_prefix :: Field 3 (Maybe Bool)
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Choice between the two algorithm parameter records: 'AesGcmV1' under
-- field id 1 or 'AesGcmCtrV1' under field id 2.
data EncryptionAlgorithm
  = EncryptionAlgorithm_AES_GCM_V1     (Field 1 AesGcmV1)
  | EncryptionAlgorithm_AES_GCM_CTR_V1 (Field 2 AesGcmCtrV1)
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Field-less marker type; it is pinched as a struct with no fields.
data TypeDefinedOrder = TypeDefinedOrder
  deriving (Eq, Show, Generic, Binary)

instance Pinchable TypeDefinedOrder where
  type Tag TypeDefinedOrder = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure TypeDefinedOrder)
-- | Single-alternative choice wrapping a 'TypeDefinedOrder' under field
-- id 1.
data ColumnOrder
  = ColumnOrder_TYPE_ORDER (Field 1 TypeDefinedOrder)
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Field-less marker type; it is pinched as a struct with no fields.
data EncryptionWithFooterKey = EncryptionWithFooterKey
  deriving (Eq, Show, Generic, Binary)

instance Pinchable EncryptionWithFooterKey where
  type Tag EncryptionWithFooterKey = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure EncryptionWithFooterKey)
-- | Record with a list of 'Text' path components (field id 1) and a
-- 'Maybe'-wrapped 'ByteString' (field id 2).
data EncryptionWithColumnKey = EncryptionWithColumnKey
  { _EncryptionWithColumnKey_path_in_schema :: Field 1 [Text]
  , _EncryptionWithColumnKey_key_metadata   :: Field 2 (Maybe ByteString)
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Choice between 'EncryptionWithFooterKey' (field id 1) and
-- 'EncryptionWithColumnKey' (field id 2).
data ColumnCryptoMetaData
  = ColumnCryptoMetaData_ENCRYPTION_WITH_FOOTER_KEY
      (Field 1 EncryptionWithFooterKey)
  | ColumnCryptoMetaData_ENCRYPTION_WITH_COLUMN_KEY
      (Field 2 EncryptionWithColumnKey)
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Key/value pair: a 'Text' key (field id 1) and a 'Maybe'-wrapped 'Text'
-- value (field id 2).
data KeyValue = KeyValue
  { _KeyValue_key   :: Field 1 Text
  , _KeyValue_value :: Field 2 (Maybe Text)
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record of column metadata with field ids 1 through 14.
--
-- Field ids 1 to 7 and 9 hold plain values; ids 8 and 10 to 14 are wrapped
-- in 'Maybe'.
data ColumnMetaData = ColumnMetaData
  { _ColumnMetaData_type                    :: Field 1 Type
  , _ColumnMetaData_encodings               :: Field 2 [Encoding]
  , _ColumnMetaData_path_in_schema          :: Field 3 [Text]
  , _ColumnMetaData_codec                   :: Field 4 CompressionCodec
  , _ColumnMetaData_num_values              :: Field 5 Int64
  , _ColumnMetaData_total_uncompressed_size :: Field 6 Int64
  , _ColumnMetaData_total_compressed_size   :: Field 7 Int64
  , _ColumnMetaData_key_value_metadata      :: Field 8 (Maybe [KeyValue])
  , _ColumnMetaData_data_page_offset        :: Field 9 Int64
  , _ColumnMetaData_index_page_offset       :: Field 10 (Maybe Int64)
  , _ColumnMetaData_dictionary_page_offset  :: Field 11 (Maybe Int64)
  , _ColumnMetaData_statistics              :: Field 12 (Maybe Statistics)
  , _ColumnMetaData_encoding_stats          :: Field 13 (Maybe [PageEncodingStats])
  , _ColumnMetaData_bloom_filter_offset     :: Field 14 (Maybe Int64)
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record describing a column chunk, with field ids 1 through 9.
--
-- Only the file offset (field id 2) is a plain value; every other field is
-- wrapped in 'Maybe'.
data ColumnChunk = ColumnChunk
  { _ColumnChunk_file_path                 :: Field 1 (Maybe Text)
  , _ColumnChunk_file_offset               :: Field 2 Int64
  , _ColumnChunk_meta_data                 :: Field 3 (Maybe ColumnMetaData)
  , _ColumnChunk_offset_index_offset       :: Field 4 (Maybe Int64)
  , _ColumnChunk_offset_index_length       :: Field 5 (Maybe Int32)
  , _ColumnChunk_column_index_offset       :: Field 6 (Maybe Int64)
  , _ColumnChunk_column_index_length       :: Field 7 (Maybe Int32)
  , _ColumnChunk_crypto_metadata           :: Field 8 (Maybe ColumnCryptoMetaData)
  , _ColumnChunk_encrypted_column_metadata :: Field 9 (Maybe ByteString)
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record describing a row group, with field ids 1 through 7.
--
-- Field ids 1 to 3 hold plain values (a list of 'ColumnChunk' and two
-- 'Int64' numbers); ids 4 to 7 are wrapped in 'Maybe'.
data RowGroup = RowGroup
  { _RowGroup_column_chunks         :: Field 1 [ColumnChunk]
  , _RowGroup_total_byte_size       :: Field 2 Int64
  , _RowGroup_num_rows              :: Field 3 Int64
  , _RowGroup_sorting_columns       :: Field 4 (Maybe [SortingColumn])
  , _RowGroup_file_offset           :: Field 5 (Maybe Int64)
  , _RowGroup_total_compressed_size :: Field 6 (Maybe Int64)
  , _RowGroup_ordinal               :: Field 7 (Maybe Int16)
  }
  deriving (Eq, Show, Generic, Pinchable, Binary)
-- | Record of file-level metadata, with field ids 1 through 9.
--
-- Field ids 1 to 4 hold plain values (version, schema elements, row count
-- and row groups); ids 5 to 9 are wrapped in 'Maybe'.
data FileMetadata = FileMetadata
  { _FileMetadata_version                     :: Field 1 Int32
  , _FileMetadata_schema                      :: Field 2 [SchemaElement]
  , _FileMetadata_num_rows                    :: Field 3 Int64
  , _FileMetadata_row_groups                  :: Field 4 [RowGroup]
  , _FileMetadata_key_value_metadata          :: Field 5 (Maybe [KeyValue])
  , _FileMetadata_created_by                  :: Field 6 (Maybe Text)
  , _FileMetadata_column_orders               :: Field 7 (Maybe [ColumnOrder])
  , _FileMetadata_encryption_algorithm        :: Field 8 (Maybe EncryptionAlgorithm)
  , _FileMetadata_footer_signing_key_metadata :: Field 9 (Maybe ByteString)
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record describing a page header, with field ids 1 through 8.
--
-- Field ids 1 to 3 hold plain values (a 'PageType' and two 'Int32' sizes).
-- Ids 4 to 8 are wrapped in 'Maybe': a CRC and one optional header per page
-- kind.
data PageHeader = PageHeader
  { _PageHeader_type                   :: Field 1 PageType
  , _PageHeader_uncompressed_page_size :: Field 2 Int32
  , _PageHeader_compressed_page_size   :: Field 3 Int32
  , _PageHeader_crc                    :: Field 4 (Maybe Int32)
  , _PageHeader_data_page_header       :: Field 5 (Maybe DataPageHeader)
  , _PageHeader_index_page_header      :: Field 6 (Maybe IndexPageHeader)
  , _PageHeader_dictionary_page_header :: Field 7 (Maybe DictionaryPageHeader)
  , _PageHeader_data_page_header_v2    :: Field 8 (Maybe DataPageHeaderV2)
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Field-less marker type; it is pinched as a struct with no fields.
data IndexPageHeader = IndexPageHeader
  deriving (Eq, Show, Generic, Binary)

instance Pinchable IndexPageHeader where
  type Tag IndexPageHeader = TStruct
  -- The argument is never inspected: there is nothing to encode.
  pinch = const (struct [])
  -- The incoming value is ignored; the single constructor is always produced.
  unpinch = const (pure IndexPageHeader)
-- | Record describing a data page header, with field ids 1 through 5: an
-- 'Int32' value count, three 'Encoding' values and a 'Maybe'-wrapped
-- 'Statistics'.
data DataPageHeader = DataPageHeader
  { _DataPageHeader_num_values                :: Field 1 Int32
  , _DataPageHeader_encoding                  :: Field 2 Encoding
  , _DataPageHeader_definition_level_encoding :: Field 3 Encoding
  , _DataPageHeader_repetition_level_encoding :: Field 4 Encoding
  , _DataPageHeader_statistics                :: Field 5 (Maybe Statistics)
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record describing a dictionary page header: an 'Int32' value count
-- (field id 1), an 'Encoding' (field id 2) and a 'Maybe'-wrapped 'Bool'
-- (field id 3).
data DictionaryPageHeader = DictionaryPageHeader
  { _DictionaryPageHeader_num_values :: Field 1 Int32
  , _DictionaryPageHeader_encoding   :: Field 2 Encoding
  , _DictionaryPageHeader_is_sorted  :: Field 3 (Maybe Bool)
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Record describing a version-2 data page header, with field ids 1
-- through 8.
--
-- Field ids 1 to 6 hold plain values (five 'Int32' numbers and an
-- 'Encoding'); ids 7 and 8 are wrapped in 'Maybe'.
data DataPageHeaderV2 = DataPageHeaderV2
  { _DataPageHeaderV2_num_values                    :: Field 1 Int32
  , _DataPageHeaderV2_num_nulls                     :: Field 2 Int32
  , _DataPageHeaderV2_num_rows                      :: Field 3 Int32
  , _DataPageHeaderV2_encoding                      :: Field 4 Encoding
  , _DataPageHeaderV2_definition_levels_byte_length :: Field 5 Int32
  , _DataPageHeaderV2_repetition_levels_byte_length :: Field 6 Int32
  , _DataPageHeaderV2_is_compressed                 :: Field 7 (Maybe Bool)
  , _DataPageHeaderV2_statistics                    :: Field 8 (Maybe Statistics)
  }
  deriving (Eq, Show, Generic, Binary, Pinchable)
-- | Unwrap a 'Field', returning the value it carries and dropping the
-- type-level tag @n@.
unField :: Field n a -> a
unField wrapped = case wrapped of
  Field payload -> payload