Safe Haskell | Safe-Inferred |
---|---|
Language | Haskell2010 |
Hydra.Langs.Parquet.Format
Description
A model for the Parquet format. Based on the Thrift-based specification at: | https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift
Synopsis
- data Type
- _Type :: Name
- _Type_boolean :: Name
- _Type_int32 :: Name
- _Type_int64 :: Name
- _Type_float :: Name
- _Type_double :: Name
- _Type_byteArray :: Name
- _Type_fixedLenByteArray :: Name
- data FieldRepetitionType
- _FieldRepetitionType :: Name
- _FieldRepetitionType_required :: Name
- _FieldRepetitionType_optional :: Name
- _FieldRepetitionType_repeated :: Name
- data Statistics = Statistics {}
- _Statistics :: Name
- _Statistics_nullCount :: Name
- _Statistics_distinctCount :: Name
- _Statistics_maxValue :: Name
- _Statistics_minValue :: Name
- data DecimalType = DecimalType {}
- _DecimalType :: Name
- _DecimalType_scale :: Name
- _DecimalType_precision :: Name
- data TimeUnit
- _TimeUnit :: Name
- _TimeUnit_millis :: Name
- _TimeUnit_micros :: Name
- _TimeUnit_nanos :: Name
- data TimestampType = TimestampType {}
- _TimestampType :: Name
- _TimestampType_isAdjustedToUtc :: Name
- _TimestampType_unit :: Name
- data TimeType = TimeType {}
- _TimeType :: Name
- _TimeType_isAdjustedToUtc :: Name
- _TimeType_unit :: Name
- data IntType = IntType {}
- _IntType :: Name
- _IntType_bitWidth :: Name
- _IntType_isSigned :: Name
- data LogicalType
- _LogicalType :: Name
- _LogicalType_string :: Name
- _LogicalType_map :: Name
- _LogicalType_list :: Name
- _LogicalType_enum :: Name
- _LogicalType_decimal :: Name
- _LogicalType_date :: Name
- _LogicalType_time :: Name
- _LogicalType_timestamp :: Name
- _LogicalType_integer :: Name
- _LogicalType_unknown :: Name
- _LogicalType_json :: Name
- _LogicalType_bson :: Name
- _LogicalType_uuid :: Name
- data SchemaElement = SchemaElement {}
- _SchemaElement :: Name
- _SchemaElement_type :: Name
- _SchemaElement_typeLength :: Name
- _SchemaElement_repetitionType :: Name
- _SchemaElement_name :: Name
- _SchemaElement_numChildren :: Name
- _SchemaElement_fieldId :: Name
- _SchemaElement_logicalType :: Name
- data Encoding
- _Encoding :: Name
- _Encoding_plain :: Name
- _Encoding_rle :: Name
- _Encoding_bitPacked :: Name
- _Encoding_deltaBinaryPacked :: Name
- _Encoding_deltaLengthByteArray :: Name
- _Encoding_deltaByteArray :: Name
- _Encoding_rleDictionary :: Name
- _Encoding_byteStreamSplit :: Name
- data CompressionCodec
- _CompressionCodec :: Name
- _CompressionCodec_uncompressed :: Name
- _CompressionCodec_snappy :: Name
- _CompressionCodec_gzip :: Name
- _CompressionCodec_lzo :: Name
- _CompressionCodec_brotli :: Name
- _CompressionCodec_zstd :: Name
- _CompressionCodec_lz4Raw :: Name
- data PageType
- _PageType :: Name
- _PageType_dataPage :: Name
- _PageType_indexPage :: Name
- _PageType_dictionaryPage :: Name
- _PageType_dataPageV2 :: Name
- data BoundaryOrder
- _BoundaryOrder :: Name
- _BoundaryOrder_unordered :: Name
- _BoundaryOrder_ascending :: Name
- _BoundaryOrder_descending :: Name
- data DataPageHeader = DataPageHeader {}
- _DataPageHeader :: Name
- _DataPageHeader_numValues :: Name
- _DataPageHeader_encoding :: Name
- _DataPageHeader_definitionLevelEncoding :: Name
- _DataPageHeader_repetitionLevelEncoding :: Name
- _DataPageHeader_statistics :: Name
- data IndexPageHeader = IndexPageHeader {}
- _IndexPageHeader :: Name
- data DictionaryPageHeader = DictionaryPageHeader {}
- _DictionaryPageHeader :: Name
- _DictionaryPageHeader_numValues :: Name
- _DictionaryPageHeader_encoding :: Name
- _DictionaryPageHeader_isSorted :: Name
- data DataPageHeaderV2 = DataPageHeaderV2 {
- dataPageHeaderV2NumValues :: Int
- dataPageHeaderV2NumNulls :: Int
- dataPageHeaderV2NumRows :: Int
- dataPageHeaderV2Encoding :: Encoding
- dataPageHeaderV2DefinitionLevelsByteLength :: Int
- dataPageHeaderV2RepetitionLevelsByteLength :: Int
- dataPageHeaderV2IsCompressed :: Maybe Bool
- dataPageHeaderV2Statistics :: Maybe Statistics
- _DataPageHeaderV2 :: Name
- _DataPageHeaderV2_numValues :: Name
- _DataPageHeaderV2_numNulls :: Name
- _DataPageHeaderV2_numRows :: Name
- _DataPageHeaderV2_encoding :: Name
- _DataPageHeaderV2_definitionLevelsByteLength :: Name
- _DataPageHeaderV2_repetitionLevelsByteLength :: Name
- _DataPageHeaderV2_isCompressed :: Name
- _DataPageHeaderV2_statistics :: Name
- data BloomFilterAlgorithm = BloomFilterAlgorithmBlock
- _BloomFilterAlgorithm :: Name
- _BloomFilterAlgorithm_block :: Name
- data BloomFilterHash = BloomFilterHashXxhash
- _BloomFilterHash :: Name
- _BloomFilterHash_xxhash :: Name
- data BloomFilterCompression = BloomFilterCompressionUncompressed
- _BloomFilterCompression :: Name
- _BloomFilterCompression_uncompressed :: Name
- data BloomFilterHeader = BloomFilterHeader {}
- _BloomFilterHeader :: Name
- _BloomFilterHeader_numBytes :: Name
- _BloomFilterHeader_algorithm :: Name
- _BloomFilterHeader_hash :: Name
- _BloomFilterHeader_compression :: Name
- data PageHeader = PageHeader {
- pageHeaderType :: PageType
- pageHeaderUncompressedPageSize :: Int
- pageHeaderCompressedPageSize :: Int
- pageHeaderCrc :: Maybe Int
- pageHeaderDataPageHeader :: Maybe DataPageHeader
- pageHeaderIndexPageHeader :: Maybe IndexPageHeader
- pageHeaderDictionaryPageHeader :: Maybe DictionaryPageHeader
- pageHeaderDataPageHeaderV2 :: Maybe DataPageHeaderV2
- _PageHeader :: Name
- _PageHeader_type :: Name
- _PageHeader_uncompressedPageSize :: Name
- _PageHeader_compressedPageSize :: Name
- _PageHeader_crc :: Name
- _PageHeader_dataPageHeader :: Name
- _PageHeader_indexPageHeader :: Name
- _PageHeader_dictionaryPageHeader :: Name
- _PageHeader_dataPageHeaderV2 :: Name
- data KeyValue = KeyValue {}
- _KeyValue :: Name
- _KeyValue_key :: Name
- _KeyValue_value :: Name
- data SortingColumn = SortingColumn {}
- _SortingColumn :: Name
- _SortingColumn_columnIdx :: Name
- _SortingColumn_descending :: Name
- _SortingColumn_nullsFirst :: Name
- data PageEncodingStats = PageEncodingStats {}
- _PageEncodingStats :: Name
- _PageEncodingStats_pageType :: Name
- _PageEncodingStats_encoding :: Name
- _PageEncodingStats_count :: Name
- data ColumnMetaData = ColumnMetaData {
- columnMetaDataType :: Type
- columnMetaDataEncodings :: [Encoding]
- columnMetaDataPathInSchema :: [String]
- columnMetaDataCodec :: CompressionCodec
- columnMetaDataNumValues :: Int64
- columnMetaDataTotalUncompressedSize :: Int64
- columnMetaDataTotalCompressedSize :: Int64
- columnMetaDataKeyValueMetadata :: Maybe [KeyValue]
- columnMetaDataDataPageOffset :: Int64
- columnMetaDataIndexPageOffset :: Maybe Int64
- columnMetaDataDictionaryPageOffset :: Maybe Int64
- columnMetaDataStatistics :: Maybe Statistics
- columnMetaDataEncodingStats :: Maybe [PageEncodingStats]
- columnMetaDataBloomFilterOffset :: Maybe Int64
- _ColumnMetaData :: Name
- _ColumnMetaData_type :: Name
- _ColumnMetaData_encodings :: Name
- _ColumnMetaData_pathInSchema :: Name
- _ColumnMetaData_codec :: Name
- _ColumnMetaData_numValues :: Name
- _ColumnMetaData_totalUncompressedSize :: Name
- _ColumnMetaData_totalCompressedSize :: Name
- _ColumnMetaData_keyValueMetadata :: Name
- _ColumnMetaData_dataPageOffset :: Name
- _ColumnMetaData_indexPageOffset :: Name
- _ColumnMetaData_dictionaryPageOffset :: Name
- _ColumnMetaData_statistics :: Name
- _ColumnMetaData_encodingStats :: Name
- _ColumnMetaData_bloomFilterOffset :: Name
- data EncryptionWithFooterKey = EncryptionWithFooterKey {}
- _EncryptionWithFooterKey :: Name
- data EncryptionWithColumnKey = EncryptionWithColumnKey {}
- _EncryptionWithColumnKey :: Name
- _EncryptionWithColumnKey_pathInSchema :: Name
- _EncryptionWithColumnKey_keyMetadata :: Name
- data ColumnCryptoMetaData
- _ColumnCryptoMetaData :: Name
- _ColumnCryptoMetaData_encryptionWithFooterKey :: Name
- _ColumnCryptoMetaData_encryptionWithColumnKey :: Name
- data ColumnChunk = ColumnChunk {
- columnChunkFilePath :: Maybe String
- columnChunkFileOffset :: Int64
- columnChunkMetaData :: Maybe ColumnMetaData
- columnChunkOffsetIndexOffset :: Maybe Int64
- columnChunkOffsetIndexLength :: Maybe Int
- columnChunkColumnIndexOffset :: Maybe Int64
- columnChunkColumnIndexLength :: Maybe Int
- columnChunkCryptoMetadata :: Maybe ColumnCryptoMetaData
- columnChunkEncryptedColumnMetadata :: Maybe String
- _ColumnChunk :: Name
- _ColumnChunk_filePath :: Name
- _ColumnChunk_fileOffset :: Name
- _ColumnChunk_metaData :: Name
- _ColumnChunk_offsetIndexOffset :: Name
- _ColumnChunk_offsetIndexLength :: Name
- _ColumnChunk_columnIndexOffset :: Name
- _ColumnChunk_columnIndexLength :: Name
- _ColumnChunk_cryptoMetadata :: Name
- _ColumnChunk_encryptedColumnMetadata :: Name
- data RowGroup = RowGroup {}
- _RowGroup :: Name
- _RowGroup_columns :: Name
- _RowGroup_totalByteSize :: Name
- _RowGroup_numRows :: Name
- _RowGroup_sortingColumns :: Name
- _RowGroup_fileOffset :: Name
- _RowGroup_totalCompressedSize :: Name
- _RowGroup_ordinal :: Name
- data ColumnOrder = ColumnOrderTypeOrder
- _ColumnOrder :: Name
- _ColumnOrder_typeOrder :: Name
- data PageLocation = PageLocation {}
- _PageLocation :: Name
- _PageLocation_offset :: Name
- _PageLocation_compressedPageSize :: Name
- _PageLocation_firstRowIndex :: Name
- data OffsetIndex = OffsetIndex {}
- _OffsetIndex :: Name
- _OffsetIndex_pageLocations :: Name
- data ColumnIndex = ColumnIndex {}
- _ColumnIndex :: Name
- _ColumnIndex_nullPages :: Name
- _ColumnIndex_minValues :: Name
- _ColumnIndex_maxValues :: Name
- _ColumnIndex_boundaryOrder :: Name
- _ColumnIndex_nullCounts :: Name
- data AesGcmV1 = AesGcmV1 {}
- _AesGcmV1 :: Name
- _AesGcmV1_aadPrefix :: Name
- _AesGcmV1_aadFileUnique :: Name
- _AesGcmV1_supplyAadPrefix :: Name
- data AesGcmCtrV1 = AesGcmCtrV1 {}
- _AesGcmCtrV1 :: Name
- _AesGcmCtrV1_aadPrefix :: Name
- _AesGcmCtrV1_aadFileUnique :: Name
- _AesGcmCtrV1_supplyAadPrefix :: Name
- data EncryptionAlgorithm
- _EncryptionAlgorithm :: Name
- _EncryptionAlgorithm_aesGcmV1 :: Name
- _EncryptionAlgorithm_aesGcmCtrV1 :: Name
- data FileMetaData = FileMetaData {
- fileMetaDataVersion :: Int
- fileMetaDataSchema :: [SchemaElement]
- fileMetaDataNumRows :: Int64
- fileMetaDataRowGroups :: [RowGroup]
- fileMetaDataKeyValueMetadata :: Maybe [KeyValue]
- fileMetaDataCreatedBy :: Maybe String
- fileMetaDataColumnOrders :: Maybe [ColumnOrder]
- fileMetaDataEncryptionAlgorithm :: Maybe EncryptionAlgorithm
- fileMetaDataFooterSigningKeyMetadata :: Maybe String
- _FileMetaData :: Name
- _FileMetaData_version :: Name
- _FileMetaData_schema :: Name
- _FileMetaData_numRows :: Name
- _FileMetaData_rowGroups :: Name
- _FileMetaData_keyValueMetadata :: Name
- _FileMetaData_createdBy :: Name
- _FileMetaData_columnOrders :: Name
- _FileMetaData_encryptionAlgorithm :: Name
- _FileMetaData_footerSigningKeyMetadata :: Name
- data FileCryptoMetaData = FileCryptoMetaData {}
- _FileCryptoMetaData :: Name
- _FileCryptoMetaData_encryptionAlgorithm :: Name
- _FileCryptoMetaData_keyMetadata :: Name
Documentation
Types supported by Parquet. These types are intended to be used in combination with the encodings to control the on disk storage format. For example INT16 is not included as a type since a good encoding of INT32 would handle this.
Constructors
TypeBoolean | |
TypeInt32 | |
TypeInt64 | |
TypeFloat | |
TypeDouble | |
TypeByteArray | |
TypeFixedLenByteArray |
_Type_boolean :: Name Source #
_Type_int32 :: Name Source #
_Type_int64 :: Name Source #
_Type_float :: Name Source #
_Type_double :: Name Source #
data FieldRepetitionType Source #
Representation of Schemas
Constructors
FieldRepetitionTypeRequired | This field is required (can not be null) and each record has exactly 1 value. |
FieldRepetitionTypeOptional | The field is optional (can be null) and each record has 0 or 1 values. |
FieldRepetitionTypeRepeated | The field is repeated and can contain 0 or more values |
Instances
data Statistics Source #
Statistics per row group and per page. All fields are optional.
Constructors
Statistics | |
Fields
|
Instances
Read Statistics Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS Statistics # readList :: ReadS [Statistics] # readPrec :: ReadPrec Statistics # readListPrec :: ReadPrec [Statistics] # | |
Show Statistics Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> Statistics -> ShowS # show :: Statistics -> String # showList :: [Statistics] -> ShowS # | |
Eq Statistics Source # | |
Defined in Hydra.Langs.Parquet.Format | |
Ord Statistics Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: Statistics -> Statistics -> Ordering # (<) :: Statistics -> Statistics -> Bool # (<=) :: Statistics -> Statistics -> Bool # (>) :: Statistics -> Statistics -> Bool # (>=) :: Statistics -> Statistics -> Bool # max :: Statistics -> Statistics -> Statistics # min :: Statistics -> Statistics -> Statistics # |
_Statistics :: Name Source #
data DecimalType Source #
Decimal logical type annotation. To maintain forward-compatibility in v1, implementations using this logical type must also set scale and precision on the annotated SchemaElement. Allowed for physical types: INT32, INT64, FIXED, and BINARY
Constructors
DecimalType | |
Fields |
Instances
Read DecimalType Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS DecimalType # readList :: ReadS [DecimalType] # readPrec :: ReadPrec DecimalType # readListPrec :: ReadPrec [DecimalType] # | |
Show DecimalType Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> DecimalType -> ShowS # show :: DecimalType -> String # showList :: [DecimalType] -> ShowS # | |
Eq DecimalType Source # | |
Defined in Hydra.Langs.Parquet.Format | |
Ord DecimalType Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: DecimalType -> DecimalType -> Ordering # (<) :: DecimalType -> DecimalType -> Bool # (<=) :: DecimalType -> DecimalType -> Bool # (>) :: DecimalType -> DecimalType -> Bool # (>=) :: DecimalType -> DecimalType -> Bool # max :: DecimalType -> DecimalType -> DecimalType # min :: DecimalType -> DecimalType -> DecimalType # |
_DecimalType :: Name Source #
Constructors
TimeUnitMillis | |
TimeUnitMicros | |
TimeUnitNanos |
data TimestampType Source #
Timestamp logical type annotation. Allowed for physical types: INT64
Constructors
TimestampType | |
Fields |
Instances
Read TimestampType Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS TimestampType # readList :: ReadS [TimestampType] # | |
Show TimestampType Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> TimestampType -> ShowS # show :: TimestampType -> String # showList :: [TimestampType] -> ShowS # | |
Eq TimestampType Source # | |
Defined in Hydra.Langs.Parquet.Format Methods (==) :: TimestampType -> TimestampType -> Bool # (/=) :: TimestampType -> TimestampType -> Bool # | |
Ord TimestampType Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: TimestampType -> TimestampType -> Ordering # (<) :: TimestampType -> TimestampType -> Bool # (<=) :: TimestampType -> TimestampType -> Bool # (>) :: TimestampType -> TimestampType -> Bool # (>=) :: TimestampType -> TimestampType -> Bool # max :: TimestampType -> TimestampType -> TimestampType # min :: TimestampType -> TimestampType -> TimestampType # |
Time logical type annotation. Allowed for physical types: INT32 (millis), INT64 (micros, nanos)
Constructors
TimeType | |
Fields |
Integer logical type annotation. bitWidth must be 8, 16, 32, or 64. Allowed for physical types: INT32, INT64
Constructors
IntType | |
Fields |
data LogicalType Source #
LogicalType annotations to replace ConvertedType. To maintain compatibility, implementations using LogicalType for a SchemaElement must also set the corresponding ConvertedType (if any) from the following table.
Constructors
LogicalTypeString | use ConvertedType UTF8 |
LogicalTypeMap | use ConvertedType MAP |
LogicalTypeList | use ConvertedType LIST |
LogicalTypeEnum | use ConvertedType ENUM |
LogicalTypeDecimal DecimalType | use ConvertedType DECIMAL + SchemaElement.{scale, precision} |
LogicalTypeDate | use ConvertedType DATE |
LogicalTypeTime TimeType | use ConvertedType TIME_MICROS for TIME(isAdjustedToUTC = *, unit = MICROS). use ConvertedType TIME_MILLIS for TIME(isAdjustedToUTC = *, unit = MILLIS) |
LogicalTypeTimestamp TimestampType | use ConvertedType TIMESTAMP_MICROS for TIMESTAMP(isAdjustedToUTC = *, unit = MICROS). use ConvertedType TIMESTAMP_MILLIS for TIMESTAMP(isAdjustedToUTC = *, unit = MILLIS) |
LogicalTypeInteger IntType | use ConvertedType INT_* or UINT_* |
LogicalTypeUnknown | no compatible ConvertedType |
LogicalTypeJson | use ConvertedType JSON |
LogicalTypeBson | use ConvertedType BSON |
LogicalTypeUuid | no compatible ConvertedType |
Instances
Read LogicalType Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS LogicalType # readList :: ReadS [LogicalType] # readPrec :: ReadPrec LogicalType # readListPrec :: ReadPrec [LogicalType] # | |
Show LogicalType Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> LogicalType -> ShowS # show :: LogicalType -> String # showList :: [LogicalType] -> ShowS # | |
Eq LogicalType Source # | |
Defined in Hydra.Langs.Parquet.Format | |
Ord LogicalType Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: LogicalType -> LogicalType -> Ordering # (<) :: LogicalType -> LogicalType -> Bool # (<=) :: LogicalType -> LogicalType -> Bool # (>) :: LogicalType -> LogicalType -> Bool # (>=) :: LogicalType -> LogicalType -> Bool # max :: LogicalType -> LogicalType -> LogicalType # min :: LogicalType -> LogicalType -> LogicalType # |
_LogicalType :: Name Source #
data SchemaElement Source #
Represents an element inside a schema definition. | - if it is a group (inner node) then type is undefined and num_children is defined | - if it is a primitive type (leaf) then type is defined and num_children is undefined | The nodes are listed in depth-first traversal order.
Constructors
SchemaElement | |
Fields
|
Instances
Read SchemaElement Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS SchemaElement # readList :: ReadS [SchemaElement] # | |
Show SchemaElement Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> SchemaElement -> ShowS # show :: SchemaElement -> String # showList :: [SchemaElement] -> ShowS # | |
Eq SchemaElement Source # | |
Defined in Hydra.Langs.Parquet.Format Methods (==) :: SchemaElement -> SchemaElement -> Bool # (/=) :: SchemaElement -> SchemaElement -> Bool # | |
Ord SchemaElement Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: SchemaElement -> SchemaElement -> Ordering # (<) :: SchemaElement -> SchemaElement -> Bool # (<=) :: SchemaElement -> SchemaElement -> Bool # (>) :: SchemaElement -> SchemaElement -> Bool # (>=) :: SchemaElement -> SchemaElement -> Bool # max :: SchemaElement -> SchemaElement -> SchemaElement # min :: SchemaElement -> SchemaElement -> SchemaElement # |
Encodings supported by Parquet. Not all encodings are valid for all types. These enums are also used to specify the encoding of definition and repetition levels. See the accompanying doc for the details of the more complicated encodings.
Constructors
EncodingPlain | Default encoding. | BOOLEAN - 1 bit per value. 0 is false; 1 is true. | INT32 - 4 bytes per value. Stored as little-endian. | INT64 - 8 bytes per value. Stored as little-endian. | FLOAT - 4 bytes per value. IEEE. Stored as little-endian. | DOUBLE - 8 bytes per value. IEEE. Stored as little-endian. | BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. | FIXED_LEN_BYTE_ARRAY - Just the bytes. |
EncodingRle | Group packed run length encoding. Usable for definition/repetition levels encoding and Booleans (on one bit: 0 is false; 1 is true.) |
EncodingBitPacked | Bit packed encoding. This can only be used if the data has a known max width. Usable for definition/repetition levels encoding. |
EncodingDeltaBinaryPacked | Delta encoding for integers. This can be used for int columns and works best on sorted data |
EncodingDeltaLengthByteArray | Encoding for byte arrays to separate the length values and the data. The lengths are encoded using DELTA_BINARY_PACKED |
EncodingDeltaByteArray | Incremental-encoded byte array. Prefix lengths are encoded using DELTA_BINARY_PACKED. Suffixes are stored as delta length byte arrays. |
EncodingRleDictionary | Dictionary encoding: the ids are encoded using the RLE encoding |
EncodingByteStreamSplit | Encoding for floating-point data. K byte-streams are created where K is the size in bytes of the data type. The individual bytes of an FP value are scattered to the corresponding stream and the streams are concatenated. This itself does not reduce the size of the data but can lead to better compression afterwards. |
_Encoding_rle :: Name Source #
data CompressionCodec Source #
Supported compression algorithms. Codecs added in format version X.Y can be read by readers based on X.Y and later. Codec support may vary between readers based on the format version and libraries available at runtime. See Compression.md for a detailed specification of these algorithms.
Constructors
CompressionCodecUncompressed | |
CompressionCodecSnappy | |
CompressionCodecGzip | |
CompressionCodecLzo | |
CompressionCodecBrotli | Added in 2.4 |
CompressionCodecZstd | Added in 2.4 |
CompressionCodecLz4Raw | Added in 2.9 |
Instances
data BoundaryOrder Source #
Enum to annotate whether lists of min/max elements inside ColumnIndex are ordered and if so, in which direction.
Instances
Read BoundaryOrder Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS BoundaryOrder # readList :: ReadS [BoundaryOrder] # | |
Show BoundaryOrder Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> BoundaryOrder -> ShowS # show :: BoundaryOrder -> String # showList :: [BoundaryOrder] -> ShowS # | |
Eq BoundaryOrder Source # | |
Defined in Hydra.Langs.Parquet.Format Methods (==) :: BoundaryOrder -> BoundaryOrder -> Bool # (/=) :: BoundaryOrder -> BoundaryOrder -> Bool # | |
Ord BoundaryOrder Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: BoundaryOrder -> BoundaryOrder -> Ordering # (<) :: BoundaryOrder -> BoundaryOrder -> Bool # (<=) :: BoundaryOrder -> BoundaryOrder -> Bool # (>) :: BoundaryOrder -> BoundaryOrder -> Bool # (>=) :: BoundaryOrder -> BoundaryOrder -> Bool # max :: BoundaryOrder -> BoundaryOrder -> BoundaryOrder # min :: BoundaryOrder -> BoundaryOrder -> BoundaryOrder # |
data DataPageHeader Source #
Data page header
Constructors
DataPageHeader | |
Fields
|
Instances
Read DataPageHeader Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS DataPageHeader # readList :: ReadS [DataPageHeader] # | |
Show DataPageHeader Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> DataPageHeader -> ShowS # show :: DataPageHeader -> String # showList :: [DataPageHeader] -> ShowS # | |
Eq DataPageHeader Source # | |
Defined in Hydra.Langs.Parquet.Format Methods (==) :: DataPageHeader -> DataPageHeader -> Bool # (/=) :: DataPageHeader -> DataPageHeader -> Bool # | |
Ord DataPageHeader Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: DataPageHeader -> DataPageHeader -> Ordering # (<) :: DataPageHeader -> DataPageHeader -> Bool # (<=) :: DataPageHeader -> DataPageHeader -> Bool # (>) :: DataPageHeader -> DataPageHeader -> Bool # (>=) :: DataPageHeader -> DataPageHeader -> Bool # max :: DataPageHeader -> DataPageHeader -> DataPageHeader # min :: DataPageHeader -> DataPageHeader -> DataPageHeader # |
data IndexPageHeader Source #
Constructors
IndexPageHeader | |
Instances
Read IndexPageHeader Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS IndexPageHeader # readList :: ReadS [IndexPageHeader] # | |
Show IndexPageHeader Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> IndexPageHeader -> ShowS # show :: IndexPageHeader -> String # showList :: [IndexPageHeader] -> ShowS # | |
Eq IndexPageHeader Source # | |
Defined in Hydra.Langs.Parquet.Format Methods (==) :: IndexPageHeader -> IndexPageHeader -> Bool # (/=) :: IndexPageHeader -> IndexPageHeader -> Bool # | |
Ord IndexPageHeader Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: IndexPageHeader -> IndexPageHeader -> Ordering # (<) :: IndexPageHeader -> IndexPageHeader -> Bool # (<=) :: IndexPageHeader -> IndexPageHeader -> Bool # (>) :: IndexPageHeader -> IndexPageHeader -> Bool # (>=) :: IndexPageHeader -> IndexPageHeader -> Bool # max :: IndexPageHeader -> IndexPageHeader -> IndexPageHeader # min :: IndexPageHeader -> IndexPageHeader -> IndexPageHeader # |
data DictionaryPageHeader Source #
The dictionary page must be placed at the first position of the column chunk if it is partly or completely dictionary encoded. At most one dictionary page can be placed in a column chunk.
Constructors
DictionaryPageHeader | |
Fields
|
Instances
data DataPageHeaderV2 Source #
New page format allowing reading levels without decompressing the data. Repetition and definition levels are uncompressed. The remaining section containing the data is compressed if is_compressed is true.
Constructors
DataPageHeaderV2 | |
Fields
|
Instances
data BloomFilterAlgorithm Source #
The algorithm used in Bloom filter.
Constructors
BloomFilterAlgorithmBlock | Block-based Bloom filter. |
Instances
data BloomFilterHash Source #
The hash function used in Bloom filter. This function takes the hash of a column value using plain encoding.
Constructors
BloomFilterHashXxhash | xxHash Strategy. |
Instances
Read BloomFilterHash Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS BloomFilterHash # readList :: ReadS [BloomFilterHash] # | |
Show BloomFilterHash Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> BloomFilterHash -> ShowS # show :: BloomFilterHash -> String # showList :: [BloomFilterHash] -> ShowS # | |
Eq BloomFilterHash Source # | |
Defined in Hydra.Langs.Parquet.Format Methods (==) :: BloomFilterHash -> BloomFilterHash -> Bool # (/=) :: BloomFilterHash -> BloomFilterHash -> Bool # | |
Ord BloomFilterHash Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: BloomFilterHash -> BloomFilterHash -> Ordering # (<) :: BloomFilterHash -> BloomFilterHash -> Bool # (<=) :: BloomFilterHash -> BloomFilterHash -> Bool # (>) :: BloomFilterHash -> BloomFilterHash -> Bool # (>=) :: BloomFilterHash -> BloomFilterHash -> Bool # max :: BloomFilterHash -> BloomFilterHash -> BloomFilterHash # min :: BloomFilterHash -> BloomFilterHash -> BloomFilterHash # |
data BloomFilterCompression Source #
The compression used in the Bloom filter.
Constructors
BloomFilterCompressionUncompressed |
Instances
data BloomFilterHeader Source #
Bloom filter header is stored at the beginning of the Bloom filter data of each column and is followed by its bitset.
Constructors
BloomFilterHeader | |
Fields
|
Instances
data PageHeader Source #
Constructors
PageHeader | |
Fields
|
Instances
Read PageHeader Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS PageHeader # readList :: ReadS [PageHeader] # readPrec :: ReadPrec PageHeader # readListPrec :: ReadPrec [PageHeader] # | |
Show PageHeader Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> PageHeader -> ShowS # show :: PageHeader -> String # showList :: [PageHeader] -> ShowS # | |
Eq PageHeader Source # | |
Defined in Hydra.Langs.Parquet.Format | |
Ord PageHeader Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: PageHeader -> PageHeader -> Ordering # (<) :: PageHeader -> PageHeader -> Bool # (<=) :: PageHeader -> PageHeader -> Bool # (>) :: PageHeader -> PageHeader -> Bool # (>=) :: PageHeader -> PageHeader -> Bool # max :: PageHeader -> PageHeader -> PageHeader # min :: PageHeader -> PageHeader -> PageHeader # |
_PageHeader :: Name Source #
Wrapper struct to store key values
Constructors
KeyValue | |
Fields |
_KeyValue_key :: Name Source #
data SortingColumn Source #
Wrapper struct to specify sort order
Constructors
SortingColumn | |
Fields
|
Instances
Read SortingColumn Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS SortingColumn # readList :: ReadS [SortingColumn] # | |
Show SortingColumn Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> SortingColumn -> ShowS # show :: SortingColumn -> String # showList :: [SortingColumn] -> ShowS # | |
Eq SortingColumn Source # | |
Defined in Hydra.Langs.Parquet.Format Methods (==) :: SortingColumn -> SortingColumn -> Bool # (/=) :: SortingColumn -> SortingColumn -> Bool # | |
Ord SortingColumn Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: SortingColumn -> SortingColumn -> Ordering # (<) :: SortingColumn -> SortingColumn -> Bool # (<=) :: SortingColumn -> SortingColumn -> Bool # (>) :: SortingColumn -> SortingColumn -> Bool # (>=) :: SortingColumn -> SortingColumn -> Bool # max :: SortingColumn -> SortingColumn -> SortingColumn # min :: SortingColumn -> SortingColumn -> SortingColumn # |
data PageEncodingStats Source #
Statistics of a given page type and encoding.
Constructors
PageEncodingStats | |
Fields
|
Instances
data ColumnMetaData Source #
Description for column metadata
Constructors
ColumnMetaData | |
Fields
|
Instances
Read ColumnMetaData Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS ColumnMetaData # readList :: ReadS [ColumnMetaData] # | |
Show ColumnMetaData Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> ColumnMetaData -> ShowS # show :: ColumnMetaData -> String # showList :: [ColumnMetaData] -> ShowS # | |
Eq ColumnMetaData Source # | |
Defined in Hydra.Langs.Parquet.Format Methods (==) :: ColumnMetaData -> ColumnMetaData -> Bool # (/=) :: ColumnMetaData -> ColumnMetaData -> Bool # | |
Ord ColumnMetaData Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: ColumnMetaData -> ColumnMetaData -> Ordering # (<) :: ColumnMetaData -> ColumnMetaData -> Bool # (<=) :: ColumnMetaData -> ColumnMetaData -> Bool # (>) :: ColumnMetaData -> ColumnMetaData -> Bool # (>=) :: ColumnMetaData -> ColumnMetaData -> Bool # max :: ColumnMetaData -> ColumnMetaData -> ColumnMetaData # min :: ColumnMetaData -> ColumnMetaData -> ColumnMetaData # |
data EncryptionWithColumnKey Source #
Constructors
EncryptionWithColumnKey | |
Fields
|
Instances
data ColumnCryptoMetaData Source #
Constructors
ColumnCryptoMetaDataEncryptionWithFooterKey EncryptionWithFooterKey | |
ColumnCryptoMetaDataEncryptionWithColumnKey EncryptionWithColumnKey |
Instances
data ColumnChunk Source #
Constructors
ColumnChunk | |
Fields
|
Instances
Read ColumnChunk Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS ColumnChunk # readList :: ReadS [ColumnChunk] # readPrec :: ReadPrec ColumnChunk # readListPrec :: ReadPrec [ColumnChunk] # | |
Show ColumnChunk Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> ColumnChunk -> ShowS # show :: ColumnChunk -> String # showList :: [ColumnChunk] -> ShowS # | |
Eq ColumnChunk Source # | |
Defined in Hydra.Langs.Parquet.Format | |
Ord ColumnChunk Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: ColumnChunk -> ColumnChunk -> Ordering # (<) :: ColumnChunk -> ColumnChunk -> Bool # (<=) :: ColumnChunk -> ColumnChunk -> Bool # (>) :: ColumnChunk -> ColumnChunk -> Bool # (>=) :: ColumnChunk -> ColumnChunk -> Bool # max :: ColumnChunk -> ColumnChunk -> ColumnChunk # min :: ColumnChunk -> ColumnChunk -> ColumnChunk # |
_ColumnChunk :: Name Source #
Constructors
RowGroup | |
Fields
|
data ColumnOrder Source #
Union to specify the order used for the min_value and max_value fields for a column. This union takes the role of an enhanced enum that allows rich elements (which will be needed for a collation-based ordering in the future). Possible values are: | * TypeDefinedOrder - the column uses the order defined by its logical or physical type (if there is no logical type). | If the reader does not support the value of this union, min and max stats for this column should be ignored.
Constructors
ColumnOrderTypeOrder | The sort orders for logical types are: | UTF8 - unsigned byte-wise comparison | INT8 - signed comparison | INT16 - signed comparison | INT32 - signed comparison | INT64 - signed comparison | UINT8 - unsigned comparison | UINT16 - unsigned comparison | UINT32 - unsigned comparison | UINT64 - unsigned comparison | DECIMAL - signed comparison of the represented value | DATE - signed comparison | TIME_MILLIS - signed comparison | TIME_MICROS - signed comparison | TIMESTAMP_MILLIS - signed comparison | TIMESTAMP_MICROS - signed comparison | INTERVAL - unsigned comparison | JSON - unsigned byte-wise comparison | BSON - unsigned byte-wise comparison | ENUM - unsigned byte-wise comparison | LIST - undefined | MAP - undefined | In the absence of logical types, the sort order is determined by the physical type: | BOOLEAN - false, true | INT32 - signed comparison | INT64 - signed comparison | INT96 (only used for legacy timestamps) - undefined | FLOAT - signed comparison of the represented value (*) | DOUBLE - signed comparison of the represented value (*) | BYTE_ARRAY - unsigned byte-wise comparison | FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison | (*) Because the sorting order is not specified properly for floating-point values (relations vs. total ordering), the following compatibility rules should be applied when reading statistics: | - If the min is a NaN, it should be ignored. | - If the max is a NaN, it should be ignored. | - If the min is +0, the row group may contain -0 values as well. | - If the max is -0, the row group may contain +0 values as well. | - When looking for NaN values, min and max should be ignored. |
Instances
Read ColumnOrder Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS ColumnOrder # readList :: ReadS [ColumnOrder] # readPrec :: ReadPrec ColumnOrder # readListPrec :: ReadPrec [ColumnOrder] # | |
Show ColumnOrder Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> ColumnOrder -> ShowS # show :: ColumnOrder -> String # showList :: [ColumnOrder] -> ShowS # | |
Eq ColumnOrder Source # | |
Defined in Hydra.Langs.Parquet.Format | |
Ord ColumnOrder Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: ColumnOrder -> ColumnOrder -> Ordering # (<) :: ColumnOrder -> ColumnOrder -> Bool # (<=) :: ColumnOrder -> ColumnOrder -> Bool # (>) :: ColumnOrder -> ColumnOrder -> Bool # (>=) :: ColumnOrder -> ColumnOrder -> Bool # max :: ColumnOrder -> ColumnOrder -> ColumnOrder # min :: ColumnOrder -> ColumnOrder -> ColumnOrder # |
_ColumnOrder :: Name Source #
data PageLocation Source #
Constructors
PageLocation | |
Fields
|
Instances
Read PageLocation Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS PageLocation # readList :: ReadS [PageLocation] # readPrec :: ReadPrec PageLocation # readListPrec :: ReadPrec [PageLocation] # | |
Show PageLocation Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> PageLocation -> ShowS # show :: PageLocation -> String # showList :: [PageLocation] -> ShowS # | |
Eq PageLocation Source # | |
Defined in Hydra.Langs.Parquet.Format | |
Ord PageLocation Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: PageLocation -> PageLocation -> Ordering # (<) :: PageLocation -> PageLocation -> Bool # (<=) :: PageLocation -> PageLocation -> Bool # (>) :: PageLocation -> PageLocation -> Bool # (>=) :: PageLocation -> PageLocation -> Bool # max :: PageLocation -> PageLocation -> PageLocation # min :: PageLocation -> PageLocation -> PageLocation # |
_PageLocation :: Name Source #
data OffsetIndex Source #
Constructors
OffsetIndex | |
Fields
|
Instances
Read OffsetIndex Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS OffsetIndex # readList :: ReadS [OffsetIndex] # readPrec :: ReadPrec OffsetIndex # readListPrec :: ReadPrec [OffsetIndex] # | |
Show OffsetIndex Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> OffsetIndex -> ShowS # show :: OffsetIndex -> String # showList :: [OffsetIndex] -> ShowS # | |
Eq OffsetIndex Source # | |
Defined in Hydra.Langs.Parquet.Format | |
Ord OffsetIndex Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: OffsetIndex -> OffsetIndex -> Ordering # (<) :: OffsetIndex -> OffsetIndex -> Bool # (<=) :: OffsetIndex -> OffsetIndex -> Bool # (>) :: OffsetIndex -> OffsetIndex -> Bool # (>=) :: OffsetIndex -> OffsetIndex -> Bool # max :: OffsetIndex -> OffsetIndex -> OffsetIndex # min :: OffsetIndex -> OffsetIndex -> OffsetIndex # |
_OffsetIndex :: Name Source #
data ColumnIndex Source #
Description for ColumnIndex. Element [i] of each array-valued field refers to the page at OffsetIndex.page_locations[i].
Constructors
ColumnIndex | |
Fields
|
Instances
Read ColumnIndex Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS ColumnIndex # readList :: ReadS [ColumnIndex] # readPrec :: ReadPrec ColumnIndex # readListPrec :: ReadPrec [ColumnIndex] # | |
Show ColumnIndex Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> ColumnIndex -> ShowS # show :: ColumnIndex -> String # showList :: [ColumnIndex] -> ShowS # | |
Eq ColumnIndex Source # | |
Defined in Hydra.Langs.Parquet.Format | |
Ord ColumnIndex Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: ColumnIndex -> ColumnIndex -> Ordering # (<) :: ColumnIndex -> ColumnIndex -> Bool # (<=) :: ColumnIndex -> ColumnIndex -> Bool # (>) :: ColumnIndex -> ColumnIndex -> Bool # (>=) :: ColumnIndex -> ColumnIndex -> Bool # max :: ColumnIndex -> ColumnIndex -> ColumnIndex # min :: ColumnIndex -> ColumnIndex -> ColumnIndex # |
_ColumnIndex :: Name Source #
data AesGcmV1 Source #
Constructors
AesGcmV1 | |
Fields
|
data AesGcmCtrV1 Source #
Constructors
AesGcmCtrV1 | |
Fields
|
Instances
Read AesGcmCtrV1 Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS AesGcmCtrV1 # readList :: ReadS [AesGcmCtrV1] # readPrec :: ReadPrec AesGcmCtrV1 # readListPrec :: ReadPrec [AesGcmCtrV1] # | |
Show AesGcmCtrV1 Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> AesGcmCtrV1 -> ShowS # show :: AesGcmCtrV1 -> String # showList :: [AesGcmCtrV1] -> ShowS # | |
Eq AesGcmCtrV1 Source # | |
Defined in Hydra.Langs.Parquet.Format | |
Ord AesGcmCtrV1 Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: AesGcmCtrV1 -> AesGcmCtrV1 -> Ordering # (<) :: AesGcmCtrV1 -> AesGcmCtrV1 -> Bool # (<=) :: AesGcmCtrV1 -> AesGcmCtrV1 -> Bool # (>) :: AesGcmCtrV1 -> AesGcmCtrV1 -> Bool # (>=) :: AesGcmCtrV1 -> AesGcmCtrV1 -> Bool # max :: AesGcmCtrV1 -> AesGcmCtrV1 -> AesGcmCtrV1 # min :: AesGcmCtrV1 -> AesGcmCtrV1 -> AesGcmCtrV1 # |
_AesGcmCtrV1 :: Name Source #
data EncryptionAlgorithm Source #
Instances
data FileMetaData Source #
Description for file metadata
Constructors
FileMetaData | |
Fields
|
Instances
Read FileMetaData Source # | |
Defined in Hydra.Langs.Parquet.Format Methods readsPrec :: Int -> ReadS FileMetaData # readList :: ReadS [FileMetaData] # readPrec :: ReadPrec FileMetaData # readListPrec :: ReadPrec [FileMetaData] # | |
Show FileMetaData Source # | |
Defined in Hydra.Langs.Parquet.Format Methods showsPrec :: Int -> FileMetaData -> ShowS # show :: FileMetaData -> String # showList :: [FileMetaData] -> ShowS # | |
Eq FileMetaData Source # | |
Defined in Hydra.Langs.Parquet.Format | |
Ord FileMetaData Source # | |
Defined in Hydra.Langs.Parquet.Format Methods compare :: FileMetaData -> FileMetaData -> Ordering # (<) :: FileMetaData -> FileMetaData -> Bool # (<=) :: FileMetaData -> FileMetaData -> Bool # (>) :: FileMetaData -> FileMetaData -> Bool # (>=) :: FileMetaData -> FileMetaData -> Bool # max :: FileMetaData -> FileMetaData -> FileMetaData # min :: FileMetaData -> FileMetaData -> FileMetaData # |
_FileMetaData :: Name Source #
data FileCryptoMetaData Source #
Crypto metadata for files with encrypted footer
Constructors
FileCryptoMetaData | |
Fields
|