{-# LANGUAGE OverloadedStrings #-}

module Hydra.Sources.Tier4.Langs.Parquet.Format where

import Hydra.Sources.Tier3.All
import Hydra.Dsl.Annotations
import Hydra.Dsl.Bootstrap
import Hydra.Dsl.Types as Types


-- Note: deprecated and trivial/empty type definitions are excluded from this model
parquetFormatModule :: Module
parquetFormatModule = Module ns elements [hydraCoreModule] tier0Modules $
    Just ("A model for the Parquet format. Based on the Thrift-based specification at:\n" ++
      "  https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift")
  where
    ns = Namespace "hydra/langs/parquet/format"
    def = datatype ns
    parquet = typeref ns
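    -- Local helpers: 'def' introduces a named type definition in this namespace,
    -- while 'parquet' builds a reference to a type defined in the same namespace.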

    elements = [
-- /**
--  * Types supported by Parquet.  These types are intended to be used in combination
--  * with the encodings to control the on disk storage format.
--  * For example INT16 is not included as a type since a good encoding of INT32
--  * would handle this.
--  */
-- enum Type {
      def "Type" $
        doc ("Types supported by Parquet.  These types are intended to be used in combination " ++
             "with the encodings to control the on disk storage format. " ++
             "For example INT16 is not included as a type since a good encoding of INT32 " ++
             "would handle this.") $
        enum [
--   BOOLEAN = 0;
          "boolean",
--   INT32 = 1;
          "int32",
--   INT64 = 2;
          "int64",
--   INT96 = 3;  // deprecated, only used by legacy implementations.
--   FLOAT = 4;
          "float",
--   DOUBLE = 5;
          "double",
--   BYTE_ARRAY = 6;
          "byteArray",
--   FIXED_LEN_BYTE_ARRAY = 7;
          "fixedLenByteArray"],
-- }

-- /**
--  * DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet.
--  * ConvertedType is superseded by LogicalType.  This enum should not be extended.
--  *
--  * See LogicalTypes.md for conversion between ConvertedType and LogicalType.
--  */
-- enum ConvertedType {
--   /** a BYTE_ARRAY actually contains UTF8 encoded chars */
--   UTF8 = 0;
--
--   /** a map is converted as an optional field containing a repeated key/value pair */
--   MAP = 1;
--
--   /** a key/value pair is converted into a group of two fields */
--   MAP_KEY_VALUE = 2;
--
--   /** a list is converted into an optional field containing a repeated field for its
--    * values */
--   LIST = 3;
--
--   /** an enum is converted into a binary field */
--   ENUM = 4;
--
--   /**
--    * A decimal value.
--    *
--    * This may be used to annotate binary or fixed primitive types. The
--    * underlying byte array stores the unscaled value encoded as two's
--    * complement using big-endian byte order (the most significant byte is the
--    * zeroth element). The value of the decimal is the value * 10^{-scale}.
--    *
--    * This must be accompanied by a (maximum) precision and a scale in the
--    * SchemaElement. The precision specifies the number of digits in the decimal
--    * and the scale stores the location of the decimal point. For example 1.23
--    * would have precision 3 (3 total digits) and scale 2 (the decimal point is
--    * 2 digits over).
--    */
--   DECIMAL = 5;
--
--   /**
--    * A Date
--    *
--    * Stored as days since Unix epoch, encoded as the INT32 physical type.
--    *
--    */
--   DATE = 6;
--
--   /**
--    * A time
--    *
--    * The total number of milliseconds since midnight.  The value is stored
--    * as an INT32 physical type.
--    */
--   TIME_MILLIS = 7;
--
--   /**
--    * A time.
--    *
--    * The total number of microseconds since midnight.  The value is stored as
--    * an INT64 physical type.
--    */
--   TIME_MICROS = 8;
--
--   /**
--    * A date/time combination
--    *
--    * Date and time recorded as milliseconds since the Unix epoch.  Recorded as
--    * a physical type of INT64.
--    */
--   TIMESTAMP_MILLIS = 9;
--
--   /**
--    * A date/time combination
--    *
--    * Date and time recorded as microseconds since the Unix epoch.  The value is
--    * stored as an INT64 physical type.
--    */
--   TIMESTAMP_MICROS = 10;
--
--
--   /**
--    * An unsigned integer value.
--    *
--    * The number describes the maximum number of meaningful data bits in
--    * the stored value. 8, 16 and 32 bit values are stored using the
--    * INT32 physical type.  64 bit values are stored using the INT64
--    * physical type.
--    *
--    */
--   UINT_8 = 11;
--   UINT_16 = 12;
--   UINT_32 = 13;
--   UINT_64 = 14;
--
--   /**
--    * A signed integer value.
--    *
--    * The number describes the maximum number of meaningful data bits in
--    * the stored value. 8, 16 and 32 bit values are stored using the
--    * INT32 physical type.  64 bit values are stored using the INT64
--    * physical type.
--    *
--    */
--   INT_8 = 15;
--   INT_16 = 16;
--   INT_32 = 17;
--   INT_64 = 18;
--
--   /**
--    * An embedded JSON document
--    *
--    * A JSON document embedded within a single UTF8 column.
--    */
--   JSON = 19;
--
--   /**
--    * An embedded BSON document
--    *
--    * A BSON document embedded within a single BINARY column.
--    */
--   BSON = 20;
--
--   /**
--    * An interval of time
--    *
--    * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12
--    * This data is composed of three separate little endian unsigned
--    * integers.  Each stores a component of a duration of time.  The first
--    * integer identifies the number of months associated with the duration,
--    * the second identifies the number of days associated with the duration
--    * and the third identifies the number of milliseconds associated with
--    * the provided duration.  This duration of time is independent of any
--    * particular timezone or date.
--    */
--   INTERVAL = 21;
--}

-- /**
--  * Representation of Schemas
--  */
-- enum FieldRepetitionType {
      def "FieldRepetitionType" $
        doc "Representation of Schemas" $
        union [
--   /** This field is required (can not be null) and each record has exactly 1 value. */
--   REQUIRED = 0;
          "required">: doc "This field is required (can not be null) and each record has exactly 1 value." unit,
--
--   /** The field is optional (can be null) and each record has 0 or 1 values. */
--   OPTIONAL = 1;
          "optional">: doc "The field is optional (can be null) and each record has 0 or 1 values." unit,
--
--   /** The field is repeated and can contain 0 or more values */
--   REPEATED = 2;
          "repeated">: doc "The field is repeated and can contain 0 or more values" unit],
-- }
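-- Note: Thrift enums whose values carry their own documentation are modeled here as unions
-- of unit-valued fields (so each alternative keeps its doc), while undocumented enums are
-- modeled with 'enum'.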

-- /**
--  * Statistics per row group and per page
--  * All fields are optional.
--  */
-- struct Statistics {
      def "Statistics" $
        doc "Statistics per row group and per page. All fields are optional." $
        record [
--    /**
--     * DEPRECATED: min and max value of the column. Use min_value and max_value.
--     *
--     * Values are encoded using PLAIN encoding, except that variable-length byte
--     * arrays do not include a length prefix.
--     *
--     * These fields encode min and max values determined by signed comparison
--     * only. New files should use the correct order for a column's logical type
--     * and store the values in the min_value and max_value fields.
--     *
--     * To support older readers, these may be set when the column order is
--     * signed.
--     */
--    1: optional binary max;
--    2: optional binary min;
--    /** count of null value in the column */
--    3: optional i64 null_count;
        "nullCount">: optional uint64,
--    /** count of distinct values occurring */
--    4: optional i64 distinct_count;
        "distinctCount">: optional uint64,
--    /**
--     * Min and max values for the column, determined by its ColumnOrder.
--     *
--     * Values are encoded using PLAIN encoding, except that variable-length byte
--     * arrays do not include a length prefix.
--     */
--    5: optional binary max_value;
        "maxValue">:
          doc ("Max value for the column, determined by its ColumnOrder. " ++
               "Values are encoded using PLAIN encoding, except that variable-length byte " ++
               "arrays do not include a length prefix.") $
          optional binary,
--    6: optional binary min_value;
        "minValue">:
          doc ("Min value for the column, determined by its ColumnOrder. " ++
               "Values are encoded using PLAIN encoding, except that variable-length byte " ++
               "arrays do not include a length prefix.") $
          optional binary],
-- }
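-- Note: the deprecated 'max' (1) and 'min' (2) fields of the Thrift struct are omitted above,
-- in keeping with the exclusion of deprecated definitions noted at the top of this module.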

-- /** Empty structs to use as logical type annotations */
-- struct StringType {}  // allowed for BINARY, must be encoded with UTF-8
-- struct UUIDType {}    // allowed for FIXED[16], must encoded raw UUID bytes
-- struct MapType {}     // see LogicalTypes.md
-- struct ListType {}    // see LogicalTypes.md
-- struct EnumType {}    // allowed for BINARY, must be encoded with UTF-8
-- struct DateType {}    // allowed for INT32
-- /**
--  * Logical type to annotate a column that is always null.
--  *
--  * Sometimes when discovering the schema of existing data, values are always
--  * null and the physical type can't be determined. This annotation signals
--  * the case where the physical type was guessed from all null values.
--  */
-- struct NullType {}    // allowed for any physical type, only null values stored
-- /**
--  * Decimal logical type annotation
--  *
--  * To maintain forward-compatibility in v1, implementations using this logical
--  * type must also set scale and precision on the annotated SchemaElement.
--  *
--  * Allowed for physical types: INT32, INT64, FIXED, and BINARY
--  */
-- struct DecimalType {
      def "DecimalType" $
        doc ("Decimal logical type annotation. " ++
             "To maintain forward-compatibility in v1, implementations using this logical " ++
             "type must also set scale and precision on the annotated SchemaElement. " ++
             "Allowed for physical types: INT32, INT64, FIXED, and BINARY") $
        record [
--   1: required i32 scale
          "scale">: int32,
--   2: required i32 precision
          "precision">: int32],
-- }

-- /** Time units for logical types */
-- struct MilliSeconds {}
-- struct MicroSeconds {}
-- struct NanoSeconds {}
-- union TimeUnit {
      def "TimeUnit" $
        enum [
--   1: MilliSeconds MILLIS
          "millis",
--   2: MicroSeconds MICROS
          "micros",
--   3: NanoSeconds NANOS
          "nanos"],
-- }

-- /**
--  * Timestamp logical type annotation
--  *
--  * Allowed for physical types: INT64
--  */
-- struct TimestampType {
      def "TimestampType" $
        doc ("Timestamp logical type annotation. " ++
             "Allowed for physical types: INT64") $
        record [
--   1: required bool isAdjustedToUTC
          "isAdjustedToUtc">: boolean,
--   2: required TimeUnit unit
          "unit">: parquet "TimeUnit"],
-- }

-- /**
--  * Time logical type annotation
--  *
--  * Allowed for physical types: INT32 (millis), INT64 (micros, nanos)
--  */
-- struct TimeType {
      def "TimeType" $
        doc ("Time logical type annotation. " ++
             "Allowed for physical types: INT32 (millis), INT64 (micros, nanos)") $
        record [
--   1: required bool isAdjustedToUTC
          "isAdjustedToUtc">: boolean,
--   2: required TimeUnit unit
          "unit">: parquet "TimeUnit"],
-- }

-- /**
--  * Integer logical type annotation
--  *
--  * bitWidth must be 8, 16, 32, or 64.
--  *
--  * Allowed for physical types: INT32, INT64
--  */
-- struct IntType {
      def "IntType" $
        doc ("Integer logical type annotation. " ++
             "bitWidth must be 8, 16, 32, or 64. " ++
             "Allowed for physical types: INT32, INT64") $
        record [
--   1: required i8 bitWidth
          "bitWidth">: uint8,
--   2: required bool isSigned
          "isSigned">: boolean],
-- }

-- /**
--  * Embedded JSON logical type annotation
--  *
--  * Allowed for physical types: BINARY
--  */
-- struct JsonType {
-- }
--
-- /**
--  * Embedded BSON logical type annotation
--  *
--  * Allowed for physical types: BINARY
--  */
-- struct BsonType {
-- }
--
-- /**
--  * LogicalType annotations to replace ConvertedType.
--  *
--  * To maintain compatibility, implementations using LogicalType for a
--  * SchemaElement must also set the corresponding ConvertedType (if any)
--  * from the following table.
--  */
-- union LogicalType {
      def "LogicalType" $
        doc ("LogicalType annotations to replace ConvertedType. " ++
             "To maintain compatibility, implementations using LogicalType for a " ++
             "SchemaElement must also set the corresponding ConvertedType (if any) " ++
             "from the following table.") $
        union [
--   1:  StringType STRING       // use ConvertedType UTF8
          "string">: doc "use ConvertedType UTF8" unit,
--   2:  MapType MAP             // use ConvertedType MAP
          "map">: doc "use ConvertedType MAP" unit,
--   3:  ListType LIST           // use ConvertedType LIST
          "list">: doc "use ConvertedType LIST" unit,
--   4:  EnumType ENUM           // use ConvertedType ENUM
          "enum">: doc "use ConvertedType ENUM" unit,
--   5:  DecimalType DECIMAL     // use ConvertedType DECIMAL + SchemaElement.{scale, precision}
          "decimal">:
            doc "use ConvertedType DECIMAL + SchemaElement.{scale, precision}" $
            parquet "DecimalType",
--   6:  DateType DATE           // use ConvertedType DATE
          "date">: doc "use ConvertedType DATE" unit,
--
--   // use ConvertedType TIME_MICROS for TIME(isAdjustedToUTC = *, unit = MICROS)
--   // use ConvertedType TIME_MILLIS for TIME(isAdjustedToUTC = *, unit = MILLIS)
--   7:  TimeType TIME
          "time">:
            doc ("use ConvertedType TIME_MICROS for TIME(isAdjustedToUTC = *, unit = MICROS). " ++
                 "use ConvertedType TIME_MILLIS for TIME(isAdjustedToUTC = *, unit = MILLIS)") $
            parquet "TimeType",
--
--   // use ConvertedType TIMESTAMP_MICROS for TIMESTAMP(isAdjustedToUTC = *, unit = MICROS)
--   // use ConvertedType TIMESTAMP_MILLIS for TIMESTAMP(isAdjustedToUTC = *, unit = MILLIS)
--   8:  TimestampType TIMESTAMP
          "timestamp">:
            doc ("use ConvertedType TIMESTAMP_MICROS for TIMESTAMP(isAdjustedToUTC = *, unit = MICROS). " ++
                 "use ConvertedType TIMESTAMP_MILLIS for TIMESTAMP(isAdjustedToUTC = *, unit = MILLIS)") $
            parquet "TimestampType",
--
--   // 9: reserved for INTERVAL
--   10: IntType INTEGER         // use ConvertedType INT_* or UINT_*
          "integer">:
            doc "use ConvertedType INT_* or UINT_*" $
            parquet "IntType",
--   11: NullType UNKNOWN        // no compatible ConvertedType
          "unknown">:
            doc "no compatible ConvertedType" unit,
--   12: JsonType JSON           // use ConvertedType JSON
          "json">: doc "use ConvertedType JSON" unit,
--   13: BsonType BSON           // use ConvertedType BSON
          "bson">: doc "use ConvertedType BSON" unit,
--   14: UUIDType UUID           // no compatible ConvertedType
          "uuid">: doc "no compatible ConvertedType" unit],
-- }

-- /**
--  * Represents an element inside a schema definition.
--  *  - if it is a group (inner node) then type is undefined and num_children is defined
--  *  - if it is a primitive type (leaf) then type is defined and num_children is undefined
--  * the nodes are listed in depth first traversal order.
--  */
-- struct SchemaElement {
      def "SchemaElement" $
        doc ("Represents an element inside a schema definition.\n" ++
             "- if it is a group (inner node) then type is undefined and num_children is defined\n" ++
             "- if it is a primitive type (leaf) then type is defined and num_children is undefined\n" ++
             "the nodes are listed in depth first traversal order.") $
        record [
--   /** Data type for this field. Not set if the current element is a non-leaf node */
--   1: optional Type type;
          "type">:
            doc "Data type for this field. Not set if the current element is a non-leaf node" $
            optional $ parquet "Type",
--
--   /** If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values.
--    * Otherwise, if specified, this is the maximum bit length to store any of the values.
--    * (e.g. a low cardinality INT col could have this set to 3).  Note that this is
--    * in the schema, and therefore fixed for the entire file.
--    */
--   2: optional i32 type_length;
          "typeLength">:
            doc ("If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values. " ++
                 "Otherwise, if specified, this is the maximum bit length to store any of the values. " ++
                 "(e.g. a low cardinality INT col could have this set to 3).  Note that this is " ++
                 "in the schema, and therefore fixed for the entire file.") $
            optional int32,
--
--   /** repetition of the field. The root of the schema does not have a repetition_type.
--    * All other nodes must have one */
--   3: optional FieldRepetitionType repetition_type;
          "repetitionType">:
            doc ("repetition of the field. The root of the schema does not have a repetition_type. " ++
                 "All other nodes must have one") $
            optional $ parquet "FieldRepetitionType",
--
--   /** Name of the field in the schema */
--   4: required string name;
          "name">:
            doc "Name of the field in the schema"
            string,
--
--   /** Nested fields.  Since thrift does not support nested fields,
--    * the nesting is flattened to a single list by a depth-first traversal.
--    * The children count is used to construct the nested relationship.
--    * This field is not set when the element is a primitive type
--    */
--   5: optional i32 num_children;
          "numChildren">:
            doc ("Nested fields.  Since thrift does not support nested fields, " ++
                 "the nesting is flattened to a single list by a depth-first traversal. " ++
                 "The children count is used to construct the nested relationship. " ++
                 "This field is not set when the element is a primitive type") $
            optional int32,
--
--   /**
--    * DEPRECATED: When the schema is the result of a conversion from another model.
--    * Used to record the original type to help with cross conversion.
--    *
--    * This is superseded by logicalType.
--    */
--   6: optional ConvertedType converted_type;
--
--   /**
--    * DEPRECATED: Used when this column contains decimal data.
--    * See the DECIMAL converted type for more details.
--    *
--    * This is superseded by using the DecimalType annotation in logicalType.
--    */
--   7: optional i32 scale
--   8: optional i32 precision
--
--   /** When the original schema supports field ids, this will save the
--    * original field id in the parquet schema
--    */
--   9: optional i32 field_id;
          "fieldId">:
            doc ("When the original schema supports field ids, this will save the " ++
                 "original field id in the parquet schema") $
            optional int32,
--
--   /**
--    * The logical type of this SchemaElement
--    *
--    * LogicalType replaces ConvertedType, but ConvertedType is still required
--    * for some logical types to ensure forward-compatibility in format v1.
--    */
--   10: optional LogicalType logicalType
          "logicalType">:
            doc ("The logical type of this SchemaElement. " ++
                 "LogicalType replaces ConvertedType, but ConvertedType is still required " ++
                 "for some logical types to ensure forward-compatibility in format v1.") $
            optional $ parquet "LogicalType"],
-- }

-- /**
--  * Encodings supported by Parquet.  Not all encodings are valid for all types.  These
--  * enums are also used to specify the encoding of definition and repetition levels.
--  * See the accompanying doc for the details of the more complicated encodings.
--  */
-- enum Encoding {
      def "Encoding" $
        doc ("Encodings supported by Parquet.  Not all encodings are valid for all types.  These " ++
             "enums are also used to specify the encoding of definition and repetition levels. " ++
             "See the accompanying doc for the details of the more complicated encodings.") $
        union [
--   /** Default encoding.
--    * BOOLEAN - 1 bit per value. 0 is false; 1 is true.
--    * INT32 - 4 bytes per value.  Stored as little-endian.
--    * INT64 - 8 bytes per value.  Stored as little-endian.
--    * FLOAT - 4 bytes per value.  IEEE. Stored as little-endian.
--    * DOUBLE - 8 bytes per value.  IEEE. Stored as little-endian.
--    * BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.
--    * FIXED_LEN_BYTE_ARRAY - Just the bytes.
--    */
--   PLAIN = 0;
          "plain">:
            doc ("Default encoding.\n" ++
                 "BOOLEAN - 1 bit per value. 0 is false; 1 is true.\n" ++
                 "INT32 - 4 bytes per value.  Stored as little-endian.\n" ++
                 "INT64 - 8 bytes per value.  Stored as little-endian.\n" ++
                 "FLOAT - 4 bytes per value.  IEEE. Stored as little-endian.\n" ++
                 "DOUBLE - 8 bytes per value.  IEEE. Stored as little-endian.\n" ++
                 "BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.\n" ++
                 "FIXED_LEN_BYTE_ARRAY - Just the bytes.") $
            unit,
--
--   /** Group VarInt encoding for INT32/INT64.
--    * This encoding is deprecated. It was never used
--    */
--   //  GROUP_VAR_INT = 1;
--
--   /**
--    * Deprecated: Dictionary encoding. The values in the dictionary are encoded in the
--    * plain type.
--    * in a data page use RLE_DICTIONARY instead.
--    * in a Dictionary page use PLAIN instead
--    */
--   PLAIN_DICTIONARY = 2;
--
--   /** Group packed run length encoding. Usable for definition/repetition levels
--    * encoding and Booleans (on one bit: 0 is false; 1 is true.)
--    */
--   RLE = 3;
          "rle">:
            doc ("Group packed run length encoding. Usable for definition/repetition levels " ++
                 "encoding and Booleans (on one bit: 0 is false; 1 is true.)") unit,
--
--   /** Bit packed encoding.  This can only be used if the data has a known max
--    * width.  Usable for definition/repetition levels encoding.
--    */
--   BIT_PACKED = 4;
          "bitPacked">:
            doc ("Bit packed encoding.  This can only be used if the data has a known max " ++
                 "width.  Usable for definition/repetition levels encoding.") unit,
--
--   /** Delta encoding for integers. This can be used for int columns and works best
--    * on sorted data
--    */
--   DELTA_BINARY_PACKED = 5;
          "deltaBinaryPacked">:
            doc ("Delta encoding for integers. This can be used for int columns and works best " ++
                 "on sorted data") unit,
--
--   /** Encoding for byte arrays to separate the length values and the data. The lengths
--    * are encoded using DELTA_BINARY_PACKED
--    */
--   DELTA_LENGTH_BYTE_ARRAY = 6;
          "deltaLengthByteArray">:
            doc ("Encoding for byte arrays to separate the length values and the data. The lengths " ++
                 "are encoded using DELTA_BINARY_PACKED") unit,
--
--   /** Incremental-encoded byte array. Prefix lengths are encoded using DELTA_BINARY_PACKED.
--    * Suffixes are stored as delta length byte arrays.
--    */
--   DELTA_BYTE_ARRAY = 7;
          "deltaByteArray">:
            doc ("Incremental-encoded byte array. Prefix lengths are encoded using DELTA_BINARY_PACKED. " ++
                 "Suffixes are stored as delta length byte arrays.") unit,
--
--   /** Dictionary encoding: the ids are encoded using the RLE encoding
--    */
--   RLE_DICTIONARY = 8;
          "rleDictionary">:
            doc ("Dictionary encoding: the ids are encoded using the RLE encoding") unit,
--
--   /** Encoding for floating-point data.
--       K byte-streams are created where K is the size in bytes of the data type.
--       The individual bytes of an FP value are scattered to the corresponding stream and
--       the streams are concatenated.
--       This itself does not reduce the size of the data but can lead to better compression
--       afterwards.
--    */
--   BYTE_STREAM_SPLIT = 9;
          "byteStreamSplit">:
            doc ("Encoding for floating-point data. " ++
                 "K byte-streams are created where K is the size in bytes of the data type. " ++
                 "The individual bytes of an FP value are scattered to the corresponding stream and " ++
                 "the streams are concatenated. " ++
                 "This itself does not reduce the size of the data but can lead to better compression " ++
                 "afterwards.") unit],
-- }

-- /**
--  * Supported compression algorithms.
--  *
--  * Codecs added in format version X.Y can be read by readers based on X.Y and later.
--  * Codec support may vary between readers based on the format version and
--  * libraries available at runtime.
--  *
--  * See Compression.md for a detailed specification of these algorithms.
--  */
-- enum CompressionCodec {
      def "CompressionCodec" $
        doc ("Supported compression algorithms. " ++
             "Codecs added in format version X.Y can be read by readers based on X.Y and later. " ++
             "Codec support may vary between readers based on the format version and " ++
             "libraries available at runtime. " ++
             "See Compression.md for a detailed specification of these algorithms.") $
        union [
--   UNCOMPRESSED = 0;
          "uncompressed">: unit,
--   SNAPPY = 1;
          "snappy">: unit,
--   GZIP = 2;
          "gzip">: unit,
--   LZO = 3;
          "lzo">: unit,
--   BROTLI = 4;  // Added in 2.4
          "brotli">:
            doc "Added in 2.4" unit,
--   LZ4 = 5;     // DEPRECATED (Added in 2.4)
--   ZSTD = 6;    // Added in 2.4
          "zstd">:
            doc "Added in 2.4" unit,
--   LZ4_RAW = 7; // Added in 2.9
          "lz4Raw">:
            doc "Added in 2.9" unit],
-- }

-- enum PageType {
      def "PageType" $
        enum [
--   DATA_PAGE = 0;
          "dataPage",
--   INDEX_PAGE = 1;
          "indexPage",
--   DICTIONARY_PAGE = 2;
          "dictionaryPage",
--   DATA_PAGE_V2 = 3;
          "dataPageV2"],
-- }

-- /**
--  * Enum to annotate whether lists of min/max elements inside ColumnIndex
--  * are ordered and if so, in which direction.
--  */
-- enum BoundaryOrder {
      def "BoundaryOrder" $
        doc ("Enum to annotate whether lists of min/max elements inside ColumnIndex " ++
             "are ordered and if so, in which direction.") $
        enum [
--   UNORDERED = 0;
          "unordered",
--   ASCENDING = 1;
          "ascending",
--   DESCENDING = 2;
          "descending"],
-- }

-- /** Data page header */
-- struct DataPageHeader {
      def "DataPageHeader" $
        doc "Data page header" $
        record [
--   /** Number of values, including NULLs, in this data page. **/
--   1: required i32 num_values
          "numValues">:
            doc "Number of values, including NULLs, in this data page."
            int32,
--
--   /** Encoding used for this data page **/
--   2: required Encoding encoding
          "encoding">:
            doc "Encoding used for this data page" $
            parquet "Encoding",
--
--   /** Encoding used for definition levels **/
--   3: required Encoding definition_level_encoding;
          "definitionLevelEncoding">:
            doc "Encoding used for definition levels" $
            parquet "Encoding",
--
--   /** Encoding used for repetition levels **/
--   4: required Encoding repetition_level_encoding;
          "repetitionLevelEncoding">:
            doc "Encoding used for repetition levels" $
            parquet "Encoding",
--
--   /** Optional statistics for the data in this page**/
--   5: optional Statistics statistics;
          "statistics">:
            doc "Optional statistics for the data in this page" $
            optional $ parquet "Statistics"],
-- }
--
-- struct IndexPageHeader {
      def "IndexPageHeader" $ record [],
--   // TODO
-- }

-- /**
--  * The dictionary page must be placed at the first position of the column chunk
--  * if it is partly or completely dictionary encoded. At most one dictionary page
--  * can be placed in a column chunk.
--  **/
-- struct DictionaryPageHeader {
      def "DictionaryPageHeader" $
        doc ("The dictionary page must be placed at the first position of the column chunk " ++
             "if it is partly or completely dictionary encoded. At most one dictionary page " ++
             "can be placed in a column chunk.") $
        record [
--   /** Number of values in the dictionary **/
--   1: required i32 num_values;
          "numValues">:
            doc "Number of values in the dictionary" $
            int32,
--
--   /** Encoding using this dictionary page **/
--   2: required Encoding encoding
          "encoding">:
            doc "Encoding using this dictionary page" $
            parquet "Encoding",
--
--   /** If true, the entries in the dictionary are sorted in ascending order **/
--   3: optional bool is_sorted;
          "isSorted">:
            doc "If true, the entries in the dictionary are sorted in ascending order" $
            optional boolean],
-- }

-- /**
--  * New page format allowing reading levels without decompressing the data
--  * Repetition and definition levels are uncompressed
--  * The remaining section containing the data is compressed if is_compressed is true
--  **/
-- struct DataPageHeaderV2 {
      def "DataPageHeaderV2" $
        doc ("New page format allowing reading levels without decompressing the data. " ++
             "Repetition and definition levels are uncompressed. " ++
             "The remaining section containing the data is compressed if is_compressed is true") $
        record [
--   /** Number of values, including NULLs, in this data page. **/
--   1: required i32 num_values
          "numValues">:
            doc "Number of values, including NULLs, in this data page." $
            int32,
--   /** Number of NULL values, in this data page.
--       Number of non-null = num_values - num_nulls which is also the number of values in the data section **/
--   2: required i32 num_nulls
          "numNulls">:
            doc ("Number of NULL values, in this data page. " ++
                 "Number of non-null = num_values - num_nulls which is also the number of values in the data section") $
            int32,
--   /** Number of rows in this data page. which means pages change on record boundaries (r = 0) **/
--   3: required i32 num_rows
          "numRows">:
            doc "Number of rows in this data page, which means pages change on record boundaries (r = 0)" $
            int32,
--   /** Encoding used for data in this page **/
--   4: required Encoding encoding
          "encoding">:
            doc "Encoding used for data in this page" $
            parquet "Encoding",
--
--   // repetition levels and definition levels are always using RLE (without size in it)
--
--   /** length of the definition levels */
--   5: required i32 definition_levels_byte_length;
          "definitionLevelsByteLength">:
            doc "length of the definition levels" $
            int32,
--   /** length of the repetition levels */
--   6: required i32 repetition_levels_byte_length;
          "repetitionLevelsByteLength">:
            doc "length of the repetition levels" $
            int32,
--
--   /**  whether the values are compressed.
--   Which means the section of the page between
--   definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included)
--   is compressed with the compression_codec.
--   If missing it is considered compressed */
--   7: optional bool is_compressed = 1;
          "isCompressed">:
            doc ("whether the values are compressed. " ++
                 "Which means the section of the page between " ++
                 "definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) " ++
                 "is compressed with the compression_codec. " ++
                 "If missing it is considered compressed") $
            optional boolean,
--
--   /** optional statistics for the data in this page **/
--   8: optional Statistics statistics;
          "statistics">:
            doc "optional statistics for the data in this page" $
            optional $ parquet "Statistics"],
-- }

-- /** Block-based algorithm type annotation. **/
-- struct SplitBlockAlgorithm {}
-- /** The algorithm used in Bloom filter. **/
-- union BloomFilterAlgorithm {
      def "BloomFilterAlgorithm" $
        doc "The algorithm used in Bloom filter." $
        union [
--   /** Block-based Bloom filter. **/
--   1: SplitBlockAlgorithm BLOCK;
          "block">:
            doc "Block-based Bloom filter." unit],
-- }

-- /** Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash
--  * algorithm. It uses 64 bits version of xxHash.
--  **/
-- struct XxHash {}
--
-- /**
--  * The hash function used in Bloom filter. This function takes the hash of a column value
--  * using plain encoding.
--  **/
--  union BloomFilterHash {
      def "BloomFilterHash" $
        doc ("The hash function used in Bloom filter. This function takes the hash of a column value " ++
             "using plain encoding.") $
        union [
--   /** xxHash Strategy. **/
--   1: XxHash XXHASH;
          "xxhash">:
            doc "xxHash Strategy." unit],
-- }

-- /**
--  * The compression used in the Bloom filter.
--  **/
-- struct Uncompressed {}
-- union BloomFilterCompression {
      def "BloomFilterCompression" $
        doc "The compression used in the Bloom filter." $
        enum [
--   1: Uncompressed UNCOMPRESSED;
          "uncompressed"],
-- }

-- /**
--   * Bloom filter header is stored at beginning of Bloom filter data of each column
--   * and followed by its bitset.
--   **/
-- struct BloomFilterHeader {
      def "BloomFilterHeader" $
        doc ("Bloom filter header is stored at beginning of Bloom filter data of each column " ++
             "and followed by its bitset.") $
        record [
--   /** The size of bitset in bytes **/
--   1: required i32 numBytes;
          "numBytes">:
            doc "The size of bitset in bytes" $
            int32,
--   /** The algorithm for setting bits. **/
--   2: required BloomFilterAlgorithm algorithm;
          "algorithm">:
            doc "The algorithm for setting bits." $
            parquet "BloomFilterAlgorithm",
--   /** The hash function used for Bloom filter. **/
--   3: required BloomFilterHash hash;
          "hash">:
            doc "The hash function used for Bloom filter." $
            parquet "BloomFilterHash",
--   /** The compression used in the Bloom filter **/
--   4: required BloomFilterCompression compression;
          "compression">:
            doc "The compression used in the Bloom filter" $
            parquet "BloomFilterCompression"],
-- }

-- struct PageHeader {
      def "PageHeader" $
        record [
--   /** the type of the page: indicates which of the *_header fields is set **/
--   1: required PageType type
          "type">:
            doc "the type of the page: indicates which of the *_header fields is set" $
            parquet "PageType",
--
--   /** Uncompressed page size in bytes (not including this header) **/
--   2: required i32 uncompressed_page_size
          "uncompressedPageSize">:
            doc "Uncompressed page size in bytes (not including this header)" $
            int32,
--
--   /** Compressed (and potentially encrypted) page size in bytes, not including this header **/
--   3: required i32 compressed_page_size
          "compressedPageSize">:
            doc "Compressed (and potentially encrypted) page size in bytes, not including this header" $
            int32,
--
--   /** The 32bit CRC for the page, to be calculated as follows:
--    * - Using the standard CRC32 algorithm
--    * - On the data only, i.e. this header should not be included. 'Data'
--    *   hereby refers to the concatenation of the repetition levels, the
--    *   definition levels and the column value, in this exact order.
--    * - On the encoded versions of the repetition levels, definition levels and
--    *   column values
--    * - On the compressed versions of the repetition levels, definition levels
--    *   and column values where possible;
--    *   - For v1 data pages, the repetition levels, definition levels and column
--    *     values are always compressed together. If a compression scheme is
--    *     specified, the CRC shall be calculated on the compressed version of
--    *     this concatenation. If no compression scheme is specified, the CRC
--    *     shall be calculated on the uncompressed version of this concatenation.
--    *   - For v2 data pages, the repetition levels and definition levels are
--    *     handled separately from the data and are never compressed (only
--    *     encoded). If a compression scheme is specified, the CRC shall be
--    *     calculated on the concatenation of the uncompressed repetition levels,
--    *     uncompressed definition levels and the compressed column values.
--    *     If no compression scheme is specified, the CRC shall be calculated on
--    *     the uncompressed concatenation.
--    * - In encrypted columns, CRC is calculated after page encryption; the
--    *   encryption itself is performed after page compression (if compressed)
--    * If enabled, this allows for disabling checksumming in HDFS if only a few
--    * pages need to be read.
--    **/
--   4: optional i32 crc
          "crc">:
            doc ("The 32bit CRC for the page, to be calculated as follows:\n" ++
                 "- Using the standard CRC32 algorithm\n" ++
                 "- On the data only, i.e. this header should not be included. 'Data'\n" ++
                 "  hereby refers to the concatenation of the repetition levels, the\n" ++
                 "  definition levels and the column value, in this exact order.\n" ++
                 "- On the encoded versions of the repetition levels, definition levels and\n" ++
                 "  column values\n" ++
                 "- On the compressed versions of the repetition levels, definition levels\n" ++
                 "  and column values where possible;\n" ++
                 "  - For v1 data pages, the repetition levels, definition levels and column\n" ++
                 "    values are always compressed together. If a compression scheme is\n" ++
                 "    specified, the CRC shall be calculated on the compressed version of\n" ++
                 "    this concatenation. If no compression scheme is specified, the CRC\n" ++
                 "    shall be calculated on the uncompressed version of this concatenation.\n" ++
                 "  - For v2 data pages, the repetition levels and definition levels are\n" ++
                 "    handled separately from the data and are never compressed (only\n" ++
                 "    encoded). If a compression scheme is specified, the CRC shall be\n" ++
                 "    calculated on the concatenation of the uncompressed repetition levels,\n" ++
                 "    uncompressed definition levels and the compressed column values.\n" ++
                 "    If no compression scheme is specified, the CRC shall be calculated on\n" ++
                 "    the uncompressed concatenation.\n" ++
                 "- In encrypted columns, CRC is calculated after page encryption; the\n" ++
                 "  encryption itself is performed after page compression (if compressed)\n" ++
                 "If enabled, this allows for disabling checksumming in HDFS if only a few " ++
                 "pages need to be read.") $
            optional int32,
--
--   // Headers for page specific data.  One only will be set.
--   5: optional DataPageHeader data_page_header;
          "dataPageHeader">:
            optional $ parquet "DataPageHeader",
--   6: optional IndexPageHeader index_page_header;
          "indexPageHeader">:
            optional $ parquet "IndexPageHeader",
--   7: optional DictionaryPageHeader dictionary_page_header;
          "dictionaryPageHeader">:
            optional $ parquet "DictionaryPageHeader",
--   8: optional DataPageHeaderV2 data_page_header_v2;
          "dataPageHeaderV2">:
            optional $ parquet "DataPageHeaderV2"],
-- }
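-- Note (non-normative): the CRC rule above can be summarized with hypothetical
-- helpers crc32, compressIfAny (identity when no codec is set), repLevels, defLevels
-- and values, all producing byte strings:
--
--   crcV1 = crc32 (compressIfAny (repLevels <> defLevels <> values))
--   crcV2 = crc32 (repLevels <> defLevels <> compressIfAny values)
--
-- and for encrypted columns the CRC is taken over the page bytes after encryption.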

-- /**
--  * Wrapper struct to store key values
--  */
--  struct KeyValue {
      def "KeyValue" $
        doc "Wrapper struct to store key values" $
        record [
--   1: required string key
          "key">: string,
--   2: optional string value
          "value">: optional string],
-- }

-- /**
--  * Wrapper struct to specify sort order
--  */
-- struct SortingColumn {
      def "SortingColumn" $
        doc "Wrapper struct to specify sort order" $
        record [
--   /** The column index (in this row group) **/
--   1: required i32 column_idx
          "columnIdx">:
            doc "The column index (in this row group)"
            int32,
--
--   /** If true, indicates this column is sorted in descending order. **/
--   2: required bool descending
          "descending">:
            doc "If true, indicates this column is sorted in descending order."
            boolean,
--
--   /** If true, nulls will come before non-null values, otherwise,
--    * nulls go at the end. */
--   3: required bool nulls_first
          "nullsFirst">:
            doc ("If true, nulls will come before non-null values, otherwise, " ++
                 "nulls go at the end.")
            boolean],
-- }
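-- Note (non-normative): as an example of the struct above, sorting a row group by
-- its first column in ascending order with nulls placed last would be described as
--
--   SortingColumn { columnIdx = 0, descending = False, nullsFirst = False }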

-- /**
--  * statistics of a given page type and encoding
--  */
-- struct PageEncodingStats {
      def "PageEncodingStats" $
        doc "statistics of a given page type and encoding" $
        record [
--
--   /** the page type (data/dic/...) **/
--   1: required PageType page_type;
          "pageType">:
            doc "the page type (data/dic/...)" $
            parquet "PageType",
--
--   /** encoding of the page **/
--   2: required Encoding encoding;
          "encoding">:
            doc "encoding of the page" $
            parquet "Encoding",
--
--   /** number of pages of this type with this encoding **/
--   3: required i32 count;
          "count">:
            doc "number of pages of this type with this encoding"
            int32],
--
-- }

-- /**
--  * Description for column metadata
--  */
-- struct ColumnMetaData {
      def "ColumnMetaData" $
        doc "Description for column metadata" $
        record [
--   /** Type of this column **/
--   1: required Type type
          "type">:
            doc "Type of this column" $
            parquet "Type",
--
--   /** Set of all encodings used for this column. The purpose is to validate
--    * whether we can decode those pages. **/
--   2: required list<Encoding> encodings
          "encodings">:
            doc ("Set of all encodings used for this column. The purpose is to validate " ++
                 "whether we can decode those pages.") $
            list $ parquet "Encoding",
--
--   /** Path in schema **/
--   3: required list<string> path_in_schema
          "pathInSchema">:
            doc "Path in schema" $
            list string,
--
--   /** Compression codec **/
--   4: required CompressionCodec codec
          "codec">:
            doc "Compression codec" $
            parquet "CompressionCodec",
--
--   /** Number of values in this column **/
--   5: required i64 num_values
          "numValues">:
            doc "Number of values in this column"
            int64,
--
--   /** total byte size of all uncompressed pages in this column chunk (including the headers) **/
--   6: required i64 total_uncompressed_size
          "totalUncompressedSize">:
            doc "total byte size of all uncompressed pages in this column chunk (including the headers)"
            int64,
--
--   /** total byte size of all compressed, and potentially encrypted, pages
--    *  in this column chunk (including the headers) **/
--   7: required i64 total_compressed_size
          "totalCompressedSize">:
            doc ("total byte size of all compressed, and potentially encrypted, pages " ++
                 "in this column chunk (including the headers)")
            int64,
--
--   /** Optional key/value metadata **/
--   8: optional list<KeyValue> key_value_metadata
          "keyValueMetadata">:
            doc "Optional key/value metadata" $
            optional $ list $ parquet "KeyValue",
--
--   /** Byte offset from beginning of file to first data page **/
--   9: required i64 data_page_offset
          "dataPageOffset">:
            doc "Byte offset from beginning of file to first data page"
            int64,
--
--   /** Byte offset from beginning of file to root index page **/
--   10: optional i64 index_page_offset
          "indexPageOffset">:
            doc "Byte offset from beginning of file to root index page" $
            optional int64,
--
--   /** Byte offset from the beginning of file to first (only) dictionary page **/
--   11: optional i64 dictionary_page_offset
          "dictionaryPageOffset">:
            doc "Byte offset from the beginning of file to first (only) dictionary page" $
            optional int64,
--
--   /** optional statistics for this column chunk */
--   12: optional Statistics statistics;
          "statistics">:
            doc "optional statistics for this column chunk" $
            optional $ parquet "Statistics",
--
--   /** Set of all encodings used for pages in this column chunk.
--    * This information can be used to determine if all data pages are
--    * dictionary encoded for example **/
--   13: optional list<PageEncodingStats> encoding_stats;
          "encodingStats">:
            doc ("Set of all encodings used for pages in this column chunk. " ++
                 "This information can be used to determine if all data pages are " ++
                 "dictionary encoded for example") $
            optional $ list $ parquet "PageEncodingStats",
--
--   /** Byte offset from beginning of file to Bloom filter data. **/
--   14: optional i64 bloom_filter_offset;
          "bloomFilterOffset">:
            doc "Byte offset from beginning of file to Bloom filter data." $
            optional int64],
-- }
--
-- struct EncryptionWithFooterKey {
      def "EncryptionWithFooterKey" $ record [],
-- }
--
-- struct EncryptionWithColumnKey {
      def "EncryptionWithColumnKey" $
        record [
--   /** Column path in schema **/
--   1: required list<string> path_in_schema
          "pathInSchema">:
            doc "Column path in schema" $
            list string,
--
--   /** Retrieval metadata of column encryption key **/
--   2: optional binary key_metadata
          "keyMetadata">:
            doc "Retrieval metadata of column encryption key" $
            optional binary],
-- }
--
-- union ColumnCryptoMetaData {
      def "ColumnCryptoMetaData" $
        union [
--   1: EncryptionWithFooterKey ENCRYPTION_WITH_FOOTER_KEY
          "encryptionWithFooterKey">: parquet "EncryptionWithFooterKey",
--   2: EncryptionWithColumnKey ENCRYPTION_WITH_COLUMN_KEY
          "encryptionWithColumnKey">: parquet "EncryptionWithColumnKey"],
-- }

-- struct ColumnChunk {
      def "ColumnChunk" $
        record [
--   /** File where column data is stored.  If not set, assumed to be same file as
--     * metadata.  This path is relative to the current file.
--     **/
--   1: optional string file_path
          "filePath">:
            doc ("File where column data is stored.  If not set, assumed to be same file as " ++
                 "metadata.  This path is relative to the current file.") $
            optional string,
--
--   /** Byte offset in file_path to the ColumnMetaData **/
--   2: required i64 file_offset
          "fileOffset">:
            doc "Byte offset in file_path to the ColumnMetaData"
            int64,
--
--   /** Column metadata for this chunk. This is the same content as what is at
--    * file_path/file_offset.  Having it here has it replicated in the file
--    * metadata.
--    **/
--   3: optional ColumnMetaData meta_data
          "metaData">:
            doc ("Column metadata for this chunk. This is the same content as what is at " ++
                 "file_path/file_offset.  Having it here has it replicated in the file " ++
                 "metadata.") $
            optional $ parquet "ColumnMetaData",
--
--   /** File offset of ColumnChunk's OffsetIndex **/
--   4: optional i64 offset_index_offset
          "offsetIndexOffset">:
            doc "File offset of ColumnChunk's OffsetIndex" $
            optional int64,
--
--   /** Size of ColumnChunk's OffsetIndex, in bytes **/
--   5: optional i32 offset_index_length
          "offsetIndexLength">:
            doc "Size of ColumnChunk's OffsetIndex, in bytes" $
            optional int32,
--
--   /** File offset of ColumnChunk's ColumnIndex **/
--   6: optional i64 column_index_offset
          "columnIndexOffset">:
            doc "File offset of ColumnChunk's ColumnIndex" $
            optional int64,
--
--   /** Size of ColumnChunk's ColumnIndex, in bytes **/
--   7: optional i32 column_index_length
          "columnIndexLength">:
            doc "Size of ColumnChunk's ColumnIndex, in bytes" $
            optional int32,
--
--   /** Crypto metadata of encrypted columns **/
--   8: optional ColumnCryptoMetaData crypto_metadata
          "cryptoMetadata">:
            doc "Crypto metadata of encrypted columns" $
            optional $ parquet "ColumnCryptoMetaData",
--
--   /** Encrypted column metadata for this chunk **/
--   9: optional binary encrypted_column_metadata
          "encryptedColumnMetadata">:
            doc "Encrypted column metadata for this chunk" $
            optional binary],
-- }

-- struct RowGroup {
      def "RowGroup" $
        record [
--   /** Metadata for each column chunk in this row group.
--    * This list must have the same order as the SchemaElement list in FileMetaData.
--    **/
--   1: required list<ColumnChunk> columns
          "columns">:
            doc ("Metadata for each column chunk in this row group. " ++
                 "This list must have the same order as the SchemaElement list in FileMetaData.") $
            list $ parquet "ColumnChunk",
--
--   /** Total byte size of all the uncompressed column data in this row group **/
--   2: required i64 total_byte_size
          "totalByteSize">:
            doc "Total byte size of all the uncompressed column data in this row group"
            int64,
--
--   /** Number of rows in this row group **/
--   3: required i64 num_rows
          "numRows">:
            doc "Number of rows in this row group"
            int64,
--
--   /** If set, specifies a sort ordering of the rows in this RowGroup.
--    * The sorting columns can be a subset of all the columns.
--    */
--   4: optional list<SortingColumn> sorting_columns
          "sortingColumns">:
            doc ("If set, specifies a sort ordering of the rows in this RowGroup. " ++
                 "The sorting columns can be a subset of all the columns.") $
            optional $ list $ parquet "SortingColumn",
--
--   /** Byte offset from beginning of file to first page (data or dictionary)
--    * in this row group **/
--   5: optional i64 file_offset
          "fileOffset">:
            doc ("Byte offset from beginning of file to first page (data or dictionary) " ++
                 "in this row group") $
            optional int64,
--
--   /** Total byte size of all compressed (and potentially encrypted) column data
--    *  in this row group **/
--   6: optional i64 total_compressed_size
          "totalCompressedSize">:
            doc ("Total byte size of all compressed (and potentially encrypted) column data " ++
                 "in this row group") $
            optional int64,
--
--   /** Row group ordinal in the file **/
--   7: optional i16 ordinal
          "ordinal">:
            doc "Row group ordinal in the file" $
            optional int16],
-- }
--
-- /** Empty struct to signal the order defined by the physical or logical type */
-- struct TypeDefinedOrder {}
--
-- /**
--  * Union to specify the order used for the min_value and max_value fields for a
--  * column. This union takes the role of an enhanced enum that allows rich
--  * elements (which will be needed for a collation-based ordering in the future).
--  *
--  * Possible values are:
--  * * TypeDefinedOrder - the column uses the order defined by its logical or
--  *                      physical type (if there is no logical type).
--  *
--  * If the reader does not support the value of this union, min and max stats
--  * for this column should be ignored.
--  */
-- union ColumnOrder {
      def "ColumnOrder" $
        doc ("Union to specify the order used for the min_value and max_value fields for a " ++
             "column. This union takes the role of an enhanced enum that allows rich " ++
             "elements (which will be needed for a collation-based ordering in the future). " ++
             "Possible values are:\n" ++
             "* TypeDefinedOrder - the column uses the order defined by its logical or " ++
             "physical type (if there is no logical type).\n" ++
             "If the reader does not support the value of this union, min and max stats " ++
             "for this column should be ignored. ") $
        union [
--
--   /**
--    * The sort orders for logical types are:
--    *   UTF8 - unsigned byte-wise comparison
--    *   INT8 - signed comparison
--    *   INT16 - signed comparison
--    *   INT32 - signed comparison
--    *   INT64 - signed comparison
--    *   UINT8 - unsigned comparison
--    *   UINT16 - unsigned comparison
--    *   UINT32 - unsigned comparison
--    *   UINT64 - unsigned comparison
--    *   DECIMAL - signed comparison of the represented value
--    *   DATE - signed comparison
--    *   TIME_MILLIS - signed comparison
--    *   TIME_MICROS - signed comparison
--    *   TIMESTAMP_MILLIS - signed comparison
--    *   TIMESTAMP_MICROS - signed comparison
--    *   INTERVAL - unsigned comparison
--    *   JSON - unsigned byte-wise comparison
--    *   BSON - unsigned byte-wise comparison
--    *   ENUM - unsigned byte-wise comparison
--    *   LIST - undefined
--    *   MAP - undefined
--    *
--    * In the absence of logical types, the sort order is determined by the physical type:
--    *   BOOLEAN - false, true
--    *   INT32 - signed comparison
--    *   INT64 - signed comparison
--    *   INT96 (only used for legacy timestamps) - undefined
--    *   FLOAT - signed comparison of the represented value (*)
--    *   DOUBLE - signed comparison of the represented value (*)
--    *   BYTE_ARRAY - unsigned byte-wise comparison
--    *   FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison
--    *
--    * (*) Because the sorting order is not specified properly for floating
--    *     point values (relations vs. total ordering) the following
--    *     compatibility rules should be applied when reading statistics:
--    *     - If the min is a NaN, it should be ignored.
--    *     - If the max is a NaN, it should be ignored.
--    *     - If the min is +0, the row group may contain -0 values as well.
--    *     - If the max is -0, the row group may contain +0 values as well.
--    *     - When looking for NaN values, min and max should be ignored.
--    */
--   1: TypeDefinedOrder TYPE_ORDER;
          "typeOrder">:
            doc ("The sort orders for logical types are:\n" ++
                 "  UTF8 - unsigned byte-wise comparison\n" ++
                 "  INT8 - signed comparison\n" ++
                 "  INT16 - signed comparison\n" ++
                 "  INT32 - signed comparison\n" ++
                 "  INT64 - signed comparison\n" ++
                 "  UINT8 - unsigned comparison\n" ++
                 "  UINT16 - unsigned comparison\n" ++
                 "  UINT32 - unsigned comparison\n" ++
                 "  UINT64 - unsigned comparison\n" ++
                 "  DECIMAL - signed comparison of the represented value\n" ++
                 "  DATE - signed comparison\n" ++
                 "  TIME_MILLIS - signed comparison\n" ++
                 "  TIME_MICROS - signed comparison\n" ++
                 "  TIMESTAMP_MILLIS - signed comparison\n" ++
                 "  TIMESTAMP_MICROS - signed comparison\n" ++
                 "  INTERVAL - unsigned comparison\n" ++
                 "  JSON - unsigned byte-wise comparison\n" ++
                 "  BSON - unsigned byte-wise comparison\n" ++
                 "  ENUM - unsigned byte-wise comparison\n" ++
                 "  LIST - undefined\n" ++
                 "  MAP - undefined\n" ++
                 "In the absence of logical types, the sort order is determined by the physical type:\n" ++
                 "  BOOLEAN - false, true\n" ++
                 "  INT32 - signed comparison\n" ++
                 "  INT64 - signed comparison\n" ++
                 "  INT96 (only used for legacy timestamps) - undefined\n" ++
                 "  FLOAT - signed comparison of the represented value (*)\n" ++
                 "  DOUBLE - signed comparison of the represented value (*)\n" ++
                 "  BYTE_ARRAY - unsigned byte-wise comparison\n" ++
                 "  FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison\n" ++
                 "(*) Because the sorting order is not specified properly for floating\n" ++
                 "    point values (relations vs. total ordering) the following\n" ++
                 "    compatibility rules should be applied when reading statistics:\n" ++
                 "    - If the min is a NaN, it should be ignored.\n" ++
                 "    - If the max is a NaN, it should be ignored.\n" ++
                 "    - If the min is +0, the row group may contain -0 values as well.\n" ++
                 "    - If the max is -0, the row group may contain +0 values as well.\n" ++
                 "    - When looking for NaN values, min and max should be ignored.") unit],
-- }
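-- Note (non-normative): the floating-point compatibility rules above amount to the
-- following sketch, with v standing for a min or max statistic read as a Double:
--
--   usableMin v = if isNaN v then Nothing else Just (if v == 0 then -0.0 else v)
--   usableMax v = if isNaN v then Nothing else Just (if v == 0 then  0.0 else v)
--
-- i.e. NaN bounds are ignored, a +0 minimum is widened to -0, and a -0 maximum is
-- widened to +0, so that row groups containing both signed zeros are not skipped.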

-- struct PageLocation {
      def "PageLocation" $
        record [
--   /** Offset of the page in the file **/
--   1: required i64 offset
          "offset">:
            doc "Offset of the page in the file"
            int64,
--
--   /**
--    * Size of the page, including header. Sum of compressed_page_size and header
--    * length
--    */
--   2: required i32 compressed_page_size
          "compressedPageSize">:
            doc ("Size of the page, including header. Sum of compressed_page_size and header " ++
                 "length")
            int32,
--
--   /**
--    * Index within the RowGroup of the first row of the page; this means pages
--    * change on record boundaries (r = 0).
--    */
--   3: required i64 first_row_index
          "firstRowIndex">:
            doc ("Index within the RowGroup of the first row of the page; this means pages " ++
                 "change on record boundaries (r = 0).")
            int64],
-- }
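-- Note (non-normative): because compressed_page_size here includes the header, one
-- would expect, for the page stored at this location,
--
--   pageLocation.compressedPageSize == headerLength + pageHeader.compressedPageSize
--
-- where headerLength is the serialized size of the PageHeader itself.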
--
-- struct OffsetIndex {
      def "OffsetIndex" $
        record [
--   /**
--    * PageLocations, ordered by increasing PageLocation.offset. It is required
--    * that page_locations[i].first_row_index < page_locations[i+1].first_row_index.
--    */
--   1: required list<PageLocation> page_locations
          "pageLocations">:
            doc ("PageLocations, ordered by increasing PageLocation.offset. It is required " ++
                 "that page_locations[i].first_row_index < page_locations[i+1].first_row_index.") $
            list $ parquet "PageLocation"],
-- }
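-- Note (non-normative): the ordering requirement above can be checked with a single
-- scan, assuming hypothetical accessors offset and firstRowIndex on PageLocation:
--
--   wellOrdered locs = and (zipWith ok locs (drop 1 locs))
--     where ok a b = offset a < offset b && firstRowIndex a < firstRowIndex b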
--
-- /**
--  * Description for ColumnIndex.
--  * Each <array-field>[i] refers to the page at OffsetIndex.page_locations[i]
--  */
-- struct ColumnIndex {
      def "ColumnIndex" $
        doc ("Description for ColumnIndex. " ++
             "Each <array-field>[i] refers to the page at OffsetIndex.page_locations[i]") $
        record [
--   /**
--    * A list of Boolean values to determine the validity of the corresponding
--    * min and max values. If true, a page contains only null values, and writers
--    * have to set the corresponding entries in min_values and max_values to
--    * byte[0], so that all lists have the same length. If false, the
--    * corresponding entries in min_values and max_values must be valid.
--    */
--   1: required list<bool> null_pages
          "nullPages">:
            doc ("A list of Boolean values to determine the validity of the corresponding " ++
                 "min and max values. If true, a page contains only null values, and writers " ++
                 "have to set the corresponding entries in min_values and max_values to " ++
                 "byte[0], so that all lists have the same length. If false, the " ++
                 "corresponding entries in min_values and max_values must be valid.") $
            list boolean,
--
--   /**
--    * Two lists containing lower and upper bounds for the values of each page
--    * determined by the ColumnOrder of the column. These may be the actual
--    * minimum and maximum values found on a page, but can also be (more compact)
--    * values that do not exist on a page. For example, instead of storing ""Blart
--    * Versenwald III", a writer may set min_values[i]="B", max_values[i]="C".
--    * Such more compact values must still be valid values within the column's
--    * logical type. Readers must make sure that list entries are populated before
--    * using them by inspecting null_pages.
--    */
--   2: required list<binary> min_values
          "minValues">:
            doc ("minValues and maxValues are lists containing lower and upper bounds for the values of each page " ++
                 "determined by the ColumnOrder of the column. These may be the actual " ++
                 "minimum and maximum values found on a page, but can also be (more compact) " ++
                 "values that do not exist on a page. For example, instead of storing \"Blart " ++
                 "Versenwald III\", a writer may set min_values[i]=\"B\", max_values[i]=\"C\". " ++
                 "Such more compact values must still be valid values within the column's " ++
                 "logical type. Readers must make sure that list entries are populated before " ++
                 "using them by inspecting null_pages.") $
            list binary,
--   3: required list<binary> max_values
          "maxValues">: list binary,
--
--   /**
--    * Stores whether both min_values and max_values are orderd and if so, in
--    * which direction. This allows readers to perform binary searches in both
--    * lists. Readers cannot assume that max_values[i] <= min_values[i+1], even
--    * if the lists are ordered.
--    */
--   4: required BoundaryOrder boundary_order
          "boundaryOrder">:
            doc ("Stores whether both min_values and max_values are ordered and if so, in " ++
                 "which direction. This allows readers to perform binary searches in both " ++
                 "lists. Readers cannot assume that max_values[i] <= min_values[i+1], even " ++
                 "if the lists are ordered.") $
            parquet "BoundaryOrder",
--
--   /** A list containing the number of null values for each page **/
--   5: optional list<i64> null_counts
          "nullCounts">:
            doc "A list containing the number of null values for each page" $
            optional $ list int64],
-- }
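-- Note (non-normative): an illustration of how null_pages aligns with min_values and
-- max_values for a three-page chunk whose second page contains only nulls (its bounds
-- are therefore the empty byte string):
--
--   null_pages = [false, true, false]
--   min_values = ["B",   "",   "M"]
--   max_values = ["C",   "",   "N"]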

-- struct AesGcmV1 {
      def "AesGcmV1" $
        record [
--   /** AAD prefix **/
--   1: optional binary aad_prefix
          "aadPrefix">:
            doc "AAD prefix" $
            optional binary,
--
--   /** Unique file identifier part of AAD suffix **/
--   2: optional binary aad_file_unique
          "aadFileUnique">:
            doc "Unique file identifier part of AAD suffix" $
            optional binary,
--
--   /** In files encrypted with AAD prefix without storing it,
--    * readers must supply the prefix **/
--   3: optional bool supply_aad_prefix
          "supplyAadPrefix">:
            doc ("In files encrypted with AAD prefix without storing it, " ++
                 "readers must supply the prefix") $
            optional boolean],
-- }

-- struct AesGcmCtrV1 {
      def "AesGcmCtrV1" $
        record [
--   /** AAD prefix **/
--   1: optional binary aad_prefix
          "aadPrefix">:
            doc "AAD prefix" $
            optional binary,
--
--   /** Unique file identifier part of AAD suffix **/
--   2: optional binary aad_file_unique
          "aadFileUnique">:
            doc "Unique file identifier part of AAD suffix" $
            optional binary,
--
--   /** In files encrypted with AAD prefix without storing it,
--    * readers must supply the prefix **/
--   3: optional bool supply_aad_prefix
          "supplyAadPrefix">:
            doc ("In files encrypted with AAD prefix without storing it, " ++
                 "readers must supply the prefix") $
            optional boolean],
-- }

-- union EncryptionAlgorithm {
      def "EncryptionAlgorithm" $
        union [
--   1: AesGcmV1 AES_GCM_V1
          "aesGcmV1">: parquet "AesGcmV1",
--   2: AesGcmCtrV1 AES_GCM_CTR_V1
          "aesGcmCtrV1">: parquet "AesGcmCtrV1"],
-- }

-- /**
--  * Description for file metadata
--  */
-- struct FileMetaData {
      def "FileMetaData" $
        doc "Description for file metadata" $
        record [
--   /** Version of this file **/
--   1: required i32 version
          "version">:
            doc "Version of this file"
            int32,
--
--   /** Parquet schema for this file.  This schema contains metadata for all the columns.
--    * The schema is represented as a tree with a single root.  The nodes of the tree
--    * are flattened to a list by doing a depth-first traversal.
--    * The column metadata contains the path in the schema for that column which can be
--    * used to map columns to nodes in the schema.
--    * The first element is the root **/
--   2: required list<SchemaElement> schema;
          "schema">:
            doc ("Parquet schema for this file.  This schema contains metadata for all the columns. " ++
                 "The schema is represented as a tree with a single root.  The nodes of the tree " ++
                 "are flattened to a list by doing a depth-first traversal. " ++
                 "The column metadata contains the path in the schema for that column which can be " ++
                 "used to map columns to nodes in the schema. " ++
                 "The first element is the root") $
            list $ parquet "SchemaElement",
--
--   /** Number of rows in this file **/
--   3: required i64 num_rows
          "numRows">:
            doc "Number of rows in this file"
            int64,
--
--   /** Row groups in this file **/
--   4: required list<RowGroup> row_groups
          "rowGroups">:
            doc "Row groups in this file" $
            list $ parquet "RowGroup",
--
--   /** Optional key/value metadata **/
--   5: optional list<KeyValue> key_value_metadata
          "keyValueMetadata">:
            doc "Optional key/value metadata" $
            optional $ list $ parquet "KeyValue",
--
--   /** String for application that wrote this file.  This should be in the format
--    * <Application> version <App Version> (build <App Build Hash>).
--    * e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55)
--    **/
--   6: optional string created_by
          "createdBy">:
            doc ("String for application that wrote this file.  This should be in the format " ++
                 "<Application> version <App Version> (build <App Build Hash>). " ++
                 "e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55)") $
            optional string,
--
--   /**
--    * Sort order used for the min_value and max_value fields in the Statistics
--    * objects and the min_values and max_values fields in the ColumnIndex
--    * objects of each column in this file. Sort orders are listed in the order
--    * matching the columns in the schema. The indexes are not necessary the same
--    * though, because only leaf nodes of the schema are represented in the list
--    * of sort orders.
--    *
--    * Without column_orders, the meaning of the min_value and max_value fields
--    * in the Statistics object and the ColumnIndex object is undefined. To ensure
--    * well-defined behaviour, if these fields are written to a Parquet file,
--    * column_orders must be written as well.
--    *
--    * The obsolete min and max fields in the Statistics object are always sorted
--    * by signed comparison regardless of column_orders.
--    */
--   7: optional list<ColumnOrder> column_orders;
          "columnOrders">:
            doc ("Sort order used for the min_value and max_value fields in the Statistics " ++
                 "objects and the min_values and max_values fields in the ColumnIndex " ++
                 "objects of each column in this file. Sort orders are listed in the order " ++
                 "matching the columns in the schema. The indexes are not necessarily the same " ++
                 "though, because only leaf nodes of the schema are represented in the list " ++
                 "of sort orders.\n" ++
                 "Without column_orders, the meaning of the min_value and max_value fields " ++
                 "in the Statistics object and the ColumnIndex object is undefined. To ensure " ++
                 "well-defined behaviour, if these fields are written to a Parquet file, " ++
                 "column_orders must be written as well.\n" ++
                 "The obsolete min and max fields in the Statistics object are always sorted " ++
                 "by signed comparison regardless of column_orders.") $
            optional $ list $ parquet "ColumnOrder",
--
--   /**
--    * Encryption algorithm. This field is set only in encrypted files
--    * with plaintext footer. Files with encrypted footer store algorithm id
--    * in FileCryptoMetaData structure.
--    */
--   8: optional EncryptionAlgorithm encryption_algorithm
          "encryptionAlgorithm">:
            doc ("Encryption algorithm. This field is set only in encrypted files " ++
                 "with plaintext footer. Files with encrypted footer store algorithm id " ++
                 "in FileCryptoMetaData structure.") $
            optional $ parquet "EncryptionAlgorithm",
--
--   /**
--    * Retrieval metadata of key used for signing the footer.
--    * Used only in encrypted files with plaintext footer.
--    */
--   9: optional binary footer_signing_key_metadata
          "footerSigningKeyMetadata">:
            doc ("Retrieval metadata of key used for signing the footer. " ++
                 "Used only in encrypted files with plaintext footer.") $
            optional binary],
-- }
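-- Note (non-normative): since column_orders covers only leaf nodes of the schema, a
-- reader would expect, whenever the field is present,
--
--   length columnOrders == length (filter isLeaf schema)
--
-- where isLeaf is a hypothetical predicate selecting SchemaElements without children.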

-- /** Crypto metadata for files with encrypted footer **/
-- struct FileCryptoMetaData {
      def "FileCryptoMetaData" $
        doc "Crypto metadata for files with encrypted footer" $
        record [
--   /**
--    * Encryption algorithm. This field is only used for files
--    * with encrypted footer. Files with plaintext footer store algorithm id
--    * inside footer (FileMetaData structure).
--    */
--   1: required EncryptionAlgorithm encryption_algorithm
          "encryptionAlgorithm">:
            doc ("Encryption algorithm. This field is only used for files " ++
                 "with encrypted footer. Files with plaintext footer store algorithm id " ++
                 "inside footer (FileMetaData structure).") $
            parquet "EncryptionAlgorithm",
--
--   /** Retrieval metadata of key used for encryption of footer,
--    *  and (possibly) columns **/
--   2: optional binary key_metadata
          "keyMetadata">:
            doc ("Retrieval metadata of key used for encryption of footer, " ++
                 "and (possibly) columns") $
            optional binary]]
-- }