//===----------------------------------------------------------------------===//
//                         DuckDB
//
// duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb/execution/physical_operator.hpp"
#include "duckdb/storage/data_table.hpp"
#include "duckdb/parser/group_by_node.hpp"
#include "duckdb/execution/radix_partitioned_hashtable.hpp"
#include "duckdb/execution/operator/aggregate/grouped_aggregate_data.hpp"
#include "duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp"

namespace duckdb {

class ClientContext;
class BufferManager;

struct HashAggregateGroupingData {
public:
	HashAggregateGroupingData(GroupingSet &grouping_set_p, const GroupedAggregateData &grouped_aggregate_data,
	                          unique_ptr<DistinctAggregateCollectionInfo> &info);

public:
	RadixPartitionedHashTable table_data;
	unique_ptr<DistinctAggregateData> distinct_data;

public:
	bool HasDistinct() const;
};

struct HashAggregateGroupingGlobalState {
public:
	HashAggregateGroupingGlobalState(const HashAggregateGroupingData &data, ClientContext &context);

	// Radix state of the GROUPING_SET ht
	unique_ptr<GlobalSinkState> table_state;
	// State of the DISTINCT aggregates of this GROUPING_SET
	unique_ptr<DistinctAggregateState> distinct_state;
};

struct HashAggregateGroupingLocalState {
public:
	HashAggregateGroupingLocalState(const PhysicalHashAggregate &op, const HashAggregateGroupingData &data,
	                                ExecutionContext &context);

public:
	// Radix state of the GROUPING_SET ht
	unique_ptr<LocalSinkState> table_state;
	// Local states of the DISTINCT aggregate hash tables
	vector<unique_ptr<LocalSinkState>> distinct_states;
};

//! PhysicalHashAggregate is a group-by and aggregate implementation that uses a hash table to perform the grouping.
//! It only contains read-only variables; anything stateful is stored in the Global/Local states instead.
class PhysicalHashAggregate : public PhysicalOperator {
public:
	static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::HASH_GROUP_BY;

public:
	PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types,
	                      vector<unique_ptr<Expression>> expressions, idx_t estimated_cardinality);
	PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types,
	                      vector<unique_ptr<Expression>> expressions, vector<unique_ptr<Expression>> groups,
	                      idx_t estimated_cardinality);
	PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types,
	                      vector<unique_ptr<Expression>> expressions, vector<unique_ptr<Expression>> groups,
	                      vector<GroupingSet> grouping_sets, vector<unsafe_vector<idx_t>> grouping_functions,
	                      idx_t estimated_cardinality);

	//! The grouped aggregate data (groups, aggregates and their bindings)
	GroupedAggregateData grouped_aggregate_data;

	//! The grouping sets
	vector<GroupingSet> grouping_sets;
	//! The radix partitioned hash tables (one per grouping set)
	vector<HashAggregateGroupingData> groupings;
	unique_ptr<DistinctAggregateCollectionInfo> distinct_collection_info;
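	// For example, `GROUP BY GROUPING SETS ((a), (a, b))` yields two entries in
	// `grouping_sets` and two corresponding entries in `groupings`, each owning
	// its own radix-partitioned hash table.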
	//! A recreation of the input chunk, with NULLs for everything that isn't a group
	vector<LogicalType> input_group_types;

	// Filters given to Sink and friends
	unsafe_vector<idx_t> non_distinct_filter;
	unsafe_vector<idx_t> distinct_filter;

	unordered_map<Expression *, size_t> filter_indexes;

public:
	// Source interface
	unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;
	unique_ptr<LocalSourceState> GetLocalSourceState(ExecutionContext &context,
	                                                 GlobalSourceState &gstate) const override;
	SourceResultType GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const override;

	bool IsSource() const override {
		return true;
	}
	bool ParallelSource() const override {
		return true;
	}

	OrderPreservationType SourceOrder() const override {
		return OrderPreservationType::NO_ORDER;
	}

public:
	// Sink interface
	SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override;
	void Combine(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate) const override;
	SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
	                          GlobalSinkState &gstate) const override;
	SinkFinalizeType FinalizeInternal(Pipeline &pipeline, Event &event, ClientContext &context,
	                                  GlobalSinkState &gstate, bool check_distinct) const;

	unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
	unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;

	bool IsSink() const override {
		return true;
	}
	bool ParallelSink() const override {
		return true;
	}
	bool SinkOrderDependent() const override {
		return false;
	}

public:
	string ParamsToString() const override;

	//! Toggle multi-scan capability on a hash table, which prevents the scan of the aggregate from being destructive.
	//! If this is not toggled, the GetData method will destroy the hash table as it is scanning it.
	static void SetMultiScan(GlobalSinkState &state);

private:
	//! When we only have distinct aggregates, we can delay adding groups to the main ht
	bool CanSkipRegularSink() const;

	//! Finalize the distinct aggregates
	SinkFinalizeType FinalizeDistinct(Pipeline &pipeline, Event &event, ClientContext &context,
	                                  GlobalSinkState &gstate) const;
	//! Combine the distinct aggregates
	void CombineDistinct(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate) const;
	//! Sink the distinct aggregates for a single grouping
	void SinkDistinctGrouping(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input,
	                          idx_t grouping_idx) const;
	//! Sink the distinct aggregates
	void SinkDistinct(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const;
	//! Create groups in the main ht for groups that would otherwise get filtered out completely
	SinkResultType SinkGroupsOnly(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
	                              DataChunk &input) const;
};

} // namespace duckdb
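
// A minimal sketch of how the Sink/Source interface above is driven, assuming a
// PhysicalHashAggregate `op`, the usual context objects, and OperatorSinkInput /
// OperatorSourceInput wrappers around the states. In practice DuckDB's pipeline
// executor performs these calls; this is illustrative only:
//
//	auto gsink = op.GetGlobalSinkState(client_context);
//	auto lsink = op.GetLocalSinkState(execution_context);
//	op.Sink(execution_context, chunk, sink_input);   // called once per input chunk, per thread
//	op.Combine(execution_context, *gsink, *lsink);   // merge thread-local state into the global state
//	op.Finalize(pipeline, event, client_context, *gsink);
//
//	auto gsource = op.GetGlobalSourceState(client_context);
//	auto lsource = op.GetLocalSourceState(execution_context, *gsource);
//	op.GetData(execution_context, result_chunk, source_input); // pull aggregated rows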