//===----------------------------------------------------------------------===// // DuckDB // // duckdb/storage/statistics/distinct_statistics.hpp // // //===----------------------------------------------------------------------===// #pragma once #include "duckdb/common/atomic.hpp" #include "duckdb/common/types/hyperloglog.hpp" #include "duckdb/storage/statistics/base_statistics.hpp" namespace duckdb { class Serializer; class Deserializer; class Vector; class DistinctStatistics { public: DistinctStatistics(); explicit DistinctStatistics(unique_ptr log, idx_t sample_count, idx_t total_count); //! The HLL of the table unique_ptr log; //! How many values have been sampled into the HLL atomic sample_count; //! How many values have been inserted (before sampling) atomic total_count; public: void Merge(const DistinctStatistics &other); unique_ptr Copy() const; void Serialize(Serializer &serializer) const; void Serialize(FieldWriter &writer) const; static unique_ptr Deserialize(Deserializer &source); static unique_ptr Deserialize(FieldReader &reader); void Update(Vector &update, idx_t count, bool sample = true); void Update(UnifiedVectorFormat &update_data, const LogicalType &ptype, idx_t count, bool sample = true); string ToString() const; idx_t GetCount() const; static bool TypeIsSupported(const LogicalType &type); private: //! For distinct statistics we sample the input to speed up insertions static constexpr const double SAMPLE_RATE = 0.1; }; } // namespace duckdb