//===----------------------------------------------------------------------===// // DuckDB // // duckdb/optimizer/join_order/join_order_optimizer.hpp // // //===----------------------------------------------------------------------===// #pragma once #include "duckdb/common/unordered_map.hpp" #include "duckdb/common/unordered_set.hpp" #include "duckdb/optimizer/join_order/join_relation.hpp" #include "duckdb/optimizer/join_order/cardinality_estimator.hpp" #include "duckdb/optimizer/join_order/query_graph.hpp" #include "duckdb/optimizer/join_order/join_node.hpp" #include "duckdb/parser/expression_map.hpp" #include "duckdb/planner/logical_operator.hpp" #include "duckdb/planner/logical_operator_visitor.hpp" #include namespace duckdb { struct GenerateJoinRelation { GenerateJoinRelation(JoinRelationSet &set, unique_ptr op_p) : set(set), op(std::move(op_p)) { } JoinRelationSet &set; unique_ptr op; }; class JoinOrderOptimizer { public: explicit JoinOrderOptimizer(ClientContext &context) : context(context), cardinality_estimator(context), full_plan_found(false), must_update_full_plan(false) { } //! Perform join reordering inside a plan unique_ptr Optimize(unique_ptr plan); unique_ptr CreateJoinTree(JoinRelationSet &set, const vector> &possible_connections, JoinNode &left, JoinNode &right); private: ClientContext &context; //! The total amount of join pairs that have been considered idx_t pairs = 0; //! Set of all relations considered in the join optimizer vector> relations; //! A mapping of base table index -> index into relations array (relation number) unordered_map relation_mapping; //! A structure holding all the created JoinRelationSet objects JoinRelationSetManager set_manager; //! The set of edges used in the join optimizer QueryGraph query_graph; //! The optimal join plan found for the specific JoinRelationSet* unordered_map> plans; //! The set of filters extracted from the query graph vector> filters; //! The set of filter infos created from the extracted filters vector> filter_infos; //! A map of all expressions a given expression has to be equivalent to. This is used to add "implied join edges". //! i.e. in the join A=B AND B=C, the equivalence set of {B} is {A, C}, thus we can add an implied join edge {A = C} expression_map_t> equivalence_sets; CardinalityEstimator cardinality_estimator; bool full_plan_found; bool must_update_full_plan; unordered_set join_nodes_in_full_plan; //! Extract the bindings referred to by an Expression bool ExtractBindings(Expression &expression, unordered_set &bindings); //! Get column bindings from a filter void GetColumnBinding(Expression &expression, ColumnBinding &binding); //! Traverse the query tree to find (1) base relations, (2) existing join conditions and (3) filters that can be //! rewritten into joins. Returns true if there are joins in the tree that can be reordered, false otherwise. bool ExtractJoinRelations(LogicalOperator &input_op, vector> &filter_operators, optional_ptr parent = nullptr); //! Emit a pair as a potential join candidate. Returns the best plan found for the (left, right) connection (either //! the newly created plan, or an existing plan) JoinNode &EmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector> &info); //! Tries to emit a potential join candidate pair. Returns false if too many pairs have already been emitted, //! cancelling the dynamic programming step. bool TryEmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector> &info); bool EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right, unordered_set exclusion_set); //! Emit a relation set node bool EmitCSG(JoinRelationSet &node); //! Enumerate the possible connected subgraphs that can be joined together in the join graph bool EnumerateCSGRecursive(JoinRelationSet &node, unordered_set &exclusion_set); //! Rewrite a logical query plan given the join plan unique_ptr RewritePlan(unique_ptr plan, JoinNode &node); //! Generate cross product edges inside the side void GenerateCrossProducts(); //! Perform the join order solving void SolveJoinOrder(); //! Solve the join order exactly using dynamic programming. Returns true if it was completed successfully (i.e. did //! not time-out) bool SolveJoinOrderExactly(); //! Solve the join order approximately using a greedy algorithm void SolveJoinOrderApproximately(); void UpdateDPTree(JoinNode &new_plan); void UpdateJoinNodesInFullPlan(JoinNode &node); bool NodeInFullPlan(JoinNode &node); GenerateJoinRelation GenerateJoins(vector> &extracted_relations, JoinNode &node); }; } // namespace duckdb