From cd24775269c13ea6e0cdab5da3974b3a737a54dd Mon Sep 17 00:00:00 2001 From: "jie.wang" <38901892+jievince@users.noreply.github.com> Date: Mon, 19 Oct 2020 07:44:17 -0500 Subject: [PATCH] Optimizer rule for TopN (#279) * add TopN PlanNode and Executor * add topn rule * fix topn rule * fix auto * add topn rule test * fix indent * fix conflict * fix conflict * fix ; * add newline * fix colNames * update code * update code * fix static function naming style * fix name * fix name * check result data in test_optimzier * fix --- src/optimizer/CMakeLists.txt | 1 + src/optimizer/OptRule.h | 4 + .../rule/PushFilterDownGetNbrsRule.cpp | 4 +- .../rule/PushFilterDownGetNbrsRule.h | 2 + src/optimizer/rule/TopNRule.cpp | 75 ++++++++++++ src/optimizer/rule/TopNRule.h | 35 ++++++ src/planner/Query.h | 1 - tests/query/v2/test_optimizer.py | 110 +++++++++++++++--- 8 files changed, 216 insertions(+), 16 deletions(-) create mode 100644 src/optimizer/rule/TopNRule.cpp create mode 100644 src/optimizer/rule/TopNRule.h diff --git a/src/optimizer/CMakeLists.txt b/src/optimizer/CMakeLists.txt index 248d05f0..b9541b65 100644 --- a/src/optimizer/CMakeLists.txt +++ b/src/optimizer/CMakeLists.txt @@ -12,6 +12,7 @@ nebula_add_library( OptRule.cpp rule/PushFilterDownGetNbrsRule.cpp rule/IndexScanRule.cpp + rule/TopNRule.cpp ) nebula_add_subdirectory(test) diff --git a/src/optimizer/OptRule.h b/src/optimizer/OptRule.h index af3a764d..fe616376 100644 --- a/src/optimizer/OptRule.h +++ b/src/optimizer/OptRule.h @@ -47,6 +47,10 @@ private: class OptRule { public: struct TransformResult { + static const TransformResult &noTransform() { + static TransformResult kNoTrans{false, false, {}}; + return kNoTrans; + } bool eraseCurr{false}; bool eraseAll{false}; std::vector<OptGroupExpr *> newGroupExprs; diff --git a/src/optimizer/rule/PushFilterDownGetNbrsRule.cpp b/src/optimizer/rule/PushFilterDownGetNbrsRule.cpp index c992e4a4..430fcda8 100644 --- a/src/optimizer/rule/PushFilterDownGetNbrsRule.cpp +++ b/src/optimizer/rule/PushFilterDownGetNbrsRule.cpp @@ -50,7 +50,7 @@ StatusOr<OptRule::TransformResult> PushFilterDownGetNbrsRule::transform( graph::ExtractFilterExprVisitor visitor; condition->accept(&visitor); if (!visitor.ok()) { - return TransformResult{false, false, {}}; + return TransformResult::noTransform(); } auto pool = qctx->objPool(); @@ -91,8 +91,8 @@ StatusOr<OptRule::TransformResult> PushFilterDownGetNbrsRule::transform( } TransformResult result; - result.newGroupExprs.emplace_back(newFilterGroupExpr ? newFilterGroupExpr : newGnGroupExpr); result.eraseCurr = true; + result.newGroupExprs.emplace_back(newFilterGroupExpr ? newFilterGroupExpr : newGnGroupExpr); return result; } diff --git a/src/optimizer/rule/PushFilterDownGetNbrsRule.h b/src/optimizer/rule/PushFilterDownGetNbrsRule.h index 11ebbe6d..4cabc067 100644 --- a/src/optimizer/rule/PushFilterDownGetNbrsRule.h +++ b/src/optimizer/rule/PushFilterDownGetNbrsRule.h @@ -21,8 +21,10 @@ namespace opt { class PushFilterDownGetNbrsRule final : public OptRule { public: const Pattern &pattern() const override; + StatusOr<TransformResult> transform(graph::QueryContext *qctx, const MatchedResult &matched) const override; + std::string toString() const override; private: diff --git a/src/optimizer/rule/TopNRule.cpp b/src/optimizer/rule/TopNRule.cpp new file mode 100644 index 00000000..1912d096 --- /dev/null +++ b/src/optimizer/rule/TopNRule.cpp @@ -0,0 +1,75 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#include "optimizer/rule/TopNRule.h" + +#include "common/expression/BinaryExpression.h" +#include "common/expression/ConstantExpression.h" +#include "common/expression/Expression.h" +#include "common/expression/FunctionCallExpression.h" +#include "common/expression/LogicalExpression.h" +#include "common/expression/UnaryExpression.h" +#include "optimizer/OptGroup.h" +#include "planner/PlanNode.h" +#include "planner/Query.h" +#include "visitor/ExtractFilterExprVisitor.h" + +using nebula::graph::Limit; +using nebula::graph::PlanNode; +using nebula::graph::QueryContext; +using nebula::graph::Sort; +using nebula::graph::TopN; + +namespace nebula { +namespace opt { + +std::unique_ptr<OptRule> TopNRule::kInstance = std::unique_ptr<TopNRule>(new TopNRule()); + +TopNRule::TopNRule() { + RuleSet::QueryRules().addRule(this); +} + +const Pattern &TopNRule::pattern() const { + static Pattern pattern = Pattern::create(graph::PlanNode::Kind::kLimit, + {Pattern::create(graph::PlanNode::Kind::kSort)}); + return pattern; +} + +StatusOr<OptRule::TransformResult> TopNRule::transform(QueryContext *qctx, + const MatchedResult &matched) const { + auto limitExpr = matched.node; + auto sortExpr = matched.dependencies.front().node; + auto limit = static_cast<const Limit *>(limitExpr->node()); + auto sort = static_cast<const Sort *>(sortExpr->node()); + + // Currently, we cannot know the total amount of input data, + // so only apply topn rule when offset of limit is 0 + if (limit->offset() != 0) { + return TransformResult::noTransform(); + } + + auto topn = TopN::make(qctx, nullptr, sort->factors(), limit->offset(), limit->count()); + topn->setOutputVar(limit->outputVar()); + topn->setInputVar(sort->inputVar()); + topn->setColNames(sort->colNames()); + auto topnExpr = OptGroupExpr::create(qctx, topn, limitExpr->group()); + for (auto dep : sortExpr->dependencies()) { + topnExpr->dependsOn(dep); + } + + TransformResult result; + result.newGroupExprs.emplace_back(topnExpr); + result.eraseAll = true; + result.eraseCurr = true; + return result; +} + +std::string TopNRule::toString() const { + return "TopNRule"; +} + +} // namespace opt +} // namespace nebula diff --git a/src/optimizer/rule/TopNRule.h b/src/optimizer/rule/TopNRule.h new file mode 100644 index 00000000..f6114f23 --- /dev/null +++ b/src/optimizer/rule/TopNRule.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#ifndef OPTIMIZER_RULE_TOPNRULE_H_ +#define OPTIMIZER_RULE_TOPNRULE_H_ + +#include <memory> + +#include "optimizer/OptRule.h" + +namespace nebula { +namespace opt { + +class TopNRule final : public OptRule { +public: + const Pattern &pattern() const override; + + StatusOr<OptRule::TransformResult> transform(graph::QueryContext *qctx, + const MatchedResult &matched) const override; + + std::string toString() const override; + +private: + TopNRule(); + + static std::unique_ptr<OptRule> kInstance; +}; + +} // namespace opt +} // namespace nebula + +#endif // OPTIMIZER_RULE_TOPNRULE_H_ diff --git a/src/planner/Query.h b/src/planner/Query.h index 10215e34..64fa76d4 100644 --- a/src/planner/Query.h +++ b/src/planner/Query.h @@ -747,7 +747,6 @@ private: int64_t count_{-1}; }; - /** * Do Aggregation with the given set of records, * such as AVG(), COUNT()... diff --git a/tests/query/v2/test_optimizer.py b/tests/query/v2/test_optimizer.py index 434ce91a..6353588a 100644 --- a/tests/query/v2/test_optimizer.py +++ b/tests/query/v2/test_optimizer.py @@ -18,53 +18,61 @@ class TestOptimizer(NebulaTestSuite): def test_PushFilterDownGetNbrsRule(self): resp = self.execute_query(''' - GO 1 STEPS FROM "Kobe Bryant" OVER serve - WHERE $^.player.age > 18 YIELD $^.player.name AS name + GO 1 STEPS FROM "Boris Diaw" OVER serve + WHERE $^.player.age > 18 YIELD serve.start_year as start_year ''') expected_plan = [ ["Project", [1]], ["GetNeighbors", [2], ['($^.player.age>18)']], ["Start", []] ] + expected_data = [[2003], [2005], [2008], [2012], [2016]] self.check_exec_plan(resp, expected_plan) + self.check_out_of_order_result(resp, expected_data) resp = self.execute_query(''' - GO 1 STEPS FROM "Kobe Bryant" OVER like REVERSELY - WHERE $^.player.age > 18 YIELD $^.player.name AS name + GO 1 STEPS FROM "James Harden" OVER like REVERSELY + WHERE $^.player.age > 18 YIELD like.likeness as likeness ''') expected_plan = [ ["Project", [1]], ["GetNeighbors", [2], ['($^.player.age>18)']], ["Start", []] ] + expected_data = [[90], [80], [99]] self.check_exec_plan(resp, expected_plan) + self.check_out_of_order_result(resp, expected_data) resp = self.execute_query(''' - GO 1 STEPS FROM "Kobe Bryant" OVER serve - WHERE serve.start_year > 2002 YIELD $^.player.name AS name + GO 1 STEPS FROM "Boris Diaw" OVER serve + WHERE serve.start_year > 2005 YIELD serve.start_year as start_year ''') expected_plan = [ ["Project", [1]], - ["GetNeighbors", [2], ['(serve.start_year>2002)']], + ["GetNeighbors", [2], ['(serve.start_year>2005)']], ["Start", []] ] + expected_data = [[2008], [2012], [2016]] self.check_exec_plan(resp, expected_plan) + self.check_out_of_order_result(resp, expected_data) resp = self.execute_query(''' - GO 1 STEPS FROM "Lakerys" OVER serve REVERSELY - WHERE serve.start_year > 2002 YIELD $^.player.name AS name + GO 1 STEPS FROM "Lakers" OVER serve REVERSELY + WHERE serve.start_year < 2017 YIELD serve.start_year as start_year ''') expected_plan = [ ["Project", [1]], - ["GetNeighbors", [2], ['(serve.start_year>2002)']], + ["GetNeighbors", [2], ['(serve.start_year<2017)']], ["Start", []] ] + expected_data = [[2012], [1996], [2008], [1996], [2012]] self.check_exec_plan(resp, expected_plan) + self.check_out_of_order_result(resp, expected_data) @pytest.mark.skip(reason="Depends on other opt rules to eliminate duplicate project nodes") def test_PushFilterDownGetNbrsRule_Failed(self): resp = self.execute_query(''' - GO 1 STEPS FROM "Kobe Bryant" OVER serve + GO 1 STEPS FROM "Boris Diaw" OVER serve WHERE $^.player.age > 18 AND $$.team.name == "Lakers" YIELD $^.player.name AS name ''') @@ -74,10 +82,12 @@ class TestOptimizer(NebulaTestSuite): ["GetNeighbors", [3], ['($^.player.age>18)']], ["Start", []] ] + expected_data = [['Boris Diaw']] self.check_exec_plan(resp, expected_plan) + self.check_out_of_order_result(resp, expected_data) resp = self.execute_query(''' - GO 1 STEPS FROM "Kobe Bryant" OVER serve + GO 1 STEPS FROM "Boris Diaw" OVER serve WHERE $^.player.age > 18 OR $$.team.name == "Lakers" YIELD $^.player.name AS name ''') @@ -87,11 +97,13 @@ class TestOptimizer(NebulaTestSuite): ["GetNeighbors", [3]], ["Start", []] ] + expected_data = [['Boris Diaw']] self.check_exec_plan(resp, expected_plan) + self.check_out_of_order_result(resp, expected_data) # fail to optimize cases resp = self.execute_query(''' - GO 1 STEPS FROM "Kobe Bryant" OVER serve \ + GO 1 STEPS FROM "Boris Diaw" OVER serve \ WHERE $$.team.name == "Lakers" YIELD $^.player.name AS name ''') expected_plan = [ @@ -100,4 +112,76 @@ class TestOptimizer(NebulaTestSuite): ["GetNeighbors", [3]], ["Start", []] ] + expected_data = [['Boris Diaw']] self.check_exec_plan(resp, expected_plan) + self.check_out_of_order_result(resp, expected_data) + + def test_TopNRule(self): + resp = self.execute_query(''' + GO 1 STEPS FROM "Marco Belinelli" OVER like + YIELD like.likeness AS likeness + | ORDER BY likeness + | LIMIT 2 + ''') + expected_plan = [ + ["DataCollect", [1]], + ["TopN", [2]], + ["Project", [3]], + ["GetNeighbors", [4]], + ["Start", []] + ] + expected_data = [[50], [55]] + self.check_exec_plan(resp, expected_plan) + self.check_result(resp, expected_data) + + resp = self.execute_query(''' + GO 1 STEPS FROM "Marco Belinelli" OVER like REVERSELY + YIELD like.likeness AS likeness | + ORDER BY likeness | + LIMIT 1 + ''') + expected_plan = [ + ["DataCollect", [1]], + ["TopN", [2]], + ["Project", [3]], + ["GetNeighbors", [4]], + ["Start", []] + ] + expected_data = [[83]] + self.check_exec_plan(resp, expected_plan) + self.check_result(resp, expected_data) + + def test_TopNRule_Failed(self): + resp = self.execute_query(''' + GO 1 STEPS FROM "Marco Belinelli" OVER like + YIELD like.likeness as likeness + | ORDER BY likeness + | LIMIT 2, 3 + ''') + expected_plan = [ + ["DataCollect", [1]], + ["Limit", [2]], + ["Sort", [3]], + ["Project", [4]], + ["GetNeighbors", [5]], + ["Start", []] + ] + expected_data = [[60]] + self.check_exec_plan(resp, expected_plan) + self.check_result(resp, expected_data) + + resp = self.execute_query(''' + GO 1 STEPS FROM "Marco Belinelli" OVER like + YIELD like.likeness AS likeness + | ORDER BY likeness + ''') + expected_plan = [ + ["DataCollect", [1]], + ["Sort", [2]], + ["Project", [3]], + ["GetNeighbors", [4]], + ["Start", []] + ] + expected_data = [[50], [55], [60]] + self.check_exec_plan(resp, expected_plan) + self.check_result(resp, expected_data) -- GitLab