Skip to content
Snippets Groups Projects
Unverified Commit cd247752 authored by jie.wang's avatar jie.wang Committed by GitHub
Browse files

Optimizer rule for TopN (#279)

* add TopN PlanNode and Executor

* add topn rule

* fix topn rule

* fix auto

* add topn rule test

* fix indent

* fix conflict

* fix conflict

* fix ;

* add newline

* fix colNames

* update code

* update code

* fix static function naming style

* fix name

* fix name

* check result data in test_optimizer

* fix
parent 9dbc60b1
No related branches found
No related tags found
No related merge requests found
......@@ -12,6 +12,7 @@ nebula_add_library(
OptRule.cpp
rule/PushFilterDownGetNbrsRule.cpp
rule/IndexScanRule.cpp
rule/TopNRule.cpp
)
nebula_add_subdirectory(test)
......@@ -47,6 +47,10 @@ private:
class OptRule {
public:
struct TransformResult {
// Returns a shared result that reports "nothing was changed": both erase flags
// are false and the list of new group expressions is empty.
static const TransformResult &noTransform() {
    // Function-local static: built on the first call, then reused.
    static TransformResult unchanged{false, false, {}};
    return unchanged;
}
bool eraseCurr{false};
bool eraseAll{false};
std::vector<OptGroupExpr *> newGroupExprs;
......
......@@ -50,7 +50,7 @@ StatusOr<OptRule::TransformResult> PushFilterDownGetNbrsRule::transform(
graph::ExtractFilterExprVisitor visitor;
condition->accept(&visitor);
if (!visitor.ok()) {
return TransformResult{false, false, {}};
return TransformResult::noTransform();
}
auto pool = qctx->objPool();
......@@ -91,8 +91,8 @@ StatusOr<OptRule::TransformResult> PushFilterDownGetNbrsRule::transform(
}
TransformResult result;
result.newGroupExprs.emplace_back(newFilterGroupExpr ? newFilterGroupExpr : newGnGroupExpr);
result.eraseCurr = true;
result.newGroupExprs.emplace_back(newFilterGroupExpr ? newFilterGroupExpr : newGnGroupExpr);
return result;
}
......
......@@ -21,8 +21,10 @@ namespace opt {
class PushFilterDownGetNbrsRule final : public OptRule {
public:
const Pattern &pattern() const override;
StatusOr<TransformResult> transform(graph::QueryContext *qctx,
const MatchedResult &matched) const override;
std::string toString() const override;
private:
......
/* Copyright (c) 2020 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/
#include "optimizer/rule/TopNRule.h"
#include "common/expression/BinaryExpression.h"
#include "common/expression/ConstantExpression.h"
#include "common/expression/Expression.h"
#include "common/expression/FunctionCallExpression.h"
#include "common/expression/LogicalExpression.h"
#include "common/expression/UnaryExpression.h"
#include "optimizer/OptGroup.h"
#include "planner/PlanNode.h"
#include "planner/Query.h"
#include "visitor/ExtractFilterExprVisitor.h"
using nebula::graph::Limit;
using nebula::graph::PlanNode;
using nebula::graph::QueryContext;
using nebula::graph::Sort;
using nebula::graph::TopN;
namespace nebula {
namespace opt {
std::unique_ptr<OptRule> TopNRule::kInstance = std::unique_ptr<TopNRule>(new TopNRule());
// Constructs the rule and adds it to the rule set returned by
// RuleSet::QueryRules(), so creating the instance is what registers it.
TopNRule::TopNRule() {
RuleSet::QueryRules().addRule(this);
}
// Returns the plan shape this rule looks for: a Limit node with a Sort node as
// its dependency.
const Pattern &TopNRule::pattern() const {
    // Created on the first call and returned by reference afterwards.
    static Pattern limitOverSort = Pattern::create(
        graph::PlanNode::Kind::kLimit, {Pattern::create(graph::PlanNode::Kind::kSort)});
    return limitOverSort;
}
// Replaces a matched Limit-over-Sort pair with one TopN node.
//
// qctx     query context handed to TopN::make and OptGroupExpr::create.
// matched  match result; `node` is the Limit group expression and the first
//          entry of `dependencies` is the Sort group expression.
//
// Returns TransformResult::noTransform() when the limit has a non-zero offset.
// Otherwise returns a result carrying the new TopN group expression with both
// eraseCurr and eraseAll set.
StatusOr<OptRule::TransformResult> TopNRule::transform(QueryContext *qctx,
                                                       const MatchedResult &matched) const {
    auto limitGroupExpr = matched.node;
    auto sortGroupExpr = matched.dependencies.front().node;
    auto limitNode = static_cast<const Limit *>(limitGroupExpr->node());
    auto sortNode = static_cast<const Sort *>(sortGroupExpr->node());

    // Currently, we cannot know the total amount of input data,
    // so only apply topn rule when offset of limit is 0
    if (limitNode->offset() != 0) {
        return TransformResult::noTransform();
    }

    // The TopN node takes its sort factors, input variable and column names
    // from the Sort node, and its offset/count and output variable from the
    // Limit node.
    auto topnNode = TopN::make(
        qctx, nullptr, sortNode->factors(), limitNode->offset(), limitNode->count());
    topnNode->setOutputVar(limitNode->outputVar());
    topnNode->setInputVar(sortNode->inputVar());
    topnNode->setColNames(sortNode->colNames());

    // The new group expression is created in the Limit's group and depends on
    // whatever the Sort group expression depended on.
    auto topnGroupExpr = OptGroupExpr::create(qctx, topnNode, limitGroupExpr->group());
    for (auto sortDep : sortGroupExpr->dependencies()) {
        topnGroupExpr->dependsOn(sortDep);
    }

    TransformResult transformed;
    transformed.eraseCurr = true;
    transformed.eraseAll = true;
    transformed.newGroupExprs.emplace_back(topnGroupExpr);
    return transformed;
}
// Returns the name of this rule, "TopNRule".
std::string TopNRule::toString() const {
    return std::string("TopNRule");
}
} // namespace opt
} // namespace nebula
/* Copyright (c) 2020 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/
#ifndef OPTIMIZER_RULE_TOPNRULE_H_
#define OPTIMIZER_RULE_TOPNRULE_H_
#include <memory>
#include "optimizer/OptRule.h"
namespace nebula {
namespace opt {
// Optimizer rule that matches a Limit node over a Sort node and, when the
// limit's offset is zero, replaces the pair with a single TopN node.
class TopNRule final : public OptRule {
public:
    // The Limit-over-Sort pattern this rule applies to.
    const Pattern &pattern() const override;

    // Builds the TopN replacement for a match, or reports that nothing was
    // transformed when the limit has a non-zero offset.
    StatusOr<TransformResult> transform(graph::QueryContext *qctx,
                                        const MatchedResult &matched) const override;

    // The rule's name.
    std::string toString() const override;

private:
    // The one instance of this rule; it is defined in TopNRule.cpp.
    static std::unique_ptr<OptRule> kInstance;

    // Private so that only kInstance's initialiser creates the rule.
    TopNRule();
};
} // namespace opt
} // namespace nebula
#endif // OPTIMIZER_RULE_TOPNRULE_H_
......@@ -747,7 +747,6 @@ private:
int64_t count_{-1};
};
/**
* Do Aggregation with the given set of records,
* such as AVG(), COUNT()...
......
......@@ -18,53 +18,61 @@ class TestOptimizer(NebulaTestSuite):
def test_PushFilterDownGetNbrsRule(self):
resp = self.execute_query('''
GO 1 STEPS FROM "Kobe Bryant" OVER serve
WHERE $^.player.age > 18 YIELD $^.player.name AS name
GO 1 STEPS FROM "Boris Diaw" OVER serve
WHERE $^.player.age > 18 YIELD serve.start_year as start_year
''')
expected_plan = [
["Project", [1]],
["GetNeighbors", [2], ['($^.player.age>18)']],
["Start", []]
]
expected_data = [[2003], [2005], [2008], [2012], [2016]]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)
resp = self.execute_query('''
GO 1 STEPS FROM "Kobe Bryant" OVER like REVERSELY
WHERE $^.player.age > 18 YIELD $^.player.name AS name
GO 1 STEPS FROM "James Harden" OVER like REVERSELY
WHERE $^.player.age > 18 YIELD like.likeness as likeness
''')
expected_plan = [
["Project", [1]],
["GetNeighbors", [2], ['($^.player.age>18)']],
["Start", []]
]
expected_data = [[90], [80], [99]]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)
resp = self.execute_query('''
GO 1 STEPS FROM "Kobe Bryant" OVER serve
WHERE serve.start_year > 2002 YIELD $^.player.name AS name
GO 1 STEPS FROM "Boris Diaw" OVER serve
WHERE serve.start_year > 2005 YIELD serve.start_year as start_year
''')
expected_plan = [
["Project", [1]],
["GetNeighbors", [2], ['(serve.start_year>2002)']],
["GetNeighbors", [2], ['(serve.start_year>2005)']],
["Start", []]
]
expected_data = [[2008], [2012], [2016]]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)
resp = self.execute_query('''
GO 1 STEPS FROM "Lakerys" OVER serve REVERSELY
WHERE serve.start_year > 2002 YIELD $^.player.name AS name
GO 1 STEPS FROM "Lakers" OVER serve REVERSELY
WHERE serve.start_year < 2017 YIELD serve.start_year as start_year
''')
expected_plan = [
["Project", [1]],
["GetNeighbors", [2], ['(serve.start_year>2002)']],
["GetNeighbors", [2], ['(serve.start_year<2017)']],
["Start", []]
]
expected_data = [[2012], [1996], [2008], [1996], [2012]]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)
@pytest.mark.skip(reason="Depends on other opt rules to eliminate duplicate project nodes")
def test_PushFilterDownGetNbrsRule_Failed(self):
resp = self.execute_query('''
GO 1 STEPS FROM "Kobe Bryant" OVER serve
GO 1 STEPS FROM "Boris Diaw" OVER serve
WHERE $^.player.age > 18 AND $$.team.name == "Lakers"
YIELD $^.player.name AS name
''')
......@@ -74,10 +82,12 @@ class TestOptimizer(NebulaTestSuite):
["GetNeighbors", [3], ['($^.player.age>18)']],
["Start", []]
]
expected_data = [['Boris Diaw']]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)
resp = self.execute_query('''
GO 1 STEPS FROM "Kobe Bryant" OVER serve
GO 1 STEPS FROM "Boris Diaw" OVER serve
WHERE $^.player.age > 18 OR $$.team.name == "Lakers"
YIELD $^.player.name AS name
''')
......@@ -87,11 +97,13 @@ class TestOptimizer(NebulaTestSuite):
["GetNeighbors", [3]],
["Start", []]
]
expected_data = [['Boris Diaw']]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)
# fail to optimize cases
resp = self.execute_query('''
GO 1 STEPS FROM "Kobe Bryant" OVER serve \
GO 1 STEPS FROM "Boris Diaw" OVER serve \
WHERE $$.team.name == "Lakers" YIELD $^.player.name AS name
''')
expected_plan = [
......@@ -100,4 +112,76 @@ class TestOptimizer(NebulaTestSuite):
["GetNeighbors", [3]],
["Start", []]
]
expected_data = [['Boris Diaw']]
self.check_exec_plan(resp, expected_plan)
self.check_out_of_order_result(resp, expected_data)
# Positive cases for the TopN rule: ORDER BY followed by LIMIT with no offset
# is expected to be planned as a single TopN node, not a Sort plus a Limit.
def test_TopNRule(self):
# Case 1: forward traversal over `like`, ordered by likeness, limited to 2.
resp = self.execute_query('''
GO 1 STEPS FROM "Marco Belinelli" OVER like
YIELD like.likeness AS likeness
| ORDER BY likeness
| LIMIT 2
''')
# Each entry is [node name, [dependency ids]]; TopN appears where the
# unoptimized plan would have a Limit over a Sort.
expected_plan = [
["DataCollect", [1]],
["TopN", [2]],
["Project", [3]],
["GetNeighbors", [4]],
["Start", []]
]
expected_data = [[50], [55]]
self.check_exec_plan(resp, expected_plan)
# NOTE(review): check_result presumably compares rows in order (unlike
# check_out_of_order_result used elsewhere in this suite) - confirm.
self.check_result(resp, expected_data)
# Case 2: the same pipeline over the reverse edge direction with LIMIT 1.
resp = self.execute_query('''
GO 1 STEPS FROM "Marco Belinelli" OVER like REVERSELY
YIELD like.likeness AS likeness |
ORDER BY likeness |
LIMIT 1
''')
expected_plan = [
["DataCollect", [1]],
["TopN", [2]],
["Project", [3]],
["GetNeighbors", [4]],
["Start", []]
]
expected_data = [[83]]
self.check_exec_plan(resp, expected_plan)
self.check_result(resp, expected_data)
# Negative cases for the TopN rule: queries whose plans are expected to keep
# their Sort (and Limit) nodes and contain no TopN node.
def test_TopNRule_Failed(self):
# Case 1: LIMIT 2, 3 has a non-zero first argument, and the expected plan
# keeps the separate Limit and Sort nodes.
resp = self.execute_query('''
GO 1 STEPS FROM "Marco Belinelli" OVER like
YIELD like.likeness as likeness
| ORDER BY likeness
| LIMIT 2, 3
''')
expected_plan = [
["DataCollect", [1]],
["Limit", [2]],
["Sort", [3]],
["Project", [4]],
["GetNeighbors", [5]],
["Start", []]
]
expected_data = [[60]]
self.check_exec_plan(resp, expected_plan)
self.check_result(resp, expected_data)
# Case 2: ORDER BY with no LIMIT at all; the expected plan has a Sort node
# and neither a Limit nor a TopN node.
resp = self.execute_query('''
GO 1 STEPS FROM "Marco Belinelli" OVER like
YIELD like.likeness AS likeness
| ORDER BY likeness
''')
expected_plan = [
["DataCollect", [1]],
["Sort", [2]],
["Project", [3]],
["GetNeighbors", [4]],
["Start", []]
]
expected_data = [[50], [55], [60]]
self.check_exec_plan(resp, expected_plan)
self.check_result(resp, expected_data)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment