From ef89313d12a4d891804e5c83c7b2bcabb21799b1 Mon Sep 17 00:00:00 2001 From: Yee <2520865+yixinglu@users.noreply.github.com> Date: Thu, 10 Sep 2020 10:01:51 +0800 Subject: [PATCH] Add optimizer (#247) * Add optimizer * Fix input/output variable of new plan node * Split default and query opt rules Co-authored-by: dutor <440396+dutor@users.noreply.github.com> --- ci/test.sh | 1 + conf/nebula-graphd.conf.default | 2 + conf/nebula-graphd.conf.production | 2 + resources/gflags.json | 4 +- src/context/QueryContext.h | 4 + src/daemons/CMakeLists.txt | 2 +- src/mock/test/CMakeLists.txt | 4 +- src/optimizer/CMakeLists.txt | 7 +- src/optimizer/OptGroup.cpp | 181 ++++++++++++++++++ src/optimizer/OptGroup.h | 121 ++++++++++++ src/optimizer/OptRule.cpp | 44 +++++ src/optimizer/OptRule.h | 69 +++++++ src/optimizer/Optimizer.cpp | 108 +++++++++++ src/optimizer/Optimizer.h | 49 +++++ .../rule/PushFilterDownGetNbrsRule.cpp | 160 ++++++++++++++++ .../rule/PushFilterDownGetNbrsRule.h | 43 +++++ src/planner/ExecutionPlan.cpp | 3 +- src/planner/ExecutionPlan.h | 2 +- src/planner/PlanNode.cpp | 6 +- src/planner/PlanNode.h | 7 + src/planner/Query.cpp | 3 +- src/service/GraphFlags.cpp | 2 + src/service/GraphFlags.h | 3 + src/service/QueryInstance.cpp | 21 +- src/service/QueryInstance.h | 8 +- src/visitor/CMakeLists.txt | 1 + src/visitor/ExtractFilterExprVisitor.cpp | 102 ++++++++++ src/visitor/ExtractFilterExprVisitor.h | 57 ++++++ tests/admin/test_configs.py | 6 +- ...oad_test_data.py => global_data_loader.py} | 11 +- tests/common/nebula_service.py | 9 +- tests/common/nebula_test_suite.py | 89 ++++++--- tests/nebula-test-run.py | 30 ++- tests/ntr.out-of-source | 2 +- tests/query/v2/test_optimizer.py | 103 ++++++++++ 35 files changed, 1190 insertions(+), 76 deletions(-) create mode 100644 src/optimizer/OptGroup.cpp create mode 100644 src/optimizer/OptGroup.h create mode 100644 src/optimizer/OptRule.cpp create mode 100644 src/optimizer/OptRule.h create mode 100644 src/optimizer/Optimizer.cpp create mode 100644 src/optimizer/Optimizer.h create mode 100644 src/optimizer/rule/PushFilterDownGetNbrsRule.cpp create mode 100644 src/optimizer/rule/PushFilterDownGetNbrsRule.h create mode 100644 src/visitor/ExtractFilterExprVisitor.cpp create mode 100644 src/visitor/ExtractFilterExprVisitor.h rename tests/common/{load_test_data.py => global_data_loader.py} (97%) create mode 100644 tests/query/v2/test_optimizer.py diff --git a/ci/test.sh b/ci/test.sh index 7b58f113..ca6ea80d 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -84,6 +84,7 @@ function run_ctest() { function run_test() { cd $BUILD_DIR/tests + export PYTHONPATH=$PROJ_DIR:$PYTHONPATH ./ntr \ -n=8 \ --dist=loadfile \ diff --git a/conf/nebula-graphd.conf.default b/conf/nebula-graphd.conf.default index 0f421be5..d16efcf2 100644 --- a/conf/nebula-graphd.conf.default +++ b/conf/nebula-graphd.conf.default @@ -3,6 +3,8 @@ --daemonize=true # The file to host the process id --pid_file=pids/nebula-graphd.pid +# Whether to enable optimizer +--enable_optimizer=false ########## logging ########## # The directory to host logging files, which must already exists diff --git a/conf/nebula-graphd.conf.production b/conf/nebula-graphd.conf.production index af77883c..f3dc0ffc 100644 --- a/conf/nebula-graphd.conf.production +++ b/conf/nebula-graphd.conf.production @@ -3,6 +3,8 @@ --daemonize=true # The file to host the process id --pid_file=pids/nebula-graphd.pid +# Whether to enable optimizer +--enable_optimizer=false ########## logging ########## # The directory to host logging files, which must already exists diff --git a/resources/gflags.json b/resources/gflags.json index c265b9cd..c1d87708 100644 --- a/resources/gflags.json +++ b/resources/gflags.json @@ -9,7 +9,8 @@ "wal_ttl", "enable_reservoir_sampling", "custom_filter_interval_secs", - "enable_multi_versions" + "enable_multi_versions", + "enable_optimizer" ], "NESTED": [ "rocksdb_db_options", @@ -17,4 +18,3 @@ "rocksdb_block_based_table_options" ] } - diff --git a/src/context/QueryContext.h b/src/context/QueryContext.h index caeeb14e..97fe1f0f 100644 --- a/src/context/QueryContext.h +++ b/src/context/QueryContext.h @@ -91,6 +91,10 @@ public: return ep_.get(); } + void setPlan(std::unique_ptr<ExecutionPlan> plan) { + ep_ = std::move(plan); + } + meta::SchemaManager* schemaMng() const { return sm_; } diff --git a/src/daemons/CMakeLists.txt b/src/daemons/CMakeLists.txt index b4a05c34..7f76d5bf 100644 --- a/src/daemons/CMakeLists.txt +++ b/src/daemons/CMakeLists.txt @@ -13,10 +13,10 @@ nebula_add_executable( $<TARGET_OBJECTS:service_obj> $<TARGET_OBJECTS:session_obj> $<TARGET_OBJECTS:query_engine_obj> - $<TARGET_OBJECTS:graph_flags_obj> $<TARGET_OBJECTS:parser_obj> $<TARGET_OBJECTS:validator_obj> $<TARGET_OBJECTS:expr_visitor_obj> + $<TARGET_OBJECTS:optimizer_obj> $<TARGET_OBJECTS:planner_obj> $<TARGET_OBJECTS:executor_obj> $<TARGET_OBJECTS:scheduler_obj> diff --git a/src/mock/test/CMakeLists.txt b/src/mock/test/CMakeLists.txt index 06cd31f3..b2cf9f28 100644 --- a/src/mock/test/CMakeLists.txt +++ b/src/mock/test/CMakeLists.txt @@ -20,8 +20,8 @@ set(GRAPH_TEST_LIB $<TARGET_OBJECTS:idgenerator_obj> $<TARGET_OBJECTS:context_obj> $<TARGET_OBJECTS:graph_auth_obj> + $<TARGET_OBJECTS:optimizer_obj> $<TARGET_OBJECTS:common_time_function_obj> - $<TARGET_OBJECTS:common_graph_client_obj> $<TARGET_OBJECTS:common_expression_obj> $<TARGET_OBJECTS:common_http_client_obj> $<TARGET_OBJECTS:common_network_obj> @@ -29,10 +29,8 @@ set(GRAPH_TEST_LIB $<TARGET_OBJECTS:common_graph_client_obj> $<TARGET_OBJECTS:common_storage_client_base_obj> $<TARGET_OBJECTS:common_graph_storage_client_obj> - $<TARGET_OBJECTS:common_storage_client_base_obj> $<TARGET_OBJECTS:common_meta_client_obj> $<TARGET_OBJECTS:common_stats_obj> - $<TARGET_OBJECTS:common_time_obj> $<TARGET_OBJECTS:common_meta_thrift_obj> $<TARGET_OBJECTS:common_graph_thrift_obj> $<TARGET_OBJECTS:common_common_thrift_obj> diff --git a/src/optimizer/CMakeLists.txt b/src/optimizer/CMakeLists.txt index a3835155..39cbaea8 100644 --- a/src/optimizer/CMakeLists.txt +++ b/src/optimizer/CMakeLists.txt @@ -4,8 +4,13 @@ # attached with Common Clause Condition 1.0, found in the LICENSES directory. nebula_add_library( - optimizer_obj OBJECT + optimizer_obj + OBJECT OptimizerUtils.cpp + Optimizer.cpp + OptGroup.cpp + OptRule.cpp + rule/PushFilterDownGetNbrsRule.cpp ) nebula_add_subdirectory(test) diff --git a/src/optimizer/OptGroup.cpp b/src/optimizer/OptGroup.cpp new file mode 100644 index 00000000..4a055ee0 --- /dev/null +++ b/src/optimizer/OptGroup.cpp @@ -0,0 +1,181 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#include "optimizer/OptGroup.h" + +#include <limits> + +#include "context/QueryContext.h" +#include "optimizer/OptRule.h" +#include "planner/Logic.h" +#include "planner/PlanNode.h" + +using nebula::graph::BiInputNode; +using nebula::graph::Loop; +using nebula::graph::PlanNode; +using nebula::graph::QueryContext; +using nebula::graph::Select; +using nebula::graph::SingleDependencyNode; + +namespace nebula { +namespace opt { + +OptGroup *OptGroup::create(QueryContext *qctx) { + return qctx->objPool()->add(new OptGroup(qctx)); +} + +OptGroup::OptGroup(QueryContext *qctx) noexcept : qctx_(qctx) { + DCHECK(qctx != nullptr); +} + +void OptGroup::addGroupExpr(OptGroupExpr *groupExpr) { + DCHECK(groupExpr->group() == this); + groupExprs_.emplace_back(groupExpr); +} + +OptGroupExpr *OptGroup::makeGroupExpr(QueryContext *qctx, PlanNode *node) { + groupExprs_.emplace_back(OptGroupExpr::create(qctx, node, this)); + return groupExprs_.back(); +} + +Status OptGroup::explore(const OptRule *rule) { + if (isExplored(rule)) { + return Status::OK(); + } + setExplored(rule); + + for (auto iter = groupExprs_.begin(); iter != groupExprs_.end();) { + auto groupExpr = *iter; + if (groupExpr->isExplored(rule)) { + continue; + } + // Bottom to up exploration + NG_RETURN_IF_ERROR(groupExpr->explore(rule)); + + // Find more equivalents + if (!rule->match(groupExpr)) { + ++iter; + continue; + } + + OptRule::TransformResult result; + NG_RETURN_IF_ERROR(rule->transform(qctx_, groupExpr, &result)); + if (result.eraseAll) { + groupExprs_.clear(); + for (auto nge : result.newGroupExprs) { + addGroupExpr(nge); + } + break; + } + + if (!result.newGroupExprs.empty()) { + for (auto nge : result.newGroupExprs) { + addGroupExpr(nge); + } + + setUnexplored(rule); + } + + if (result.eraseCurr) { + iter = groupExprs_.erase(iter); + } else { + ++iter; + } + } + + return Status::OK(); +} + +std::pair<double, const OptGroupExpr *> OptGroup::findMinCostGroupExpr() const { + double minCost = std::numeric_limits<double>::max(); + const OptGroupExpr *minGroupExpr = nullptr; + for (auto &groupExpr : groupExprs_) { + double cost = groupExpr->getCost(); + if (minCost > cost) { + minCost = cost; + minGroupExpr = groupExpr; + } + } + return std::make_pair(minCost, minGroupExpr); +} + +double OptGroup::getCost() const { + return findMinCostGroupExpr().first; +} + +const PlanNode *OptGroup::getPlan() const { + const OptGroupExpr *minGroupExpr = findMinCostGroupExpr().second; + DCHECK(minGroupExpr != nullptr); + return minGroupExpr->getPlan(); +} + +OptGroupExpr *OptGroupExpr::create(QueryContext *qctx, PlanNode *node, const OptGroup *group) { + return qctx->objPool()->add(new OptGroupExpr(node, group)); +} + +OptGroupExpr::OptGroupExpr(PlanNode *node, const OptGroup *group) noexcept + : node_(node), group_(group) { + DCHECK(node != nullptr); + DCHECK(group != nullptr); +} + +Status OptGroupExpr::explore(const OptRule *rule) { + if (isExplored(rule)) return Status::OK(); + setExplored(rule); + + for (auto dep : dependencies_) { + if (!dep->isExplored(rule)) { + NG_RETURN_IF_ERROR(dep->explore(rule)); + } + } + + for (auto body : bodies_) { + if (!body->isExplored(rule)) { + NG_RETURN_IF_ERROR(body->explore(rule)); + } + } + return Status::OK(); +} + +double OptGroupExpr::getCost() const { + return node_->cost(); +} + +const PlanNode *OptGroupExpr::getPlan() const { + switch (node_->dependencies().size()) { + case 0: { + DCHECK(dependencies_.empty()); + break; + } + case 1: { + DCHECK_EQ(dependencies_.size(), 1U); + if (node_->kind() == PlanNode::Kind::kSelect) { + DCHECK_EQ(bodies_.size(), 2U); + auto select = static_cast<Select *>(node_); + select->setIf(const_cast<PlanNode *>(bodies_[0]->getPlan())); + select->setElse(const_cast<PlanNode *>(bodies_[1]->getPlan())); + } else if (node_->kind() == PlanNode::Kind::kLoop) { + DCHECK_EQ(bodies_.size(), 1U); + auto loop = static_cast<Loop *>(node_); + loop->setBody(const_cast<PlanNode *>(bodies_[0]->getPlan())); + } + auto singleDepNode = static_cast<SingleDependencyNode *>(node_); + singleDepNode->dependsOn(dependencies_[0]->getPlan()); + break; + } + case 2: { + DCHECK_EQ(dependencies_.size(), 2U); + auto bNode = static_cast<BiInputNode *>(node_); + bNode->setLeft(dependencies_[0]->getPlan()); + bNode->setRight(dependencies_[1]->getPlan()); + break; + } + } + return node_; +} + +} // namespace opt +} // namespace nebula diff --git a/src/optimizer/OptGroup.h b/src/optimizer/OptGroup.h new file mode 100644 index 00000000..0abab839 --- /dev/null +++ b/src/optimizer/OptGroup.h @@ -0,0 +1,121 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#ifndef OPTIMIZER_OPTGROUP_H_ +#define OPTIMIZER_OPTGROUP_H_ + +#include <algorithm> +#include <vector> +#include "common/base/Status.h" + +namespace nebula { +namespace graph { +class PlanNode; +class QueryContext; +} // namespace graph + +namespace opt { + +class OptGroupExpr; +class OptRule; + +class OptGroup final { +public: + static OptGroup *create(graph::QueryContext *qctx); + + bool isExplored(const OptRule *rule) const { + return std::find(exploredRules_.cbegin(), exploredRules_.cend(), rule) != + exploredRules_.cend(); + } + + void setExplored(const OptRule *rule) { + exploredRules_.emplace_back(rule); + } + + void setUnexplored(const OptRule *rule) { + auto iter = std::find(exploredRules_.begin(), exploredRules_.end(), rule); + if (iter != exploredRules_.end()) { + exploredRules_.erase(iter); + } + } + + void addGroupExpr(OptGroupExpr *groupExpr); + OptGroupExpr *makeGroupExpr(graph::QueryContext *qctx, graph::PlanNode *node); + const std::vector<OptGroupExpr *> &groupExprs() const { + return groupExprs_; + } + + Status explore(const OptRule *rule); + double getCost() const; + const graph::PlanNode *getPlan() const; + +private: + explicit OptGroup(graph::QueryContext *qctx) noexcept; + + std::pair<double, const OptGroupExpr *> findMinCostGroupExpr() const; + + graph::QueryContext *qctx_{nullptr}; + std::vector<OptGroupExpr *> groupExprs_; + std::vector<const OptRule *> exploredRules_; +}; + +class OptGroupExpr final { +public: + static OptGroupExpr *create(graph::QueryContext *qctx, + graph::PlanNode *node, + const OptGroup *group); + + void dependsOn(OptGroup *dep) { + dependencies_.emplace_back(dep); + } + + const std::vector<OptGroup *> &dependencies() const { + return dependencies_; + } + + void addBody(OptGroup *body) { + bodies_.emplace_back(body); + } + + const std::vector<OptGroup *> &bodies() const { + return bodies_; + } + + bool isExplored(const OptRule *rule) const { + return std::find(exploredRules_.cbegin(), exploredRules_.cend(), rule) != + exploredRules_.cend(); + } + + void setExplored(const OptRule *rule) { + exploredRules_.emplace_back(rule); + } + + const OptGroup *group() const { + return group_; + } + + const graph::PlanNode *node() const { + return node_; + } + + Status explore(const OptRule *rule); + double getCost() const; + const graph::PlanNode *getPlan() const; + +private: + OptGroupExpr(graph::PlanNode *node, const OptGroup *group) noexcept; + + graph::PlanNode *node_{nullptr}; + const OptGroup *group_{nullptr}; + std::vector<OptGroup *> dependencies_; + std::vector<OptGroup *> bodies_; + std::vector<const OptRule *> exploredRules_; +}; + +} // namespace opt +} // namespace nebula + +#endif // OPTIMIZER_OPTGROUP_H_ diff --git a/src/optimizer/OptRule.cpp b/src/optimizer/OptRule.cpp new file mode 100644 index 00000000..e15b10f8 --- /dev/null +++ b/src/optimizer/OptRule.cpp @@ -0,0 +1,44 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#include "optimizer/OptRule.h" + +#include "common/base/Logging.h" + +namespace nebula { +namespace opt { + +RuleSet &RuleSet::defaultRules() { + static RuleSet kDefaultRules("DefaultRuleSet"); + return kDefaultRules; +} + +RuleSet &RuleSet::queryRules() { + static RuleSet kQueryRules("QueryRules"); + return kQueryRules; +} + +RuleSet::RuleSet(const std::string &name) : name_(name) {} + +RuleSet *RuleSet::addRule(const OptRule *rule) { + DCHECK(rule != nullptr); + auto found = std::find(rules_.begin(), rules_.end(), rule); + if (found == rules_.end()) { + rules_.emplace_back(rule); + } else { + LOG(WARNING) << "Rule set " << name_ << " has contained this rule: " << rule->toString(); + } + return this; +} + +void RuleSet::merge(const RuleSet &ruleset) { + for (auto rule : ruleset.rules()) { + addRule(rule); + } +} + +} // namespace opt +} // namespace nebula diff --git a/src/optimizer/OptRule.h b/src/optimizer/OptRule.h new file mode 100644 index 00000000..7db633c7 --- /dev/null +++ b/src/optimizer/OptRule.h @@ -0,0 +1,69 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#ifndef OPTIMIZER_OPTRULE_H_ +#define OPTIMIZER_OPTRULE_H_ + +#include <memory> +#include <string> +#include <vector> + +#include "common/base/Status.h" +#include "planner/PlanNode.h" + +namespace nebula { +namespace graph { +class QueryContext; +} // namespace graph + +namespace opt { + +class OptGroupExpr; + +class OptRule { +public: + struct TransformResult { + bool eraseCurr; + bool eraseAll; + std::vector<OptGroupExpr *> newGroupExprs; + }; + + virtual ~OptRule() = default; + + virtual bool match(const OptGroupExpr *groupExpr) const = 0; + virtual Status transform(graph::QueryContext *qctx, + const OptGroupExpr *groupExpr, + TransformResult *result) const = 0; + virtual std::string toString() const = 0; + +protected: + OptRule() = default; +}; + +class RuleSet final { +public: + static RuleSet &defaultRules(); + static RuleSet &queryRules(); + + RuleSet *addRule(const OptRule *rule); + + void merge(const RuleSet &ruleset); + + const std::vector<const OptRule *> &rules() const { + return rules_; + } + +private: + explicit RuleSet(const std::string &name); + + std::string name_; + std::vector<const OptRule *> rules_; +}; + +} // namespace opt +} // namespace nebula + +#endif // OPTIMIZER_OPTRULE_H_ diff --git a/src/optimizer/Optimizer.cpp b/src/optimizer/Optimizer.cpp new file mode 100644 index 00000000..3e94edc3 --- /dev/null +++ b/src/optimizer/Optimizer.cpp @@ -0,0 +1,108 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#include "optimizer/Optimizer.h" + +#include "context/QueryContext.h" +#include "optimizer/OptGroup.h" +#include "optimizer/OptRule.h" +#include "planner/ExecutionPlan.h" +#include "planner/Logic.h" +#include "planner/PlanNode.h" + +using nebula::graph::BiInputNode; +using nebula::graph::Loop; +using nebula::graph::PlanNode; +using nebula::graph::QueryContext; +using nebula::graph::Select; +using nebula::graph::SingleDependencyNode; + +namespace nebula { +namespace opt { + +Optimizer::Optimizer(QueryContext *qctx, std::vector<const RuleSet *> ruleSets) + : qctx_(qctx), ruleSets_(std::move(ruleSets)) { + DCHECK(qctx != nullptr); +} + +StatusOr<const PlanNode *> Optimizer::findBestPlan(PlanNode *root) { + DCHECK(root != nullptr); + rootNode_ = root; + NG_RETURN_IF_ERROR(prepare()); + NG_RETURN_IF_ERROR(doExploration()); + return rootGroup_->getPlan(); +} + +Status Optimizer::prepare() { + visitedNodes_.clear(); + rootGroup_ = convertToGroup(rootNode_); + return Status::OK(); +} + +Status Optimizer::doExploration() { + // TODO(yee): Apply all rules recursively, not only once round + for (auto ruleSet : ruleSets_) { + for (auto rule : ruleSet->rules()) { + if (!rootGroup_->isExplored(rule)) { + NG_RETURN_IF_ERROR(rootGroup_->explore(rule)); + } + } + } + return Status::OK(); +} + +OptGroup *Optimizer::convertToGroup(PlanNode *node) { + auto iter = visitedNodes_.find(node->id()); + if (iter != visitedNodes_.cend()) { + return iter->second; + } + + auto group = OptGroup::create(qctx_); + auto groupExpr = group->makeGroupExpr(qctx_, node); + + switch (node->dependencies().size()) { + case 0: { + // Do nothing + break; + } + case 1: { + if (node->kind() == PlanNode::Kind::kSelect) { + auto select = static_cast<Select *>(node); + auto then = convertToGroup(const_cast<PlanNode *>(select->then())); + groupExpr->addBody(then); + auto otherwise = convertToGroup(const_cast<PlanNode *>(select->otherwise())); + groupExpr->addBody(otherwise); + } else if (node->kind() == PlanNode::Kind::kLoop) { + auto loop = static_cast<Loop *>(node); + auto body = convertToGroup(const_cast<PlanNode *>(loop->body())); + groupExpr->addBody(body); + } + auto dep = static_cast<SingleDependencyNode *>(node)->dep(); + DCHECK(dep != nullptr); + auto depGroup = convertToGroup(const_cast<graph::PlanNode *>(dep)); + groupExpr->dependsOn(depGroup); + break; + } + case 2: { + auto bNode = static_cast<BiInputNode *>(node); + auto leftGroup = convertToGroup(const_cast<graph::PlanNode *>(bNode->left())); + groupExpr->dependsOn(leftGroup); + auto rightGroup = convertToGroup(const_cast<graph::PlanNode *>(bNode->right())); + groupExpr->dependsOn(rightGroup); + break; + } + default: { + LOG(FATAL) << "Invalid number of plan node dependencies: " + << node->dependencies().size(); + break; + } + } + visitedNodes_.emplace(node->id(), group); + return group; +} + +} // namespace opt +} // namespace nebula diff --git a/src/optimizer/Optimizer.h b/src/optimizer/Optimizer.h new file mode 100644 index 00000000..c03d0e64 --- /dev/null +++ b/src/optimizer/Optimizer.h @@ -0,0 +1,49 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#ifndef OPTIMIZER_OPTIMIZER_H_ +#define OPTIMIZER_OPTIMIZER_H_ + +#include <memory> + +#include "common/base/StatusOr.h" + +namespace nebula { +namespace graph { +class PlanNode; +class QueryContext; +} // namespace graph + +namespace opt { + +class OptGroup; +class OptGroupExpr; +class RuleSet; + +class Optimizer final { +public: + Optimizer(graph::QueryContext *qctx, std::vector<const RuleSet *> ruleSets); + ~Optimizer() = default; + + StatusOr<const graph::PlanNode *> findBestPlan(graph::PlanNode *root); + +private: + Status prepare(); + Status doExploration(); + + OptGroup *convertToGroup(graph::PlanNode *node); + + graph::QueryContext *qctx_{nullptr}; + graph::PlanNode *rootNode_{nullptr}; + OptGroup *rootGroup_{nullptr}; + std::vector<const RuleSet *> ruleSets_; + std::unordered_map<int64_t, OptGroup *> visitedNodes_; +}; + +} // namespace opt +} // namespace nebula + +#endif // OPTIMIZER_OPTIMIZER_H_ diff --git a/src/optimizer/rule/PushFilterDownGetNbrsRule.cpp b/src/optimizer/rule/PushFilterDownGetNbrsRule.cpp new file mode 100644 index 00000000..2a5df8ea --- /dev/null +++ b/src/optimizer/rule/PushFilterDownGetNbrsRule.cpp @@ -0,0 +1,160 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#include "optimizer/rule/PushFilterDownGetNbrsRule.h" + +#include "common/expression/BinaryExpression.h" +#include "common/expression/ConstantExpression.h" +#include "common/expression/Expression.h" +#include "common/expression/FunctionCallExpression.h" +#include "common/expression/LogicalExpression.h" +#include "common/expression/UnaryExpression.h" +#include "optimizer/OptGroup.h" +#include "planner/PlanNode.h" +#include "planner/Query.h" +#include "visitor/ExtractFilterExprVisitor.h" + +using nebula::graph::Filter; +using nebula::graph::GetNeighbors; +using nebula::graph::PlanNode; +using nebula::graph::QueryContext; + +namespace nebula { +namespace opt { + +std::unique_ptr<OptRule> PushFilterDownGetNbrsRule::kInstance = + std::unique_ptr<PushFilterDownGetNbrsRule>(new PushFilterDownGetNbrsRule()); + +PushFilterDownGetNbrsRule::PushFilterDownGetNbrsRule() { + RuleSet::queryRules().addRule(this); +} + +bool PushFilterDownGetNbrsRule::match(const OptGroupExpr *groupExpr) const { + auto pair = findMatchedGroupExpr(groupExpr); + if (!pair.first) { + return false; + } + + return true; +} + +Status PushFilterDownGetNbrsRule::transform(QueryContext *qctx, + const OptGroupExpr *groupExpr, + TransformResult *result) const { + auto pair = findMatchedGroupExpr(groupExpr); + auto filter = static_cast<const Filter *>(groupExpr->node()); + auto gn = static_cast<const GetNeighbors *>(pair.second->node()); + + auto condition = filter->condition()->clone(); + graph::ExtractFilterExprVisitor visitor; + condition->accept(&visitor); + if (!visitor.ok()) { + result->eraseCurr = false; + result->eraseAll = false; + return Status::OK(); + } + + auto pool = qctx->objPool(); + auto remainedExpr = std::move(visitor).remainedExpr(); + OptGroupExpr *newFilterGroupExpr = nullptr; + if (remainedExpr != nullptr) { + auto newFilter = Filter::make(qctx, nullptr, pool->add(remainedExpr.release())); + newFilterGroupExpr = OptGroupExpr::create(qctx, newFilter, groupExpr->group()); + } + + auto newGNFilter = condition->encode(); + if (!gn->filter().empty()) { + auto filterExpr = Expression::decode(gn->filter()); + LogicalExpression logicExpr( + Expression::Kind::kLogicalAnd, condition.release(), filterExpr.release()); + newGNFilter = logicExpr.encode(); + } + + auto newGN = cloneGetNbrs(qctx, gn); + newGN->setFilter(newGNFilter); + + OptGroupExpr *newGroupExpr = nullptr; + if (newFilterGroupExpr != nullptr) { + // Filter(A&&B)->GetNeighbors(C) => Filter(A)->GetNeighbors(B&&C) + auto newGroup = OptGroup::create(qctx); + newGroupExpr = OptGroupExpr::create(qctx, newGN, newGroup); + newFilterGroupExpr->dependsOn(newGroup); + } else { + // Filter(A)->GetNeighbors(C) => GetNeighbors(A&&C) + newGroupExpr = OptGroupExpr::create(qctx, newGN, groupExpr->group()); + newGN->setOutputVar(filter->varName()); + } + + for (auto dep : pair.second->dependencies()) { + newGroupExpr->dependsOn(dep); + } + result->newGroupExprs.emplace_back(newFilterGroupExpr ? newFilterGroupExpr : newGroupExpr); + result->eraseAll = true; + result->eraseCurr = true; + + return Status::OK(); +} + +std::string PushFilterDownGetNbrsRule::toString() const { + return "PushFilterDownGetNbrsRule"; +} + +std::pair<bool, const OptGroupExpr *> PushFilterDownGetNbrsRule::findMatchedGroupExpr( + const OptGroupExpr *groupExpr) const { + auto node = groupExpr->node(); + if (node->kind() != PlanNode::Kind::kFilter) { + return std::make_pair(false, nullptr); + } + + for (auto dep : groupExpr->dependencies()) { + for (auto expr : dep->groupExprs()) { + if (expr->node()->kind() == PlanNode::Kind::kGetNeighbors) { + return std::make_pair(true, expr); + } + } + } + return std::make_pair(false, nullptr); +} + +GetNeighbors *PushFilterDownGetNbrsRule::cloneGetNbrs(QueryContext *qctx, + const GetNeighbors *gn) const { + auto newGN = GetNeighbors::make(qctx, nullptr, gn->space()); + newGN->setSrc(gn->src()); + newGN->setEdgeTypes(gn->edgeTypes()); + newGN->setEdgeDirection(gn->edgeDirection()); + newGN->setDedup(gn->dedup()); + newGN->setRandom(gn->random()); + newGN->setLimit(gn->limit()); + newGN->setInputVar(gn->inputVar()); + + if (gn->vertexProps()) { + auto vertexProps = *gn->vertexProps(); + auto vertexPropsPtr = std::make_unique<decltype(vertexProps)>(std::move(vertexProps)); + newGN->setVertexProps(std::move(vertexPropsPtr)); + } + + if (gn->edgeProps()) { + auto edgeProps = *gn->edgeProps(); + auto edgePropsPtr = std::make_unique<decltype(edgeProps)>(std::move(edgeProps)); + newGN->setEdgeProps(std::move(edgePropsPtr)); + } + + if (gn->statProps()) { + auto statProps = *gn->statProps(); + auto statPropsPtr = std::make_unique<decltype(statProps)>(std::move(statProps)); + newGN->setStatProps(std::move(statPropsPtr)); + } + + if (gn->exprs()) { + auto exprs = *gn->exprs(); + auto exprsPtr = std::make_unique<decltype(exprs)>(std::move(exprs)); + newGN->setExprs(std::move(exprsPtr)); + } + return newGN; +} + +} // namespace opt +} // namespace nebula diff --git a/src/optimizer/rule/PushFilterDownGetNbrsRule.h b/src/optimizer/rule/PushFilterDownGetNbrsRule.h new file mode 100644 index 00000000..0800b20f --- /dev/null +++ b/src/optimizer/rule/PushFilterDownGetNbrsRule.h @@ -0,0 +1,43 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#ifndef OPTIMIZER_RULE_PUSHFILTERDOWNGETNBRSRULE_H_ +#define OPTIMIZER_RULE_PUSHFILTERDOWNGETNBRSRULE_H_ + +#include <memory> + +#include "optimizer/OptRule.h" + +namespace nebula { +namespace graph { +class GetNeighbors; +} // namespace graph + +namespace opt { + +class PushFilterDownGetNbrsRule final : public OptRule { +public: + static std::unique_ptr<OptRule> kInstance; + + bool match(const OptGroupExpr *groupExpr) const override; + Status transform(graph::QueryContext *qctx, + const OptGroupExpr *groupExpr, + TransformResult *result) const override; + std::string toString() const override; + +private: + PushFilterDownGetNbrsRule(); + + graph::GetNeighbors *cloneGetNbrs(graph::QueryContext *qctx, + const graph::GetNeighbors *getNbrs) const; + + std::pair<bool, const OptGroupExpr *> findMatchedGroupExpr(const OptGroupExpr *groupExpr) const; +}; + +} // namespace opt +} // namespace nebula + +#endif // OPTIMIZER_RULE_PUSHFILTERDOWNGETNBRSRULE_H_ diff --git a/src/planner/ExecutionPlan.cpp b/src/planner/ExecutionPlan.cpp index c7d5ebe6..3730bd97 100644 --- a/src/planner/ExecutionPlan.cpp +++ b/src/planner/ExecutionPlan.cpp @@ -15,7 +15,7 @@ namespace nebula { namespace graph { -ExecutionPlan::ExecutionPlan() : id_(EPIdGenerator::instance().id()) {} +ExecutionPlan::ExecutionPlan(PlanNode* root) : id_(EPIdGenerator::instance().id()), root_(root) {} ExecutionPlan::~ExecutionPlan() {} @@ -71,6 +71,7 @@ static size_t makePlanNodeDesc(const PlanNode* node, cpp2::PlanDescription* plan } default: { // Other plan nodes have single dependency + DCHECK_EQ(node->dependencies().size(), 1U); auto singleDepNode = static_cast<const SingleDependencyNode*>(node); makePlanNodeDesc(singleDepNode->dep(), planDesc); break; diff --git a/src/planner/ExecutionPlan.h b/src/planner/ExecutionPlan.h index 3ab82c6d..b0f3f699 100644 --- a/src/planner/ExecutionPlan.h +++ b/src/planner/ExecutionPlan.h @@ -20,7 +20,7 @@ class PlanNode; class ExecutionPlan final { public: - ExecutionPlan(); + explicit ExecutionPlan(PlanNode* root = nullptr); ~ExecutionPlan(); void setRoot(PlanNode* root) { diff --git a/src/planner/PlanNode.cpp b/src/planner/PlanNode.cpp index b25ad81f..32fa30c8 100644 --- a/src/planner/PlanNode.cpp +++ b/src/planner/PlanNode.cpp @@ -81,7 +81,7 @@ const char* PlanNode::toString(PlanNode::Kind kind) { return "InsertEdges"; case Kind::kDataCollect: return "DataCollect"; - // acl + // ACL case Kind::kCreateUser: return "CreateUser"; case Kind::kDropUser: @@ -174,6 +174,10 @@ void PlanNode::addDescription(std::string key, std::string value, cpp2::PlanNode desc->get_description()->emplace_back(std::move(kv)); } +void PlanNode::calcCost() { + VLOG(1) << "unimplemented cost calculation."; +} + std::unique_ptr<cpp2::PlanNodeDescription> PlanNode::explain() const { auto desc = std::make_unique<cpp2::PlanNodeDescription>(); desc->set_id(id_); diff --git a/src/planner/PlanNode.h b/src/planner/PlanNode.h index 7af43b8d..946fc976 100644 --- a/src/planner/PlanNode.h +++ b/src/planner/PlanNode.h @@ -102,6 +102,8 @@ public: // Describe plan node virtual std::unique_ptr<cpp2::PlanNodeDescription> explain() const; + virtual void calcCost(); + Kind kind() const { return kind_; } @@ -154,11 +156,16 @@ public: static const char* toString(Kind kind); + double cost() const { + return cost_; + } + protected: static void addDescription(std::string key, std::string value, cpp2::PlanNodeDescription* desc); Kind kind_{Kind::kUnknown}; int64_t id_{-1}; + double cost_{0.0}; std::string outputVar_; std::vector<std::string> colNames_; std::vector<const PlanNode*> dependencies_; diff --git a/src/planner/Query.cpp b/src/planner/Query.cpp index fc6efb03..c0c86173 100644 --- a/src/planner/Query.cpp +++ b/src/planner/Query.cpp @@ -21,7 +21,8 @@ std::unique_ptr<cpp2::PlanNodeDescription> Explore::explain() const { addDescription("space", folly::to<std::string>(space_), desc.get()); addDescription("dedup", util::toJson(dedup_), desc.get()); addDescription("limit", folly::to<std::string>(limit_), desc.get()); - addDescription("filter", filter_, desc.get()); + auto filter = filter_.empty() ? filter_ : Expression::decode(filter_)->toString(); + addDescription("filter", filter, desc.get()); addDescription("orderBy", folly::toJson(util::toJson(orderBy_)), desc.get()); return desc; } diff --git a/src/service/GraphFlags.cpp b/src/service/GraphFlags.cpp index 6b9f2f21..a8a358c9 100644 --- a/src/service/GraphFlags.cpp +++ b/src/service/GraphFlags.cpp @@ -46,3 +46,5 @@ DEFINE_string(auth_type, "password", "User login authentication type," DEFINE_string(cloud_http_url, "", "cloud http url including ip, port, url path"); DEFINE_uint32(max_allowed_statements, 512, "Max allowed sequential statements"); + +DEFINE_bool(enable_optimizer, false, "Whether to enable optimizer"); diff --git a/src/service/GraphFlags.h b/src/service/GraphFlags.h index 3474bc5f..25b90c81 100644 --- a/src/service/GraphFlags.h +++ b/src/service/GraphFlags.h @@ -35,4 +35,7 @@ DECLARE_string(auth_type); DECLARE_string(cloud_http_url); DECLARE_uint32(max_allowed_statements); +// optimizer +DECLARE_bool(enable_optimizer); + #endif // GRAPH_GRAPHFLAGS_H_ diff --git a/src/service/QueryInstance.cpp b/src/service/QueryInstance.cpp index 08fbc7af..aeac8a1d 100644 --- a/src/service/QueryInstance.cpp +++ b/src/service/QueryInstance.cpp @@ -9,15 +9,31 @@ #include "common/base/Base.h" #include "executor/ExecutionError.h" #include "executor/Executor.h" +#include "optimizer/OptRule.h" #include "parser/ExplainSentence.h" #include "planner/ExecutionPlan.h" #include "planner/PlanNode.h" #include "scheduler/Scheduler.h" +#include "service/GraphFlags.h" #include "validator/Validator.h" +using nebula::opt::Optimizer; +using nebula::opt::OptRule; +using nebula::opt::RuleSet; + namespace nebula { namespace graph { +QueryInstance::QueryInstance(std::unique_ptr<QueryContext> qctx) { + qctx_ = std::move(qctx); + scheduler_ = std::make_unique<Scheduler>(qctx_.get()); + std::vector<const RuleSet *> rulesets{&RuleSet::defaultRules()}; + if (FLAGS_enable_optimizer) { + rulesets.emplace_back(&RuleSet::queryRules()); + } + optimizer_ = std::make_unique<Optimizer>(qctx_.get(), std::move(rulesets)); +} + void QueryInstance::execute() { Status status = validateAndOptimize(); if (!status.ok()) { @@ -51,7 +67,10 @@ Status QueryInstance::validateAndOptimize() { NG_RETURN_IF_ERROR(Validator::validate(sentence_.get(), qctx())); - // TODO: optional optimize for plan. + auto rootStatus = optimizer_->findBestPlan(qctx_->plan()->root()); + NG_RETURN_IF_ERROR(rootStatus); + auto newRoot = std::move(rootStatus).value(); + qctx_->setPlan(std::make_unique<ExecutionPlan>(const_cast<PlanNode *>(newRoot))); return Status::OK(); } diff --git a/src/service/QueryInstance.h b/src/service/QueryInstance.h index 8c813fdd..43e05c34 100644 --- a/src/service/QueryInstance.h +++ b/src/service/QueryInstance.h @@ -11,6 +11,7 @@ #include "common/base/Status.h" #include "common/cpp/helpers.h" #include "context/QueryContext.h" +#include "optimizer/Optimizer.h" #include "parser/GQLParser.h" #include "scheduler/Scheduler.h" @@ -25,11 +26,7 @@ namespace graph { class QueryInstance final : public cpp::NonCopyable, public cpp::NonMovable { public: - explicit QueryInstance(std::unique_ptr<QueryContext> qctx) { - qctx_ = std::move(qctx); - scheduler_ = std::make_unique<Scheduler>(qctx_.get()); - } - + explicit QueryInstance(std::unique_ptr<QueryContext> qctx); ~QueryInstance() = default; void execute(); @@ -59,6 +56,7 @@ private: std::unique_ptr<Sentence> sentence_; std::unique_ptr<QueryContext> qctx_; std::unique_ptr<Scheduler> scheduler_; + std::unique_ptr<opt::Optimizer> optimizer_; }; } // namespace graph diff --git a/src/visitor/CMakeLists.txt b/src/visitor/CMakeLists.txt index 6216ba9d..acc59d6e 100644 --- a/src/visitor/CMakeLists.txt +++ b/src/visitor/CMakeLists.txt @@ -9,6 +9,7 @@ nebula_add_library( CollectAllExprsVisitor.cpp DeducePropsVisitor.cpp DeduceTypeVisitor.cpp + ExtractFilterExprVisitor.cpp FindAnyExprVisitor.cpp RewriteLabelAttrVisitor.cpp ) diff --git a/src/visitor/ExtractFilterExprVisitor.cpp b/src/visitor/ExtractFilterExprVisitor.cpp new file mode 100644 index 00000000..b31a91a7 --- /dev/null +++ b/src/visitor/ExtractFilterExprVisitor.cpp @@ -0,0 +1,102 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#include "ExtractFilterExprVisitor.h" + +namespace nebula { +namespace graph { + +void ExtractFilterExprVisitor::visit(ConstantExpression *) { + canBePushed_ = true; +} + +void ExtractFilterExprVisitor::visit(LabelExpression *) { + canBePushed_ = false; +} + +void ExtractFilterExprVisitor::visit(UUIDExpression *) { + canBePushed_ = false; +} + +void ExtractFilterExprVisitor::visit(VariableExpression *) { + canBePushed_ = false; +} + +void ExtractFilterExprVisitor::visit(VersionedVariableExpression *) { + canBePushed_ = false; +} + +void ExtractFilterExprVisitor::visit(TagPropertyExpression *) { + canBePushed_ = false; +} + +void ExtractFilterExprVisitor::visit(EdgePropertyExpression *) { + canBePushed_ = true; +} + +void ExtractFilterExprVisitor::visit(InputPropertyExpression *) { + canBePushed_ = false; +} + +void ExtractFilterExprVisitor::visit(VariablePropertyExpression *) { + canBePushed_ = false; +} + +void ExtractFilterExprVisitor::visit(DestPropertyExpression *) { + canBePushed_ = false; +} + +void ExtractFilterExprVisitor::visit(SourcePropertyExpression *) { + canBePushed_ = true; +} + +void ExtractFilterExprVisitor::visit(EdgeSrcIdExpression *) { + canBePushed_ = true; +} + +void ExtractFilterExprVisitor::visit(EdgeTypeExpression *) { + canBePushed_ = true; +} + +void ExtractFilterExprVisitor::visit(EdgeRankExpression *) { + canBePushed_ = true; +} + +void ExtractFilterExprVisitor::visit(EdgeDstIdExpression *) { + canBePushed_ = true; +} + +void ExtractFilterExprVisitor::visit(VertexExpression *) { + canBePushed_ = false; +} + +void ExtractFilterExprVisitor::visit(EdgeExpression *) { + canBePushed_ = false; +} + +void ExtractFilterExprVisitor::visit(LogicalExpression *expr) { + if (expr->kind() == Expression::Kind::kLogicalAnd) { + expr->left()->accept(this); + auto canBePushedLeft = canBePushed_; + expr->right()->accept(this); + auto canBePushedRight = canBePushed_; + canBePushed_ = canBePushedLeft || canBePushedRight; + if (canBePushed_) { + if (!canBePushedLeft) { + remainedExpr_ = expr->left()->clone(); + expr->setLeft(new ConstantExpression(true)); + } else if (!canBePushedRight) { + remainedExpr_ = expr->right()->clone(); + expr->setRight(new ConstantExpression(true)); + } + } + } else { + ExprVisitorImpl::visit(expr); + } +} + +} // namespace graph +} // namespace nebula diff --git a/src/visitor/ExtractFilterExprVisitor.h b/src/visitor/ExtractFilterExprVisitor.h new file mode 100644 index 00000000..2efd9137 --- /dev/null +++ b/src/visitor/ExtractFilterExprVisitor.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ +#ifndef VISITOR_EXTRACTFILTEREXPRVISITOR_H_ +#define VISITOR_EXTRACTFILTEREXPRVISITOR_H_ + +#include <memory> + +#include "visitor/ExprVisitorImpl.h" + +namespace nebula { +namespace graph { + +class ExtractFilterExprVisitor final : public ExprVisitorImpl { +public: + ExtractFilterExprVisitor() = default; + + bool ok() const override { + return canBePushed_; + } + + std::unique_ptr<Expression> remainedExpr() && { + return std::move(remainedExpr_); + } + +private: + using ExprVisitorImpl::visit; + + void visit(ConstantExpression *) override; + void visit(LabelExpression *) override; + void visit(UUIDExpression *) override; + void visit(VariableExpression *) override; + void visit(VersionedVariableExpression *) override; + void visit(TagPropertyExpression *) override; + void visit(EdgePropertyExpression *) override; + void visit(InputPropertyExpression *) override; + void visit(VariablePropertyExpression *) override; + void visit(DestPropertyExpression *) override; + void visit(SourcePropertyExpression *) override; + void visit(EdgeSrcIdExpression *) override; + void visit(EdgeTypeExpression *) override; + void visit(EdgeRankExpression *) override; + void visit(EdgeDstIdExpression *) override; + void visit(VertexExpression *) override; + void visit(EdgeExpression *) override; + void visit(LogicalExpression *expr) override; + + bool canBePushed_{true}; + std::unique_ptr<Expression> remainedExpr_; +}; + +} // namespace graph +} // namespace nebula + +#endif // VISITOR_EXTRACTFILTEREXPRVISITOR_H_ diff --git a/tests/admin/test_configs.py b/tests/admin/test_configs.py index b0db1132..4862e069 100644 --- a/tests/admin/test_configs.py +++ b/tests/admin/test_configs.py @@ -5,11 +5,8 @@ # This source code is licensed under Apache 2.0 License, # attached with Common Clause Condition 1.0, found in the LICENSES directory. -import time -import re - from tests.common.nebula_test_suite import NebulaTestSuite -from nebula2.common import ttypes + class TestConfigs(NebulaTestSuite): @@ -62,6 +59,7 @@ class TestConfigs(NebulaTestSuite): expected_result = [['GRAPH', 'v', 'int', 'MUTABLE', v], ['GRAPH', 'minloglevel', 'int', 'MUTABLE', 0], ['GRAPH', 'slow_op_threshhold_ms', 'int', 'MUTABLE', 50], + ['GRAPH', 'enable_optimizer', 'bool', 'MUTABLE', True], ['GRAPH', 'heartbeat_interval_secs', 'int', 'MUTABLE', 1], ['GRAPH', 'meta_client_retry_times', 'int', 'MUTABLE', 3]] self.check_out_of_order_result(resp, expected_result) diff --git a/tests/common/load_test_data.py b/tests/common/global_data_loader.py similarity index 97% rename from tests/common/load_test_data.py rename to tests/common/global_data_loader.py index 321ac9c7..9199b9b4 100644 --- a/tests/common/load_test_data.py +++ b/tests/common/global_data_loader.py @@ -7,18 +7,19 @@ import time -from nebula2.Client import AuthException, ExecutionException, GraphClient -from nebula2.Common import * + +from nebula2.Client import GraphClient from nebula2.ConnectionPool import ConnectionPool from nebula2.graph import ttypes from tests.common.configs import get_delay_time -class LoadGlobalData(object): + +class GlobalDataLoader(object): def __init__(self, data_dir, ip, port, user, password): self.data_dir = data_dir self.ip = ip self.port = port - self.client_pool = ConnectionPool(ip = self.ip, port = self.port, network_timeout = 0) + self.client_pool = ConnectionPool(ip=self.ip, port=self.port, network_timeout=0) self.client = GraphClient(self.client_pool) self.user = user self.password = password @@ -33,7 +34,7 @@ class LoadGlobalData(object): # The whole test will load once, for the only read tests def load_nba(self): nba_file = self.data_dir + '/data/nba.ngql' - print("will open ", nba_file) + print("open: ", nba_file) with open(nba_file, 'r') as data_file: resp = self.client.execute( 'CREATE SPACE IF NOT EXISTS nba(partition_num=10, replica_factor=1, vid_size=30);USE nba;') diff --git a/tests/common/nebula_service.py b/tests/common/nebula_service.py index 13db53c7..9243cb2e 100644 --- a/tests/common/nebula_service.py +++ b/tests/common/nebula_service.py @@ -57,6 +57,8 @@ class NebulaService(object): param_format = "--meta_server_addrs={} --port={} --ws_http_port={} --ws_h2_port={} --heartbeat_interval_secs=1" param = param_format.format("127.0.0.1:" + str(meta_port), ports[0], ports[1], ports[2]) + if name == 'graphd': + param += ' --enable_optimizer=true' if name == 'storaged': param += ' --raft_heartbeat_interval_secs=30' if debug_log: @@ -132,7 +134,7 @@ class NebulaService(object): p = subprocess.Popen([command], shell=True, stdout=subprocess.PIPE) p.wait() if p.returncode != 0: - print("error: " + p.communicate()[0]) + print("error: " + bytes.decode(p.communicate()[0])) else: graph_port = ports[0] @@ -144,8 +146,7 @@ class NebulaService(object): for pf in glob.glob(self.work_dir + '/pids/*.pid'): with open(pf) as f: - pid = int(f.readline()) - self.pids[f.name] = pid + self.pids[f.name] = int(f.readline()) return graph_port @@ -155,7 +156,7 @@ class NebulaService(object): try: os.kill(self.pids[p], signal.SIGTERM) except OSError as err: - print("nebula stop " + p + " failed: " + str(err)) + print("nebula stop {} failed: {}".format(p, str(err))) max_retries = 30 while self.check_procs_alive() and max_retries >= 0: diff --git a/tests/common/nebula_test_suite.py b/tests/common/nebula_test_suite.py index a6bebb52..6a229807 100644 --- a/tests/common/nebula_test_suite.py +++ b/tests/common/nebula_test_suite.py @@ -33,6 +33,7 @@ T_NULL_BAD_TYPE.set_nVal(CommonTtypes.NullType.UNKNOWN_PROP) T_NULL_UNKNOWN_DIV_BY_ZERO = CommonTtypes.Value() T_NULL_UNKNOWN_DIV_BY_ZERO.set_nVal(CommonTtypes.NullType.DIV_BY_ZERO) + class NebulaTestSuite(object): @classmethod def set_delay(self): @@ -87,7 +88,7 @@ class NebulaTestSuite(object): self.data_loaded = True pathlist = Path(self.data_dir).rglob('*.ngql') for path in pathlist: - print("will open ", path) + print("open: ", path) with open(path, 'r') as data_file: space_name = path.name.split('.')[0] + datetime.datetime.now().strftime('%H_%M_%S_%f') self.spaces.append(space_name) @@ -141,7 +142,6 @@ class NebulaTestSuite(object): resp = self.execute('USE student_space;') self.check_resp_succeeded(resp) - @classmethod def create_nebula_clients(self): self.client_pool = ConnectionPool(ip=self.host, @@ -184,9 +184,10 @@ class NebulaTestSuite(object): @classmethod def check_resp_succeeded(self, resp): - assert resp.error_code == ttypes.ErrorCode.SUCCEEDED \ - or resp.error_code == ttypes.ErrorCode.E_STATEMENT_EMTPY, \ - bytes.decode(resp.error_msg) + assert ( + resp.error_code == ttypes.ErrorCode.SUCCEEDED + or resp.error_code == ttypes.ErrorCode.E_STATEMENT_EMTPY + ), bytes.decode(resp.error_msg) if resp.error_msg is not None else '' @classmethod def check_resp_failed(self, resp, error_code: ttypes.ErrorCode = ttypes.ErrorCode.SUCCEEDED): @@ -194,12 +195,14 @@ class NebulaTestSuite(object): assert resp.error_code != error_code, '{} == {}, {}'.format( ttypes.ErrorCode._VALUES_TO_NAMES[resp.error_code], ttypes.ErrorCode._VALUES_TO_NAMES[error_code], - bytes.decode(resp.error_msg)) + bytes.decode(resp.error_msg) if resp.error_msg is not None else '' + ) else: assert resp.error_code == error_code, '{} != {}, {}'.format( ttypes.ErrorCode._VALUES_TO_NAMES[resp.error_code], ttypes.ErrorCode._VALUES_TO_NAMES[error_code], - bytes.decode(resp.error_msg)) + bytes.decode(resp.error_msg) if resp.error_msg is not None else '' + ) @classmethod def check_value(self, col, expect): @@ -296,17 +299,17 @@ class NebulaTestSuite(object): return str(value_list) @classmethod - def search_result(self, resp, expect, is_regex = False): - ok, msg = self.search(resp, expect, is_regex) - assert ok, msg + def search_result(self, resp, expect, is_regex=False): + ok, msg = self.search(resp, expect, is_regex) + assert ok, msg @classmethod - def search_not_exist(self, resp, expect, is_regex = False): + def search_not_exist(self, resp, expect, is_regex=False): ok, msg = self.search(resp, expect, is_regex) assert not ok, 'expect "{}" has exist'.format(str(expect)) @classmethod - def search(self, resp, expect, is_regex = False): + def search(self, resp, expect, is_regex=False): if resp.data is None and len(expect) == 0: return True @@ -406,7 +409,7 @@ class NebulaTestSuite(object): return True, result, '' @classmethod - def check_result(self, resp, expect, ignore_col: Set[int] = set(), is_regex = False): + def check_result(self, resp, expect, ignore_col: Set[int] = set(), is_regex=False): if resp.data is None and len(expect) == 0: return @@ -425,7 +428,8 @@ class NebulaTestSuite(object): assert ok, 'convert expect failed, error msg: {}'.format(msg) for row, i in zip(rows, range(0, len(new_expect))): if isinstance(new_expect[i], CommonTtypes.Row): - assert len(row.values) - len(ignore_col) == len(new_expect[i].values), '{}, {}, {}'.format(len(row.values), len(ignore_col), len(new_expect[i].values)) + assert len(row.values) - len(ignore_col) == len(new_expect[i].values), \ + '{}, {}, {}'.format(len(row.values), len(ignore_col), len(new_expect[i].values)) else: assert len(row.values) - len(ignore_col) == len(new_expect[i]) ignored_col_count = 0 @@ -446,7 +450,7 @@ class NebulaTestSuite(object): self.row_to_string(row), expect_to_string, msg) @classmethod - def check_out_of_order_result(self, resp, expect, ignore_col: Set[int] = set()): + def check_out_of_order_result(self, resp, expect, ignore_col: Set[int]=set()): if resp.data is None and len(expect) == 0: return @@ -489,8 +493,8 @@ class NebulaTestSuite(object): pathEntry.set_vertex(vertex) path.entry_list.append(pathEntry) else: - assert len( - ecol) == 2, "invalid values size in expect result" + assert len(ecol) == 2, \ + "invalid values size in expect result" pathEntry = ttypes.PathEntry() edge = ttypes.Edge() edge.type = ecol[0] @@ -499,12 +503,10 @@ class NebulaTestSuite(object): path.entry_list.append(pathEntry) find = False for row in rows: - assert len( - row.values - ) == 1, "invalid values size in rows: {}".format(row) - assert row.values[0].getType()( - ) == ttypes.Value.PATH, "invalid column path type: {}".format( - row.values[0].getType()()) + assert len(row.values) == 1, \ + "invalid values size in rows: {}".format(row) + assert row.values[0].getType()() == ttypes.Value.PATH, \ + "invalid column path type: {}".format(row.values[0].getType()()) if row.values[0].get_path() == path: find = True break @@ -516,12 +518,11 @@ class NebulaTestSuite(object): def check_error_msg(self, resp, expect): self.check_resp_failed(resp) msg = self.check_format_str.format(resp.error_msg, expect) + err_msg = resp.error_msg.decode('utf-8') if isinstance(expect, Pattern): - if not expect.match(resp.error_msg.decode('utf-8')): - assert False, msg + assert expect.match(err_msg), msg else: - assert resp.error_msg.decode('utf-8') == expect, msg - assert resp.error_msg.decode('utf-8') == expect, msg + assert err_msg == expect, msg @classmethod def parse_line(self, line, dataType): @@ -720,3 +721,37 @@ class NebulaTestSuite(object): props[bytes('end_year', encoding = 'utf-8')] = end_year edge.props = props return edge + + @classmethod + def check_exec_plan(cls, resp, expect): + cls.check_resp_succeeded(resp) + if resp.plan_desc is None: + return + cls.diff_plan_node(resp.plan_desc, 0, expect, 0) + + @classmethod + def diff_plan_node(cls, plan_desc, line_num, expect, expect_idx): + plan_node_desc = plan_desc.plan_node_descs[line_num] + expect_node = expect[expect_idx] + name = bytes.decode(plan_node_desc.name) + + assert name.lower().startswith(expect_node[0].lower()), \ + "Different plan node: {} vs. {}".format(name, expect_node[0]) + + if len(expect_node) > 2: + descs = {bytes.decode(pair.value) for pair in plan_node_desc.description} + assert set(expect_node[2]).issubset(descs), \ + 'Invalid descriptions, expect: {} vs. resp: {}'.format( + '; '.join(map(str, expect_node[2])), + '; '.join(map(str, descs))) + + if plan_node_desc.dependencies is None: + return + + assert len(expect_node[1]) == len(plan_node_desc.dependencies), \ + "Different plan node dependencies: {} vs. {}".format( + len(plan_node_desc.dependencies), len(expect_node[1])) + + for i in range(len(plan_node_desc.dependencies)): + line_num = plan_desc.node_index_map[plan_node_desc.dependencies[i]] + cls.diff_plan_node(plan_desc, line_num, expect, expect_node[1][i]) diff --git a/tests/nebula-test-run.py b/tests/nebula-test-run.py index 80085836..5844a8f0 100755 --- a/tests/nebula-test-run.py +++ b/tests/nebula-test-run.py @@ -8,19 +8,20 @@ import itertools import os -import pytest import sys -from time import localtime, strftime from pathlib import Path +from time import localtime, strftime -TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -NEBULA_HOME = TEST_DIR + '/../' -sys.path.insert(0, NEBULA_HOME) +CURR_PATH = os.path.dirname(os.path.abspath(__file__)) +NEBULA_HOME = os.getenv('NEBULA_SOURCE_DIR', os.path.join(CURR_PATH, '..')) +TEST_DIR = os.path.join(NEBULA_HOME, 'tests') sys.path.insert(0, TEST_DIR) -from tests.common.nebula_service import NebulaService -from tests.common.load_test_data import LoadGlobalData +import pytest from tests.common.configs import init_configs +from tests.common.global_data_loader import GlobalDataLoader +from tests.common.nebula_service import NebulaService + TEST_LOGS_DIR = os.getenv('NEBULA_TEST_LOGS_DIR') if TEST_LOGS_DIR is None or TEST_LOGS_DIR == "": @@ -44,8 +45,6 @@ class TestExecutor(object): self.total_executed = 0 def run_tests(self, args): - # plugin = NebulaTestPlugin(TEST_DIR) - error_code = 0 try: error_code = pytest.main(args) @@ -53,14 +52,9 @@ class TestExecutor(object): sys.stderr.write( "Unexpected exception with pytest {0}".format(args)) error_code = 1 - - if '--collect-only' in args: - for test in plugin.tests_collected: - print(test) - - #self.total_executed += len(plugin.tests_executed) return error_code + if __name__ == "__main__": # If the user is just asking for --help, just print the help test and then exit. executor = TestExecutor() @@ -109,13 +103,13 @@ if __name__ == "__main__": print("Running TestExecutor with args: {} ".format(args)) # load nba data - load_data = LoadGlobalData(TEST_DIR, nebula_ip, nebula_port, configs.user, configs.password) - load_data.load_all_test_data() + data_loader = GlobalDataLoader(TEST_DIR, nebula_ip, nebula_port, configs.user, configs.password) + data_loader.load_all_test_data() # Switch to your $src_dir/tests os.chdir(TEST_DIR) error_code = executor.run_tests(args) - load_data.drop_data() + data_loader.drop_data() except Exception as x: print('\033[31m' + str(x) + '\033[0m') diff --git a/tests/ntr.out-of-source b/tests/ntr.out-of-source index b97cb3cf..1c4836e0 100644 --- a/tests/ntr.out-of-source +++ b/tests/ntr.out-of-source @@ -4,4 +4,4 @@ import os, sys os.environ['NEBULA_BUILD_DIR'] = '@CMAKE_BINARY_DIR@' os.environ['NEBULA_SOURCE_DIR'] = '@CMAKE_SOURCE_DIR@' os.chdir('@CMAKE_SOURCE_DIR@/tests') -exec(open("nebula-test-run.py").read()) +exec(open("@CMAKE_SOURCE_DIR@/tests/nebula-test-run.py").read()) diff --git a/tests/query/v2/test_optimizer.py b/tests/query/v2/test_optimizer.py new file mode 100644 index 00000000..434ce91a --- /dev/null +++ b/tests/query/v2/test_optimizer.py @@ -0,0 +1,103 @@ +# --coding:utf-8-- +# +# Copyright (c) 2020 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License, +# attached with Common Clause Condition 1.0, found in the LICENSES directory. + +import pytest + +from tests.common.nebula_test_suite import NebulaTestSuite + + +class TestOptimizer(NebulaTestSuite): + + @classmethod + def prepare(cls): + cls.use_nba() + + def test_PushFilterDownGetNbrsRule(self): + resp = self.execute_query(''' + GO 1 STEPS FROM "Kobe Bryant" OVER serve + WHERE $^.player.age > 18 YIELD $^.player.name AS name + ''') + expected_plan = [ + ["Project", [1]], + ["GetNeighbors", [2], ['($^.player.age>18)']], + ["Start", []] + ] + self.check_exec_plan(resp, expected_plan) + + resp = self.execute_query(''' + GO 1 STEPS FROM "Kobe Bryant" OVER like REVERSELY + WHERE $^.player.age > 18 YIELD $^.player.name AS name + ''') + expected_plan = [ + ["Project", [1]], + ["GetNeighbors", [2], ['($^.player.age>18)']], + ["Start", []] + ] + self.check_exec_plan(resp, expected_plan) + + resp = self.execute_query(''' + GO 1 STEPS FROM "Kobe Bryant" OVER serve + WHERE serve.start_year > 2002 YIELD $^.player.name AS name + ''') + expected_plan = [ + ["Project", [1]], + ["GetNeighbors", [2], ['(serve.start_year>2002)']], + ["Start", []] + ] + self.check_exec_plan(resp, expected_plan) + + resp = self.execute_query(''' + GO 1 STEPS FROM "Lakerys" OVER serve REVERSELY + WHERE serve.start_year > 2002 YIELD $^.player.name AS name + ''') + expected_plan = [ + ["Project", [1]], + ["GetNeighbors", [2], ['(serve.start_year>2002)']], + ["Start", []] + ] + self.check_exec_plan(resp, expected_plan) + + @pytest.mark.skip(reason="Depends on other opt rules to eliminate duplicate project nodes") + def test_PushFilterDownGetNbrsRule_Failed(self): + resp = self.execute_query(''' + GO 1 STEPS FROM "Kobe Bryant" OVER serve + WHERE $^.player.age > 18 AND $$.team.name == "Lakers" + YIELD $^.player.name AS name + ''') + expected_plan = [ + ["Project", [1]], + ["Filter", [2], ['($$.team.name=="Lakers")']], + ["GetNeighbors", [3], ['($^.player.age>18)']], + ["Start", []] + ] + self.check_exec_plan(resp, expected_plan) + + resp = self.execute_query(''' + GO 1 STEPS FROM "Kobe Bryant" OVER serve + WHERE $^.player.age > 18 OR $$.team.name == "Lakers" + YIELD $^.player.name AS name + ''') + expected_plan = [ + ["Project", [1]], + ["Filter", [2], ['($^.player.age>18) OR ($$.team.name=="Lakers")']] + ["GetNeighbors", [3]], + ["Start", []] + ] + self.check_exec_plan(resp, expected_plan) + + # fail to optimize cases + resp = self.execute_query(''' + GO 1 STEPS FROM "Kobe Bryant" OVER serve \ + WHERE $$.team.name == "Lakers" YIELD $^.player.name AS name + ''') + expected_plan = [ + ["Project", [1]], + ["Filter", [2]], + ["GetNeighbors", [3]], + ["Start", []] + ] + self.check_exec_plan(resp, expected_plan) -- GitLab