Skip to content
Snippets Groups Projects
Unverified Commit 3176f097 authored by nevermore's avatar nevermore Committed by GitHub
Browse files

add group by validator (#156)


* add group by validator

* check groupby & yield

* add groupby sentence

* fix compiler error

* add test case

* modify deduceProps

* modify code

* remove useless code

* add test case

* fix error

* add test case

* fix error

* check duplicate columnname

* remove useless code

* rebase code

* check duplicate

Co-authored-by: default avatarjimingquan <mingquan.ji@vesoft.com>
Co-authored-by: default avatarlaura-ding <48548375+laura-ding@users.noreply.github.com>
parent 46797781
No related branches found
No related tags found
No related merge requests found
Showing
with 1278 additions and 111 deletions
# Copyright (c) 2020 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License,
# attached with Common Clause Condition 1.0, found in the LICENSES directory.
#
# This is the output of clang-format-7.0 --style=google --dump-config,
# except for changes mentioned below.
# We have locked the version of clang-format in order to avoid inconsistencies
# in the format caused by developers using different clang-format versions.
---
Language: Cpp
# BasedOnStyle: Google
AlignAfterOpenBracket: Align
AlignConsecutiveDeclarations: false
AlignOperands: true
AlignTrailingComments: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakTemplateDeclarations: Yes
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: true
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Preserve
IncludeCategories:
- Regex: '^<ext/.*\.h>'
Priority: 2
- Regex: '^<.*\.h>'
Priority: 1
- Regex: '^<.*'
Priority: 2
- Regex: '.*'
Priority: 3
IncludeIsMainRegex: '([-_](test|unittest))?$'
IndentCaseLabels: true
IndentPPDirectives: None
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Never
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
- Cpp
- CPP
- 'c++'
- 'C++'
CanonicalDelimiter: ''
BasedOnStyle: google
- Language: TextProto
Delimiters:
- pb
- PB
- proto
- PROTO
EnclosingFunctions:
- EqualsProto
- EquivToProto
- PARSE_PARTIAL_TEXT_PROTO
- PARSE_TEST_PROTO
- PARSE_TEXT_PROTO
- ParseTextOrDie
- ParseTextProtoOrDie
CanonicalDelimiter: ''
BasedOnStyle: google
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyParentheses: false
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
TabWidth: 4
UseTab: Never
#Different from google style
Standard: Cpp11
AccessModifierOffset: -4
AllowAllParametersOfDeclarationOnNextLine: false
ColumnLimit: 100
ObjCBlockIndentWidth: 4
AlignEscapedNewlines: Right
AlwaysBreakBeforeMultilineStrings: false
BinPackArguments: false
BinPackParameters: false
IndentWidth: 4
SpacesBeforeTrailingComments: 3
AllowShortFunctionsOnASingleLine: Empty
...
......@@ -82,6 +82,7 @@ function run_test() {
$PROJ_DIR/tests/maintain/* \
$PROJ_DIR/tests/mutate/* \
$PROJ_DIR/tests/query/stateless/test_new_go.py \
$PROJ_DIR/tests/query/stateless/test_new_groupby.py \
$PROJ_DIR/tests/query/v1/* \
$PROJ_DIR/tests/query/v2/* \
$PROJ_DIR/tests/query/stateless/test_schema.py \
......
......@@ -97,7 +97,7 @@ public:
kBalance,
kFindPath,
kLimit,
KGroupBy,
kGroupBy,
kReturn,
kCreateSnapshot,
kDropSnapshot,
......
......@@ -543,7 +543,7 @@ private:
class GroupBySentence final : public Sentence {
public:
GroupBySentence() {
kind_ = Kind::KGroupBy;
kind_ = Kind::kGroupBy;
}
void setGroupClause(GroupClause *clause) {
......
......@@ -116,7 +116,7 @@ bool PermissionCheck::permissionCheck(Session *session,
case Sentence::Kind::kFindPath :
case Sentence::Kind::kGetSubgraph:
case Sentence::Kind::kLimit :
case Sentence::Kind::KGroupBy :
case Sentence::Kind::kGroupBy :
case Sentence::Kind::kReturn : {
return PermissionManager::canReadSchemaOrData(session);
}
......
......@@ -23,6 +23,7 @@ nebula_add_library(
OrderByValidator.cpp
YieldValidator.cpp
ExplainValidator.cpp
GroupByValidator.cpp
)
add_subdirectory(test)
......@@ -24,15 +24,16 @@ Status GoValidator::validateImpl() {
NG_RETURN_IF_ERROR(validateWhere(goSentence->whereClause()));
NG_RETURN_IF_ERROR(validateYield(goSentence->yieldClause()));
if (!inputProps_.empty() && fromType_ != kPipe) {
if (!exprProps_.inputProps().empty() && fromType_ != kPipe) {
return Status::Error("$- must be referred in FROM before used in WHERE or YIELD");
}
if (!varProps_.empty() && fromType_ != kVariable) {
if (!exprProps_.varProps().empty() && fromType_ != kVariable) {
return Status::Error("A variable must be referred in FROM before used in WHERE or YIELD");
}
if ((!inputProps_.empty() && !varProps_.empty()) || varProps_.size() > 1) {
if ((!exprProps_.inputProps().empty() && !exprProps_.varProps().empty()) ||
exprProps_.varProps().size() > 1) {
return Status::Error("Only support single input in a go sentence.");
}
......@@ -169,7 +170,7 @@ Status GoValidator::validateWhere(WhereClause* where) {
return Status::Error(ss.str());
}
auto status = deduceProps(filter_);
auto status = deduceProps(filter_, exprProps_);
if (!status.ok()) {
return status;
}
......@@ -194,7 +195,7 @@ Status GoValidator::validateYield(YieldClause* yield) {
auto colName = deduceColName(col);
colNames_.emplace_back(colName);
outputs_.emplace_back(colName, Value::Type::STRING);
NG_RETURN_IF_ERROR(deduceProps(col->expr()));
NG_RETURN_IF_ERROR(deduceProps(col->expr(), exprProps_));
}
yields_ = newCols;
......@@ -221,9 +222,9 @@ Status GoValidator::validateYield(YieldClause* yield) {
auto type = typeStatus.value();
outputs_.emplace_back(colName, type);
NG_RETURN_IF_ERROR(deduceProps(col->expr()));
NG_RETURN_IF_ERROR(deduceProps(col->expr(), exprProps_));
}
for (auto& e : edgeProps_) {
for (auto& e : exprProps_.edgeProps()) {
auto found = std::find(edgeTypes_.begin(), edgeTypes_.end(), e.first);
if (found == edgeTypes_.end()) {
return Status::Error("Edges should be declared first in over clause.");
......@@ -258,12 +259,13 @@ Status GoValidator::oneStep(PlanNode* dependencyForGn,
PlanNode* dependencyForProjectResult = gn;
PlanNode* projectSrcEdgeProps = nullptr;
if (!inputProps_.empty() || !varProps_.empty() || !dstTagProps_.empty()) {
if (!exprProps_.inputProps().empty() || !exprProps_.varProps().empty() ||
!exprProps_.dstTagProps().empty()) {
projectSrcEdgeProps = buildProjectSrcEdgePropsForGN(gn);
}
PlanNode* joinDstProps = nullptr;
if (!dstTagProps_.empty() && projectSrcEdgeProps != nullptr) {
if (!exprProps_.dstTagProps().empty() && projectSrcEdgeProps != nullptr) {
joinDstProps = buildJoinDstProps(projectSrcEdgeProps);
}
if (joinDstProps != nullptr) {
......@@ -271,7 +273,7 @@ Status GoValidator::oneStep(PlanNode* dependencyForGn,
}
PlanNode* joinInput = nullptr;
if (!inputProps_.empty() || !varProps_.empty()) {
if (!exprProps_.inputProps().empty() || !exprProps_.varProps().empty()) {
joinInput = buildJoinPipeOrVariableInput(
projectFromJoin,
joinDstProps == nullptr ? projectSrcEdgeProps : joinDstProps);
......@@ -319,7 +321,7 @@ Status GoValidator::buildNStepsPlan() {
}
Project* projectLeftVarForJoin = nullptr;
if (!inputProps_.empty() || !varProps_.empty()) {
if (!exprProps_.inputProps().empty() || !exprProps_.varProps().empty()) {
projectLeftVarForJoin = buildLeftVarForTraceJoin(projectStartVid);
}
......@@ -332,7 +334,7 @@ Status GoValidator::buildNStepsPlan() {
Project* projectDstFromGN = projectDstVidsFromGN(gn, startVidsVar);
Project* projectFromJoin = nullptr;
if ((!inputProps_.empty() || !varProps_.empty()) &&
if ((!exprProps_.inputProps().empty() || !exprProps_.varProps().empty()) &&
projectLeftVarForJoin != nullptr && projectDstFromGN != nullptr) {
projectFromJoin = traceToStartVid(projectLeftVarForJoin, projectDstFromGN);
}
......@@ -366,7 +368,7 @@ PlanNode* GoValidator::buildProjectSrcEdgePropsForGN(PlanNode* gn) {
DCHECK(gn != nullptr);
auto* plan = qctx_->plan();
if (!inputProps_.empty() || !varProps_.empty()) {
if (!exprProps_.inputProps().empty() || !exprProps_.varProps().empty()) {
auto* srcVidCol = new YieldColumn(
new VariablePropertyExpression(new std::string(gn->varName()),
new std::string(kVid)),
......@@ -375,7 +377,7 @@ PlanNode* GoValidator::buildProjectSrcEdgePropsForGN(PlanNode* gn) {
}
VLOG(1) << "build dst cols";
if (!dstTagProps_.empty()) {
if (!exprProps_.dstTagProps().empty()) {
joinDstVidColName_ = vctx_->anonColGen()->getCol();
auto* dstVidCol = new YieldColumn(
new EdgePropertyExpression(new std::string("*"),
......@@ -536,7 +538,7 @@ Project* GoValidator::projectDstVidsFromGN(PlanNode* gn, const std::string& outp
columns->addColumn(column);
srcVidColName_ = vctx_->anonColGen()->getCol();
if (!inputProps_.empty() || !varProps_.empty()) {
if (!exprProps_.inputProps().empty() || !exprProps_.varProps().empty()) {
column =
new YieldColumn(new InputPropertyExpression(new std::string(kVid)),
new std::string(srcVidColName_));
......@@ -633,14 +635,15 @@ PlanNode* GoValidator::buildRuntimeInput() {
GetNeighbors::VertexProps GoValidator::buildSrcVertexProps() {
GetNeighbors::VertexProps vertexProps;
if (!srcTagProps_.empty()) {
if (!exprProps_.srcTagProps().empty()) {
vertexProps = std::make_unique<std::vector<storage::cpp2::VertexProp>>(
srcTagProps_.size());
std::transform(srcTagProps_.begin(), srcTagProps_.end(),
exprProps_.srcTagProps().size());
std::transform(exprProps_.srcTagProps().begin(), exprProps_.srcTagProps().end(),
vertexProps->begin(), [](auto& tag) {
storage::cpp2::VertexProp vp;
vp.tag = tag.first;
vp.props = std::move(tag.second);
std::vector<std::string>props(tag.second.begin(), tag.second.end());
vp.props = std::move(props);
return vp;
});
}
......@@ -648,13 +651,14 @@ GetNeighbors::VertexProps GoValidator::buildSrcVertexProps() {
}
std::vector<storage::cpp2::VertexProp> GoValidator::buildDstVertexProps() {
std::vector<storage::cpp2::VertexProp> vertexProps(dstTagProps_.size());
if (!dstTagProps_.empty()) {
std::transform(dstTagProps_.begin(), dstTagProps_.end(),
std::vector<storage::cpp2::VertexProp> vertexProps(exprProps_.dstTagProps().size());
if (!exprProps_.dstTagProps().empty()) {
std::transform(exprProps_.dstTagProps().begin(), exprProps_.dstTagProps().end(),
vertexProps.begin(), [](auto& tag) {
storage::cpp2::VertexProp vp;
vp.tag = tag.first;
vp.props = std::move(tag.second);
std::vector<std::string>props(tag.second.begin(), tag.second.end());
vp.props = std::move(props);
return vp;
});
}
......@@ -663,59 +667,75 @@ std::vector<storage::cpp2::VertexProp> GoValidator::buildDstVertexProps() {
GetNeighbors::EdgeProps GoValidator::buildEdgeProps() {
GetNeighbors::EdgeProps edgeProps;
if (!edgeProps_.empty()) {
if (!exprProps_.edgeProps().empty()) {
if (direction_ == storage::cpp2::EdgeDirection::IN_EDGE) {
edgeProps = std::make_unique<std::vector<storage::cpp2::EdgeProp>>(
edgeProps_.size());
std::transform(edgeProps_.begin(), edgeProps_.end(),
edgeProps->begin(), [this](auto& edge) {
exprProps_.edgeProps().size());
std::transform(exprProps_.edgeProps().begin(),
exprProps_.edgeProps().end(),
edgeProps->begin(),
[this](auto& edge) {
storage::cpp2::EdgeProp ep;
ep.type = -edge.first;
ep.props = std::move(edge.second);
if (!dstTagProps_.empty()) {
std::vector<std::string> props(edge.second.begin(),
edge.second.end());
ep.props = std::move(props);
if (!exprProps_.dstTagProps().empty()) {
ep.props.emplace_back(kDst);
}
return ep;
});
} else if (direction_ == storage::cpp2::EdgeDirection::BOTH) {
auto size = edgeProps_.size();
auto size = exprProps_.edgeProps().size();
edgeProps = std::make_unique<std::vector<storage::cpp2::EdgeProp>>(
size * 2);
std::transform(edgeProps_.begin(), edgeProps_.end(),
edgeProps->begin(), [this](auto& edge) {
std::transform(exprProps_.edgeProps().begin(),
exprProps_.edgeProps().end(),
edgeProps->begin(),
[this](auto& edge) {
storage::cpp2::EdgeProp ep;
ep.type = edge.first;
ep.props = edge.second;
if (!dstTagProps_.empty()) {
std::vector<std::string> props(edge.second.begin(),
edge.second.end());
ep.props = std::move(props);
if (!exprProps_.dstTagProps().empty()) {
ep.props.emplace_back(kDst);
}
return ep;
});
std::transform(edgeProps_.begin(), edgeProps_.end(),
edgeProps->begin() + size, [this](auto& edge) {
std::transform(exprProps_.edgeProps().begin(),
exprProps_.edgeProps().end(),
edgeProps->begin() + size,
[this](auto& edge) {
storage::cpp2::EdgeProp ep;
ep.type = -edge.first;
ep.props = std::move(edge.second);
if (!dstTagProps_.empty()) {
std::vector<std::string> props(edge.second.begin(),
edge.second.end());
ep.props = std::move(props);
if (!exprProps_.dstTagProps().empty()) {
ep.props.emplace_back(kDst);
}
return ep;
});
} else {
edgeProps = std::make_unique<std::vector<storage::cpp2::EdgeProp>>(
edgeProps_.size());
std::transform(edgeProps_.begin(), edgeProps_.end(),
edgeProps->begin(), [this](auto& edge) {
exprProps_.edgeProps().size());
std::transform(exprProps_.edgeProps().begin(),
exprProps_.edgeProps().end(),
edgeProps->begin(),
[this](auto& edge) {
storage::cpp2::EdgeProp ep;
ep.type = edge.first;
ep.props = std::move(edge.second);
if (!dstTagProps_.empty()) {
std::vector<std::string> props(edge.second.begin(),
edge.second.end());
ep.props = std::move(props);
if (!exprProps_.dstTagProps().empty()) {
ep.props.emplace_back(kDst);
}
return ep;
});
}
} else if (!dstTagProps_.empty()) {
} else if (!exprProps_.dstTagProps().empty()) {
return buildEdgeDst();
}
......@@ -724,7 +744,7 @@ GetNeighbors::EdgeProps GoValidator::buildEdgeProps() {
GetNeighbors::EdgeProps GoValidator::buildEdgeDst() {
GetNeighbors::EdgeProps edgeProps;
if (!edgeProps_.empty() || !dstTagProps_.empty()) {
if (!exprProps_.edgeProps().empty() || !exprProps_.dstTagProps().empty()) {
if (direction_ == storage::cpp2::EdgeDirection::IN_EDGE) {
edgeProps = std::make_unique<std::vector<storage::cpp2::EdgeProp>>(
edgeTypes_.size());
......@@ -998,19 +1018,21 @@ std::unique_ptr<Expression> GoValidator::rewriteToInputProp(Expression* expr) {
}
Status GoValidator::buildColumns() {
if (dstTagProps_.empty() && inputProps_.empty() && varProps_.empty()) {
return Status::OK();
}
if (exprProps_.dstTagProps().empty() && exprProps_.inputProps().empty() &&
exprProps_.varProps().empty()) {
return Status::OK();
}
if (!srcTagProps_.empty() || !edgeProps_.empty() || !dstTagProps_.empty()) {
srcAndEdgePropCols_ = qctx_->plan()->saveObject(new YieldColumns());
}
if (!exprProps_.srcTagProps().empty() || !exprProps_.edgeProps().empty() ||
!exprProps_.dstTagProps().empty()) {
srcAndEdgePropCols_ = qctx_->plan()->saveObject(new YieldColumns());
}
if (!dstTagProps_.empty()) {
if (!exprProps_.dstTagProps().empty()) {
dstPropCols_ = qctx_->plan()->saveObject(new YieldColumns());
}
if (!inputProps_.empty() || !varProps_.empty()) {
if (!exprProps_.inputProps().empty() || !exprProps_.varProps().empty()) {
inputPropCols_ = qctx_->plan()->saveObject(new YieldColumns());
}
......
......@@ -95,6 +95,8 @@ private:
bool distinct_{false};
std::string userDefinedVarName_;
ExpressionProps exprProps_;
// Generated by validator if needed, and the lifecycle of raw pointers would
// be managed by object pool
YieldColumns* srcAndEdgePropCols_{nullptr};
......
/* Copyright (c) 2020 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/
#include "validator/GroupByValidator.h"
#include "planner/Query.h"
namespace nebula {
namespace graph {
/**
 * Entry point of GROUP BY validation.
 *
 * Runs, in order: group clause validation, yield clause validation, the input
 * property check and the variable property check. Afterwards it rejects
 * sentences that reference source/destination tag props, and sentences that
 * reference both input props and variable props.
 *
 * @return the first error reported by any step, OK otherwise.
 */
Status GroupByValidator::validateImpl() {
    auto* stmt = static_cast<GroupBySentence*>(sentence_);

    // The group clause goes first: validateYield() checks its columns against
    // the props that validateGroup() collected into exprProps_.
    NG_RETURN_IF_ERROR(validateGroup(stmt->groupClause()));
    NG_RETURN_IF_ERROR(validateYield(stmt->yieldClause()));
    NG_RETURN_IF_ERROR(checkInputProps());
    NG_RETURN_IF_ERROR(checkVarProps());

    const bool touchesTagProps =
        !(exprProps_.srcTagProps().empty() && exprProps_.dstTagProps().empty());
    if (touchesTagProps) {
        return Status::SemanticError("Only support input and variable in GroupBy sentence.");
    }

    const bool usesInput = !exprProps_.inputProps().empty();
    const bool usesVariable = !exprProps_.varProps().empty();
    if (usesInput && usesVariable) {
        return Status::SemanticError("Not support both input and variable in GroupBy sentence.");
    }

    return Status::OK();
}
/**
 * Checks every input property collected in exprProps_ against the column
 * definitions in inputs_.
 *
 * @return SemanticError when input props are referenced but inputs_ is empty,
 *         the error from checkPropNonexistOrDuplicate() for the first
 *         offending prop, OK otherwise.
 */
Status GroupByValidator::checkInputProps() const {
    // NOTE(review): the const_cast is presumably needed because
    // ExpressionProps::inputProps() is not const-qualified -- confirm.
    auto& referenced = const_cast<ExpressionProps&>(exprProps_).inputProps();
    if (referenced.empty()) {
        // Nothing refers to the input, so there is nothing to verify.
        return Status::OK();
    }
    if (inputs_.empty()) {
        return Status::SemanticError("no inputs for GroupBy.");
    }

    for (auto& prop : referenced) {
        DCHECK_NE(prop, "*");
        NG_RETURN_IF_ERROR(checkPropNonexistOrDuplicate(inputs_, prop, "GroupBy sentence"));
    }
    return Status::OK();
}
/**
 * Checks every variable property collected in exprProps_: the variable must be
 * known to the validate context, and each referenced prop must pass
 * checkPropNonexistOrDuplicate() against that variable's columns.
 *
 * @return SemanticError for an unknown variable, the error from
 *         checkPropNonexistOrDuplicate() for the first offending prop,
 *         OK otherwise.
 */
Status GroupByValidator::checkVarProps() const {
    // NOTE(review): the const_cast is presumably needed because
    // ExpressionProps::varProps() is not const-qualified -- confirm.
    auto& referenced = const_cast<ExpressionProps&>(exprProps_).varProps();
    for (auto& entry : referenced) {
        auto& varName = entry.first;
        if (!vctx_->existVar(varName)) {
            return Status::SemanticError("variable `%s' not exist.", varName.c_str());
        }

        auto& varCols = vctx_->getVar(varName);
        for (auto& prop : entry.second) {
            DCHECK_NE(prop, "*");
            NG_RETURN_IF_ERROR(checkPropNonexistOrDuplicate(varCols, prop, "GroupBy sentence"));
        }
    }
    return Status::OK();
}
/**
 * Validates the YIELD clause of a GROUP BY sentence.
 *
 * For every yield column this
 *   - verifies the aggregate function name (if any) and that `*` is only used
 *     with count,
 *   - records the aggregate item, output column type/name, the column itself
 *     and its alias (groupItems_, outputs_, outputColumnNames_, yieldCols_,
 *     aliases_),
 *   - requires that a column without an aggregate function only references
 *     input/variable props that the group keys reference,
 *   - merges the column's props into exprProps_.
 *
 * @param yieldClause the parsed yield clause; nullptr is treated as empty.
 * @return SemanticError on the first violation, any error from
 *         deduceExprType()/deduceProps(), OK otherwise.
 */
Status GroupByValidator::validateYield(const YieldClause *yieldClause) {
    std::vector<YieldColumn*> columns;
    if (yieldClause != nullptr) {
        columns = yieldClause->columns();
    }
    if (columns.empty()) {
        return Status::SemanticError("Yield cols is Empty");
    }

    // validateImpl() runs validateGroup() before this function, so exprProps_
    // holds the props referenced by the group keys at this point. Snapshot it:
    // exprProps_ is extended with every yield column's props below, and
    // checking against the growing set would let `count($-.b), $-.b` pass
    // merely because an earlier aggregated column mentioned `$-.b`.
    ExpressionProps groupProps = exprProps_;

    for (auto* col : columns) {
        auto fun = col->getAggFunName();
        if (!fun.empty()) {
            auto iter = AggFun::nameIdMap_.find(fun);
            if (iter == AggFun::nameIdMap_.end()) {
                return Status::SemanticError("Unkown aggregate function `%s`", fun.c_str());
            }
            if (iter->second != AggFun::Function::kCount && col->expr()->toString() == "*") {
                return Status::SemanticError("`%s` invaild, * valid in count.",
                                             col->toString().c_str());
            }
        }

        // todo(jmq) count(distinct)
        groupItems_.emplace_back(Aggregate::GroupItem{col->expr(), AggFun::nameIdMap_[fun], false});

        auto status = deduceExprType(col->expr());
        NG_RETURN_IF_ERROR(status);
        auto type = std::move(status).value();
        auto name = deduceColName(col);
        outputs_.emplace_back(name, type);
        outputColumnNames_.emplace_back(std::move(name));

        // todo(jmq) extend $-.*
        yieldCols_.emplace_back(col);
        if (col->alias() != nullptr) {
            aliases_.emplace(*col->alias(), col);
        }

        // check input yield field without agg function and not in group cols
        ExpressionProps yieldProps;
        NG_RETURN_IF_ERROR(deduceProps(col->expr(), yieldProps));
        if (fun.empty()) {
            bool coveredByGroupKeys = true;
            if (!yieldProps.inputProps().empty()) {
                coveredByGroupKeys = groupProps.isSubsetOfInput(yieldProps.inputProps());
            } else if (!yieldProps.varProps().empty()) {
                coveredByGroupKeys = groupProps.isSubsetOfVar(yieldProps.varProps());
            }
            if (!coveredByGroupKeys) {
                return Status::SemanticError("Yield `%s` isn't in output fields",
                                             col->toString().c_str());
            }
        }

        // Keep the full set of referenced props for checkInputProps() /
        // checkVarProps() and the checks in validateImpl().
        exprProps_.unionProps(std::move(yieldProps));
    }
    return Status::OK();
}
/**
 * Validates the GROUP BY clause.
 *
 * Each group column must be an input property or a function call and must not
 * carry an aggregate function. Accepted columns have their expression type
 * deduced, their props merged into exprProps_, and are recorded in groupCols_
 * and groupKeys_.
 *
 * @param groupClause the parsed group clause; nullptr is treated as empty.
 * @return SemanticError on the first violation, any error from
 *         deduceExprType()/deduceProps(), OK otherwise.
 */
Status GroupByValidator::validateGroup(const GroupClause *groupClause) {
    std::vector<YieldColumn*> keys;
    if (groupClause != nullptr) {
        keys = groupClause->columns();
    }
    if (keys.empty()) {
        return Status::SemanticError("Group cols is Empty");
    }

    for (auto* key : keys) {
        const auto kind = key->expr()->kind();
        const bool groupable = kind == Expression::Kind::kInputProperty ||
                               kind == Expression::Kind::kFunctionCall;
        if (!groupable) {
            return Status::SemanticError("Group `%s` invalid", key->expr()->toString().c_str());
        }

        // Aggregate functions belong to the yield clause, not to the keys.
        const auto aggName = key->getAggFunName();
        if (!aggName.empty()) {
            return Status::SemanticError("Use invalid group function `%s`",
                                         aggName.c_str());
        }

        NG_RETURN_IF_ERROR(deduceExprType(key->expr()));
        NG_RETURN_IF_ERROR(deduceProps(key->expr(), exprProps_));

        groupCols_.emplace_back(key);
        groupKeys_.emplace_back(key->expr());
    }
    return Status::OK();
}
/**
 * Builds the plan for the sentence: a single Aggregate node created from the
 * collected group keys and group items. The node is given a copy of the
 * output column names and becomes both root_ and tail_.
 *
 * groupKeys_ and groupItems_ are moved into the node, so they must not be
 * used afterwards.
 */
Status GroupByValidator::toPlan() {
    auto* aggregate = Aggregate::make(
        qctx_->plan(), nullptr, std::move(groupKeys_), std::move(groupItems_));

    // Hand over a copy; outputColumnNames_ itself is left untouched.
    std::vector<std::string> colNames(outputColumnNames_);
    aggregate->setColNames(std::move(colNames));

    tail_ = aggregate;
    root_ = aggregate;
    return Status::OK();
}
} // namespace graph
} // namespace nebula
/* Copyright (c) 2020 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/
#ifndef VALIDATOR_GROUPBY_VALIDATOR_H_
#define VALIDATOR_GROUPBY_VALIDATOR_H_
#include "common/base/Base.h"
#include "validator/Validator.h"
#include "planner/Query.h"
namespace nebula {
namespace graph {
/**
 * Validator for the GROUP BY sentence: checks the group and yield clauses and
 * produces an Aggregate plan node.
 */
class GroupByValidator final : public Validator {
public:
    GroupByValidator(Sentence *sentence, QueryContext *context)
        : Validator(sentence, context) {}

private:
    // Clause-level validation helpers, invoked from validateImpl().
    Status validateGroup(const GroupClause *groupClause);
    Status validateYield(const YieldClause *yieldClause);

    // Verify the input / variable props collected in exprProps_.
    Status checkInputProps() const;
    Status checkVarProps() const;

    // Validator interface.
    Status validateImpl() override;
    Status toPlan() override;

    // Columns of the group clause and of the yield clause.
    std::vector<YieldColumn*>                         groupCols_;
    std::vector<YieldColumn*>                         yieldCols_;

    // key: alias of a yield column, value: the yield column declaring it
    std::unordered_map<std::string, YieldColumn*>     aliases_;

    // Names given to the Aggregate node's output columns.
    std::vector<std::string>                          outputColumnNames_;

    // Props referenced by the group and yield expressions.
    ExpressionProps                                   exprProps_;

    // Arguments for Aggregate::make(); moved out in toPlan().
    std::vector<Expression*>                          groupKeys_;
    std::vector<Aggregate::GroupItem>                 groupItems_;
};
} // namespace graph
} // namespace nebula
#endif
......@@ -471,19 +471,20 @@ Status DeleteEdgesValidator::buildEdgeKeyRef(const std::vector<EdgeKey*> &edgeKe
Status DeleteEdgesValidator::checkInput() {
CHECK(!edgeKeyRefs_.empty());
auto &edgeKeyRef = *edgeKeyRefs_.begin();
NG_LOG_AND_RETURN_IF_ERROR(deduceProps(edgeKeyRef->srcid()));
NG_LOG_AND_RETURN_IF_ERROR(deduceProps(edgeKeyRef->dstid()));
NG_LOG_AND_RETURN_IF_ERROR(deduceProps(edgeKeyRef->rank()));
NG_LOG_AND_RETURN_IF_ERROR(deduceProps(edgeKeyRef->srcid(), exprProps_));
NG_LOG_AND_RETURN_IF_ERROR(deduceProps(edgeKeyRef->dstid(), exprProps_));
NG_LOG_AND_RETURN_IF_ERROR(deduceProps(edgeKeyRef->rank(), exprProps_));
if (!srcTagProps_.empty() || !dstTagProps_.empty() || !edgeProps_.empty()) {
if (!exprProps_.srcTagProps().empty() || !exprProps_.dstTagProps().empty() ||
!exprProps_.edgeProps().empty()) {
return Status::SyntaxError("Only support input and variable.");
}
if (!inputProps_.empty() && !varProps_.empty()) {
if (!exprProps_.inputProps().empty() && !exprProps_.varProps().empty()) {
return Status::Error("Not support both input and variable.");
}
if (!varProps_.empty() && varProps_.size() > 1) {
if (!exprProps_.varProps().empty() && exprProps_.varProps().size() > 1) {
return Status::Error("Only one variable allowed to use.");
}
......
......@@ -109,6 +109,7 @@ private:
// From InputPropertyExpression; a ConstantExpression will be converted to an InputPropertyExpression
std::vector<EdgeKeyRef*> edgeKeyRefs_;
std::string edgeKeyVar_;
ExpressionProps exprProps_;
};
class UpdateValidator : public Validator {
......
......@@ -34,7 +34,7 @@ Status SequentialValidator::validateImpl() {
switch (firstSentence->kind()) {
case Sentence::Kind::kLimit:
case Sentence::Kind::kOrderBy:
case Sentence::Kind::KGroupBy:
case Sentence::Kind::kGroupBy:
return Status::SyntaxError("Could not start with the statement: %s",
firstSentence->toString().c_str());
default:
......
......@@ -27,6 +27,7 @@
#include "validator/SetValidator.h"
#include "validator/UseValidator.h"
#include "validator/YieldValidator.h"
#include "validator/GroupByValidator.h"
#include "common/function/FunctionManager.h"
namespace nebula {
......@@ -62,6 +63,8 @@ std::unique_ptr<Validator> Validator::makeValidator(Sentence* sentence, QueryCon
return std::make_unique<OrderByValidator>(sentence, context);
case Sentence::Kind::kYield:
return std::make_unique<YieldValidator>(sentence, context);
case Sentence::Kind::kGroupBy:
return std::make_unique<GroupByValidator>(sentence, context);
case Sentence::Kind::kCreateSpace:
return std::make_unique<CreateSpaceValidator>(sentence, context);
case Sentence::Kind::kCreateTag:
......@@ -541,7 +544,7 @@ StatusOr<Value::Type> Validator::deduceExprType(const Expression* expr) const {
static_cast<int64_t>(expr->kind()));
}
Status Validator::deduceProps(const Expression* expr) {
Status Validator::deduceProps(const Expression* expr, ExpressionProps& exprProps) {
switch (expr->kind()) {
case Expression::Kind::kConstant: {
break;
......@@ -562,21 +565,21 @@ Status Validator::deduceProps(const Expression* expr) {
case Expression::Kind::kLogicalOr:
case Expression::Kind::kLogicalXor: {
auto biExpr = static_cast<const BinaryExpression*>(expr);
NG_RETURN_IF_ERROR(deduceProps(biExpr->left()));
NG_RETURN_IF_ERROR(deduceProps(biExpr->right()));
NG_RETURN_IF_ERROR(deduceProps(biExpr->left(), exprProps));
NG_RETURN_IF_ERROR(deduceProps(biExpr->right(), exprProps));
break;
}
case Expression::Kind::kUnaryPlus:
case Expression::Kind::kUnaryNegate:
case Expression::Kind::kUnaryNot: {
auto unaryExpr = static_cast<const UnaryExpression*>(expr);
NG_RETURN_IF_ERROR(deduceProps(unaryExpr->operand()));
NG_RETURN_IF_ERROR(deduceProps(unaryExpr->operand(), exprProps));
break;
}
case Expression::Kind::kFunctionCall: {
auto funcExpr = static_cast<const FunctionCallExpression*>(expr);
for (auto& arg : funcExpr->args()->args()) {
NG_RETURN_IF_ERROR(deduceProps(arg.get()));
NG_RETURN_IF_ERROR(deduceProps(arg.get(), exprProps));
}
break;
}
......@@ -584,24 +587,21 @@ Status Validator::deduceProps(const Expression* expr) {
auto* tagPropExpr = static_cast<const SymbolPropertyExpression*>(expr);
auto status = qctx_->schemaMng()->toTagID(space_.id, *tagPropExpr->sym());
NG_RETURN_IF_ERROR(status);
auto& props = dstTagProps_[status.value()];
props.emplace_back(*tagPropExpr->prop());
exprProps.insertDstTagProp(status.value(), *tagPropExpr->prop());
break;
}
case Expression::Kind::kSrcProperty: {
auto* tagPropExpr = static_cast<const SymbolPropertyExpression*>(expr);
auto status = qctx_->schemaMng()->toTagID(space_.id, *tagPropExpr->sym());
NG_RETURN_IF_ERROR(status);
auto& props = srcTagProps_[status.value()];
props.emplace_back(*tagPropExpr->prop());
exprProps.insertSrcTagProp(status.value(), *tagPropExpr->prop());
break;
}
case Expression::Kind::kTagProperty: {
auto* tagPropExpr = static_cast<const SymbolPropertyExpression*>(expr);
auto status = qctx_->schemaMng()->toTagID(space_.id, *tagPropExpr->sym());
NG_RETURN_IF_ERROR(status);
auto& props = tagProps_[status.value()];
props.emplace_back(*tagPropExpr->prop());
exprProps.insertTagProp(status.value(), *tagPropExpr->prop());
break;
}
case Expression::Kind::kEdgeProperty:
......@@ -612,27 +612,22 @@ Status Validator::deduceProps(const Expression* expr) {
auto* edgePropExpr = static_cast<const SymbolPropertyExpression*>(expr);
auto status = qctx_->schemaMng()->toEdgeType(space_.id, *edgePropExpr->sym());
NG_RETURN_IF_ERROR(status);
auto& props = edgeProps_[status.value()];
props.emplace_back(*edgePropExpr->prop());
exprProps.insertEdgeProp(status.value(), *edgePropExpr->prop());
break;
}
case Expression::Kind::kInputProperty: {
auto* inputPropExpr = static_cast<const SymbolPropertyExpression*>(expr);
auto* prop = inputPropExpr->prop();
inputProps_.emplace_back(*prop);
exprProps.insertInputProp(*inputPropExpr->prop());
break;
}
case Expression::Kind::kVarProperty: {
auto* varPropExpr = static_cast<const SymbolPropertyExpression*>(expr);
auto* var = varPropExpr->sym();
auto* prop = varPropExpr->prop();
auto& props = varProps_[*var];
props.emplace_back(*prop);
exprProps.insertVarProp(*varPropExpr->sym(), *varPropExpr->prop());
break;
}
case Expression::Kind::kTypeCasting: {
auto* typeCastExpr = static_cast<const TypeCastingExpression*>(expr);
NG_RETURN_IF_ERROR(deduceProps(typeCastExpr->operand()));
NG_RETURN_IF_ERROR(deduceProps(typeCastExpr->operand(), exprProps));
break;
}
case Expression::Kind::kUUID:
......@@ -730,21 +725,21 @@ bool Validator::evaluableExpr(const Expression* expr) const {
// static
Status Validator::checkPropNonexistOrDuplicate(const ColsDef& cols,
const std::string& prop,
const std::string &validatorName) {
auto eq = [&](const ColDef& col) { return col.first == prop; };
const folly::StringPiece& prop,
const std::string& validatorName) {
auto eq = [&](const ColDef& col) { return col.first == prop.str(); };
auto iter = std::find_if(cols.cbegin(), cols.cend(), eq);
if (iter == cols.cend()) {
return Status::SemanticError("%s: prop `%s' not exists",
validatorName.c_str(),
prop.c_str());
prop.str().c_str());
}
iter = std::find_if(iter + 1, cols.cend(), eq);
if (iter != cols.cend()) {
return Status::SemanticError("%s: duplicate prop `%s'",
validatorName.c_str(),
prop.c_str());
prop.str().c_str());
}
return Status::OK();
......@@ -781,5 +776,94 @@ StatusOr<std::string> Validator::checkRef(const Expression* ref, Value::Type typ
}
}
} // namespace graph
} // namespace nebula
// Records `prop` as referenced on variable `varName`; the variable's entry is
// created on first use.
void ExpressionProps::insertVarProp(const std::string& varName, folly::StringPiece prop) {
    varProps_[varName].emplace(prop);
}
// Records `prop` as a referenced input property.
void ExpressionProps::insertInputProp(folly::StringPiece prop) {
    auto& inputs = inputProps_;
    inputs.emplace(prop);
}
// Records `prop` as referenced on source tag `tagId`; the tag's entry is
// created on first use.
void ExpressionProps::insertSrcTagProp(TagID tagId, folly::StringPiece prop) {
    srcTagProps_[tagId].emplace(prop);
}
// Records `prop` as referenced on destination tag `tagId`; the tag's entry is
// created on first use.
void ExpressionProps::insertDstTagProp(TagID tagId, folly::StringPiece prop) {
    dstTagProps_[tagId].emplace(prop);
}
// Records `prop` as referenced on edge type `edgeType`; the edge type's entry
// is created on first use.
void ExpressionProps::insertEdgeProp(EdgeType edgeType, folly::StringPiece prop) {
    edgeProps_[edgeType].emplace(prop);
}
// Records `prop` as referenced on tag `tagId`; the tag's entry is created on
// first use.
void ExpressionProps::insertTagProp(TagID tagId, folly::StringPiece prop) {
    tagProps_[tagId].emplace(prop);
}
// Returns true when every property in `props` is among the recorded input
// properties (trivially true for an empty `props`).
bool ExpressionProps::isSubsetOfInput(const std::set<folly::StringPiece>& props) {
    return std::all_of(props.begin(), props.end(), [this](const folly::StringPiece& prop) {
        return inputProps_.count(prop) != 0;
    });
}
// Returns true when every variable in `props` is recorded here and each of its
// properties is among the properties recorded for that variable (trivially
// true for an empty `props`).
bool ExpressionProps::isSubsetOfVar(const VarPropMap& props) {
    for (auto& entry : props) {
        // Look the variable up once and reuse the iterator instead of hashing
        // the name again through operator[] for every property.
        auto known = varProps_.find(entry.first);
        if (known == varProps_.end()) {
            return false;
        }
        auto& knownProps = known->second;
        for (auto& prop : entry.second) {
            if (knownProps.find(prop) == knownProps.end()) {
                return false;
            }
        }
    }
    return true;
}
// Merges every property recorded in `exprProps` into this object.
//
// `exprProps` is taken by value, so the caller's object is never modified.
// For the keyed maps, operator[] creates the destination set when the key is
// not present yet (also when the source set for that key is empty).
//
// The elements are copied with plain iterators: std::set elements are const,
// so wrapping the iterators in std::make_move_iterator could never move
// anything. Iterating an empty container is a no-op, so no empty() guards
// are needed either.
void ExpressionProps::unionProps(ExpressionProps exprProps) {
    const auto& input = exprProps.inputProps();
    inputProps_.insert(input.begin(), input.end());

    for (const auto& entry : exprProps.srcTagProps()) {
        srcTagProps_[entry.first].insert(entry.second.begin(), entry.second.end());
    }
    for (const auto& entry : exprProps.dstTagProps()) {
        dstTagProps_[entry.first].insert(entry.second.begin(), entry.second.end());
    }
    for (const auto& entry : exprProps.tagProps()) {
        tagProps_[entry.first].insert(entry.second.begin(), entry.second.end());
    }
    for (const auto& entry : exprProps.varProps()) {
        varProps_[entry.first].insert(entry.second.begin(), entry.second.end());
    }
    for (const auto& entry : exprProps.edgeProps()) {
        edgeProps_[entry.first].insert(entry.second.begin(), entry.second.end());
    }
}
} // namespace graph
} // namespace nebula
......@@ -19,6 +19,58 @@ class YieldColumns;
namespace graph {
/**
 * Collects property names referenced by expressions, kept in separate
 * buckets: input props, per-variable props, and props keyed by TagID
 * (src tag, dst tag, tag) or by EdgeType (edge).
 *
 * Names are stored as folly::StringPiece, which does not own its characters.
 * NOTE(review): the strings the pieces refer to presumably belong to the
 * expressions being validated and must outlive this object -- confirm.
 */
class ExpressionProps final {
public:
    using TagIDPropsMap = std::unordered_map<TagID, std::set<folly::StringPiece>>;
    using EdgePropMap = std::unordered_map<EdgeType, std::set<folly::StringPiece>>;
    using VarPropMap = std::unordered_map<std::string, std::set<folly::StringPiece>>;

    // Each insert* adds `prop` to the matching bucket; duplicates are ignored
    // because the buckets are sets.
    void insertInputProp(folly::StringPiece prop);
    void insertVarProp(const std::string& varName, folly::StringPiece prop);
    void insertSrcTagProp(TagID tagId, folly::StringPiece prop);
    void insertDstTagProp(TagID tagId, folly::StringPiece prop);
    void insertEdgeProp(EdgeType edgeType, folly::StringPiece prop);
    void insertTagProp(TagID tagId, folly::StringPiece prop);

    // Accessors. The const overloads let read-only callers inspect the
    // buckets through a const ExpressionProps without a const_cast.
    std::set<folly::StringPiece>& inputProps() {
        return inputProps_;
    }
    const std::set<folly::StringPiece>& inputProps() const {
        return inputProps_;
    }

    TagIDPropsMap& srcTagProps() {
        return srcTagProps_;
    }
    const TagIDPropsMap& srcTagProps() const {
        return srcTagProps_;
    }

    TagIDPropsMap& dstTagProps() {
        return dstTagProps_;
    }
    const TagIDPropsMap& dstTagProps() const {
        return dstTagProps_;
    }

    TagIDPropsMap& tagProps() {
        return tagProps_;
    }
    const TagIDPropsMap& tagProps() const {
        return tagProps_;
    }

    EdgePropMap& edgeProps() {
        return edgeProps_;
    }
    const EdgePropMap& edgeProps() const {
        return edgeProps_;
    }

    VarPropMap& varProps() {
        return varProps_;
    }
    const VarPropMap& varProps() const {
        return varProps_;
    }

    // True when every name in `props` is contained in the input bucket.
    bool isSubsetOfInput(const std::set<folly::StringPiece>& props);
    // True when every variable in `props` is known here and all of its
    // property names are contained in the set recorded for that variable.
    bool isSubsetOfVar(const VarPropMap& props);
    // Merges all buckets of `exprProps` into this object.
    void unionProps(ExpressionProps exprProps);

private:
    std::set<folly::StringPiece> inputProps_;
    VarPropMap varProps_;
    TagIDPropsMap srcTagProps_;
    TagIDPropsMap dstTagProps_;
    EdgePropMap edgeProps_;
    TagIDPropsMap tagProps_;
};
class Validator {
public:
virtual ~Validator() = default;
......@@ -82,12 +134,12 @@ protected:
StatusOr<Value::Type> deduceExprType(const Expression* expr) const;
Status deduceProps(const Expression* expr);
Status deduceProps(const Expression* expr, ExpressionProps& exprProps);
bool evaluableExpr(const Expression* expr) const;
static Status checkPropNonexistOrDuplicate(const ColsDef& cols,
const std::string& prop,
const folly::StringPiece& prop,
const std::string &validatorName);
static Status appendPlan(PlanNode* plan, PlanNode* appended);
......@@ -111,14 +163,6 @@ protected:
std::string inputVarName_;
// Admin sentences do not requires a space to be chosen.
bool noSpaceRequired_{false};
// properties
std::vector<std::string> inputProps_;
std::unordered_map<std::string, std::vector<std::string>> varProps_;
std::unordered_map<TagID, std::vector<std::string>> srcTagProps_;
std::unordered_map<TagID, std::vector<std::string>> dstTagProps_;
std::unordered_map<EdgeType, std::vector<std::string>> edgeProps_;
std::unordered_map<TagID, std::vector<std::string>> tagProps_;
};
} // namespace graph
......
......@@ -23,15 +23,16 @@ Status YieldValidator::validateImpl() {
NG_RETURN_IF_ERROR(validateYieldAndBuildOutputs(yield->yield()));
NG_RETURN_IF_ERROR(validateWhere(yield->where()));
if (!srcTagProps_.empty() || !dstTagProps_.empty() || !edgeProps_.empty()) {
if (!exprProps_.srcTagProps().empty() || !exprProps_.dstTagProps().empty() ||
!exprProps_.edgeProps().empty()) {
return Status::SemanticError("Only support input and variable in yield sentence.");
}
if (!inputProps_.empty() && !varProps_.empty()) {
if (!exprProps_.inputProps().empty() && !exprProps_.varProps().empty()) {
return Status::SemanticError("Not support both input and variable.");
}
if (!varProps_.empty() && varProps_.size() > 1) {
if (!exprProps_.varProps().empty() && exprProps_.varProps().size() > 1) {
return Status::SemanticError("Only one variable allowed to use.");
}
......@@ -64,10 +65,11 @@ Status YieldValidator::checkAggFunAndBuildGroupItems(const YieldClause *clause)
}
Status YieldValidator::checkInputProps() const {
if (inputs_.empty() && !inputProps_.empty()) {
auto& inputProps = const_cast<ExpressionProps*>(&exprProps_)->inputProps();
if (inputs_.empty() && !inputProps.empty()) {
return Status::SemanticError("no inputs for yield columns.");
}
for (auto &prop : inputProps_) {
for (auto &prop : inputProps) {
DCHECK_NE(prop, "*");
NG_RETURN_IF_ERROR(checkPropNonexistOrDuplicate(inputs_, prop, "Yield sentence"));
}
......@@ -75,7 +77,8 @@ Status YieldValidator::checkInputProps() const {
}
Status YieldValidator::checkVarProps() const {
for (auto &pair : varProps_) {
auto& varProps = const_cast<ExpressionProps*>(&exprProps_)->varProps();
for (auto &pair : varProps) {
auto &var = pair.first;
if (!vctx_->existVar(var)) {
return Status::SemanticError("variable `%s' not exist.", var.c_str());
......@@ -94,7 +97,7 @@ Status YieldValidator::makeOutputColumn(YieldColumn *column) {
auto expr = column->expr();
DCHECK(expr != nullptr);
NG_RETURN_IF_ERROR(deduceProps(expr));
NG_RETURN_IF_ERROR(deduceProps(expr, exprProps_));
auto status = deduceExprType(expr);
NG_RETURN_IF_ERROR(status);
......@@ -168,7 +171,7 @@ Status YieldValidator::validateWhere(const WhereClause *clause) {
filter = clause->filter();
}
if (filter != nullptr) {
NG_RETURN_IF_ERROR(deduceProps(filter));
NG_RETURN_IF_ERROR(deduceProps(filter, exprProps_));
}
return Status::OK();
}
......@@ -202,9 +205,9 @@ Status YieldValidator::toPlan() {
tail_ = dedupDep;
}
if (!varProps_.empty()) {
DCHECK_EQ(varProps_.size(), 1u);
auto var = varProps_.cbegin()->first;
if (!exprProps_.varProps().empty()) {
DCHECK_EQ(exprProps_.varProps().size(), 1u);
auto var = exprProps_.varProps().cbegin()->first;
static_cast<SingleInputNode *>(tail_)->setInputVar(var);
}
......
......@@ -43,9 +43,11 @@ private:
Status makeOutputColumn(YieldColumn *column);
bool hasAggFun_{false};
YieldColumns *columns_{nullptr};
std::vector<std::string> outputColumnNames_;
std::vector<Aggregate::GroupItem> groupItems_;
ExpressionProps exprProps_;
};
} // namespace graph
......
......@@ -60,6 +60,7 @@ nebula_add_test(
MutateValidatorTest.cpp
YieldValidatorTest.cpp
ExplainValidatorTest.cpp
GroupByValidatorTest.cpp
OBJECTS ${VALIDATOR_TEST_LIBS}
LIBRARIES
gtest
......
/* Copyright (c) 2020 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/
#include "validator/GroupByValidator.h"
#include "validator/test/ValidatorTestBase.h"
namespace nebula {
namespace graph {
// Fixture for the GROUP BY validator tests; it adds no members of its own on
// top of ValidatorTestBase.
class GroupByValidatorTest : public ValidatorTestBase {};

// Shorthand for plan node kinds when spelling out expected plans.
using PK = nebula::graph::PlanNode::Kind;
// Valid GROUP BY sentences: each query is validated and the node kinds of the
// generated plan are compared with the expected sequence via checkResult().
TEST_F(GroupByValidatorTest, TestGroupBy) {
    // Plan expected by the cases whose GO clause yields only edge (`like.*`)
    // and `$^` properties.
    const std::vector<PlanNode::Kind> directPlan = {
        PK::kAggregate,
        PK::kProject,
        PK::kGetNeighbors,
        PK::kStart
    };
    // Plan expected by the cases whose GO clause also yields `$$` properties.
    const std::vector<PlanNode::Kind> joinedPlan = {
        PK::kAggregate,
        PK::kProject,
        PK::kDataJoin,
        PK::kProject,
        PK::kGetVertices,
        PK::kProject,
        PK::kGetNeighbors,
        PK::kStart
    };

    {
        // group by one input column, aggregate another
        std::string query =
            "GO FROM \"1\" OVER like YIELD like._dst AS id, $^.person.age AS age "
            "| GROUP BY $-.age YIELD COUNT($-.id)";
        EXPECT_TRUE(checkResult(query, directPlan));
    }
    {
        // yield the group key itself, no aggregate function
        std::string query =
            "GO FROM \"NoExist\" OVER like YIELD like._dst AS id, $^.person.age AS age "
            "| GROUP BY $-.id YIELD $-.id AS id";
        EXPECT_TRUE(checkResult(query, directPlan));
    }
    {
        // many aggregate functions over the same group
        std::string query = "GO FROM \"1\", \"2\" OVER like "
                            "YIELD $$.person.name as name, "
                            "$$.person.age AS dst_age, "
                            "$$.person.age AS src_age"
                            "| GROUP BY $-.name "
                            "YIELD $-.name AS name, "
                            "SUM($-.dst_age) AS sum_dst_age, "
                            "AVG($-.dst_age) AS avg_dst_age, "
                            "MAX($-.src_age) AS max_src_age, "
                            "MIN($-.src_age) AS min_src_age, "
                            "STD($-.src_age) AS std_src_age, "
                            "BIT_AND(1) AS bit_and, "
                            "BIT_OR(2) AS bit_or, "
                            "BIT_XOR(3) AS bit_xor";
        EXPECT_TRUE(checkResult(query, joinedPlan));
    }
    {
        // group one col
        std::string query = "GO FROM \"1\" OVER like "
                            "YIELD $$.person.name as name, "
                            "like._dst AS id, "
                            "like.start AS start_year, "
                            "like.end AS end_year "
                            "| GROUP BY $-.start_year "
                            "YIELD COUNT($-.id), "
                            "$-.start_year AS start_year, "
                            "AVG($-.end_year) AS avg";
        EXPECT_TRUE(checkResult(query, joinedPlan));
    }
    {
        // group keys include a function call: abs(5)
        std::string query = "GO FROM \"1\" OVER like "
                            "YIELD $$.person.name as name, "
                            "like._dst AS id, "
                            "like.start AS start_year, "
                            "like.end AS end_year"
                            "| GROUP BY $-.name, abs(5) "
                            "YIELD $-.name AS name, "
                            "SUM(1.5) AS sum, "
                            "COUNT(*) AS count, "
                            "1+1 AS cal";
        EXPECT_TRUE(checkResult(query, joinedPlan));
    }
    {
        // two input columns as group keys
        std::string query = "GO FROM \"1\" OVER like "
                            "YIELD $$.person.name as name, "
                            "like._dst AS id, "
                            "like.start AS start_year, "
                            "like.end AS end_year"
                            "| GROUP BY $-.name, $-.id "
                            "YIELD $-.name AS name, "
                            "SUM(1.5) AS sum, "
                            "COUNT(*) AS count, "
                            "1+1 AS cal";
        EXPECT_TRUE(checkResult(query, joinedPlan));
    }
}
// Invalid GROUP BY sentences: every query below must be rejected, and the
// text of the returned error is compared verbatim with the expected message.
TEST_F(GroupByValidatorTest, InvalidTest) {
{
// group key is a constant expression (1+1) that references no input column
std::string query = "GO FROM \"1\" OVER like YIELD like._dst AS id, $^.person.age AS age "
"| GROUP BY 1+1 YIELD COUNT(1), 1+1";
auto result = checkResult(query);
EXPECT_EQ(std::string(result.message()), "SemanticError: Group `(1+1)` invalid");
}
{
// aggregate argument is a bare variable ($var); expected to fail as a syntax error
std::string query = "GO FROM \"1\" OVER like YIELD like._dst AS id, $^.person.age AS age "
"| GROUP BY $-.age YIELD COUNT($var)";
auto result = checkResult(query);
EXPECT_EQ(std::string(result.message()), "SyntaxError: syntax error near `)'");
}
{
// aggregate argument is a `$$` property, which GROUP BY does not accept
std::string query = "GO FROM \"1\" OVER like YIELD like._dst AS id, $^.person.age AS age "
"| GROUP BY $-.age YIELD COUNT($$.person.name)";
auto result = checkResult(query);
EXPECT_EQ(std::string(result.message()),
"SemanticError: Only support input and variable in GroupBy sentence.");
}
{
// group key refers to an input column (start_year) the upstream YIELD does not produce
std::string query = "GO FROM \"1\" OVER like YIELD like._dst AS id, $^.person.age AS age "
"| GROUP BY $-.start_year YIELD COUNT($-.age)";
auto result = checkResult(query);
EXPECT_EQ(std::string(result.message()),
"SemanticError: `$-.start_year', not exist prop `start_year'");
}
{
// group key is a bare name (noexist) rather than an input property
std::string query = "GO FROM \"1\" OVER like YIELD like._dst AS id, $^.person.age AS age "
"| GROUP BY noexist YIELD COUNT($-.age)";
auto result = checkResult(query);
EXPECT_EQ(std::string(result.message()), "SemanticError: Group `noexist` invalid");
}
{
// `*` as aggregate argument is only valid for COUNT, not SUM
std::string query = "GO FROM \"1\" OVER like YIELD like._dst AS id, $^.person.age AS age "
"| GROUP BY $-.id YIELD SUM(*)";
auto result = checkResult(query);
EXPECT_EQ(std::string(result.message()),
"SemanticError: `SUM(*)` invaild, * valid in count.");
}
{
// aggregate function called with more than one argument; expected to fail as a syntax error
std::string query = "GO FROM \"1\" OVER like YIELD like._dst AS id, $^.person.age AS age "
"| GROUP BY $-.id YIELD COUNT($-.id, $-.age)";
auto result = checkResult(query);
EXPECT_EQ(std::string(result.message()), "SyntaxError: syntax error near `, $-.age'");
}
{
// an aggregate function is used as a group key
std::string query = "GO FROM \"1\" OVER like YIELD like._dst AS id, $^.person.age AS age "
"| GROUP BY $-.id, SUM($-.age) YIELD $-.id, SUM($-.age)";
auto result = checkResult(query);
EXPECT_EQ(std::string(result.message()), "SemanticError: Use invalid group function `SUM`");
}
{
// aggregate function in the YIELD of a GO sentence, with no GROUP BY at all
std::string query = "GO FROM \"1\" OVER like YIELD $^.person.age AS age, "
"COUNT(like._dst) AS id ";
auto result = checkResult(query);
EXPECT_EQ(std::string(result.message()),
"SemanticError: `COUNT(like._dst) AS id', not support "
"aggregate function in go sentence.");
}
{
// yielded column ($-.name) is neither a group key nor an aggregate
std::string query = "GO FROM \"1\" OVER like "
"YIELD $$.person.name as name, "
"like._dst AS id, "
"like.start AS start_year, "
"like.end AS end_year"
"| GROUP BY $-.start_year, abs(5) "
"YIELD $-.name AS name, "
"SUM(1.5) AS sum, "
"COUNT(*) AS count, "
"1+1 AS cal";
auto result = checkResult(query);
EXPECT_EQ(std::string(result.message()),
"SemanticError: Yield `$-.name AS name` isn't in output fields");
}
{
// upstream YIELD defines the alias `age` twice and it is used as the group key
std::string query =
"GO FROM \"1\" OVER like YIELD $$.person.age AS age, $^.person.age AS age"
"| GROUP BY $-.age YIELD $-.age, 1+1";
auto result = checkResult(query);
EXPECT_EQ(std::string(result.message()),
"SemanticError: GroupBy sentence: duplicate prop `age'");
}
{
// upstream YIELD defines the alias `age` twice and it is used as an aggregate argument
std::string query = "GO FROM \"1\" OVER like "
"YIELD $$.person.age AS age, $^.person.age AS age, like._dst AS id "
"| GROUP BY $-.id YIELD $-.id, COUNT($-.age)";
auto result = checkResult(query);
EXPECT_EQ(std::string(result.message()),
"SemanticError: GroupBy sentence: duplicate prop `age'");
}
// NOTE(review): the two disabled cases below expect a message about `SUM`
// although neither query uses SUM; presumably placeholders -- confirm the
// real message before enabling them.
// {
// // todo(jmq) not support $-.*
// std::string query = "GO FROM \"1\" OVER like YIELD like._dst AS id, $^.person.age AS age
// "
// "| GROUP BY $-.id YIELD COUNT($-.*)";
// auto result = checkResult(query);
// EXPECT_EQ(std::string(result.message()), "SemanticError: Use invalid group function
// `SUM`");
// }
// {
// // todo(jmq) not support $-.*
// std::string query = "GO FROM \"1\" OVER like YIELD like._dst AS id, $^.person.age AS age
// "
// "| GROUP BY $-.* YIELD $-.*";
// auto result = checkResult(query);
// EXPECT_EQ(std::string(result.message()), "SemanticError: Use invalid group function
// `SUM`");
// }
}
} // namespace graph
} // namespace nebula
# --coding:utf-8--
#
# Copyright (c) 2020 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License,
# attached with Common Clause Condition 1.0, found in the LICENSES directory.
from tests.common.nebula_test_suite import NebulaTestSuite
from tests.common.nebula_test_suite import T_EMPTY, T_NULL
import pytest
class TestGroupBy(NebulaTestSuite):
    """Integration tests for the GROUP BY sentence.

    Every test sends nGQL statements through ``execute_query`` and checks
    either that the statement is rejected or that the returned column names
    and rows match the expected data.

    Continuation lines inside the triple-quoted statements are deliberately
    left unindented so the statement text sent to the server is unchanged.
    """

    @classmethod
    def prepare(self):
        # Load the data set the queries below run against.
        self.load_data()

    @classmethod
    def cleanup(self):
        # Nothing to clean up. Declared as a classmethod (like prepare) so the
        # hook can be called on the class as well as on an instance.
        pass

    def test_syntax_error(self):
        """Statements that must be rejected by the server."""
        # Use groupby without input
        stmt = '''GO FROM 'Marco Belinelli' OVER serve YIELD $$.team.name AS name
| GROUP BY 1+1 YIELD COUNT(1), 1+1 '''
        resp = self.execute_query(stmt)
        self.check_resp_failed(resp)

        # use var
        stmt = '''GO FROM 'Marco Belinelli' OVER serve YIELD $$.team.name AS name,
serve.end_year AS end_year | GROUP BY $-.start_year YIELD COUNT($var) '''
        resp = self.execute_query(stmt)
        self.check_resp_failed(resp)

        # use dst
        stmt = '''GO FROM 'Marco Belinelli' OVER serve YIELD $$.team.name AS name,
serve.end_year AS end_year | GROUP BY $-.start_year YIELD COUNT($$.team.name) '''
        resp = self.execute_query(stmt)
        self.check_resp_failed(resp)

        # groupby input noexist
        stmt = '''GO FROM 'Marco Belinelli' OVER serve YIELD $$.team.name AS name,
serve._dst AS id | GROUP BY $-.start_year YIELD COUNT($-.id) '''
        resp = self.execute_query(stmt)
        self.check_resp_failed(resp)

        # group alias noexist
        # stmt = '''GO FROM 'Marco Belinelli' OVER serve YIELD $$.team.name AS name,
        # serve._dst AS id | GROUP BY team YIELD COUNT($-.id), $-.name AS teamName '''
        # resp = self.execute_query(stmt)
        # self.check_resp_failed(resp)

        # Field nonexistent
        stmt = '''GO FROM 'Marco Belinelli' OVER serve YIELD $$.team.name AS name,
serve._dst AS id | GROUP BY $-.name YIELD COUNT($-.start_year) '''
        resp = self.execute_query(stmt)
        self.check_resp_failed(resp)

        # use sum(*)
        stmt = '''GO FROM 'Marco Belinelli' OVER serve YIELD $$.team.name AS name,
serve._dst AS id | GROUP BY $-.name YIELD SUM(*) '''
        resp = self.execute_query(stmt)
        self.check_resp_failed(resp)

        # use agg fun has more than one inputs
        stmt = '''GO FROM 'Marco Belinelli' OVER serve YIELD $$.team.name AS name,
serve._dst AS id | GROUP BY $-.name YIELD COUNT($-.name, $-.id)'''
        resp = self.execute_query(stmt)
        self.check_resp_failed(resp)

        # group col has agg fun
        stmt = '''GO FROM 'Marco Belinelli' OVER serve YIELD $$.team.name AS name,
serve._dst AS id | GROUP BY $-.name, SUM($-.id) YIELD $-.name, SUM($-.id)'''
        resp = self.execute_query(stmt)
        self.check_resp_failed(resp)

        # yield without group by
        stmt = '''GO FROM 'Marco Belinelli' OVER serve YIELD $$.team.name AS name,
COUNT(serve._dst) AS id'''
        resp = self.execute_query(stmt)
        self.check_resp_failed(resp)

    def test_group_by(self):
        """Valid GROUP BY statements; rows are compared ignoring their order."""
        stmt = '''GO FROM 'Aron Baynes', 'Tracy McGrady' OVER serve
YIELD $$.team.name AS name,
serve._dst AS id,
serve.start_year AS start_year,
serve.end_year AS end_year
| GROUP BY $-.name, $-.start_year
YIELD $-.name AS teamName,
$-.start_year AS start_year,
MAX($-.start_year),
MIN($-.end_year),
AVG($-.end_year) AS avg_end_year,
STD($-.end_year) AS std_end_year,
COUNT($-.id)'''
        resp = self.execute_query(stmt)
        self.check_resp_succeeded(resp)
        expected_data = {
            "column_names" : ["teamName", "start_year", "MAX($-.start_year)", "MIN($-.end_year)", "avg_end_year", "std_end_year", "COUNT($-.id)"],
            "rows" : [
                ["Celtics", 2017, 2017, 2019, 2019.0, 0.0, 1],
                ["Magic", 2000, 2000, 2004, 2004.0, 0.0, 1],
                ["Pistons", 2015, 2015, 2017, 2017.0, 0.0, 1],
                ["Raptors", 1997, 1997, 2000, 2000.0, 0.0, 1],
                ["Rockets", 2004, 2004, 2010, 2010.0, 0.0, 1],
                ["Spurs", 2013, 2013, 2013, 2014.0, 1.0, 2]
            ]
        }
        self.check_column_names(resp, expected_data["column_names"])
        self.check_out_of_order_result(resp, expected_data["rows"])

        # group one col
        stmt = '''GO FROM 'Marco Belinelli' OVER serve
YIELD $$.team.name AS name,
serve._dst AS id,
serve.start_year AS start_year,
serve.end_year AS end_year
| GROUP BY $-.start_year
YIELD COUNT($-.id),
$-.start_year AS start_year,
AVG($-.end_year) as avg'''
        resp = self.execute_query(stmt)
        self.check_resp_succeeded(resp)
        expected_data = {
            "column_names" : ["COUNT($-.id)", "start_year", "avg"],
            "rows" : [
                [2, 2018, 2018.5],
                [1, 2017, 2018.0],
                [1, 2016, 2017.0],
                [1, 2009, 2010.0],
                [1, 2007, 2009.0],
                [1, 2012, 2013.0],
                [1, 2015, 2016.0]
            ]
        }
        self.check_column_names(resp, expected_data["column_names"])
        self.check_out_of_order_result(resp, expected_data["rows"])

        # group by aliasName not implement
        # stmt = '''GO FROM 'Aron Baynes', 'Tracy McGrady' OVER serve YIELD $$.team.name AS name,
        # serve._dst AS id, serve.start_year AS start_year, serve.end_year AS end_year
        # | GROUP BY teamName, start_year YIELD $-.name AS teamName, $-.start_year AS start_year,
        # MAX($-.start), MIN($-.end), AVG($-.end) AS avg_end_year, STD($-.end) AS std_end_year,
        # COUNT($-.id)'''
        # resp = self.execute_query(stmt)
        # self.check_resp_succeeded(resp)
        # expected_data = {
        # "column_names" : ["teamName", "start_year", "MAX($-.start)", "MIN($-.end)", "avg_end_year", "std_end_year", "COUNT($-.id)"],
        # "rows" : [
        # ["Celtics", 2017, 2017, 2019, 2019.0, 0, 1],
        # ["Magic", 2000, 2000, 2004, 2004.0, 0, 1],
        # ["Pistons", 2015, 2015, 2017, 2017.0, 0, 1],
        # ["Raptors", 1997, 1997, 2000, 2000.0, 0, 1],
        # ["Rockets", 2004, 2004, 2010, 2010.0, 0, 1],
        # ["Spurs", 2013, 2013, 2013, 2014.0, 1, 2]
        # ]
        # }
        # self.check_column_names(resp, expected_data["column_names"])
        # self.check_out_of_order_result(resp, expected_data["rows"])

        # count(distinct) not implement
        # stmt = '''GO FROM 'Carmelo Anthony', 'Dwyane Wade' OVER like
        # YIELD $$.player.name AS name,
        # $$.player.age AS dst_age,
        # $$.player.age AS src_age,
        # like.likeness AS likeness
        # | GROUP BY $-.name
        # YIELD $-.name AS name,
        # SUM($-.dst_age) AS sum_dst_age,
        # AVG($-.dst_age) AS avg_dst_age,
        # MAX($-.src_age) AS max_src_age,
        # MIN($-.src_age) AS min_src_age,
        # BIT_AND(1) AS bit_and,
        # BIT_OR(2) AS bit_or,
        # BIT_XOR(3) AS bit_xor,
        # COUNT($-.likeness),
        # COUNT_DISTINCT($-.likeness)'''
        # resp = self.execute_query(stmt)
        # self.check_resp_succeeded(resp)
        # expected_data = {
        # "column_names" : ["name", "sum_dst_age", "avg_dst_age", "max_src_age", "min_src_age", "bit_and",
        # "bit_or", "bit_xor", "COUNT($-.likeness)", "COUNT_DISTINCT($-.likeness)"],
        # "rows" : [
        # ["LeBron James", 68, 34.0, 34, 34, 1, 2, 0, 2, 1],
        # ["Chris Paul", 66, 33.0, 33, 33, 1, 2, 0, 2, 1],
        # ["Dwyane Wade", 37, 37.0, 37, 37, 1, 2, 3, 1, 1],
        # ["Carmelo Anthony", 34, 34.0, 34, 34, 1, 2, 3, 1, 1]
        # ]
        # }
        # self.check_column_names(resp, expected_data["column_names"])
        # self.check_out_of_order_result(resp, expected_data["rows"])

        # group has all agg fun
        stmt = '''GO FROM 'Carmelo Anthony', 'Dwyane Wade' OVER like
YIELD $$.player.name AS name,
$$.player.age AS dst_age,
$$.player.age AS src_age,
like.likeness AS likeness
| GROUP BY $-.name
YIELD $-.name AS name,
SUM($-.dst_age) AS sum_dst_age,
AVG($-.dst_age) AS avg_dst_age,
MAX($-.src_age) AS max_src_age,
MIN($-.src_age) AS min_src_age,
BIT_AND(1) AS bit_and,
BIT_OR(2) AS bit_or,
BIT_XOR(3) AS bit_xor,
COUNT($-.likeness)'''
        resp = self.execute_query(stmt)
        self.check_resp_succeeded(resp)
        expected_data = {
            "column_names" : ["name", "sum_dst_age", "avg_dst_age", "max_src_age", "min_src_age", "bit_and",
                              "bit_or", "bit_xor", "COUNT($-.likeness)"],
            "rows" : [
                ["LeBron James", 68, 34.0, 34, 34, 1, 2, 0, 2],
                ["Chris Paul", 66, 33.0, 33, 33, 1, 2, 0, 2],
                ["Dwyane Wade", 37, 37.0, 37, 37, 1, 2, 3, 1],
                ["Carmelo Anthony", 34, 34.0, 34, 34, 1, 2, 3, 1]
            ]
        }
        self.check_column_names(resp, expected_data["column_names"])
        self.check_out_of_order_result(resp, expected_data["rows"])

        # group has fun col
        stmt = '''GO FROM 'Carmelo Anthony', 'Dwyane Wade' OVER like
YIELD $$.player.name AS name
| GROUP BY $-.name, abs(5)
YIELD $-.name AS name,
SUM(1.5) AS sum,
COUNT(*) AS count,
1+1 AS cal'''
        resp = self.execute_query(stmt)
        self.check_resp_succeeded(resp)
        expected_data = {
            "column_names" : ["name", "sum", "count", "cal"],
            "rows" : [
                ["LeBron James", 3.0, 2, 2],
                ["Chris Paul", 3.0, 2, 2],
                ["Dwyane Wade", 1.5, 1, 2],
                ["Carmelo Anthony", 1.5, 1, 2]
            ]
        }
        self.check_column_names(resp, expected_data["column_names"])
        self.check_out_of_order_result(resp, expected_data["rows"])

        # output next
        stmt = '''GO FROM 'Paul Gasol' OVER like
YIELD $$.player.age AS age,
like._dst AS id
| GROUP BY $-.id
YIELD $-.id AS id,
SUM($-.age) AS age
| GO FROM $-.id OVER serve
YIELD $$.team.name AS name,
$-.age AS sumAge'''
        resp = self.execute_query(stmt)
        self.check_resp_succeeded(resp)
        expected_data = {
            "column_names" : ["name", "sumAge"],
            "rows" : [
                ["Grizzlies", 34],
                ["Raptors", 34],
                ["Lakers", 40]
            ]
        }
        self.check_column_names(resp, expected_data["column_names"])
        self.check_out_of_order_result(resp, expected_data["rows"])

    def test_empty_input(self):
        """GROUP BY (and a trailing LIMIT) over an empty input succeeds with an empty result."""
        stmt = '''GO FROM 'noexist' OVER like
YIELD $$.player.name AS name
| GROUP BY $-.name, abs(5)
YIELD $-.name AS name,
SUM(1.5) AS sum,
COUNT(*) AS count
| ORDER BY $-.sum | LIMIT 2'''
        resp = self.execute_query(stmt)
        self.check_resp_succeeded(resp)
        self.check_empty_result(resp)

        stmt = '''GO FROM 'noexist' OVER serve
YIELD $^.player.name as name,
serve.start_year as start,
$$.team.name as team
| YIELD $-.name as name
WHERE $-.start > 20000
| GROUP BY $-.name
YIELD $-.name AS name'''
        resp = self.execute_query(stmt)
        self.check_resp_succeeded(resp)
        self.check_empty_result(resp)

        stmt = '''GO FROM 'noexist' OVER serve
YIELD $^.player.name as name,
serve.start_year as start,
$$.team.name as team
| YIELD $-.name as name
WHERE $-.start > 20000
| Limit 1'''
        resp = self.execute_query(stmt)
        self.check_resp_succeeded(resp)
        self.check_empty_result(resp)

    def test_duplicate_column(self):
        """An upstream YIELD that defines the same alias twice must make the statement fail."""
        stmt = '''GO FROM 'Marco Belinelli' OVER serve
YIELD $$.team.name AS name,
serve._dst AS id,
serve.start_year AS start_year,
serve.end_year AS start_year
| GROUP BY $-.start_year
YIELD COUNT($-.id),
$-.start_year AS start_year,
AVG($-.end_year) as avg'''
        resp = self.execute_query(stmt)
        self.check_resp_failed(resp)

        stmt = '''GO FROM 'noexist' OVER serve
YIELD $^.player.name as name,
serve.start_year as start,
$$.team.name as name
| GROUP BY $-.name
YIELD $-.name AS name'''
        resp = self.execute_query(stmt)
        self.check_resp_failed(resp)

    def test_groupby_orderby_limit(self):
        """GROUP BY output piped into ORDER BY and LIMIT."""
        # with orderby
        stmt = '''GO FROM 'Carmelo Anthony', 'Dwyane Wade' OVER like
YIELD $$.player.name AS name
| GROUP BY $-.name, abs(5)
YIELD $-.name AS name,
SUM(1.5) AS sum,
COUNT(*) AS count
| ORDER BY $-.sum, $-.name'''
        resp = self.execute_query(stmt)
        self.check_resp_succeeded(resp)
        expected_data = {
            "column_names" : ["name", "sum", "count"],
            "rows" : [
                ["Carmelo Anthony", 1.5, 1],
                ["Dwyane Wade", 1.5, 1],
                ["Chris Paul", 3.0, 2],
                ["LeBron James", 3.0, 2]
            ]
        }
        self.check_column_names(resp, expected_data["column_names"])
        # NOTE(review): rows are compared ignoring order even though the
        # statement ends with ORDER BY -- confirm this is intended.
        self.check_out_of_order_result(resp, expected_data["rows"])

        # with limit ()
        stmt = '''GO FROM 'Carmelo Anthony', 'Dwyane Wade' OVER like
YIELD $$.player.name AS name
| GROUP BY $-.name, abs(5)
YIELD $-.name AS name,
SUM(1.5) AS sum,
COUNT(*) AS count
| ORDER BY $-.sum, $-.name DESC | LIMIT 2'''
        resp = self.execute_query(stmt)
        self.check_resp_succeeded(resp)
        expected_data = {
            "column_names" : ["name", "sum", "count"],
            "rows" : [
                ["Carmelo Anthony", 1.5, 1],
                ["Dwyane Wade", 1.5, 1]
            ]
        }
        self.check_column_names(resp, expected_data["column_names"])
        self.check_out_of_order_result(resp, expected_data["rows"])
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment