From dae2da966a406c9d1e1e67aa0122edface0c5082 Mon Sep 17 00:00:00 2001 From: dutor <440396+dutor@users.noreply.github.com> Date: Fri, 21 Sep 2018 15:10:33 +0800 Subject: [PATCH] First Implementation of GQL parser (#11) * First Implementation of GQL parser * Address @sherman-the-tank 's comments * Implement syntax of UPDATE/INSERT In addition, added support for hexadecimal and octal numbers. * Reserve space on the buffer in `toString` --- CMakeLists.txt | 1 + src/parser/.gitignore | 6 + src/parser/AstTypes.cpp | 12 + src/parser/AstTypes.h | 1405 +++++++++++++++++++++++++++++++ src/parser/CMakeLists.txt | 17 + src/parser/GQLParser.cpp | 10 + src/parser/GQLParser.h | 46 + src/parser/VGraphScanner.h | 42 + src/parser/parser.yy | 653 ++++++++++++++ src/parser/scanner.lex | 222 +++++ src/parser/test/CMakeLists.txt | 9 + src/parser/test/ParserTest.cpp | 227 +++++ src/parser/test/ScannerTest.cpp | 189 +++++ 13 files changed, 2839 insertions(+) create mode 100644 src/parser/.gitignore create mode 100644 src/parser/AstTypes.cpp create mode 100644 src/parser/AstTypes.h create mode 100644 src/parser/CMakeLists.txt create mode 100644 src/parser/GQLParser.cpp create mode 100644 src/parser/GQLParser.h create mode 100644 src/parser/VGraphScanner.h create mode 100644 src/parser/parser.yy create mode 100644 src/parser/scanner.lex create mode 100644 src/parser/test/CMakeLists.txt create mode 100644 src/parser/test/ParserTest.cpp create mode 100644 src/parser/test/ScannerTest.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 050ad770..c3e66ead 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -109,6 +109,7 @@ add_subdirectory(server) add_subdirectory(console) add_subdirectory(raftex) #add_subdirectory(storage) +add_subdirectory(parser) add_dependencies(common third-party) #add_dependencies(storage_engines common) diff --git a/src/parser/.gitignore b/src/parser/.gitignore new file mode 100644 index 00000000..8ca6e678 --- /dev/null +++ b/src/parser/.gitignore @@ -0,0 +1,6 @@ 
+VGraphParser.cpp
+VGraphParser.hpp
+VGraphScanner.cpp
+location.hh
+position.hh
+stack.hh
diff --git a/src/parser/AstTypes.cpp b/src/parser/AstTypes.cpp
new file mode 100644
index 00000000..31e68a91
--- /dev/null
+++ b/src/parser/AstTypes.cpp
@@ -0,0 +1,12 @@
+/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
+ *
+ * This source code is licensed under Apache 2.0 License
+ * (found in the LICENSE.Apache file in the root directory)
+ */
+#include "parser/AstTypes.h"
+
+namespace vesoft {
+
+
+
+}
diff --git a/src/parser/AstTypes.h b/src/parser/AstTypes.h
new file mode 100644
index 00000000..96777fa3
--- /dev/null
+++ b/src/parser/AstTypes.h
@@ -0,0 +1,1405 @@
+/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
+ *
+ * This source code is licensed under Apache 2.0 License
+ * (found in the LICENSE.Apache file in the root directory)
+ */
+#ifndef PARSER_ASTTYPES_H_
+#define PARSER_ASTTYPES_H_
+
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <boost/variant.hpp>
+
+namespace vesoft {
+
+/**
+ * Abstract base of every expression node.
+ *
+ * An expression can render itself (toString) and compute a value (eval).
+ * Values travel as ReturnType, whose alternatives are, by which() index:
+ * 0 int64_t, 1 uint64_t, 2 double, 3 bool, 4 std::string.
+ */
+class Expression {
+public:
+    virtual ~Expression() {}
+
+    // Textual form of the expression; this base version yields an empty string.
+    virtual std::string toString() const {
+        return "";
+    }
+
+    using ReturnType = boost::variant<int64_t, uint64_t, double, bool, std::string>;
+    // Computes the value of the expression.
+    virtual ReturnType eval() const = 0;
+
+    // Extracts the int64_t alternative; boost::get throws if another type is held.
+    static int64_t asInt(const ReturnType &value) {
+        return boost::get<int64_t>(value);
+    }
+
+    // Extracts the uint64_t alternative; boost::get throws if another type is held.
+    static uint64_t asUInt(const ReturnType &value) {
+        return boost::get<uint64_t>(value);
+    }
+
+    // Numeric value as double: both integer alternatives are converted,
+    // anything else must already be a double.
+    static double asDouble(const ReturnType &value) {
+        if (value.which() == 0) {
+            return static_cast<double>(boost::get<int64_t>(value));
+        }
+        if (value.which() == 1) {
+            return static_cast<double>(boost::get<uint64_t>(value));
+        }
+        return boost::get<double>(value);
+    }
+
+    // Truthiness of a value: numbers are true when non-zero, strings when
+    // non-empty, booleans are returned unchanged.
+    static bool asBool(const ReturnType &value) {
+        switch (value.which()) {
+            case 0:
+                return asInt(value) != 0;
+            case 1:
+                return asUInt(value) != 0;
+            case 2:
+                return asDouble(value) != 0.0;
+            case 3:
+                return boost::get<bool>(value);
+            case 4:
+                // The test used to be inverted: an empty string was reported as true.
+                return !asString(value).empty();
+            default:
+                assert(false);
+                // Keeps builds with NDEBUG from running off the end of the function.
+                return false;
+        }
+    }
+
+    // Extracts the std::string alternative; boost::get throws if another type is held.
+    static const std::string& asString(const ReturnType &value) {
+        return boost::get<std::string>(value);
+    }
+
+    // Type predicates. which() is the zero-based position of the held
+    // alternative in ReturnType's template argument list.
+    static bool isInt(const ReturnType &value) {
+        return value.which() == 0;
+    }
+
+    static bool isUInt(const ReturnType &value) {
+        return value.which() == 1;
+    }
+
+    static bool isDouble(const ReturnType &value) {
+        return value.which() == 2;
+    }
+
+    static bool isBool(const ReturnType &value) {
+        return value.which() == 3;
+    }
+
+    static bool isString(const ReturnType &value) {
+        return value.which() == 4;
+    }
+
+    // True for either integer alternative or a double.
+    static bool isArithmetic(const ReturnType &value) {
+        return isInt(value) || isUInt(value) || isDouble(value);
+    }
+
+    // Writes the value followed by a newline to stderr.
+    // The integers are cast to (unsigned) long long so the format specifier
+    // matches the argument on every platform, whatever int64_t aliases.
+    static void print(const ReturnType &value) {
+        switch (value.which()) {
+            case 0:
+                fprintf(stderr, "%lld\n", static_cast<long long>(asInt(value)));
+                break;
+            case 1:
+                fprintf(stderr, "%lluUL\n", static_cast<unsigned long long>(asUInt(value)));
+                break;
+            case 2:
+                fprintf(stderr, "%lf\n", asDouble(value));
+                break;
+            case 3:
+                fprintf(stderr, "%d\n", asBool(value));
+                break;
+            case 4:
+                fprintf(stderr, "%s\n", asString(value).c_str());
+                break;
+        }
+    }
+};
+
+/**
+ * Reference to a property, written `minor`, `major.minor` or `major[tag].minor`.
+ * Every constructor takes ownership of the strings it is given.
+ */
+class PropertyExpression final : public Expression {
+public:
+    explicit PropertyExpression(std::string *minorProp) {
+        minorPropName_.reset(minorProp);
+    }
+
+    PropertyExpression(std::string *majorProp, std::string *minorProp) {
+        minorPropName_.reset(minorProp);
+        majorPropName_.reset(majorProp);
+    }
+
+    PropertyExpression(std::string *majorProp, std::string *minorProp, std::string *tag) {
+        minorPropName_.reset(minorProp);
+        majorPropName_.reset(majorProp);
+        tag_.reset(tag);
+    }
+
+    // Placeholder: yields the textual form until real evaluation exists.
+    ReturnType eval() const override {
+        // TODO evaluate property's value
+        return toString();
+    }
+
+    // The tag is printed only when a major name is present as well.
+    std::string toString() const override {
+        if (majorPropName_ != nullptr) {
+            if (tag_ != nullptr) {
+                return *majorPropName_ + "[" + *tag_ + "]" + "." + *minorPropName_;
+            }
+            return *majorPropName_ + "."
+ *minorPropName_;
+        } else {
+            return *minorPropName_;
+        }
+    }
+
+private:
+    std::unique_ptr<std::string>                majorPropName_;
+    std::unique_ptr<std::string>                minorPropName_;
+    std::unique_ptr<std::string>                tag_;
+};
+
+/**
+ * A literal: bool, signed or unsigned integer, double, or string.
+ *
+ * Operand alternatives by which() index: 0 bool, 1 int64_t, 2 uint64_t,
+ * 3 double, 4 std::string. Note that bool comes first here.
+ */
+class PrimaryExpression final : public Expression {
+public:
+    using Operand = boost::variant<bool, int64_t, uint64_t, double, std::string>;
+
+    // Renders the literal: true/false, decimal digits, digits with a "UL"
+    // suffix for unsigned values, std::to_string for doubles, and strings
+    // wrapped in double quotes.
+    std::string toString() const override {
+        switch (operand_.which()) {
+            case 0:
+                return boost::get<bool>(operand_) ? "true" : "false";
+            case 1:
+                return std::to_string(boost::get<int64_t>(operand_));
+            case 2:
+                return std::to_string(boost::get<uint64_t>(operand_)) + "UL";
+            case 3:
+                return std::to_string(boost::get<double>(operand_));
+            case 4:
+                return "\"" + boost::get<std::string>(operand_) + "\"";
+        }
+        return "";
+    }
+
+    PrimaryExpression(bool val) {
+        operand_ = val;
+    }
+
+    PrimaryExpression(int64_t val) {
+        operand_ = val;
+    }
+
+    PrimaryExpression(uint64_t val) {
+        operand_ = val;
+    }
+
+    PrimaryExpression(double val) {
+        operand_ = val;
+    }
+
+    PrimaryExpression(std::string val) {
+        operand_ = val;
+    }
+
+    // Returns the stored literal as a ReturnType.
+    // The case labels follow Operand's order; they used to be shifted by one,
+    // which made boost::get ask for a type the variant did not hold.
+    ReturnType eval() const override {
+        switch (operand_.which()) {
+            case 0:
+                return boost::get<bool>(operand_);
+            case 1:
+                return boost::get<int64_t>(operand_);
+            case 2:
+                return boost::get<uint64_t>(operand_);
+            case 3:
+                return boost::get<double>(operand_);
+            case 4:
+                return boost::get<std::string>(operand_);
+        }
+        // Wrapped explicitly: a bare string literal would convert to bool
+        // (pointer-to-bool) instead of selecting the std::string alternative.
+        return std::string("Unknown");
+    }
+
+private:
+    Operand                                     operand_;
+};
+
+/**
+ * Prefix operator applied to a single operand, which this node owns.
+ */
+class UnaryExpression final : public Expression {
+public:
+    enum Operator {
+        PLUS, MINUS, NOT
+    };
+
+    UnaryExpression(Operator op, Expression *operand) {
+        op_ = op;
+        operand_.reset(operand);
+    }
+
+    // Operator sign followed by the operand in parentheses.
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        switch (op_) {
+            case PLUS:
+                buf += '+';
+                break;
+            case MINUS:
+                buf += '-';
+                break;
+            case NOT:
+                buf += '!';
+                break;
+        }
+        buf += '(';
+
buf.append(operand_->toString());
+        buf += ')';
+        return buf;
+    }
+
+    // PLUS returns the operand unchanged, MINUS negates a numeric operand
+    // (any other operand is returned unchanged), NOT negates its truthiness.
+    ReturnType eval() const override {
+        // TODO
+        auto value = operand_->eval();
+        if (op_ == PLUS) {
+            return value;
+        } else if (op_ == MINUS) {
+            if (isInt(value)) {
+                return -asInt(value);
+            } else if (isUInt(value)) {
+                return -asUInt(value);
+            } else if (isDouble(value)) {
+                return -asDouble(value);
+            }
+        } else {
+            return !asBool(value);
+        }
+        return value;
+    }
+
+private:
+    Operator                                    op_;
+    std::unique_ptr<Expression>                 operand_;
+};
+
+// Column types that can appear in a column specification or a cast.
+enum ColumnType {
+    INT8, INT16, INT32, INT64,
+    UINT8, UINT16, UINT32, UINT64,
+    STRING, DOUBLE, BIGINT, BOOL,
+};
+
+// Lower-case name of a column type, or "unknown" for an unlisted value.
+inline std::string columnTypeToString(ColumnType type) {
+    switch (type) {
+        case INT8:
+            return "int8";
+        case INT16:
+            return "int16";
+        case INT32:
+            return "int32";
+        case INT64:
+            return "int64";
+        case UINT8:
+            return "uint8";
+        case UINT16:
+            return "uint16";
+        case UINT32:
+            return "uint32";
+        case UINT64:
+            return "uint64";
+        case STRING:
+            return "string";
+        case DOUBLE:
+            return "double";
+        case BIGINT:
+            return "bigint";
+        case BOOL:
+            return "bool";
+        default:
+            return "unknown";
+    }
+}
+
+/**
+ * Cast of an operand to a column type. The operand is owned by this node.
+ */
+class TypeCastingExpression final : public Expression {
+public:
+    TypeCastingExpression(ColumnType type, Expression *operand) {
+        type_ = type;
+        operand_.reset(operand);
+    }
+
+    // Renders "(type)operand". This used to return an empty string, which
+    // made the cast and its operand vanish from any printed statement.
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        buf += '(';
+        buf += columnTypeToString(type_);
+        buf += ')';
+        buf += operand_->toString();
+        return buf;
+    }
+
+    // Placeholder: always yields an unsigned zero, the operand is not evaluated.
+    ReturnType eval() const override {
+        return ReturnType(0UL);
+    }
+
+private:
+    ColumnType                                  type_;
+    std::unique_ptr<Expression>                 operand_;
+};
+
+/**
+ * Binary arithmetic on two owned operands.
+ */
+class ArithmeticExpression final : public Expression {
+public:
+    enum Operator {
+        ADD, SUB, MUL, DIV, MOD
+    };
+
+    ArithmeticExpression(Expression *left, Operator op, Expression *right) {
+        op_ = op;
+        left_.reset(left);
+        right_.reset(right);
+    }
+
+    // "(left OP right)" without spaces around the operator.
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        buf += '(';
+        buf.append(left_->toString());
+        switch (op_) {
+            case ADD:
+                buf += '+';
+                break;
+            case SUB:
+                buf += '-';
+                break;
+            case
MUL:
+                buf += '*';
+                break;
+            case DIV:
+                buf += '/';
+                break;
+            case MOD:
+                buf += '%';
+                break;
+        }
+        buf.append(right_->toString());
+        buf += ')';
+        return buf;
+    }
+
+    // Evaluates both operands and applies the operator.
+    //   - If either operand is a double the result is a double.
+    //   - Two signed integers give a signed result.
+    //   - Any other integer combination is computed as unsigned.
+    //   - ADD additionally concatenates two strings.
+    //   - MOD accepts integers only.
+    // Operand types are checked with assert only.
+    ReturnType eval() const override {
+        auto left = left_->eval();
+        auto right = right_->eval();
+        switch (op_) {
+            case ADD:
+                assert((isArithmetic(left) && isArithmetic(right))
+                        || (isString(left) && isString(right)));
+                if (isArithmetic(left) && isArithmetic(right)) {
+                    if (isDouble(left) || isDouble(right)) {
+                        return asDouble(left) + asDouble(right);
+                    }
+                    if (isInt(left) && isInt(right)) {
+                        return asInt(left) + asInt(right);
+                    }
+                    return asUnsigned(left) + asUnsigned(right);
+                }
+                return asString(left) + asString(right);
+            case SUB:
+                assert(isArithmetic(left) && isArithmetic(right));
+                if (isDouble(left) || isDouble(right)) {
+                    return asDouble(left) - asDouble(right);
+                }
+                if (isInt(left) && isInt(right)) {
+                    return asInt(left) - asInt(right);
+                }
+                return asUnsigned(left) - asUnsigned(right);
+            case MUL:
+                assert(isArithmetic(left) && isArithmetic(right));
+                if (isDouble(left) || isDouble(right)) {
+                    return asDouble(left) * asDouble(right);
+                }
+                if (isInt(left) && isInt(right)) {
+                    return asInt(left) * asInt(right);
+                }
+                return asUnsigned(left) * asUnsigned(right);
+            case DIV:
+                assert(isArithmetic(left) && isArithmetic(right));
+                if (isDouble(left) || isDouble(right)) {
+                    return asDouble(left) / asDouble(right);
+                }
+                if (isInt(left) && isInt(right)) {
+                    return asInt(left) / asInt(right);
+                }
+                return asUnsigned(left) / asUnsigned(right);
+            case MOD:
+                // The old assert demanded two signed operands, which made the
+                // unsigned branch unreachable in debug builds; that branch
+                // also divided instead of taking the remainder.
+                assert((isInt(left) || isUInt(left)) && (isInt(right) || isUInt(right)));
+                if (isInt(left) && isInt(right)) {
+                    return asInt(left) % asInt(right);
+                }
+                return asUnsigned(left) % asUnsigned(right);
+            default:
+                assert(false);
+        }
+        // Only reachable with an invalid operator when asserts are compiled out.
+        return ReturnType(static_cast<int64_t>(0));
+    }
+
+private:
+    // Reads an integer operand as uint64_t. A signed value is converted with
+    // the usual modular wrap-around, so mixing int64_t and uint64_t operands
+    // no longer asks boost::get for a type the variant does not hold.
+    static uint64_t asUnsigned(const ReturnType &value) {
+        return isInt(value) ? static_cast<uint64_t>(asInt(value)) : asUInt(value);
+    }
+
+    Operator                                    op_;
+    std::unique_ptr<Expression>                 left_;
+    std::unique_ptr<Expression>                 right_;
+};
+
+/**
+ * Comparison of two owned operands.
+ */
+class RelationalExpression final : public Expression {
+public:
+    enum Operator {
+        LT, LE, GT, GE, EQ, NE
+    };
+
+    RelationalExpression(Expression *left, Operator op,
Expression *right) {
+        op_ = op;
+        left_.reset(left);
+        right_.reset(right);
+    }
+
+    // "(left OP right)" without spaces around the operator.
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        buf += '(';
+        buf.append(left_->toString());
+        switch (op_) {
+            case LT:
+                buf += '<';
+                break;
+            case LE:
+                buf += '<';
+                buf += '=';
+                break;
+            case GT:
+                buf += '>';
+                break;
+            case GE:
+                buf += '>';
+                buf += '=';
+                break;
+            case EQ:
+                buf += '=';
+                buf += '=';
+                break;
+            case NE:
+                buf += '!';
+                buf += '=';
+                break;
+        }
+        buf.append(right_->toString());
+        buf += ')';
+        return buf;
+    }
+
+    // Compares the two operand values and returns a bool.
+    ReturnType eval() const override {
+        auto left = left_->eval();
+        auto right = right_->eval();
+        // A variant comparison between different alternatives orders them by
+        // type index, not by value, so an int64_t 5 would sort below a double
+        // 0.5. Numbers of different types are therefore compared as doubles.
+        if (isArithmetic(left) && isArithmetic(right) && left.which() != right.which()) {
+            left = asDouble(left);
+            right = asDouble(right);
+        }
+        switch (op_) {
+            case LT:
+                return left < right;
+            case LE:
+                return left <= right;
+            case GT:
+                return left > right;
+            case GE:
+                return left >= right;
+            case EQ:
+                return left == right;
+            case NE:
+                return left != right;
+        }
+        return false;
+    }
+
+private:
+    Operator                                    op_;
+    std::unique_ptr<Expression>                 left_;
+    std::unique_ptr<Expression>                 right_;
+};
+
+/**
+ * Logical AND / OR of two owned operands.
+ */
+class LogicalExpression final : public Expression {
+public:
+    enum Operator {
+        AND, OR
+    };
+
+    LogicalExpression(Expression *left, Operator op, Expression *right) {
+        op_ = op;
+        left_.reset(left);
+        right_.reset(right);
+    }
+
+    // "(left&&right)" or "(left||right)".
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        buf += '(';
+        buf.append(left_->toString());
+        switch (op_) {
+            case AND:
+                buf += '&';
+                buf += '&';
+                break;
+            case OR:
+                buf += '|';
+                buf += '|';
+                break;
+        }
+        buf.append(right_->toString());
+        buf += ')';
+        return buf;
+    }
+
+    // Short-circuits like the operators it prints: the right operand is
+    // evaluated only when the left one does not already decide the result.
+    ReturnType eval() const override {
+        // TODO
+        const bool left = asBool(left_->eval());
+        if (op_ == AND) {
+            return left && asBool(right_->eval());
+        } else {
+            return left || asBool(right_->eval());
+        }
+    }
+
+private:
+    Operator                                    op_;
+    std::unique_ptr<Expression>                 left_;
+    std::unique_ptr<Expression>                 right_;
+};
+
+/**
+ * One column of a tag or edge definition: a type, an owned name and an
+ * optional TTL.
+ */
+class ColumnSpecification final {
+public:
+    ColumnSpecification(ColumnType type, std::string *name) {
+        type_ = type;
+        name_.reset(name);
+    }
+
+    ColumnSpecification(ColumnType type, std::string *name, int64_t ttl) {
+        hasTTL_ = true;
+        ttl_ = ttl;
+        type_ = type;
+        name_.reset(name);
+    }
+
+    bool                                        hasTTL_{false};
+    // Meaningful only when hasTTL_ is set; zero otherwise instead of garbage.
+    int64_t                                     ttl_{0};
+    ColumnType                                  type_;
+    std::unique_ptr<std::string>                name_;
+};
+
+// Ordered list of column specifications; takes ownership of every column added.
+class ColumnSpecificationList final {
+public:
+    ColumnSpecificationList() = default;
+    void addColumn(ColumnSpecification *column) {
+        columns_.emplace_back(column);
+    }
+
+    std::vector<std::unique_ptr<ColumnSpecification>> columns_;
+};
+
+// Base class of all sentences; each one can print itself.
+class Sentence {
+public:
+    virtual ~Sentence() {}
+    virtual std::string toString() const = 0;
+};
+
+// Step count of a GO sentence, printed as "[UPTO ]<n> STEPS".
+class StepClause final {
+public:
+    explicit StepClause(uint64_t steps = 1, bool isUpto = false) {
+        steps_ = steps;
+        isUpto_ = isUpto;
+    }
+
+    bool isUpto() const {
+        return isUpto_;
+    }
+
+    std::string toString() const {
+        std::string buf;
+        buf.reserve(256);
+        if (isUpto()) {
+            buf += "UPTO ";
+        }
+        buf += std::to_string(steps_);
+        buf += " STEPS";
+        return buf;
+    }
+
+private:
+    uint64_t                                    steps_{1};
+    bool                                        isUpto_{false};
+};
+
+// List of node ids used as the source of a traversal.
+class SourceNodeList final {
+public:
+    void addNodeId(uint64_t id) {
+        nodes_.push_back(id);
+    }
+
+    const std::vector<uint64_t>& nodeIds() const {
+        return nodes_;
+    }
+
+    // Comma-separated ids. With isRef every id is prefixed with '$' and the
+    // list is wrapped in brackets. An empty list prints as "" or "[]".
+    std::string toString(bool isRef) const {
+        std::string buf;
+        buf.reserve(256);
+        if (isRef) {
+            buf += "[";
+        }
+        for (auto id : nodes_) {
+            if (isRef) {
+                buf += "$";
+            }
+            buf += std::to_string(id);
+            buf += ",";
+        }
+        // Drop the trailing comma. Without the guard an empty list shrank the
+        // buffer by one anyway: size() - 1 wrapped around for "" and the
+        // opening bracket was eaten for "[".
+        if (!nodes_.empty()) {
+            buf.resize(buf.size() - 1);
+        }
+        if (isRef) {
+            buf += "]";
+        }
+        return buf;
+    }
+
+private:
+    std::vector<uint64_t>                       nodes_;
+};
+
+// FROM clause: an owned source node list, an owned alias and the reference flag.
+class FromClause final {
+public:
+    FromClause(SourceNodeList *srcNodeList, std::string *alias, bool isRef = false) {
+        srcNodeList_.reset(srcNodeList);
+        alias_.reset(alias);
+        isRef_ = isRef;
+    }
+
+    void setSourceNodeList(SourceNodeList *clause) {
+        srcNodeList_.reset(clause);
+    }
+
+    SourceNodeList* srcNodeList() const {
+        return srcNodeList_.get();
+    }
+
+    const std::string& alias() const
{
+        return *alias_;
+    }
+
+    bool isRef() const {
+        return isRef_;
+    }
+
+    // "FROM <nodes> AS <alias>"
+    std::string toString() const {
+        std::string buf;
+        buf.reserve(256);
+        buf += "FROM ";
+        buf += srcNodeList_->toString(isRef_);
+        buf += " AS ";
+        buf += *alias_;
+        return buf;
+    }
+
+private:
+    std::unique_ptr<SourceNodeList>             srcNodeList_;
+    std::unique_ptr<std::string>                alias_;
+    bool                                        isRef_{false};
+};
+
+// OVER clause: the owned edge name, optionally followed by REVERSELY.
+class OverClause final {
+public:
+    explicit OverClause(std::string *edge, bool isReversely = false) {
+        edge_.reset(edge);
+        isReversely_ = isReversely;
+    }
+
+    std::string toString() const {
+        std::string buf;
+        buf.reserve(256);
+        buf += "OVER ";
+        buf += *edge_;
+        if (isReversely_) {
+            buf += " REVERSELY";
+        }
+        return buf;
+    }
+
+private:
+    std::unique_ptr<std::string>                edge_;
+    bool                                        isReversely_{false};
+};
+
+// WHERE clause wrapping an owned filter expression.
+class WhereClause final {
+public:
+    explicit WhereClause(Expression *filter) {
+        filter_.reset(filter);
+    }
+
+    std::string toString() const {
+        std::string buf;
+        buf.reserve(256);
+        buf += "WHERE ";
+        buf += filter_->toString();
+        return buf;
+    }
+
+private:
+    std::unique_ptr<Expression>                 filter_;
+};
+
+// Expressions listed after RETURN; takes ownership of every field added.
+class ReturnFields final {
+public:
+    void addColumn(Expression *field) {
+        fields_.emplace_back(field);
+    }
+
+    // Comma-separated fields; an empty list prints as an empty string.
+    std::string toString() const {
+        std::string buf;
+        buf.reserve(256);
+        for (auto &expr : fields_) {
+            buf += expr->toString();
+            buf += ",";
+        }
+        // Remove the trailing comma. On an empty list size() - 1 would wrap
+        // around and resize() would be asked for an impossible length.
+        if (!fields_.empty()) {
+            buf.resize(buf.size() - 1);
+        }
+        return buf;
+    }
+
+private:
+    std::vector<std::unique_ptr<Expression>>    fields_;
+};
+
+// RETURN clause wrapping an owned field list.
+class ReturnClause final {
+public:
+    explicit ReturnClause(ReturnFields *fields) {
+        returnFields_.reset(fields);
+    }
+
+    std::string toString() const {
+        std::string buf;
+        buf.reserve(256);
+        buf += "RETURN ";
+        buf += returnFields_->toString();
+        return buf;
+    }
+
+private:
+    std::unique_ptr<ReturnFields>               returnFields_;
+};
+
+// GO sentence assembled from optional clauses; each setter takes ownership.
+class GoSentence final : public Sentence {
+public:
+
+    void setStepClause(StepClause *clause) {
+        stepClause_.reset(clause);
+    }
+
+    void setFromClause(FromClause *clause) {
+
fromClause_.reset(clause);
+    }
+
+    void setOverClause(OverClause *clause) { overClause_.reset(clause); }
+
+    void setWhereClause(WhereClause *clause) { whereClause_.reset(clause); }
+
+    void setReturnClause(ReturnClause *clause) { returnClause_.reset(clause); }
+
+    // Non-owning accessors; each returns nullptr while the clause is unset.
+    StepClause* stepClause() const { return stepClause_.get(); }
+
+    FromClause* fromClause() const { return fromClause_.get(); }
+
+    OverClause* overClause() const { return overClause_.get(); }
+
+    WhereClause* whereClause() const { return whereClause_.get(); }
+
+    ReturnClause* returnClause() const { return returnClause_.get(); }
+
+    // "GO " followed by the step clause, then every other clause that is
+    // present, each one preceded by a single space.
+    std::string toString() const override {
+        std::string text;
+        text.reserve(256);
+        text.append("GO ");
+        if (stepClause_) {
+            text.append(stepClause_->toString());
+        }
+        const auto appendSpaced = [&text](const std::string &piece) {
+            text.push_back(' ');
+            text.append(piece);
+        };
+        if (fromClause_) {
+            appendSpaced(fromClause_->toString());
+        }
+        if (overClause_) {
+            appendSpaced(overClause_->toString());
+        }
+        if (whereClause_) {
+            appendSpaced(whereClause_->toString());
+        }
+        if (returnClause_) {
+            appendSpaced(returnClause_->toString());
+        }
+        return text;
+    }
+
+private:
+    std::unique_ptr<StepClause>                 stepClause_;
+    std::unique_ptr<FromClause>                 fromClause_;
+    std::unique_ptr<OverClause>                 overClause_;
+    std::unique_ptr<WhereClause>                whereClause_;
+    std::unique_ptr<ReturnClause>               returnClause_;
+};
+
+// MATCH sentence; carries no data yet and prints a fixed text.
+class MatchSentence final : public Sentence {
+public:
+    std::string toString() const override {
+        return std::string("MATCH sentence");
+    }
+};
+
+// USE sentence holding the owned namespace name.
+class UseSentence final : public Sentence {
+public:
+    explicit UseSentence(std::string *ns) {
+        ns_.reset(ns);
+    }
+    std::string toString() const override {
+        std::string text("USE NAMESPACE ");
+        text.append(*ns_);
+        return text;
+    }
+
+private:
+    std::unique_ptr<std::string>                ns_;
+};
+
+// DEFINE TAG sentence: an owned tag name and an owned column list.
+class DefineTagSentence final : public Sentence {
+public:
+    DefineTagSentence(std::string *name, ColumnSpecificationList *columns) {
+        name_.reset(name);
+        columns_.reset(columns);
+    }
+
+    std::string toString()
const override {
+        // "DEFINE TAG <name> (<col> <type>[ TTL = <n>],...)"
+        std::string buf;
+        buf.reserve(256);
+        buf += "DEFINE TAG ";
+        buf += *name_;
+        buf += " (";
+        for (auto &col : columns_->columns_) {
+            buf += *col->name_;
+            buf += " ";
+            buf += columnTypeToString(col->type_);
+            if (col->hasTTL_) {
+                buf += " TTL = ";
+                buf += std::to_string(col->ttl_);
+            }
+            buf += ",";
+        }
+        // Strip the trailing comma only if a column wrote one; otherwise the
+        // opening parenthesis would be removed instead.
+        if (!columns_->columns_.empty()) {
+            buf.resize(buf.size() - 1);
+        }
+        buf += ")";
+        return buf;
+    }
+
+private:
+    std::unique_ptr<std::string>                name_;
+    std::unique_ptr<ColumnSpecificationList>    columns_;
+};
+
+// DEFINE EDGE sentence: an owned edge name and an owned column list.
+class DefineEdgeSentence final : public Sentence {
+public:
+    DefineEdgeSentence(std::string *name, ColumnSpecificationList *columns) {
+        name_.reset(name);
+        columns_.reset(columns);
+    }
+
+    // "DEFINE EDGE <name> (<col> <type>[ TTL = <n>],...)"
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        buf += "DEFINE EDGE ";
+        buf += *name_;
+        buf += " (";
+        for (auto &col : columns_->columns_) {
+            buf += *col->name_;
+            buf += " ";
+            buf += columnTypeToString(col->type_);
+            if (col->hasTTL_) {
+                buf += " TTL = ";
+                buf += std::to_string(col->ttl_);
+            }
+            buf += ",";
+        }
+        // See DefineTagSentence: never strip the parenthesis of an empty list.
+        if (!columns_->columns_.empty()) {
+            buf.resize(buf.size() - 1);
+        }
+        buf += ")";
+        return buf;
+    }
+
+private:
+    std::unique_ptr<std::string>                name_;
+    std::unique_ptr<ColumnSpecificationList>    columns_;
+};
+
+// ALTER TAG sentence: an owned tag name and an owned column list.
+class AlterTagSentence final : public Sentence {
+public:
+    AlterTagSentence(std::string *name, ColumnSpecificationList *columns) {
+        name_.reset(name);
+        columns_.reset(columns);
+    }
+
+    // "ALTER TAG <name>(<col> <type>[ TTL = <n>],...)"
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        buf += "ALTER TAG ";
+        buf += *name_;
+        buf += "(";
+        for (auto &col : columns_->columns_) {
+            buf += *col->name_;
+            buf += " ";
+            buf += columnTypeToString(col->type_);
+            if (col->hasTTL_) {
+                buf += " TTL = ";
+                buf += std::to_string(col->ttl_);
+            }
+            buf += ",";
+        }
+        // Only a written comma is removed, never the opening parenthesis.
+        if (!columns_->columns_.empty()) {
+            buf.resize(buf.size() - 1);
+        }
+        buf += ")";
+        return buf;
+    }
+
+private:
+    std::unique_ptr<std::string>                name_;
+    std::unique_ptr<ColumnSpecificationList>    columns_;
+};
+
+// ALTER EDGE sentence: an owned edge name and an owned column list.
+class AlterEdgeSentence final : public Sentence {
+public:
+
AlterEdgeSentence(std::string *name, ColumnSpecificationList *columns) {
+        name_.reset(name);
+        columns_.reset(columns);
+    }
+
+    // "ALTER EDGE <name>(<col> <type>[ TTL = <n>],...)"
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        buf += "ALTER EDGE ";
+        buf += *name_;
+        buf += "(";
+        for (auto &col : columns_->columns_) {
+            buf += *col->name_;
+            buf += " ";
+            buf += columnTypeToString(col->type_);
+            if (col->hasTTL_) {
+                buf += " TTL = ";
+                buf += std::to_string(col->ttl_);
+            }
+            buf += ",";
+        }
+        // Strip the trailing comma only if a column wrote one; otherwise the
+        // opening parenthesis would be removed instead.
+        if (!columns_->columns_.empty()) {
+            buf.resize(buf.size() - 1);
+        }
+        buf += ")";
+        return buf;
+    }
+
+private:
+    std::unique_ptr<std::string>                name_;
+    std::unique_ptr<ColumnSpecificationList>    columns_;
+};
+
+// Two owned sentences combined with UNION, INTERSECT or MINUS.
+class SetSentence final : public Sentence {
+public:
+    enum Operator {
+        UNION, INTERSECT, MINUS
+    };
+
+    SetSentence(Sentence *left, Operator op, Sentence *right) {
+        left_.reset(left);
+        right_.reset(right);
+        op_ = op;
+    }
+
+    // "(<left> OP <right>)"
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        buf = "(";
+        buf += left_->toString();
+        switch (op_) {
+            case UNION:
+                buf += " UNION ";
+                break;
+            case INTERSECT:
+                buf += " INTERSECT ";
+                break;
+            case MINUS:
+                buf += " MINUS ";
+                break;
+        }
+        buf += right_->toString();
+        buf += ")";
+        return buf;
+    }
+
+private:
+    Operator                                    op_;
+    std::unique_ptr<Sentence>                   left_;
+    std::unique_ptr<Sentence>                   right_;
+};
+
+// Two owned sentences joined by a pipe, printed as "<left> | <right>".
+class PipedSentence final : public Sentence {
+public:
+    PipedSentence(Sentence *left, Sentence *right) {
+        left_.reset(left);
+        right_.reset(right);
+    }
+
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        buf += left_->toString();
+        buf += " | ";
+        buf += right_->toString();
+        return buf;
+    }
+
+private:
+    //std::vector<std::unique_ptr<Sentence>>    sentences_;
+    std::unique_ptr<Sentence>                   left_;
+    std::unique_ptr<Sentence>                   right_;
+};
+
+// Assignment of a sentence to a variable; owns both the name and the sentence.
+class AssignmentSentence final : public Sentence {
+public:
+    AssignmentSentence(std::string *variable, Sentence *sentence) {
+        variable_.reset(variable);
+        sentence_.reset(sentence);
+    }
+
+
std::string toString() const override {
+        // "$<variable> = <sentence>"
+        std::string buf;
+        buf.reserve(256);
+        buf += "$";
+        buf += *variable_;
+        buf += " = ";
+        buf += sentence_->toString();
+        return buf;
+    }
+
+private:
+    std::unique_ptr<std::string>                variable_;
+    std::unique_ptr<Sentence>                   sentence_;
+};
+
+// Property names of an INSERT; takes ownership of every name added.
+class PropertyList final {
+public:
+    void addProp(std::string *propname) {
+        properties_.emplace_back(propname);
+    }
+
+    // Comma-separated names; an empty list prints as an empty string.
+    std::string toString() const {
+        std::string buf;
+        buf.reserve(256);
+        for (auto &prop : properties_) {
+            buf += *prop;
+            buf += ",";
+        }
+        // Remove the trailing comma. On an empty list size() - 1 would wrap
+        // around and resize() would be asked for an impossible length.
+        if (!properties_.empty()) {
+            buf.resize(buf.size() - 1);
+        }
+        return buf;
+    }
+
+private:
+    std::vector<std::unique_ptr<std::string>>   properties_;
+};
+
+// Value expressions of an INSERT; takes ownership of every value added.
+class ValueList final {
+public:
+    void addValue(Expression *value) {
+        values_.emplace_back(value);
+    }
+
+    // Comma-separated values; an empty list prints as an empty string.
+    std::string toString() const {
+        std::string buf;
+        buf.reserve(256);
+        for (auto &expr : values_) {
+            buf += expr->toString();
+            buf += ",";
+        }
+        // Same guard as PropertyList::toString.
+        if (!values_.empty()) {
+            buf.resize(buf.size() - 1);
+        }
+        return buf;
+    }
+
+private:
+    std::vector<std::unique_ptr<Expression>>    values_;
+};
+
+// INSERT VERTEX sentence; owns the vertex name, property list and value list.
+class InsertVertexSentence final : public Sentence {
+public:
+    InsertVertexSentence(int64_t id, std::string *vertex, PropertyList *props, ValueList *values, bool overwritable = true) {
+        id_ = id;
+        vertex_.reset(vertex);
+        properties_.reset(props);
+        values_.reset(values);
+        overwritable_ = overwritable;
+    }
+
+    // "INSERT VERTEX [NO OVERWRITE ]<name>(<props>) VALUES(<id>: <values>)"
+    // The NO OVERWRITE marker was stored but never printed, so the flag was
+    // lost whenever the sentence was rendered.
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        buf += "INSERT VERTEX ";
+        if (!overwritable_) {
+            buf += "NO OVERWRITE ";
+        }
+        buf += *vertex_;
+        buf += "(";
+        buf += properties_->toString();
+        buf += ") VALUES(";
+        buf += std::to_string(id_);
+        buf += ": ";
+        buf += values_->toString();
+        buf += ")";
+        return buf;
+    }
+
+private:
+    bool                                        overwritable_{true};
+    int64_t                                     id_;
+    std::unique_ptr<std::string>                vertex_;
+    std::unique_ptr<PropertyList>               properties_;
+    std::unique_ptr<ValueList>                  values_;
+};
+
+// INSERT EDGE sentence, filled in through setters.
+class InsertEdgeSentence final : public Sentence {
+public:
+    void setOverwrite(bool overwritable) {
+        overwritable_ = overwritable;
+    }
+
+    void setSrcId(int64_t
srcid) {
+        srcid_ = srcid;
+    }
+
+    void setDstId(int64_t dstid) {
+        dstid_ = dstid;
+    }
+
+    void setRank(int64_t rank) {
+        rank_ = rank;
+    }
+
+    // The three setters below take ownership of their argument.
+    void setEdge(std::string *edge) {
+        edge_.reset(edge);
+    }
+
+    void setProps(PropertyList *props) {
+        properties_.reset(props);
+    }
+
+    void setValues(ValueList *values) {
+        values_.reset(values);
+    }
+
+    // "INSERT EDGE [NO OVERWRITE ]<edge>(<props>) VALUES(<src> -> <dst>[ @<rank>]: <values>)"
+    // The rank is printed only when it is non-zero.
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        buf += "INSERT EDGE ";
+        if (!overwritable_) {
+            buf += "NO OVERWRITE ";
+        }
+        buf += *edge_;
+        buf += "(";
+        buf += properties_->toString();
+        buf += ") ";
+        buf += "VALUES(";
+        buf += std::to_string(srcid_);
+        buf += " -> ";
+        buf += std::to_string(dstid_);
+        if (rank_ != 0) {
+            buf += " @";
+            buf += std::to_string(rank_);
+        }
+        buf += ": ";
+        buf += values_->toString();
+        buf += ")";
+        return buf;
+    }
+
+private:
+    bool                                        overwritable_{true};
+    int64_t                                     srcid_{0};
+    int64_t                                     dstid_{0};
+    int64_t                                     rank_{0};
+    std::unique_ptr<std::string>                edge_;
+    std::unique_ptr<PropertyList>               properties_;
+    std::unique_ptr<ValueList>                  values_;
+};
+
+// One "<field>=<value>" assignment of an UPDATE; owns both parts.
+class UpdateItem final {
+public:
+    UpdateItem(std::string *field, Expression *value) {
+        field_.reset(field);
+        value_.reset(value);
+    }
+
+    std::string toString() const {
+        std::string buf;
+        buf.reserve(256);
+        buf += *field_;
+        buf += "=";
+        buf += value_->toString();
+        return buf;
+    }
+
+private:
+    std::unique_ptr<std::string>                field_;
+    std::unique_ptr<Expression>                 value_;
+};
+
+// Assignments of an UPDATE; takes ownership of every item added.
+class UpdateList final {
+public:
+    void addItem(UpdateItem *item) {
+        items_.emplace_back(item);
+    }
+
+    // Comma-separated items; an empty list prints as an empty string.
+    std::string toString() const {
+        std::string buf;
+        buf.reserve(256);
+        for (auto &item : items_) {
+            buf += item->toString();
+            buf += ",";
+        }
+        // Remove the trailing comma. On an empty list size() - 1 would wrap
+        // around and resize() would be asked for an impossible length.
+        if (!items_.empty()) {
+            buf.resize(buf.size() - 1);
+        }
+        return buf;
+    }
+
+private:
+    std::vector<std::unique_ptr<UpdateItem>>    items_;
+};
+
+// UPDATE VERTEX sentence, filled in through setters.
+class UpdateVertexSentence final : public Sentence {
+public:
+    void setInsertable(bool insertable) {
+        insertable_ = insertable;
+    }
+
+    void setVid(int64_t vid) {
+        vid_ =
vid;
+    }
+
+    // The three setters below take ownership of their argument.
+    void setUpdateList(UpdateList *items) {
+        updateItems_.reset(items);
+    }
+
+    void setWhereClause(WhereClause *clause) {
+        whereClause_.reset(clause);
+    }
+
+    void setReturnClause(ReturnClause *clause) {
+        returnClause_.reset(clause);
+    }
+
+    // "UPDATE [OR INSERT ]VERTEX <vid> SET <items>[ <where>][ <return>]"
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        buf += "UPDATE ";
+        if (insertable_) {
+            buf += "OR INSERT ";
+        }
+        buf += "VERTEX ";
+        buf += std::to_string(vid_);
+        buf += " SET ";
+        buf += updateItems_->toString();
+        if (whereClause_ != nullptr) {
+            buf += " ";
+            buf += whereClause_->toString();
+        }
+        if (returnClause_ != nullptr) {
+            buf += " ";
+            buf += returnClause_->toString();
+        }
+
+        return buf;
+    }
+
+private:
+    bool                                        insertable_{false};
+    int64_t                                     vid_{0};
+    std::unique_ptr<UpdateList>                 updateItems_;
+    std::unique_ptr<WhereClause>                whereClause_;
+    std::unique_ptr<ReturnClause>               returnClause_;
+};
+
+// UPDATE EDGE sentence, filled in through setters.
+class UpdateEdgeSentence final : public Sentence {
+public:
+    void setInsertable(bool insertable) {
+        insertable_ = insertable;
+    }
+
+    void setSrcId(int64_t srcid) {
+        srcid_ = srcid;
+    }
+
+    void setDstId(int64_t dstid) {
+        dstid_ = dstid;
+    }
+
+    void setRank(int64_t rank) {
+        rank_ = rank;
+    }
+
+    // The three setters below take ownership of their argument.
+    void setUpdateList(UpdateList *items) {
+        updateItems_.reset(items);
+    }
+
+    void setWhereClause(WhereClause *clause) {
+        whereClause_.reset(clause);
+    }
+
+    void setReturnClause(ReturnClause *clause) {
+        returnClause_.reset(clause);
+    }
+
+    // "UPDATE [OR INSERT ]EDGE <src>-><dst>[ @<rank>] SET <items>[ <where>][ <return>]"
+    // A non-zero rank is printed the same way InsertEdgeSentence prints it;
+    // it used to be stored by setRank() and then dropped here.
+    std::string toString() const override {
+        std::string buf;
+        buf.reserve(256);
+        buf += "UPDATE ";
+        if (insertable_) {
+            buf += "OR INSERT ";
+        }
+        buf += "EDGE ";
+        buf += std::to_string(srcid_);
+        buf += "->";
+        buf += std::to_string(dstid_);
+        if (rank_ != 0) {
+            buf += " @";
+            buf += std::to_string(rank_);
+        }
+        buf += " SET ";
+        buf += updateItems_->toString();
+        if (whereClause_ != nullptr) {
+            buf += " ";
+            buf += whereClause_->toString();
+        }
+        if (returnClause_ != nullptr) {
+            buf += " ";
+            buf += returnClause_->toString();
+        }
+
+        return buf;
+    }
+
+
+private:
+    bool                                        insertable_{false};
+    int64_t
srcid_{0};
+    int64_t                                     dstid_{0};
+    int64_t                                     rank_{0};
+    std::unique_ptr<UpdateList>                 updateItems_;
+    std::unique_ptr<WhereClause>                whereClause_;
+    std::unique_ptr<ReturnClause>               returnClause_;
+};
+
+// A sequence of sentences. The statement takes ownership of every sentence
+// handed to it and always holds at least the one given to the constructor.
+class Statement final {
+public:
+    explicit Statement(Sentence *sentence) {
+        sentences_.emplace_back(sentence);
+    }
+
+    // Appends one more sentence and takes ownership of it.
+    void addSentence(Sentence *sentence) {
+        sentences_.emplace_back(sentence);
+    }
+
+    // Sentences joined with "; ".
+    std::string toString() const {
+        std::string buf;
+        buf.reserve(1024);
+        auto i = 0UL;
+        // The first sentence is read unconditionally; the constructor always stores one.
+        buf += sentences_[i++]->toString();
+        for ( ; i < sentences_.size(); i++) {
+            buf += "; ";
+            buf += sentences_[i]->toString();
+        }
+        return buf;
+    }
+
+private:
+    std::vector<std::unique_ptr<Sentence>>      sentences_;
+};
+
+
+}
+
+#endif // PARSER_ASTTYPES_H_
diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt
new file mode 100644
index 00000000..1ad99aa7
--- /dev/null
+++ b/src/parser/CMakeLists.txt
@@ -0,0 +1,17 @@
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
+find_package(BISON)
+find_package(FLEX)
+bison_target(Parser parser.yy ${CMAKE_CURRENT_BINARY_DIR}/VGraphParser.cpp COMPILE_FLAGS "-Werror")
+flex_target(Scanner scanner.lex ${CMAKE_CURRENT_BINARY_DIR}/VGraphScanner.cpp)
+
+add_flex_bison_dependency(Scanner Parser)
+
+add_library(parser_obj OBJECT ${FLEX_Scanner_OUTPUTS} ${BISON_Parser_OUTPUTS})
+
+target_include_directories(parser_obj SYSTEM BEFORE PUBLIC ${FLEX_INCLUDE_DIRS})
+
+add_dependencies(parser_obj base_obj)
+
+add_subdirectory(test)
diff --git a/src/parser/GQLParser.cpp b/src/parser/GQLParser.cpp
new file mode 100644
index 00000000..2038a71d
--- /dev/null
+++ b/src/parser/GQLParser.cpp
@@ -0,0 +1,10 @@
+/* Copyright (c) 2018 - present, VE Software Inc.
All rights reserved
+ *
+ * This source code is licensed under Apache 2.0 License
+ * (found in the LICENSE.Apache file in the root directory)
+ */
+#include "GQLParser.h"
+
+namespace vesoft {
+} // namespace vesoft
+
diff --git a/src/parser/GQLParser.h b/src/parser/GQLParser.h
new file mode 100644
index 00000000..59456435
--- /dev/null
+++ b/src/parser/GQLParser.h
@@ -0,0 +1,62 @@
+/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
+ *
+ * This source code is licensed under Apache 2.0 License
+ * (found in the LICENSE.Apache file in the root directory)
+ */
+#ifndef PARSER_GQLPARSER_H_
+#define PARSER_GQLPARSER_H_
+
+#include <memory>
+#include <regex>
+#include <sstream>
+#include <string>
+#include "VGraphParser.hpp"
+#include "VGraphScanner.h"
+
+namespace vesoft {
+
+/**
+ * Front end of the query parser: feeds a query string to the scanner and
+ * runs the generated parser on it.
+ */
+class GQLParser {
+public:
+    GQLParser() : parser_(scanner_, error_, &statement_) {
+    }
+
+    // Parses `query`. Returns true when the generated parser reports success
+    // (a zero return from its parse()).
+    bool parse(const std::string &query) {
+        // Drop the message of an earlier failed parse so that error()
+        // describes this call only.
+        error_.clear();
+        std::istringstream is(query);
+        scanner_.switch_streams(&is, nullptr);
+        return parser_.parse() == 0;
+    }
+
+    // Hands the statement pointer filled in by the parser over to the
+    // caller and forgets it; a second call returns an empty pointer.
+    std::unique_ptr<Statement> statement() {
+        std::unique_ptr<Statement> statement(statement_);
+        statement_ = nullptr;
+        return statement;
+    }
+
+    // The error string shared with the parser.
+    const std::string& error() const {
+        return error_;
+    }
+
+private:
+    // parser_ is constructed from references to the three members above it,
+    // so it is declared last and they are fully constructed before it.
+    vesoft::VGraphScanner           scanner_;
+    std::string                     error_;
+    Statement                      *statement_ = nullptr;
+    vesoft::VGraphParser            parser_;
+};
+
+} // namespace vesoft
+
+#endif // PARSER_GQLPARSER_H_
diff --git a/src/parser/VGraphScanner.h b/src/parser/VGraphScanner.h
new file mode 100644
index 00000000..5a593412
--- /dev/null
+++ b/src/parser/VGraphScanner.h
@@ -0,0 +1,42 @@
+/* Copyright (c) 2018 - present, VE Software Inc.
All rights reserved + * + * This source code is licensed under Apache 2.0 License + * (found in the LICENSE.Apache file in the root directory) + */ +#ifndef PARSER_VGRAPHSCANNER_H_ +#define PARSER_VGRAPHSCANNER_H_ + +// Only include FlexLexer.h if it hasn't been already included +#if !defined(yyFlexLexerOnce) +#include <FlexLexer.h> +#endif + +// Override the interface for yylex since we namespaced it +#undef YY_DECL +#define YY_DECL int vesoft::VGraphScanner::yylex() + +#include "VGraphParser.hpp" + +namespace vesoft { + +class VGraphScanner : public yyFlexLexer { +public: + int yylex(vesoft::VGraphParser::semantic_type * lval, + vesoft::VGraphParser::location_type *loc) { + yylval = lval; + yylloc = loc; + return yylex(); + } + + +private: + friend class Scanner_Basic_Test; + int yylex(); + + vesoft::VGraphParser::semantic_type * yylval{nullptr}; + vesoft::VGraphParser::location_type * yylloc{nullptr}; +}; + +} // namespace vesoft + +#endif // PARSER_VGRAPHSCANNER_H_ diff --git a/src/parser/parser.yy b/src/parser/parser.yy new file mode 100644 index 00000000..6df51faf --- /dev/null +++ b/src/parser/parser.yy @@ -0,0 +1,653 @@ +%language "C++" +%skeleton "lalr1.cc" +%no-lines +%locations +%define api.namespace { vesoft } +%define parser_class_name { VGraphParser } +%lex-param { vesoft::VGraphScanner& scanner } +%parse-param { vesoft::VGraphScanner& scanner } +%parse-param { std::string &errmsg } +%parse-param { vesoft::Statement** statement } + +%code requires { +#include <iostream> +#include <sstream> +#include <string> +#include "AstTypes.h" + +namespace vesoft { + +class VGraphScanner; + +} + +} + +%code { + static int yylex(vesoft::VGraphParser::semantic_type* yylval, + vesoft::VGraphParser::location_type *yylloc, + vesoft::VGraphScanner& scanner); +} + +%union { + bool boolval; + uint64_t intval; + double doubleval; + std::string *strval; + vesoft::Expression *expr; + vesoft::Sentence *sentence; + vesoft::Statement *statement; + vesoft::ColumnSpecification 
*colspec; + vesoft::ColumnSpecificationList *colspeclist; + vesoft::ColumnType type; + vesoft::StepClause *step_clause; + vesoft::FromClause *from_clause; + vesoft::SourceNodeList *src_node_list; + vesoft::OverClause *over_clause; + vesoft::WhereClause *where_clause; + vesoft::ReturnClause *return_clause; + vesoft::ReturnFields *return_fields; + vesoft::PropertyList *prop_list; + vesoft::ValueList *value_list; + vesoft::UpdateList *update_list; + vesoft::UpdateItem *update_item; +} +/* keywords */ +%token KW_GO KW_AS KW_TO KW_OR KW_USE KW_SET KW_FROM KW_WHERE KW_ALTER +%token KW_MATCH KW_INSERT KW_VALUES KW_RETURN KW_DEFINE KW_VERTEX KW_TTL +%token KW_EDGE KW_UPDATE KW_STEPS KW_OVER KW_UPTO KW_REVERSELY KW_NAMESPACE +%token KW_INT8 KW_INT16 KW_INT32 KW_INT64 KW_UINT8 KW_UINT16 KW_UINT32 KW_UINT64 +%token KW_BIGINT KW_DOUBLE KW_STRING KW_BOOL KW_TAG KW_UNION KW_INTERSECT KW_MINUS +%token KW_NO KW_OVERWRITE KW_IN +/* symbols */ +%token L_PAREN R_PAREN L_BRACKET R_BRACKET L_BRACE R_BRACE COMMA +%token PIPE OR AND LT LE GT GE EQ NE ADD SUB MUL DIV MOD NOT NEG ASSIGN +%token DOT COLON SEMICOLON L_ARROW R_ARROW AT + +/* token type specification */ +%token <boolval> BOOL +%token <intval> INTEGER UINTEGER COL_REF_ID +%token <doubleval> DOUBLE +%token <strval> STRING SYMBOL VARIABLE + +%type <expr> expression logic_or_expression logic_and_expression +%type <expr> relational_expression multiplicative_expression additive_expression +%type <expr> unary_expression primary_expression equality_expression +%type <type> type_spec +%type <step_clause> step_clause +%type <from_clause> from_clause +%type <src_node_list> id_list ref_list +%type <over_clause> over_clause +%type <where_clause> where_clause +%type <return_clause> return_clause +%type <return_fields> return_fields +%type <prop_list> prop_list +%type <value_list> value_list +%type <update_list> update_list +%type <update_item> update_item + +%type <intval> ttl_spec + +%type <colspec> column_spec +%type <colspeclist> 
/*
 * primary_expression: literals, property references and parenthesised
 * expressions -- the leaves of the expression grammar.
 */
primary_expression
    : INTEGER {
        $$ = new PrimaryExpression(static_cast<int64_t>($1));
    }
    | UINTEGER {
        $$ = new PrimaryExpression(static_cast<uint64_t>($1));
    }
    | DOUBLE {
        $$ = new PrimaryExpression($1);
    }
    | STRING {
        // The node is built from a copy of the text (`*$1`), so the
        // heap-allocated string carried by the token has to be released
        // here; otherwise every string literal leaks.
        $$ = new PrimaryExpression(*$1);
        delete $1;
    }
    | BOOL {
        $$ = new PrimaryExpression($1);
    }
    | SYMBOL {
        // TODO(dutor) detect semantic type of symbol
        $$ = new PropertyExpression($1);
    }
    | SYMBOL DOT SYMBOL {
        $$ = new PropertyExpression($1, $3);
    }
    | SYMBOL L_BRACKET SYMBOL R_BRACKET DOT SYMBOL {
        // Note the argument order: first symbol, trailing property, then the bracketed symbol.
        $$ = new PropertyExpression($1, $6, $3);
    }
    | L_PAREN expression R_PAREN {
        $$ = $2;
    }
    ;
/*
 * go_sentence: GO [steps] FROM ... [OVER ...] [WHERE ...] [RETURN ...]
 * Each clause is attached to a freshly created GoSentence.
 */
go_sentence
    : KW_GO step_clause[steps] from_clause[from] over_clause[over]
      where_clause[where] return_clause[ret] {
        auto *sentence = new GoSentence();
        sentence->setStepClause($steps);
        sentence->setFromClause($from);
        sentence->setOverClause($over);
        sentence->setWhereClause($where);
        sentence->setReturnClause($ret);
        $$ = sentence;
    }
    ;
/*
 * insert_vertex_sentence:
 *   INSERT (VERTEX | TAG) name(prop, ...) VALUES(id: value, ...)
 * Both keyword spellings build the same InsertVertexSentence.
 */
insert_vertex_sentence
    : KW_INSERT KW_VERTEX SYMBOL[tag] L_PAREN prop_list[props] R_PAREN
      KW_VALUES L_PAREN INTEGER[id] COLON value_list[values] R_PAREN {
        $$ = new InsertVertexSentence($id, $tag, $props, $values);
    }
    | KW_INSERT KW_TAG SYMBOL[tag] L_PAREN prop_list[props] R_PAREN
      KW_VALUES L_PAREN INTEGER[id] COLON value_list[values] R_PAREN {
        $$ = new InsertVertexSentence($id, $tag, $props, $values);
    }
    ;
/*
 * update_edge_sentence:
 *   UPDATE [OR INSERT] EDGE src -> dst [@rank] SET item, ... [WHERE ...] [RETURN ...]
 * The four alternatives are the combinations of the optional OR INSERT
 * and the optional @rank.
 */
update_edge_sentence
    : KW_UPDATE KW_EDGE INTEGER[src] R_ARROW INTEGER[dst]
      KW_SET update_list[items] where_clause[where] return_clause[ret] {
        auto *update = new UpdateEdgeSentence();
        update->setSrcId($src);
        update->setDstId($dst);
        update->setUpdateList($items);
        update->setWhereClause($where);
        update->setReturnClause($ret);
        $$ = update;
    }
    | KW_UPDATE KW_OR KW_INSERT KW_EDGE INTEGER[src] R_ARROW INTEGER[dst]
      KW_SET update_list[items] where_clause[where] return_clause[ret] {
        auto *update = new UpdateEdgeSentence();
        update->setInsertable(true);
        update->setSrcId($src);
        update->setDstId($dst);
        update->setUpdateList($items);
        update->setWhereClause($where);
        update->setReturnClause($ret);
        $$ = update;
    }
    | KW_UPDATE KW_EDGE INTEGER[src] R_ARROW INTEGER[dst] AT INTEGER[rank]
      KW_SET update_list[items] where_clause[where] return_clause[ret] {
        auto *update = new UpdateEdgeSentence();
        update->setSrcId($src);
        update->setDstId($dst);
        update->setRank($rank);
        update->setUpdateList($items);
        update->setWhereClause($where);
        update->setReturnClause($ret);
        $$ = update;
    }
    | KW_UPDATE KW_OR KW_INSERT KW_EDGE INTEGER[src] R_ARROW INTEGER[dst] AT INTEGER[rank]
      KW_SET update_list[items] where_clause[where] return_clause[ret] {
        auto *update = new UpdateEdgeSentence();
        update->setInsertable(true);
        update->setSrcId($src);
        update->setDstId($dst);
        update->setRank($rank);
        update->setUpdateList($items);
        update->setWhereClause($where);
        update->setReturnClause($ret);
        $$ = update;
    }
    ;
a/src/parser/scanner.lex b/src/parser/scanner.lex new file mode 100644 index 00000000..a6071523 --- /dev/null +++ b/src/parser/scanner.lex @@ -0,0 +1,222 @@ +%option c++ +%option yyclass="VGraphScanner" +%option nodefault noyywrap +%option never-interactive +%option yylineno + +%{ +#include "GQLParser.h" +#include "VGraphScanner.h" +#include "VGraphParser.hpp" + +#define YY_USER_ACTION yylloc->columns(yyleng); + +using TokenType = vesoft::VGraphParser::token; + +static constexpr size_t MAX_STRING = 4096; + + +%} + +%x STR + +GO ([Gg][Oo]) +AS ([Aa][Ss]) +TO ([Tt][Oo]) +OR ([Oo][Rr]) +USE ([Uu][Ss][Ee]) +SET ([Ss][Ee][Tt]) +FROM ([Ff][Rr][Oo][Mm]) +WHERE ([Ww][Hh][Ee][Rr][Ee]) +MATCH ([Mm][Aa][Tt][Cc][Hh]) +INSERT ([Ii][Nn][Ss][Ee][Rr][Tt]) +VALUES ([Vv][Aa][Ll][Uu][Ee][Ss]) +RETURN ([Rr][Ee][Tt][Uu][Rr][Nn]) +DEFINE ([Dd][Ee][Ff][Ii][Nn][Ee]) +VERTEX ([Vv][Ee][Rr][Tt][Ee][Xx]) +EDGE ([Ee][Dd][Gg][Ee]) +UPDATE ([Uu][Pp][Dd][Aa][Tt][Ee]) +ALTER ([Aa][Ll][Tt][Ee][Rr]) +STEPS ([Ss][Tt][Ee][Pp][Ss]) +OVER ([Oo][Vv][Ee][Rr]) +UPTO ([Uu][Pp][Tt][Oo]) +REVERSELY ([Rr][Ee][Vv][Ee][Rr][Ss][Ee][Ll][Yy]) +NAMESPACE ([Nn][Aa][Mm][Ee][Ss][Pp][Aa][Cc][Ee]) +TTL ([Tt][Tt][Ll]) +INT ([Ii][Nn][Tt]) +INT8 ({INT}8) +INT16 ({INT}16) +INT32 ({INT}32) +INT64 ({INT}64) +UINT ([Uu][Ii][Nn][Tt]) +UINT8 ({UINT}8) +UINT16 ({UINT}16) +UINT32 ({UINT}32) +UINT64 ({UINT}64) +BIGINT ([Bb][Ii][Gg][Ii][Nn][Tt]) +DOUBLE ([Dd][Oo][Uu][Bb][Ll][Ee]) +STRING ([Ss][Tt][Rr][Ii][Nn][Gg]) +BOOL ([Bb][Oo][Oo][Ll]) +TAG ([Tt][Aa][Gg]) +UNION ([Uu][Nn][Ii][Oo][Nn]) +INTERSECT ([Ii][Nn][Tt][Ee][Rr][Ss][Ee][Cc][Tt]) +MINUS ([Mm][Ii][Nn][Uu][Ss]) +NO ([Nn][Oo]) +OVERWRITE ([Oo][Vv][Ee][Rr][Ww][Rr][Ii][Tt][Ee]) +TRUE ([Tt][Rr][Uu][Ee]) +FALSE ([Ff][Aa][Ll][Ss][Ee]) + +ID ([_a-zA-Z][_a-zA-Z0-9]*) +DEC ([0-9]) +HEX ([0-9a-fA-F]) +OCT ([0-7]) + + +%% + + thread_local static char sbuf[MAX_STRING]; + size_t pos = 0; + +{GO} { return TokenType::KW_GO; } +{AS} { return TokenType::KW_AS; } +{TO} { return TokenType::KW_TO; 
} +{OR} { return TokenType::KW_OR; } +{USE} { return TokenType::KW_USE; } +{SET} { return TokenType::KW_SET; } +{FROM} { return TokenType::KW_FROM; } +{WHERE} { return TokenType::KW_WHERE; } +{MATCH} { return TokenType::KW_MATCH; } +{INSERT} { return TokenType::KW_INSERT; } +{VALUES} { return TokenType::KW_VALUES; } +{RETURN} { return TokenType::KW_RETURN; } +{DEFINE} { return TokenType::KW_DEFINE; } +{VERTEX} { return TokenType::KW_VERTEX; } +{EDGE} { return TokenType::KW_EDGE; } +{UPDATE} { return TokenType::KW_UPDATE; } +{ALTER} { return TokenType::KW_ALTER; } +{STEPS} { return TokenType::KW_STEPS; } +{OVER} { return TokenType::KW_OVER; } +{UPTO} { return TokenType::KW_UPTO; } +{REVERSELY} { return TokenType::KW_REVERSELY; } +{NAMESPACE} { return TokenType::KW_NAMESPACE; } +{TTL} { return TokenType::KW_TTL; } +{INT8} { return TokenType::KW_INT8; } +{INT16} { return TokenType::KW_INT16; } +{INT32} { return TokenType::KW_INT32; } +{INT64} { return TokenType::KW_INT64; } +{UINT8} { return TokenType::KW_UINT8; } +{UINT16} { return TokenType::KW_UINT16; } +{UINT32} { return TokenType::KW_UINT32; } +{UINT64} { return TokenType::KW_UINT64; } +{BIGINT} { return TokenType::KW_BIGINT; } +{DOUBLE} { return TokenType::KW_DOUBLE; } +{STRING} { return TokenType::KW_STRING; } +{BOOL} { return TokenType::KW_BOOL; } +{TAG} { return TokenType::KW_TAG; } +{UNION} { return TokenType::KW_UNION; } +{INTERSECT} { return TokenType::KW_INTERSECT; } +{MINUS} { return TokenType::KW_MINUS; } +{NO} { return TokenType::KW_NO; } +{OVERWRITE} { return TokenType::KW_OVERWRITE; } +{TRUE} { yylval->boolval = true; return TokenType::BOOL; } +{FALSE} { yylval->boolval = false; return TokenType::BOOL; } + +"." 
0[Xx]{HEX}+                 {
                                // Hexadecimal literal; base 16 makes strtoull
                                // accept the leading "0x"/"0X" itself.
                                yylval->intval = ::strtoull(yytext, nullptr, 16);
                                return TokenType::INTEGER;
                            }
0{OCT}+                     {
                                // Octal literal. This rule must be listed before
                                // {DEC}+ : both match the same text with the same
                                // length, and on a tie the scanner picks the rule
                                // that appears first, so the decimal rule would
                                // otherwise always win and read e.g. 0123 as 123.
                                yylval->intval = ::strtoull(yytext, nullptr, 8);
                                return TokenType::INTEGER;
                            }
{DEC}+                      {
                                // Plain decimal literal (also reached for inputs
                                // such as "0" or "089" that are not valid octal).
                                yylval->intval = ::atoll(yytext);
                                return TokenType::INTEGER;
                            }
\"                          {
                                // Opening quote: start collecting the literal
                                // into sbuf from the beginning.
                                BEGIN(STR);
                                pos = 0;
                            }
<STR>\"                     {
                                // Closing quote: hand the collected bytes to the parser.
                                yylval->strval = new std::string(sbuf, pos);
                                BEGIN(INITIAL);
                                return TokenType::STRING;
                            }
<STR>\n                     { yyterminate(); }
<STR>[^\\\n\"]+             {
                                // Run of ordinary characters. sbuf holds at most
                                // MAX_STRING bytes, so give up on the input instead
                                // of writing past the end of the buffer.
                                const size_t len = static_cast<size_t>(yyleng);
                                if (len > MAX_STRING - pos) {
                                    yyterminate();
                                }
                                ::memcpy(sbuf + pos, yytext, len);
                                pos += len;
                            }
<STR>\\{OCT}{1,3}           {
                                // Octal escape such as \101; values that do not
                                // fit in one byte are rejected.
                                unsigned val = 0;
                                sscanf(yytext + 1, "%o", &val);
                                if (val > 0xFF || pos >= MAX_STRING) {
                                    yyterminate();
                                }
                                sbuf[pos] = static_cast<char>(val);
                                pos++;
                            }
<STR>\\{DEC}+               { yyterminate(); }
<STR>\\n                    {
                                if (pos >= MAX_STRING) {
                                    yyterminate();
                                }
                                sbuf[pos] = '\n';
                                pos++;
                            }
<STR>\\t                    {
                                if (pos >= MAX_STRING) {
                                    yyterminate();
                                }
                                sbuf[pos] = '\t';
                                pos++;
                            }
<STR>\\r                    {
                                if (pos >= MAX_STRING) {
                                    yyterminate();
                                }
                                sbuf[pos] = '\r';
                                pos++;
                            }
<STR>\\b                    {
                                if (pos >= MAX_STRING) {
                                    yyterminate();
                                }
                                sbuf[pos] = '\b';
                                pos++;
                            }
<STR>\\f                    {
                                if (pos >= MAX_STRING) {
                                    yyterminate();
                                }
                                sbuf[pos] = '\f';
                                pos++;
                            }
<STR>\\(.|\n)               {
                                // Any other escaped character stands for itself.
                                if (pos >= MAX_STRING) {
                                    yyterminate();
                                }
                                sbuf[pos] = yytext[1];
                                pos++;
                            }
{ printf("error %c\n", *yytext); yyterminate(); } + +%% diff --git a/src/parser/test/CMakeLists.txt b/src/parser/test/CMakeLists.txt new file mode 100644 index 00000000..829712ef --- /dev/null +++ b/src/parser/test/CMakeLists.txt @@ -0,0 +1,9 @@ +add_executable(parser_test ParserTest.cpp $<TARGET_OBJECTS:parser_obj>) +target_link_libraries(parser_test gtest gtest_main pthread) +target_include_directories(parser_test SYSTEM BEFORE PUBLIC ${FLEX_INCLUDE_DIRS}) +add_test(NAME parser_test COMMAND parser_test) + +add_executable(scanner_test ScannerTest.cpp $<TARGET_OBJECTS:parser_obj>) +target_link_libraries(scanner_test gtest gtest_main pthread) +target_include_directories(scanner_test SYSTEM BEFORE PUBLIC ${FLEX_INCLUDE_DIRS}) +add_test(NAME scanner_test COMMAND scanner_test) diff --git a/src/parser/test/ParserTest.cpp b/src/parser/test/ParserTest.cpp new file mode 100644 index 00000000..d28520bd --- /dev/null +++ b/src/parser/test/ParserTest.cpp @@ -0,0 +1,227 @@ +/* Copyright (c) 2018 - present, VE Software Inc. 
All rights reserved + * + * This source code is licensed under Apache 2.0 License + * (found in the LICENSE.Apache file in the root directory) + */ +#include <gtest/gtest.h> +#include "parser/GQLParser.h" + +// TODO(dutor) Inspect the internal structures to check on the syntax and semantics + +namespace vesoft { + +TEST(Parser, Go) { + { + GQLParser parser; + std::string query = "GO FROM 1 AS person"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "GO FROM 1 AS person;"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "GO 2 STEPS FROM 1 AS person"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "GO UPTO 2 STEPS FROM 1 AS person"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "GO FROM 1 AS person OVER friend"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "GO FROM 1 AS person OVER friend REVERSELY"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "GO FROM 1 AS person RETURN person.name"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "GO FROM 1 AS person RETURN person[manager].name,person.age"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "GO FROM 1,2,3 AS person"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "GO FROM [$1,$2,$5] AS person"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "GO FROM 1,2,3 AS person WHERE person.name == \"dutor\""; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } +} + +TEST(Parser, UseNamespace) { + { + GQLParser parser; + std::string query = "USE NAMESPACE ns"; + 
// Set operators (INTERSECT / UNION / MINUS) between GO sentences, including
// a chain of two operators, must parse.
TEST(Parser, Set) {
    const char *const queries[] = {
        "GO FROM 1 AS person INTERSECT GO FROM 2 AS person",
        "GO FROM 1 AS person UNION GO FROM 2 AS person",
        "GO FROM 1 AS person MINUS GO FROM 2 AS person",
        "GO FROM 1 AS person MINUS GO FROM 2 AS person "
        "UNION GO FROM 3 AS person",
    };
    for (const char *text : queries) {
        GQLParser parser;
        std::string query = text;
        ASSERT_TRUE(parser.parse(query)) << parser.error();
    }
}
std::string query = "UPDATE VERTEX 12345 SET name=\"dutor\",age=30,married=true"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "UPDATE VERTEX 12345 SET name=\"dutor\",age=31,married=true " + "WHERE salary > 10000"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "UPDATE VERTEX 12345 SET name=\"dutor\",age=30,married=true " + "RETURN name,salary"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "UPDATE OR INSERT VERTEX 12345 SET name=\"dutor\",age=30,married=true " + "RETURN name,salary"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } +} + +TEST(Parser, InsertEdge) { /* Each case hands one INSERT EDGE statement to a fresh GQLParser and asserts that parse() returns true, streaming parser.error() into the failure message. The four cases combine: with or without NO OVERWRITE, and with or without an @<integer> suffix after the destination id. */ + { + GQLParser parser; + std::string query = "INSERT EDGE transfer(amount, time) " + "VALUES(12345 -> 54321: 3.75, 1537408527)"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "INSERT EDGE NO OVERWRITE transfer(amount, time) " + "VALUES(12345 -> 54321: 3.75, 1537408527)"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "INSERT EDGE transfer(amount, time) " + "VALUES(12345 -> 54321 @1537408527: 3.75, 1537408527)"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "INSERT EDGE NO OVERWRITE transfer(amount, time) " + "VALUES(12345 -> 54321 @1537408527: 3.75, 1537408527)"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } +} + +TEST(Parser, UpdateEdge) { /* Each case hands one UPDATE EDGE statement to a fresh GQLParser and asserts that parse() returns true. Cases: SET only; SET with WHERE; SET with WHERE and RETURN; and the UPDATE OR INSERT form with WHERE and RETURN. */ + { + GQLParser parser; + std::string query = "UPDATE EDGE 12345 -> 54321 SET amount=3.14,time=1537408527"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "UPDATE EDGE 12345 -> 54321 SET amount=3.14,time=1537408527 " + "WHERE amount > 3.14"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = 
"UPDATE EDGE 12345 -> 54321 SET amount=3.14,time=1537408527 " + "WHERE amount > 3.14 RETURN amount,time"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } + { + GQLParser parser; + std::string query = "UPDATE OR INSERT EDGE 12345 -> 54321 SET amount=3.14,time=1537408527 " + "WHERE amount > 3.14 RETURN amount,time"; + ASSERT_TRUE(parser.parse(query)) << parser.error(); + } +} + +} // namespace vesoft diff --git a/src/parser/test/ScannerTest.cpp b/src/parser/test/ScannerTest.cpp new file mode 100644 index 00000000..0d0026e1 --- /dev/null +++ b/src/parser/test/ScannerTest.cpp @@ -0,0 +1,189 @@ +/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved + * + * This source code is licensed under Apache 2.0 License + * (found in the LICENSE.Apache file in the root directory) + */ +#include <gtest/gtest.h> +#include <sstream> +#include <vector> +#include <utility> +#include "parser/VGraphParser.hpp" +#include "parser/VGraphScanner.h" + +// TODO(dutor) Check on the semantic value of tokens + +namespace vesoft { + +TEST(Scanner, Basic) { /* Table-driven lexer check: token_mappings pairs a source snippet with the token type yylex is expected to return for it. The snippets are concatenated into one stream, and the scanner is then asked for one token per table entry, in table order. */ + using TokenType = vesoft::VGraphParser::token::yytokentype; + VGraphScanner scanner; + vesoft::VGraphParser::semantic_type yylval; + vesoft::VGraphParser::location_type yyloc; + std::vector<std::pair<std::string, TokenType>> token_mappings = { + {" . 
", TokenType::DOT}, + {" , ", TokenType::COMMA}, + {" : ", TokenType::COLON}, + {" ; ", TokenType::SEMICOLON}, + {" + ", TokenType::ADD}, + {" - ", TokenType::SUB}, + {" * ", TokenType::MUL}, + {" / ", TokenType::DIV}, + {" % ", TokenType::MOD}, + {" @ ", TokenType::AT}, + + {" < ", TokenType::LT}, + {" <= ", TokenType::LE}, + {" > ", TokenType::GT}, + {" >= ", TokenType::GE}, + {" == ", TokenType::EQ}, + {" != ", TokenType::NE}, + + {" || ", TokenType::OR}, + {" && ", TokenType::AND}, + {" | ", TokenType::PIPE}, + + {" = ", TokenType::ASSIGN}, + + {" ( ", TokenType::L_PAREN}, + {" ) ", TokenType::R_PAREN}, + {" [ ", TokenType::L_BRACKET}, + {" ] ", TokenType::R_BRACKET}, + {" { ", TokenType::L_BRACE}, + {" } ", TokenType::R_BRACE}, + + {" <- ", TokenType::L_ARROW}, + {" -> ", TokenType::R_ARROW}, + + {" GO ", TokenType::KW_GO}, + {" go ", TokenType::KW_GO}, + {" AS ", TokenType::KW_AS}, + {" as ", TokenType::KW_AS}, + {" TO ", TokenType::KW_TO}, + {" to ", TokenType::KW_TO}, + {" OR ", TokenType::KW_OR}, + {" or ", TokenType::KW_OR}, + {" USE ", TokenType::KW_USE}, + {" use ", TokenType::KW_USE}, + {" SET ", TokenType::KW_SET}, + {" set ", TokenType::KW_SET}, + {" FROM ", TokenType::KW_FROM}, + {" from ", TokenType::KW_FROM}, + {" WHERE ", TokenType::KW_WHERE}, + {" where ", TokenType::KW_WHERE}, + {" MATCH ", TokenType::KW_MATCH}, + {" match ", TokenType::KW_MATCH}, + {" INSERT ", TokenType::KW_INSERT}, + {" insert ", TokenType::KW_INSERT}, + {" VALUES ", TokenType::KW_VALUES}, + {" values ", TokenType::KW_VALUES}, + {" RETURN ", TokenType::KW_RETURN}, + {" return ", TokenType::KW_RETURN}, + {" DEFINE ", TokenType::KW_DEFINE}, + {" define ", TokenType::KW_DEFINE}, + {" VERTEX ", TokenType::KW_VERTEX}, + {" vertex ", TokenType::KW_VERTEX}, + {" EDGE ", TokenType::KW_EDGE}, + {" edge ", TokenType::KW_EDGE}, + {" UPDATE ", TokenType::KW_UPDATE}, + {" update ", TokenType::KW_UPDATE}, + {" ALTER ", TokenType::KW_ALTER}, + {" alter ", TokenType::KW_ALTER}, + {" STEPS 
", TokenType::KW_STEPS}, + {" steps ", TokenType::KW_STEPS}, + {" OVER ", TokenType::KW_OVER}, + {" over ", TokenType::KW_OVER}, + {" UPTO ", TokenType::KW_UPTO}, + {" upto ", TokenType::KW_UPTO}, + {" REVERSELY ", TokenType::KW_REVERSELY}, + {" reversely ", TokenType::KW_REVERSELY}, + {" NAMESPACE ", TokenType::KW_NAMESPACE}, + {" namespace ", TokenType::KW_NAMESPACE}, + {" TTL ", TokenType::KW_TTL}, + {" ttl ", TokenType::KW_TTL}, + {" INT8 ", TokenType::KW_INT8}, + {" int8 ", TokenType::KW_INT8}, + {" INT16 ", TokenType::KW_INT16}, + {" int16 ", TokenType::KW_INT16}, + {" INT32 ", TokenType::KW_INT32}, + {" int32 ", TokenType::KW_INT32}, + {" INT64 ", TokenType::KW_INT64}, + {" int64 ", TokenType::KW_INT64}, + {" UINT8 ", TokenType::KW_UINT8}, + {" uint8 ", TokenType::KW_UINT8}, + {" UINT16 ", TokenType::KW_UINT16}, + {" uint16 ", TokenType::KW_UINT16}, + {" UINT32 ", TokenType::KW_UINT32}, + {" uint32 ", TokenType::KW_UINT32}, + {" UINT64 ", TokenType::KW_UINT64}, + {" uint64 ", TokenType::KW_UINT64}, + {" BIGINT ", TokenType::KW_BIGINT}, + {" bigint ", TokenType::KW_BIGINT}, + {" DOUBLE ", TokenType::KW_DOUBLE}, + {" double ", TokenType::KW_DOUBLE}, + {" STRING ", TokenType::KW_STRING}, + {" string ", TokenType::KW_STRING}, + {" BOOL ", TokenType::KW_BOOL}, + {" bool ", TokenType::KW_BOOL}, + {" TAG ", TokenType::KW_TAG}, + {" tag ", TokenType::KW_TAG}, + {" UNION ", TokenType::KW_UNION}, + {" union ", TokenType::KW_UNION}, + {" INTERSECT ", TokenType::KW_INTERSECT}, + {" intersect ", TokenType::KW_INTERSECT}, + {" MINUS ", TokenType::KW_MINUS}, + {" minus ", TokenType::KW_MINUS}, + + {" v ", TokenType::SYMBOL}, + {" v1 ", TokenType::SYMBOL}, + {" var ", TokenType::SYMBOL}, + {" _var ", TokenType::SYMBOL}, + {" var123 ", TokenType::SYMBOL}, + {" _var123 ", TokenType::SYMBOL}, + + {" 123 ", TokenType::INTEGER}, + {" 0x123 ", TokenType::INTEGER}, + {" 0Xdeadbeef ", TokenType::INTEGER}, + {" 0123 ", TokenType::INTEGER}, + {" 123u ", TokenType::UINTEGER}, + {" 
123UL ", TokenType::UINTEGER}, + + {" .456 ", TokenType::DOUBLE}, + {" 123.", TokenType::DOUBLE}, + {" 123.456 ", TokenType::DOUBLE}, + + {" $1 ", TokenType::COL_REF_ID}, + {" $123 ", TokenType::COL_REF_ID}, + + {" $_ ", TokenType::VARIABLE}, + {" $var ", TokenType::VARIABLE}, + + {"\"Hello\"", TokenType::STRING}, // "Hello" ==> Hello + {"\"He\\nllo\"", TokenType::STRING}, // "He\nllo" ==> He + // llo + {"\"He\\\nllo\"", TokenType::STRING}, // "He\nllo" ==> He + // llo + {"\"Hell\\o\"", TokenType::STRING}, // "Hello" ==> Hello + {"\"Hello\\\\\"", TokenType::STRING}, // "Hello\\" ==> Hello\ // + {"\"\\110ello\"", TokenType::STRING}, // "Hello" ==> Hello + {"\"\\\"Hello\\\"\"", TokenType::STRING}, // "\"Hello\"" ==> "Hello" + }; + + /* Join all snippets into a single input so one scanner instance lexes them back to back. */ std::string token_stream; + for (auto &pair : token_mappings) { + token_stream += pair.first; + } + + std::istringstream is(token_stream); + + scanner.switch_streams(&is, nullptr); + + /* One yylex call per table entry; ASSERT_EQ stops the test at the first mismatch and names the offending snippet. */ for (auto &pair : token_mappings) { + auto &token = pair.first; + auto expected_type = static_cast<int>(pair.second); + auto actual_type = scanner.yylex(&yylval, &yyloc); + + ASSERT_EQ(expected_type, actual_type) << "Lex error for `" << token <<"'"; + } +} + +} // namespace vesoft -- GitLab