Skip to content
Snippets Groups Projects
Commit dae2da96 authored by dutor's avatar dutor Committed by GitHub
Browse files

First Implementation of GQL parser (#11)

* First Implementation of GQL parser

* Address @sherman-the-tank 's comments

* Implement syntax of UPDATE/INSERT

In addition, added support for hexadecimal and octal numbers.

* Reserve space on the buffer in `toString`
parent 534437fd
No related branches found
No related tags found
No related merge requests found
......@@ -109,6 +109,7 @@ add_subdirectory(server)
add_subdirectory(console)
add_subdirectory(raftex)
#add_subdirectory(storage)
add_subdirectory(parser)
add_dependencies(common third-party)
#add_dependencies(storage_engines common)
......
VGraphParser.cpp
VGraphParser.hpp
VGraphScanner.cpp
location.hh
position.hh
stack.hh
/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
*
* This source code is licensed under Apache 2.0 License
* (found in the LICENSE.Apache file in the root directory)
*/
#include "parser/AstTypes.h"
namespace vesoft {
}
This diff is collapsed.
# Build rules for the GQL parser. Bison turns parser.yy into
# VGraphParser.cpp and Flex turns scanner.lex into VGraphScanner.cpp, both in
# the binary directory; the two generated sources are compiled into the
# `parser_obj` object library.

# Hand-written headers live in the source directory, generated ones in the
# binary directory.
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})

# REQUIRED makes a missing tool fail at this point with a clear message,
# instead of later as an unknown bison_target()/flex_target() command.
find_package(BISON REQUIRED)
find_package(FLEX REQUIRED)

# "-Werror" turns every Bison warning into an error.
bison_target(Parser parser.yy ${CMAKE_CURRENT_BINARY_DIR}/VGraphParser.cpp COMPILE_FLAGS "-Werror")
flex_target(Scanner scanner.lex ${CMAKE_CURRENT_BINARY_DIR}/VGraphScanner.cpp)
# The scanner includes the parser's generated header, so Bison has to run first.
add_flex_bison_dependency(Scanner Parser)

add_library(parser_obj OBJECT ${FLEX_Scanner_OUTPUTS} ${BISON_Parser_OUTPUTS})
target_include_directories(parser_obj SYSTEM BEFORE PUBLIC ${FLEX_INCLUDE_DIRS})
add_dependencies(parser_obj base_obj)

add_subdirectory(test)
/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
*
* This source code is licensed under Apache 2.0 License
* (found in the LICENSE.Apache file in the root directory)
*/
#include "GQLParser.h"
namespace vesoft {
} // namespace vesoft
/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
*
* This source code is licensed under Apache 2.0 License
* (found in the LICENSE.Apache file in the root directory)
*/
#ifndef PARSER_GQLPARSER_H_
#define PARSER_GQLPARSER_H_
#include <sstream>
#include <regex>
#include "VGraphParser.hpp"
#include "VGraphScanner.h"
namespace vesoft {
class GQLParser {
public:
GQLParser() : parser_(scanner_, error_, &statement_) {
}
bool parse(const std::string &query) {
std::istringstream is(query);
scanner_.switch_streams(&is, nullptr);
return parser_.parse() == 0;
}
auto statement() {
std::unique_ptr<Statement> statement(statement_);
statement_ = nullptr;
return statement;
}
const std::string& error() const {
return error_;
}
private:
vesoft::VGraphScanner scanner_;
vesoft::VGraphParser parser_;
std::string error_;
Statement *statement_ = nullptr;
};
} // namespace vesoft
#endif // PARSER_GQLPARSER_H_
/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
*
* This source code is licensed under Apache 2.0 License
* (found in the LICENSE.Apache file in the root directory)
*/
#ifndef PARSER_VGRAPHSCANNER_H_
#define PARSER_VGRAPHSCANNER_H_
// Only include FlexLexer.h if it hasn't been already included
#if !defined(yyFlexLexerOnce)
#include <FlexLexer.h>
#endif
// Override the interface for yylex since we namespaced it
#undef YY_DECL
#define YY_DECL int vesoft::VGraphScanner::yylex()
#include "VGraphParser.hpp"
namespace vesoft {
/**
 * Flex lexer class adapted to the calling convention of the Bison parser:
 * the parser passes where the semantic value and the location of the next
 * token are to be written, and the scanning routine fills them in.
 */
class VGraphScanner : public yyFlexLexer {
public:
    /**
     * Entry point used by the parser. Records the output slots for the
     * next token, then runs the scanning routine and returns its result.
     */
    int yylex(vesoft::VGraphParser::semantic_type * lval,
              vesoft::VGraphParser::location_type *loc) {
        yylloc = loc;
        yylval = lval;
        const int token = yylex();
        return token;
    }

private:
    // Presumably the class gtest generates for TEST(Scanner, Basic).
    friend class Scanner_Basic_Test;

    // The scanning routine itself; YY_DECL above names it as the function
    // that the generated scanner defines.
    int yylex();

    // Output slots for the token currently being scanned.
    vesoft::VGraphParser::location_type * yylloc = nullptr;
    vesoft::VGraphParser::semantic_type * yylval = nullptr;
};
} // namespace vesoft
#endif // PARSER_VGRAPHSCANNER_H_
/* Generate a C++ parser class from Bison's lalr1.cc skeleton. */
%language "C++"
%skeleton "lalr1.cc"
/* Do not emit #line directives into the generated sources. */
%no-lines
/* Enable location tracking; yylex() receives a location_type* as well. */
%locations
/* The generated class is vesoft::VGraphParser. */
%define api.namespace { vesoft }
/* NOTE(review): newer Bison releases name this variable `api.parser.class`
 * and report the old spelling as deprecated; the build passes -Werror to
 * Bison, so that warning may be fatal there -- confirm against the Bison
 * versions that have to be supported. */
%define parser_class_name { VGraphParser }
/* Extra argument forwarded to yylex(): the scanner to pull tokens from. */
%lex-param { vesoft::VGraphScanner& scanner }
/* Constructor arguments of the parser: the scanner, the string that
 * error() writes its message to, and the slot that receives the parsed
 * Statement. */
%parse-param { vesoft::VGraphScanner& scanner }
%parse-param { std::string &errmsg }
%parse-param { vesoft::Statement** statement }
/* Code needed by the semantic value type and the %parse-param declarations;
 * the scanner class is only forward-declared here. */
%code requires {
#include <iostream>
#include <sstream>
#include <string>
#include "AstTypes.h"
namespace vesoft {
class VGraphScanner;
}
}
/* Declaration of the yylex() bridge called by the generated parser; its
 * definition is in the epilogue at the end of this file. */
%code {
static int yylex(vesoft::VGraphParser::semantic_type* yylval,
vesoft::VGraphParser::location_type *yylloc,
vesoft::VGraphScanner& scanner);
}
/* Semantic value type shared by tokens and nonterminals. Every pointer
 * member is a raw pointer whose object is created with `new` in the scanner
 * or in a grammar action. */
%union {
// Literal values produced by the scanner.
bool boolval;
// Used for INTEGER, UINTEGER and COL_REF_ID; signed literals are cast to
// int64_t in the primary_expression action.
uint64_t intval;
double doubleval;
// Heap-allocated text of STRING, SYMBOL and VARIABLE tokens.
std::string *strval;
// AST nodes built by the grammar actions.
vesoft::Expression *expr;
vesoft::Sentence *sentence;
vesoft::Statement *statement;
vesoft::ColumnSpecification *colspec;
vesoft::ColumnSpecificationList *colspeclist;
vesoft::ColumnType type;
vesoft::StepClause *step_clause;
vesoft::FromClause *from_clause;
vesoft::SourceNodeList *src_node_list;
vesoft::OverClause *over_clause;
vesoft::WhereClause *where_clause;
vesoft::ReturnClause *return_clause;
vesoft::ReturnFields *return_fields;
vesoft::PropertyList *prop_list;
vesoft::ValueList *value_list;
vesoft::UpdateList *update_list;
vesoft::UpdateItem *update_item;
}
/* keywords */
/* NOTE(review): KW_IN is declared here but no rule in scanner.lex returns
 * it; likewise NOT and NEG below. */
%token KW_GO KW_AS KW_TO KW_OR KW_USE KW_SET KW_FROM KW_WHERE KW_ALTER
%token KW_MATCH KW_INSERT KW_VALUES KW_RETURN KW_DEFINE KW_VERTEX KW_TTL
%token KW_EDGE KW_UPDATE KW_STEPS KW_OVER KW_UPTO KW_REVERSELY KW_NAMESPACE
%token KW_INT8 KW_INT16 KW_INT32 KW_INT64 KW_UINT8 KW_UINT16 KW_UINT32 KW_UINT64
%token KW_BIGINT KW_DOUBLE KW_STRING KW_BOOL KW_TAG KW_UNION KW_INTERSECT KW_MINUS
%token KW_NO KW_OVERWRITE KW_IN
/* symbols */
%token L_PAREN R_PAREN L_BRACKET R_BRACKET L_BRACE R_BRACE COMMA
%token PIPE OR AND LT LE GT GE EQ NE ADD SUB MUL DIV MOD NOT NEG ASSIGN
%token DOT COLON SEMICOLON L_ARROW R_ARROW AT
/* token type specification */
/* Tokens that carry a semantic value, tagged with the %union member used. */
%token <boolval> BOOL
%token <intval> INTEGER UINTEGER COL_REF_ID
%token <doubleval> DOUBLE
%token <strval> STRING SYMBOL VARIABLE
/* Nonterminals of the expression grammar; all yield an Expression*. */
%type <expr> expression logic_or_expression logic_and_expression
%type <expr> relational_expression multiplicative_expression additive_expression
%type <expr> unary_expression primary_expression equality_expression
%type <type> type_spec
/* Clauses of the GO sentence and the lists they are built from. */
%type <step_clause> step_clause
%type <from_clause> from_clause
%type <src_node_list> id_list ref_list
%type <over_clause> over_clause
%type <where_clause> where_clause
%type <return_clause> return_clause
%type <return_fields> return_fields
%type <prop_list> prop_list
%type <value_list> value_list
%type <update_list> update_list
%type <update_item> update_item
%type <intval> ttl_spec
%type <colspec> column_spec
%type <colspeclist> column_spec_list
/* Sentences; all yield a Sentence*. */
%type <sentence> go_sentence match_sentence use_sentence
%type <sentence> define_tag_sentence define_edge_sentence
%type <sentence> alter_tag_sentence alter_edge_sentence
%type <sentence> traverse_sentence set_sentence piped_sentence assignment_sentence
%type <sentence> maintainance_sentence insert_vertex_sentence insert_edge_sentence
%type <sentence> mutate_sentence update_vertex_sentence update_edge_sentence
%type <statement> statement
/* A whole input is one statement: sentences separated by semicolons. */
%start statement
%%
/* Literals, property references and parenthesised expressions. */
primary_expression
    : INTEGER {
        // `intval` is unsigned in %union; a plain integer literal is
        // handed on as a signed value.
        $$ = new PrimaryExpression(static_cast<int64_t>($1));
    }
    | UINTEGER {
        $$ = new PrimaryExpression(static_cast<uint64_t>($1));
    }
    | DOUBLE {
        $$ = new PrimaryExpression($1);
    }
    | STRING {
        // The scanner allocated the text with `new`; only the dereferenced
        // string is passed on, so the allocation is released here.
        // NOTE(review): assumes PrimaryExpression copies its argument --
        // confirm against AstTypes.h.
        $$ = new PrimaryExpression(*$1);
        delete $1;
    }
    | BOOL {
        $$ = new PrimaryExpression($1);
    }
    | SYMBOL {
        // TODO(dutor) detect semantic type of symbol
        $$ = new PropertyExpression($1);
    }
    | SYMBOL DOT SYMBOL {
        $$ = new PropertyExpression($1, $3);
    }
    | SYMBOL L_BRACKET SYMBOL R_BRACKET DOT SYMBOL {
        // name[tag].prop: the bracketed symbol is passed as third argument.
        $$ = new PropertyExpression($1, $6, $3);
    }
    | L_PAREN expression R_PAREN {
        $$ = $2;
    }
    ;
/* Optional sign or type cast applied to a primary expression. */
unary_expression
    : primary_expression                            { $$ = $1; }
    | ADD primary_expression
        { $$ = new UnaryExpression(UnaryExpression::PLUS, $2); }
    | SUB primary_expression
        { $$ = new UnaryExpression(UnaryExpression::MINUS, $2); }
    | L_PAREN type_spec R_PAREN primary_expression
        { $$ = new TypeCastingExpression($2, $4); }
    ;
/* Maps each type keyword to the ColumnType enumerator of the same name. */
type_spec
    : KW_INT8
        { $$ = ColumnType::INT8; }
    | KW_INT16
        { $$ = ColumnType::INT16; }
    | KW_INT32
        { $$ = ColumnType::INT32; }
    | KW_INT64
        { $$ = ColumnType::INT64; }
    | KW_UINT8
        { $$ = ColumnType::UINT8; }
    | KW_UINT16
        { $$ = ColumnType::UINT16; }
    | KW_UINT32
        { $$ = ColumnType::UINT32; }
    | KW_UINT64
        { $$ = ColumnType::UINT64; }
    | KW_DOUBLE
        { $$ = ColumnType::DOUBLE; }
    | KW_STRING
        { $$ = ColumnType::STRING; }
    | KW_BOOL
        { $$ = ColumnType::BOOL; }
    | KW_BIGINT
        { $$ = ColumnType::BIGINT; }
    ;
/* Left-recursive, hence left-associative: `*`, `/` and `%`. */
multiplicative_expression
    : unary_expression                              { $$ = $1; }
    | multiplicative_expression MUL unary_expression
        { $$ = new ArithmeticExpression($1, ArithmeticExpression::MUL, $3); }
    | multiplicative_expression DIV unary_expression
        { $$ = new ArithmeticExpression($1, ArithmeticExpression::DIV, $3); }
    | multiplicative_expression MOD unary_expression
        { $$ = new ArithmeticExpression($1, ArithmeticExpression::MOD, $3); }
    ;
/* Left-recursive, hence left-associative: binary `+` and `-`. */
additive_expression
    : multiplicative_expression                     { $$ = $1; }
    | additive_expression ADD multiplicative_expression
        { $$ = new ArithmeticExpression($1, ArithmeticExpression::ADD, $3); }
    | additive_expression SUB multiplicative_expression
        { $$ = new ArithmeticExpression($1, ArithmeticExpression::SUB, $3); }
    ;
/* Ordering comparisons over additive expressions. */
relational_expression
    : additive_expression                           { $$ = $1; }
    | relational_expression LT additive_expression
        { $$ = new RelationalExpression($1, RelationalExpression::LT, $3); }
    | relational_expression GT additive_expression
        { $$ = new RelationalExpression($1, RelationalExpression::GT, $3); }
    | relational_expression LE additive_expression
        { $$ = new RelationalExpression($1, RelationalExpression::LE, $3); }
    | relational_expression GE additive_expression
        { $$ = new RelationalExpression($1, RelationalExpression::GE, $3); }
    ;
/* `==` and `!=`; these bind less tightly than the ordering comparisons. */
equality_expression
    : relational_expression                         { $$ = $1; }
    | equality_expression EQ relational_expression
        { $$ = new RelationalExpression($1, RelationalExpression::EQ, $3); }
    | equality_expression NE relational_expression
        { $$ = new RelationalExpression($1, RelationalExpression::NE, $3); }
    ;
/* Logical AND binds tighter than logical OR. */
logic_and_expression
    : equality_expression                           { $$ = $1; }
    | logic_and_expression AND equality_expression
        { $$ = new LogicalExpression($1, LogicalExpression::AND, $3); }
    ;

logic_or_expression
    : logic_and_expression                          { $$ = $1; }
    | logic_or_expression OR logic_and_expression
        { $$ = new LogicalExpression($1, LogicalExpression::OR, $3); }
    ;

/* Top of the expression grammar. */
expression
    : logic_or_expression                           { $$ = $1; }
    ;
/* GO [steps] FROM ... [OVER ...] [WHERE ...] [RETURN ...] */
go_sentence
    : KW_GO step_clause from_clause over_clause where_clause return_clause {
        auto *sentence = new GoSentence();
        sentence->setStepClause($2);
        sentence->setFromClause($3);
        // The three clauses below are nullptr when omitted in the query.
        sentence->setOverClause($4);
        sentence->setWhereClause($5);
        sentence->setReturnClause($6);
        $$ = sentence;
    }
    ;
/* Optional step count: nothing, `N STEPS` or `UPTO N STEPS`. */
step_clause
    : %empty {
        $$ = new StepClause();
    }
    | INTEGER KW_STEPS {
        $$ = new StepClause($1);
    }
    | KW_UPTO INTEGER KW_STEPS {
        $$ = new StepClause($2, true);
    }
    ;
/* Source of a traversal: literal ids, or bracketed `$N` references. */
from_clause
    : KW_FROM id_list KW_AS SYMBOL {
        $$ = new FromClause($2, $4);
    }
    | KW_FROM L_BRACKET ref_list R_BRACKET KW_AS SYMBOL {
        // The third argument marks the list entries as reference ids.
        $$ = new FromClause($3, $6, true);
    }
    ;
/* Comma-separated integer node ids; at least one is required. */
id_list
    : INTEGER {
        $$ = new SourceNodeList();
        $$->addNodeId($1);
    }
    | id_list COMMA INTEGER {
        $1->addNodeId($3);
        $$ = $1;
    }
    ;
/* Comma-separated `$N` references; a trailing comma is accepted. */
ref_list
    : COL_REF_ID {
        $$ = new SourceNodeList();
        $$->addNodeId($1);
    }
    | ref_list COMMA COL_REF_ID {
        $1->addNodeId($3);
        $$ = $1;
    }
    | ref_list COMMA                                { $$ = $1; }
    ;
/* Optional `OVER edge [REVERSELY]`; yields nullptr when absent. */
over_clause
    : %empty {
        $$ = nullptr;
    }
    | KW_OVER SYMBOL {
        $$ = new OverClause($2);
    }
    | KW_OVER SYMBOL KW_REVERSELY {
        $$ = new OverClause($2, true);
    }
    ;
/* Optional `WHERE expr`; yields nullptr when absent. */
where_clause
    : %empty {
        $$ = nullptr;
    }
    | KW_WHERE expression {
        $$ = new WhereClause($2);
    }
    ;

/* Optional `RETURN expr, ...`; yields nullptr when absent. */
return_clause
    : %empty {
        $$ = nullptr;
    }
    | KW_RETURN return_fields {
        $$ = new ReturnClause($2);
    }
    ;

/* Comma-separated expressions; at least one is required. */
return_fields
    : expression {
        $$ = new ReturnFields();
        $$->addColumn($1);
    }
    | return_fields COMMA expression {
        $$ = $1;
        $$->addColumn($3);
    }
    ;
/* Only the MATCH keyword itself is recognised so far. */
match_sentence
    : KW_MATCH {
        $$ = new MatchSentence;
    }
    ;

/* USE NAMESPACE name */
use_sentence
    : KW_USE KW_NAMESPACE SYMBOL {
        $$ = new UseSentence($3);
    }
    ;
/*
 * DEFINE/ALTER TAG/EDGE name(column, ...).
 * Each sentence has a second alternative that accepts a trailing comma
 * after the last column and builds the same node.
 */
define_tag_sentence
    : KW_DEFINE KW_TAG SYMBOL L_PAREN column_spec_list R_PAREN
        { $$ = new DefineTagSentence($3, $5); }
    | KW_DEFINE KW_TAG SYMBOL L_PAREN column_spec_list COMMA R_PAREN
        { $$ = new DefineTagSentence($3, $5); }
    ;

alter_tag_sentence
    : KW_ALTER KW_TAG SYMBOL L_PAREN column_spec_list R_PAREN
        { $$ = new AlterTagSentence($3, $5); }
    | KW_ALTER KW_TAG SYMBOL L_PAREN column_spec_list COMMA R_PAREN
        { $$ = new AlterTagSentence($3, $5); }
    ;

define_edge_sentence
    : KW_DEFINE KW_EDGE SYMBOL L_PAREN column_spec_list R_PAREN
        { $$ = new DefineEdgeSentence($3, $5); }
    | KW_DEFINE KW_EDGE SYMBOL L_PAREN column_spec_list COMMA R_PAREN
        { $$ = new DefineEdgeSentence($3, $5); }
    ;

alter_edge_sentence
    : KW_ALTER KW_EDGE SYMBOL L_PAREN column_spec_list R_PAREN
        { $$ = new AlterEdgeSentence($3, $5); }
    | KW_ALTER KW_EDGE SYMBOL L_PAREN column_spec_list COMMA R_PAREN
        { $$ = new AlterEdgeSentence($3, $5); }
    ;
/* Comma-separated column specifications; at least one is required. */
column_spec_list
    : column_spec {
        auto *columns = new ColumnSpecificationList();
        columns->addColumn($1);
        $$ = columns;
    }
    | column_spec_list COMMA column_spec {
        $1->addColumn($3);
        $$ = $1;
    }
    ;

/* `name type [TTL = n]`; note that the type is passed before the name. */
column_spec
    : SYMBOL type_spec {
        $$ = new ColumnSpecification($2, $1);
    }
    | SYMBOL type_spec ttl_spec {
        $$ = new ColumnSpecification($2, $1, $3);
    }
    ;

ttl_spec
    : KW_TTL ASSIGN INTEGER {
        $$ = $3;
    }
    ;
/* A single traversal: GO or MATCH. */
traverse_sentence
    : go_sentence                                   { $$ = $1; }
    | match_sentence                                { $$ = $1; }
    ;

/* Traversals combined with UNION / INTERSECT / MINUS, left to right. */
set_sentence
    : traverse_sentence                             { $$ = $1; }
    | set_sentence KW_UNION traverse_sentence
        { $$ = new SetSentence($1, SetSentence::UNION, $3); }
    | set_sentence KW_INTERSECT traverse_sentence
        { $$ = new SetSentence($1, SetSentence::INTERSECT, $3); }
    | set_sentence KW_MINUS traverse_sentence
        { $$ = new SetSentence($1, SetSentence::MINUS, $3); }
    | L_PAREN piped_sentence R_PAREN
        { $$ = $2; }
    ;

/* Set sentences chained with `|`; the pipe binds looser than set operators. */
piped_sentence
    : set_sentence                                  { $$ = $1; }
    | piped_sentence PIPE set_sentence
        { $$ = new PipedSentence($1, $3); }
    ;

/* `$var = piped_sentence` */
assignment_sentence
    : VARIABLE ASSIGN piped_sentence
        { $$ = new AssignmentSentence($1, $3); }
    ;
/*
 * INSERT VERTEX|TAG name(props) VALUES(id: values).
 * Both keywords build the same InsertVertexSentence.
 */
insert_vertex_sentence
    : KW_INSERT KW_VERTEX SYMBOL L_PAREN prop_list R_PAREN
      KW_VALUES L_PAREN INTEGER COLON value_list R_PAREN
        { $$ = new InsertVertexSentence($9, $3, $5, $11); }
    | KW_INSERT KW_TAG SYMBOL L_PAREN prop_list R_PAREN
      KW_VALUES L_PAREN INTEGER COLON value_list R_PAREN
        { $$ = new InsertVertexSentence($9, $3, $5, $11); }
    ;
/* Comma-separated property names; a trailing comma is accepted. */
prop_list
    : SYMBOL {
        auto *props = new PropertyList();
        props->addProp($1);
        $$ = props;
    }
    | prop_list COMMA SYMBOL {
        $1->addProp($3);
        $$ = $1;
    }
    | prop_list COMMA                               { $$ = $1; }
    ;

/* Comma-separated value expressions; a trailing comma is accepted. */
value_list
    : expression {
        auto *values = new ValueList();
        values->addValue($1);
        $$ = values;
    }
    | value_list COMMA expression {
        $1->addValue($3);
        $$ = $1;
    }
    | value_list COMMA                              { $$ = $1; }
    ;
/*
 * INSERT EDGE [NO OVERWRITE] name(props) VALUES(src -> dst [@rank]: values).
 * The four alternatives cover the combinations of the two optional parts.
 */
insert_edge_sentence
    : KW_INSERT KW_EDGE SYMBOL L_PAREN prop_list R_PAREN KW_VALUES L_PAREN
      INTEGER R_ARROW INTEGER COLON value_list R_PAREN {
        auto *edge = new InsertEdgeSentence();
        edge->setEdge($3);
        edge->setProps($5);
        edge->setSrcId($9);
        edge->setDstId($11);
        edge->setValues($13);
        $$ = edge;
    }
    | KW_INSERT KW_EDGE KW_NO KW_OVERWRITE SYMBOL L_PAREN prop_list R_PAREN
      KW_VALUES L_PAREN INTEGER R_ARROW INTEGER COLON value_list R_PAREN {
        auto *edge = new InsertEdgeSentence();
        edge->setOverwrite(false);
        edge->setEdge($5);
        edge->setProps($7);
        edge->setSrcId($11);
        edge->setDstId($13);
        edge->setValues($15);
        $$ = edge;
    }
    | KW_INSERT KW_EDGE SYMBOL L_PAREN prop_list R_PAREN KW_VALUES L_PAREN
      INTEGER R_ARROW INTEGER AT INTEGER COLON value_list R_PAREN {
        auto *edge = new InsertEdgeSentence();
        edge->setEdge($3);
        edge->setProps($5);
        edge->setSrcId($9);
        edge->setDstId($11);
        edge->setRank($13);
        edge->setValues($15);
        $$ = edge;
    }
    | KW_INSERT KW_EDGE KW_NO KW_OVERWRITE SYMBOL L_PAREN prop_list R_PAREN
      KW_VALUES L_PAREN INTEGER R_ARROW INTEGER AT INTEGER COLON
      value_list R_PAREN {
        auto *edge = new InsertEdgeSentence();
        edge->setOverwrite(false);
        edge->setEdge($5);
        edge->setProps($7);
        edge->setSrcId($11);
        edge->setDstId($13);
        edge->setRank($15);
        edge->setValues($17);
        $$ = edge;
    }
    ;
/* UPDATE [OR INSERT] VERTEX id SET item, ... [WHERE ...] [RETURN ...] */
update_vertex_sentence
    : KW_UPDATE KW_VERTEX INTEGER KW_SET update_list
      where_clause return_clause {
        auto *update = new UpdateVertexSentence();
        update->setVid($3);
        update->setUpdateList($5);
        update->setWhereClause($6);
        update->setReturnClause($7);
        $$ = update;
    }
    | KW_UPDATE KW_OR KW_INSERT KW_VERTEX INTEGER KW_SET update_list
      where_clause return_clause {
        auto *update = new UpdateVertexSentence();
        update->setInsertable(true);
        update->setVid($5);
        update->setUpdateList($7);
        update->setWhereClause($8);
        update->setReturnClause($9);
        $$ = update;
    }
    ;
/* Comma-separated `name = expression` assignments; at least one. */
update_list
    : update_item {
        auto *items = new UpdateList();
        items->addItem($1);
        $$ = items;
    }
    | update_list COMMA update_item {
        $1->addItem($3);
        $$ = $1;
    }
    ;

update_item
    : SYMBOL ASSIGN expression
        { $$ = new UpdateItem($1, $3); }
    ;
/*
 * UPDATE [OR INSERT] EDGE src -> dst [@rank] SET item, ...
 *     [WHERE ...] [RETURN ...]
 * The four alternatives cover the combinations of the two optional parts.
 */
update_edge_sentence
    : KW_UPDATE KW_EDGE INTEGER R_ARROW INTEGER
      KW_SET update_list where_clause return_clause {
        auto *update = new UpdateEdgeSentence();
        update->setSrcId($3);
        update->setDstId($5);
        update->setUpdateList($7);
        update->setWhereClause($8);
        update->setReturnClause($9);
        $$ = update;
    }
    | KW_UPDATE KW_OR KW_INSERT KW_EDGE INTEGER R_ARROW INTEGER
      KW_SET update_list where_clause return_clause {
        auto *update = new UpdateEdgeSentence();
        update->setInsertable(true);
        update->setSrcId($5);
        update->setDstId($7);
        update->setUpdateList($9);
        update->setWhereClause($10);
        update->setReturnClause($11);
        $$ = update;
    }
    | KW_UPDATE KW_EDGE INTEGER R_ARROW INTEGER AT INTEGER
      KW_SET update_list where_clause return_clause {
        auto *update = new UpdateEdgeSentence();
        update->setSrcId($3);
        update->setDstId($5);
        update->setRank($7);
        update->setUpdateList($9);
        update->setWhereClause($10);
        update->setReturnClause($11);
        $$ = update;
    }
    | KW_UPDATE KW_OR KW_INSERT KW_EDGE INTEGER R_ARROW INTEGER AT INTEGER
      KW_SET update_list where_clause return_clause {
        auto *update = new UpdateEdgeSentence();
        update->setInsertable(true);
        update->setSrcId($5);
        update->setDstId($7);
        update->setRank($9);
        update->setUpdateList($11);
        update->setWhereClause($12);
        update->setReturnClause($13);
        $$ = update;
    }
    ;
/* Sentences that insert or update data. */
mutate_sentence
    : insert_vertex_sentence                        { $$ = $1; }
    | insert_edge_sentence                          { $$ = $1; }
    | update_vertex_sentence                        { $$ = $1; }
    | update_edge_sentence                          { $$ = $1; }
    ;

/* Sentences that define or alter tags and edges. */
maintainance_sentence
    : define_tag_sentence                           { $$ = $1; }
    | define_edge_sentence                          { $$ = $1; }
    | alter_tag_sentence                            { $$ = $1; }
    | alter_edge_sentence                           { $$ = $1; }
    ;
/*
 * Start symbol: sentences separated by `;`, with a trailing `;` allowed.
 * The first sentence creates the Statement and publishes it through the
 * `statement` parse parameter; later sentences are appended to it.
 */
statement
    : maintainance_sentence {
        auto *created = new Statement($1);
        *statement = created;
        $$ = created;
    }
    | use_sentence {
        auto *created = new Statement($1);
        *statement = created;
        $$ = created;
    }
    | piped_sentence {
        auto *created = new Statement($1);
        *statement = created;
        $$ = created;
    }
    | assignment_sentence {
        auto *created = new Statement($1);
        *statement = created;
        $$ = created;
    }
    | mutate_sentence {
        auto *created = new Statement($1);
        *statement = created;
        $$ = created;
    }
    | statement SEMICOLON maintainance_sentence {
        $1->addSentence($3);
        $$ = $1;
    }
    | statement SEMICOLON use_sentence {
        $1->addSentence($3);
        $$ = $1;
    }
    | statement SEMICOLON piped_sentence {
        $1->addSentence($3);
        $$ = $1;
    }
    | statement SEMICOLON assignment_sentence {
        $1->addSentence($3);
        $$ = $1;
    }
    | statement SEMICOLON mutate_sentence {
        $1->addSentence($3);
        $$ = $1;
    }
    | statement SEMICOLON                           { $$ = $1; }
    ;
%%
// Error callback of the generated parser: stores "<msg> at <loc>" in the
// `errmsg` parse parameter, replacing whatever it held before.
void vesoft::VGraphParser::error(const vesoft::VGraphParser::location_type& loc,
                                 const std::string &msg) {
    std::ostringstream buffer;
    buffer << msg;
    buffer << " at ";
    buffer << loc;
    errmsg = buffer.str();
}
#include "VGraphScanner.h"
// Bridge between the parser, which calls a free yylex(), and the scanner
// object, whose member function does the actual scanning.
static int yylex(vesoft::VGraphParser::semantic_type* yylval,
                 vesoft::VGraphParser::location_type *yylloc,
                 vesoft::VGraphScanner& scanner) {
    const int token = scanner.yylex(yylval, yylloc);
    return token;
}
/* Generate a C++ scanner whose yylex() is a member of VGraphScanner. */
%option c++
%option yyclass="VGraphScanner"
/* No default rule (unmatched input must be handled explicitly) and no
 * yywrap() call at end of input. */
%option nodefault noyywrap
%option never-interactive
/* Have the generated scanner keep yylineno up to date. */
%option yylineno
%{
#include "GQLParser.h"
#include "VGraphScanner.h"
#include "VGraphParser.hpp"
// Runs before every rule action: advance the end of the current location
// by the length of the matched text.
#define YY_USER_ACTION yylloc->columns(yyleng);
using TokenType = vesoft::VGraphParser::token;
// Size of the buffer in which string literals are assembled.
static constexpr size_t MAX_STRING = 4096;
%}
/* Exclusive start condition that is active inside a string literal. */
%x STR
/* Keywords are matched case-insensitively by spelling out both cases of
 * every letter. */
GO ([Gg][Oo])
AS ([Aa][Ss])
TO ([Tt][Oo])
OR ([Oo][Rr])
USE ([Uu][Ss][Ee])
SET ([Ss][Ee][Tt])
FROM ([Ff][Rr][Oo][Mm])
WHERE ([Ww][Hh][Ee][Rr][Ee])
MATCH ([Mm][Aa][Tt][Cc][Hh])
INSERT ([Ii][Nn][Ss][Ee][Rr][Tt])
VALUES ([Vv][Aa][Ll][Uu][Ee][Ss])
RETURN ([Rr][Ee][Tt][Uu][Rr][Nn])
DEFINE ([Dd][Ee][Ff][Ii][Nn][Ee])
VERTEX ([Vv][Ee][Rr][Tt][Ee][Xx])
EDGE ([Ee][Dd][Gg][Ee])
UPDATE ([Uu][Pp][Dd][Aa][Tt][Ee])
ALTER ([Aa][Ll][Tt][Ee][Rr])
STEPS ([Ss][Tt][Ee][Pp][Ss])
OVER ([Oo][Vv][Ee][Rr])
UPTO ([Uu][Pp][Tt][Oo])
REVERSELY ([Rr][Ee][Vv][Ee][Rr][Ss][Ee][Ll][Yy])
NAMESPACE ([Nn][Aa][Mm][Ee][Ss][Pp][Aa][Cc][Ee])
TTL ([Tt][Tt][Ll])
/* INT and UINT are only building blocks for the sized type keywords; no
 * rule matches them on their own. */
INT ([Ii][Nn][Tt])
INT8 ({INT}8)
INT16 ({INT}16)
INT32 ({INT}32)
INT64 ({INT}64)
UINT ([Uu][Ii][Nn][Tt])
UINT8 ({UINT}8)
UINT16 ({UINT}16)
UINT32 ({UINT}32)
UINT64 ({UINT}64)
BIGINT ([Bb][Ii][Gg][Ii][Nn][Tt])
DOUBLE ([Dd][Oo][Uu][Bb][Ll][Ee])
STRING ([Ss][Tt][Rr][Ii][Nn][Gg])
BOOL ([Bb][Oo][Oo][Ll])
TAG ([Tt][Aa][Gg])
UNION ([Uu][Nn][Ii][Oo][Nn])
INTERSECT ([Ii][Nn][Tt][Ee][Rr][Ss][Ee][Cc][Tt])
MINUS ([Mm][Ii][Nn][Uu][Ss])
NO ([Nn][Oo])
OVERWRITE ([Oo][Vv][Ee][Rr][Ww][Rr][Ii][Tt][Ee])
TRUE ([Tt][Rr][Uu][Ee])
FALSE ([Ff][Aa][Ll][Ss][Ee])
/* Identifiers and the digit classes used by the numeric literal rules. */
ID ([_a-zA-Z][_a-zA-Z0-9]*)
DEC ([0-9])
HEX ([0-9a-fA-F])
OCT ([0-7])
%%
thread_local static char sbuf[MAX_STRING];
size_t pos = 0;
{GO} { return TokenType::KW_GO; }
{AS} { return TokenType::KW_AS; }
{TO} { return TokenType::KW_TO; }
{OR} { return TokenType::KW_OR; }
{USE} { return TokenType::KW_USE; }
{SET} { return TokenType::KW_SET; }
{FROM} { return TokenType::KW_FROM; }
{WHERE} { return TokenType::KW_WHERE; }
{MATCH} { return TokenType::KW_MATCH; }
{INSERT} { return TokenType::KW_INSERT; }
{VALUES} { return TokenType::KW_VALUES; }
{RETURN} { return TokenType::KW_RETURN; }
{DEFINE} { return TokenType::KW_DEFINE; }
{VERTEX} { return TokenType::KW_VERTEX; }
{EDGE} { return TokenType::KW_EDGE; }
{UPDATE} { return TokenType::KW_UPDATE; }
{ALTER} { return TokenType::KW_ALTER; }
{STEPS} { return TokenType::KW_STEPS; }
{OVER} { return TokenType::KW_OVER; }
{UPTO} { return TokenType::KW_UPTO; }
{REVERSELY} { return TokenType::KW_REVERSELY; }
{NAMESPACE} { return TokenType::KW_NAMESPACE; }
{TTL} { return TokenType::KW_TTL; }
{INT8} { return TokenType::KW_INT8; }
{INT16} { return TokenType::KW_INT16; }
{INT32} { return TokenType::KW_INT32; }
{INT64} { return TokenType::KW_INT64; }
{UINT8} { return TokenType::KW_UINT8; }
{UINT16} { return TokenType::KW_UINT16; }
{UINT32} { return TokenType::KW_UINT32; }
{UINT64} { return TokenType::KW_UINT64; }
{BIGINT} { return TokenType::KW_BIGINT; }
{DOUBLE} { return TokenType::KW_DOUBLE; }
{STRING} { return TokenType::KW_STRING; }
{BOOL} { return TokenType::KW_BOOL; }
{TAG} { return TokenType::KW_TAG; }
{UNION} { return TokenType::KW_UNION; }
{INTERSECT} { return TokenType::KW_INTERSECT; }
{MINUS} { return TokenType::KW_MINUS; }
{NO} { return TokenType::KW_NO; }
{OVERWRITE} { return TokenType::KW_OVERWRITE; }
{TRUE} { yylval->boolval = true; return TokenType::BOOL; }
{FALSE} { yylval->boolval = false; return TokenType::BOOL; }
"." { return TokenType::DOT; }
"," { return TokenType::COMMA; }
":" { return TokenType::COLON; }
";" { return TokenType::SEMICOLON; }
"@" { return TokenType::AT; }
"+" { return TokenType::ADD; }
"-" { return TokenType::SUB; }
"*" { return TokenType::MUL; }
"/" { return TokenType::DIV; }
"%" { return TokenType::MOD; }
"<" { return TokenType::LT; }
"<=" { return TokenType::LE; }
">" { return TokenType::GT; }
">=" { return TokenType::GE; }
"==" { return TokenType::EQ; }
"!=" { return TokenType::NE; }
"||" { return TokenType::OR; }
"&&" { return TokenType::AND; }
"|" { return TokenType::PIPE; }
"=" { return TokenType::ASSIGN; }
"(" { return TokenType::L_PAREN; }
")" { return TokenType::R_PAREN; }
"[" { return TokenType::L_BRACKET; }
"]" { return TokenType::R_BRACKET; }
"{" { return TokenType::L_BRACE; }
"}" { return TokenType::R_BRACE; }
"<-" { return TokenType::L_ARROW; }
"->" { return TokenType::R_ARROW; }
{ID}                        {
                                // Identifier: the matched text is handed to the
                                // parser as a heap-allocated std::string.
                                // TODO(dutor) Whether to forbid the ID format that simply consists of `_'
                                auto *text = new std::string(yytext, yyleng);
                                yylval->strval = text;
                                return TokenType::SYMBOL;
                            }
{DEC}+ { yylval->intval = ::atoll(yytext); return TokenType::INTEGER; }
0[Xx]{HEX}+ {
int64_t val = 0;
sscanf(yytext, "%lx", &val);
yylval->intval = val;
return TokenType::INTEGER;
}
0{OCT}+ {
int64_t val = 0;
sscanf(yytext, "%lo", &val);
yylval->intval = val;
return TokenType::INTEGER;
}
{DEC}+[Uu][Ll]? { yylval->intval = ::atoll(yytext); return TokenType::UINTEGER; }
{DEC}+\.{DEC}* { yylval->doubleval = ::atof(yytext); return TokenType::DOUBLE; }
{DEC}*\.{DEC}+ { yylval->doubleval = ::atof(yytext); return TokenType::DOUBLE; }
\${DEC}+ { yylval->intval = ::atoll(yytext + 1); return TokenType::COL_REF_ID; }
\${ID} { yylval->strval = new std::string(yytext + 1, yyleng - 1); return TokenType::VARIABLE; }
\" { BEGIN(STR); pos = 0; }
<STR>\" {
yylval->strval = new std::string(sbuf, pos);
BEGIN(INITIAL);
return TokenType::STRING;
}
<STR>\n { yyterminate(); }
<STR>[^\\\n\"]+ {
::strncpy(sbuf + pos, yytext, yyleng);
pos += yyleng;
}
<STR>\\{OCT}{1,3} {
int val = 0;
sscanf(yytext + 1, "%o", &val);
if (val > 0xFF) {
yyterminate();
}
sbuf[pos] = val;
pos++;
}
<STR>\\{DEC}+ { yyterminate(); }
<STR>\\n { sbuf[pos] = '\n'; pos++; }
<STR>\\t { sbuf[pos] = '\t'; pos++; }
<STR>\\r { sbuf[pos] = '\r'; pos++; }
<STR>\\b { sbuf[pos] = '\b'; pos++; }
<STR>\\f { sbuf[pos] = '\f'; pos++; }
<STR>\\(.|\n) { sbuf[pos] = yytext[1]; pos++; }
[ \r\t] { yylloc->step(); }
\n {
yylineno++;
yylloc->lines(yyleng);
yylloc->step();
}
. { printf("error %c\n", *yytext); yyterminate(); }
%%
# Declares one gtest executable built from `test_source` plus the parser
# object files, and registers it with CTest under the same name.
function(add_parser_unit_test test_name test_source)
    add_executable(${test_name} ${test_source} $<TARGET_OBJECTS:parser_obj>)
    target_link_libraries(${test_name} gtest gtest_main pthread)
    target_include_directories(${test_name} SYSTEM BEFORE PUBLIC ${FLEX_INCLUDE_DIRS})
    add_test(NAME ${test_name} COMMAND ${test_name})
endfunction()

add_parser_unit_test(parser_test ParserTest.cpp)
add_parser_unit_test(scanner_test ScannerTest.cpp)
/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
*
* This source code is licensed under Apache 2.0 License
* (found in the LICENSE.Apache file in the root directory)
*/
#include <gtest/gtest.h>
#include "parser/GQLParser.h"
// TODO(dutor) Inspect the internal structures to check on the syntax and semantics
namespace vesoft {
// GO sentences: step counts, OVER/WHERE/RETURN clauses and both forms of
// the FROM list. Every query is parsed by a fresh parser; the first failure
// ends the test.
TEST(Parser, Go) {
    const char *queries[] = {
        "GO FROM 1 AS person",
        "GO FROM 1 AS person;",
        "GO 2 STEPS FROM 1 AS person",
        "GO UPTO 2 STEPS FROM 1 AS person",
        "GO FROM 1 AS person OVER friend",
        "GO FROM 1 AS person OVER friend REVERSELY",
        "GO FROM 1 AS person RETURN person.name",
        "GO FROM 1 AS person RETURN person[manager].name,person.age",
        "GO FROM 1,2,3 AS person",
        "GO FROM [$1,$2,$5] AS person",
        "GO FROM 1,2,3 AS person WHERE person.name == \"dutor\"",
    };
    for (const char *text : queries) {
        GQLParser parser;
        std::string query = text;
        ASSERT_TRUE(parser.parse(query)) << parser.error();
    }
}
// USE NAMESPACE is accepted.
TEST(Parser, UseNamespace) {
    GQLParser parser;
    const std::string query("USE NAMESPACE ns");
    const bool accepted = parser.parse(query);
    ASSERT_TRUE(accepted) << parser.error();
}
// DEFINE TAG with several column types and a TTL on one column.
TEST(Parser, DefineTag) {
    GQLParser parser;
    const std::string query("DEFINE TAG person(name string, age uint8 TTL = 100, "
                            "married bool, salary double)");
    const bool accepted = parser.parse(query);
    ASSERT_TRUE(accepted) << parser.error();
}
// ALTER TAG with a single column carrying a TTL.
TEST(Parser, AlterTag) {
    GQLParser parser;
    const std::string query("ALTER TAG person(age uint8 TTL = 200)");
    const bool accepted = parser.parse(query);
    ASSERT_TRUE(accepted) << parser.error();
}
// Set operators between traversals, alone and chained. Every query is
// parsed by a fresh parser; the first failure ends the test.
TEST(Parser, Set) {
    const char *queries[] = {
        "GO FROM 1 AS person INTERSECT GO FROM 2 AS person",
        "GO FROM 1 AS person UNION GO FROM 2 AS person",
        "GO FROM 1 AS person MINUS GO FROM 2 AS person",
        "GO FROM 1 AS person MINUS GO FROM 2 AS person "
        "UNION GO FROM 3 AS person",
    };
    for (const char *text : queries) {
        GQLParser parser;
        std::string query = text;
        ASSERT_TRUE(parser.parse(query)) << parser.error();
    }
}
// Piped traversals, alone and mixed with a set operator. Every query is
// parsed by a fresh parser; the first failure ends the test.
TEST(Parser, Pipe) {
    const char *queries[] = {
        "GO FROM 1 AS person | GO FROM 2 AS person | GO FROM 3 AS person",
        "GO FROM 1 AS person MINUS GO FROM 2 AS person | GO FROM 3 AS person",
    };
    for (const char *text : queries) {
        GQLParser parser;
        std::string query = text;
        ASSERT_TRUE(parser.parse(query)) << parser.error();
    }
}
// INSERT VERTEX and its INSERT TAG spelling. Every query is parsed by a
// fresh parser; the first failure ends the test.
TEST(Parser, InsertVertex) {
    const char *queries[] = {
        "INSERT VERTEX person(name,age,married,salary) "
        "VALUES(12345: \"dutor\", 30, true, 3.14)",
        "INSERT TAG person(name,age,married,salary) "
        "VALUES(12345: \"dutor\", 30, true, 3.14)",
    };
    for (const char *text : queries) {
        GQLParser parser;
        std::string query = text;
        ASSERT_TRUE(parser.parse(query)) << parser.error();
    }
}
// UPDATE VERTEX with optional WHERE / RETURN, and the OR INSERT form.
// Every query is parsed by a fresh parser; the first failure ends the test.
TEST(Parser, UpdateVertex) {
    const char *queries[] = {
        "UPDATE VERTEX 12345 SET name=\"dutor\",age=30,married=true",
        "UPDATE VERTEX 12345 SET name=\"dutor\",age=31,married=true "
        "WHERE salary > 10000",
        "UPDATE VERTEX 12345 SET name=\"dutor\",age=30,married=true "
        "RETURN name,salary",
        "UPDATE OR INSERT VERTEX 12345 SET name=\"dutor\",age=30,married=true "
        "RETURN name,salary",
    };
    for (const char *text : queries) {
        GQLParser parser;
        std::string query = text;
        ASSERT_TRUE(parser.parse(query)) << parser.error();
    }
}
// INSERT EDGE with and without NO OVERWRITE and an `@rank`. Every query is
// parsed by a fresh parser; the first failure ends the test.
TEST(Parser, InsertEdge) {
    const char *queries[] = {
        "INSERT EDGE transfer(amount, time) "
        "VALUES(12345 -> 54321: 3.75, 1537408527)",
        "INSERT EDGE NO OVERWRITE transfer(amount, time) "
        "VALUES(12345 -> 54321: 3.75, 1537408527)",
        "INSERT EDGE transfer(amount, time) "
        "VALUES(12345 -> 54321 @1537408527: 3.75, 1537408527)",
        "INSERT EDGE NO OVERWRITE transfer(amount, time) "
        "VALUES(12345 -> 54321 @1537408527: 3.75, 1537408527)",
    };
    for (const char *text : queries) {
        GQLParser parser;
        std::string query = text;
        ASSERT_TRUE(parser.parse(query)) << parser.error();
    }
}
// UPDATE EDGE with optional WHERE / RETURN, and the OR INSERT form. Every
// query is parsed by a fresh parser; the first failure ends the test.
TEST(Parser, UpdateEdge) {
    const char *queries[] = {
        "UPDATE EDGE 12345 -> 54321 SET amount=3.14,time=1537408527",
        "UPDATE EDGE 12345 -> 54321 SET amount=3.14,time=1537408527 "
        "WHERE amount > 3.14",
        "UPDATE EDGE 12345 -> 54321 SET amount=3.14,time=1537408527 "
        "WHERE amount > 3.14 RETURN amount,time",
        "UPDATE OR INSERT EDGE 12345 -> 54321 SET amount=3.14,time=1537408527 "
        "WHERE amount > 3.14 RETURN amount,time",
    };
    for (const char *text : queries) {
        GQLParser parser;
        std::string query = text;
        ASSERT_TRUE(parser.parse(query)) << parser.error();
    }
}
} // namespace vesoft
/* Copyright (c) 2018 - present, VE Software Inc. All rights reserved
*
* This source code is licensed under Apache 2.0 License
* (found in the LICENSE.Apache file in the root directory)
*/
#include <gtest/gtest.h>
#include <sstream>
#include <vector>
#include <utility>
#include "parser/VGraphParser.hpp"
#include "parser/VGraphScanner.h"
// TODO(dutor) Check on the semantic value of tokens
namespace vesoft {
// Builds one long input by concatenating every lexeme in the table below,
// hands it to the scanner, and checks that the tokens come back in table
// order with the expected token type.
// Only the token type is compared; the semantic value written to `yylval`
// is not inspected.
TEST(Scanner, Basic) {
    using TokenType = vesoft::VGraphParser::token::yytokentype;
    using Expectation = std::pair<std::string, TokenType>;

    // Lexeme text (including any surrounding blanks) paired with the token
    // type the scanner is expected to report for it.
    const std::vector<Expectation> expectations = {
        // Punctuation and operators
        {" . ", TokenType::DOT},
        {" , ", TokenType::COMMA},
        {" : ", TokenType::COLON},
        {" ; ", TokenType::SEMICOLON},
        {" + ", TokenType::ADD},
        {" - ", TokenType::SUB},
        {" * ", TokenType::MUL},
        {" / ", TokenType::DIV},
        {" % ", TokenType::MOD},
        {" @ ", TokenType::AT},
        {" < ", TokenType::LT},
        {" <= ", TokenType::LE},
        {" > ", TokenType::GT},
        {" >= ", TokenType::GE},
        {" == ", TokenType::EQ},
        {" != ", TokenType::NE},
        {" || ", TokenType::OR},
        {" && ", TokenType::AND},
        {" | ", TokenType::PIPE},
        {" = ", TokenType::ASSIGN},
        {" ( ", TokenType::L_PAREN},
        {" ) ", TokenType::R_PAREN},
        {" [ ", TokenType::L_BRACKET},
        {" ] ", TokenType::R_BRACKET},
        {" { ", TokenType::L_BRACE},
        {" } ", TokenType::R_BRACE},
        {" <- ", TokenType::L_ARROW},
        {" -> ", TokenType::R_ARROW},

        // Keywords, each in upper-case and lower-case spelling
        {" GO ", TokenType::KW_GO},
        {" go ", TokenType::KW_GO},
        {" AS ", TokenType::KW_AS},
        {" as ", TokenType::KW_AS},
        {" TO ", TokenType::KW_TO},
        {" to ", TokenType::KW_TO},
        {" OR ", TokenType::KW_OR},
        {" or ", TokenType::KW_OR},
        {" USE ", TokenType::KW_USE},
        {" use ", TokenType::KW_USE},
        {" SET ", TokenType::KW_SET},
        {" set ", TokenType::KW_SET},
        {" FROM ", TokenType::KW_FROM},
        {" from ", TokenType::KW_FROM},
        {" WHERE ", TokenType::KW_WHERE},
        {" where ", TokenType::KW_WHERE},
        {" MATCH ", TokenType::KW_MATCH},
        {" match ", TokenType::KW_MATCH},
        {" INSERT ", TokenType::KW_INSERT},
        {" insert ", TokenType::KW_INSERT},
        {" VALUES ", TokenType::KW_VALUES},
        {" values ", TokenType::KW_VALUES},
        {" RETURN ", TokenType::KW_RETURN},
        {" return ", TokenType::KW_RETURN},
        {" DEFINE ", TokenType::KW_DEFINE},
        {" define ", TokenType::KW_DEFINE},
        {" VERTEX ", TokenType::KW_VERTEX},
        {" vertex ", TokenType::KW_VERTEX},
        {" EDGE ", TokenType::KW_EDGE},
        {" edge ", TokenType::KW_EDGE},
        {" UPDATE ", TokenType::KW_UPDATE},
        {" update ", TokenType::KW_UPDATE},
        {" ALTER ", TokenType::KW_ALTER},
        {" alter ", TokenType::KW_ALTER},
        {" STEPS ", TokenType::KW_STEPS},
        {" steps ", TokenType::KW_STEPS},
        {" OVER ", TokenType::KW_OVER},
        {" over ", TokenType::KW_OVER},
        {" UPTO ", TokenType::KW_UPTO},
        {" upto ", TokenType::KW_UPTO},
        {" REVERSELY ", TokenType::KW_REVERSELY},
        {" reversely ", TokenType::KW_REVERSELY},
        {" NAMESPACE ", TokenType::KW_NAMESPACE},
        {" namespace ", TokenType::KW_NAMESPACE},
        {" TTL ", TokenType::KW_TTL},
        {" ttl ", TokenType::KW_TTL},
        {" INT8 ", TokenType::KW_INT8},
        {" int8 ", TokenType::KW_INT8},
        {" INT16 ", TokenType::KW_INT16},
        {" int16 ", TokenType::KW_INT16},
        {" INT32 ", TokenType::KW_INT32},
        {" int32 ", TokenType::KW_INT32},
        {" INT64 ", TokenType::KW_INT64},
        {" int64 ", TokenType::KW_INT64},
        {" UINT8 ", TokenType::KW_UINT8},
        {" uint8 ", TokenType::KW_UINT8},
        {" UINT16 ", TokenType::KW_UINT16},
        {" uint16 ", TokenType::KW_UINT16},
        {" UINT32 ", TokenType::KW_UINT32},
        {" uint32 ", TokenType::KW_UINT32},
        {" UINT64 ", TokenType::KW_UINT64},
        {" uint64 ", TokenType::KW_UINT64},
        {" BIGINT ", TokenType::KW_BIGINT},
        {" bigint ", TokenType::KW_BIGINT},
        {" DOUBLE ", TokenType::KW_DOUBLE},
        {" double ", TokenType::KW_DOUBLE},
        {" STRING ", TokenType::KW_STRING},
        {" string ", TokenType::KW_STRING},
        {" BOOL ", TokenType::KW_BOOL},
        {" bool ", TokenType::KW_BOOL},
        {" TAG ", TokenType::KW_TAG},
        {" tag ", TokenType::KW_TAG},
        {" UNION ", TokenType::KW_UNION},
        {" union ", TokenType::KW_UNION},
        {" INTERSECT ", TokenType::KW_INTERSECT},
        {" intersect ", TokenType::KW_INTERSECT},
        {" MINUS ", TokenType::KW_MINUS},
        {" minus ", TokenType::KW_MINUS},

        // Identifiers
        {" v ", TokenType::SYMBOL},
        {" v1 ", TokenType::SYMBOL},
        {" var ", TokenType::SYMBOL},
        {" _var ", TokenType::SYMBOL},
        {" var123 ", TokenType::SYMBOL},
        {" _var123 ", TokenType::SYMBOL},

        // Integer literals: decimal, 0x/0X-prefixed, and leading-zero forms
        {" 123 ", TokenType::INTEGER},
        {" 0x123 ", TokenType::INTEGER},
        {" 0Xdeadbeef ", TokenType::INTEGER},
        {" 0123 ", TokenType::INTEGER},

        // Integer literals carrying a `u` / `UL` suffix
        {" 123u ", TokenType::UINTEGER},
        {" 123UL ", TokenType::UINTEGER},

        // Floating-point literals; a digit run may be missing on either side
        {" .456 ", TokenType::DOUBLE},
        {" 123.", TokenType::DOUBLE},
        {" 123.456 ", TokenType::DOUBLE},

        // `$`-prefixed references: numeric ones vs. named ones
        {" $1 ", TokenType::COL_REF_ID},
        {" $123 ", TokenType::COL_REF_ID},
        {" $_ ", TokenType::VARIABLE},
        {" $var ", TokenType::VARIABLE},

        // Double-quoted strings. The left side of each note is the exact text
        // fed to the scanner; the right side is the intended unescaped value
        // as recorded by the original author (not verified by this test).
        {"\"Hello\"", TokenType::STRING},           // "Hello" ==> Hello
        {"\"He\\nllo\"", TokenType::STRING},        // "He\nllo" ==> He<LF>llo
        {"\"He\\\nllo\"", TokenType::STRING},       // "He\<LF>llo", i.e. a backslash
                                                    // followed by a raw line feed
                                                    // ==> He<LF>llo
        {"\"Hell\\o\"", TokenType::STRING},         // "Hell\o" ==> Hello
        {"\"Hello\\\\\"", TokenType::STRING},       // "Hello\\" ==> Hello\ (one backslash)
        {"\"\\110ello\"", TokenType::STRING},       // "\110ello" ==> Hello
        {"\"\\\"Hello\\\"\"", TokenType::STRING},   // "\"Hello\"" ==> "Hello"
    };

    // Glue every lexeme together so the scanner sees one continuous input.
    std::string input;
    for (const auto &entry : expectations) {
        input += entry.first;
    }
    std::istringstream is(input);

    VGraphScanner scanner;
    scanner.switch_streams(&is, nullptr);

    // Out-parameters required by yylex(); their contents are ignored here.
    vesoft::VGraphParser::semantic_type yylval;
    vesoft::VGraphParser::location_type yyloc;

    // Pull exactly one token per table entry and compare types in order.
    for (const auto &entry : expectations) {
        const auto &lexeme = entry.first;
        auto expected = static_cast<int>(entry.second);
        auto actual = scanner.yylex(&yylval, &yyloc);
        ASSERT_EQ(expected, actual) << "Lex error for `" << lexeme <<"'";
    }
}
} // namespace vesoft
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment