diff --git a/src/context/Iterator.h b/src/context/Iterator.h index 8a4e083cc5639a202f7fe85816e5db3f948feffc..3d48eb89dd76b6861f04a25327d17ccc2e0c8236 100644 --- a/src/context/Iterator.h +++ b/src/context/Iterator.h @@ -252,25 +252,18 @@ public: Value getEdge() const override; // getVertices and getEdges arg batch interface use for subgraph + // Uniqueness of the result depends on the plan List getVertices() { DCHECK(iter_ == logicalRows_.begin()); List vertices; - std::unordered_set<Value> vids; for (; valid(); next()) { - auto vid = getColumn(kVid); - if (vid.isNull()) { - continue; - } - auto found = vids.find(vid); - if (found == vids.end()) { - vertices.values.emplace_back(getVertex()); - vids.emplace(std::move(vid)); - } + vertices.values.emplace_back(getVertex()); } reset(); return vertices; } + // Uniqueness of the result depends on the GN interface dedup List getEdges() { DCHECK(iter_ == logicalRows_.begin()); List edges; diff --git a/src/context/test/IteratorTest.cpp b/src/context/test/IteratorTest.cpp index dc373f9fc4dd726c13f5197ef4cc8204d9a14072..4ba47c69fb74ea93cb8c67ce7d7e838cf5add8ff 100644 --- a/src/context/test/IteratorTest.cpp +++ b/src/context/test/IteratorTest.cpp @@ -339,6 +339,7 @@ TEST(IteratorTest, GetNeighbor) { Vertex vertex; vertex.vid = folly::to<std::string>(i); vertex.tags.emplace_back(tag1); + expected.emplace_back(vertex); expected.emplace_back(std::move(vertex)); } Tag tag2; @@ -348,14 +349,15 @@ TEST(IteratorTest, GetNeighbor) { Vertex vertex; vertex.vid = folly::to<std::string>(i); vertex.tags.emplace_back(tag2); + expected.emplace_back(vertex); expected.emplace_back(std::move(vertex)); } List result = iter.getVertices(); - EXPECT_EQ(result.values.size(), 20); + EXPECT_EQ(result.values.size(), 40); EXPECT_EQ(result.values, expected); result = iter.getVertices(); - EXPECT_EQ(result.values.size(), 20); + EXPECT_EQ(result.values.size(), 40); EXPECT_EQ(result.values, expected); } { diff --git a/src/exec/query/DataCollectExecutor.cpp 
b/src/exec/query/DataCollectExecutor.cpp index bfb8ae0abd69cf4115c843e707f6d27569091825..2afd2a1dff95bc61f1d3e7ea9ef6a30e8c3ddc02 100644 --- a/src/exec/query/DataCollectExecutor.cpp +++ b/src/exec/query/DataCollectExecutor.cpp @@ -44,16 +44,40 @@ folly::Future<Status> DataCollectExecutor::doCollect() { Status DataCollectExecutor::collectSubgraph(const std::vector<std::string>& vars) { DataSet ds; ds.colNames = std::move(colNames_); + // The subgraph does not need duplicate vertices or edges, so dedup here directly + std::unordered_set<std::string> vids; + std::unordered_set<std::tuple<std::string, int64_t, int64_t, std::string>> edgeKeys; for (auto& var : vars) { auto& hist = ectx_->getHistory(var); for (auto& result : hist) { auto iter = result.iter(); if (iter->isGetNeighborsIter()) { - Row row; + List vertices; + List edges; auto* gnIter = static_cast<GetNeighborsIter*>(iter.get()); - row.values.emplace_back(gnIter->getVertices()); - row.values.emplace_back(gnIter->getEdges()); - ds.rows.emplace_back(std::move(row)); + auto originVertices = gnIter->getVertices(); + for (auto& v : originVertices.values) { + if (!v.isVertex()) { + continue; + } + if (vids.emplace(v.getVertex().vid).second) { + vertices.emplace_back(std::move(v)); + } + } + auto originEdges = gnIter->getEdges(); + for (auto& e : originEdges.values) { + if (!e.isEdge()) { + continue; + } + auto edgeKey = std::make_tuple(e.getEdge().src, + e.getEdge().type, + e.getEdge().ranking, + e.getEdge().dst); + if (edgeKeys.emplace(std::move(edgeKey)).second) { + edges.emplace_back(std::move(e)); + } + } + ds.rows.emplace_back(Row({std::move(vertices), std::move(edges)})); } else { return Status::Error("Iterator should be kind of GetNeighborIter."); } diff --git a/src/exec/query/test/DataCollectTest.cpp b/src/exec/query/test/DataCollectTest.cpp index c83dad84fc4d8ac0dbfae21afce4368eac9c6971..51589a576e0747159cba71e75162b7cf34ce7fd0 100644 --- a/src/exec/query/test/DataCollectTest.cpp +++ 
b/src/exec/query/test/DataCollectTest.cpp @@ -144,8 +144,34 @@ TEST_F(DataCollectTest, CollectSubgraph) { auto iter = input.iter(); auto* gNIter = static_cast<GetNeighborsIter*>(iter.get()); Row row; - row.values.emplace_back(gNIter->getVertices()); - row.values.emplace_back(gNIter->getEdges()); + std::unordered_set<std::string> vids; + std::unordered_set<std::tuple<std::string, int64_t, int64_t, std::string>> edgeKeys; + List vertices; + List edges; + auto originVertices = gNIter->getVertices(); + for (auto& v : originVertices.values) { + if (!v.isVertex()) { + continue; + } + if (vids.emplace(v.getVertex().vid).second) { + vertices.emplace_back(std::move(v)); + } + } + auto originEdges = gNIter->getEdges(); + for (auto& e : originEdges.values) { + if (!e.isEdge()) { + continue; + } + auto edgeKey = std::make_tuple(e.getEdge().src, + e.getEdge().type, + e.getEdge().ranking, + e.getEdge().dst); + if (edgeKeys.emplace(std::move(edgeKey)).second) { + edges.emplace_back(std::move(e)); + } + } + row.values.emplace_back(std::move(vertices)); + row.values.emplace_back(std::move(edges)); expected.rows.emplace_back(std::move(row)); EXPECT_EQ(result.value().getDataSet(), expected); diff --git a/src/validator/GetSubgraphValidator.cpp b/src/validator/GetSubgraphValidator.cpp index 15403cbd14ff8738580ddf0a7337e384cb5fe3d1..0b0498093c721248582253ba577173bbb068d792 100644 --- a/src/validator/GetSubgraphValidator.cpp +++ b/src/validator/GetSubgraphValidator.cpp @@ -180,7 +180,8 @@ Status GetSubgraphValidator::toPlan() { std::move(vertexProps), std::move(edgeProps), std::move(statProps), - std::move(exprs)); + std::move(exprs), + true /*subgraph does not need duplicates*/); gn1->setInputVar(vidsToSave); auto* columns = new YieldColumns();