From c0a3a509f7e0dd8a89e2f2f3ed2969e705650a33 Mon Sep 17 00:00:00 2001 From: FluorineDog <guilin.gou@zilliz.com> Date: Fri, 15 Jan 2021 18:23:50 +0800 Subject: [PATCH] Remove todos, implement chunk_data and chunk_scalar_index Signed-off-by: FluorineDog <guilin.gou@zilliz.com> --- internal/core/src/query/CMakeLists.txt | 3 +- internal/core/src/query/Expr.h | 2 - internal/core/src/query/Plan.cpp | 1 - .../query/{Search.cpp => SearchOnGrowing.cpp} | 40 +++++++++++++++-- .../src/query/{Search.h => SearchOnGrowing.h} | 28 ++++-------- internal/core/src/query/SearchOnSealed.h | 2 +- internal/core/src/query/SubQueryResult.h | 2 +- .../src/query/generated/ExecPlanNodeVisitor.h | 5 +++ .../query/visitors/ExecPlanNodeVisitor.cpp | 45 +++++++------------ internal/core/src/segcore/ConcurrentVector.h | 1 - internal/core/src/segcore/IndexingEntry.cpp | 3 -- internal/core/src/segcore/IndexingEntry.h | 7 ++- .../core/src/segcore/SegmentGrowingImpl.h | 18 +++++++- internal/core/src/segcore/SegmentInterface.h | 22 +++++++++ internal/core/src/segcore/collection_c.cpp | 3 +- internal/core/src/segcore/segment_c.cpp | 9 ---- 16 files changed, 113 insertions(+), 78 deletions(-) rename internal/core/src/query/{Search.cpp => SearchOnGrowing.cpp} (81%) rename internal/core/src/query/{Search.h => SearchOnGrowing.h} (57%) diff --git a/internal/core/src/query/CMakeLists.txt b/internal/core/src/query/CMakeLists.txt index d26b2a3a9..06b15abab 100644 --- a/internal/core/src/query/CMakeLists.txt +++ b/internal/core/src/query/CMakeLists.txt @@ -1,4 +1,3 @@ -# TODO set(MILVUS_QUERY_SRCS deprecated/BinaryQuery.cpp generated/PlanNode.cpp @@ -10,7 +9,7 @@ set(MILVUS_QUERY_SRCS visitors/VerifyPlanNodeVisitor.cpp visitors/VerifyExprVisitor.cpp Plan.cpp - Search.cpp + SearchOnGrowing.cpp SearchOnSealed.cpp SearchOnIndex.cpp SearchBruteForce.cpp diff --git a/internal/core/src/query/Expr.h b/internal/core/src/query/Expr.h index 255e31fcd..30816bfa8 100644 --- a/internal/core/src/query/Expr.h +++ b/internal/core/src/query/Expr.h @@ -40,7 +40,6 @@ struct UnaryExpr : Expr { ExprPtr child_; }; -// TODO: not enabled in sprint 1 struct BoolUnaryExpr : UnaryExpr { enum class OpType { LogicalNot }; OpType op_type_; @@ -50,7 +49,6 @@ struct BoolUnaryExpr : UnaryExpr { accept(ExprVisitor&) override; }; -// TODO: not enabled in sprint 1 struct BoolBinaryExpr : BinaryExpr { // Note: bitA - bitB == bitA & ~bitB, alias to LogicalMinus enum class OpType { LogicalAnd, LogicalOr, LogicalXor, LogicalMinus }; diff --git a/internal/core/src/query/Plan.cpp b/internal/core/src/query/Plan.cpp index 5dc83a4b3..8fc1cc804 100644 --- a/internal/core/src/query/Plan.cpp +++ b/internal/core/src/query/Plan.cpp @@ -187,7 +187,6 @@ Parser::ParseTermNode(const Json& out_body) { std::unique_ptr<VectorPlanNode> Parser::ParseVecNode(const Json& out_body) { Assert(out_body.is_object()); - // TODO add binary info Assert(out_body.size() == 1); auto iter = out_body.begin(); auto field_name = FieldName(iter.key()); diff --git a/internal/core/src/query/Search.cpp b/internal/core/src/query/SearchOnGrowing.cpp similarity index 81% rename from internal/core/src/query/Search.cpp rename to internal/core/src/query/SearchOnGrowing.cpp index 3ba611637..46aecb343 100644 --- a/internal/core/src/query/Search.cpp +++ b/internal/core/src/query/SearchOnGrowing.cpp @@ -9,7 +9,7 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License -#include "Search.h" +#include "SearchOnGrowing.h" #include <knowhere/index/vector_index/adapter/VectorAdapter.h> #include <knowhere/index/vector_index/VecIndexFactory.h> #include "segcore/Reduce.h" @@ -65,7 +65,6 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment, auto topK = info.topK_; auto total_count = topK * num_queries; auto metric_type = GetMetricType(info.metric_type_); - // TODO: optimize // step 3: small indexing search // std::vector<int64_t> final_uids(total_count, -1); @@ -77,10 +76,9 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment, const auto& indexing_entry = indexing_record.get_vec_entry(vecfield_offset); auto search_conf = indexing_entry.get_search_conf(topK); - // TODO: use sub_qr for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) { auto chunk_size = indexing_entry.get_chunk_size(); - auto indexing = indexing_entry.get_vec_indexing(chunk_id); + auto indexing = indexing_entry.get_indexing(chunk_id); auto sub_view = BitsetSubView(bitset, chunk_id * chunk_size, chunk_size); auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view); @@ -197,4 +195,38 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment, return Status::OK(); } +// TODO: refactor and merge this into one +template <typename VectorType> +void +SearchOnGrowing(const segcore::SegmentGrowingImpl& segment, + const query::QueryInfo& info, + const EmbeddedType<VectorType>* query_data, + int64_t num_queries, + Timestamp timestamp, + const faiss::BitsetView& bitset, + QueryResult& results) { + static_assert(IsVector<VectorType>); + if constexpr (std::is_same_v<VectorType, FloatVector>) { + FloatSearch(segment, info, query_data, num_queries, timestamp, bitset, results); + } else { + BinarySearch(segment, info, query_data, num_queries, timestamp, bitset, results); + } +} +template void +SearchOnGrowing<FloatVector>(const segcore::SegmentGrowingImpl& segment, + const query::QueryInfo& info, + const EmbeddedType<FloatVector>* query_data, + int64_t num_queries, + Timestamp timestamp, + const faiss::BitsetView& bitset, + QueryResult& results); +template void +SearchOnGrowing<BinaryVector>(const segcore::SegmentGrowingImpl& segment, + const query::QueryInfo& info, + const EmbeddedType<BinaryVector>* query_data, + int64_t num_queries, + Timestamp timestamp, + const faiss::BitsetView& bitset, + QueryResult& results); + } // namespace milvus::query diff --git a/internal/core/src/query/Search.h b/internal/core/src/query/SearchOnGrowing.h similarity index 57% rename from internal/core/src/query/Search.h rename to internal/core/src/query/SearchOnGrowing.h index 3aa8d1ec9..003bdcf35 100644 --- a/internal/core/src/query/Search.h +++ b/internal/core/src/query/SearchOnGrowing.h @@ -20,23 +20,13 @@ namespace milvus::query { using BitmapChunk = boost::dynamic_bitset<>; using BitmapSimple = std::deque<BitmapChunk>; -// TODO: merge these two search into one -// note: c++17 don't support optional ref -Status -FloatSearch(const segcore::SegmentGrowingImpl& segment, - const QueryInfo& info, - const float* query_data, - int64_t num_queries, - Timestamp timestamp, - const faiss::BitsetView& bitset, - QueryResult& results); - -Status -BinarySearch(const segcore::SegmentGrowingImpl& segment, - const query::QueryInfo& info, - const uint8_t* query_data, - int64_t num_queries, - Timestamp timestamp, - const faiss::BitsetView& bitset, - QueryResult& results); +template <typename VectorType> +void +SearchOnGrowing(const segcore::SegmentGrowingImpl& segment, + const query::QueryInfo& info, + const EmbeddedType<VectorType>* query_data, + int64_t num_queries, + Timestamp timestamp, + const faiss::BitsetView& bitset, + QueryResult& results); } // namespace milvus::query diff --git a/internal/core/src/query/SearchOnSealed.h b/internal/core/src/query/SearchOnSealed.h index 01f3864e0..227f1a15c 100644 --- a/internal/core/src/query/SearchOnSealed.h +++ b/internal/core/src/query/SearchOnSealed.h @@ -13,7 +13,7 @@ #include "segcore/SealedIndexingRecord.h" #include "query/PlanNode.h" -#include "query/Search.h" +#include "query/SearchOnGrowing.h" namespace milvus::query { diff --git a/internal/core/src/query/SubQueryResult.h b/internal/core/src/query/SubQueryResult.h index 6cf7aace5..7bb498039 100644 --- a/internal/core/src/query/SubQueryResult.h +++ b/internal/core/src/query/SubQueryResult.h @@ -33,7 +33,7 @@ class SubQueryResult { static constexpr bool is_descending(MetricType metric_type) { - // TODO + // TODO(dog): more types if (metric_type == MetricType::METRIC_INNER_PRODUCT) { return true; } else { diff --git a/internal/core/src/query/generated/ExecPlanNodeVisitor.h b/internal/core/src/query/generated/ExecPlanNodeVisitor.h index 2cd629edf..08ac2f694 100644 --- a/internal/core/src/query/generated/ExecPlanNodeVisitor.h +++ b/internal/core/src/query/generated/ExecPlanNodeVisitor.h @@ -46,6 +46,11 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor { return ret; } + private: + template <typename VectorType> + void + VectorVisitorImpl(VectorPlanNode& node); + private: // std::optional<RetType> ret_; const segcore::SegmentGrowing& segment_; diff --git a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp index fbe99079c..e43837a06 100644 --- a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp +++ b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp @@ -16,7 +16,7 @@ #include "query/generated/ExecPlanNodeVisitor.h" #include "segcore/SegmentGrowingImpl.h" #include "query/generated/ExecExprVisitor.h" -#include "query/Search.h" +#include "query/SearchOnGrowing.h" #include "query/SearchOnSealed.h" namespace milvus::query { @@ -45,6 +45,11 @@ class ExecPlanNodeVisitor : PlanNodeVisitor { return ret; } + private: + template <typename VectorType> + void + VectorVisitorImpl(VectorPlanNode& node); + private: // std::optional<RetType> ret_; const segcore::SegmentGrowing& segment_; @@ -56,15 +61,16 @@ class ExecPlanNodeVisitor : PlanNodeVisitor { } // namespace impl #endif +template <typename VectorType> void -ExecPlanNodeVisitor::visit(FloatVectorANNS& node) { +ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) { // TODO: optimize here, remove the dynamic cast assert(!ret_.has_value()); auto segment = dynamic_cast<const segcore::SegmentGrowingImpl*>(&segment_); AssertInfo(segment, "support SegmentSmallIndex Only"); RetType ret; auto& ph = placeholder_group_.at(0); - auto src_data = ph.get_blob<float>(); + auto src_data = ph.get_blob<EmbeddedType<VectorType>>(); auto num_queries = ph.num_of_queries_; aligned_vector<uint8_t> bitset_holder; @@ -80,39 +86,20 @@ ExecPlanNodeVisitor::visit(FloatVectorANNS& node) { SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_, view, ret); } else { - FloatSearch(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret); + SearchOnGrowing<VectorType>(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret); } ret_ = ret; } void -ExecPlanNodeVisitor::visit(BinaryVectorANNS& node) { - // TODO: optimize here, remove the dynamic cast - assert(!ret_.has_value()); - auto segment = dynamic_cast<const segcore::SegmentGrowingImpl*>(&segment_); - AssertInfo(segment, "support SegmentSmallIndex Only"); - RetType ret; - auto& ph = placeholder_group_.at(0); - auto src_data = ph.get_blob<uint8_t>(); - auto num_queries = ph.num_of_queries_; - - aligned_vector<uint8_t> bitset_holder; - BitsetView view; - if (node.predicate_.has_value()) { - ExecExprVisitor::RetType expr_ret = ExecExprVisitor(*segment).call_child(*node.predicate_.value()); - bitset_holder = AssembleNegBitmap(expr_ret); - view = BitsetView(bitset_holder.data(), bitset_holder.size() * 8); - } +ExecPlanNodeVisitor::visit(FloatVectorANNS& node) { + VectorVisitorImpl<FloatVector>(node); +} - auto& sealed_indexing = segment->get_sealed_indexing_record(); - if (sealed_indexing.is_ready(node.query_info_.field_offset_)) { - SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_, - view, ret); - } else { - BinarySearch(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret); - } - ret_ = ret; +void +ExecPlanNodeVisitor::visit(BinaryVectorANNS& node) { + VectorVisitorImpl<BinaryVector>(node); } } // namespace milvus::query diff --git a/internal/core/src/segcore/ConcurrentVector.h b/internal/core/src/segcore/ConcurrentVector.h index f883d0bc3..87cfd76ac 100644 --- a/internal/core/src/segcore/ConcurrentVector.h +++ b/internal/core/src/segcore/ConcurrentVector.h @@ -39,7 +39,6 @@ class ThreadSafeVector { if (size <= size_) { return; } - // TODO: use multithread to speedup std::lock_guard lck(mutex_); while (vec_.size() < size) { vec_.emplace_back(std::forward<Args...>(args...)); diff --git a/internal/core/src/segcore/IndexingEntry.cpp b/internal/core/src/segcore/IndexingEntry.cpp index 16545544c..1cd640595 100644 --- a/internal/core/src/segcore/IndexingEntry.cpp +++ b/internal/core/src/segcore/IndexingEntry.cpp @@ -17,8 +17,6 @@ namespace milvus::segcore { void VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) { - // TODO - assert(field_meta_.get_data_type() == DataType::VECTOR_FLOAT); auto dim = field_meta_.get_dim(); @@ -31,7 +29,6 @@ VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vector for (int chunk_id = ack_beg; chunk_id < ack_end; chunk_id++) { const auto& chunk = source->get_chunk(chunk_id); // build index for chunk - // TODO auto indexing = std::make_unique<knowhere::IVF>(); auto dataset = knowhere::GenDataset(source->get_chunk_size(), dim, chunk.data()); indexing->Train(dataset, conf); diff --git a/internal/core/src/segcore/IndexingEntry.h b/internal/core/src/segcore/IndexingEntry.h index a4cde732a..f790f5e1a 100644 --- a/internal/core/src/segcore/IndexingEntry.h +++ b/internal/core/src/segcore/IndexingEntry.h @@ -47,6 +47,9 @@ class IndexingEntry { return chunk_size_; } + virtual knowhere::Index* + get_indexing(int64_t chunk_id) const = 0; + protected: // additional info const FieldMeta& field_meta_; @@ -62,7 +65,7 @@ class ScalarIndexingEntry : public IndexingEntry { // concurrent knowhere::scalar::StructuredIndex<T>* - get_indexing(int64_t chunk_id) const { + get_indexing(int64_t chunk_id) const override { Assert(!field_meta_.is_vector()); return data_.at(chunk_id).get(); } @@ -80,7 +83,7 @@ class VecIndexingEntry : public IndexingEntry { // concurrent knowhere::VecIndex* - get_vec_indexing(int64_t chunk_id) const { + get_indexing(int64_t chunk_id) const override { Assert(field_meta_.is_vector()); return data_.at(chunk_id).get(); } diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index d3f05972b..15d129c35 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -39,8 +39,6 @@ class SegmentGrowingImpl : public SegmentGrowing { int64_t PreInsert(int64_t size) override; - // TODO: originally, id should be put into data_chunk - // TODO: Is it ok to put them the other side? Status Insert(int64_t reserved_offset, int64_t size, @@ -95,6 +93,22 @@ class SegmentGrowingImpl : public SegmentGrowing { return *schema_; } + // return count of index that has index, i.e., [0, num_chunk_index) have built index + int64_t + num_chunk_index_safe(FieldOffset field_offset) const final { + return indexing_record_.get_finished_ack(); + } + + const knowhere::Index* + chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const final { + return indexing_record_.get_entry(field_offset).get_indexing(chunk_id); + } + + int64_t + chunk_size() const final { + return chunk_size_; + } + public: ssize_t get_row_count() const override { diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 4ad2177e3..34c18b53e 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -14,6 +14,8 @@ #include "common/Schema.h" #include "query/Plan.h" #include "common/Span.h" +#include "IndexingEntry.h" +#include <knowhere/index/vector_index/VecIndex.h> namespace milvus::segcore { @@ -52,10 +54,30 @@ class SegmentInternalInterface : public SegmentInterface { return static_cast<Span<T>>(chunk_data_impl(field_offset, chunk_id)); } + virtual int64_t + num_chunk_index_safe(FieldOffset field_offset) const = 0; + + template <typename T> + const knowhere::scalar::StructuredIndex<T>& + chunk_scalar_index(FieldOffset field_offset, int64_t chunk_id) const { + static_assert(IsScalar<T>); + using IndexType = knowhere::scalar::StructuredIndex<T>; + auto base_ptr = chunk_index_impl(field_offset, chunk_id); + auto ptr = dynamic_cast<const IndexType*>(base_ptr); + AssertInfo(ptr, "entry mismatch"); + return *ptr; + } + + virtual int64_t + chunk_size() const = 0; + protected: // blob and row_count virtual SpanBase chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const = 0; + + virtual const knowhere::Index* + chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const = 0; }; } // namespace milvus::segcore diff --git a/internal/core/src/segcore/collection_c.cpp b/internal/core/src/segcore/collection_c.cpp index feb5fc7bf..34476504c 100644 --- a/internal/core/src/segcore/collection_c.cpp +++ b/internal/core/src/segcore/collection_c.cpp @@ -19,7 +19,6 @@ NewCollection(const char* schema_proto_blob) { auto collection = std::make_unique<milvus::segcore::Collection>(proto); - // TODO: delete print std::cout << "create collection " << collection->get_collection_name() << std::endl; return (void*)collection.release(); @@ -29,8 +28,8 @@ void DeleteCollection(CCollection collection) { auto col = (milvus::segcore::Collection*)collection; - // TODO: delete print std::cout << "delete collection " << col->get_collection_name() << std::endl; + delete col; } diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp index d7e6d3663..523af1842 100644 --- a/internal/core/src/segcore/segment_c.cpp +++ b/internal/core/src/segcore/segment_c.cpp @@ -27,7 +27,6 @@ NewSegment(CCollection collection, uint64_t segment_id) { auto segment = milvus::segcore::CreateGrowingSegment(col->get_schema()); - // TODO: delete print std::cout << "create segment " << segment_id << std::endl; return (void*)segment.release(); } @@ -36,7 +35,6 @@ void DeleteSegment(CSegmentBase segment) { auto s = (milvus::segcore::SegmentGrowing*)segment; - // TODO: delete print std::cout << "delete segment " << std::endl; delete s; } @@ -78,17 +76,12 @@ Insert(CSegmentBase c_segment, status.error_msg = strdup(e.what()); return status; } - - // TODO: delete print - // std::cout << "do segment insert, sizeof_per_row = " << sizeof_per_row << std::endl; } int64_t PreInsert(CSegmentBase c_segment, int64_t size) { auto segment = (milvus::segcore::SegmentGrowing*)c_segment; - // TODO: delete print - // std::cout << "PreInsert segment " << std::endl; return segment->PreInsert(size); } @@ -116,8 +109,6 @@ int64_t PreDelete(CSegmentBase c_segment, int64_t size) { auto segment = (milvus::segcore::SegmentGrowing*)c_segment; - // TODO: delete print - // std::cout << "PreDelete segment " << std::endl; return segment->PreDelete(size); } -- GitLab