From c0a3a509f7e0dd8a89e2f2f3ed2969e705650a33 Mon Sep 17 00:00:00 2001
From: FluorineDog <guilin.gou@zilliz.com>
Date: Fri, 15 Jan 2021 18:23:50 +0800
Subject: [PATCH] Remove todos, implement chunk_data and chunk_scalar_index

Signed-off-by: FluorineDog <guilin.gou@zilliz.com>
---
 internal/core/src/query/CMakeLists.txt        |  3 +-
 internal/core/src/query/Expr.h                |  2 -
 internal/core/src/query/Plan.cpp              |  1 -
 .../query/{Search.cpp => SearchOnGrowing.cpp} | 40 +++++++++++++++--
 .../src/query/{Search.h => SearchOnGrowing.h} | 28 ++++--------
 internal/core/src/query/SearchOnSealed.h      |  2 +-
 internal/core/src/query/SubQueryResult.h      |  2 +-
 .../src/query/generated/ExecPlanNodeVisitor.h |  5 +++
 .../query/visitors/ExecPlanNodeVisitor.cpp    | 45 +++++++------------
 internal/core/src/segcore/ConcurrentVector.h  |  1 -
 internal/core/src/segcore/IndexingEntry.cpp   |  3 --
 internal/core/src/segcore/IndexingEntry.h     |  7 ++-
 .../core/src/segcore/SegmentGrowingImpl.h     | 18 +++++++-
 internal/core/src/segcore/SegmentInterface.h  | 22 +++++++++
 internal/core/src/segcore/collection_c.cpp    |  3 +-
 internal/core/src/segcore/segment_c.cpp       |  9 ----
 16 files changed, 113 insertions(+), 78 deletions(-)
 rename internal/core/src/query/{Search.cpp => SearchOnGrowing.cpp} (81%)
 rename internal/core/src/query/{Search.h => SearchOnGrowing.h} (57%)

diff --git a/internal/core/src/query/CMakeLists.txt b/internal/core/src/query/CMakeLists.txt
index d26b2a3a9..06b15abab 100644
--- a/internal/core/src/query/CMakeLists.txt
+++ b/internal/core/src/query/CMakeLists.txt
@@ -1,4 +1,3 @@
-# TODO
 set(MILVUS_QUERY_SRCS
         deprecated/BinaryQuery.cpp
         generated/PlanNode.cpp
@@ -10,7 +9,7 @@ set(MILVUS_QUERY_SRCS
         visitors/VerifyPlanNodeVisitor.cpp
         visitors/VerifyExprVisitor.cpp
         Plan.cpp
-        Search.cpp
+        SearchOnGrowing.cpp
         SearchOnSealed.cpp
         SearchOnIndex.cpp
         SearchBruteForce.cpp
diff --git a/internal/core/src/query/Expr.h b/internal/core/src/query/Expr.h
index 255e31fcd..30816bfa8 100644
--- a/internal/core/src/query/Expr.h
+++ b/internal/core/src/query/Expr.h
@@ -40,7 +40,6 @@ struct UnaryExpr : Expr {
     ExprPtr child_;
 };
 
-// TODO: not enabled in sprint 1
 struct BoolUnaryExpr : UnaryExpr {
     enum class OpType { LogicalNot };
     OpType op_type_;
@@ -50,7 +49,6 @@ struct BoolUnaryExpr : UnaryExpr {
     accept(ExprVisitor&) override;
 };
 
-// TODO: not enabled in sprint 1
 struct BoolBinaryExpr : BinaryExpr {
     // Note: bitA - bitB == bitA & ~bitB, alias to LogicalMinus
     enum class OpType { LogicalAnd, LogicalOr, LogicalXor, LogicalMinus };
diff --git a/internal/core/src/query/Plan.cpp b/internal/core/src/query/Plan.cpp
index 5dc83a4b3..8fc1cc804 100644
--- a/internal/core/src/query/Plan.cpp
+++ b/internal/core/src/query/Plan.cpp
@@ -187,7 +187,6 @@ Parser::ParseTermNode(const Json& out_body) {
 std::unique_ptr<VectorPlanNode>
 Parser::ParseVecNode(const Json& out_body) {
     Assert(out_body.is_object());
-    // TODO add binary info
     Assert(out_body.size() == 1);
     auto iter = out_body.begin();
     auto field_name = FieldName(iter.key());
diff --git a/internal/core/src/query/Search.cpp b/internal/core/src/query/SearchOnGrowing.cpp
similarity index 81%
rename from internal/core/src/query/Search.cpp
rename to internal/core/src/query/SearchOnGrowing.cpp
index 3ba611637..46aecb343 100644
--- a/internal/core/src/query/Search.cpp
+++ b/internal/core/src/query/SearchOnGrowing.cpp
@@ -9,7 +9,7 @@
 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 // or implied. See the License for the specific language governing permissions and limitations under the License
 
-#include "Search.h"
+#include "SearchOnGrowing.h"
 #include <knowhere/index/vector_index/adapter/VectorAdapter.h>
 #include <knowhere/index/vector_index/VecIndexFactory.h>
 #include "segcore/Reduce.h"
@@ -65,7 +65,6 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
     auto topK = info.topK_;
     auto total_count = topK * num_queries;
     auto metric_type = GetMetricType(info.metric_type_);
-    // TODO: optimize
 
     // step 3: small indexing search
     // std::vector<int64_t> final_uids(total_count, -1);
@@ -77,10 +76,9 @@ FloatSearch(const segcore::SegmentGrowingImpl& segment,
     const auto& indexing_entry = indexing_record.get_vec_entry(vecfield_offset);
     auto search_conf = indexing_entry.get_search_conf(topK);
 
-    // TODO: use sub_qr
     for (int chunk_id = 0; chunk_id < max_indexed_id; ++chunk_id) {
         auto chunk_size = indexing_entry.get_chunk_size();
-        auto indexing = indexing_entry.get_vec_indexing(chunk_id);
+        auto indexing = indexing_entry.get_indexing(chunk_id);
 
         auto sub_view = BitsetSubView(bitset, chunk_id * chunk_size, chunk_size);
         auto sub_qr = SearchOnIndex(query_dataset, *indexing, search_conf, sub_view);
@@ -197,4 +195,38 @@ BinarySearch(const segcore::SegmentGrowingImpl& segment,
     return Status::OK();
 }
 
+// TODO: refactor and merge this into one
+template <typename VectorType>
+void
+SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
+                const query::QueryInfo& info,
+                const EmbeddedType<VectorType>* query_data,
+                int64_t num_queries,
+                Timestamp timestamp,
+                const faiss::BitsetView& bitset,
+                QueryResult& results) {
+    static_assert(IsVector<VectorType>);
+    if constexpr (std::is_same_v<VectorType, FloatVector>) {
+        FloatSearch(segment, info, query_data, num_queries, timestamp, bitset, results);
+    } else {
+        BinarySearch(segment, info, query_data, num_queries, timestamp, bitset, results);
+    }
+}
+template void
+SearchOnGrowing<FloatVector>(const segcore::SegmentGrowingImpl& segment,
+                             const query::QueryInfo& info,
+                             const EmbeddedType<FloatVector>* query_data,
+                             int64_t num_queries,
+                             Timestamp timestamp,
+                             const faiss::BitsetView& bitset,
+                             QueryResult& results);
+template void
+SearchOnGrowing<BinaryVector>(const segcore::SegmentGrowingImpl& segment,
+                              const query::QueryInfo& info,
+                              const EmbeddedType<BinaryVector>* query_data,
+                              int64_t num_queries,
+                              Timestamp timestamp,
+                              const faiss::BitsetView& bitset,
+                              QueryResult& results);
+
 }  // namespace milvus::query
diff --git a/internal/core/src/query/Search.h b/internal/core/src/query/SearchOnGrowing.h
similarity index 57%
rename from internal/core/src/query/Search.h
rename to internal/core/src/query/SearchOnGrowing.h
index 3aa8d1ec9..003bdcf35 100644
--- a/internal/core/src/query/Search.h
+++ b/internal/core/src/query/SearchOnGrowing.h
@@ -20,23 +20,13 @@ namespace milvus::query {
 using BitmapChunk = boost::dynamic_bitset<>;
 using BitmapSimple = std::deque<BitmapChunk>;
 
-// TODO: merge these two search into one
-// note: c++17 don't support optional ref
-Status
-FloatSearch(const segcore::SegmentGrowingImpl& segment,
-            const QueryInfo& info,
-            const float* query_data,
-            int64_t num_queries,
-            Timestamp timestamp,
-            const faiss::BitsetView& bitset,
-            QueryResult& results);
-
-Status
-BinarySearch(const segcore::SegmentGrowingImpl& segment,
-             const query::QueryInfo& info,
-             const uint8_t* query_data,
-             int64_t num_queries,
-             Timestamp timestamp,
-             const faiss::BitsetView& bitset,
-             QueryResult& results);
+template <typename VectorType>
+void
+SearchOnGrowing(const segcore::SegmentGrowingImpl& segment,
+                const query::QueryInfo& info,
+                const EmbeddedType<VectorType>* query_data,
+                int64_t num_queries,
+                Timestamp timestamp,
+                const faiss::BitsetView& bitset,
+                QueryResult& results);
 }  // namespace milvus::query
diff --git a/internal/core/src/query/SearchOnSealed.h b/internal/core/src/query/SearchOnSealed.h
index 01f3864e0..227f1a15c 100644
--- a/internal/core/src/query/SearchOnSealed.h
+++ b/internal/core/src/query/SearchOnSealed.h
@@ -13,7 +13,7 @@
 
 #include "segcore/SealedIndexingRecord.h"
 #include "query/PlanNode.h"
-#include "query/Search.h"
+#include "query/SearchOnGrowing.h"
 
 namespace milvus::query {
 
diff --git a/internal/core/src/query/SubQueryResult.h b/internal/core/src/query/SubQueryResult.h
index 6cf7aace5..7bb498039 100644
--- a/internal/core/src/query/SubQueryResult.h
+++ b/internal/core/src/query/SubQueryResult.h
@@ -33,7 +33,7 @@ class SubQueryResult {
 
     static constexpr bool
     is_descending(MetricType metric_type) {
-        // TODO
+        // TODO(dog): more types
         if (metric_type == MetricType::METRIC_INNER_PRODUCT) {
             return true;
         } else {
diff --git a/internal/core/src/query/generated/ExecPlanNodeVisitor.h b/internal/core/src/query/generated/ExecPlanNodeVisitor.h
index 2cd629edf..08ac2f694 100644
--- a/internal/core/src/query/generated/ExecPlanNodeVisitor.h
+++ b/internal/core/src/query/generated/ExecPlanNodeVisitor.h
@@ -46,6 +46,11 @@ class ExecPlanNodeVisitor : public PlanNodeVisitor {
         return ret;
     }
 
+ private:
+    template <typename VectorType>
+    void
+    VectorVisitorImpl(VectorPlanNode& node);
+
  private:
     // std::optional<RetType> ret_;
     const segcore::SegmentGrowing& segment_;
diff --git a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp
index fbe99079c..e43837a06 100644
--- a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp
+++ b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp
@@ -16,7 +16,7 @@
 #include "query/generated/ExecPlanNodeVisitor.h"
 #include "segcore/SegmentGrowingImpl.h"
 #include "query/generated/ExecExprVisitor.h"
-#include "query/Search.h"
+#include "query/SearchOnGrowing.h"
 #include "query/SearchOnSealed.h"
 
 namespace milvus::query {
@@ -45,6 +45,11 @@ class ExecPlanNodeVisitor : PlanNodeVisitor {
         return ret;
     }
 
+ private:
+    template <typename VectorType>
+    void
+    VectorVisitorImpl(VectorPlanNode& node);
+
  private:
     // std::optional<RetType> ret_;
     const segcore::SegmentGrowing& segment_;
@@ -56,15 +61,16 @@ class ExecPlanNodeVisitor : PlanNodeVisitor {
 }  // namespace impl
 #endif
 
+template <typename VectorType>
 void
-ExecPlanNodeVisitor::visit(FloatVectorANNS& node) {
+ExecPlanNodeVisitor::VectorVisitorImpl(VectorPlanNode& node) {
     // TODO: optimize here, remove the dynamic cast
     assert(!ret_.has_value());
     auto segment = dynamic_cast<const segcore::SegmentGrowingImpl*>(&segment_);
     AssertInfo(segment, "support SegmentSmallIndex Only");
     RetType ret;
     auto& ph = placeholder_group_.at(0);
-    auto src_data = ph.get_blob<float>();
+    auto src_data = ph.get_blob<EmbeddedType<VectorType>>();
     auto num_queries = ph.num_of_queries_;
 
     aligned_vector<uint8_t> bitset_holder;
@@ -80,39 +86,20 @@ ExecPlanNodeVisitor::visit(FloatVectorANNS& node) {
         SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_,
                        view, ret);
     } else {
-        FloatSearch(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret);
+        SearchOnGrowing<VectorType>(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret);
     }
 
     ret_ = ret;
 }
 
 void
-ExecPlanNodeVisitor::visit(BinaryVectorANNS& node) {
-    // TODO: optimize here, remove the dynamic cast
-    assert(!ret_.has_value());
-    auto segment = dynamic_cast<const segcore::SegmentGrowingImpl*>(&segment_);
-    AssertInfo(segment, "support SegmentSmallIndex Only");
-    RetType ret;
-    auto& ph = placeholder_group_.at(0);
-    auto src_data = ph.get_blob<uint8_t>();
-    auto num_queries = ph.num_of_queries_;
-
-    aligned_vector<uint8_t> bitset_holder;
-    BitsetView view;
-    if (node.predicate_.has_value()) {
-        ExecExprVisitor::RetType expr_ret = ExecExprVisitor(*segment).call_child(*node.predicate_.value());
-        bitset_holder = AssembleNegBitmap(expr_ret);
-        view = BitsetView(bitset_holder.data(), bitset_holder.size() * 8);
-    }
+ExecPlanNodeVisitor::visit(FloatVectorANNS& node) {
+    VectorVisitorImpl<FloatVector>(node);
+}
 
-    auto& sealed_indexing = segment->get_sealed_indexing_record();
-    if (sealed_indexing.is_ready(node.query_info_.field_offset_)) {
-        SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_,
-                       view, ret);
-    } else {
-        BinarySearch(*segment, node.query_info_, src_data, num_queries, timestamp_, view, ret);
-    }
-    ret_ = ret;
+void
+ExecPlanNodeVisitor::visit(BinaryVectorANNS& node) {
+    VectorVisitorImpl<BinaryVector>(node);
 }
 
 }  // namespace milvus::query
diff --git a/internal/core/src/segcore/ConcurrentVector.h b/internal/core/src/segcore/ConcurrentVector.h
index f883d0bc3..87cfd76ac 100644
--- a/internal/core/src/segcore/ConcurrentVector.h
+++ b/internal/core/src/segcore/ConcurrentVector.h
@@ -39,7 +39,6 @@ class ThreadSafeVector {
         if (size <= size_) {
             return;
         }
-        // TODO: use multithread to speedup
         std::lock_guard lck(mutex_);
         while (vec_.size() < size) {
             vec_.emplace_back(std::forward<Args...>(args...));
diff --git a/internal/core/src/segcore/IndexingEntry.cpp b/internal/core/src/segcore/IndexingEntry.cpp
index 16545544c..1cd640595 100644
--- a/internal/core/src/segcore/IndexingEntry.cpp
+++ b/internal/core/src/segcore/IndexingEntry.cpp
@@ -17,8 +17,6 @@
 namespace milvus::segcore {
 void
 VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const VectorBase* vec_base) {
-    // TODO
-
     assert(field_meta_.get_data_type() == DataType::VECTOR_FLOAT);
     auto dim = field_meta_.get_dim();
 
@@ -31,7 +29,6 @@ VecIndexingEntry::BuildIndexRange(int64_t ack_beg, int64_t ack_end, const Vector
     for (int chunk_id = ack_beg; chunk_id < ack_end; chunk_id++) {
         const auto& chunk = source->get_chunk(chunk_id);
         // build index for chunk
-        // TODO
         auto indexing = std::make_unique<knowhere::IVF>();
         auto dataset = knowhere::GenDataset(source->get_chunk_size(), dim, chunk.data());
         indexing->Train(dataset, conf);
diff --git a/internal/core/src/segcore/IndexingEntry.h b/internal/core/src/segcore/IndexingEntry.h
index a4cde732a..f790f5e1a 100644
--- a/internal/core/src/segcore/IndexingEntry.h
+++ b/internal/core/src/segcore/IndexingEntry.h
@@ -47,6 +47,9 @@ class IndexingEntry {
         return chunk_size_;
     }
 
+    virtual knowhere::Index*
+    get_indexing(int64_t chunk_id) const = 0;
+
  protected:
     // additional info
     const FieldMeta& field_meta_;
@@ -62,7 +65,7 @@ class ScalarIndexingEntry : public IndexingEntry {
 
     // concurrent
     knowhere::scalar::StructuredIndex<T>*
-    get_indexing(int64_t chunk_id) const {
+    get_indexing(int64_t chunk_id) const override {
         Assert(!field_meta_.is_vector());
         return data_.at(chunk_id).get();
     }
@@ -80,7 +83,7 @@ class VecIndexingEntry : public IndexingEntry {
 
     // concurrent
     knowhere::VecIndex*
-    get_vec_indexing(int64_t chunk_id) const {
+    get_indexing(int64_t chunk_id) const override {
         Assert(field_meta_.is_vector());
         return data_.at(chunk_id).get();
     }
diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h
index d3f05972b..15d129c35 100644
--- a/internal/core/src/segcore/SegmentGrowingImpl.h
+++ b/internal/core/src/segcore/SegmentGrowingImpl.h
@@ -39,8 +39,6 @@ class SegmentGrowingImpl : public SegmentGrowing {
     int64_t
     PreInsert(int64_t size) override;
 
-    // TODO: originally, id should be put into data_chunk
-    // TODO: Is it ok to put them the other side?
     Status
     Insert(int64_t reserved_offset,
            int64_t size,
@@ -95,6 +93,22 @@ class SegmentGrowingImpl : public SegmentGrowing {
         return *schema_;
     }
 
+    // return count of index that has index, i.e., [0, num_chunk_index) have built index
+    int64_t
+    num_chunk_index_safe(FieldOffset field_offset) const final {
+        return indexing_record_.get_finished_ack();
+    }
+
+    const knowhere::Index*
+    chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const final {
+        return indexing_record_.get_entry(field_offset).get_indexing(chunk_id);
+    }
+
+    int64_t
+    chunk_size() const final {
+        return chunk_size_;
+    }
+
  public:
     ssize_t
     get_row_count() const override {
diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h
index 4ad2177e3..34c18b53e 100644
--- a/internal/core/src/segcore/SegmentInterface.h
+++ b/internal/core/src/segcore/SegmentInterface.h
@@ -14,6 +14,8 @@
 #include "common/Schema.h"
 #include "query/Plan.h"
 #include "common/Span.h"
+#include "IndexingEntry.h"
+#include <knowhere/index/vector_index/VecIndex.h>
 
 namespace milvus::segcore {
 
@@ -52,10 +54,30 @@ class SegmentInternalInterface : public SegmentInterface {
         return static_cast<Span<T>>(chunk_data_impl(field_offset, chunk_id));
     }
 
+    virtual int64_t
+    num_chunk_index_safe(FieldOffset field_offset) const = 0;
+
+    template <typename T>
+    const knowhere::scalar::StructuredIndex<T>&
+    chunk_scalar_index(FieldOffset field_offset, int64_t chunk_id) const {
+        static_assert(IsScalar<T>);
+        using IndexType = knowhere::scalar::StructuredIndex<T>;
+        auto base_ptr = chunk_index_impl(field_offset, chunk_id);
+        auto ptr = dynamic_cast<const IndexType*>(base_ptr);
+        AssertInfo(ptr, "entry mismatch");
+        return *ptr;
+    }
+
+    virtual int64_t
+    chunk_size() const = 0;
+
  protected:
     // blob and row_count
     virtual SpanBase
     chunk_data_impl(FieldOffset field_offset, int64_t chunk_id) const = 0;
+
+    virtual const knowhere::Index*
+    chunk_index_impl(FieldOffset field_offset, int64_t chunk_id) const = 0;
 };
 
 }  // namespace milvus::segcore
diff --git a/internal/core/src/segcore/collection_c.cpp b/internal/core/src/segcore/collection_c.cpp
index feb5fc7bf..34476504c 100644
--- a/internal/core/src/segcore/collection_c.cpp
+++ b/internal/core/src/segcore/collection_c.cpp
@@ -19,7 +19,6 @@ NewCollection(const char* schema_proto_blob) {
 
     auto collection = std::make_unique<milvus::segcore::Collection>(proto);
 
-    // TODO: delete print
     std::cout << "create collection " << collection->get_collection_name() << std::endl;
 
     return (void*)collection.release();
@@ -29,8 +28,8 @@ void
 DeleteCollection(CCollection collection) {
     auto col = (milvus::segcore::Collection*)collection;
 
-    // TODO: delete print
     std::cout << "delete collection " << col->get_collection_name() << std::endl;
+
     delete col;
 }
 
diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp
index d7e6d3663..523af1842 100644
--- a/internal/core/src/segcore/segment_c.cpp
+++ b/internal/core/src/segcore/segment_c.cpp
@@ -27,7 +27,6 @@ NewSegment(CCollection collection, uint64_t segment_id) {
 
     auto segment = milvus::segcore::CreateGrowingSegment(col->get_schema());
 
-    // TODO: delete print
     std::cout << "create segment " << segment_id << std::endl;
     return (void*)segment.release();
 }
@@ -36,7 +35,6 @@ void
 DeleteSegment(CSegmentBase segment) {
     auto s = (milvus::segcore::SegmentGrowing*)segment;
 
-    // TODO: delete print
     std::cout << "delete segment " << std::endl;
     delete s;
 }
@@ -78,17 +76,12 @@ Insert(CSegmentBase c_segment,
         status.error_msg = strdup(e.what());
         return status;
     }
-
-    // TODO: delete print
-    // std::cout << "do segment insert, sizeof_per_row = " << sizeof_per_row << std::endl;
 }
 
 int64_t
 PreInsert(CSegmentBase c_segment, int64_t size) {
     auto segment = (milvus::segcore::SegmentGrowing*)c_segment;
 
-    // TODO: delete print
-    // std::cout << "PreInsert segment " << std::endl;
     return segment->PreInsert(size);
 }
 
@@ -116,8 +109,6 @@ int64_t
 PreDelete(CSegmentBase c_segment, int64_t size) {
     auto segment = (milvus::segcore::SegmentGrowing*)c_segment;
 
-    // TODO: delete print
-    // std::cout << "PreDelete segment " << std::endl;
     return segment->PreDelete(size);
 }
 
-- 
GitLab