diff --git a/internal/core/src/common/FieldMeta.h b/internal/core/src/common/FieldMeta.h index 00db216cf1bba5232697ca74804d3763f50cf9b7..f1967facac54ac0b176e1cc164035c0bd2e309c2 100644 --- a/internal/core/src/common/FieldMeta.h +++ b/internal/core/src/common/FieldMeta.h @@ -84,12 +84,12 @@ datatype_is_vector(DataType datatype) { struct FieldMeta { public: - FieldMeta(std::string_view name, DataType type) : name_(name), type_(type) { + FieldMeta(const FieldName& name, FieldId id, DataType type) : name_(name), id_(id), type_(type) { Assert(!is_vector()); } - FieldMeta(std::string_view name, DataType type, int64_t dim, MetricType metric_type) - : name_(name), type_(type), vector_info_(VectorInfo{dim, metric_type}) { + FieldMeta(const FieldName& name, FieldId id, DataType type, int64_t dim, MetricType metric_type) + : name_(name), id_(id), type_(type), vector_info_(VectorInfo{dim, metric_type}) { Assert(is_vector()); } @@ -113,11 +113,16 @@ struct FieldMeta { return vector_info_->metric_type_; } - const std::string& + const FieldName& get_name() const { return name_; } + const FieldId& + get_id() const { + return id_; + } + DataType get_data_type() const { return type_; @@ -137,7 +142,8 @@ struct FieldMeta { int64_t dim_; MetricType metric_type_; }; - std::string name_; + FieldName name_; + FieldId id_; DataType type_ = DataType::NONE; std::optional<VectorInfo> vector_info_; }; diff --git a/internal/core/src/common/Schema.cpp b/internal/core/src/common/Schema.cpp index c7b91a7b4c03d2bd0d1873a2382bd25489944870..dc3e69bcbade5f706c1db5ced4051a72021c8d61 100644 --- a/internal/core/src/common/Schema.cpp +++ b/internal/core/src/common/Schema.cpp @@ -35,6 +35,7 @@ Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) { std::set<std::string> system_field_names = {"RowID", "Timestamp"}; for (const milvus::proto::schema::FieldSchema& child : schema_proto.fields()) { + auto field_offset = FieldOffset(schema->size()); auto field_id = child.fieldid(); auto name = child.name(); if (field_id < 100) { @@ -47,7 +48,7 @@ Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) { if (child.is_primary_key()) { AssertInfo(!schema->primary_key_offset_opt_.has_value(), "repetitive primary key"); - schema->primary_key_offset_opt_ = schema->size(); + schema->primary_key_offset_opt_ = field_offset; } if (datatype_is_vector(data_type)) { @@ -63,9 +64,9 @@ Schema::ParseFrom(const milvus::proto::schema::CollectionSchema& schema_proto) { auto dim = boost::lexical_cast<int64_t>(type_map.at("dim")); AssertInfo(index_map.count("metric_type"), "index not found"); auto metric_type = GetMetricType(index_map.at("metric_type")); - schema->AddField(name, data_type, dim, metric_type); + schema->AddField(FieldName(name), FieldId(field_id), data_type, dim, metric_type); } else { - schema->AddField(name, data_type); + schema->AddField(FieldName(name), FieldId(field_id), data_type); } } return schema; diff --git a/internal/core/src/common/Schema.h b/internal/core/src/common/Schema.h index 7e1f75af691aef8587cd5b1d73661317bf911a4a..b7bf8584719af83ee7cfb3f346cae4f94202e2a3 100644 --- a/internal/core/src/common/Schema.h +++ b/internal/core/src/common/Schema.h @@ -24,14 +24,32 @@ namespace milvus { class Schema { public: void - AddField(std::string_view field_name, DataType data_type) { - auto field_meta = FieldMeta(field_name, data_type); + AddDebugField(const std::string& name, DataType data_type) { + static int64_t debug_id = 1000; + this->AddField(FieldName(name), FieldId(debug_id), data_type); + debug_id++; + } + + // auto gen field_id for convenience + void + AddDebugField(const std::string& name, DataType data_type, int64_t dim, MetricType metric_type) { + static int64_t debug_id = 2000; + auto field_meta = FieldMeta(FieldName(name), FieldId(debug_id), data_type, dim, metric_type); + debug_id++; this->AddField(std::move(field_meta)); } + // scalar type void - AddField(std::string_view field_name, DataType data_type, int64_t dim, MetricType metric_type) { - auto field_meta = FieldMeta(field_name, data_type, dim, metric_type); + AddField(const FieldName& name, const FieldId id, DataType data_type) { + auto field_meta = FieldMeta(name, id, data_type); + this->AddField(std::move(field_meta)); + } + + // vector type + void + AddField(const FieldName& name, const FieldId id, DataType data_type, int64_t dim, MetricType metric_type) { + auto field_meta = FieldMeta(name, id, data_type, dim, metric_type); this->AddField(std::move(field_meta)); } @@ -61,10 +79,10 @@ class Schema { } const FieldMeta& - operator[](int field_index) const { - Assert(field_index >= 0); - Assert(field_index < fields_.size()); - return fields_[field_index]; + operator[](FieldOffset field_offset) const { + Assert(field_offset.get() >= 0); + Assert(field_offset.get() < fields_.size()); + return fields_[field_offset.get()]; } auto @@ -72,34 +90,46 @@ class Schema { return total_sizeof_; } - const std::vector<int>& + const std::vector<int64_t>& get_sizeof_infos() const { return sizeof_infos_; } - std::optional<int> - get_offset(const std::string& field_name) const { - if (!offsets_.count(field_name)) { + [[deprecated]] std::optional<FieldOffset> + get_offset_opt(const FieldName& field_name) const { + if (!name_offsets_.count(field_name)) { return std::nullopt; } else { - return offsets_.at(field_name); + return name_offsets_.at(field_name); } } + FieldOffset + get_offset(const FieldName& field_name) const { + Assert(name_offsets_.count(field_name)); + return name_offsets_.at(field_name); + } + + FieldOffset + get_offset(const FieldId& field_id) const { + Assert(id_offsets_.count(field_id)); + return id_offsets_.at(field_id); + } + const std::vector<FieldMeta>& get_fields() const { return fields_; } const FieldMeta& - operator[](const std::string& field_name) const { - auto offset_iter = offsets_.find(field_name); - AssertInfo(offset_iter != offsets_.end(), "Cannot found field_name: " + field_name); + operator[](const FieldName& field_name) const { + auto offset_iter = name_offsets_.find(field_name); + AssertInfo(offset_iter != name_offsets_.end(), "Cannot find field_name: " + field_name.get()); auto offset = offset_iter->second; return (*this)[offset]; } - std::optional<int> + std::optional<FieldOffset> get_primary_key_offset() const { return primary_key_offset_opt_; } @@ -112,7 +142,10 @@ class Schema { AddField(FieldMeta&& field_meta) { auto offset = fields_.size(); fields_.emplace_back(field_meta); - offsets_.emplace(field_meta.get_name(), offset); + AssertInfo(!name_offsets_.count(field_meta.get_name()), "duplicated field name"); + name_offsets_.emplace(field_meta.get_name(), offset); + AssertInfo(!id_offsets_.count(field_meta.get_id()), "duplicated field id"); + id_offsets_.emplace(field_meta.get_id(), offset); auto field_sizeof = field_meta.get_sizeof(); sizeof_infos_.push_back(std::move(field_sizeof)); total_sizeof_ += field_sizeof; @@ -123,11 +156,12 @@ class Schema { std::vector<FieldMeta> fields_; // a mapping for random access - std::unordered_map<std::string, int> offsets_; - std::vector<int> sizeof_infos_; + std::unordered_map<FieldName, FieldOffset> name_offsets_; // field_name -> offset + std::unordered_map<FieldId, FieldOffset> id_offsets_; // field_id -> offset + std::vector<int64_t> sizeof_infos_; int total_sizeof_ = 0; bool is_auto_id_ = true; - std::optional<int> primary_key_offset_opt_; + std::optional<FieldOffset> primary_key_offset_opt_; }; using SchemaPtr = std::shared_ptr<Schema>; diff --git a/internal/core/src/common/Types.h b/internal/core/src/common/Types.h index 9d36a9c505920e73270040413b3f9a0b11529c8b..693284faf6b89f8342d1cf6a7e0ec2d456858841 100644 --- a/internal/core/src/common/Types.h +++ b/internal/core/src/common/Types.h @@ -16,6 +16,7 @@ #include <boost/align/aligned_allocator.hpp> #include <memory> #include <vector> +#include <NamedType/named_type.hpp> namespace milvus { using Timestamp = uint64_t; // TODO: use TiKV-like timestamp @@ -70,4 +71,8 @@ struct QueryResult { using QueryResultPtr = std::shared_ptr<QueryResult>; +using FieldId = fluent::NamedType<int64_t, struct FieldIdTag, fluent::Comparable, fluent::Hashable>; +using FieldName = fluent::NamedType<std::string, struct FieldNameTag, fluent::Comparable, fluent::Hashable>; +using FieldOffset = fluent::NamedType<int64_t, struct FieldOffsetTag, fluent::Comparable, fluent::Hashable>; + } // namespace milvus diff --git a/internal/core/src/query/Expr.h b/internal/core/src/query/Expr.h index 85cf4db42f358be7b4870220e69ab7ea56a505a0..255e31fcdd8455111ca3a6a443af7c1d9b3f9a3a 100644 --- a/internal/core/src/query/Expr.h +++ b/internal/core/src/query/Expr.h @@ -61,10 +61,8 @@ struct BoolBinaryExpr : BinaryExpr { accept(ExprVisitor&) override; }; -using FieldId = std::string; - struct TermExpr : Expr { - FieldId field_id_; + FieldOffset field_offset_; DataType data_type_ = DataType::NONE; // std::vector<std::any> terms_; @@ -78,7 +76,7 @@ struct TermExpr : Expr { }; struct RangeExpr : Expr { - FieldId field_id_; + FieldOffset field_offset_; DataType data_type_ = DataType::NONE; enum class OpType { GreaterThan = 0, GreaterEqual = 1, LessThan = 2, LessEqual = 3, Equal, NotEqual }; static const std::map<std::string, OpType> mapping_; // op_name -> op diff --git a/internal/core/src/query/Plan.cpp b/internal/core/src/query/Plan.cpp index d2a8bf04b33237bea0770dbd7d9a576e6838f4d0..5dc83a4b3a6f33f2bb7597c9b4c576ff81424a59 100644 --- a/internal/core/src/query/Plan.cpp +++ b/internal/core/src/query/Plan.cpp @@ -87,15 +87,15 @@ class Parser { template <typename T> ExprPtr - ParseRangeNodeImpl(const std::string& field_name, const Json& body); + ParseRangeNodeImpl(const FieldName& field_name, const Json& body); template <typename T> ExprPtr - ParseTermNodeImpl(const std::string& field_name, const Json& body); + ParseTermNodeImpl(const FieldName& field_name, const Json& body); private: const Schema& schema; - std::map<std::string, FieldId> tag2field_; // PlaceholderName -> FieldId + std::map<std::string, FieldOffset> tag2field_; // PlaceholderName -> field offset std::optional<std::unique_ptr<VectorPlanNode>> vector_node_opt_; }; @@ -104,7 +104,7 @@ Parser::ParseRangeNode(const Json& out_body) { Assert(out_body.is_object()); Assert(out_body.size() == 1); auto out_iter = out_body.begin(); - auto field_name = out_iter.key(); + auto field_name = FieldName(out_iter.key()); auto body = out_iter.value(); auto data_type = schema[field_name].get_data_type(); Assert(!datatype_is_vector(data_type)); @@ -152,7 +152,7 @@ ExprPtr Parser::ParseTermNode(const Json& out_body) { Assert(out_body.size() == 1); auto out_iter = out_body.begin(); - auto field_name = out_iter.key(); + auto field_name = FieldName(out_iter.key()); auto body = out_iter.value(); auto data_type = schema[field_name].get_data_type(); Assert(!datatype_is_vector(data_type)); @@ -190,7 +190,7 @@ Parser::ParseVecNode(const Json& out_body) { // TODO add binary info Assert(out_body.size() == 1); auto iter = out_body.begin(); - std::string field_name = iter.key(); + auto field_name = FieldName(iter.key()); auto& vec_info = iter.value(); Assert(vec_info.is_object()); @@ -198,8 +198,7 @@ Parser::ParseVecNode(const Json& out_body) { AssertInfo(topK > 0, "topK must greater than 0"); AssertInfo(topK < 16384, "topK is too large"); - auto field_offset_opt = schema.get_offset(field_name); - AssertInfo(field_offset_opt.has_value(), "field_name(" + field_name + ") not found"); + auto field_offset = schema.get_offset(field_name); auto vec_node = [&]() -> std::unique_ptr<VectorPlanNode> { auto field_meta = schema.operator[](field_name); @@ -213,24 +212,24 @@ Parser::ParseVecNode(const Json& out_body) { vec_node->query_info_.topK_ = topK; vec_node->query_info_.metric_type_ = vec_info.at("metric_type"); vec_node->query_info_.search_params_ = vec_info.at("params"); - vec_node->query_info_.field_id_ = field_name; - vec_node->query_info_.field_offset_ = field_offset_opt.value(); + vec_node->query_info_.field_offset_ = field_offset; vec_node->placeholder_tag_ = vec_info.at("query"); auto tag = vec_node->placeholder_tag_; AssertInfo(!tag2field_.count(tag), "duplicated placeholder tag"); - tag2field_.emplace(tag, field_name); + tag2field_.emplace(tag, field_offset); return vec_node; } template <typename T> ExprPtr -Parser::ParseTermNodeImpl(const std::string& field_name, const Json& body) { +Parser::ParseTermNodeImpl(const FieldName& field_name, const Json& body) { auto expr = std::make_unique<TermExprImpl<T>>(); + auto field_offset = schema.get_offset(field_name); auto data_type = schema[field_name].get_data_type(); Assert(body.is_object()); auto values = body["values"]; - expr->field_id_ = field_name; + expr->field_offset_ = field_offset; expr->data_type_ = data_type; for (auto& value : values) { if constexpr (std::is_same_v<T, bool>) { @@ -252,11 +251,12 @@ Parser::ParseTermNodeImpl(const std::string& field_name, const Json& body) { template <typename T> ExprPtr -Parser::ParseRangeNodeImpl(const std::string& field_name, const Json& body) { +Parser::ParseRangeNodeImpl(const FieldName& field_name, const Json& body) { auto expr = std::make_unique<RangeExprImpl<T>>(); - auto data_type = schema[field_name].get_data_type(); + auto field_meta = schema[field_name]; + auto data_type = field_meta.get_data_type(); expr->data_type_ = data_type; - expr->field_id_ = field_name; + expr->field_offset_ = schema.get_offset(field_name); Assert(body.is_object()); for (auto& item : body.items()) { auto op_name = boost::algorithm::to_lower_copy(std::string(item.key())); @@ -291,8 +291,8 @@ ParsePlaceholderGroup(const Plan* plan, const std::string& blob) { Placeholder element; element.tag_ = info.tag(); Assert(plan->tag2field_.count(element.tag_)); - auto field_id = plan->tag2field_.at(element.tag_); - auto& field_meta = plan->schema_[field_id]; + auto field_offset = plan->tag2field_.at(element.tag_); + auto& field_meta = plan->schema_[field_offset]; element.num_of_queries_ = info.values_size(); AssertInfo(element.num_of_queries_, "must have queries"); Assert(element.num_of_queries_ > 0); diff --git a/internal/core/src/query/PlanImpl.h b/internal/core/src/query/PlanImpl.h index 1d33eb18ae0aa1abc916b3986587c74af75b6a07..e09039e06fbe1fd39488f24eaab39987a5b501fa 100644 --- a/internal/core/src/query/PlanImpl.h +++ b/internal/core/src/query/PlanImpl.h @@ -33,7 +33,7 @@ struct Plan { public: const Schema& schema_; std::unique_ptr<VectorPlanNode> plan_node_; - std::map<std::string, FieldId> tag2field_; // PlaceholderName -> FieldId + std::map<std::string, FieldOffset> tag2field_; // PlaceholderName -> FieldOffset std::vector<std::string> target_entries_; // TODO: add move extra info }; diff --git a/internal/core/src/query/PlanNode.h b/internal/core/src/query/PlanNode.h index 302f618058fd65b39cc6f5572dbe1e489e6be14d..bbe5e9d35d6c32626f4ff3647c42ffa2324c0f00 100644 --- a/internal/core/src/query/PlanNode.h +++ b/internal/core/src/query/PlanNode.h @@ -40,8 +40,7 @@ using PlanNodePtr = std::unique_ptr<PlanNode>; struct QueryInfo { int64_t topK_; - FieldId field_id_; - int64_t field_offset_; + FieldOffset field_offset_; std::string metric_type_; // TODO: use enum nlohmann::json search_params_; }; diff --git a/internal/core/src/query/Search.cpp b/internal/core/src/query/Search.cpp index e27863d258578ca0b08cd1241657e273c8934c27..f812c9fd6f906023408d519219ff9e81a25cda4a 100644 --- a/internal/core/src/query/Search.cpp +++ b/internal/core/src/query/Search.cpp @@ -57,9 +57,7 @@ FloatSearch(const segcore::SegmentSmallIndex& segment, // step 2.1: get meta // step 2.2: get which vector field to search - auto vecfield_offset_opt = schema.get_offset(info.field_id_); - Assert(vecfield_offset_opt.has_value()); - auto vecfield_offset = vecfield_offset_opt.value(); + auto vecfield_offset = info.field_offset_; auto& field = schema[vecfield_offset]; Assert(field.get_data_type() == DataType::VECTOR_FLOAT); @@ -154,9 +152,7 @@ BinarySearch(const segcore::SegmentSmallIndex& segment, // step 2.1: get meta // step 2.2: get which vector field to search - auto vecfield_offset_opt = schema.get_offset(info.field_id_); - Assert(vecfield_offset_opt.has_value()); - auto vecfield_offset = vecfield_offset_opt.value(); + auto vecfield_offset = info.field_offset_; auto& field = schema[vecfield_offset]; Assert(field.get_data_type() == DataType::VECTOR_BINARY); diff --git a/internal/core/src/query/SearchOnSealed.cpp b/internal/core/src/query/SearchOnSealed.cpp index 94d111f3eb227d53cd85e3e32e00cd95c57c8f32..58ffed4f29ae336386a136a64cfba202e02036f9 100644 --- a/internal/core/src/query/SearchOnSealed.cpp +++ b/internal/core/src/query/SearchOnSealed.cpp @@ -61,7 +61,7 @@ SearchOnSealed(const Schema& schema, // Assert(field.get_data_type() == DataType::VECTOR_FLOAT); auto dim = field.get_dim(); - Assert(record.test_readiness(field_offset)); + Assert(record.is_ready(field_offset)); auto indexing_entry = record.get_entry(field_offset); std::cout << " SearchOnSealed, indexing_entry->metric:" << indexing_entry->metric_type_ << std::endl; std::cout << " SearchOnSealed, query_info.metric_type_:" << query_info.metric_type_ << std::endl; diff --git a/internal/core/src/query/visitors/ExecExprVisitor.cpp b/internal/core/src/query/visitors/ExecExprVisitor.cpp index 7097245c710a802a99c73783a8adb8e08bfc0a5e..a15214c0c26d72b24032d2ceb49cc5f7547f1b94 100644 --- a/internal/core/src/query/visitors/ExecExprVisitor.cpp +++ b/internal/core/src/query/visitors/ExecExprVisitor.cpp @@ -121,9 +121,7 @@ ExecExprVisitor::ExecRangeVisitorImpl(RangeExprImpl<T>& expr, IndexFunc index_fu auto& records = segment_.get_insert_record(); auto data_type = expr.data_type_; auto& schema = segment_.get_schema(); - auto field_offset_opt = schema.get_offset(expr.field_id_); - Assert(field_offset_opt); - auto field_offset = field_offset_opt.value(); + auto field_offset = expr.field_offset_; auto& field_meta = schema[field_offset]; auto vec_ptr = records.get_entity<T>(field_offset); auto& vec = *vec_ptr; @@ -234,7 +232,7 @@ ExecExprVisitor::ExecRangeVisitorDispatcher(RangeExpr& expr_raw) -> RetType { void ExecExprVisitor::visit(RangeExpr& expr) { - auto& field_meta = segment_.get_schema()[expr.field_id_]; + auto& field_meta = segment_.get_schema()[expr.field_offset_]; Assert(expr.data_type_ == field_meta.get_data_type()); RetType ret; switch (expr.data_type_) { @@ -279,9 +277,8 @@ ExecExprVisitor::ExecTermVisitorImpl(TermExpr& expr_raw) -> RetType { auto& records = segment_.get_insert_record(); auto data_type = expr.data_type_; auto& schema = segment_.get_schema(); - auto field_offset_opt = schema.get_offset(expr.field_id_); - Assert(field_offset_opt); - auto field_offset = field_offset_opt.value(); + + auto field_offset = expr_raw.field_offset_; auto& field_meta = schema[field_offset]; auto vec_ptr = records.get_entity<T>(field_offset); auto& vec = *vec_ptr; @@ -310,7 +307,7 @@ ExecExprVisitor::ExecTermVisitorImpl(TermExpr& expr_raw) -> RetType { void ExecExprVisitor::visit(TermExpr& expr) { - auto& field_meta = segment_.get_schema()[expr.field_id_]; + auto& field_meta = segment_.get_schema()[expr.field_offset_]; Assert(expr.data_type_ == field_meta.get_data_type()); RetType ret; switch (expr.data_type_) { diff --git a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp index 946a6088b124d6d879b53f9e4e35eef447414c5c..a32347fa9c6c1db0d7fda0222d35e428fde929ce 100644 --- a/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp +++ b/internal/core/src/query/visitors/ExecPlanNodeVisitor.cpp @@ -75,7 +75,7 @@ ExecPlanNodeVisitor::visit(FloatVectorANNS& node) { auto& sealed_indexing = segment->get_sealed_indexing_record(); - if (sealed_indexing.test_readiness(node.query_info_.field_offset_)) { + if (sealed_indexing.is_ready(node.query_info_.field_offset_)) { SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_, bitset_pack, ret); } else { @@ -105,7 +105,7 @@ ExecPlanNodeVisitor::visit(BinaryVectorANNS& node) { } auto& sealed_indexing = segment->get_sealed_indexing_record(); - if (sealed_indexing.test_readiness(node.query_info_.field_offset_)) { + if (sealed_indexing.is_ready(node.query_info_.field_offset_)) { SearchOnSealed(segment->get_schema(), sealed_indexing, node.query_info_, src_data, num_queries, timestamp_, bitset_pack, ret); } else { diff --git a/internal/core/src/query/visitors/ShowExprVisitor.cpp b/internal/core/src/query/visitors/ShowExprVisitor.cpp index c9c52718546bfd9a209bf29ac84af3d8cd8af181..48e7af1c8605ef0d8371217777695fe9eedba91b 100644 --- a/internal/core/src/query/visitors/ShowExprVisitor.cpp +++ b/internal/core/src/query/visitors/ShowExprVisitor.cpp @@ -137,7 +137,7 @@ ShowExprVisitor::visit(TermExpr& expr) { }(); Json res{{"expr_type", "Term"}, - {"field_id", expr.field_id_}, + {"field_offset", expr.field_offset_.get()}, {"data_type", datatype_name(expr.data_type_)}, {"terms", std::move(terms)}}; @@ -184,7 +184,7 @@ ShowExprVisitor::visit(RangeExpr& expr) { }(); Json res{{"expr_type", "Range"}, - {"field_id", expr.field_id_}, + {"field_offset", expr.field_offset_.get()}, {"data_type", datatype_name(expr.data_type_)}, {"conditions", std::move(conditions)}}; ret_ = res; diff --git a/internal/core/src/query/visitors/ShowPlanNodeVisitor.cpp b/internal/core/src/query/visitors/ShowPlanNodeVisitor.cpp index 19dea7b40ed65f820c22521392e66e1758a9fe22..6154dee4c16645f9ef3d88dcf2a11ac54b4ab9c0 100644 --- a/internal/core/src/query/visitors/ShowPlanNodeVisitor.cpp +++ b/internal/core/src/query/visitors/ShowPlanNodeVisitor.cpp @@ -54,12 +54,12 @@ ShowPlanNodeVisitor::visit(FloatVectorANNS& node) { assert(!ret_); auto& info = node.query_info_; Json json_body{ - {"node_type", "FloatVectorANNS"}, // - {"metric_type", info.metric_type_}, // - {"field_id_", info.field_id_}, // - {"topK", info.topK_}, // - {"search_params", info.search_params_}, // - {"placeholder_tag", node.placeholder_tag_}, // + {"node_type", "FloatVectorANNS"}, // + {"metric_type", info.metric_type_}, // + {"field_offset_", info.field_offset_.get()}, // + {"topK", info.topK_}, // + {"search_params", info.search_params_}, // + {"placeholder_tag", node.placeholder_tag_}, // }; if (node.predicate_.has_value()) { ShowExprVisitor expr_show; @@ -76,12 +76,12 @@ ShowPlanNodeVisitor::visit(BinaryVectorANNS& node) { assert(!ret_); auto& info = node.query_info_; Json json_body{ - {"node_type", "BinaryVectorANNS"}, // - {"metric_type", info.metric_type_}, // - {"field_id_", info.field_id_}, // - {"topK", info.topK_}, // - {"search_params", info.search_params_}, // - {"placeholder_tag", node.placeholder_tag_}, // + {"node_type", "BinaryVectorANNS"}, // + {"metric_type", info.metric_type_}, // + {"field_offset_", info.field_offset_.get()}, // + {"topK", info.topK_}, // + {"search_params", info.search_params_}, // + {"placeholder_tag", node.placeholder_tag_}, // }; if (node.predicate_.has_value()) { ShowExprVisitor expr_show; diff --git a/internal/core/src/segcore/CMakeLists.txt b/internal/core/src/segcore/CMakeLists.txt index 1a011c984b690d92e62dc6bb172aa245cbfd140f..3646097a7a36d8e0c2978c0929c4ddcefa3719d2 100644 --- a/internal/core/src/segcore/CMakeLists.txt +++ b/internal/core/src/segcore/CMakeLists.txt @@ -1,8 +1,6 @@ set(SEGCORE_FILES - SegmentNaive.cpp SegmentSmallIndex.cpp - IndexMeta.cpp Collection.cpp collection_c.cpp segment_c.cpp diff --git a/internal/core/src/segcore/Collection.cpp b/internal/core/src/segcore/Collection.cpp index 7295d027071101fe40a67c67daaf9e82fe938283..c5e2a29d7fad77a167b6848561522e4f8e057d04 100644 --- a/internal/core/src/segcore/Collection.cpp +++ b/internal/core/src/segcore/Collection.cpp @@ -21,104 +21,7 @@ namespace milvus::segcore { Collection::Collection(const std::string& collection_proto) : schema_proto_(collection_proto) { parse(); - index_ = nullptr; } -#if 0 -void -Collection::AddIndex(const grpc::IndexParam& index_param) { - auto& index_name = index_param.index_name(); - auto& field_name = index_param.field_name(); - - Assert(!index_name.empty()); - Assert(!field_name.empty()); - - auto index_type = knowhere::IndexEnum::INDEX_FAISS_IVFPQ; - auto index_mode = knowhere::IndexMode::MODE_CPU; - knowhere::Config index_conf; - - bool found_index_type = false; - bool found_index_mode = false; - bool found_index_conf = false; - - auto extra_params = index_param.extra_params(); - for (auto& extra_param : extra_params) { - if (extra_param.key() == "index_type") { - index_type = extra_param.value().data(); - found_index_type = true; - continue; - } - if (extra_param.key() == "index_mode") { - auto index_mode_int = stoi(extra_param.value()); - if (index_mode_int == 0) { - found_index_mode = true; - continue; - } else if (index_mode_int == 1) { - index_mode = knowhere::IndexMode::MODE_GPU; - found_index_mode = true; - continue; - } else { - throw std::runtime_error("Illegal index mode, only 0 or 1 is supported."); - } - } - if (extra_param.key() == "params") { - index_conf = nlohmann::json::parse(extra_param.value()); - found_index_conf = true; - continue; - } - } - - if (!found_index_type) { - std::cout << "WARN: Not specify index type, use default index type: INDEX_FAISS_IVFPQ" << std::endl; - } - if (!found_index_mode) { - std::cout << "WARN: Not specify index mode, use default index mode: MODE_CPU" << std::endl; - } - if (!found_index_conf) { - int dim = 0; - - for (auto& field : schema_->get_fields()) { - if (field.get_data_type() == DataType::VECTOR_FLOAT) { - dim = field.get_dim(); - } - } - Assert(dim != 0); - - index_conf = milvus::knowhere::Config{ - {knowhere::meta::DIM, dim}, {knowhere::IndexParams::nlist, 100}, - {knowhere::IndexParams::nprobe, 4}, {knowhere::IndexParams::m, 4}, - {knowhere::IndexParams::nbits, 8}, {knowhere::Metric::TYPE, milvus::knowhere::Metric::L2}, - {knowhere::meta::DEVICEID, 0}, - }; - std::cout << "WARN: Not specify index config, use default index config" << std::endl; - } - - index_->AddEntry(index_name, field_name, index_type, index_mode, index_conf); -} - -void -Collection::CreateIndex(std::string& index_config) { - if (index_config.empty()) { - index_ = nullptr; - std::cout << "null index config when create index" << std::endl; - return; - } - - milvus::proto::etcd::CollectionMeta collection_meta; - auto suc = google::protobuf::TextFormat::ParseFromString(index_config, &collection_meta); - - if (!suc) { - std::cerr << "unmarshal index string failed" << std::endl; - } - - index_ = std::make_shared<IndexMeta>(schema_); - - // for (const auto& index : collection_meta.indexes()) { - // std::cout << "add index, index name =" << index.index_name() << ", field_name = " << index.field_name() - // << std::endl; - // AddIndex(index); - // } -} -#endif void Collection::parse() { @@ -126,8 +29,8 @@ Collection::parse() { // TODO: remove hard code use unittests are ready std::cout << "WARN: Use default schema" << std::endl; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); - schema->AddField("age", DataType::INT32); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::INT32); collection_name_ = "default-collection"; schema_ = schema; return; diff --git a/internal/core/src/segcore/Collection.h b/internal/core/src/segcore/Collection.h index 711e2ff986839be6877cfa0e0e968e24a29001a9..bc2cf8b0618904731c635ebe803c00b9aabe9648 100644 --- a/internal/core/src/segcore/Collection.h +++ b/internal/core/src/segcore/Collection.h @@ -12,7 +12,6 @@ #pragma once #include "common/Schema.h" -#include "IndexMeta.h" #include <string> #include <memory> @@ -31,18 +30,12 @@ class Collection { return schema_; } - IndexMetaPtr& - get_index() { - return index_; - } - const std::string& get_collection_name() { return collection_name_; } private: - IndexMetaPtr index_; std::string collection_name_; std::string schema_proto_; SchemaPtr schema_; diff --git a/internal/core/src/segcore/IndexingEntry.cpp b/internal/core/src/segcore/IndexingEntry.cpp index dea45419925236713e780bda95c8772a1672f5d1..947be92e2fd048ae4d0a824f246b676da1a50de9 100644 --- a/internal/core/src/segcore/IndexingEntry.cpp +++ b/internal/core/src/segcore/IndexingEntry.cpp @@ -75,8 +75,8 @@ IndexingRecord::UpdateResourceAck(int64_t chunk_ack, const InsertRecord& record) // std::thread([this, old_ack, chunk_ack, &record] { for (auto& [field_offset, entry] : entries_) { - auto vec_base = record.entity_vec_[field_offset].get(); - entry->BuildIndexRange(old_ack, chunk_ack, vec_base); + auto vec_base = record.get_base_entity(field_offset); + entry->BuildIndexRange(old_ack, chunk_ack, vec_base.get()); } finished_ack_.AddSegment(old_ack, chunk_ack); // }).detach(); diff --git a/internal/core/src/segcore/IndexingEntry.h b/internal/core/src/segcore/IndexingEntry.h index 955879c33e29d00a8d6617d6500b189b78df968e..a4cde732a55f4ab46c235abe5aa381f674d3bd27 100644 --- a/internal/core/src/segcore/IndexingEntry.h +++ b/internal/core/src/segcore/IndexingEntry.h @@ -108,7 +108,7 @@ class IndexingRecord { int offset = 0; for (auto& field : schema_) { if (field.get_data_type() != DataType::VECTOR_BINARY) { - entries_.try_emplace(offset, CreateIndex(field, chunk_size_)); + entries_.try_emplace(FieldOffset(offset), CreateIndex(field, chunk_size_)); } ++offset; } @@ -126,13 +126,13 @@ class IndexingRecord { } const IndexingEntry& - get_entry(int field_offset) const { + get_entry(FieldOffset field_offset) const { assert(entries_.count(field_offset)); return *entries_.at(field_offset); } const VecIndexingEntry& - get_vec_entry(int field_offset) const { + get_vec_entry(FieldOffset field_offset) const { auto& entry = get_entry(field_offset); auto ptr = dynamic_cast<const VecIndexingEntry*>(&entry); AssertInfo(ptr, "invalid indexing"); @@ -140,7 +140,7 @@ class IndexingRecord { } template <typename T> auto - get_scalar_entry(int field_offset) const -> const ScalarIndexingEntry<T>& { + get_scalar_entry(FieldOffset field_offset) const -> const ScalarIndexingEntry<T>& { auto& entry = get_entry(field_offset); auto ptr = dynamic_cast<const ScalarIndexingEntry<T>*>(&entry); AssertInfo(ptr, "invalid indexing"); @@ -160,7 +160,7 @@ class IndexingRecord { private: // field_offset => indexing - std::map<int, std::unique_ptr<IndexingEntry>> entries_; + std::map<FieldOffset, std::unique_ptr<IndexingEntry>> entries_; }; } // namespace milvus::segcore diff --git a/internal/core/src/segcore/InsertRecord.cpp b/internal/core/src/segcore/InsertRecord.cpp index 775ad569502886102af1f9bc3b831f7fe74ccd90..66bd9f8995ce88ccb314d7bf2a6018e0265f22df 100644 --- a/internal/core/src/segcore/InsertRecord.cpp +++ b/internal/core/src/segcore/InsertRecord.cpp @@ -17,10 +17,10 @@ InsertRecord::InsertRecord(const Schema& schema, int64_t chunk_size) : uids_(1), for (auto& field : schema) { if (field.is_vector()) { if (field.get_data_type() == DataType::VECTOR_FLOAT) { - entity_vec_.emplace_back(std::make_shared<ConcurrentVector<FloatVector>>(field.get_dim(), chunk_size)); + this->insert_entity<FloatVector>(field.get_dim(), chunk_size); continue; } else if (field.get_data_type() == DataType::VECTOR_BINARY) { - entity_vec_.emplace_back(std::make_shared<ConcurrentVector<BinaryVector>>(field.get_dim(), chunk_size)); + this->insert_entity<BinaryVector>(field.get_dim(), chunk_size); continue; } else { PanicInfo("unsupported"); @@ -28,34 +28,34 @@ InsertRecord::InsertRecord(const Schema& schema, int64_t chunk_size) : uids_(1), } switch (field.get_data_type()) { case DataType::BOOL: { - entity_vec_.emplace_back(std::make_shared<ConcurrentVector<bool>>(chunk_size)); + this->insert_entity<bool>(chunk_size); break; } case DataType::INT8: { - entity_vec_.emplace_back(std::make_shared<ConcurrentVector<int8_t>>(chunk_size)); + this->insert_entity<int8_t>(chunk_size); break; } case DataType::INT16: { - entity_vec_.emplace_back(std::make_shared<ConcurrentVector<int16_t>>(chunk_size)); + this->insert_entity<int16_t>(chunk_size); break; } case DataType::INT32: { - entity_vec_.emplace_back(std::make_shared<ConcurrentVector<int32_t>>(chunk_size)); + this->insert_entity<int32_t>(chunk_size); break; } case DataType::INT64: { - entity_vec_.emplace_back(std::make_shared<ConcurrentVector<int64_t>>(chunk_size)); + this->insert_entity<int64_t>(chunk_size); break; } case DataType::FLOAT: { - entity_vec_.emplace_back(std::make_shared<ConcurrentVector<float>>(chunk_size)); + this->insert_entity<float>(chunk_size); break; } case DataType::DOUBLE: { - entity_vec_.emplace_back(std::make_shared<ConcurrentVector<double>>(chunk_size)); + this->insert_entity<double>(chunk_size); break; } default: { diff --git a/internal/core/src/segcore/InsertRecord.h b/internal/core/src/segcore/InsertRecord.h index 661f6310ef2ec9120eddf3d93b50284b0d9dcd1d..2efa958c75aa5610dc4c8072f6a4e7130f0d1d48 100644 --- a/internal/core/src/segcore/InsertRecord.h +++ b/internal/core/src/segcore/InsertRecord.h @@ -23,23 +23,48 @@ struct InsertRecord { AckResponder ack_responder_; ConcurrentVector<Timestamp> timestamps_; ConcurrentVector<idx_t> uids_; - std::vector<std::shared_ptr<VectorBase>> entity_vec_; explicit InsertRecord(const Schema& schema, int64_t chunk_size); + + auto + get_base_entity(FieldOffset field_offset) const { + auto ptr = entity_vec_[field_offset.get()]; + return ptr; + } + template <typename Type> auto - get_entity(int offset) const { - auto ptr = std::dynamic_pointer_cast<const ConcurrentVector<Type>>(entity_vec_[offset]); + get_entity(FieldOffset field_offset) const { + auto base_ptr = get_base_entity(field_offset); + auto ptr = std::dynamic_pointer_cast<const ConcurrentVector<Type>>(base_ptr); Assert(ptr); return ptr; } template <typename Type> auto - get_entity(int offset) { - auto ptr = std::dynamic_pointer_cast<ConcurrentVector<Type>>(entity_vec_[offset]); + get_entity(FieldOffset field_offset) { + auto base_ptr = get_base_entity(field_offset); + auto ptr = std::dynamic_pointer_cast<ConcurrentVector<Type>>(base_ptr); Assert(ptr); return ptr; } + + template <typename Type> + void + insert_entity(int64_t chunk_size) { + static_assert(std::is_fundamental_v<Type>); + entity_vec_.emplace_back(std::make_shared<ConcurrentVector<Type>>(chunk_size)); + } + + template <typename VectorType> + void + insert_entity(int64_t dim, int64_t chunk_size) { + static_assert(std::is_base_of_v<VectorTrait, VectorType>); + entity_vec_.emplace_back(std::make_shared<ConcurrentVector<VectorType>>(dim, chunk_size)); + } + + private: + std::vector<std::shared_ptr<VectorBase>> entity_vec_; }; } // namespace milvus::segcore diff --git a/internal/core/src/segcore/SealedIndexingRecord.h b/internal/core/src/segcore/SealedIndexingRecord.h index 7676887a289f93edef5c108805b6a3caed98f01a..fcc9b4c4d1380395102836598aedb1c39551c712 100644 --- a/internal/core/src/segcore/SealedIndexingRecord.h +++ b/internal/core/src/segcore/SealedIndexingRecord.h @@ -31,27 +31,27 @@ using SealedIndexingEntryPtr = std::unique_ptr<SealedIndexingEntry>; struct SealedIndexingRecord { void - add_entry(int64_t field_offset, SealedIndexingEntryPtr&& ptr) { + add_entry(FieldOffset field_offset, SealedIndexingEntryPtr&& ptr) { std::unique_lock lck(mutex_); entries_[field_offset] = std::move(ptr); } const SealedIndexingEntry* - get_entry(int64_t field_offset) const { + get_entry(FieldOffset field_offset) const { std::shared_lock lck(mutex_); AssertInfo(entries_.count(field_offset), "field_offset not found"); return entries_.at(field_offset).get(); } bool - test_readiness(int64_t field_offset) const { + is_ready(FieldOffset field_offset) const { std::shared_lock lck(mutex_); return entries_.count(field_offset); } private: // field_offset -> SealedIndexingEntry - std::map<int64_t, SealedIndexingEntryPtr> entries_; + std::map<FieldOffset, SealedIndexingEntryPtr> entries_; mutable std::shared_mutex mutex_; }; } // namespace milvus::segcore diff --git a/internal/core/src/segcore/SegmentBase.h b/internal/core/src/segcore/SegmentBase.h index a7d427d66517e3ea10451e1c70d5130b9acbb33e..8e2d130dd1d21f8d9ed3d5c132fab44d08c0ee42 100644 --- a/internal/core/src/segcore/SegmentBase.h +++ b/internal/core/src/segcore/SegmentBase.h @@ -12,7 +12,6 @@ #pragma once #include <vector> -#include "IndexMeta.h" #include "common/Types.h" #include "common/Schema.h" #include <memory> @@ -83,23 +82,6 @@ class SegmentBase { virtual Status LoadIndexing(const LoadIndexInfo& info) = 0; - // // to make all data inserted visible - // // maybe a no-op? - // virtual Status - // Flush(Timestamp timestamp) = 0; - - // watch changes - // NOTE: Segment will use this ptr as correct - - virtual Status - DropRawData(std::string_view field_name) = 0; - - virtual Status - LoadRawData(std::string_view field_name, const char* blob, int64_t blob_size) = 0; - - virtual Status - BuildIndex(IndexMetaPtr index_meta) = 0; - virtual int64_t GetMemoryUsageInBytes() = 0; diff --git a/internal/core/src/segcore/SegmentSmallIndex.cpp b/internal/core/src/segcore/SegmentSmallIndex.cpp index fa7b0a3164cf0f4f17a785f8d1d181c8be88af6f..2bfcbbd486238b4552a96d2f4a9051c24067349f 100644 --- a/internal/core/src/segcore/SegmentSmallIndex.cpp +++ b/internal/core/src/segcore/SegmentSmallIndex.cpp @@ -168,7 +168,8 @@ SegmentSmallIndex::Insert(int64_t reserved_begin, record_.timestamps_.set_data(reserved_begin, timestamps.data(), size); record_.uids_.set_data(reserved_begin, uids.data(), size); for (int fid = 0; fid < schema_->size(); ++fid) { - record_.entity_vec_[fid]->set_data_raw(reserved_begin, entities[fid].data(), size); + auto field_offset = FieldOffset(fid); + record_.get_base_entity(field_offset)->set_data_raw(reserved_begin, entities[fid].data(), size); } for (int i = 0; i < uids.size(); ++i) { @@ -226,103 +227,9 @@ SegmentSmallIndex::Close() { return Status::OK(); } -template <typename Type> -knowhere::IndexPtr -SegmentSmallIndex::BuildVecIndexImpl(const IndexMeta::Entry& entry) { - auto offset_opt = schema_->get_offset(entry.field_name); - Assert(offset_opt.has_value()); - auto offset = offset_opt.value(); - auto field = (*schema_)[offset]; - auto dim = field.get_dim(); - - auto indexing = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(entry.type, entry.mode); - auto& uids = record_.uids_; - auto entities = record_.get_entity<FloatVector>(offset); - - std::vector<knowhere::DatasetPtr> datasets; - for (int chunk_id = 0; chunk_id < uids.num_chunk(); ++chunk_id) { - auto entities_chunk = entities->get_chunk(chunk_id).data(); - int64_t count = chunk_id == uids.num_chunk() - 1 ? record_.reserved - chunk_id * chunk_size_ : chunk_size_; - datasets.push_back(knowhere::GenDataset(count, dim, entities_chunk)); - } - for (auto& ds : datasets) { - indexing->Train(ds, entry.config); - } - for (auto& ds : datasets) { - indexing->AddWithoutIds(ds, entry.config); - } - return indexing; -} - -Status -SegmentSmallIndex::BuildIndex(IndexMetaPtr remote_index_meta) { - if (remote_index_meta == nullptr) { - PanicInfo("deprecated"); - std::cout << "WARN: Null index ptr is detected, use default index" << std::endl; - - int dim = 0; - std::string index_field_name; - - for (auto& field : schema_->get_fields()) { - if (field.get_data_type() == DataType::VECTOR_FLOAT) { - dim = field.get_dim(); - index_field_name = field.get_name(); - } - } - - Assert(dim != 0); - Assert(!index_field_name.empty()); - - auto index_meta = std::make_shared<IndexMeta>(schema_); - // TODO: this is merge of query conf and insert conf - // TODO: should be split into multiple configs - auto conf = milvus::knowhere::Config{ - {milvus::knowhere::meta::DIM, dim}, {milvus::knowhere::IndexParams::nlist, 100}, - {milvus::knowhere::IndexParams::nprobe, 4}, {milvus::knowhere::IndexParams::m, 4}, - {milvus::knowhere::IndexParams::nbits, 8}, {milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2}, - {milvus::knowhere::meta::DEVICEID, 0}, - }; - index_meta->AddEntry("fakeindex", index_field_name, knowhere::IndexEnum::INDEX_FAISS_IVFPQ, - knowhere::IndexMode::MODE_CPU, conf); - remote_index_meta = index_meta; - } - if (record_.ack_responder_.GetAck() < 1024 * 4) { - return Status(SERVER_BUILD_INDEX_ERROR, "too few elements"); - } - PanicInfo("unimplemented"); -#if 0 - index_meta_ = remote_index_meta; - for (auto& [index_name, entry] : index_meta_->get_entries()) { - Assert(entry.index_name == index_name); - const auto& field = (*schema_)[entry.field_name]; - - if (field.is_vector()) { - Assert(field.get_data_type() == engine::DataType::VECTOR_FLOAT); - auto index_ptr = BuildVecIndexImpl<float>(entry); - indexings_[index_name] = index_ptr; - } else { - throw std::runtime_error("unimplemented"); - } - } - - index_ready_ = true; - return Status::OK(); -#endif -} - int64_t SegmentSmallIndex::GetMemoryUsageInBytes() { int64_t total_bytes = 0; -#if 0 - if (index_ready_) { - auto& index_entries = index_meta_->get_entries(); - for (auto [index_name, entry] : index_entries) { - Assert(schema_->operator[](entry.field_name).is_vector()); - auto vec_ptr = std::static_pointer_cast<knowhere::VecIndex>(indexings_[index_name]); - total_bytes += vec_ptr->IndexSize(); - } - } -#endif int64_t ins_n = upper_align(record_.reserved, chunk_size_); total_bytes += ins_n * (schema_->get_total_sizeof() + 16 + 1); int64_t del_n = upper_align(deleted_record_.reserved, chunk_size_); @@ -380,8 +287,7 @@ SegmentSmallIndex::FillTargetEntry(const query::Plan* plan, QueryResult& results Status SegmentSmallIndex::LoadIndexing(const LoadIndexInfo& info) { - auto field_offset_opt = schema_->get_offset(info.field_name); - AssertInfo(field_offset_opt.has_value(), "field name(" + info.field_name + ") not found"); + auto field_offset = schema_->get_offset(FieldName(info.field_name)); Assert(info.index_params.count("metric_type")); auto metric_type_str = info.index_params.at("metric_type"); @@ -390,7 +296,7 @@ SegmentSmallIndex::LoadIndexing(const LoadIndexInfo& info) { entry->metric_type_ = GetMetricType(metric_type_str); entry->indexing_ = info.index; - sealed_indexing_record_.add_entry(field_offset_opt.value(), std::move(entry)); + sealed_indexing_record_.add_entry(field_offset, std::move(entry)); return Status::OK(); } diff --git a/internal/core/src/segcore/SegmentSmallIndex.h b/internal/core/src/segcore/SegmentSmallIndex.h index 36553e076656522c27b7202f7d139fd8a6217df9..8d89954954b1f61f44e7bdaeb17cb7b2af7fbe2c 100644 --- a/internal/core/src/segcore/SegmentSmallIndex.h +++ b/internal/core/src/segcore/SegmentSmallIndex.h @@ -67,26 +67,6 @@ class SegmentSmallIndex : public SegmentBase { Status Close() override; - // using IndexType = knowhere::IndexType; - // using IndexMode = knowhere::IndexMode; - // using IndexConfig = knowhere::Config; - // BuildIndex With Paramaters, must with Frozen State - // NOTE: index_params contains several policies for several index - // TODO: currently, index has to be set at startup, and can't be modified - // AddIndex and DropIndex will be added later - Status - BuildIndex(IndexMetaPtr index_meta) override; - - Status - DropRawData(std::string_view field_name) override { - PanicInfo("unimplemented"); - } - - Status - LoadRawData(std::string_view field_name, const char* blob, int64_t blob_size) override { - PanicInfo("unimplemented"); - } - int64_t GetMemoryUsageInBytes() override; @@ -150,10 +130,6 @@ class SegmentSmallIndex : public SegmentBase { std::shared_ptr<DeletedRecord::TmpBitmap> get_deleted_bitmap(int64_t del_barrier, Timestamp query_timestamp, int64_t insert_barrier, bool force = false); - template <typename Type> - knowhere::IndexPtr - BuildVecIndexImpl(const IndexMeta::Entry& entry); - Status FillTargetEntry(const query::Plan* Plan, QueryResult& results) override; @@ -161,7 +137,6 @@ class SegmentSmallIndex : public SegmentBase { int64_t chunk_size_; SchemaPtr schema_; std::atomic<SegmentState> state_ = SegmentState::Open; - IndexMetaPtr index_meta_; InsertRecord record_; DeletedRecord deleted_record_; diff --git a/internal/core/src/segcore/IndexMeta.cpp b/internal/core/src/segcore/deprecated/IndexMeta.cpp similarity index 94% rename from internal/core/src/segcore/IndexMeta.cpp rename to internal/core/src/segcore/deprecated/IndexMeta.cpp index 3934de2af21ab26e7848b34c4812366072e6db1d..6b6aa7355820ab18a73d6d28146727e262bd01ab 100644 --- a/internal/core/src/segcore/IndexMeta.cpp +++ b/internal/core/src/segcore/deprecated/IndexMeta.cpp @@ -8,7 +8,7 @@ // Unless required by applicable law or agreed to in writing, software distributed under the License // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License - +#if 0 #include "IndexMeta.h" #include <mutex> #include <cassert> @@ -16,7 +16,8 @@ namespace milvus::segcore { Status IndexMeta::AddEntry( - const std::string& index_name, const std::string& field_name, IndexType type, IndexMode mode, IndexConfig config) { + const std::string& index_name, const std::string& field_name_, IndexType type, IndexMode mode, IndexConfig config) { + auto field_name = FieldName(field_name_); Entry entry{index_name, field_name, type, mode, std::move(config)}; VerifyEntry(entry); @@ -59,3 +60,5 @@ IndexMeta::VerifyEntry(const Entry& entry) { } } // namespace milvus::segcore + +#endif diff --git a/internal/core/src/segcore/IndexMeta.h b/internal/core/src/segcore/deprecated/IndexMeta.h similarity index 89% rename from internal/core/src/segcore/IndexMeta.h rename to internal/core/src/segcore/deprecated/IndexMeta.h index 83500dbe9dc6b6a2639903e0a20a0690ba65493e..07b9ffc1e39d5dc8770699138bafcccc3ff2a305 100644 --- a/internal/core/src/segcore/IndexMeta.h +++ b/internal/core/src/segcore/deprecated/IndexMeta.h @@ -15,6 +15,7 @@ // //#include "common/Schema.h" // #include "segcore/SegmentBase.h" +#if 0 #include "common/Schema.h" #include "knowhere/index/IndexType.h" #include "knowhere/common/Config.h" @@ -33,7 +34,7 @@ class IndexMeta { struct Entry { std::string index_name; - std::string field_name; + FieldName field_name; IndexType type; IndexMode mode; IndexConfig config; @@ -55,8 +56,8 @@ class IndexMeta { } const Entry& - lookup_by_field(const std::string& field_name) { - AssertInfo(lookups_.count(field_name), field_name); + lookup_by_field(const FieldName& field_name) { + AssertInfo(lookups_.count(field_name), field_name.get()); auto index_name = lookups_.at(field_name); AssertInfo(entries_.count(index_name), index_name); return entries_.at(index_name); @@ -69,8 +70,10 @@ class IndexMeta { private: SchemaPtr schema_; std::map<std::string, Entry> entries_; // index_name => Entry - std::map<std::string, std::string> lookups_; // field_name => index_name + std::map<FieldName, std::string> lookups_; // field_name => index_name }; using IndexMetaPtr = std::shared_ptr<IndexMeta>; } // namespace milvus::segcore + +#endif diff --git a/internal/core/src/segcore/SegmentNaive.cpp b/internal/core/src/segcore/deprecated/SegmentNaive.cpp similarity index 94% rename from internal/core/src/segcore/SegmentNaive.cpp rename to internal/core/src/segcore/deprecated/SegmentNaive.cpp index b513247c7bc4c01e9d2a00080668c0fc4caf7784..ae319e6b68a76b81b1529ba0c7f49724d990ca0b 100644 --- a/internal/core/src/segcore/SegmentNaive.cpp +++ b/internal/core/src/segcore/deprecated/SegmentNaive.cpp @@ -8,8 +8,9 @@ // Unless required by applicable law or agreed to in writing, software distributed under the License // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License +#if 0 -#include <segcore/SegmentNaive.h> +#include <segcore/deprecated/SegmentNaive.h> #include <random> #include <algorithm> #include <numeric> @@ -239,20 +240,17 @@ SegmentNaive::QueryImpl(query::QueryDeprecatedPtr query_info, Timestamp timestam Assert(bitmap_holder); Assert(bitmap_holder->bitmap_ptr->count() == ins_barrier); - auto field_offset = schema_->get_offset(query_info->field_name); - auto& field = schema_->operator[](query_info->field_name); + auto field_name = FieldName(query_info->field_name); + auto field_offset = schema_->get_offset(field_name); + auto& field = schema_->operator[](field_name); Assert(field.get_data_type() == DataType::VECTOR_FLOAT); auto dim = field.get_dim(); auto bitmap = bitmap_holder->bitmap_ptr; auto topK = query_info->topK; auto num_queries = query_info->num_queries; - auto the_offset_opt = schema_->get_offset(query_info->field_name); - Assert(the_offset_opt.has_value()); - Assert(the_offset_opt.value() < record_.entity_vec_.size()); - auto vec_ptr = - std::static_pointer_cast<ConcurrentVector<FloatVector>>(record_.entity_vec_.at(the_offset_opt.value())); - auto index_entry = index_meta_->lookup_by_field(query_info->field_name); + auto vec_ptr = std::static_pointer_cast<ConcurrentVector<FloatVector>>(record_.entity_vec_.at(field_offset)); + auto index_entry = index_meta_->lookup_by_field(field_name); auto conf = index_entry.config; conf[milvus::knowhere::meta::TOPK] = query_info->topK; @@ -296,19 +294,17 @@ SegmentNaive::QuerySlowImpl(query::QueryDeprecatedPtr query_info, Timestamp time auto del_barrier = get_barrier(deleted_record_, timestamp); auto bitmap_holder = get_deleted_bitmap(del_barrier, timestamp, ins_barrier); Assert(bitmap_holder); - - auto& field = schema_->operator[](query_info->field_name); + auto field_name = FieldName(query_info->field_name); + auto& field = schema_->operator[](field_name); Assert(field.get_data_type() == DataType::VECTOR_FLOAT); auto dim = field.get_dim(); auto bitmap = bitmap_holder->bitmap_ptr; auto topK = query_info->topK; auto num_queries = query_info->num_queries; // TODO: optimize - auto the_offset_opt = schema_->get_offset(query_info->field_name); - Assert(the_offset_opt.has_value()); - Assert(the_offset_opt.value() < record_.entity_vec_.size()); - auto vec_ptr = - std::static_pointer_cast<ConcurrentVector<FloatVector>>(record_.entity_vec_.at(the_offset_opt.value())); + auto field_offset = schema_->get_offset(field_name); + Assert(field_offset < record_.entity_vec_.size()); + auto vec_ptr = std::static_pointer_cast<ConcurrentVector<FloatVector>>(record_.entity_vec_.at(field_offset)); std::vector<std::priority_queue<std::pair<float, int>>> records(num_queries); auto get_L2_distance = [dim](const float* a, const float* b) { @@ -370,7 +366,7 @@ SegmentNaive::QueryDeprecated(query::QueryDeprecatedPtr query_info, Timestamp ti query_info->topK = 10; query_info->num_queries = 1; - auto dim = schema_->operator[]("fakevec").get_dim(); + auto dim = schema_->operator[](FieldName("fakevec")).get_dim(); std::default_random_engine e(42); std::uniform_real_distribution<> dis(0.0, 1.0); query_info->query_raw_data.resize(query_info->num_queries * dim); @@ -415,7 +411,7 @@ SegmentNaive::BuildIndex(IndexMetaPtr remote_index_meta) { for (auto& field : schema_->get_fields()) { if (field.get_data_type() == DataType::VECTOR_FLOAT) { dim = field.get_dim(); - index_field_name = field.get_name(); + index_field_name = field.get_name().get(); } } @@ -464,3 +460,5 @@ SegmentNaive::GetMemoryUsageInBytes() { } } // namespace milvus::segcore + +#endif diff --git a/internal/core/src/segcore/SegmentNaive.h b/internal/core/src/segcore/deprecated/SegmentNaive.h similarity index 99% rename from internal/core/src/segcore/SegmentNaive.h rename to internal/core/src/segcore/deprecated/SegmentNaive.h index 7a24c263f99327712a9c6b88ce934f848a17bc48..15fd586d015ee5bd1940181936bc970528cd942f 100644 --- a/internal/core/src/segcore/SegmentNaive.h +++ b/internal/core/src/segcore/deprecated/SegmentNaive.h @@ -10,6 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under the License #pragma once +#if 0 #include <tbb/concurrent_priority_queue.h> #include <tbb/concurrent_unordered_map.h> @@ -162,3 +163,5 @@ class SegmentNaive : public SegmentBase { tbb::concurrent_unordered_multimap<idx_t, int64_t> uid2offset_; }; } // namespace milvus::segcore + +#endif diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp index c38b3921be18f2e8240320389b43d31ec56ad75d..479de9decb43d8ef3a4a96f8de5e57c83f12ae65 100644 --- a/internal/core/src/segcore/segment_c.cpp +++ b/internal/core/src/segcore/segment_c.cpp @@ -206,11 +206,7 @@ Close(CSegmentBase c_segment) { int BuildIndex(CCollection c_collection, CSegmentBase c_segment) { - auto collection = (milvus::segcore::Collection*)c_collection; - auto segment = (milvus::segcore::SegmentBase*)c_segment; - - auto status = segment->BuildIndex(collection->get_index()); - return status.code(); + PanicInfo("unimplemented"); } bool diff --git a/internal/core/thirdparty/NamedType/.clang-format b/internal/core/thirdparty/NamedType/.clang-format new file mode 100644 index 0000000000000000000000000000000000000000..b1b314fbe17543fb05cddef8c982a428cff3a33e --- /dev/null +++ b/internal/core/thirdparty/NamedType/.clang-format @@ -0,0 +1,110 @@ +Language: Cpp +AccessModifierOffset: -4 +AlignAfterOpenBracket: AlwaysBreak +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: false +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: false +BinPackParameters: false +BraceWrapping: + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: false + AfterExternBlock: true + BeforeCatch: true + BeforeElse: true + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Allman +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeComma +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 120 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '(Test)?$' +IndentCaseLabels: true +IndentPPDirectives: AfterHash +IndentWidth: 4 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Left +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 4 +UseTab: Never diff --git a/internal/core/thirdparty/NamedType/.gitignore b/internal/core/thirdparty/NamedType/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/internal/core/thirdparty/NamedType/.travis.yml b/internal/core/thirdparty/NamedType/.travis.yml new file mode 100644 index 0000000000000000000000000000000000000000..63cbb1def070930095db391142131b6a6da92126 --- /dev/null +++ b/internal/core/thirdparty/NamedType/.travis.yml @@ -0,0 +1,92 @@ +language: generic + +dist: bionic + +matrix: + include: + - env: CXX=g++-10 CC=gcc-10 + addons: + apt: + packages: + - g++-10 + sources: + - sourceline: 'ppa:ubuntu-toolchain-r/test' + - env: CXX=g++-9 CC=gcc-9 + addons: + apt: + packages: + - g++-9 + sources: + - sourceline: 'ppa:ubuntu-toolchain-r/test' + - env: CXX=g++-8 CC=gcc-8 + addons: + apt: + packages: + - g++-8 + - env: CXX=g++-7 CC=gcc-7 + addons: + apt: + packages: + - g++-7 + - env: CXX=g++-6 CC=gcc-6 + addons: + apt: + packages: + - g++-6 + - env: CXX=g++-5 CC=gcc-5 + addons: + apt: + packages: + - g++-5 + - env: CXX=clang++-11 CC=clang-11 + addons: + apt: + packages: + - clang-11 + - libc++-11-dev + - libc++abi-11-dev + sources: + - sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-11 main' + key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' + - env: CXX=clang++-10 CC=clang-10 + addons: + apt: + packages: + - clang-10 + - libc++-10-dev + - libc++abi-10-dev + sources: + - sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-10 main' + key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' + - env: CXX=clang++-9 CC=clang-9 + addons: + apt: + packages: + - clang-9 + - libc++-9-dev + - libc++abi-9-dev + sources: + - sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main' + key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key' + - env: CXX=clang++-8 CC=clang-8 + addons: + apt: + packages: + - clang-8 + - libc++-8-dev + - libc++abi-8-dev + - env: CXX=clang++-7 CC=clang-7 + addons: + apt: + packages: + - clang-7 + - libc++-7-dev + - libc++abi-7-dev + + +script: + - if [[ "$CXX" == clang* ]]; then export CXXFLAGS="-stdlib=libc++"; fi + - mkdir build && cd build + - cmake -DENABLE_TEST=ON .. + - make + - test/NamedTypeTest diff --git a/internal/core/thirdparty/NamedType/LICENSE b/internal/core/thirdparty/NamedType/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..c021b4694dd6955f72ae0645d6da22ac6bd7418c --- /dev/null +++ b/internal/core/thirdparty/NamedType/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Jonathan Boccara + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/internal/core/thirdparty/NamedType/README.md b/internal/core/thirdparty/NamedType/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a966992ca34ce09c85cf2a4ddd07e9e2906a0b64 --- /dev/null +++ b/internal/core/thirdparty/NamedType/README.md @@ -0,0 +1,108 @@ +[](https://travis-ci.org/joboccara/NamedType) + + +A **strong type** is a type used in place of another type to carry specific **meaning** through its **name**. + +This project experiments with strong types in C++. All components are in the namespace `fluent`. You can find a collection of blog posts explaining the rationale of the library and usages for strong types on [Fluent C++](https://www.fluentcpp.com/2016/12/08/strong-types-for-strong-interfaces/). + +<a href="https://www.patreon.com/join/fluentcpp?"><img alt="become a patron" src="https://c5.patreon.com/external/logo/become_a_patron_button.png" height="35px"></a> + +## Basic usage + +It central piece is the templated class NamedType, which can be used to declare a strong type with a typedef-like syntax: + +```cpp +using Width = NamedType<double, struct WidthTag>; +using Height = NamedType<double, struct HeightTag>; +``` + +which can be used to make interfaces more expressive and more robust. +Note how the below constructor shows in which order it expects its parameters: + +```cpp +class Rectangle +{ +public: + Rectangle(Width width, Height height) : width_(width.get()), height_(height.get()) {} + double getWidth() const { return width_; } + double getHeight() const { return height_; } + +private: + double width_; + double height_; +}; +``` + +**Strong types are about better expressing your intentions, both to the compiler and to other human developers.** + +## Strong typing over generic types + +This implementation of strong types can be used to add strong typing over generic or unknown types such as lambdas: + +```cpp +template<typename Function> +using Comparator = NamedType<Function, struct ComparatorTag>; + +template <typename Function> +void performAction(Comparator<Function> comp) +{ + comp.get()(); +} + +performAction(make_named<Comparator>([](){ std::cout << "compare\n"; })); +``` + +## Strong typing over references + +The NamedType class is designed so that the following usage: + +```cpp +using FamilyNameRef = NamedType<std:string&, struct FamilyNameRefTag>; +``` + +behaves like a reference on an std::string, strongly typed. + +## Inheriting the underlying type functionalities + +You can declare which functionalities should be inherited from the underlying type. So far, only basic operators are taken into account. +For instance, to inherit from operator+ and operator<<, you can declare the strong type: + +```cpp +using Meter = NamedType<double, MeterTag, Addable, Printable> +``` + +There is one special skill, `FunctionCallable`, that lets the strong type be converted in the underlying type. This has the effect of removing the need to call .get() to get the underlying value. And `MethodCallable` enables `operator->` on the strong type to invoke methods on the underlying type. + +The skill `Callable` is the union of `FunctionCallable` and `MethodCallable`. + +## Named arguments +By their nature strong types can play the role of named parameters: + +```cpp +using FirstName = NamedType<std::string, struct FirstNameTag>; +using LastName = NamedType<std::string, struct LastNameTag>; + +void displayName(FirstName const& theFirstName, LastName const& theLastName); + +// Call site +displayName(FirstName("John"), LastName("Doe")); +``` + +But the nested type `argument` allows to emulate a named argument syntax: + +```cpp +using FirstName = NamedType<std::string, struct FirstNameTag>; +using LastName = NamedType<std::string, struct LastNameTag>; + +static const FirstName::argument firstName; +static const LastName::argument lastName; + +void displayName(FirstName const& theFirstName, LastName const& theLastName); + +// Call site +displayName(firstName = "John", lastName = "Doe"); +``` + +You can have a look at tests.cpp for usage examples. + +<a href="https://www.patreon.com/join/fluentcpp?"><img alt="become a patron" src="https://c5.patreon.com/external/logo/become_a_patron_button.png" height="35px"></a> diff --git a/internal/core/thirdparty/NamedType/crtp.hpp b/internal/core/thirdparty/NamedType/crtp.hpp new file mode 100644 index 0000000000000000000000000000000000000000..913fbc672d66a282570189a4f3a928863d1ce0ac --- /dev/null +++ b/internal/core/thirdparty/NamedType/crtp.hpp @@ -0,0 +1,22 @@ +#ifndef CRTP_HPP +#define CRTP_HPP + +namespace fluent +{ + +template <typename T, template <typename> class crtpType> +struct crtp +{ + constexpr T& underlying() + { + return static_cast<T&>(*this); + } + constexpr T const& underlying() const + { + return static_cast<T const&>(*this); + } +}; + +} // namespace fluent + +#endif diff --git a/internal/core/thirdparty/NamedType/named_type.hpp b/internal/core/thirdparty/NamedType/named_type.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c7e6e1509fa2febbed307a833c4b551c4a532cd1 --- /dev/null +++ b/internal/core/thirdparty/NamedType/named_type.hpp @@ -0,0 +1,8 @@ +#ifndef NAMED_TYPE_HPP +#define NAMED_TYPE_HPP + +#include "named_type_impl.hpp" +#include "underlying_functionalities.hpp" +#include "version.hpp" + +#endif diff --git a/internal/core/thirdparty/NamedType/named_type_impl.hpp b/internal/core/thirdparty/NamedType/named_type_impl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..191e68169f00d380ff45d4e180b3bf7b8389da70 --- /dev/null +++ b/internal/core/thirdparty/NamedType/named_type_impl.hpp @@ -0,0 +1,146 @@ +#ifndef named_type_impl_h +#define named_type_impl_h + +#include <tuple> +#include <type_traits> +#include <utility> + +// C++17 detection +#if defined(_MSC_VER) && (defined(_HAS_CXX17) && _HAS_CXX17) +# define FLUENT_CPP17_PRESENT 1 +#elif __cplusplus >= 201703L +# define FLUENT_CPP17_PRESENT 1 +#else +# define FLUENT_CPP17_PRESENT 0 +#endif + +// Use [[nodiscard]] if available +#ifndef FLUENT_NODISCARD_PRESENT +# define FLUENT_NODISCARD_PRESENT FLUENT_CPP17_PRESENT +#endif + +#if FLUENT_NODISCARD_PRESENT +# define FLUENT_NODISCARD [[nodiscard]] +#else +# define FLUENT_NODISCARD +#endif + +// Enable empty base class optimization with multiple inheritance on Visual Studio. +#if defined(_MSC_VER) && _MSC_VER >= 1910 +# define FLUENT_EBCO __declspec(empty_bases) +#else +# define FLUENT_EBCO +#endif + +#if defined(__clang__) || defined(__GNUC__) +# define IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_BEGIN \ + _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Weffc++\"") +# define IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_END _Pragma("GCC diagnostic pop") +#else +# define IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_BEGIN /* Nothing */ +# define IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_END /* Nothing */ +#endif + +namespace fluent +{ + +template <typename T> +using IsNotReference = typename std::enable_if<!std::is_reference<T>::value, void>::type; + +template <typename T, typename Parameter, template <typename> class... Skills> +class FLUENT_EBCO NamedType : public Skills<NamedType<T, Parameter, Skills...>>... +{ +public: + using UnderlyingType = T; + + // constructor + NamedType() = default; + + explicit constexpr NamedType(T const& value) noexcept(std::is_nothrow_copy_constructible<T>::value) : value_(value) + { + } + + template <typename T_ = T, typename = IsNotReference<T_>> + explicit constexpr NamedType(T&& value) noexcept(std::is_nothrow_move_constructible<T>::value) + : value_(std::move(value)) + { + } + + // get + FLUENT_NODISCARD constexpr T& get() noexcept + { + return value_; + } + + FLUENT_NODISCARD constexpr std::remove_reference_t<T> const& get() const noexcept + { + return value_; + } + + // conversions + using ref = NamedType<T&, Parameter, Skills...>; + operator ref() + { + return ref(value_); + } + + struct argument + { + NamedType operator=(T&& value) const + { + IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_BEGIN + + return NamedType(std::forward<T>(value)); + + IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_END + } + template <typename U> + NamedType operator=(U&& value) const + { + IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_BEGIN + + return NamedType(std::forward<U>(value)); + + IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_END + } + + argument() = default; + argument(argument const&) = delete; + argument(argument&&) = delete; + argument& operator=(argument const&) = delete; + argument& operator=(argument&&) = delete; + }; + +private: + T value_; +}; + +template <template <typename T> class StrongType, typename T> +constexpr StrongType<T> make_named(T const& value) +{ + return StrongType<T>(value); +} + +namespace details { +template <class F, class... Ts> +struct AnyOrderCallable{ + F f; + template <class... Us> + auto operator()(Us&&...args) const + { + static_assert(sizeof...(Ts) == sizeof...(Us), "Passing wrong number of arguments"); + auto x = std::make_tuple(std::forward<Us>(args)...); + return f(std::move(std::get<Ts>(x))...); + } +}; +} //namespace details + +// EXPERIMENTAL - CAN BE CHANGED IN THE FUTURE. FEEDBACK WELCOME FOR IMPROVEMENTS! +template <class... Args, class F> +auto make_named_arg_function(F&& f) +{ + return details::AnyOrderCallable<F, Args...>{std::forward<F>(f)}; +} +} // namespace fluent + +#endif /* named_type_impl_h */ diff --git a/internal/core/thirdparty/NamedType/underlying_functionalities.hpp b/internal/core/thirdparty/NamedType/underlying_functionalities.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3dcebb1d5751703a9ff4d8c4b2ddb186f7cd498e --- /dev/null +++ b/internal/core/thirdparty/NamedType/underlying_functionalities.hpp @@ -0,0 +1,430 @@ +#ifndef UNDERLYING_FUNCTIONALITIES_HPP +#define UNDERLYING_FUNCTIONALITIES_HPP + +#include "crtp.hpp" +#include "named_type_impl.hpp" + +#include <functional> +#include <iostream> +#include <memory> + +// C++17 constexpr additions +#if FLUENT_CPP17_PRESENT +# define FLUENT_CONSTEXPR17 constexpr +#else +# define FLUENT_CONSTEXPR17 +#endif + +namespace fluent +{ + +template <typename T> +struct PreIncrementable : crtp<T, PreIncrementable> +{ + IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_BEGIN + + FLUENT_CONSTEXPR17 T& operator++() + { + ++this->underlying().get(); + return this->underlying(); + } + + IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_END +}; + +template <typename T> +struct PostIncrementable : crtp<T, PostIncrementable> +{ + IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_BEGIN + + FLUENT_CONSTEXPR17 T operator++(int) + { + return T(this->underlying().get()++); + } + + IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_END +}; + +template <typename T> +struct PreDecrementable : crtp<T, PreDecrementable> +{ + IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_BEGIN + + FLUENT_CONSTEXPR17 T& operator--() + { + --this->underlying().get(); + return this->underlying(); + } + + IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_END +}; + +template <typename T> +struct PostDecrementable : crtp<T, PostDecrementable> +{ + IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_BEGIN + + FLUENT_CONSTEXPR17 T operator--(int) + { + return T( this->underlying().get()-- ); + } + + IGNORE_SHOULD_RETURN_REFERENCE_TO_THIS_END +}; + +template <typename T> +struct BinaryAddable : crtp<T, BinaryAddable> +{ + FLUENT_NODISCARD constexpr T operator+(T const& other) const + { + return T(this->underlying().get() + other.get()); + } + FLUENT_CONSTEXPR17 T& operator+=(T const& other) + { + this->underlying().get() += other.get(); + return this->underlying(); + } +}; + +template <typename T> +struct UnaryAddable : crtp<T, UnaryAddable> +{ + FLUENT_NODISCARD constexpr T operator+() const + { + return T(+this->underlying().get()); + } +}; + +template <typename T> +struct Addable : BinaryAddable<T>, UnaryAddable<T> +{ + using BinaryAddable<T>::operator+; + using UnaryAddable<T>::operator+; +}; + +template <typename T> +struct BinarySubtractable : crtp<T, BinarySubtractable> +{ + FLUENT_NODISCARD constexpr T operator-(T const& other) const + { + return T(this->underlying().get() - other.get()); + } + FLUENT_CONSTEXPR17 T& operator-=(T const& other) + { + this->underlying().get() -= other.get(); + return this->underlying(); + } +}; + +template <typename T> +struct UnarySubtractable : crtp<T, UnarySubtractable> +{ + FLUENT_NODISCARD constexpr T operator-() const + { + return T(-this->underlying().get()); + } +}; + +template <typename T> +struct Subtractable : BinarySubtractable<T>, UnarySubtractable<T> +{ + using UnarySubtractable<T>::operator-; + using BinarySubtractable<T>::operator-; +}; + +template <typename T> +struct Multiplicable : crtp<T, Multiplicable> +{ + FLUENT_NODISCARD constexpr T operator*(T const& other) const + { + return T(this->underlying().get() * other.get()); + } + FLUENT_CONSTEXPR17 T& operator*=(T const& other) + { + this->underlying().get() *= other.get(); + return this->underlying(); + } +}; + +template <typename T> +struct Divisible : crtp<T, Divisible> +{ + FLUENT_NODISCARD constexpr T operator/(T const& other) const + { + return T(this->underlying().get() / other.get()); + } + FLUENT_CONSTEXPR17 T& operator/=(T const& other) + { + this->underlying().get() /= other.get(); + return this->underlying(); + } +}; + +template <typename T> +struct Modulable : crtp<T, Modulable> +{ + FLUENT_NODISCARD constexpr T operator%(T const& other) const + { + return T(this->underlying().get() % other.get()); + } + FLUENT_CONSTEXPR17 T& operator%=(T const& other) + { + this->underlying().get() %= other.get(); + return this->underlying(); + } +}; + +template <typename T> +struct BitWiseInvertable : crtp<T, BitWiseInvertable> +{ + FLUENT_NODISCARD constexpr T operator~() const + { + return T(~this->underlying().get()); + } +}; + +template <typename T> +struct BitWiseAndable : crtp<T, BitWiseAndable> +{ + FLUENT_NODISCARD constexpr T operator&(T const& other) const + { + return T(this->underlying().get() & other.get()); + } + FLUENT_CONSTEXPR17 T& operator&=(T const& other) + { + this->underlying().get() &= other.get(); + return this->underlying(); + } +}; + +template <typename T> +struct BitWiseOrable : crtp<T, BitWiseOrable> +{ + FLUENT_NODISCARD constexpr T operator|(T const& other) const + { + return T(this->underlying().get() | other.get()); + } + FLUENT_CONSTEXPR17 T& operator|=(T const& other) + { + this->underlying().get() |= other.get(); + return this->underlying(); + } +}; + +template <typename T> +struct BitWiseXorable : crtp<T, BitWiseXorable> +{ + FLUENT_NODISCARD constexpr T operator^(T const& other) const + { + return T(this->underlying().get() ^ other.get()); + } + FLUENT_CONSTEXPR17 T& operator^=(T const& other) + { + this->underlying().get() ^= other.get(); + return this->underlying(); + } +}; + +template <typename T> +struct BitWiseLeftShiftable : crtp<T, BitWiseLeftShiftable> +{ + FLUENT_NODISCARD constexpr T operator<<(T const& other) const + { + return T(this->underlying().get() << other.get()); + } + FLUENT_CONSTEXPR17 T& operator<<=(T const& other) + { + this->underlying().get() <<= other.get(); + return this->underlying(); + } +}; + +template <typename T> +struct BitWiseRightShiftable : crtp<T, BitWiseRightShiftable> +{ + FLUENT_NODISCARD constexpr T operator>>(T const& other) const + { + return T(this->underlying().get() >> other.get()); + } + FLUENT_CONSTEXPR17 T& operator>>=(T const& other) + { + this->underlying().get() >>= other.get(); + return this->underlying(); + } +}; + +template <typename T> +struct Comparable : crtp<T, Comparable> +{ + FLUENT_NODISCARD constexpr bool operator<(T const& other) const + { + return this->underlying().get() < other.get(); + } + FLUENT_NODISCARD constexpr bool operator>(T const& other) const + { + return other.get() < this->underlying().get(); + } + FLUENT_NODISCARD constexpr bool operator<=(T const& other) const + { + return !(other.get() < this->underlying().get()); + } + FLUENT_NODISCARD constexpr bool operator>=(T const& other) const + { + return !(*this < other); + } +// On Visual Studio before 19.22, you cannot define constexpr with friend function +// See: https://stackoverflow.com/a/60400110 +#if defined(_MSC_VER) && _MSC_VER < 1922 + FLUENT_NODISCARD constexpr bool operator==(T const& other) const + { + return !(*this < other) && !(other.get() < this->underlying().get()); + } +#else + FLUENT_NODISCARD friend constexpr bool operator==(Comparable<T> const& self, T const& other) + { + return !(self < other) && !(other.get() < self.underlying().get()); + } +#endif + FLUENT_NODISCARD constexpr bool operator!=(T const& other) const + { + return !(*this == other); + } +}; + +template< typename T > +struct Dereferencable; + +template< typename T, typename Parameter, template< typename > class ... Skills > +struct Dereferencable<NamedType<T, Parameter, Skills...>> : crtp<NamedType<T, Parameter, Skills...>, Dereferencable> +{ + FLUENT_NODISCARD constexpr T& operator*() & + { + return this->underlying().get(); + } + FLUENT_NODISCARD constexpr std::remove_reference_t<T> const& operator*() const & + { + return this->underlying().get(); + } +}; + +template <typename Destination> +struct ImplicitlyConvertibleTo +{ + template <typename T> + struct templ : crtp<T, templ> + { + FLUENT_NODISCARD constexpr operator Destination() const + { + return this->underlying().get(); + } + }; +}; + +template <typename T> +struct Printable : crtp<T, Printable> +{ + static constexpr bool is_printable = true; + + void print(std::ostream& os) const + { + os << this->underlying().get(); + } +}; + +template <typename T, typename Parameter, template <typename> class... Skills> +typename std::enable_if<NamedType<T, Parameter, Skills...>::is_printable, std::ostream&>::type +operator<<(std::ostream& os, NamedType<T, Parameter, Skills...> const& object) +{ + object.print(os); + return os; +} + +template <typename T> +struct Hashable +{ + static constexpr bool is_hashable = true; +}; + +template <typename NamedType_> +struct FunctionCallable; + +template <typename T, typename Parameter, template <typename> class... Skills> +struct FunctionCallable<NamedType<T, Parameter, Skills...>> : crtp<NamedType<T, Parameter, Skills...>, FunctionCallable> +{ + FLUENT_NODISCARD constexpr operator T const&() const + { + return this->underlying().get(); + } + FLUENT_NODISCARD constexpr operator T&() + { + return this->underlying().get(); + } +}; + +template <typename NamedType_> +struct MethodCallable; + +template <typename T, typename Parameter, template <typename> class... Skills> +struct MethodCallable<NamedType<T, Parameter, Skills...>> : crtp<NamedType<T, Parameter, Skills...>, MethodCallable> +{ + FLUENT_NODISCARD FLUENT_CONSTEXPR17 std::remove_reference_t<T> const* operator->() const + { + return std::addressof(this->underlying().get()); + } + FLUENT_NODISCARD FLUENT_CONSTEXPR17 std::remove_reference_t<T>* operator->() + { + return std::addressof(this->underlying().get()); + } +}; + +template <typename NamedType_> +struct Callable + : FunctionCallable<NamedType_> + , MethodCallable<NamedType_> +{ +}; + +template <typename T> +struct Arithmetic + : PreIncrementable<T> + , PostIncrementable<T> + , PreDecrementable<T> + , PostDecrementable<T> + , Addable<T> + , Subtractable<T> + , Multiplicable<T> + , Divisible<T> + , Modulable<T> + , BitWiseInvertable<T> + , BitWiseAndable<T> + , BitWiseOrable<T> + , BitWiseXorable<T> + , BitWiseLeftShiftable<T> + , BitWiseRightShiftable<T> + , Comparable<T> + , Printable<T> + , Hashable<T> +{ +}; + +} // namespace fluent + +namespace std +{ +template <typename T, typename Parameter, template <typename> class... Skills> +struct hash<fluent::NamedType<T, Parameter, Skills...>> +{ + using NamedType = fluent::NamedType<T, Parameter, Skills...>; + using checkIfHashable = typename std::enable_if<NamedType::is_hashable, void>::type; + + size_t operator()(fluent::NamedType<T, Parameter, Skills...> const& x) const noexcept + { + static_assert(noexcept(std::hash<T>()(x.get())), "hash fuction should not throw"); + + return std::hash<T>()(x.get()); + } +}; + +} // namespace std + +#endif diff --git a/internal/core/thirdparty/NamedType/version.hpp b/internal/core/thirdparty/NamedType/version.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8acee43b2227824ba4f9bbbf1d09194faaef962e --- /dev/null +++ b/internal/core/thirdparty/NamedType/version.hpp @@ -0,0 +1,9 @@ +#pragma once + +namespace fluent +{ +#define NAMED_TYPE_VERSION_MAJOR 1 +#define NAMED_TYPE_VERSION_MINOR 1 +#define NAMED_TYPE_VERSION_PATCH 0 +#define NAMED_TYPE_VERSION "1.1.0" +} // namespace fluent diff --git a/internal/core/unittest/test_binary.cpp b/internal/core/unittest/test_binary.cpp index 13a5fb7217149f1d559ae88243473cf928c6a766..bf5bb25944c9d26f772b1d80d8034ca735c38b26 100644 --- a/internal/core/unittest/test_binary.cpp +++ b/internal/core/unittest/test_binary.cpp @@ -21,8 +21,8 @@ TEST(Binary, Insert) { int64_t num_queries = 10; int64_t topK = 5; auto schema = std::make_shared<Schema>(); - schema->AddField("vecbin", DataType::VECTOR_BINARY, 128, MetricType::METRIC_Jaccard); - schema->AddField("age", DataType::INT32); + schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, 128, MetricType::METRIC_Jaccard); + schema->AddDebugField("age", DataType::INT32); auto dataset = DataGen(schema, N, 10); auto segment = CreateSegment(schema); segment->PreInsert(N); diff --git a/internal/core/unittest/test_bitmap.cpp b/internal/core/unittest/test_bitmap.cpp index 7ae87003557a2be1217f7ebe0aea82cce35adbe5..5b8a99c94f6324d846fa09f7c7baf3c26f814c7e 100644 --- a/internal/core/unittest/test_bitmap.cpp +++ b/internal/core/unittest/test_bitmap.cpp @@ -18,7 +18,7 @@ TEST(Bitmap, Naive) { using namespace milvus::segcore; using namespace milvus::query; auto schema = std::make_shared<Schema>(); - schema->AddField("height", DataType::FLOAT); + schema->AddDebugField("height", DataType::FLOAT); int N = 10000; auto raw_data = DataGen(schema, N); auto vec = raw_data.get_col<float>(0); diff --git a/internal/core/unittest/test_expr.cpp b/internal/core/unittest/test_expr.cpp index fe224b27e7ef6c549732fe01804902c8101eb2f6..621d6839e935f7f4148a549ad4c3dfd15db35984 100644 --- a/internal/core/unittest/test_expr.cpp +++ b/internal/core/unittest/test_expr.cpp @@ -98,11 +98,11 @@ TEST(Expr, Range) { } })"; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); - schema->AddField("age", DataType::INT32); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::INT32); auto plan = CreatePlan(*schema, dsl_string); ShowPlanNodeVisitor shower; - Assert(plan->tag2field_.at("$0") == "fakevec"); + Assert(plan->tag2field_.at("$0") == schema->get_offset(FieldName("fakevec"))); auto out = shower.call_child(*plan->plan_node_); std::cout << out.dump(4); } @@ -140,11 +140,11 @@ TEST(Expr, RangeBinary) { } })"; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard); - schema->AddField("age", DataType::INT32); + schema->AddDebugField("fakevec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard); + schema->AddDebugField("age", DataType::INT32); auto plan = CreatePlan(*schema, dsl_string); ShowPlanNodeVisitor shower; - Assert(plan->tag2field_.at("$0") == "fakevec"); + Assert(plan->tag2field_.at("$0") == schema->get_offset(FieldName("fakevec"))); auto out = shower.call_child(*plan->plan_node_); std::cout << out.dump(4); } @@ -182,8 +182,8 @@ TEST(Expr, InvalidRange) { } })"; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); - schema->AddField("age", DataType::INT32); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::INT32); ASSERT_ANY_THROW(CreatePlan(*schema, dsl_string)); } @@ -221,8 +221,8 @@ TEST(Expr, InvalidDSL) { })"; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); - schema->AddField("age", DataType::INT32); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::INT32); ASSERT_ANY_THROW(CreatePlan(*schema, dsl_string)); } @@ -231,13 +231,13 @@ TEST(Expr, ShowExecutor) { using namespace milvus::segcore; auto node = std::make_unique<FloatVectorANNS>(); auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); int64_t num_queries = 100L; auto raw_data = DataGen(schema, num_queries); auto& info = node->query_info_; info.metric_type_ = "L2"; info.topK_ = 20; - info.field_id_ = "fakevec"; + info.field_offset_ = FieldOffset(0); node->predicate_ = std::nullopt; ShowPlanNodeVisitor show_visitor; PlanNodePtr base(node.release()); @@ -290,8 +290,8 @@ TEST(Expr, TestRange) { } })"; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); - schema->AddField("age", DataType::INT32); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::INT32); auto seg = CreateSegment(schema); int N = 10000; @@ -374,8 +374,8 @@ TEST(Expr, TestTerm) { } })"; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); - schema->AddField("age", DataType::INT32); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::INT32); auto seg = CreateSegment(schema); int N = 10000; @@ -477,8 +477,8 @@ TEST(Expr, TestSimpleDsl) { } auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); - schema->AddField("age", DataType::INT32); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::INT32); auto seg = CreateSegment(schema); std::vector<int> age_col; diff --git a/internal/core/unittest/test_index_wrapper.cpp b/internal/core/unittest/test_index_wrapper.cpp index d50f11d18a78fc2540cb15fcdbe1322b4c9238ef..fad216093fb64c931d71c12a9970c92407bb2b66 100644 --- a/internal/core/unittest/test_index_wrapper.cpp +++ b/internal/core/unittest/test_index_wrapper.cpp @@ -227,10 +227,10 @@ GenDataset(int64_t N, const milvus::knowhere::MetricType& metric_type, bool is_b auto schema = std::make_shared<milvus::Schema>(); auto faiss_metric_type = milvus::knowhere::GetMetricType(metric_type); if (!is_binary) { - schema->AddField("fakevec", milvus::engine::DataType::VECTOR_FLOAT, dim, faiss_metric_type); + schema->AddDebugField("fakevec", milvus::engine::DataType::VECTOR_FLOAT, dim, faiss_metric_type); return milvus::segcore::DataGen(schema, N); } else { - schema->AddField("fakebinvec", milvus::engine::DataType::VECTOR_BINARY, dim, faiss_metric_type); + schema->AddDebugField("fakebinvec", milvus::engine::DataType::VECTOR_BINARY, dim, faiss_metric_type); return milvus::segcore::DataGen(schema, N); } } diff --git a/internal/core/unittest/test_indexing.cpp b/internal/core/unittest/test_indexing.cpp index f937e5012dda9a855dcb7b5d89e9c8865ab9587a..4ff065e871803c11c195b82a75f3636590d50e64 100644 --- a/internal/core/unittest/test_indexing.cpp +++ b/internal/core/unittest/test_indexing.cpp @@ -242,8 +242,8 @@ TEST(Indexing, BinaryBruteForce) { int64_t dim = 512; auto result_count = topk * num_queries; auto schema = std::make_shared<Schema>(); - schema->AddField("vecbin", DataType::VECTOR_BINARY, dim, MetricType::METRIC_Jaccard); - schema->AddField("age", DataType::INT64); + schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, dim, MetricType::METRIC_Jaccard); + schema->AddDebugField("age", DataType::INT64); auto dataset = DataGen(schema, N, 10); auto bin_vec = dataset.get_col<uint8_t>(0); auto query_data = 1024 * dim / 8 + bin_vec.data(); diff --git a/internal/core/unittest/test_query.cpp b/internal/core/unittest/test_query.cpp index 8a35806669fd477fd811519e287bf4f1bc4518c3..db6f37b2b640a3cbb016df08cc0b52f2a11355cf 100644 --- a/internal/core/unittest/test_query.cpp +++ b/internal/core/unittest/test_query.cpp @@ -72,13 +72,13 @@ TEST(Query, ShowExecutor) { using namespace milvus; auto node = std::make_unique<FloatVectorANNS>(); auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); int64_t num_queries = 100L; auto raw_data = DataGen(schema, num_queries); auto& info = node->query_info_; info.metric_type_ = "L2"; info.topK_ = 20; - info.field_id_ = "fakevec"; + info.field_offset_ = FieldOffset(1000); node->predicate_ = std::nullopt; ShowPlanNodeVisitor show_visitor; PlanNodePtr base(node.release()); @@ -113,7 +113,7 @@ TEST(Query, DSL) { })"; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); auto plan = CreatePlan(*schema, dsl_string); auto res = shower.call_child(*plan->plan_node_); @@ -159,7 +159,7 @@ TEST(Query, ParsePlaceholderGroup) { })"; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); auto plan = CreatePlan(*schema, dsl_string); int64_t num_queries = 100000; int dim = 16; @@ -172,8 +172,8 @@ TEST(Query, ExecWithPredicate) { using namespace milvus::query; using namespace milvus::segcore; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); - schema->AddField("age", DataType::FLOAT); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::FLOAT); std::string dsl = R"({ "bool": { "must": [ @@ -263,8 +263,8 @@ TEST(Query, ExecTerm) { using namespace milvus::query; using namespace milvus::segcore; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); - schema->AddField("age", DataType::FLOAT); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::FLOAT); std::string dsl = R"({ "bool": { "must": [ @@ -316,8 +316,8 @@ TEST(Query, ExecEmpty) { using namespace milvus::query; using namespace milvus::segcore; auto schema = std::make_shared<Schema>(); - schema->AddField("age", DataType::FLOAT); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::FLOAT); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); std::string dsl = R"({ "bool": { "must": [ @@ -361,8 +361,8 @@ TEST(Query, ExecWithoutPredicate) { using namespace milvus::query; using namespace milvus::segcore; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); - schema->AddField("age", DataType::FLOAT); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::FLOAT); std::string dsl = R"({ "bool": { "must": [ @@ -466,7 +466,7 @@ TEST(Indexing, InnerProduct) { ] } })"; - schema->AddField("normalized", DataType::VECTOR_FLOAT, dim, MetricType::METRIC_INNER_PRODUCT); + schema->AddDebugField("normalized", DataType::VECTOR_FLOAT, dim, MetricType::METRIC_INNER_PRODUCT); auto dataset = DataGen(schema, N); auto segment = CreateSegment(schema); auto plan = CreatePlan(*schema, dsl); @@ -571,8 +571,8 @@ TEST(Query, ExecWithPredicateBinary) { using namespace milvus::query; using namespace milvus::segcore; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard); - schema->AddField("age", DataType::FLOAT); + schema->AddDebugField("fakevec", DataType::VECTOR_BINARY, 512, MetricType::METRIC_Jaccard); + schema->AddDebugField("age", DataType::FLOAT); std::string dsl = R"({ "bool": { "must": [ diff --git a/internal/core/unittest/test_sealed.cpp b/internal/core/unittest/test_sealed.cpp index 1701c0a5a1ca12bc4274f04379ac543db5bdaf0c..38305ddaed62f4c0203493925124ab11f859d476 100644 --- a/internal/core/unittest/test_sealed.cpp +++ b/internal/core/unittest/test_sealed.cpp @@ -30,8 +30,8 @@ TEST(Sealed, without_predicate) { auto dim = 16; auto topK = 5; auto metric_type = MetricType::METRIC_L2; - schema->AddField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type); - schema->AddField("age", DataType::FLOAT); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type); + schema->AddDebugField("age", DataType::FLOAT); std::string dsl = R"({ "bool": { "must": [ @@ -127,8 +127,8 @@ TEST(Sealed, with_predicate) { auto dim = 16; auto topK = 5; auto metric_type = MetricType::METRIC_L2; - schema->AddField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type); - schema->AddField("counter", DataType::INT64); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, metric_type); + schema->AddDebugField("counter", DataType::INT64); std::string dsl = R"({ "bool": { "must": [ diff --git a/internal/core/unittest/test_segcore.cpp b/internal/core/unittest/test_segcore.cpp index 1e68c43c1a8a9c1546b5d86b1eb62ea9d6585f82..df4aec443b19b39e1d5b94d2580e8b6f9431f582 100644 --- a/internal/core/unittest/test_segcore.cpp +++ b/internal/core/unittest/test_segcore.cpp @@ -63,8 +63,8 @@ TEST(SegmentCoreTest, NormalDistributionTest) { using namespace milvus::segcore; using namespace milvus::engine; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); - schema->AddField("age", DataType::INT32); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::INT32); int N = 1000 * 1000; auto [raw_data, timestamps, uids] = generate_data(N); auto segment = CreateSegment(schema); @@ -76,8 +76,8 @@ TEST(SegmentCoreTest, MockTest) { using namespace milvus::segcore; using namespace milvus::engine; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); - schema->AddField("age", DataType::INT32); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::INT32); std::vector<char> raw_data; std::vector<Timestamp> timestamps; std::vector<int64_t> uids; @@ -116,8 +116,8 @@ TEST(SegmentCoreTest, SmallIndex) { using namespace milvus::segcore; using namespace milvus::engine; auto schema = std::make_shared<Schema>(); - schema->AddField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); - schema->AddField("age", DataType::INT32); + schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2); + schema->AddDebugField("age", DataType::INT32); int N = 1024 * 1024; auto data = DataGen(schema, N); } diff --git a/internal/core/unittest/test_utils/DataGen.h b/internal/core/unittest/test_utils/DataGen.h index e26374ab5eefbc5018e3c84682aa3e022075ea90..8eb7adcdec979ba1dc5063cad6c4b024f8614189 100644 --- a/internal/core/unittest/test_utils/DataGen.h +++ b/internal/core/unittest/test_utils/DataGen.h @@ -96,7 +96,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42) { case engine::DataType::VECTOR_FLOAT: { auto dim = field.get_dim(); vector<float> final; - bool is_ip = starts_with(field.get_name(), "normalized"); + bool is_ip = starts_with(field.get_name().get(), "normalized"); for (int n = 0; n < N; ++n) { vector<float> data(dim); float sum = 0; @@ -129,7 +129,7 @@ DataGen(SchemaPtr schema, int64_t N, uint64_t seed = 42) { case engine::DataType::INT64: { vector<int64_t> data(N); // begin with counter - if (starts_with(field.get_name(), "counter")) { + if (starts_with(field.get_name().get(), "counter")) { int64_t index = 0; for (auto& x : data) { x = index++; diff --git a/internal/querynode/collection_replica.go b/internal/querynode/collection_replica.go index c035069146627edf0c4af0282eb5e6d8b68ad10d..d36c55aaf97b5db26019c8ac9187d6c8350c5aba 100644 --- a/internal/querynode/collection_replica.go +++ b/internal/querynode/collection_replica.go @@ -64,11 +64,11 @@ type collectionReplica interface { } type collectionReplicaImpl struct { - tSafe tSafe - - mu sync.RWMutex // guards collections and segments + mu sync.RWMutex collections []*Collection segments map[UniqueID]*Segment + + tSafe tSafe } //----------------------------------------------------------------------------------------------------- tSafe @@ -95,10 +95,11 @@ func (colReplica *collectionReplicaImpl) addCollection(collectionID UniqueID, sc } func (colReplica *collectionReplicaImpl) removeCollection(collectionID UniqueID) error { + collection, err := colReplica.getCollectionByID(collectionID) + colReplica.mu.Lock() defer colReplica.mu.Unlock() - collection, err := colReplica.getCollectionByIDPrivate(collectionID) if err != nil { return err } @@ -127,10 +128,6 @@ func (colReplica *collectionReplicaImpl) getCollectionByID(collectionID UniqueID colReplica.mu.RLock() defer colReplica.mu.RUnlock() - return colReplica.getCollectionByIDPrivate(collectionID) -} - -func (colReplica *collectionReplicaImpl) getCollectionByIDPrivate(collectionID UniqueID) (*Collection, error) { for _, collection := range colReplica.collections { if collection.ID() == collectionID { return collection, nil @@ -167,26 +164,26 @@ func (colReplica *collectionReplicaImpl) hasCollection(collectionID UniqueID) bo //----------------------------------------------------------------------------------------------------- partition func (colReplica *collectionReplicaImpl) getPartitionNum(collectionID UniqueID) (int, error) { - colReplica.mu.RLock() - defer colReplica.mu.RUnlock() - - collection, err := colReplica.getCollectionByIDPrivate(collectionID) + collection, err := colReplica.getCollectionByID(collectionID) if err != nil { return -1, err } + colReplica.mu.RLock() + defer colReplica.mu.RUnlock() + return len(collection.partitions), nil } func (colReplica *collectionReplicaImpl) addPartition(collectionID UniqueID, partitionTag string) error { - colReplica.mu.Lock() - defer colReplica.mu.Unlock() - - collection, err := colReplica.getCollectionByIDPrivate(collectionID) + collection, err := colReplica.getCollectionByID(collectionID) if err != nil { return err } + colReplica.mu.Lock() + defer colReplica.mu.Unlock() + var newPartition = newPartition(partitionTag) *collection.Partitions() = append(*collection.Partitions(), newPartition) @@ -194,18 +191,14 @@ func (colReplica *collectionReplicaImpl) addPartition(collectionID UniqueID, par } func (colReplica *collectionReplicaImpl) removePartition(collectionID UniqueID, partitionTag string) error { - colReplica.mu.Lock() - defer colReplica.mu.Unlock() - - return colReplica.removePartitionPrivate(collectionID, partitionTag) -} - -func (colReplica *collectionReplicaImpl) removePartitionPrivate(collectionID UniqueID, partitionTag string) error { - collection, err := colReplica.getCollectionByIDPrivate(collectionID) + collection, err := colReplica.getCollectionByID(collectionID) if err != nil { return err } + colReplica.mu.Lock() + defer colReplica.mu.Unlock() + var tmpPartitions = make([]*Partition, 0) for _, p := range *collection.Partitions() { if p.Tag() == partitionTag { @@ -222,7 +215,6 @@ func (colReplica *collectionReplicaImpl) removePartitionPrivate(collectionID Uni return nil } -// deprecated func (colReplica *collectionReplicaImpl) addPartitionsByCollectionMeta(colMeta *etcdpb.CollectionMeta) error { if !colReplica.hasCollection(colMeta.ID) { err := errors.New("Cannot find collection, id = " + strconv.FormatInt(colMeta.ID, 10)) @@ -247,14 +239,13 @@ func (colReplica *collectionReplicaImpl) addPartitionsByCollectionMeta(colMeta * } func (colReplica *collectionReplicaImpl) removePartitionsByCollectionMeta(colMeta *etcdpb.CollectionMeta) error { - colReplica.mu.Lock() - defer colReplica.mu.Unlock() - - col, err := colReplica.getCollectionByIDPrivate(colMeta.ID) + col, err := colReplica.getCollectionByID(colMeta.ID) if err != nil { return err } + colReplica.mu.Lock() + pToDel := make([]string, 0) for _, partition := range col.partitions { hasPartition := false @@ -268,8 +259,10 @@ func (colReplica *collectionReplicaImpl) removePartitionsByCollectionMeta(colMet } } + colReplica.mu.Unlock() + for _, tag := range pToDel { - err := colReplica.removePartitionPrivate(col.ID(), tag) + err := colReplica.removePartition(col.ID(), tag) if err != nil { log.Println(err) } @@ -280,18 +273,14 @@ func (colReplica *collectionReplicaImpl) removePartitionsByCollectionMeta(colMet } func (colReplica *collectionReplicaImpl) getPartitionByTag(collectionID UniqueID, partitionTag string) (*Partition, error) { - colReplica.mu.RLock() - defer colReplica.mu.RUnlock() - - return colReplica.getPartitionByTagPrivate(collectionID, partitionTag) -} - -func (colReplica *collectionReplicaImpl) getPartitionByTagPrivate(collectionID UniqueID, partitionTag string) (*Partition, error) { - collection, err := colReplica.getCollectionByIDPrivate(collectionID) + collection, err := colReplica.getCollectionByID(collectionID) if err != nil { return nil, err } + colReplica.mu.RLock() + defer colReplica.mu.RUnlock() + for _, p := range *collection.Partitions() { if p.Tag() == partitionTag { return p, nil @@ -302,15 +291,15 @@ func (colReplica *collectionReplicaImpl) getPartitionByTagPrivate(collectionID U } func (colReplica *collectionReplicaImpl) hasPartition(collectionID UniqueID, partitionTag string) bool { - colReplica.mu.RLock() - defer colReplica.mu.RUnlock() - - collection, err := colReplica.getCollectionByIDPrivate(collectionID) + collection, err := colReplica.getCollectionByID(collectionID) if err != nil { log.Println(err) return false } + colReplica.mu.RLock() + defer colReplica.mu.RUnlock() + for _, p := range *collection.Partitions() { if p.Tag() == partitionTag { return true @@ -354,19 +343,19 @@ func (colReplica *collectionReplicaImpl) getSegmentStatistics() []*internalpb.Se } func (colReplica *collectionReplicaImpl) addSegment(segmentID UniqueID, partitionTag string, collectionID UniqueID) error { - colReplica.mu.Lock() - defer colReplica.mu.Unlock() - - collection, err := colReplica.getCollectionByIDPrivate(collectionID) + collection, err := colReplica.getCollectionByID(collectionID) if err != nil { return err } - partition, err2 := colReplica.getPartitionByTagPrivate(collectionID, partitionTag) + partition, err2 := colReplica.getPartitionByTag(collectionID, partitionTag) if err2 != nil { return err2 } + colReplica.mu.Lock() + defer colReplica.mu.Unlock() + var newSegment = newSegment(collection, segmentID, partitionTag, collectionID) colReplica.segments[segmentID] = newSegment diff --git a/internal/querynode/search_service.go b/internal/querynode/search_service.go index 5a6ce44a55ba9d171912a0d13cd929039e97be87..a009326540c9c346af3f443368c51d564cd6a259 100644 --- a/internal/querynode/search_service.go +++ b/internal/querynode/search_service.go @@ -27,8 +27,8 @@ type searchService struct { replica collectionReplica tSafeWatcher *tSafeWatcher - serviceableTimeMutex sync.Mutex // guards serviceableTime serviceableTime Timestamp + serviceableTimeMutex sync.Mutex msgBuffer chan msgstream.TsMsg unsolvedMsg []msgstream.TsMsg diff --git a/internal/querynode/segment.go b/internal/querynode/segment.go index 2e00200a2fb6f5af96e5886ffb80cd8b531dd9bc..9333f610d1bc59a80ac4bc8e808dfd4bd4c214a0 100644 --- a/internal/querynode/segment.go +++ b/internal/querynode/segment.go @@ -25,18 +25,16 @@ import ( type indexParam = map[string]string type Segment struct { - segmentPtr C.CSegmentBase - segmentID UniqueID - partitionTag string // TODO: use partitionID - collectionID UniqueID - lastMemSize int64 - lastRowCount int64 - - rmMutex sync.Mutex // guards recentlyModified + segmentPtr C.CSegmentBase + segmentID UniqueID + partitionTag string // TODO: use partitionID + collectionID UniqueID + lastMemSize int64 + lastRowCount int64 + mu sync.Mutex recentlyModified bool - - paramMutex sync.RWMutex // guards indexParam - indexParam map[int64]indexParam + indexParam map[int64]indexParam + paramMutex sync.RWMutex } func (s *Segment) ID() UniqueID { @@ -44,14 +42,14 @@ func (s *Segment) ID() UniqueID { } func (s *Segment) SetRecentlyModified(modify bool) { - s.rmMutex.Lock() - defer s.rmMutex.Unlock() + s.mu.Lock() + defer s.mu.Unlock() s.recentlyModified = modify } func (s *Segment) GetRecentlyModified() bool { - s.rmMutex.Lock() - defer s.rmMutex.Unlock() + s.mu.Lock() + defer s.mu.Unlock() return s.recentlyModified } diff --git a/internal/querynode/tsafe.go b/internal/querynode/tsafe.go index 60529a3c9868837c3e651dc057b8d407281edeb7..8673359d655c3bcaff38b4976af983eb3dc1568a 100644 --- a/internal/querynode/tsafe.go +++ b/internal/querynode/tsafe.go @@ -31,7 +31,7 @@ type tSafe interface { } type tSafeImpl struct { - tSafeMu sync.Mutex // guards all fields + tSafeMu sync.Mutex tSafe Timestamp watcherList []*tSafeWatcher } diff --git a/tests/python/test_index.py b/tests/python/test_index.py index 8162fa37fe82c30a55a1a423d89b45b1b128cc6f..44e176a95a06ec2b143ea0c95399aabe6453410f 100644 --- a/tests/python/test_index.py +++ b/tests/python/test_index.py @@ -286,6 +286,7 @@ class TestIndexBase: res = connect.search(collection, query) assert len(res) == nq + @pytest.mark.skip("test_create_index_multithread_ip") @pytest.mark.timeout(BUILD_TIMEOUT) @pytest.mark.level(2) def test_create_index_multithread_ip(self, connect, collection, args):