diff --git a/internal/core/src/indexbuilder/IndexWrapper.cpp b/internal/core/src/indexbuilder/IndexWrapper.cpp index 7938db060c1553697c5852dc3059e5cffea103d2..157f2e1674612aa5bfd99b9f2e1e872545581764 100644 --- a/internal/core/src/indexbuilder/IndexWrapper.cpp +++ b/internal/core/src/indexbuilder/IndexWrapper.cpp @@ -67,75 +67,65 @@ IndexWrapper::parse() { config_[key] = value; } - if (!config_.contains(milvus::knowhere::meta::DIM)) { - // should raise exception here? - PanicInfo("dim must be specific in type params or index params!"); - } else { - auto dim = config_[milvus::knowhere::meta::DIM].get<std::string>(); - config_[milvus::knowhere::meta::DIM] = std::stoi(dim); - } + auto stoi_closure = [](const std::string& s) -> int { return std::stoi(s); }; - if (!config_.contains(milvus::knowhere::meta::TOPK)) { - } else { - auto topk = config_[milvus::knowhere::meta::TOPK].get<std::string>(); - config_[milvus::knowhere::meta::TOPK] = std::stoi(topk); - } + /***************************** meta *******************************/ + check_parameter<int>(milvus::knowhere::meta::DIM, stoi_closure, std::nullopt); + check_parameter<int>(milvus::knowhere::meta::TOPK, stoi_closure, std::nullopt); - if (!config_.contains(milvus::knowhere::IndexParams::nlist)) { - } else { - auto nlist = config_[milvus::knowhere::IndexParams::nlist].get<std::string>(); - config_[milvus::knowhere::IndexParams::nlist] = std::stoi(nlist); - } + /***************************** IVF Params *******************************/ + check_parameter<int>(milvus::knowhere::IndexParams::nprobe, stoi_closure, std::nullopt); + check_parameter<int>(milvus::knowhere::IndexParams::nlist, stoi_closure, std::nullopt); + check_parameter<int>(milvus::knowhere::IndexParams::m, stoi_closure, std::nullopt); + check_parameter<int>(milvus::knowhere::IndexParams::nbits, stoi_closure, std::nullopt); - if (!config_.contains(milvus::knowhere::IndexParams::nprobe)) { - } else { - auto nprobe = config_[milvus::knowhere::IndexParams::nprobe].get<std::string>(); - config_[milvus::knowhere::IndexParams::nprobe] = std::stoi(nprobe); - } + /************************** NSG Parameter **************************/ + check_parameter<int>(milvus::knowhere::IndexParams::knng, stoi_closure, std::nullopt); + check_parameter<int>(milvus::knowhere::IndexParams::search_length, stoi_closure, std::nullopt); + check_parameter<int>(milvus::knowhere::IndexParams::out_degree, stoi_closure, std::nullopt); + check_parameter<int>(milvus::knowhere::IndexParams::candidate, stoi_closure, std::nullopt); - if (!config_.contains(milvus::knowhere::IndexParams::nbits)) { - } else { - auto nbits = config_[milvus::knowhere::IndexParams::nbits].get<std::string>(); - config_[milvus::knowhere::IndexParams::nbits] = std::stoi(nbits); - } + /************************** HNSW Params *****************************/ + check_parameter<int>(milvus::knowhere::IndexParams::efConstruction, stoi_closure, std::nullopt); + check_parameter<int>(milvus::knowhere::IndexParams::M, stoi_closure, std::nullopt); + check_parameter<int>(milvus::knowhere::IndexParams::ef, stoi_closure, std::nullopt); - if (!config_.contains(milvus::knowhere::IndexParams::m)) { - } else { - auto m = config_[milvus::knowhere::IndexParams::m].get<std::string>(); - config_[milvus::knowhere::IndexParams::m] = std::stoi(m); - } + /************************** Annoy Params *****************************/ + check_parameter<int>(milvus::knowhere::IndexParams::n_trees, stoi_closure, std::nullopt); + check_parameter<int>(milvus::knowhere::IndexParams::search_k, stoi_closure, std::nullopt); - /************************** NSG Parameter **************************/ - if (!config_.contains(milvus::knowhere::IndexParams::knng)) { - } else { - auto knng = config_[milvus::knowhere::IndexParams::knng].get<std::string>(); - config_[milvus::knowhere::IndexParams::knng] = std::stoi(knng); - } + /************************** PQ Params *****************************/ + check_parameter<int>(milvus::knowhere::IndexParams::PQM, stoi_closure, std::nullopt); - if (!config_.contains(milvus::knowhere::IndexParams::search_length)) { - } else { - auto search_length = config_[milvus::knowhere::IndexParams::search_length].get<std::string>(); - config_[milvus::knowhere::IndexParams::search_length] = std::stoi(search_length); - } + /************************** NGT Params *****************************/ + check_parameter<int>(milvus::knowhere::IndexParams::edge_size, stoi_closure, std::nullopt); - if (!config_.contains(milvus::knowhere::IndexParams::out_degree)) { - } else { - auto out_degree = config_[milvus::knowhere::IndexParams::out_degree].get<std::string>(); - config_[milvus::knowhere::IndexParams::out_degree] = std::stoi(out_degree); - } + /************************** NGT Search Params *****************************/ + check_parameter<int>(milvus::knowhere::IndexParams::epsilon, stoi_closure, std::nullopt); + check_parameter<int>(milvus::knowhere::IndexParams::max_search_edges, stoi_closure, std::nullopt); - if (!config_.contains(milvus::knowhere::IndexParams::candidate)) { - } else { - auto candidate = config_[milvus::knowhere::IndexParams::candidate].get<std::string>(); - config_[milvus::knowhere::IndexParams::candidate] = std::stoi(candidate); - } + /************************** NGT_PANNG Params *****************************/ + check_parameter<int>(milvus::knowhere::IndexParams::forcedly_pruned_edge_size, stoi_closure, std::nullopt); + check_parameter<int>(milvus::knowhere::IndexParams::selectively_pruned_edge_size, stoi_closure, std::nullopt); + + /************************** NGT_ONNG Params *****************************/ + check_parameter<int>(milvus::knowhere::IndexParams::outgoing_edge_size, stoi_closure, std::nullopt); + check_parameter<int>(milvus::knowhere::IndexParams::incoming_edge_size, stoi_closure, std::nullopt); + + /************************** Serialize Params *******************************/ + check_parameter<int>(milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, stoi_closure, std::optional{4}); +} - /************************** Serialize *******************************/ - if (!config_.contains(milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE)) { - config_[milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE] = 4; +template <typename T> +void +IndexWrapper::check_parameter(const std::string& key, std::function<T(std::string)> fn, std::optional<T> default_v) { + if (!config_.contains(key)) { + if (default_v.has_value()) { + config_[key] = default_v.value(); + } } else { - auto slice_size = config_[milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE].get<std::string>(); - config_[milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE] = std::stoi(slice_size); + auto value = config_[key]; + config_[key] = fn(value); } } diff --git a/internal/core/src/indexbuilder/IndexWrapper.h b/internal/core/src/indexbuilder/IndexWrapper.h index 562ef77f1d52f84ebb6686da2a63c4553c123f67..f0d75da54c3b51786e5b78b699dba2775a226a51 100644 --- a/internal/core/src/indexbuilder/IndexWrapper.h +++ b/internal/core/src/indexbuilder/IndexWrapper.h @@ -52,6 +52,12 @@ class IndexWrapper { void StoreRawData(const knowhere::DatasetPtr& dataset); + template <typename T> + void + check_parameter(const std::string& key, + std::function<T(std::string)> fn, + std::optional<T> default_v = std::nullopt); + public: void BuildWithIds(const knowhere::DatasetPtr& dataset); diff --git a/internal/core/src/indexbuilder/utils.h b/internal/core/src/indexbuilder/utils.h index 7e40e6283aa163dd67ca9edb18ae86a93d921b2d..e1ed0804965cd9698a0a42cb182931d6c2a94cc3 100644 --- a/internal/core/src/indexbuilder/utils.h +++ b/internal/core/src/indexbuilder/utils.h @@ -25,14 +25,17 @@ NM_List() { static std::vector<std::string> ret{ milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, milvus::knowhere::IndexEnum::INDEX_NSG, + milvus::knowhere::IndexEnum::INDEX_RHNSWFlat, }; return ret; } std::vector<std::string> BIN_List() { - static std::vector<std::string> ret{milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, - milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT}; + static std::vector<std::string> ret{ + milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, + milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, + }; return ret; } @@ -40,7 +43,7 @@ std::vector<std::string> Need_ID_List() { static std::vector<std::string> ret{ // milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, - // milvus::knowhere::IndexEnum::INDEX_NSG + // milvus::knowhere::IndexEnum::INDEX_NSG, }; return ret; @@ -48,7 +51,9 @@ Need_ID_List() { std::vector<std::string> Need_BuildAll_list() { - static std::vector<std::string> ret{milvus::knowhere::IndexEnum::INDEX_NSG}; + static std::vector<std::string> ret{ + milvus::knowhere::IndexEnum::INDEX_NSG, + }; return ret; } diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp index 2bed416f9397cb3f8e86acb2af7f56d8f8c4fa1f..182301c7b7edb06cf4b48c6238d8f2fccc4a00e6 100644 --- a/internal/core/src/segcore/segment_c.cpp +++ b/internal/core/src/segcore/segment_c.cpp @@ -181,15 +181,15 @@ FillTargetEntry(CSegmentBase c_segment, CPlan c_plan, CQueryResult c_result) { CStatus UpdateSegmentIndex(CSegmentBase c_segment, CLoadIndexInfo c_load_index_info) { - auto status = CStatus(); try { auto segment = (milvus::segcore::SegmentBase*)c_segment; auto load_index_info = (LoadIndexInfo*)c_load_index_info; - auto res = segment->LoadIndexing(*load_index_info); + auto status = CStatus(); status.error_code = Success; status.error_msg = ""; return status; } catch (std::exception& e) { + auto status = CStatus(); status.error_code = UnexpectedException; status.error_msg = strdup(e.what()); return status; diff --git a/internal/core/unittest/test_c_api.cpp b/internal/core/unittest/test_c_api.cpp index a24770e03754498cfd8306147a9d70d6315e2747..dedfd71c2987734d35872728e265a116cd7c4c17 100644 --- a/internal/core/unittest/test_c_api.cpp +++ b/internal/core/unittest/test_c_api.cpp @@ -14,7 +14,6 @@ #include <random> #include <gtest/gtest.h> #include <chrono> -#include <google/protobuf/text_format.h> #include "pb/service_msg.pb.h" #include "segcore/reduce_c.h" @@ -24,17 +23,9 @@ #include <index/knowhere/knowhere/index/vector_index/VecIndexFactory.h> #include <index/knowhere/knowhere/index/vector_index/IndexIVFPQ.h> #include <common/LoadIndex.h> -#include <utils/Types.h> -#include <segcore/Collection.h> -#include "test_utils/DataGen.h" namespace chrono = std::chrono; -using namespace milvus; -using namespace milvus::segcore; -using namespace milvus::proto; -using namespace milvus::knowhere; - TEST(CApiTest, CollectionTest) { auto schema_tmp_conf = ""; auto collection = NewCollection(schema_tmp_conf); @@ -353,11 +344,11 @@ TEST(CApiTest, GetMemoryUsageInBytesTest) { namespace { auto generate_data(int N) { - std::vector<char> raw_data; + std::vector<float> raw_data; std::vector<uint64_t> timestamps; std::vector<int64_t> uids; std::default_random_engine er(42); - std::normal_distribution<> distribution(0.0, 1.0); + std::uniform_real_distribution<> distribution(0.0, 1.0); std::default_random_engine ei(42); for (int i = 0; i < N; ++i) { uids.push_back(10 * N + i); @@ -367,58 +358,12 @@ generate_data(int N) { for (auto& x : vec) { x = distribution(er); } - raw_data.insert(raw_data.end(), (const char*)std::begin(vec), (const char*)std::end(vec)); + raw_data.insert(raw_data.end(), std::begin(vec), std::end(vec)); int age = ei() % 100; raw_data.insert(raw_data.end(), (const char*)&age, ((const char*)&age) + sizeof(age)); } return std::make_tuple(raw_data, timestamps, uids); } - -std::string -generate_collection_shema(std::string metric_type, std::string dim) { - schema::CollectionSchema collection_schema; - collection_schema.set_name("collection_test"); - collection_schema.set_autoid(true); - - auto vec_field_schema = collection_schema.add_fields(); - vec_field_schema->set_name("fakevec"); - vec_field_schema->set_fieldid(0); - vec_field_schema->set_data_type(schema::DataType::VECTOR_FLOAT); - auto metric_type_param = vec_field_schema->add_index_params(); - metric_type_param->set_key("metric_type"); - metric_type_param->set_value(metric_type); - auto dim_param = vec_field_schema->add_type_params(); - dim_param->set_key("dim"); - dim_param->set_value(dim); - - auto other_field_schema = collection_schema.add_fields(); - ; - other_field_schema->set_name("counter"); - other_field_schema->set_fieldid(1); - other_field_schema->set_data_type(schema::DataType::INT64); - - std::string schema_string; - auto marshal = google::protobuf::TextFormat::PrintToString(collection_schema, &schema_string); - assert(marshal == true); - return schema_string; -} - -VecIndexPtr -generate_index( - void* raw_data, milvus::knowhere::Config conf, int64_t dim, int64_t topK, int64_t N, std::string index_type) { - auto indexing = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type, knowhere::IndexMode::MODE_CPU); - - auto database = milvus::knowhere::GenDataset(N, dim, raw_data); - indexing->Train(database, conf); - indexing->AddWithoutIds(database, conf); - EXPECT_EQ(indexing->Count(), N); - EXPECT_EQ(indexing->Dim(), dim); - - EXPECT_EQ(indexing->Count(), N); - EXPECT_EQ(indexing->Dim(), dim); - return indexing; -} - } // namespace // TEST(CApiTest, TestSearchPreference) { @@ -831,404 +776,4 @@ TEST(CApiTest, LoadIndex_Search) { for (int i = 0; i < std::min(num_query * K, 100); ++i) { std::cout << ids[i] << "->" << dis[i] << std::endl; } -} - -TEST(CApiTest, UpdateSegmentIndex_Without_Predicate) { - // insert data to segment - constexpr auto DIM = 16; - constexpr auto K = 5; - - std::string schema_string = generate_collection_shema("L2", "16"); - auto collection = NewCollection(schema_string.c_str()); - auto schema = ((segcore::Collection*)collection)->get_schema(); - auto segment = NewSegment(collection, 0); - - auto N = 1000 * 1000; - auto dataset = DataGen(schema, N); - auto vec_col = dataset.get_col<float>(0); - auto query_ptr = vec_col.data() + 420000 * DIM; - - PreInsert(segment, N); - auto ins_res = Insert(segment, 0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_.raw_data, - dataset.raw_.sizeof_per_row, dataset.raw_.count); - assert(ins_res.error_code == Success); - - const char* dsl_string = R"( - { - "bool": { - "vector": { - "fakevec": { - "metric_type": "L2", - "params": { - "nprobe": 10 - }, - "query": "$0", - "topk": 5 - } - } - } - })"; - - // create place_holder_group - int num_queries = 5; - auto raw_group = CreatePlaceholderGroupFromBlob(num_queries, 16, query_ptr); - auto blob = raw_group.SerializeAsString(); - - // search on segment's small index - void* plan = nullptr; - auto status = CreatePlan(collection, dsl_string, &plan); - assert(status.error_code == Success); - - void* placeholderGroup = nullptr; - status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup); - assert(status.error_code == Success); - - std::vector<CPlaceholderGroup> placeholderGroups; - placeholderGroups.push_back(placeholderGroup); - Timestamp time = 10000000; - - CQueryResult c_search_result_on_smallIndex; - auto res_before_load_index = - Search(segment, plan, placeholderGroups.data(), &time, 1, &c_search_result_on_smallIndex); - assert(res_before_load_index.error_code == Success); - - // load index to segment - auto conf = milvus::knowhere::Config{{milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::IndexParams::nlist, 100}, - {milvus::knowhere::IndexParams::nprobe, 10}, - {milvus::knowhere::IndexParams::m, 4}, - {milvus::knowhere::IndexParams::nbits, 8}, - {milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2}, - {milvus::knowhere::meta::DEVICEID, 0}}; - auto indexing = generate_index(vec_col.data(), conf, DIM, K, N, IndexEnum::INDEX_FAISS_IVFPQ); - - // gen query dataset - auto query_dataset = milvus::knowhere::GenDataset(num_queries, DIM, query_ptr); - auto result_on_index = indexing->Query(query_dataset, conf, nullptr); - auto ids = result_on_index->Get<int64_t*>(milvus::knowhere::meta::IDS); - auto dis = result_on_index->Get<float*>(milvus::knowhere::meta::DISTANCE); - std::vector<int64_t> vec_ids(ids, ids + K * num_queries); - std::vector<float> vec_dis; - for (int j = 0; j < K * num_queries; ++j) { - vec_dis.push_back(dis[j] * -1); - } - - auto search_result_on_raw_index = (QueryResult*)c_search_result_on_smallIndex; - search_result_on_raw_index->internal_seg_offsets_ = vec_ids; - search_result_on_raw_index->result_distances_ = vec_dis; - - auto binary_set = indexing->Serialize(conf); - void* c_load_index_info = nullptr; - status = NewLoadIndexInfo(&c_load_index_info); - assert(status.error_code == Success); - std::string index_type_key = "index_type"; - std::string index_type_value = "IVF_PQ"; - std::string index_mode_key = "index_mode"; - std::string index_mode_value = "cpu"; - std::string metric_type_key = "metric_type"; - std::string metric_type_value = "L2"; - - AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str()); - AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str()); - AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str()); - AppendFieldInfo(c_load_index_info, "fakevec", 0); - AppendIndex(c_load_index_info, (CBinarySet)&binary_set); - - status = UpdateSegmentIndex(segment, c_load_index_info); - assert(status.error_code == Success); - - CQueryResult c_search_result_on_bigIndex; - auto res_after_load_index = Search(segment, plan, placeholderGroups.data(), &time, 1, &c_search_result_on_bigIndex); - assert(res_after_load_index.error_code == Success); - - auto search_result_on_raw_index_json = QueryResultToJson(*search_result_on_raw_index); - auto search_result_on_bigIndex_json = QueryResultToJson((*(QueryResult*)c_search_result_on_bigIndex)); - std::cout << search_result_on_raw_index_json.dump(1) << std::endl; - std::cout << search_result_on_bigIndex_json.dump(1) << std::endl; - - ASSERT_EQ(search_result_on_raw_index_json.dump(1), search_result_on_bigIndex_json.dump(1)); - - DeleteLoadIndexInfo(c_load_index_info); - DeletePlan(plan); - DeletePlaceholderGroup(placeholderGroup); - DeleteQueryResult(c_search_result_on_smallIndex); - DeleteQueryResult(c_search_result_on_bigIndex); - DeleteCollection(collection); - DeleteSegment(segment); -} - -TEST(CApiTest, UpdateSegmentIndex_With_Predicate_Range) { - // insert data to segment - constexpr auto DIM = 16; - constexpr auto K = 5; - - std::string schema_string = generate_collection_shema("L2", "16"); - auto collection = NewCollection(schema_string.c_str()); - auto schema = ((segcore::Collection*)collection)->get_schema(); - auto segment = NewSegment(collection, 0); - - auto N = 1000 * 1000; - auto dataset = DataGen(schema, N); - auto vec_col = dataset.get_col<float>(0); - auto query_ptr = vec_col.data() + 420000 * DIM; - - PreInsert(segment, N); - auto ins_res = Insert(segment, 0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_.raw_data, - dataset.raw_.sizeof_per_row, dataset.raw_.count); - assert(ins_res.error_code == Success); - - const char* dsl_string = R"({ - "bool": { - "must": [ - { - "range": { - "counter": { - "GE": 420000, - "LT": 420010 - } - } - }, - { - "vector": { - "fakevec": { - "metric_type": "L2", - "params": { - "nprobe": 10 - }, - "query": "$0", - "topk": 5 - } - } - } - ] - } - })"; - - // create place_holder_group - int num_queries = 10; - auto raw_group = CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr); - auto blob = raw_group.SerializeAsString(); - - // search on segment's small index - void* plan = nullptr; - auto status = CreatePlan(collection, dsl_string, &plan); - assert(status.error_code == Success); - - void* placeholderGroup = nullptr; - status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup); - assert(status.error_code == Success); - - std::vector<CPlaceholderGroup> placeholderGroups; - placeholderGroups.push_back(placeholderGroup); - Timestamp time = 10000000; - - CQueryResult c_search_result_on_smallIndex; - auto res_before_load_index = - Search(segment, plan, placeholderGroups.data(), &time, 1, &c_search_result_on_smallIndex); - assert(res_before_load_index.error_code == Success); - - // load index to segment - auto conf = milvus::knowhere::Config{{milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::IndexParams::nlist, 100}, - {milvus::knowhere::IndexParams::nprobe, 10}, - {milvus::knowhere::IndexParams::m, 4}, - {milvus::knowhere::IndexParams::nbits, 8}, - {milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2}, - {milvus::knowhere::meta::DEVICEID, 0}}; - - auto indexing = generate_index(vec_col.data(), conf, DIM, K, N, IndexEnum::INDEX_FAISS_IVFPQ); - - // gen query dataset - auto query_dataset = milvus::knowhere::GenDataset(num_queries, DIM, query_ptr); - auto result_on_index = indexing->Query(query_dataset, conf, nullptr); - auto ids = result_on_index->Get<int64_t*>(milvus::knowhere::meta::IDS); - auto dis = result_on_index->Get<float*>(milvus::knowhere::meta::DISTANCE); - std::vector<int64_t> vec_ids(ids, ids + K * num_queries); - std::vector<float> vec_dis; - for (int j = 0; j < K * num_queries; ++j) { - vec_dis.push_back(dis[j] * -1); - } - - auto search_result_on_raw_index = (QueryResult*)c_search_result_on_smallIndex; - search_result_on_raw_index->internal_seg_offsets_ = vec_ids; - search_result_on_raw_index->result_distances_ = vec_dis; - - auto binary_set = indexing->Serialize(conf); - void* c_load_index_info = nullptr; - status = NewLoadIndexInfo(&c_load_index_info); - assert(status.error_code == Success); - std::string index_type_key = "index_type"; - std::string index_type_value = "IVF_PQ"; - std::string index_mode_key = "index_mode"; - std::string index_mode_value = "cpu"; - std::string metric_type_key = "metric_type"; - std::string metric_type_value = "L2"; - - AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str()); - AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str()); - AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str()); - AppendFieldInfo(c_load_index_info, "fakevec", 0); - AppendIndex(c_load_index_info, (CBinarySet)&binary_set); - - status = UpdateSegmentIndex(segment, c_load_index_info); - assert(status.error_code == Success); - - CQueryResult c_search_result_on_bigIndex; - auto res_after_load_index = Search(segment, plan, placeholderGroups.data(), &time, 1, &c_search_result_on_bigIndex); - assert(res_after_load_index.error_code == Success); - - auto search_result_on_bigIndex = (*(QueryResult*)c_search_result_on_bigIndex); - for (int i = 0; i < num_queries; ++i) { - auto offset = i * K; - ASSERT_EQ(search_result_on_bigIndex.internal_seg_offsets_[offset], 420000 + i); - ASSERT_EQ(search_result_on_bigIndex.result_distances_[offset], - search_result_on_raw_index->result_distances_[offset]); - } - - DeleteLoadIndexInfo(c_load_index_info); - DeletePlan(plan); - DeletePlaceholderGroup(placeholderGroup); - DeleteQueryResult(c_search_result_on_smallIndex); - DeleteQueryResult(c_search_result_on_bigIndex); - DeleteCollection(collection); - DeleteSegment(segment); -} - -TEST(CApiTest, UpdateSegmentIndex_With_Predicate_Term) { - // insert data to segment - constexpr auto DIM = 16; - constexpr auto K = 5; - - std::string schema_string = generate_collection_shema("L2", "16"); - auto collection = NewCollection(schema_string.c_str()); - auto schema = ((segcore::Collection*)collection)->get_schema(); - auto segment = NewSegment(collection, 0); - - auto N = 1000 * 1000; - auto dataset = DataGen(schema, N); - auto vec_col = dataset.get_col<float>(0); - auto query_ptr = vec_col.data() + 420000 * DIM; - - PreInsert(segment, N); - auto ins_res = Insert(segment, 0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_.raw_data, - dataset.raw_.sizeof_per_row, dataset.raw_.count); - assert(ins_res.error_code == Success); - - const char* dsl_string = R"({ - "bool": { - "must": [ - { - "term": { - "counter": { - "values": [420000, 420001, 420002, 420003, 420004] - } - } - }, - { - "vector": { - "fakevec": { - "metric_type": "L2", - "params": { - "nprobe": 10 - }, - "query": "$0", - "topk": 5 - } - } - } - ] - } - })"; - - // create place_holder_group - int num_queries = 5; - auto raw_group = CreatePlaceholderGroupFromBlob(num_queries, DIM, query_ptr); - auto blob = raw_group.SerializeAsString(); - - // search on segment's small index - void* plan = nullptr; - auto status = CreatePlan(collection, dsl_string, &plan); - assert(status.error_code == Success); - - void* placeholderGroup = nullptr; - status = ParsePlaceholderGroup(plan, blob.data(), blob.length(), &placeholderGroup); - assert(status.error_code == Success); - - std::vector<CPlaceholderGroup> placeholderGroups; - placeholderGroups.push_back(placeholderGroup); - Timestamp time = 10000000; - - CQueryResult c_search_result_on_smallIndex; - auto res_before_load_index = - Search(segment, plan, placeholderGroups.data(), &time, 1, &c_search_result_on_smallIndex); - assert(res_before_load_index.error_code == Success); - - // load index to segment - auto conf = milvus::knowhere::Config{{milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::meta::TOPK, K}, - {milvus::knowhere::IndexParams::nlist, 100}, - {milvus::knowhere::IndexParams::nprobe, 10}, - {milvus::knowhere::IndexParams::m, 4}, - {milvus::knowhere::IndexParams::nbits, 8}, - {milvus::knowhere::Metric::TYPE, milvus::knowhere::Metric::L2}, - {milvus::knowhere::meta::DEVICEID, 0}}; - - auto indexing = generate_index(vec_col.data(), conf, DIM, K, N, IndexEnum::INDEX_FAISS_IVFPQ); - - // gen query dataset - auto query_dataset = milvus::knowhere::GenDataset(num_queries, DIM, query_ptr); - auto result_on_index = indexing->Query(query_dataset, conf, nullptr); - auto ids = result_on_index->Get<int64_t*>(milvus::knowhere::meta::IDS); - auto dis = result_on_index->Get<float*>(milvus::knowhere::meta::DISTANCE); - std::vector<int64_t> vec_ids(ids, ids + K * num_queries); - std::vector<float> vec_dis; - for (int j = 0; j < K * num_queries; ++j) { - vec_dis.push_back(dis[j] * -1); - } - - auto search_result_on_raw_index = (QueryResult*)c_search_result_on_smallIndex; - search_result_on_raw_index->internal_seg_offsets_ = vec_ids; - search_result_on_raw_index->result_distances_ = vec_dis; - - auto binary_set = indexing->Serialize(conf); - void* c_load_index_info = nullptr; - status = NewLoadIndexInfo(&c_load_index_info); - assert(status.error_code == Success); - std::string index_type_key = "index_type"; - std::string index_type_value = "IVF_PQ"; - std::string index_mode_key = "index_mode"; - std::string index_mode_value = "cpu"; - std::string metric_type_key = "metric_type"; - std::string metric_type_value = "L2"; - - AppendIndexParam(c_load_index_info, index_type_key.c_str(), index_type_value.c_str()); - AppendIndexParam(c_load_index_info, index_mode_key.c_str(), index_mode_value.c_str()); - AppendIndexParam(c_load_index_info, metric_type_key.c_str(), metric_type_value.c_str()); - AppendFieldInfo(c_load_index_info, "fakevec", 0); - AppendIndex(c_load_index_info, (CBinarySet)&binary_set); - - status = UpdateSegmentIndex(segment, c_load_index_info); - assert(status.error_code == Success); - - CQueryResult c_search_result_on_bigIndex; - auto res_after_load_index = Search(segment, plan, placeholderGroups.data(), &time, 1, &c_search_result_on_bigIndex); - assert(res_after_load_index.error_code == Success); - - auto search_result_on_bigIndex = (*(QueryResult*)c_search_result_on_bigIndex); - for (int i = 0; i < num_queries; ++i) { - auto offset = i * K; - ASSERT_EQ(search_result_on_bigIndex.internal_seg_offsets_[offset], 420000 + i); - ASSERT_EQ(search_result_on_bigIndex.result_distances_[offset], - search_result_on_raw_index->result_distances_[offset]); - } - - DeleteLoadIndexInfo(c_load_index_info); - DeletePlan(plan); - DeletePlaceholderGroup(placeholderGroup); - DeleteQueryResult(c_search_result_on_smallIndex); - DeleteQueryResult(c_search_result_on_bigIndex); - DeleteCollection(collection); - DeleteSegment(segment); } \ No newline at end of file diff --git a/internal/core/unittest/test_index_wrapper.cpp b/internal/core/unittest/test_index_wrapper.cpp index 50aed377d32c59c5667b2ae63afd8a5a3c54e64c..a885c837a096b9558da69ec74c73d2c9c019e510 100644 --- a/internal/core/unittest/test_index_wrapper.cpp +++ b/internal/core/unittest/test_index_wrapper.cpp @@ -99,14 +99,105 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh {milvus::knowhere::Metric::TYPE, metric_type}, }; } else if (index_type == milvus::knowhere::IndexEnum::INDEX_NSG) { - return milvus::knowhere::Config{{milvus::knowhere::meta::DIM, DIM}, - {milvus::knowhere::IndexParams::nlist, 163}, - {milvus::knowhere::IndexParams::nprobe, 8}, - {milvus::knowhere::IndexParams::knng, 20}, - {milvus::knowhere::IndexParams::search_length, 40}, - {milvus::knowhere::IndexParams::out_degree, 30}, - {milvus::knowhere::IndexParams::candidate, 100}, - {milvus::knowhere::Metric::TYPE, metric_type}}; + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + {milvus::knowhere::IndexParams::nlist, 163}, + {milvus::knowhere::IndexParams::nprobe, 8}, + {milvus::knowhere::IndexParams::knng, 20}, + {milvus::knowhere::IndexParams::search_length, 40}, + {milvus::knowhere::IndexParams::out_degree, 30}, + {milvus::knowhere::IndexParams::candidate, 100}, + {milvus::knowhere::Metric::TYPE, metric_type}, + }; +#ifdef MILVUS_SUPPORT_SPTAG + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_SPTAG_KDT_RNT) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + // {milvus::knowhere::meta::TOPK, 10}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_SPTAG_BKT_RNT) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + // {milvus::knowhere::meta::TOPK, 10}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; +#endif + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_HNSW) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + // {milvus::knowhere::meta::TOPK, 10}, + {milvus::knowhere::IndexParams::M, 16}, + {milvus::knowhere::IndexParams::efConstruction, 200}, + {milvus::knowhere::IndexParams::ef, 200}, + {milvus::knowhere::Metric::TYPE, metric_type}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_ANNOY) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + // {milvus::knowhere::meta::TOPK, 10}, + {milvus::knowhere::IndexParams::n_trees, 4}, + {milvus::knowhere::IndexParams::search_k, 100}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWFlat) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + // {milvus::knowhere::meta::TOPK, 10}, + {milvus::knowhere::IndexParams::M, 16}, + {milvus::knowhere::IndexParams::efConstruction, 200}, + {milvus::knowhere::IndexParams::ef, 200}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWPQ) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + // {milvus::knowhere::meta::TOPK, 10}, + {milvus::knowhere::IndexParams::M, 16}, + {milvus::knowhere::IndexParams::efConstruction, 200}, + {milvus::knowhere::IndexParams::ef, 200}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + {milvus::knowhere::IndexParams::PQM, 8}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWSQ) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + // {milvus::knowhere::meta::TOPK, 10}, + {milvus::knowhere::IndexParams::M, 16}, + {milvus::knowhere::IndexParams::efConstruction, 200}, + {milvus::knowhere::IndexParams::ef, 200}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_NGTPANNG) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + // {milvus::knowhere::meta::TOPK, 10}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::IndexParams::edge_size, 10}, + {milvus::knowhere::IndexParams::epsilon, 0.1}, + {milvus::knowhere::IndexParams::max_search_edges, 50}, + {milvus::knowhere::IndexParams::forcedly_pruned_edge_size, 60}, + {milvus::knowhere::IndexParams::selectively_pruned_edge_size, 30}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; + } else if (index_type == milvus::knowhere::IndexEnum::INDEX_NGTONNG) { + return milvus::knowhere::Config{ + {milvus::knowhere::meta::DIM, DIM}, + // {milvus::knowhere::meta::TOPK, 10}, + {milvus::knowhere::Metric::TYPE, metric_type}, + {milvus::knowhere::IndexParams::edge_size, 20}, + {milvus::knowhere::IndexParams::epsilon, 0.1}, + {milvus::knowhere::IndexParams::max_search_edges, 50}, + {milvus::knowhere::IndexParams::outgoing_edge_size, 5}, + {milvus::knowhere::IndexParams::incoming_edge_size, 40}, + {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4}, + }; } return milvus::knowhere::Config(); } @@ -366,6 +457,17 @@ INSTANTIATE_TEST_CASE_P( std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, milvus::knowhere::Metric::JACCARD), std::pair(milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, milvus::knowhere::Metric::JACCARD), +#ifdef MILVUS_SUPPORT_SPTAG + std::pair(milvus::knowhere::IndexEnum::INDEX_SPTAG_KDT_RNT, milvus::knowhere::Metric::L2), + std::pair(milvus::knowhere::IndexEnum::INDEX_SPTAG_BKT_RNT, milvus::knowhere::Metric::L2), +#endif + std::pair(milvus::knowhere::IndexEnum::INDEX_HNSW, milvus::knowhere::Metric::L2), + std::pair(milvus::knowhere::IndexEnum::INDEX_ANNOY, milvus::knowhere::Metric::L2), + std::pair(milvus::knowhere::IndexEnum::INDEX_RHNSWFlat, milvus::knowhere::Metric::L2), + std::pair(milvus::knowhere::IndexEnum::INDEX_RHNSWPQ, milvus::knowhere::Metric::L2), + std::pair(milvus::knowhere::IndexEnum::INDEX_RHNSWSQ, milvus::knowhere::Metric::L2), + std::pair(milvus::knowhere::IndexEnum::INDEX_NGTPANNG, milvus::knowhere::Metric::L2), + std::pair(milvus::knowhere::IndexEnum::INDEX_NGTONNG, milvus::knowhere::Metric::L2), std::pair(milvus::knowhere::IndexEnum::INDEX_NSG, milvus::knowhere::Metric::L2))); TEST_P(IndexWrapperTest, Constructor) { diff --git a/internal/indexbuilder/index_test.go b/internal/indexbuilder/index_test.go index 88cb7e3b4b364725a47b661470f09da7129117f3..f64f1289b474cc5b2ef9231024ed45ab5c39139a 100644 --- a/internal/indexbuilder/index_test.go +++ b/internal/indexbuilder/index_test.go @@ -181,6 +181,8 @@ func TestCIndex_Codec(t *testing.T) { err = index.Delete() assert.Equal(t, err, nil) + err = copyIndex.Delete() + assert.Equal(t, err, nil) } } diff --git a/internal/master/flush_scheduler.go b/internal/master/flush_scheduler.go index 7648330b5d4c569ba003db9cc13471ce90ac8c15..beb2a3829efbcc0a7854cbeb4a7943a52bbc2fb4 100644 --- a/internal/master/flush_scheduler.go +++ b/internal/master/flush_scheduler.go @@ -68,6 +68,18 @@ func (scheduler *FlushScheduler) describe() error { return err } for fieldID, data := range mapData { + // check field indexable + segMeta, err := scheduler.metaTable.GetSegmentByID(singleSegmentID) + if err != nil { + return err + } + indexable, err := scheduler.metaTable.IsIndexable(segMeta.CollectionID, fieldID) + if err != nil { + return err + } + if !indexable { + continue + } info := &IndexBuildInfo{ segmentID: singleSegmentID, fieldID: fieldID, diff --git a/internal/master/meta_table.go b/internal/master/meta_table.go index 89e8267968413ebcc1e603842e1b02edac3ab84b..94c75cf95af6d7190a19f6977359a5152c4c79dd 100644 --- a/internal/master/meta_table.go +++ b/internal/master/meta_table.go @@ -5,6 +5,8 @@ import ( "strconv" "sync" + "github.com/zilliztech/milvus-distributed/internal/proto/schemapb" + "github.com/zilliztech/milvus-distributed/internal/proto/commonpb" "github.com/zilliztech/milvus-distributed/internal/util/typeutil" @@ -678,3 +680,23 @@ func (mt *metaTable) UpdateFieldIndexParams(collName string, fieldName string, i return fmt.Errorf("can not find field with id %s", fieldName) } + +func (mt *metaTable) IsIndexable(collID UniqueID, fieldID UniqueID) (bool, error) { + mt.ddLock.RLock() + defer mt.ddLock.RUnlock() + + if _, ok := mt.collID2Meta[collID]; !ok { + return false, fmt.Errorf("can not find collection with id %d", collID) + } + + for _, v := range mt.collID2Meta[collID].Schema.Fields { + // field is vector type and index params is not empty + if v.FieldID == fieldID && (v.DataType == schemapb.DataType_VECTOR_BINARY || v.DataType == schemapb.DataType_VECTOR_FLOAT) && + len(v.IndexParams) != 0 { + return true, nil + } + } + + // fieldID is not in schema(eg: timestamp) or not indexable + return false, nil +} diff --git a/internal/master/persistence_scheduler_test.go b/internal/master/persistence_scheduler_test.go index 025d9b313b87e336037c6b1e0c64e6c92a522949..44110f3a0b94a4f29ba5dc7036bf42bf41616ab5 100644 --- a/internal/master/persistence_scheduler_test.go +++ b/internal/master/persistence_scheduler_test.go @@ -5,6 +5,8 @@ import ( "testing" "time" + "github.com/zilliztech/milvus-distributed/internal/proto/commonpb" + "github.com/zilliztech/milvus-distributed/internal/proto/etcdpb" "github.com/zilliztech/milvus-distributed/internal/proto/schemapb" @@ -43,7 +45,7 @@ func TestPersistenceScheduler(t *testing.T) { Name: "testcoll", Fields: []*schemapb.FieldSchema{ {FieldID: 1}, - {FieldID: 100}, + {FieldID: 100, DataType: schemapb.DataType_VECTOR_FLOAT, IndexParams: []*commonpb.KeyValuePair{{Key: "k", Value: "v"}}}, }, }, }) diff --git a/internal/querynode/load_index_service_test.go b/internal/querynode/load_index_service_test.go index 4145ba3bcd43c2e61aaf59bcf56e3f8eea50f1a1..564719be8111516731f1411d1609d1ed67b63121 100644 --- a/internal/querynode/load_index_service_test.go +++ b/internal/querynode/load_index_service_test.go @@ -16,6 +16,23 @@ import ( "github.com/zilliztech/milvus-distributed/internal/querynode/client" ) +//func TestLoadIndexClient_LoadIndex(t *testing.T) { +// pulsarURL := Params.PulsarAddress +// loadIndexChannels := Params.LoadIndexChannelNames +// loadIndexClient := client.NewLoadIndexClient(context.Background(), pulsarURL, loadIndexChannels) +// +// loadIndexPath := "collection0-segment0-field0" +// loadIndexPaths := make([]string, 0) +// loadIndexPaths = append(loadIndexPaths, loadIndexPath) +// +// indexParams := make(map[string]string) +// indexParams["index_type"] = "IVF_PQ" +// indexParams["index_mode"] = "cpu" +// +// loadIndexClient.LoadIndex(loadIndexPaths, 0, 0, "field0", indexParams) +// loadIndexClient.Close() +//} + func TestLoadIndexService(t *testing.T) { node := newQueryNode() collectionID := rand.Int63n(1000000)