Patch summary (free text ahead of the first "diff --git" header; patch tools skip it).

What the patch does:
  * IndexWrapper.cpp
      - parse_impl: adds stof_closure and uses it so that the NGT search parameter
        `epsilon` is checked as a float instead of an int.
      - QueryImpl: when is_in_nm_list(index_type) is true, runs LoadRawData() exactly once
        (std::call_once on raw_data_loaded_) before delegating to index_->Query().
      - LoadRawData (new): serializes the index, appends raw_data_ to the binary set under
        RAW_DATA through a shared_ptr with a no-op deleter (raw_data_ keeps ownership), and
        loads the result back into index_.
  * IndexWrapper.h: declares LoadRawData() and the raw_data_loaded_ once_flag.
    NOTE(review): std::once_flag / std::call_once live in <mutex>; confirm it is included.
  * test_index_wrapper.cpp: enables TOPK / nprobe in the generated configs, adds distance
    helpers, extends is_binary_map to every parametrized index type, builds query datasets
    in the fixture, and adds Query tests.

Review amendments relative to the submitted patch (hunk line counts are unchanged):
  * stof_closure was declared `-> int`, so the value returned by std::stof was converted to
    int and any fractional part was dropped inside the closure. It now returns float.
  * hamming_weight: spacing normalised only; behaviour is identical.

Open points left untouched on purpose:
  * Jaccard() divides by union_num without a zero check (dim < 8 or all-zero inputs).
  * CheckDistances() has its assertion commented out and always reads the tensors as
    float*, so it verifies nothing yet; CountDistance() never reads is_binary.

diff --git a/internal/core/src/indexbuilder/IndexWrapper.cpp b/internal/core/src/indexbuilder/IndexWrapper.cpp
index fcced635e0e508894a70995b68002310c7b8182f..5d95eabf3d97ba40fcb9ee646b480a9c5a280254 100644
--- a/internal/core/src/indexbuilder/IndexWrapper.cpp
+++ b/internal/core/src/indexbuilder/IndexWrapper.cpp
@@ -55,6 +55,7 @@ IndexWrapper::parse_impl(const std::string& serialized_params_str, knowhere::Con
     }
 
     auto stoi_closure = [](const std::string& s) -> int { return std::stoi(s); };
+    auto stof_closure = [](const std::string& s) -> float { return std::stof(s); };
 
     /***************************** meta *******************************/
     check_parameter<int>(conf, milvus::knowhere::meta::DIM, stoi_closure, std::nullopt);
@@ -88,7 +89,7 @@ IndexWrapper::parse_impl(const std::string& serialized_params_str, knowhere::Con
     check_parameter<int>(conf, milvus::knowhere::IndexParams::edge_size, stoi_closure, std::nullopt);
 
     /************************** NGT Search Params *****************************/
-    check_parameter<int>(conf, milvus::knowhere::IndexParams::epsilon, stoi_closure, std::nullopt);
+    check_parameter<float>(conf, milvus::knowhere::IndexParams::epsilon, stof_closure, std::nullopt);
     check_parameter<int>(conf, milvus::knowhere::IndexParams::max_search_edges, stoi_closure, std::nullopt);
 
     /************************** NGT_PANNG Params *****************************/
@@ -274,6 +275,12 @@ IndexWrapper::QueryWithParam(const knowhere::DatasetPtr& dataset, const char* se
 
 std::unique_ptr<IndexWrapper::QueryResult>
 IndexWrapper::QueryImpl(const knowhere::DatasetPtr& dataset, const knowhere::Config& conf) {
+    auto load_raw_data_closure = [&]() { LoadRawData(); };  // hide this pointer
+    auto index_type = get_index_type();
+    if (is_in_nm_list(index_type)) {
+        std::call_once(raw_data_loaded_, load_raw_data_closure);
+    }
+
     auto res = index_->Query(dataset, conf, nullptr);
     auto ids = res->Get<int64_t*>(milvus::knowhere::meta::IDS);
     auto distances = res->Get<float*>(milvus::knowhere::meta::DISTANCE);
@@ -291,5 +298,19 @@ IndexWrapper::QueryImpl(const knowhere::DatasetPtr& dataset, const knowhere::Con
     return std::move(query_res);
 }
 
+void
+IndexWrapper::LoadRawData() {
+    auto index_type = get_index_type();
+    if (is_in_nm_list(index_type)) {
+        auto bs = index_->Serialize(config_);
+        auto bptr = std::make_shared<milvus::knowhere::Binary>();
+        auto deleter = [&](uint8_t*) {};  // avoid repeated deconstruction
+        bptr->data = std::shared_ptr<uint8_t[]>(static_cast<uint8_t*>(raw_data_.data()), deleter);
+        bptr->size = raw_data_.size();
+        bs.Append(RAW_DATA, bptr);
+        index_->Load(bs);
+    }
+}
+
 }  // namespace indexbuilder
 }  // namespace milvus
diff --git a/internal/core/src/indexbuilder/IndexWrapper.h b/internal/core/src/indexbuilder/IndexWrapper.h
index 65c6f149febf89bd30521e0478ba4eb2782b8583..16f2721712c655bff7b2e7d53a235e32ed1d6458 100644
--- a/internal/core/src/indexbuilder/IndexWrapper.h
+++ b/internal/core/src/indexbuilder/IndexWrapper.h
@@ -66,6 +66,9 @@ class IndexWrapper {
     void
     StoreRawData(const knowhere::DatasetPtr& dataset);
 
+    void
+    LoadRawData();
+
     template <typename T>
     void
     check_parameter(knowhere::Config& conf,
@@ -92,6 +95,7 @@ class IndexWrapper {
     milvus::json index_config_;
     knowhere::Config config_;
     std::vector<uint8_t> raw_data_;
+    std::once_flag raw_data_loaded_;
 };
 
 }  // namespace indexbuilder
diff --git a/internal/core/unittest/test_index_wrapper.cpp b/internal/core/unittest/test_index_wrapper.cpp
index a885c837a096b9558da69ec74c73d2c9c019e510..bd335951f8053029f720e368d7907cb0d65d451d 100644
--- a/internal/core/unittest/test_index_wrapper.cpp
+++ b/internal/core/unittest/test_index_wrapper.cpp
@@ -11,6 +11,8 @@
 
 #include <tuple>
 #include <map>
+#include <limits>
+#include <math.h>
 #include <gtest/gtest.h>
 #include <google/protobuf/text_format.h>
 
@@ -41,16 +43,16 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
     if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IDMAP) {
         return milvus::knowhere::Config{
             {milvus::knowhere::meta::DIM, DIM},
-            // {milvus::knowhere::meta::TOPK, K},
+            {milvus::knowhere::meta::TOPK, K},
             {milvus::knowhere::Metric::TYPE, metric_type},
             {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
         };
     } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ) {
         return milvus::knowhere::Config{
             {milvus::knowhere::meta::DIM, DIM},
-            // {milvus::knowhere::meta::TOPK, K},
+            {milvus::knowhere::meta::TOPK, K},
             {milvus::knowhere::IndexParams::nlist, 100},
-            // {milvus::knowhere::IndexParams::nprobe, 4},
+            {milvus::knowhere::IndexParams::nprobe, 4},
             {milvus::knowhere::IndexParams::m, 4},
             {milvus::knowhere::IndexParams::nbits, 8},
             {milvus::knowhere::Metric::TYPE, metric_type},
@@ -59,9 +61,9 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
     } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT) {
         return milvus::knowhere::Config{
             {milvus::knowhere::meta::DIM, DIM},
-            // {milvus::knowhere::meta::TOPK, K},
+            {milvus::knowhere::meta::TOPK, K},
             {milvus::knowhere::IndexParams::nlist, 100},
-            // {milvus::knowhere::IndexParams::nprobe, 4},
+            {milvus::knowhere::IndexParams::nprobe, 4},
             {milvus::knowhere::Metric::TYPE, metric_type},
             {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
 #ifdef MILVUS_GPU_VERSION
@@ -71,9 +73,9 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
     } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_IVFSQ8) {
         return milvus::knowhere::Config{
             {milvus::knowhere::meta::DIM, DIM},
-            // {milvus::knowhere::meta::TOPK, K},
+            {milvus::knowhere::meta::TOPK, K},
             {milvus::knowhere::IndexParams::nlist, 100},
-            // {milvus::knowhere::IndexParams::nprobe, 4},
+            {milvus::knowhere::IndexParams::nprobe, 4},
             {milvus::knowhere::IndexParams::nbits, 8},
             {milvus::knowhere::Metric::TYPE, metric_type},
             {milvus::knowhere::INDEX_FILE_SLICE_SIZE_IN_MEGABYTE, 4},
@@ -84,9 +86,9 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
     } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
         return milvus::knowhere::Config{
             {milvus::knowhere::meta::DIM, DIM},
-            // {milvus::knowhere::meta::TOPK, K},
+            {milvus::knowhere::meta::TOPK, K},
             {milvus::knowhere::IndexParams::nlist, 100},
-            // {milvus::knowhere::IndexParams::nprobe, 4},
+            {milvus::knowhere::IndexParams::nprobe, 4},
             {milvus::knowhere::IndexParams::m, 4},
             {milvus::knowhere::IndexParams::nbits, 8},
             {milvus::knowhere::Metric::TYPE, metric_type},
@@ -95,13 +97,14 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
     } else if (index_type == milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP) {
         return milvus::knowhere::Config{
             {milvus::knowhere::meta::DIM, DIM},
-            // {milvus::knowhere::meta::TOPK, K},
+            {milvus::knowhere::meta::TOPK, K},
             {milvus::knowhere::Metric::TYPE, metric_type},
         };
     } else if (index_type == milvus::knowhere::IndexEnum::INDEX_NSG) {
         return milvus::knowhere::Config{
             {milvus::knowhere::meta::DIM, DIM},
             {milvus::knowhere::IndexParams::nlist, 163},
+            {milvus::knowhere::meta::TOPK, K},
             {milvus::knowhere::IndexParams::nprobe, 8},
             {milvus::knowhere::IndexParams::knng, 20},
             {milvus::knowhere::IndexParams::search_length, 40},
@@ -127,17 +130,14 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
 #endif
     } else if (index_type == milvus::knowhere::IndexEnum::INDEX_HNSW) {
         return milvus::knowhere::Config{
-            {milvus::knowhere::meta::DIM, DIM},
-            // {milvus::knowhere::meta::TOPK, 10},
-            {milvus::knowhere::IndexParams::M, 16},
-            {milvus::knowhere::IndexParams::efConstruction, 200},
-            {milvus::knowhere::IndexParams::ef, 200},
-            {milvus::knowhere::Metric::TYPE, metric_type},
+            {milvus::knowhere::meta::DIM, DIM},       {milvus::knowhere::meta::TOPK, K},
+            {milvus::knowhere::IndexParams::M, 16},   {milvus::knowhere::IndexParams::efConstruction, 200},
+            {milvus::knowhere::IndexParams::ef, 200}, {milvus::knowhere::Metric::TYPE, metric_type},
         };
     } else if (index_type == milvus::knowhere::IndexEnum::INDEX_ANNOY) {
         return milvus::knowhere::Config{
             {milvus::knowhere::meta::DIM, DIM},
-            // {milvus::knowhere::meta::TOPK, 10},
+            {milvus::knowhere::meta::TOPK, K},
             {milvus::knowhere::IndexParams::n_trees, 4},
             {milvus::knowhere::IndexParams::search_k, 100},
             {milvus::knowhere::Metric::TYPE, metric_type},
@@ -146,7 +146,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
     } else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWFlat) {
         return milvus::knowhere::Config{
             {milvus::knowhere::meta::DIM, DIM},
-            // {milvus::knowhere::meta::TOPK, 10},
+            {milvus::knowhere::meta::TOPK, K},
             {milvus::knowhere::IndexParams::M, 16},
             {milvus::knowhere::IndexParams::efConstruction, 200},
             {milvus::knowhere::IndexParams::ef, 200},
@@ -156,7 +156,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
     } else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWPQ) {
         return milvus::knowhere::Config{
             {milvus::knowhere::meta::DIM, DIM},
-            // {milvus::knowhere::meta::TOPK, 10},
+            {milvus::knowhere::meta::TOPK, K},
             {milvus::knowhere::IndexParams::M, 16},
             {milvus::knowhere::IndexParams::efConstruction, 200},
             {milvus::knowhere::IndexParams::ef, 200},
@@ -167,7 +167,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
     } else if (index_type == milvus::knowhere::IndexEnum::INDEX_RHNSWSQ) {
         return milvus::knowhere::Config{
             {milvus::knowhere::meta::DIM, DIM},
-            // {milvus::knowhere::meta::TOPK, 10},
+            {milvus::knowhere::meta::TOPK, K},
             {milvus::knowhere::IndexParams::M, 16},
             {milvus::knowhere::IndexParams::efConstruction, 200},
             {milvus::knowhere::IndexParams::ef, 200},
@@ -177,7 +177,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
     } else if (index_type == milvus::knowhere::IndexEnum::INDEX_NGTPANNG) {
         return milvus::knowhere::Config{
             {milvus::knowhere::meta::DIM, DIM},
-            // {milvus::knowhere::meta::TOPK, 10},
+            {milvus::knowhere::meta::TOPK, K},
             {milvus::knowhere::Metric::TYPE, metric_type},
             {milvus::knowhere::IndexParams::edge_size, 10},
             {milvus::knowhere::IndexParams::epsilon, 0.1},
@@ -189,7 +189,7 @@ generate_conf(const milvus::knowhere::IndexType& index_type, const milvus::knowh
     } else if (index_type == milvus::knowhere::IndexEnum::INDEX_NGTONNG) {
         return milvus::knowhere::Config{
             {milvus::knowhere::meta::DIM, DIM},
-            // {milvus::knowhere::meta::TOPK, 10},
+            {milvus::knowhere::meta::TOPK, K},
             {milvus::knowhere::Metric::TYPE, metric_type},
             {milvus::knowhere::IndexParams::edge_size, 20},
             {milvus::knowhere::IndexParams::epsilon, 0.1},
@@ -234,6 +234,99 @@ GenDataset(int64_t N, const milvus::knowhere::MetricType& metric_type, bool is_b
         return milvus::segcore::DataGen(schema, N);
     }
 }
+
+using QueryResultPtr = std::unique_ptr<milvus::indexbuilder::IndexWrapper::QueryResult>;
+void
+PrintQueryResult(const QueryResultPtr& result) {
+    auto nq = result->nq;
+    auto k = result->topk;
+
+    std::stringstream ss_id;
+    std::stringstream ss_dist;
+
+    for (auto i = 0; i < nq; i++) {
+        for (auto j = 0; j < k; ++j) {
+            ss_id << result->ids[i * k + j] << " ";
+            ss_dist << result->distances[i * k + j] << " ";
+        }
+        ss_id << std::endl;
+        ss_dist << std::endl;
+    }
+    std::cout << "id\n" << ss_id.str() << std::endl;
+    std::cout << "dist\n" << ss_dist.str() << std::endl;
+}
+
+float
+L2(const float* point_a, const float* point_b, int dim) {
+    float dis = 0;
+    for (auto i = 0; i < dim; i++) {
+        auto c_a = point_a[i];
+        auto c_b = point_b[i];
+        dis += pow(c_b - c_a, 2);
+    }
+    return dis;
+}
+
+int hamming_weight(uint8_t n) {
+    int count = 0;
+    while (n != 0) {
+        count += n & 1;
+        n >>= 1;
+    }
+    return count;
+}
+float
+Jaccard(const uint8_t* point_a, const uint8_t* point_b, int dim) {
+    float dis;
+    int len = dim / 8;
+    float intersection = 0;
+    float union_num = 0;
+    for (int i = 0; i < len; i++) {
+        intersection += hamming_weight(point_a[i] & point_b[i]);
+        union_num += hamming_weight(point_a[i] | point_b[i]);
+    }
+    dis = 1 - (intersection / union_num);
+    return dis;
+}
+
+float
+CountDistance(const void* point_a,
+              const void* point_b,
+              int dim,
+              const milvus::knowhere::MetricType& metric,
+              bool is_binary = false) {
+    if (point_a == nullptr || point_b == nullptr) {
+        return std::numeric_limits<float>::max();
+    }
+    if (metric == milvus::knowhere::Metric::L2) {
+        return L2(static_cast<const float*>(point_a), static_cast<const float*>(point_b), dim);
+    } else if (metric == milvus::knowhere::Metric::JACCARD) {
+        return Jaccard(static_cast<const uint8_t*>(point_a), static_cast<const uint8_t*>(point_b), dim);
+    } else {
+        return std::numeric_limits<float>::max();
+    }
+}
+
+void
+CheckDistances(const QueryResultPtr& result,
+               const milvus::knowhere::DatasetPtr& base_dataset,
+               const milvus::knowhere::DatasetPtr& query_dataset,
+               const milvus::knowhere::MetricType& metric,
+               const float threshold = 1.0e-5) {
+    auto base_vecs = base_dataset->Get<float*>(milvus::knowhere::meta::TENSOR);
+    auto query_vecs = query_dataset->Get<float*>(milvus::knowhere::meta::TENSOR);
+    auto dim = base_dataset->Get<int64_t>(milvus::knowhere::meta::DIM);
+    auto nq = result->nq;
+    auto k = result->topk;
+    for (auto i = 0; i < nq; i++) {
+        for (auto j = 0; j < k; ++j) {
+            auto dis = result->distances[i * k + j];
+            auto id = result->ids[i * k + j];
+            auto count_dis = CountDistance(query_vecs + i * dim, base_vecs + id * dim, dim, metric);
+            // assert(std::abs(dis - count_dis) < threshold);
+        }
+    }
+}
 }  // namespace
 
 using Param = std::pair<milvus::knowhere::IndexType, milvus::knowhere::MetricType>;
@@ -247,8 +340,26 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
         metric_type = param.second;
         std::tie(type_params, index_params) = generate_params(index_type, metric_type);
 
-        std::map<std::string, bool> is_binary_map = {{milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ, false},
-                                                     {milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, true}};
+        std::map<std::string, bool> is_binary_map = {
+            {milvus::knowhere::IndexEnum::INDEX_FAISS_IDMAP, false},
+            {milvus::knowhere::IndexEnum::INDEX_FAISS_IVFPQ, false},
+            {milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, false},
+            {milvus::knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, false},
+            {milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, true},
+            {milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, true},
+#ifdef MILVUS_SUPPORT_SPTAG
+            {milvus::knowhere::IndexEnum::INDEX_SPTAG_KDT_RNT, false},
+            {milvus::knowhere::IndexEnum::INDEX_SPTAG_BKT_RNT, false},
+#endif
+            {milvus::knowhere::IndexEnum::INDEX_HNSW, false},
+            {milvus::knowhere::IndexEnum::INDEX_ANNOY, false},
+            {milvus::knowhere::IndexEnum::INDEX_RHNSWFlat, false},
+            {milvus::knowhere::IndexEnum::INDEX_RHNSWPQ, false},
+            {milvus::knowhere::IndexEnum::INDEX_RHNSWSQ, false},
+            {milvus::knowhere::IndexEnum::INDEX_NGTPANNG, false},
+            {milvus::knowhere::IndexEnum::INDEX_NGTONNG, false},
+            {milvus::knowhere::IndexEnum::INDEX_NSG, false},
+        };
 
         is_binary = is_binary_map[index_type];
 
@@ -262,9 +373,13 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
         if (!is_binary) {
             xb_data = dataset.get_col<float>(0);
             xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());
+            xq_data = dataset.get_col<float>(0);
+            xq_dataset = milvus::knowhere::GenDataset(NQ, DIM, xq_data.data());
         } else {
             xb_bin_data = dataset.get_col<uint8_t>(0);
             xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_bin_data.data());
+            xq_bin_data = dataset.get_col<uint8_t>(0);
+            xq_dataset = milvus::knowhere::GenDataset(NQ, DIM, xq_bin_data.data());
         }
     }
 
@@ -282,6 +397,9 @@ class IndexWrapperTest : public ::testing::TestWithParam<Param> {
     std::vector<float> xb_data;
     std::vector<uint8_t> xb_bin_data;
     std::vector<milvus::knowhere::IDType> ids;
+    milvus::knowhere::DatasetPtr xq_dataset;
+    std::vector<float> xq_data;
+    std::vector<uint8_t> xq_bin_data;
 };
 
 TEST(PQ, Build) {
@@ -308,6 +426,47 @@ TEST(IVFFLATNM, Build) {
     ASSERT_NO_THROW(index->AddWithoutIds(xb_dataset, conf));
 }
 
+TEST(IVFFLATNM, Query) {
+    auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
+    auto metric_type = milvus::knowhere::Metric::L2;
+    auto conf = generate_conf(index_type, metric_type);
+    auto index = milvus::knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type);
+    auto dataset = GenDataset(NB, metric_type, false);
+    auto xb_data = dataset.get_col<float>(0);
+    auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());
+    ASSERT_NO_THROW(index->Train(xb_dataset, conf));
+    ASSERT_NO_THROW(index->AddWithoutIds(xb_dataset, conf));
+    auto bs = index->Serialize(conf);
+    auto bptr = std::make_shared<milvus::knowhere::Binary>();
+    bptr->data = std::shared_ptr<uint8_t[]>((uint8_t*)xb_data.data(), [&](uint8_t*) {});
+    bptr->size = DIM * NB * sizeof(float);
+    bs.Append(RAW_DATA, bptr);
+    index->Load(bs);
+    auto xq_data = dataset.get_col<float>(0);
+    auto xq_dataset = milvus::knowhere::GenDataset(NQ, DIM, xq_data.data());
+    auto result = index->Query(xq_dataset, conf, nullptr);
+}
+
+TEST(NSG, Query) {
+    auto index_type = milvus::knowhere::IndexEnum::INDEX_NSG;
+    auto metric_type = milvus::knowhere::Metric::L2;
+    auto conf = generate_conf(index_type, metric_type);
+    auto index = milvus::knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type);
+    auto dataset = GenDataset(NB, metric_type, false);
+    auto xb_data = dataset.get_col<float>(0);
+    auto xb_dataset = milvus::knowhere::GenDataset(NB, DIM, xb_data.data());
+    index->BuildAll(xb_dataset, conf);
+    auto bs = index->Serialize(conf);
+    auto bptr = std::make_shared<milvus::knowhere::Binary>();
+    bptr->data = std::shared_ptr<uint8_t[]>((uint8_t*)xb_data.data(), [&](uint8_t*) {});
+    bptr->size = DIM * NB * sizeof(float);
+    bs.Append(RAW_DATA, bptr);
+    index->Load(bs);
+    auto xq_data = dataset.get_col<float>(0);
+    auto xq_dataset = milvus::knowhere::GenDataset(NQ, DIM, xq_data.data());
+    auto result = index->Query(xq_dataset, conf, nullptr);
+}
+
 TEST(BINFLAT, Build) {
     auto index_type = milvus::knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
     auto metric_type = milvus::knowhere::Metric::JACCARD;
@@ -485,12 +644,7 @@ TEST_P(IndexWrapperTest, Dim) {
 TEST_P(IndexWrapperTest, BuildWithoutIds) {
     auto index =
         std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
-
-    if (milvus::indexbuilder::is_in_need_id_list(index_type)) {
-        ASSERT_ANY_THROW(index->BuildWithoutIds(xb_dataset));
-    } else {
-        ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
-    }
+    ASSERT_NO_THROW(index->BuildWithoutIds(xb_dataset));
 }
 
 TEST_P(IndexWrapperTest, Codec) {
@@ -511,3 +665,16 @@ TEST_P(IndexWrapperTest, Codec) {
         ASSERT_EQ(strcmp(binary.data, copy_binary.data), 0);
     }
 }
+
+TEST_P(IndexWrapperTest, Query) {
+    auto index_wrapper =
+        std::make_unique<milvus::indexbuilder::IndexWrapper>(type_params_str.c_str(), index_params_str.c_str());
+
+    index_wrapper->BuildWithoutIds(xb_dataset);
+
+    std::unique_ptr<milvus::indexbuilder::IndexWrapper::QueryResult> query_result = index_wrapper->Query(xq_dataset);
+    ASSERT_EQ(query_result->topk, K);
+    ASSERT_EQ(query_result->nq, NQ);
+    ASSERT_EQ(query_result->distances.size(), query_result->topk * query_result->nq);
+    ASSERT_EQ(query_result->ids.size(), query_result->topk * query_result->nq);
+}