Skip to content
Snippets Groups Projects
Unverified commit b03da87d, authored by zhenshan.cao and committed by GitHub
Browse files

Accelerate query speed in sealed segment (#16287)


Fix delete duplicate key

Signed-off-by: zhenshan.cao <zhenshan.cao@zilliz.com>
parent 58addbf9
No related branches found
Tags v2.0.0-testing-20220331
No related merge requests found
Showing
with 232 additions and 231 deletions
......@@ -461,12 +461,35 @@ auto
ExecExprVisitor::ExecTermVisitorImpl(TermExpr& expr_raw) -> BitsetType {
auto& expr = static_cast<TermExprImpl<T>&>(expr_raw);
auto& schema = segment_.get_schema();
auto primary_offset = schema.get_primary_key_offset();
auto field_offset = expr_raw.field_offset_;
auto& field_meta = schema[field_offset];
bool use_pk_index = false;
if (primary_offset.has_value()) {
use_pk_index = primary_offset.value() == field_offset && field_meta.get_data_type() == engine::DataType::INT64;
}
if (use_pk_index) {
auto id_array = std::make_unique<IdArray>();
auto dst_ids = id_array->mutable_int_id();
for (const auto& id : expr.terms_) {
dst_ids->add_data(id);
}
auto [uids, seg_offsets] = segment_.search_ids(*id_array, timestamp_);
BitsetType bitset(row_count_);
for (const auto& offset : seg_offsets) {
auto _offset = (int64_t)offset.get();
bitset[_offset] = true;
}
AssertInfo(bitset.size() == row_count_, "[ExecExprVisitor]Size of results not equal row count");
return bitset;
}
// not use pk index
std::deque<BitsetType> bitsets;
auto size_per_chunk = segment_.size_per_chunk();
auto num_chunk = upper_div(row_count_, size_per_chunk);
std::deque<BitsetType> bitsets;
std::unordered_set<T> term_set(expr.terms_.begin(), expr.terms_.end());
for (int64_t chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
Span<T> chunk = segment_.chunk_data<T>(field_offset, chunk_id);
......
......@@ -34,17 +34,11 @@ ScalarIndexVector::do_search_ids(const IdArray& ids) const {
std::equal_range(mapping_.begin(), mapping_.end(), std::make_pair(id, SegOffset(0)),
[](const Pair& left, const Pair& right) { return left.first < right.first; });
if (iter_beg == iter_end) {
// no data
continue;
for (auto& iter = iter_beg; iter != iter_end; iter++) {
auto [entry_id, entry_offset] = *iter;
dst_ids->add_data(entry_id);
dst_offsets.push_back(entry_offset);
}
// TODO: for repeated key, decide the final offset with Timestamp
// no repeated key, simplified logic
// AssertInfo(iter_beg + 1 == iter_end, "There are no repeated keys in more than one results");
auto [entry_id, entry_offset] = *iter_beg;
dst_ids->add_data(entry_id);
dst_offsets.push_back(entry_offset);
}
return {std::move(res_ids), std::move(dst_offsets)};
}
......@@ -60,17 +54,11 @@ ScalarIndexVector::do_search_ids(const std::vector<idx_t>& ids) const {
std::equal_range(mapping_.begin(), mapping_.end(), std::make_pair(id, SegOffset(0)),
[](const Pair& left, const Pair& right) { return left.first < right.first; });
if (iter_beg == iter_end) {
// no data
continue;
for (auto& iter = iter_beg; iter != iter_end; iter++) {
auto [entry_id, entry_offset] = *iter_beg;
dst_ids.emplace_back(entry_id);
dst_offsets.push_back(entry_offset);
}
// TODO: for repeated key, decide the final offset with Timestamp
// no repeated key, simplified logic
// AssertInfo(iter_beg + 1 == iter_end, "There are no repeated keys in more than one results");
auto [entry_id, entry_offset] = *iter_beg;
dst_ids.push_back(entry_id);
dst_offsets.push_back(entry_offset);
}
return {std::move(dst_ids), std::move(dst_offsets)};
}
......@@ -82,6 +70,7 @@ ScalarIndexVector::append_data(const ScalarIndexVector::T* ids, int64_t count, S
mapping_.emplace_back(ids[i], offset);
}
}
void
ScalarIndexVector::build() {
std::sort(mapping_.begin(), mapping_.end());
......
......@@ -44,7 +44,6 @@ SegmentGrowingImpl::get_deleted_bitmap(int64_t del_barrier,
int64_t insert_barrier,
bool force) const {
auto old = deleted_record_.get_lru_entry();
if (old->bitmap_ptr->count() == insert_barrier) {
if (old->del_barrier == del_barrier) {
return old;
......@@ -55,58 +54,39 @@ SegmentGrowingImpl::get_deleted_bitmap(int64_t del_barrier,
current->del_barrier = del_barrier;
auto bitmap = current->bitmap_ptr;
int64_t start, end;
if (del_barrier < old->del_barrier) {
for (auto del_index = del_barrier; del_index < old->del_barrier; ++del_index) {
// get uid in delete logs
auto uid = deleted_record_.uids_[del_index];
// map uid to corresponding offsets, select the max one, which should be the target
// the max one should be closest to query_timestamp, so the delete log should refer to it
int64_t the_offset = -1;
auto [iter_b, iter_e] = uid2offset_.equal_range(uid);
for (auto iter = iter_b; iter != iter_e; ++iter) {
auto offset = iter->second;
if (record_.timestamps_[offset] < query_timestamp) {
AssertInfo(offset < insert_barrier, "Timestamp offset is larger than insert barrier");
the_offset = std::max(the_offset, offset);
}
}
// if not found, skip
if (the_offset == -1) {
continue;
}
// otherwise, clear the flag
bitmap->clear(the_offset);
}
return current;
start = del_barrier;
end = old->del_barrier;
} else {
for (auto del_index = old->del_barrier; del_index < del_barrier; ++del_index) {
// get uid in delete logs
auto uid = deleted_record_.uids_[del_index];
// map uid to corresponding offsets, select the max one, which should be the target
// the max one should be closest to query_timestamp, so the delete log should refer to it
int64_t the_offset = -1;
auto [iter_b, iter_e] = uid2offset_.equal_range(uid);
for (auto iter = iter_b; iter != iter_e; ++iter) {
auto offset = iter->second;
if (offset >= insert_barrier) {
continue;
}
if (record_.timestamps_[offset] < query_timestamp) {
AssertInfo(offset < insert_barrier, "Timestamp offset is larger than insert barrier");
the_offset = std::max(the_offset, offset);
}
}
start = old->del_barrier;
end = del_barrier;
}
for (auto del_index = start; del_index < end; ++del_index) {
// get uid in delete logs
auto uid = deleted_record_.uids_[del_index];
// if not found, skip
// map uid to corresponding offsets, select the max one, which should be the target
// the max one should be closest to query_timestamp, so the delete log should refer to it
int64_t the_offset = -1;
auto [iter_b, iter_e] = uid2offset_.equal_range(uid);
for (auto iter = iter_b; iter != iter_e; ++iter) {
auto offset = iter->second;
AssertInfo(offset < insert_barrier, "Timestamp offset is larger than insert barrier");
the_offset = std::max(the_offset, offset);
if (the_offset == -1) {
continue;
}
// otherwise, set the flag
bitmap->set(the_offset);
if (record_.timestamps_[the_offset] >= query_timestamp) {
bitmap->clear(the_offset);
} else {
bitmap->set(the_offset);
}
}
this->deleted_record_.insert_lru_entry(current);
}
this->deleted_record_.insert_lru_entry(current);
return current;
}
......@@ -128,9 +108,7 @@ SegmentGrowingImpl::get_filtered_bitmap(const BitsetView& bitset, int64_t ins_ba
AssertInfo(deleted_bitmap->count() == bitset.size(), "Deleted bitmap count not equal to filtered bitmap count");
auto filtered_bitmap = std::make_shared<faiss::ConcurrentBitset>(bitset.size(), bitset.data());
auto final_bitmap = (*deleted_bitmap.get()) | (*filtered_bitmap.get());
BitsetView res = BitsetView(final_bitmap);
return res;
}
......@@ -508,13 +486,13 @@ SegmentGrowingImpl::search_ids(const IdArray& id_array, Timestamp timestamp) con
if (record_.timestamps_[offset.get()] < timestamp) {
the_offset = std::max(the_offset, offset);
}
// if not found, skip
if (the_offset == SegOffset(-1)) {
continue;
}
res_int_id_arr->add_data(uid);
res_offsets.push_back(the_offset);
}
// if not found, skip
if (the_offset == SegOffset(-1)) {
continue;
}
res_int_id_arr->add_data(uid);
res_offsets.push_back(the_offset);
}
return {std::move(res_id_arr), std::move(res_offsets)};
}
......
......@@ -138,6 +138,9 @@ class SegmentInternalInterface : public SegmentInterface {
virtual std::vector<SegOffset>
search_ids(const BitsetView& view, Timestamp timestamp) const = 0;
virtual std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
search_ids(const IdArray& id_array, Timestamp timestamp) const = 0;
protected:
// internal API: return chunk_data in span
virtual SpanBase
......@@ -161,9 +164,6 @@ class SegmentInternalInterface : public SegmentInterface {
virtual std::unique_ptr<DataArray>
BulkSubScript(FieldOffset field_offset, const SegOffset* seg_offsets, int64_t count) const;
virtual std::pair<std::unique_ptr<IdArray>, std::vector<SegOffset>>
search_ids(const IdArray& id_array, Timestamp timestamp) const = 0;
virtual void
check_search(const query::Plan* plan) const = 0;
......
......@@ -241,7 +241,6 @@ SegmentSealedImpl::get_deleted_bitmap(int64_t del_barrier,
int64_t insert_barrier,
bool force) const {
auto old = deleted_record_.get_lru_entry();
auto current = old->clone(insert_barrier);
current->del_barrier = del_barrier;
auto bitmap = current->bitmap_ptr;
......@@ -260,25 +259,25 @@ SegmentSealedImpl::get_deleted_bitmap(int64_t del_barrier,
return current;
}
int64_t start, end;
if (del_barrier < old->del_barrier) {
for (auto del_index = del_barrier; del_index < old->del_barrier; ++del_index) {
int64_t the_offset = seg_offsets[del_index].get();
AssertInfo(the_offset >= 0, "Seg offset is invalid");
if (deleted_record_.timestamps_[del_index] < query_timestamp) {
bitmap->clear(the_offset);
}
}
return current;
start = del_barrier;
end = old->del_barrier;
} else {
for (auto del_index = old->del_barrier; del_index < del_barrier; ++del_index) {
int64_t the_offset = seg_offsets[del_index].get();
AssertInfo(the_offset >= 0, "Seg offset is invalid");
if (deleted_record_.timestamps_[del_index] < query_timestamp) {
bitmap->set(the_offset);
}
start = old->del_barrier;
end = del_barrier;
}
for (auto del_index = start; del_index < end; ++del_index) {
int64_t the_offset = seg_offsets[del_index].get();
AssertInfo(the_offset >= 0, "Seg offset is invalid");
if (deleted_record_.timestamps_[del_index] >= query_timestamp) {
bitmap->clear(the_offset);
} else {
bitmap->set(the_offset);
}
this->deleted_record_.insert_lru_entry(current);
}
this->deleted_record_.insert_lru_entry(current);
return current;
}
......@@ -300,9 +299,9 @@ SegmentSealedImpl::get_filtered_bitmap(const BitsetView& bitset, int64_t ins_bar
AssertInfo(deleted_bitmap->count() == bitset.size(), "Deleted bitmap count not equal to filtered bitmap count");
auto filtered_bitmap = std::make_shared<faiss::ConcurrentBitset>(bitset.size(), bitset.data());
auto final_bitmap = (*deleted_bitmap.get()) | (*filtered_bitmap.get());
return BitsetView(final_bitmap);
auto res = BitsetView(final_bitmap);
return res;
}
void
......@@ -429,6 +428,7 @@ SegmentSealedImpl::SegmentSealedImpl(SchemaPtr schema, int64_t segment_id)
scalar_indexings_(schema->size()),
id_(segment_id) {
}
void
SegmentSealedImpl::bulk_subscript(SystemFieldType system_type,
const int64_t* seg_offsets,
......@@ -438,6 +438,7 @@ SegmentSealedImpl::bulk_subscript(SystemFieldType system_type,
AssertInfo(system_type == SystemFieldType::RowId, "System field type of id column is not RowId");
bulk_subscript_impl<int64_t>(row_ids_.data(), seg_offsets, count, output);
}
template <typename T>
void
SegmentSealedImpl::bulk_subscript_impl(const void* src_raw, const int64_t* seg_offsets, int64_t count, void* dst_raw) {
......@@ -564,7 +565,6 @@ SegmentSealedImpl::Delete(int64_t reserved_offset,
src_timestamps[i] = t;
src_uids[i] = uid;
}
auto current_size = deleted_record_.record_size_;
deleted_record_.timestamps_.set_data(reserved_offset, src_timestamps.data(), row_count);
deleted_record_.uids_.set_data(reserved_offset, src_uids.data(), row_count);
deleted_record_.ack_responder_.AddSegment(reserved_offset, row_count);
......@@ -612,11 +612,13 @@ SegmentSealedImpl::LoadSegmentMeta(const proto::segcore::LoadSegmentMeta& segmen
timestamp_index_.set_length_meta(std::move(slice_lengths));
PanicInfo("unimplemented");
}
// Returns the segment's full row count. The timestamp argument `ts` is not
// consulted yet, so every row is reported as active regardless of `ts`.
int64_t
SegmentSealedImpl::get_active_count(Timestamp ts) const {
    // TODO: use `ts` to shrink the range that expression search has to cover.
    const auto total_rows = this->get_row_count();
    return total_rows;
}
void
SegmentSealedImpl::mask_with_timestamps(BitsetType& bitset_chunk, Timestamp timestamp) const {
// TODO change the
......
......@@ -24,7 +24,7 @@ if (LINUX)
test_concurrent_vector.cpp
test_c_api.cpp
test_expr.cpp
test_get_entity_by_ids.cpp
test_retrieve.cpp
test_indexing.cpp
test_index_wrapper.cpp
test_init.cpp
......
......@@ -186,8 +186,7 @@ generate_collection_schema(std::string metric_type, int dim, bool is_binary) {
}
VecIndexPtr
generate_index(
void* raw_data, knowhere::Config conf, int64_t dim, int64_t topK, int64_t N, std::string index_type) {
generate_index(void* raw_data, knowhere::Config conf, int64_t dim, int64_t topK, int64_t N, std::string index_type) {
auto indexing = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type, knowhere::IndexMode::MODE_CPU);
auto database = knowhere::GenDataset(N, dim, raw_data);
......@@ -552,7 +551,7 @@ TEST(CApiTest, RetrieveTestWithExpr2) {
int N = 10000;
auto [raw_data, timestamps, uids] = generate_column_data(N);
int64_t offset;
PreInsert(segment, N, &offset);
......@@ -1074,8 +1073,7 @@ TEST(CApiTest, LoadIndex_Search) {
index_params["index_type"] = "IVF_PQ";
index_params["index_mode"] = "CPU";
auto mode = knowhere::IndexMode::MODE_CPU;
load_index_info.index =
knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_params["index_type"], mode);
load_index_info.index = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_params["index_type"], mode);
load_index_info.index->Load(binary_set);
// search
......
......@@ -389,31 +389,30 @@ TEST(BinIdMapWrapper, Build) {
INSTANTIATE_TEST_CASE_P(
IndexTypeParameters,
IndexWrapperTest,
::testing::Values(
std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::Metric::JACCARD),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::Metric::TANIMOTO),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, knowhere::Metric::JACCARD),
::testing::Values(std::pair(knowhere::IndexEnum::INDEX_FAISS_IDMAP, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFPQ, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_IVFSQ8, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::Metric::JACCARD),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT, knowhere::Metric::TANIMOTO),
std::pair(knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP, knowhere::Metric::JACCARD),
#ifdef MILVUS_SUPPORT_SPTAG
std::pair(knowhere::IndexEnum::INDEX_SPTAG_KDT_RNT, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_SPTAG_BKT_RNT, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_SPTAG_KDT_RNT, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_SPTAG_BKT_RNT, knowhere::Metric::L2),
#endif
std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_ANNOY, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_RHNSWFlat, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_RHNSWPQ, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_RHNSWSQ, knowhere::Metric::L2)
std::pair(knowhere::IndexEnum::INDEX_HNSW, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_ANNOY, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_RHNSWFlat, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_RHNSWPQ, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_RHNSWSQ, knowhere::Metric::L2)
#ifdef MILVUS_SUPPORT_NGT
std::pair(knowhere::IndexEnum::INDEX_NGTPANNG, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_NGTONNG, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_NGTPANNG, knowhere::Metric::L2),
std::pair(knowhere::IndexEnum::INDEX_NGTONNG, knowhere::Metric::L2),
#endif
#ifdef MILVUS_SUPPORT_NSG
std::pair(knowhere::IndexEnum::INDEX_NSG, knowhere::Metric::L2)
std::pair(knowhere::IndexEnum::INDEX_NSG, knowhere::Metric::L2)
#endif
));
));
TEST_P(IndexWrapperTest, Constructor) {
auto index =
......
......@@ -18,7 +18,7 @@
using namespace milvus;
using namespace milvus::segcore;
TEST(GetEntityByIds, ScalarIndex) {
TEST(Retrieve, ScalarIndex) {
SUCCEED();
auto index = std::make_unique<ScalarIndexVector>();
std::vector<int64_t> data;
......@@ -44,64 +44,58 @@ TEST(GetEntityByIds, ScalarIndex) {
}
}
TEST(GetEntityByIds, AUTOID) {
TEST(Retrieve, AutoID) {
auto schema = std::make_shared<Schema>();
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
auto DIM = 16;
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, MetricType::METRIC_L2);
schema->set_primary_key(FieldOffset(0));
int64_t N = 10000;
int64_t N = 100;
int64_t req_size = 10;
auto choose = [=](int i) { return i * 3 % N; };
auto dataset = DataGen(schema, N);
auto segment = CreateSealedSegment(schema);
SealedLoader(dataset, *segment);
auto i64_col = dataset.get_col<int64_t>(0);
auto req_ids = std::make_unique<IdArray>();
auto req_ids_arr = req_ids->mutable_int_id();
auto plan = std::make_unique<query::RetrievePlan>(*schema);
std::vector<int64_t> values;
for (int i = 0; i < req_size; ++i) {
values.emplace_back(i64_col[choose(i)]);
}
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(FieldOffset(0), DataType::INT64, values);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->predicate_ = std::move(term_expr);
std::vector<FieldOffset> target_offsets{FieldOffset(0), FieldOffset(1)};
plan->field_offsets_ = target_offsets;
auto retrieve_results = segment->Retrieve(plan.get(), 100);
Assert(retrieve_results->fields_data_size() == target_offsets.size());
FieldOffset field_offset(0);
auto field0 = retrieve_results->fields_data(0);
Assert(field0.has_scalars());
auto field0_data = field0.scalars().long_data();
auto i64_col = dataset.get_col<int64_t>(0);
auto vf_col = dataset.get_col<float>(1);
for (int i = 0; i < req_size; ++i) {
req_ids_arr->add_data(dataset.row_ids_[choose(i)]);
auto index = choose(i);
auto data = field0_data.data(i);
}
// should be ruled out
req_ids_arr->add_data(-1);
for (int i = 0; i < req_size; ++i) {
auto index = choose(i);
auto data = field0_data.data(i);
ASSERT_EQ(data, i64_col[index]);
}
std::vector<FieldOffset> target_offsets{FieldOffset(0), FieldOffset(1)};
// auto retrieve_results = segment->GetEntityById(target_offsets, *req_ids, 0);
// auto ids = retrieve_results->ids().int_id();
// Assert(retrieve_results->fields_data_size() == target_offsets.size());
// FieldOffset field_offset(0);
// auto field0 = retrieve_results->fields_data(0);
// Assert(field0.has_scalars());
// auto field0_data = field0.scalars().long_data();
// for (int i = 0; i < req_size; ++i) {
// auto id = ids.data(i);
// auto index = choose(i);
// ASSERT_EQ(id, dataset.row_ids_[index]);
// auto data = field0_data.data(i);
// ASSERT_EQ(data, i64_col[index]);
// }
//
// auto field1 = retrieve_results->fields_data(1);
// Assert(field1.has_vectors());
// auto field1_data = field1.vectors().float_vector();
// ASSERT_EQ(field1_data.data_size(), DIM * req_size);
//
// for (int i = 0; i < req_size; ++i) {
// for (int d = 0; d < DIM; ++d) {
// auto index = choose(i);
// auto data = field1_data.data(i * DIM + d);
// auto ref = vf_col[index * DIM + d];
// ASSERT_EQ(data, ref);
// }
// }
auto field1 = retrieve_results->fields_data(1);
Assert(field1.has_vectors());
auto field1_data = field1.vectors().float_vector();
ASSERT_EQ(field1_data.data_size(), DIM * req_size);
}
TEST(Retrieve, AUTOID) {
TEST(Retrieve, AutoID2) {
auto schema = std::make_shared<Schema>();
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
auto DIM = 16;
......@@ -138,8 +132,52 @@ TEST(Retrieve, AUTOID) {
for (int i = 0; i < req_size; ++i) {
auto index = choose(i);
auto data = field0_data.data(i);
ASSERT_EQ(data, i64_col[index]);
}
auto field1 = retrieve_results->fields_data(1);
Assert(field1.has_vectors());
auto field1_data = field1.vectors().float_vector();
ASSERT_EQ(field1_data.data_size(), DIM * req_size);
}
TEST(Retrieve, NotExist) {
auto schema = std::make_shared<Schema>();
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
auto DIM = 16;
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, MetricType::METRIC_L2);
schema->set_primary_key(FieldOffset(0));
int64_t N = 100;
int64_t req_size = 10;
auto choose = [=](int i) { return i * 3 % N; };
auto choose2 = [=](int i) { return i * 3 % N + 3 * N; };
auto dataset = DataGen(schema, N);
auto segment = CreateSealedSegment(schema);
SealedLoader(dataset, *segment);
auto i64_col = dataset.get_col<int64_t>(0);
auto plan = std::make_unique<query::RetrievePlan>(*schema);
std::vector<int64_t> values;
for (int i = 0; i < req_size; ++i) {
values.emplace_back(i64_col[choose(i)]);
values.emplace_back(choose2(i));
}
auto term_expr = std::make_unique<query::TermExprImpl<int64_t>>(FieldOffset(0), DataType::INT64, values);
plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
plan->plan_node_->predicate_ = std::move(term_expr);
std::vector<FieldOffset> target_offsets{FieldOffset(0), FieldOffset(1)};
plan->field_offsets_ = target_offsets;
auto retrieve_results = segment->Retrieve(plan.get(), 100);
Assert(retrieve_results->fields_data_size() == target_offsets.size());
FieldOffset field_offset(0);
auto field0 = retrieve_results->fields_data(0);
Assert(field0.has_scalars());
auto field0_data = field0.scalars().long_data();
for (int i = 0; i < req_size; ++i) {
auto index = choose(i);
auto data = field0_data.data(i);
......@@ -152,7 +190,42 @@ TEST(Retrieve, AUTOID) {
ASSERT_EQ(field1_data.data_size(), DIM * req_size);
}
TEST(Retrieve2, LargeTimestamp) {
// Retrieve against a sealed segment that has had no data loaded into it:
// both requested output fields must come back, each holding zero values.
TEST(Retrieve, Empty) {
    // Schema: INT64 primary key at offset 0, float vector (DIM = 16) at offset 1.
    auto DIM = 16;
    auto schema_ptr = std::make_shared<Schema>();
    auto pk_field = schema_ptr->AddDebugField("i64", DataType::INT64);
    auto vec_field = schema_ptr->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, MetricType::METRIC_L2);
    schema_ptr->set_primary_key(FieldOffset(0));

    // Unlike the other Retrieve tests, no DataGen/SealedLoader step follows,
    // so the segment stays empty.
    auto sealed = CreateSealedSegment(schema_ptr);
    auto retrieve_plan = std::make_unique<query::RetrievePlan>(*schema_ptr);

    // Primary-key terms to look up: i * 3 % 100 for i in [0, 10).
    const int64_t key_modulus = 100;
    const int64_t term_count = 10;
    std::vector<int64_t> pk_terms;
    for (int i = 0; i < term_count; ++i) {
        pk_terms.emplace_back(i * 3 % key_modulus);
    }

    // Plan: "pk IN pk_terms", returning both schema fields.
    auto pk_predicate = std::make_unique<query::TermExprImpl<int64_t>>(FieldOffset(0), DataType::INT64, pk_terms);
    retrieve_plan->plan_node_ = std::make_unique<query::RetrievePlanNode>();
    retrieve_plan->plan_node_->predicate_ = std::move(pk_predicate);
    std::vector<FieldOffset> target_offsets{FieldOffset(0), FieldOffset(1)};
    retrieve_plan->field_offsets_ = target_offsets;

    auto retrieve_results = sealed->Retrieve(retrieve_plan.get(), 100);

    // One result column per requested field, and no rows in either of them.
    Assert(retrieve_results->fields_data_size() == target_offsets.size());
    auto field0 = retrieve_results->fields_data(0);
    auto field1 = retrieve_results->fields_data(1);
    Assert(field0.has_scalars());
    auto field0_data = field0.scalars().long_data();
    Assert(field0_data.data_size() == 0);
    Assert(field1.vectors().float_vector().data_size() == 0);
}
TEST(Retrieve, LargeTimestamp) {
auto schema = std::make_shared<Schema>();
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
auto DIM = 16;
......@@ -196,65 +269,7 @@ TEST(Retrieve2, LargeTimestamp) {
}
}
// Builds a sealed segment whose INT64 field at offset 0 is the primary key,
// loads N generated rows into it, and assembles an IdArray holding req_size
// primary-key values taken from the dataset plus one extra id (-1).
// NOTE: the GetEntityById call and every check on its result are commented
// out below, so as written this test only exercises the setup and asserts
// nothing.
TEST(GetEntityByIds, PrimaryKey) {
auto schema = std::make_shared<Schema>();
auto fid_64 = schema->AddDebugField("counter_i64", DataType::INT64);
auto DIM = 16;
auto fid_vec = schema->AddDebugField("vector_64", DataType::VECTOR_FLOAT, DIM, MetricType::METRIC_L2);
schema->set_primary_key(FieldOffset(0));
int64_t N = 10000;
int64_t req_size = 10;
// Picks the row index used for the i-th requested id.
auto choose = [=](int i) { return i * 3 % N; };
auto dataset = DataGen(schema, N);
auto segment = CreateSealedSegment(schema);
SealedLoader(dataset, *segment);
auto req_ids = std::make_unique<IdArray>();
auto req_ids_arr = req_ids->mutable_int_id();
auto i64_col = dataset.get_col<int64_t>(0);
auto vf_col = dataset.get_col<float>(1);
// Request ids are the primary-key column values of the chosen rows.
for (int i = 0; i < req_size; ++i) {
req_ids_arr->add_data(i64_col[choose(i)]);
}
// should be ruled out
req_ids_arr->add_data(-1);
std::vector<FieldOffset> target_offsets{FieldOffset(0), FieldOffset(1)};
// Disabled: lookup through GetEntityById and verification of the returned
// ids, scalar column and vector column.
// auto retrieve_results = segment->GetEntityById(target_offsets, *req_ids, 0);
// auto ids = retrieve_results->ids().int_id();
// Assert(retrieve_results->fields_data_size() == target_offsets.size());
// FieldOffset field_offset(0);
// auto field0 = retrieve_results->fields_data(0);
// Assert(field0.has_scalars());
// auto field0_data = field0.scalars().long_data();
// for (int i = 0; i < req_size; ++i) {
// auto id = ids.data(i);
// auto index = choose(i);
// ASSERT_EQ(id, i64_col[index]);
// auto data = field0_data.data(i);
// ASSERT_EQ(data, i64_col[index]);
// }
//
// auto field1 = retrieve_results->fields_data(1);
// Assert(field1.has_vectors());
// auto field1_data = field1.vectors().float_vector();
// ASSERT_EQ(field1_data.data_size(), DIM * req_size);
//
// for (int i = 0; i < req_size; ++i) {
// for (int d = 0; d < DIM; ++d) {
// auto index = choose(i);
// auto data = field1_data.data(i * DIM + d);
// auto ref = vf_col[index * DIM + d];
// ASSERT_EQ(data, ref);
// }
// }
}
TEST(GetEntityByIds, delete_retrieve) {
TEST(Retrieve, Delete) {
auto schema = std::make_shared<Schema>();
auto fid_64 = schema->AddDebugField("i64", DataType::INT64);
auto DIM = 16;
......@@ -345,4 +360,4 @@ TEST(GetEntityByIds, delete_retrieve) {
auto field1_data = field1.vectors().float_vector();
ASSERT_EQ(field1_data.data_size(), DIM * size);
}
}
\ No newline at end of file
}
......@@ -36,9 +36,9 @@ TEST(Dummy, Aha) {
constexpr int64_t nb = 100;
namespace indexcgo = milvus::proto::indexcgo;
namespace schemapb = milvus::proto::schema;
using knowhere::scalar::OperatorType;
using milvus::indexbuilder::MapParams;
using milvus::indexbuilder::ScalarIndexCreator;
using knowhere::scalar::OperatorType;
using ScalarTestParams = std::pair<MapParams, MapParams>;
namespace {
......
......@@ -40,9 +40,9 @@ int DEVICEID = 0;
namespace indexcgo = milvus::proto::indexcgo;
namespace schemapb = milvus::proto::schema;
using knowhere::scalar::OperatorType;
using milvus::indexbuilder::MapParams;
using milvus::indexbuilder::ScalarIndexCreator;
using knowhere::scalar::OperatorType;
using ScalarTestParams = std::pair<MapParams, MapParams>;
namespace {
......@@ -301,11 +301,8 @@ Jaccard(const uint8_t* point_a, const uint8_t* point_b, int dim) {
}
float
CountDistance(const void* point_a,
const void* point_b,
int dim,
const knowhere::MetricType& metric,
bool is_binary = false) {
CountDistance(
const void* point_a, const void* point_b, int dim, const knowhere::MetricType& metric, bool is_binary = false) {
if (point_a == nullptr || point_b == nullptr) {
return std::numeric_limits<float>::max();
}
......
......@@ -462,7 +462,7 @@ class TestDeleteOperation(TestcaseBase):
# Just one query res and search res, because de-dup
res, _ = collection_w.query(tmp_expr, output_fields=["*"])
assert len(res) == 1
assert len(res) == 0
search_res, _ = collection_w.search([df[ct.default_float_vec_field_name][1]],
ct.default_float_vec_field_name,
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment