mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-23 21:26:26 +00:00
remove-me: Debugging the missing key hannoy bug
This commit is contained in:
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -2603,8 +2603,6 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "hannoy"
|
||||
version = "0.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0cac6ebc04fc7246356d29908b55315c26c695a2ea2f692de9f72c0ac61ca1b1"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
|
@@ -88,7 +88,7 @@ rhai = { version = "1.22.2", features = [
|
||||
"sync",
|
||||
] }
|
||||
arroy = "0.6.1"
|
||||
hannoy = "0.0.3"
|
||||
hannoy = { path = "../../../hannoy" }
|
||||
rand = "0.8.5"
|
||||
tracing = "0.1.41"
|
||||
ureq = { version = "2.12.1", features = ["json"] }
|
||||
|
@@ -393,20 +393,17 @@ fn delete_old_embedders_and_fragments<SD>(
|
||||
where
|
||||
SD: SettingsDelta,
|
||||
{
|
||||
let index_version = index.get_version(wtxn)?.unwrap();
|
||||
for action in settings_delta.embedder_actions().values() {
|
||||
let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else {
|
||||
continue;
|
||||
};
|
||||
let reader = VectorStore::new(
|
||||
index.get_version(wtxn)?.unwrap(),
|
||||
index.vector_store,
|
||||
*embedder_id,
|
||||
action.was_quantized,
|
||||
);
|
||||
let Some(dimensions) = reader.dimensions(wtxn)? else {
|
||||
let vector_store =
|
||||
VectorStore::new(index_version, index.vector_store, *embedder_id, action.was_quantized);
|
||||
let Some(dimensions) = vector_store.dimensions(wtxn)? else {
|
||||
continue;
|
||||
};
|
||||
reader.clear(wtxn, dimensions)?;
|
||||
vector_store.clear(wtxn, dimensions)?;
|
||||
}
|
||||
|
||||
// remove all vectors for the specified fragments
|
||||
@@ -418,13 +415,9 @@ where
|
||||
let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else {
|
||||
continue;
|
||||
};
|
||||
let arroy = VectorStore::new(
|
||||
index.get_version(wtxn)?.unwrap(),
|
||||
index.vector_store,
|
||||
infos.embedder_id,
|
||||
was_quantized,
|
||||
);
|
||||
let Some(dimensions) = arroy.dimensions(wtxn)? else {
|
||||
let vector_store =
|
||||
VectorStore::new(index_version, index.vector_store, infos.embedder_id, was_quantized);
|
||||
let Some(dimensions) = vector_store.dimensions(wtxn)? else {
|
||||
continue;
|
||||
};
|
||||
for fragment_id in fragment_ids {
|
||||
@@ -432,17 +425,17 @@ where
|
||||
|
||||
if infos.embedding_status.user_provided_docids().is_empty() {
|
||||
// no user provided: clear store
|
||||
arroy.clear_store(wtxn, *fragment_id, dimensions)?;
|
||||
vector_store.clear_store(wtxn, *fragment_id, dimensions)?;
|
||||
continue;
|
||||
}
|
||||
|
||||
// some user provided, remove only the ids that are not user provided
|
||||
let to_delete = arroy.items_in_store(wtxn, *fragment_id, |items| {
|
||||
let to_delete = vector_store.items_in_store(wtxn, *fragment_id, |items| {
|
||||
items - infos.embedding_status.user_provided_docids()
|
||||
})?;
|
||||
|
||||
for to_delete in to_delete {
|
||||
arroy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
|
||||
vector_store.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -256,6 +256,7 @@ impl VectorStore {
|
||||
hannoy_memory: Option<usize>,
|
||||
cancel: &(impl Fn() -> bool + Sync + Send),
|
||||
) -> Result<(), hannoy::Error> {
|
||||
eprintln!("Build and quantize embedder_index={}", self.embedder_index);
|
||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||
if self.quantized {
|
||||
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
||||
@@ -309,6 +310,10 @@ impl VectorStore {
|
||||
item_id: hannoy::ItemId,
|
||||
embeddings: &Embeddings<f32>,
|
||||
) -> Result<(), hannoy::Error> {
|
||||
eprintln!(
|
||||
"Adding item_id={item_id} to all stores in embedder_index={}",
|
||||
self.embedder_index
|
||||
);
|
||||
let dimension = embeddings.dimension();
|
||||
for (index, vector) in
|
||||
vector_store_range_for_embedder(self.embedder_index).zip(embeddings.iter())
|
||||
@@ -331,6 +336,7 @@ impl VectorStore {
|
||||
item_id: hannoy::ItemId,
|
||||
vector: &[f32],
|
||||
) -> Result<(), hannoy::Error> {
|
||||
eprintln!("Adding item_id={item_id} and embedder_index={}", self.embedder_index);
|
||||
if self.quantized {
|
||||
self._add_item(wtxn, self.quantized_db(), item_id, vector)
|
||||
} else {
|
||||
@@ -367,6 +373,10 @@ impl VectorStore {
|
||||
store_id: u8,
|
||||
vector: &[f32],
|
||||
) -> Result<(), hannoy::Error> {
|
||||
eprintln!(
|
||||
"Adding item_id={item_id} in store_id={store_id} and embedder_index={}",
|
||||
self.embedder_index
|
||||
);
|
||||
if self.quantized {
|
||||
self._add_item_in_store(wtxn, self.quantized_db(), item_id, store_id, vector)
|
||||
} else {
|
||||
@@ -396,6 +406,10 @@ impl VectorStore {
|
||||
dimension: usize,
|
||||
item_id: hannoy::ItemId,
|
||||
) -> Result<(), hannoy::Error> {
|
||||
eprintln!(
|
||||
"Deleting item_id={item_id} in all stores in embedder_index={}",
|
||||
self.embedder_index
|
||||
);
|
||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||
if self.quantized {
|
||||
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
||||
@@ -423,6 +437,10 @@ impl VectorStore {
|
||||
store_id: u8,
|
||||
dimensions: usize,
|
||||
) -> Result<bool, hannoy::Error> {
|
||||
eprintln!(
|
||||
"Deleting item_id={item_id} in store_id={store_id} and embedder_index={}",
|
||||
self.embedder_index
|
||||
);
|
||||
if self.quantized {
|
||||
self._del_item_in_store(wtxn, self.quantized_db(), item_id, store_id, dimensions)
|
||||
} else {
|
||||
@@ -454,6 +472,10 @@ impl VectorStore {
|
||||
store_id: u8,
|
||||
dimensions: usize,
|
||||
) -> Result<(), hannoy::Error> {
|
||||
eprintln!(
|
||||
"Clearing items in store_id={store_id} and embedder_index={}",
|
||||
self.embedder_index
|
||||
);
|
||||
if self.quantized {
|
||||
self._clear_store(wtxn, self.quantized_db(), store_id, dimensions)
|
||||
} else {
|
||||
@@ -480,6 +502,10 @@ impl VectorStore {
|
||||
item_id: hannoy::ItemId,
|
||||
vector: &[f32],
|
||||
) -> Result<bool, hannoy::Error> {
|
||||
eprintln!(
|
||||
"Deleting item_id={item_id} from all stores in embedder_index={}",
|
||||
self.embedder_index
|
||||
);
|
||||
if self.quantized {
|
||||
self._del_item(wtxn, self.quantized_db(), item_id, vector)
|
||||
} else {
|
||||
@@ -506,6 +532,7 @@ impl VectorStore {
|
||||
}
|
||||
|
||||
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), hannoy::Error> {
|
||||
eprintln!("Clearing all items from embedder_index={}", self.embedder_index);
|
||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||
if self.quantized {
|
||||
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
||||
|
Reference in New Issue
Block a user