mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-23 21:26:26 +00:00
remove-me: Debugging the missing key hannoy bug
This commit is contained in:
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -2603,8 +2603,6 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "hannoy"
|
name = "hannoy"
|
||||||
version = "0.0.3"
|
version = "0.0.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "0cac6ebc04fc7246356d29908b55315c26c695a2ea2f692de9f72c0ac61ca1b1"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytemuck",
|
"bytemuck",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
|
@ -88,7 +88,7 @@ rhai = { version = "1.22.2", features = [
|
|||||||
"sync",
|
"sync",
|
||||||
] }
|
] }
|
||||||
arroy = "0.6.1"
|
arroy = "0.6.1"
|
||||||
hannoy = "0.0.3"
|
hannoy = { path = "../../../hannoy" }
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
tracing = "0.1.41"
|
tracing = "0.1.41"
|
||||||
ureq = { version = "2.12.1", features = ["json"] }
|
ureq = { version = "2.12.1", features = ["json"] }
|
||||||
|
@ -393,20 +393,17 @@ fn delete_old_embedders_and_fragments<SD>(
|
|||||||
where
|
where
|
||||||
SD: SettingsDelta,
|
SD: SettingsDelta,
|
||||||
{
|
{
|
||||||
|
let index_version = index.get_version(wtxn)?.unwrap();
|
||||||
for action in settings_delta.embedder_actions().values() {
|
for action in settings_delta.embedder_actions().values() {
|
||||||
let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else {
|
let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
let reader = VectorStore::new(
|
let vector_store =
|
||||||
index.get_version(wtxn)?.unwrap(),
|
VectorStore::new(index_version, index.vector_store, *embedder_id, action.was_quantized);
|
||||||
index.vector_store,
|
let Some(dimensions) = vector_store.dimensions(wtxn)? else {
|
||||||
*embedder_id,
|
|
||||||
action.was_quantized,
|
|
||||||
);
|
|
||||||
let Some(dimensions) = reader.dimensions(wtxn)? else {
|
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
reader.clear(wtxn, dimensions)?;
|
vector_store.clear(wtxn, dimensions)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove all vectors for the specified fragments
|
// remove all vectors for the specified fragments
|
||||||
@ -418,13 +415,9 @@ where
|
|||||||
let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else {
|
let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
let arroy = VectorStore::new(
|
let vector_store =
|
||||||
index.get_version(wtxn)?.unwrap(),
|
VectorStore::new(index_version, index.vector_store, infos.embedder_id, was_quantized);
|
||||||
index.vector_store,
|
let Some(dimensions) = vector_store.dimensions(wtxn)? else {
|
||||||
infos.embedder_id,
|
|
||||||
was_quantized,
|
|
||||||
);
|
|
||||||
let Some(dimensions) = arroy.dimensions(wtxn)? else {
|
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
for fragment_id in fragment_ids {
|
for fragment_id in fragment_ids {
|
||||||
@ -432,17 +425,17 @@ where
|
|||||||
|
|
||||||
if infos.embedding_status.user_provided_docids().is_empty() {
|
if infos.embedding_status.user_provided_docids().is_empty() {
|
||||||
// no user provided: clear store
|
// no user provided: clear store
|
||||||
arroy.clear_store(wtxn, *fragment_id, dimensions)?;
|
vector_store.clear_store(wtxn, *fragment_id, dimensions)?;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// some user provided, remove only the ids that are not user provided
|
// some user provided, remove only the ids that are not user provided
|
||||||
let to_delete = arroy.items_in_store(wtxn, *fragment_id, |items| {
|
let to_delete = vector_store.items_in_store(wtxn, *fragment_id, |items| {
|
||||||
items - infos.embedding_status.user_provided_docids()
|
items - infos.embedding_status.user_provided_docids()
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
for to_delete in to_delete {
|
for to_delete in to_delete {
|
||||||
arroy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
|
vector_store.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -256,6 +256,7 @@ impl VectorStore {
|
|||||||
hannoy_memory: Option<usize>,
|
hannoy_memory: Option<usize>,
|
||||||
cancel: &(impl Fn() -> bool + Sync + Send),
|
cancel: &(impl Fn() -> bool + Sync + Send),
|
||||||
) -> Result<(), hannoy::Error> {
|
) -> Result<(), hannoy::Error> {
|
||||||
|
eprintln!("Build and quantize embedder_index={}", self.embedder_index);
|
||||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
||||||
@ -309,6 +310,10 @@ impl VectorStore {
|
|||||||
item_id: hannoy::ItemId,
|
item_id: hannoy::ItemId,
|
||||||
embeddings: &Embeddings<f32>,
|
embeddings: &Embeddings<f32>,
|
||||||
) -> Result<(), hannoy::Error> {
|
) -> Result<(), hannoy::Error> {
|
||||||
|
eprintln!(
|
||||||
|
"Adding item_id={item_id} to all stores in embedder_index={}",
|
||||||
|
self.embedder_index
|
||||||
|
);
|
||||||
let dimension = embeddings.dimension();
|
let dimension = embeddings.dimension();
|
||||||
for (index, vector) in
|
for (index, vector) in
|
||||||
vector_store_range_for_embedder(self.embedder_index).zip(embeddings.iter())
|
vector_store_range_for_embedder(self.embedder_index).zip(embeddings.iter())
|
||||||
@ -331,6 +336,7 @@ impl VectorStore {
|
|||||||
item_id: hannoy::ItemId,
|
item_id: hannoy::ItemId,
|
||||||
vector: &[f32],
|
vector: &[f32],
|
||||||
) -> Result<(), hannoy::Error> {
|
) -> Result<(), hannoy::Error> {
|
||||||
|
eprintln!("Adding item_id={item_id} and embedder_index={}", self.embedder_index);
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
self._add_item(wtxn, self.quantized_db(), item_id, vector)
|
self._add_item(wtxn, self.quantized_db(), item_id, vector)
|
||||||
} else {
|
} else {
|
||||||
@ -367,6 +373,10 @@ impl VectorStore {
|
|||||||
store_id: u8,
|
store_id: u8,
|
||||||
vector: &[f32],
|
vector: &[f32],
|
||||||
) -> Result<(), hannoy::Error> {
|
) -> Result<(), hannoy::Error> {
|
||||||
|
eprintln!(
|
||||||
|
"Adding item_id={item_id} in store_id={store_id} and embedder_index={}",
|
||||||
|
self.embedder_index
|
||||||
|
);
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
self._add_item_in_store(wtxn, self.quantized_db(), item_id, store_id, vector)
|
self._add_item_in_store(wtxn, self.quantized_db(), item_id, store_id, vector)
|
||||||
} else {
|
} else {
|
||||||
@ -396,6 +406,10 @@ impl VectorStore {
|
|||||||
dimension: usize,
|
dimension: usize,
|
||||||
item_id: hannoy::ItemId,
|
item_id: hannoy::ItemId,
|
||||||
) -> Result<(), hannoy::Error> {
|
) -> Result<(), hannoy::Error> {
|
||||||
|
eprintln!(
|
||||||
|
"Deleting item_id={item_id} in all stores in embedder_index={}",
|
||||||
|
self.embedder_index
|
||||||
|
);
|
||||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
||||||
@ -423,6 +437,10 @@ impl VectorStore {
|
|||||||
store_id: u8,
|
store_id: u8,
|
||||||
dimensions: usize,
|
dimensions: usize,
|
||||||
) -> Result<bool, hannoy::Error> {
|
) -> Result<bool, hannoy::Error> {
|
||||||
|
eprintln!(
|
||||||
|
"Deleting item_id={item_id} in store_id={store_id} and embedder_index={}",
|
||||||
|
self.embedder_index
|
||||||
|
);
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
self._del_item_in_store(wtxn, self.quantized_db(), item_id, store_id, dimensions)
|
self._del_item_in_store(wtxn, self.quantized_db(), item_id, store_id, dimensions)
|
||||||
} else {
|
} else {
|
||||||
@ -454,6 +472,10 @@ impl VectorStore {
|
|||||||
store_id: u8,
|
store_id: u8,
|
||||||
dimensions: usize,
|
dimensions: usize,
|
||||||
) -> Result<(), hannoy::Error> {
|
) -> Result<(), hannoy::Error> {
|
||||||
|
eprintln!(
|
||||||
|
"Clearing items in store_id={store_id} and embedder_index={}",
|
||||||
|
self.embedder_index
|
||||||
|
);
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
self._clear_store(wtxn, self.quantized_db(), store_id, dimensions)
|
self._clear_store(wtxn, self.quantized_db(), store_id, dimensions)
|
||||||
} else {
|
} else {
|
||||||
@ -480,6 +502,10 @@ impl VectorStore {
|
|||||||
item_id: hannoy::ItemId,
|
item_id: hannoy::ItemId,
|
||||||
vector: &[f32],
|
vector: &[f32],
|
||||||
) -> Result<bool, hannoy::Error> {
|
) -> Result<bool, hannoy::Error> {
|
||||||
|
eprintln!(
|
||||||
|
"Deleting item_id={item_id} from all stores in embedder_index={}",
|
||||||
|
self.embedder_index
|
||||||
|
);
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
self._del_item(wtxn, self.quantized_db(), item_id, vector)
|
self._del_item(wtxn, self.quantized_db(), item_id, vector)
|
||||||
} else {
|
} else {
|
||||||
@ -506,6 +532,7 @@ impl VectorStore {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), hannoy::Error> {
|
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), hannoy::Error> {
|
||||||
|
eprintln!("Clearing all items from embedder_index={}", self.embedder_index);
|
||||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||||
if self.quantized {
|
if self.quantized {
|
||||||
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
||||||
|
Reference in New Issue
Block a user