mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-06-06 20:25:40 +00:00
set the memory in arroy
This commit is contained in:
parent
d3d22d8ed4
commit
ef9d9f8481
3
Cargo.lock
generated
3
Cargo.lock
generated
@ -394,7 +394,7 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "arroy"
|
name = "arroy"
|
||||||
version = "0.6.0"
|
version = "0.6.0"
|
||||||
source = "git+https://github.com/meilisearch/arroy?branch=main#80a7f1ba60bd7d88d55ce958a7579d664fc769ce"
|
source = "git+https://github.com/meilisearch/arroy?branch=main#3350696381a4e29a838209663f39c1c58e9bc7b6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytemuck",
|
"bytemuck",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
@ -403,6 +403,7 @@ dependencies = [
|
|||||||
"memmap2",
|
"memmap2",
|
||||||
"nohash",
|
"nohash",
|
||||||
"ordered-float",
|
"ordered-float",
|
||||||
|
"page_size",
|
||||||
"rand",
|
"rand",
|
||||||
"rayon",
|
"rayon",
|
||||||
"roaring",
|
"roaring",
|
||||||
|
@ -520,7 +520,15 @@ where
|
|||||||
|
|
||||||
pool.install(|| {
|
pool.install(|| {
|
||||||
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
|
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
|
||||||
writer.build_and_quantize(wtxn, &mut rng, dimension, is_quantizing, cancel)?;
|
writer.build_and_quantize(
|
||||||
|
wtxn,
|
||||||
|
&mut rng,
|
||||||
|
dimension,
|
||||||
|
is_quantizing,
|
||||||
|
// Arroy should only use 50% of the memory
|
||||||
|
self.indexer_config.max_memory.map(|mm| mm / 2),
|
||||||
|
cancel,
|
||||||
|
)?;
|
||||||
Result::Ok(())
|
Result::Ok(())
|
||||||
})
|
})
|
||||||
.map_err(InternalError::from)??;
|
.map_err(InternalError::from)??;
|
||||||
|
@ -70,6 +70,8 @@ where
|
|||||||
max_memory: grenad_parameters.max_memory.map(|mm| mm * 5 / 100),
|
max_memory: grenad_parameters.max_memory.map(|mm| mm * 5 / 100),
|
||||||
..grenad_parameters
|
..grenad_parameters
|
||||||
};
|
};
|
||||||
|
// Arroy should use 50% of the grenad memory instead of 5%
|
||||||
|
let arroy_memory = grenad_parameters.max_memory.map(|mm| mm * 10);
|
||||||
|
|
||||||
// 5% percent of the allocated memory for the extractors, or min 100MiB
|
// 5% percent of the allocated memory for the extractors, or min 100MiB
|
||||||
// 5% percent of the allocated memory for the bbqueues, or min 50MiB
|
// 5% percent of the allocated memory for the bbqueues, or min 50MiB
|
||||||
@ -200,6 +202,7 @@ where
|
|||||||
index,
|
index,
|
||||||
wtxn,
|
wtxn,
|
||||||
index_embeddings,
|
index_embeddings,
|
||||||
|
arroy_memory,
|
||||||
&mut arroy_writers,
|
&mut arroy_writers,
|
||||||
&indexing_context.must_stop_processing,
|
&indexing_context.must_stop_processing,
|
||||||
)
|
)
|
||||||
|
@ -101,6 +101,7 @@ pub fn build_vectors<MSP>(
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
wtxn: &mut RwTxn<'_>,
|
wtxn: &mut RwTxn<'_>,
|
||||||
index_embeddings: Vec<IndexEmbeddingConfig>,
|
index_embeddings: Vec<IndexEmbeddingConfig>,
|
||||||
|
arroy_memory: Option<usize>,
|
||||||
arroy_writers: &mut HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>,
|
arroy_writers: &mut HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>,
|
||||||
must_stop_processing: &MSP,
|
must_stop_processing: &MSP,
|
||||||
) -> Result<()>
|
) -> Result<()>
|
||||||
@ -114,7 +115,14 @@ where
|
|||||||
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
||||||
for (_index, (_embedder_name, _embedder, writer, dimensions)) in arroy_writers {
|
for (_index, (_embedder_name, _embedder, writer, dimensions)) in arroy_writers {
|
||||||
let dimensions = *dimensions;
|
let dimensions = *dimensions;
|
||||||
writer.build_and_quantize(wtxn, &mut rng, dimensions, false, must_stop_processing)?;
|
writer.build_and_quantize(
|
||||||
|
wtxn,
|
||||||
|
&mut rng,
|
||||||
|
dimensions,
|
||||||
|
false,
|
||||||
|
arroy_memory,
|
||||||
|
must_stop_processing,
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
index.put_embedding_configs(wtxn, index_embeddings)?;
|
index.put_embedding_configs(wtxn, index_embeddings)?;
|
||||||
|
@ -86,6 +86,7 @@ impl ArroyWrapper {
|
|||||||
rng: &mut R,
|
rng: &mut R,
|
||||||
dimension: usize,
|
dimension: usize,
|
||||||
quantizing: bool,
|
quantizing: bool,
|
||||||
|
arroy_memory: Option<usize>,
|
||||||
cancel: &(impl Fn() -> bool + Sync + Send),
|
cancel: &(impl Fn() -> bool + Sync + Send),
|
||||||
) -> Result<(), arroy::Error> {
|
) -> Result<(), arroy::Error> {
|
||||||
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
for index in arroy_db_range_for_embedder(self.embedder_index) {
|
||||||
@ -105,9 +106,17 @@ impl ArroyWrapper {
|
|||||||
// sensitive.
|
// sensitive.
|
||||||
if quantizing && !self.quantized {
|
if quantizing && !self.quantized {
|
||||||
let writer = writer.prepare_changing_distance::<BinaryQuantizedCosine>(wtxn)?;
|
let writer = writer.prepare_changing_distance::<BinaryQuantizedCosine>(wtxn)?;
|
||||||
writer.builder(rng).cancel(cancel).build(wtxn)?;
|
writer
|
||||||
|
.builder(rng)
|
||||||
|
.available_memory(arroy_memory.unwrap_or(usize::MAX))
|
||||||
|
.cancel(cancel)
|
||||||
|
.build(wtxn)?;
|
||||||
} else if writer.need_build(wtxn)? {
|
} else if writer.need_build(wtxn)? {
|
||||||
writer.builder(rng).cancel(cancel).build(wtxn)?;
|
writer
|
||||||
|
.builder(rng)
|
||||||
|
.available_memory(arroy_memory.unwrap_or(usize::MAX))
|
||||||
|
.cancel(cancel)
|
||||||
|
.build(wtxn)?;
|
||||||
} else if writer.is_empty(wtxn)? {
|
} else if writer.is_empty(wtxn)? {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user