mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-10-11 06:06:32 +00:00
Use Redis to measure the Sorter insertions
This commit is contained in:
@@ -67,6 +67,8 @@ filter-parser = { path = "../filter-parser" }
|
||||
# documents words self-join
|
||||
itertools = "0.13.0"
|
||||
|
||||
redis = "0.25.4"
|
||||
|
||||
csv = "1.3.0"
|
||||
candle-core = { version = "0.6.0" }
|
||||
candle-transformers = { version = "0.6.0" }
|
||||
|
@@ -29,6 +29,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
settings_diff: &InnerIndexSettingsDiff,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
|
||||
let mut conn = super::REDIS_CLIENT.get_connection().unwrap();
|
||||
|
||||
let max_positions_per_attributes = max_positions_per_attributes
|
||||
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
@@ -148,6 +150,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
for (field_id, value) in obkv.iter() {
|
||||
key_buffer.truncate(mem::size_of::<u32>());
|
||||
key_buffer.extend_from_slice(&field_id.to_be_bytes());
|
||||
redis::cmd("INCR").arg(key_buffer.as_slice()).query::<usize>(&mut conn).unwrap();
|
||||
docid_word_positions_sorter.insert(&key_buffer, value)?;
|
||||
}
|
||||
|
||||
|
@@ -26,6 +26,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
_settings_diff: &InnerIndexSettingsDiff,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
let mut conn = super::REDIS_CLIENT.get_connection().unwrap();
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut fid_word_count_docids_sorter = create_sorter(
|
||||
@@ -70,6 +71,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
||||
key_buffer.clear();
|
||||
key_buffer.extend_from_slice(fid_bytes);
|
||||
key_buffer.push(word_count as u8);
|
||||
redis::cmd("INCR").arg(key_buffer.as_slice()).query::<usize>(&mut conn).unwrap();
|
||||
fid_word_count_docids_sorter
|
||||
.insert(&key_buffer, value_writer.into_inner().unwrap())?;
|
||||
}
|
||||
@@ -81,6 +83,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
||||
key_buffer.clear();
|
||||
key_buffer.extend_from_slice(fid_bytes);
|
||||
key_buffer.push(word_count as u8);
|
||||
redis::cmd("INCR").arg(key_buffer.as_slice()).query::<usize>(&mut conn).unwrap();
|
||||
fid_word_count_docids_sorter
|
||||
.insert(&key_buffer, value_writer.into_inner().unwrap())?;
|
||||
}
|
||||
|
@@ -10,6 +10,7 @@ use super::helpers::{
|
||||
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at,
|
||||
writer_into_reader, GrenadParameters,
|
||||
};
|
||||
use super::REDIS_CLIENT;
|
||||
use crate::error::SerializationError;
|
||||
use crate::heed_codec::StrBEU16Codec;
|
||||
use crate::index::db_name::DOCID_WORD_POSITIONS;
|
||||
@@ -37,6 +38,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
grenad::Reader<BufReader<File>>,
|
||||
)> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
let mut conn = REDIS_CLIENT.get_connection().unwrap();
|
||||
|
||||
let mut word_fid_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Unstable,
|
||||
@@ -80,6 +82,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
&del_words,
|
||||
&add_words,
|
||||
&mut word_fid_docids_sorter,
|
||||
&mut conn,
|
||||
)?;
|
||||
|
||||
del_words.clear();
|
||||
@@ -164,6 +167,7 @@ fn words_into_sorter(
|
||||
del_words: &BTreeSet<Vec<u8>>,
|
||||
add_words: &BTreeSet<Vec<u8>>,
|
||||
word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>,
|
||||
conn: &mut redis::Connection,
|
||||
) -> Result<()> {
|
||||
use itertools::merge_join_by;
|
||||
use itertools::EitherOrBoth::{Both, Left, Right};
|
||||
@@ -192,18 +196,21 @@ fn words_into_sorter(
|
||||
key_buffer.extend_from_slice(word_bytes);
|
||||
key_buffer.push(0);
|
||||
key_buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
redis::cmd("INCR").arg(key_buffer.as_slice()).query::<usize>(conn).unwrap();
|
||||
word_fid_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// TODO do we still use this?
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
fn docids_into_writers<W>(
|
||||
word: &str,
|
||||
deletions: &RoaringBitmap,
|
||||
additions: &RoaringBitmap,
|
||||
writer: &mut grenad::Writer<W>,
|
||||
conn: &mut redis::Connection,
|
||||
) -> Result<()>
|
||||
where
|
||||
W: std::io::Write,
|
||||
@@ -235,6 +242,7 @@ where
|
||||
}
|
||||
|
||||
// insert everything in the same writer.
|
||||
redis::cmd("INCR").arg(word.as_bytes()).query::<usize>(conn).unwrap();
|
||||
writer.insert(word.as_bytes(), obkv.into_inner().unwrap())?;
|
||||
|
||||
Ok(())
|
||||
|
@@ -26,6 +26,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
settings_diff: &InnerIndexSettingsDiff,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
let mut conn = super::REDIS_CLIENT.get_connection().unwrap();
|
||||
|
||||
// early return if the data shouldn't be deleted nor created.
|
||||
if settings_diff.settings_update_only && !settings_diff.reindex_proximities() {
|
||||
let writer = create_writer(
|
||||
@@ -78,6 +80,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
||||
&del_word_pair_proximity,
|
||||
&add_word_pair_proximity,
|
||||
&mut word_pair_proximity_docids_sorters,
|
||||
&mut conn,
|
||||
)?;
|
||||
del_word_pair_proximity.clear();
|
||||
add_word_pair_proximity.clear();
|
||||
@@ -168,6 +171,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
||||
&del_word_pair_proximity,
|
||||
&add_word_pair_proximity,
|
||||
&mut word_pair_proximity_docids_sorters,
|
||||
&mut conn,
|
||||
)?;
|
||||
}
|
||||
{
|
||||
@@ -198,6 +202,7 @@ fn document_word_positions_into_sorter(
|
||||
del_word_pair_proximity: &BTreeMap<(String, String), u8>,
|
||||
add_word_pair_proximity: &BTreeMap<(String, String), u8>,
|
||||
word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeFn>],
|
||||
conn: &mut redis::Connection,
|
||||
) -> Result<()> {
|
||||
use itertools::merge_join_by;
|
||||
use itertools::EitherOrBoth::{Both, Left, Right};
|
||||
@@ -233,6 +238,7 @@ fn document_word_positions_into_sorter(
|
||||
key_buffer.push(0);
|
||||
key_buffer.extend_from_slice(w2.as_bytes());
|
||||
|
||||
redis::cmd("INCR").arg(key_buffer.as_slice()).query::<usize>(conn).unwrap();
|
||||
word_pair_proximity_docids_sorters[*prox as usize - 1]
|
||||
.insert(&key_buffer, value_writer.into_inner().unwrap())?;
|
||||
}
|
||||
|
@@ -25,6 +25,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
_settings_diff: &InnerIndexSettingsDiff,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
let mut conn = super::REDIS_CLIENT.get_connection().unwrap();
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_position_docids_sorter = create_sorter(
|
||||
@@ -53,6 +54,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
||||
&del_word_positions,
|
||||
&add_word_positions,
|
||||
&mut word_position_docids_sorter,
|
||||
&mut conn,
|
||||
)?;
|
||||
del_word_positions.clear();
|
||||
add_word_positions.clear();
|
||||
@@ -85,6 +87,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
||||
&del_word_positions,
|
||||
&add_word_positions,
|
||||
&mut word_position_docids_sorter,
|
||||
&mut conn,
|
||||
)?;
|
||||
}
|
||||
|
||||
@@ -101,6 +104,7 @@ fn words_position_into_sorter(
|
||||
del_word_positions: &BTreeSet<(u16, Vec<u8>)>,
|
||||
add_word_positions: &BTreeSet<(u16, Vec<u8>)>,
|
||||
word_position_docids_sorter: &mut grenad::Sorter<MergeFn>,
|
||||
conn: &mut redis::Connection,
|
||||
) -> Result<()> {
|
||||
use itertools::merge_join_by;
|
||||
use itertools::EitherOrBoth::{Both, Left, Right};
|
||||
@@ -131,6 +135,7 @@ fn words_position_into_sorter(
|
||||
key_buffer.extend_from_slice(word_bytes);
|
||||
key_buffer.push(0);
|
||||
key_buffer.extend_from_slice(&position.to_be_bytes());
|
||||
redis::cmd("INCR").arg(key_buffer.as_slice()).query::<usize>(conn).unwrap();
|
||||
word_position_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;
|
||||
}
|
||||
|
||||
|
@@ -35,6 +35,9 @@ use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::vector::error::PossibleEmbeddingMistakes;
|
||||
use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
|
||||
|
||||
/// Shared Redis client used by the extractors to count sorter insertions.
///
/// Wrapped in `once_cell::sync::Lazy`, so the client is built the first time
/// the static is dereferenced. The server URL is hard-coded to
/// `redis://127.0.0.1/`, and the result of `redis::Client::open` is unwrapped,
/// so an error from that call panics on first access.
///
/// NOTE(review): callers obtain their own connection via `get_connection()`
/// and unwrap it too — presumably temporary measurement code; confirm before
/// shipping, since an unreachable Redis server would abort indexing.
pub static REDIS_CLIENT: once_cell::sync::Lazy<redis::Client> =
|
||||
once_cell::sync::Lazy::new(|| redis::Client::open("redis://127.0.0.1/").unwrap());
|
||||
|
||||
/// Extract data for each databases from obkv documents in parallel.
|
||||
/// Send data in grenad file over provided Sender.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
|
Reference in New Issue
Block a user