Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-07-28 01:01:00 +00:00)

Commit: Fix the tests for the new DocumentsBatchBuilder/Reader
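In short: the tests move from the removed DocumentBatchBuilder/DocumentBatchReader pair to DocumentsBatchBuilder/DocumentsBatchReader, replacing the extend_from_json/finish/set_position sequence with append_json_object/into_inner, and Transform::read_documents now streams documents through a cursor obtained with into_cursor().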
@@ -25,7 +25,7 @@ pub use self::helpers::{
 };
 use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
 pub use self::transform::{Transform, TransformOutput};
-use crate::documents::DocumentBatchReader;
+use crate::documents::DocumentsBatchReader;
 pub use crate::update::index_documents::helpers::CursorClonableMmap;
 use crate::update::{
     self, Facets, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
@@ -121,7 +121,7 @@ where
     /// builder, and the builder must be discarded.
     ///
     /// Returns the number of documents added to the builder.
-    pub fn add_documents<R>(&mut self, reader: DocumentBatchReader<R>) -> Result<u64>
+    pub fn add_documents<R>(&mut self, reader: DocumentsBatchReader<R>) -> Result<u64>
     where
         R: Read + Seek,
     {
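For orientation, here is a minimal sketch (not part of the commit) of the round trip the updated tests rely on: serialize JSON objects with DocumentsBatchBuilder, recover the buffer with into_inner, and wrap it in a Cursor so it satisfies the `Read + Seek` bound that add_documents keeps. The helper name is hypothetical; every builder and reader call appears verbatim in the hunks that follow.

```rust
use std::io::Cursor;

use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};

// Hypothetical helper: build a one-object batch the way the tests below do.
fn batch_of_one(
    object: &serde_json::Map<String, serde_json::Value>,
) -> DocumentsBatchReader<Cursor<Vec<u8>>> {
    let mut builder = DocumentsBatchBuilder::new(Vec::new());
    builder.append_json_object(object).unwrap();
    let vector = builder.into_inner().unwrap();
    // Cursor<Vec<u8>> implements Read + Seek, matching add_documents' bound.
    DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap()
}
```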
@@ -590,9 +590,8 @@ mod tests {
     use maplit::hashset;
 
     use super::*;
-    use crate::documents::DocumentBatchBuilder;
+    use crate::documents::DocumentsBatchBuilder;
     use crate::update::DeleteDocuments;
-    use crate::HashMap;
 
     #[test]
     fn simple_document_replacement() {
@@ -1252,21 +1251,17 @@ mod tests {
 
         let mut wtxn = index.write_txn().unwrap();
 
-        let mut big_object = HashMap::new();
-        big_object.insert(S("id"), "wow");
+        let mut big_object = serde_json::Map::new();
+        big_object.insert(S("id"), serde_json::Value::from("wow"));
         for i in 0..1000 {
             let key = i.to_string();
-            big_object.insert(key, "I am a text!");
+            big_object.insert(key, serde_json::Value::from("I am a text!"));
         }
 
-        let mut cursor = Cursor::new(Vec::new());
-
-        let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
-        let big_object = Cursor::new(serde_json::to_vec(&big_object).unwrap());
-        builder.extend_from_json(big_object).unwrap();
-        builder.finish().unwrap();
-        cursor.set_position(0);
-        let content = DocumentBatchReader::from_reader(cursor).unwrap();
+        let mut builder = DocumentsBatchBuilder::new(Vec::new());
+        builder.append_json_object(&big_object).unwrap();
+        let vector = builder.into_inner().unwrap();
+        let content = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
 
         let config = IndexerConfig::default();
         let indexing_config = IndexDocumentsConfig::default();
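The shape of this migration is worth noting: the old DocumentBatchBuilder wrote through a caller-owned Cursor and required an explicit finish() plus a manual set_position(0) before the buffer could be re-read, while the new DocumentsBatchBuilder owns a plain Vec<u8> and hands it back through into_inner(), so rewinding happens naturally when the bytes are wrapped in a fresh Cursor. Four lines replace eight, and the failure mode of forgetting to rewind disappears.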
@@ -1288,23 +1283,19 @@ mod tests {
 
         let mut wtxn = index.write_txn().unwrap();
 
-        let mut big_object = HashMap::new();
-        big_object.insert(S("id"), "wow");
+        let mut big_object = serde_json::Map::new();
+        big_object.insert(S("id"), serde_json::Value::from("wow"));
         let content: String = (0..=u16::MAX)
             .into_iter()
             .map(|p| p.to_string())
             .reduce(|a, b| a + " " + b.as_ref())
             .unwrap();
-        big_object.insert("content".to_string(), &content);
+        big_object.insert("content".to_string(), serde_json::Value::from(content));
 
-        let mut cursor = Cursor::new(Vec::new());
-
-        let big_object = serde_json::to_string(&big_object).unwrap();
-        let mut builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
-        builder.extend_from_json(&mut big_object.as_bytes()).unwrap();
-        builder.finish().unwrap();
-        cursor.set_position(0);
-        let content = DocumentBatchReader::from_reader(cursor).unwrap();
+        let mut builder = DocumentsBatchBuilder::new(Vec::new());
+        builder.append_json_object(&big_object).unwrap();
+        let vector = builder.into_inner().unwrap();
+        let content = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
 
         let config = IndexerConfig::default();
         let indexing_config = IndexDocumentsConfig::default();
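A small aside on the fixture this hunk keeps: the reduce chain that builds the long space-separated string also has a more conventional spelling via join. A behavior-equivalent sketch, purely for readability:

```rust
// Equivalent construction of the same space-separated string of 0..=u16::MAX.
let content: String = (0..=u16::MAX)
    .map(|p| p.to_string())
    .collect::<Vec<String>>()
    .join(" ");
```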
@@ -1843,18 +1834,20 @@ mod tests {
 
         // Create 200 documents with a long text
         let content = {
-            let documents: Vec<_> = (0..200i32)
+            let documents_iter = (0..200i32)
                 .into_iter()
                 .map(|i| serde_json::json!({ "id": i, "script": script }))
-                .collect();
+                .filter_map(|json| match json {
+                    serde_json::Value::Object(object) => Some(object),
+                    _ => None,
+                });
 
-            let mut writer = std::io::Cursor::new(Vec::new());
-            let mut builder = crate::documents::DocumentBatchBuilder::new(&mut writer).unwrap();
-            let documents = serde_json::to_vec(&documents).unwrap();
-            builder.extend_from_json(std::io::Cursor::new(documents)).unwrap();
-            builder.finish().unwrap();
-            writer.set_position(0);
-            crate::documents::DocumentBatchReader::from_reader(writer).unwrap()
+            let mut builder = crate::documents::DocumentsBatchBuilder::new(Vec::new());
+            for object in documents_iter {
+                builder.append_json_object(&object).unwrap();
+            }
+            let vector = builder.into_inner().unwrap();
+            crate::documents::DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap()
         };
 
         // Index those 200 long documents
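A note on the filter_map introduced here: serde_json's json! macro with a brace literal always produces Value::Object, so the `_ => None` arm is unreachable in practice; the match exists only to move from Value to the serde_json::Map that append_json_object expects. A self-contained illustration of that unwrapping step:

```rust
#[test]
fn json_object_literal_unwraps() {
    // json! with an object literal always yields Value::Object; the match
    // merely converts the enum wrapper into the inner serde_json::Map.
    let object = match serde_json::json!({ "id": 0, "script": "foo" }) {
        serde_json::Value::Object(object) => object,
        _ => unreachable!("json! object literals are always Value::Object"),
    };
    assert_eq!(object.len(), 2);
}
```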
@@ -14,7 +14,7 @@ use smartstring::SmartString;
 
 use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
 use super::{IndexDocumentsMethod, IndexerConfig};
-use crate::documents::{DocumentBatchReader, DocumentsBatchIndex};
+use crate::documents::{DocumentsBatchIndex, DocumentsBatchReader};
 use crate::error::{Error, InternalError, UserError};
 use crate::index::db_name;
 use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
@@ -152,7 +152,7 @@ impl<'a, 'i> Transform<'a, 'i> {
 
     pub fn read_documents<R, F>(
         &mut self,
-        mut reader: DocumentBatchReader<R>,
+        reader: DocumentsBatchReader<R>,
         wtxn: &mut heed::RwTxn,
         progress_callback: F,
     ) -> Result<usize>
@@ -160,7 +160,8 @@ impl<'a, 'i> Transform<'a, 'i> {
         R: Read + Seek,
         F: Fn(UpdateIndexingStep) + Sync,
     {
-        let fields_index = reader.index();
+        let mut cursor = reader.into_cursor();
+        let fields_index = cursor.documents_batch_index();
         let external_documents_ids = self.index.external_documents_ids(wtxn)?;
 
         let mapping = create_fields_mapping(&mut self.fields_ids_map, fields_index)?;
@@ -186,7 +187,8 @@ impl<'a, 'i> Transform<'a, 'i> {
         let mut documents_count = 0;
         let mut external_id_buffer = Vec::new();
         let mut field_buffer: Vec<(u16, Cow<[u8]>)> = Vec::new();
-        while let Some((addition_index, document)) = reader.next_document_with_index()? {
+        let addition_index = cursor.documents_batch_index().clone();
+        while let Some(document) = cursor.next_document()? {
             let mut field_buffer_cache = drop_and_reuse(field_buffer);
             if self.indexer_settings.log_every_n.map_or(false, |len| documents_count % len == 0) {
                 progress_callback(UpdateIndexingStep::RemapDocumentAddition {
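Taken together, the two transform.rs hunks above switch read_documents from the reader's pull interface to a cursor: into_cursor() consumes the reader, documents_batch_index() exposes the field index that used to come bundled with each document, and next_document() streams the documents. A hedged sketch of the resulting loop shape (the counting function is illustrative, not from the commit):

```rust
use std::io::{Read, Seek};

use crate::documents::DocumentsBatchReader;

// Illustrative only: count documents the way read_documents now iterates.
fn count_documents<R: Read + Seek>(
    reader: DocumentsBatchReader<R>,
) -> crate::Result<usize> {
    let mut cursor = reader.into_cursor();
    // Clone the index up front: keeping a borrow of `cursor` here would
    // conflict with the mutable borrow next_document() takes below.
    let _addition_index = cursor.documents_batch_index().clone();
    let mut count = 0;
    while let Some(_document) = cursor.next_document()? {
        count += 1;
    }
    Ok(count)
}
```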
@@ -840,7 +842,7 @@ fn update_primary_key<'a>(
         None => {
             let mut json = Map::new();
             for (key, value) in document.iter() {
-                let key = addition_index.name(key).cloned();
+                let key = addition_index.name(key).map(ToString::to_string);
                 let value = serde_json::from_slice::<Value>(&value).ok();
 
                 if let Some((k, v)) = key.zip(value) {
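The last tweak follows from a signature change implied here: if DocumentsBatchIndex::name now returns Option<&str> rather than Option<&String> (an assumption read off this diff, not stated in it), .cloned() no longer produces an owned String, since cloning through &str would require the unsized str. Mapping through ToString restores the owned value:

```rust
// Assumed signature change: name() yields Option<&str> instead of
// Option<&String>, so the owned String must come from ToString.
fn to_owned_name(name: Option<&str>) -> Option<String> {
    name.map(ToString::to_string)
}
```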