Change DocumentsBatchReader to access cursor and index at the same time

Otherwise it is not possible to iterate over all documents while
using the fields index at the same time.
This commit is contained in:
Loïc Lecrenier
2022-07-18 16:08:01 +02:00
parent ab1571cdec
commit fc9f3f31e7
6 changed files with 65 additions and 82 deletions

View File

@ -27,8 +27,8 @@ pub fn enrich_documents_batch<R: Read + Seek>(
autogenerate_docids: bool,
reader: DocumentsBatchReader<R>,
) -> Result<StdResult<EnrichedDocumentsBatchReader<R>, UserError>> {
let mut cursor = reader.into_cursor();
let mut documents_batch_index = cursor.documents_batch_index().clone();
let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
let mut uuid_buffer = [0; uuid::fmt::Hyphenated::LENGTH];
@ -103,9 +103,10 @@ pub fn enrich_documents_batch<R: Read + Seek>(
}
let external_ids = writer_into_reader(external_ids)?;
let primary_key_name = primary_key.name().to_string();
let reader = EnrichedDocumentsBatchReader::new(
cursor.into_reader(),
primary_key.name().to_string(),
DocumentsBatchReader::new(cursor, documents_batch_index),
primary_key_name,
external_ids,
)?;