Fix the tests for the new DocumentsBatchBuilder/Reader

This commit is contained in:
Kerollmops
2022-06-14 16:04:27 +02:00
parent 419ce3966c
commit e8297ad27e
9 changed files with 292 additions and 374 deletions

View File

@ -3,9 +3,10 @@ use std::io::Cursor;
use big_s::S;
use heed::EnvOpenOptions;
use maplit::hashset;
use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{FacetDistribution, Index};
use serde_json::{Deserializer, Map, Value};
#[test]
fn test_facet_distribution_with_no_facet_values() {
@ -30,35 +31,30 @@ fn test_facet_distribution_with_no_facet_values() {
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
let mut cursor = Cursor::new(Vec::new());
let mut documents_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
let reader = Cursor::new(
r#"[
{
r#"{
"id": 123,
"title": "What a week, hu...",
"genres": [],
"tags": ["blue"]
},
}
{
"id": 345,
"title": "I am the pig!",
"tags": ["red"]
}
]"#,
}"#,
);
for doc in serde_json::Deserializer::from_reader(reader).into_iter::<serde_json::Value>() {
let doc = Cursor::new(serde_json::to_vec(&doc.unwrap()).unwrap());
documents_builder.extend_from_json(doc).unwrap();
for result in Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
let object = result.unwrap();
documents_builder.append_json_object(&object).unwrap();
}
documents_builder.finish().unwrap();
cursor.set_position(0);
let vector = documents_builder.into_inner().unwrap();
// index documents
let content = DocumentBatchReader::from_reader(cursor).unwrap();
let content = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
builder.add_documents(content).unwrap();
builder.execute().unwrap();

View File

@ -6,10 +6,11 @@ use big_s::S;
use either::{Either, Left, Right};
use heed::EnvOpenOptions;
use maplit::{hashmap, hashset};
use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, DocumentId, Index, Member};
use serde::Deserialize;
use serde_json::{Deserializer, Map, Value};
use slice_group_by::GroupBy;
mod distinct;
@ -62,21 +63,18 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
let mut cursor = Cursor::new(Vec::new());
let mut documents_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
let reader = Cursor::new(CONTENT.as_bytes());
for doc in serde_json::Deserializer::from_reader(reader).into_iter::<serde_json::Value>() {
let doc = Cursor::new(serde_json::to_vec(&doc.unwrap()).unwrap());
documents_builder.extend_from_json(doc).unwrap();
for result in Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
let object = result.unwrap();
documents_builder.append_json_object(&object).unwrap();
}
documents_builder.finish().unwrap();
cursor.set_position(0);
let vector = documents_builder.into_inner().unwrap();
// index documents
let content = DocumentBatchReader::from_reader(cursor).unwrap();
let content = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
builder.add_documents(content).unwrap();
builder.execute().unwrap();

View File

@ -5,7 +5,7 @@ use big_s::S;
use heed::EnvOpenOptions;
use itertools::Itertools;
use maplit::hashset;
use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult};
use rand::Rng;
@ -393,8 +393,7 @@ fn criteria_ascdesc() {
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
let mut cursor = Cursor::new(Vec::new());
let mut batch_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
let mut batch_builder = DocumentsBatchBuilder::new(Vec::new());
(0..ASC_DESC_CANDIDATES_THRESHOLD + 1).for_each(|_| {
let mut rng = rand::thread_rng();
@ -412,16 +411,17 @@ fn criteria_ascdesc() {
"age": age,
});
let json = Cursor::new(serde_json::to_vec(&json).unwrap());
batch_builder.extend_from_json(json).unwrap();
let object = match json {
serde_json::Value::Object(object) => object,
_ => panic!(),
};
batch_builder.append_json_object(&object).unwrap();
});
batch_builder.finish().unwrap();
cursor.set_position(0);
let reader = DocumentBatchReader::from_reader(cursor).unwrap();
let vector = batch_builder.into_inner().unwrap();
let reader = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
builder.add_documents(reader).unwrap();
builder.execute().unwrap();

View File

@ -106,26 +106,23 @@ fn test_typo_disabled_on_word() {
options.map_size(4096 * 100);
let index = Index::new(options, tmp.path()).unwrap();
let documents = json!([
{
"id": 1usize,
"data": "zealand",
},
{
"id": 2usize,
"data": "zearand",
},
]);
let mut builder = milli::documents::DocumentsBatchBuilder::new(Vec::new());
let doc1 = json!({
"id": 1usize,
"data": "zealand",
});
let mut writer = std::io::Cursor::new(Vec::new());
let mut builder = milli::documents::DocumentBatchBuilder::new(&mut writer).unwrap();
let documents = serde_json::to_vec(&documents).unwrap();
builder.extend_from_json(std::io::Cursor::new(documents)).unwrap();
builder.finish().unwrap();
let doc2 = json!({
"id": 2usize,
"data": "zearand",
});
writer.set_position(0);
builder.append_json_object(doc1.as_object().unwrap()).unwrap();
builder.append_json_object(doc2.as_object().unwrap()).unwrap();
let vector = builder.into_inner().unwrap();
let documents = milli::documents::DocumentBatchReader::from_reader(writer).unwrap();
let documents =
milli::documents::DocumentsBatchReader::from_reader(std::io::Cursor::new(vector)).unwrap();
let mut txn = index.write_txn().unwrap();
let config = IndexerConfig::default();