Use the Error enum everywhere in the project

Kerollmops
2021-06-14 16:46:19 +02:00
parent ca78cb5aca
commit 312c2d1d8e
35 changed files with 385 additions and 300 deletions
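The error types referenced throughout this diff (`Error`, `UserError`, `InternalError`, `SerializationError`) and the crate-level `Result` alias live in `crate::error`, which is not among the hunks shown here. The sketch below is an assumption of the minimal shape those types need for the call sites in this commit to compile; it lists only the variants that actually appear in the diff and is not the real definition.

use std::io;

// Crate-wide result alias that replaces `anyhow::Result` in this commit.
pub type Result<T> = std::result::Result<T, Error>;

#[derive(Debug)]
pub enum Error {
    InternalError(InternalError),
    IoError(io::Error),
    UserError(UserError),
}

#[derive(Debug)]
pub enum InternalError {
    DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
    IndexingMergingKeys { process: &'static str },
    SerdeJson(serde_json::Error),
    Serialization(SerializationError),
}

#[derive(Debug)]
pub enum SerializationError {
    Encoding { db_name: Option<&'static str> },
}

#[derive(Debug)]
pub enum UserError {
    AttributeLimitReached,
    Csv(csv::Error),
    DocumentLimitReached,
    InvalidDocumentId { document_id: serde_json::Value },
    MissingPrimaryKey,
    SerdeJson(serde_json::Error),
}

// The `?` operator and the `.into()` calls in the diff rely on conversions
// like these; conversions from `heed::Error`, `io::Error`, `grenad::Error`,
// `fst::Error`, etc. are assumed to exist as well.
impl From<InternalError> for Error {
    fn from(error: InternalError) -> Error { Error::InternalError(error) }
}

impl From<UserError> for Error {
    fn from(error: UserError) -> Error { Error::UserError(error) }
}

impl From<SerializationError> for Error {
    fn from(error: SerializationError) -> Error {
        Error::InternalError(InternalError::Serialization(error))
    }
}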

View File

@ -1,6 +1,7 @@
use chrono::Utc;
use roaring::RoaringBitmap;
use crate::{ExternalDocumentsIds, Index, FieldsDistribution};
use crate::{ExternalDocumentsIds, Index, FieldsDistribution, Result};
pub struct ClearDocuments<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -18,7 +19,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
ClearDocuments { wtxn, index, _update_id: update_id }
}
pub fn execute(self) -> anyhow::Result<u64> {
pub fn execute(self) -> Result<u64> {
self.index.set_updated_at(self.wtxn, &Utc::now())?;
let Index {
env: _env,

View File

@ -1,15 +1,15 @@
use std::collections::HashMap;
use std::collections::hash_map::Entry;
use anyhow::{anyhow, Context};
use chrono::Utc;
use fst::IntoStreamer;
use heed::types::{ByteSlice, Unit};
use roaring::RoaringBitmap;
use serde_json::Value;
use crate::error::{InternalError, UserError};
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds};
use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds, Result};
use super::ClearDocuments;
pub struct DeleteDocuments<'t, 'u, 'i> {
@ -25,7 +25,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
index: &'i Index,
update_id: u64,
) -> anyhow::Result<DeleteDocuments<'t, 'u, 'i>>
) -> Result<DeleteDocuments<'t, 'u, 'i>>
{
let external_documents_ids = index
.external_documents_ids(wtxn)?
@ -54,7 +54,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
Some(docid)
}
pub fn execute(self) -> anyhow::Result<u64> {
pub fn execute(self) -> Result<u64> {
self.index.set_updated_at(self.wtxn, &Utc::now())?;
// We retrieve the current documents ids that are in the database.
let mut documents_ids = self.index.documents_ids(self.wtxn)?;
@ -77,7 +77,9 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
}
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let primary_key = self.index.primary_key(self.wtxn)?.context("missing primary key")?;
let primary_key = self.index.primary_key(self.wtxn)?.ok_or_else(|| {
InternalError::DatabaseMissingEntry { db_name: "main", key: Some("primary-key") }
})?;
let id_field = fields_ids_map.id(primary_key).expect(r#"the field "id" to be present"#);
let Index {
@ -119,7 +121,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let external_id = match serde_json::from_slice(content).unwrap() {
Value::String(string) => SmallString32::from(string.as_str()),
Value::Number(number) => SmallString32::from(number.to_string()),
_ => return Err(anyhow!("documents ids must be either strings or numbers")),
document_id => return Err(UserError::InvalidDocumentId { document_id }.into()),
};
external_ids.push(external_id);
}

View File

@ -9,11 +9,12 @@ use heed::{BytesEncode, Error};
use log::debug;
use roaring::RoaringBitmap;
use crate::error::InternalError;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::heed_codec::facet::FacetLevelValueF64Codec;
use crate::Index;
use crate::update::index_documents::WriteMethod;
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
use crate::{Index, Result};
pub struct Facets<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -55,7 +56,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
self
}
pub fn execute(self) -> anyhow::Result<()> {
pub fn execute(self) -> Result<()> {
self.index.set_updated_at(self.wtxn, &Utc::now())?;
// We get the faceted fields to be able to create the facet levels.
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
@ -102,7 +103,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
self.wtxn,
*self.index.facet_id_f64_docids.as_polymorph(),
content,
|_, _| anyhow::bail!("invalid facet number level merging"),
|_, _| Err(InternalError::IndexingMergingKeys { process: "facet number level" }),
WriteMethod::GetMergePut,
)?;
}
@ -132,7 +133,7 @@ fn compute_facet_number_levels<'t>(
level_group_size: NonZeroUsize,
min_level_size: NonZeroUsize,
field_id: u8,
) -> anyhow::Result<Reader<FileFuse>>
) -> Result<Reader<FileFuse>>
{
let first_level_size = db
.remap_key_type::<ByteSlice>()
@ -195,7 +196,7 @@ fn compute_faceted_documents_ids(
rtxn: &heed::RoTxn,
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
field_id: u8,
) -> anyhow::Result<RoaringBitmap>
) -> Result<RoaringBitmap>
{
let mut documents_ids = RoaringBitmap::new();
@ -214,7 +215,7 @@ fn write_number_entry(
left: f64,
right: f64,
ids: &RoaringBitmap,
) -> anyhow::Result<()>
) -> Result<()>
{
let key = (field_id, level, left, right);
let key = FacetLevelValueF64Codec::bytes_encode(&key).ok_or(Error::Encoding)?;

View File

@ -1,17 +1,19 @@
use std::borrow::Cow;
use std::result::Result as StdResult;
use fst::IntoStreamer;
use roaring::RoaringBitmap;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::Result;
/// Only the last value associated with an id is kept.
pub fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
pub fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> Result<Vec<u8>> {
Ok(obkvs.last().unwrap().clone().into_owned())
}
/// Merge all the obkvs in the order we see them.
pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> Result<Vec<u8>> {
let mut iter = obkvs.iter();
let first = iter.next().map(|b| b.clone().into_owned()).unwrap();
Ok(iter.fold(first, |acc, current| {
@ -24,8 +26,8 @@ pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>>
}
// Union of multiple FSTs
pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
let fsts = values.iter().map(fst::Set::new).collect::<Result<Vec<_>, _>>()?;
pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
let fsts = values.iter().map(fst::Set::new).collect::<StdResult<Vec<_>, _>>()?;
let op_builder: fst::set::OpBuilder = fsts.iter().map(|fst| fst.into_stream()).collect();
let op = op_builder.r#union();
@ -34,7 +36,7 @@ pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
Ok(build.into_inner().unwrap())
}
pub fn keep_first(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
pub fn keep_first(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
Ok(values.first().unwrap().to_vec())
}
@ -54,7 +56,7 @@ pub fn merge_two_obkvs(base: obkv::KvReader, update: obkv::KvReader, buffer: &mu
writer.finish().unwrap();
}
pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
let (head, tail) = values.split_first().unwrap();
let mut head = RoaringBitmap::deserialize_from(&head[..])?;
@ -68,7 +70,7 @@ pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result
Ok(vec)
}
pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
let (head, tail) = values.split_first().unwrap();
let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
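These merge functions now return the crate `Result` (so their error type is `Error`), while the helpers in the next file accept any `MergeFn<E>` as long as `Error: From<E>`. The `MergeFn` alias itself is redefined elsewhere in the indexing module and does not appear in these hunks; a plausible generic form, stated as an assumption:

use std::borrow::Cow;

// Assumed shape: the old `fn(&[u8], &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>>`
// alias made generic over its error type.
pub type MergeFn<E> = fn(&[u8], &[Cow<[u8]>]) -> std::result::Result<Vec<u8>, E>;

With that shape, the `fst_merge as MergeFn<_>` cast later in the diff coerces a concrete function item to the alias, and the generic LMDB helpers can promote whatever `E` it produces into the crate-wide `Error`.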

View File

@ -3,11 +3,11 @@ use std::collections::HashSet;
use std::fs::File;
use std::io::{self, Seek, SeekFrom, BufReader, BufRead};
use std::num::{NonZeroU32, NonZeroUsize};
use std::result::Result as StdResult;
use std::str;
use std::sync::mpsc::sync_channel;
use std::time::Instant;
use anyhow::Context;
use bstr::ByteSlice as _;
use chrono::Utc;
use grenad::{MergerIter, Writer, Sorter, Merger, Reader, FileFuse, CompressionType};
@ -18,7 +18,8 @@ use rayon::prelude::*;
use rayon::ThreadPool;
use serde::{Serialize, Deserialize};
use crate::index::Index;
use crate::error::{Error, InternalError};
use crate::{Index, Result};
use crate::update::{
Facets, WordsLevelPositions, WordPrefixDocids, WordsPrefixesFst, UpdateIndexingStep,
WordPrefixPairProximityDocids,
@ -56,14 +57,14 @@ pub fn create_writer(typ: CompressionType, level: Option<u32>, file: File) -> io
builder.build(file)
}
pub fn create_sorter(
merge: MergeFn,
pub fn create_sorter<E>(
merge: MergeFn<E>,
chunk_compression_type: CompressionType,
chunk_compression_level: Option<u32>,
chunk_fusing_shrink_size: Option<u64>,
max_nb_chunks: Option<usize>,
max_memory: Option<usize>,
) -> Sorter<MergeFn>
) -> Sorter<MergeFn<E>>
{
let mut builder = Sorter::builder(merge);
if let Some(shrink_size) = chunk_fusing_shrink_size {
@ -82,7 +83,7 @@ pub fn create_sorter(
builder.build()
}
pub fn writer_into_reader(writer: Writer<File>, shrink_size: Option<u64>) -> anyhow::Result<Reader<FileFuse>> {
pub fn writer_into_reader(writer: Writer<File>, shrink_size: Option<u64>) -> Result<Reader<FileFuse>> {
let mut file = writer.into_inner()?;
file.seek(SeekFrom::Start(0))?;
let file = if let Some(shrink_size) = shrink_size {
@ -93,19 +94,25 @@ pub fn writer_into_reader(writer: Writer<File>, shrink_size: Option<u64>) -> any
Reader::new(file).map_err(Into::into)
}
pub fn merge_readers(sources: Vec<Reader<FileFuse>>, merge: MergeFn) -> Merger<FileFuse, MergeFn> {
pub fn merge_readers<E>(
sources: Vec<Reader<FileFuse>>,
merge: MergeFn<E>,
) -> Merger<FileFuse, MergeFn<E>>
{
let mut builder = Merger::builder(merge);
builder.extend(sources);
builder.build()
}
pub fn merge_into_lmdb_database(
pub fn merge_into_lmdb_database<E>(
wtxn: &mut heed::RwTxn,
database: heed::PolyDatabase,
sources: Vec<Reader<FileFuse>>,
merge: MergeFn,
merge: MergeFn<E>,
method: WriteMethod,
) -> anyhow::Result<()>
) -> Result<()>
where
Error: From<E>,
{
debug!("Merging {} MTBL stores...", sources.len());
let before = Instant::now();
@ -123,13 +130,15 @@ pub fn merge_into_lmdb_database(
Ok(())
}
pub fn write_into_lmdb_database(
pub fn write_into_lmdb_database<E>(
wtxn: &mut heed::RwTxn,
database: heed::PolyDatabase,
mut reader: Reader<FileFuse>,
merge: MergeFn,
merge: MergeFn<E>,
method: WriteMethod,
) -> anyhow::Result<()>
) -> Result<()>
where
Error: From<E>,
{
debug!("Writing MTBL stores...");
let before = Instant::now();
@ -138,9 +147,7 @@ pub fn write_into_lmdb_database(
WriteMethod::Append => {
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
while let Some((k, v)) = reader.next()? {
out_iter.append(k, v).with_context(|| {
format!("writing {:?} into LMDB", k.as_bstr())
})?;
out_iter.append(k, v)?;
}
},
WriteMethod::GetMergePut => {
@ -165,13 +172,16 @@ pub fn write_into_lmdb_database(
Ok(())
}
pub fn sorter_into_lmdb_database(
pub fn sorter_into_lmdb_database<E>(
wtxn: &mut heed::RwTxn,
database: heed::PolyDatabase,
sorter: Sorter<MergeFn>,
merge: MergeFn,
sorter: Sorter<MergeFn<E>>,
merge: MergeFn<E>,
method: WriteMethod,
) -> anyhow::Result<()>
) -> Result<()>
where
Error: From<E>,
Error: From<grenad::Error<E>>
{
debug!("Writing MTBL sorter...");
let before = Instant::now();
@ -188,21 +198,21 @@ pub fn sorter_into_lmdb_database(
Ok(())
}
fn merger_iter_into_lmdb_database<R: io::Read>(
fn merger_iter_into_lmdb_database<R: io::Read, E>(
wtxn: &mut heed::RwTxn,
database: heed::PolyDatabase,
mut sorter: MergerIter<R, MergeFn>,
merge: MergeFn,
mut sorter: MergerIter<R, MergeFn<E>>,
merge: MergeFn<E>,
method: WriteMethod,
) -> anyhow::Result<()>
) -> Result<()>
where
Error: From<E>,
{
match method {
WriteMethod::Append => {
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
while let Some((k, v)) = sorter.next()? {
out_iter.append(k, v).with_context(|| {
format!("writing {:?} into LMDB", k.as_bstr())
})?;
out_iter.append(k, v)?;
}
},
WriteMethod::GetMergePut => {
@ -211,7 +221,10 @@ fn merger_iter_into_lmdb_database<R: io::Read>(
match iter.next().transpose()? {
Some((key, old_val)) if key == k => {
let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)];
let val = merge(k, &vals).expect("merge failed");
let val = merge(k, &vals).map_err(|_| {
// TODO just wrap this error?
InternalError::IndexingMergingKeys { process: "get-put-merge" }
})?;
iter.put_current(k, &val)?;
},
_ => {
@ -318,7 +331,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
self.autogenerate_docids = false;
}
pub fn execute<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<DocumentAdditionResult>
pub fn execute<R, F>(self, reader: R, progress_callback: F) -> Result<DocumentAdditionResult>
where
R: io::Read,
F: Fn(UpdateIndexingStep, u64) + Sync,
@ -365,7 +378,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
Ok(DocumentAdditionResult { nb_documents })
}
pub fn execute_raw<F>(self, output: TransformOutput, progress_callback: F) -> anyhow::Result<()>
pub fn execute_raw<F>(self, output: TransformOutput, progress_callback: F) -> Result<()>
where
F: Fn(UpdateIndexingStep) + Sync
{
@ -403,15 +416,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
debug!("{} documents actually deleted", deleted_documents_count);
}
let mmap;
let bytes = if documents_count == 0 {
&[][..]
} else {
mmap = unsafe { Mmap::map(&documents_file).context("mmaping the transform documents file")? };
&mmap
};
if documents_count == 0 {
return Ok(());
}
let documents = grenad::Reader::new(bytes).unwrap();
let bytes = unsafe { Mmap::map(&documents_file)? };
let documents = grenad::Reader::new(bytes.as_bytes()).unwrap();
// The enum which indicates the type of the readers
// merges that are potentially done on different threads.
@ -477,7 +487,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
&progress_callback,
)
})
.collect::<Result<Vec<_>, _>>()?;
.collect::<StdResult<Vec<_>, _>>()?;
let mut main_readers = Vec::with_capacity(readers.len());
let mut word_docids_readers = Vec::with_capacity(readers.len());
@ -535,7 +545,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
debug!("Merging the main, word docids and words pairs proximity docids in parallel...");
rayon::spawn(move || {
vec![
(DatabaseType::Main, main_readers, fst_merge as MergeFn),
(DatabaseType::Main, main_readers, fst_merge as MergeFn<_>),
(DatabaseType::WordDocids, word_docids_readers, roaring_bitmap_merge),
(
DatabaseType::FacetLevel0NumbersDocids,
@ -570,7 +580,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
facet_field_strings_docids_readers,
field_id_docid_facet_numbers_readers,
field_id_docid_facet_strings_readers,
)) as anyhow::Result<_>
)) as Result<_>
})?;
let (
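The LMDB helpers above stay generic over the merge error `E` and only require `Error: From<E>` (plus `Error: From<grenad::Error<E>>` where grenad itself drives the merge). A minimal sketch of that pattern with a hypothetical `merge_all` helper that is not part of this commit:

use std::borrow::Cow;

use crate::error::Error;
use crate::{MergeFn, Result};

// Hypothetical helper: run a generic merge function and promote its error
// into the crate-wide `Error`, the same way the helpers above do.
fn merge_all<E>(key: &[u8], values: &[Cow<[u8]>], merge: MergeFn<E>) -> Result<Vec<u8>>
where
    Error: From<E>,
{
    // `?` uses the `Error: From<E>` bound to convert the merge error.
    Ok(merge(key, values)?)
}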

View File

@ -6,7 +6,6 @@ use std::iter::FromIterator;
use std::time::Instant;
use std::{cmp, iter};
use anyhow::Context;
use bstr::ByteSlice as _;
use fst::Set;
use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType};
@ -19,11 +18,12 @@ use roaring::RoaringBitmap;
use serde_json::Value;
use tempfile::tempfile;
use crate::error::{Error, InternalError, SerializationError};
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec};
use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
use crate::update::UpdateIndexingStep;
use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId};
use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId, Result};
use super::{MergeFn, create_writer, create_sorter, writer_into_reader};
use super::merge_function::{fst_merge, keep_first, roaring_bitmap_merge, cbo_roaring_bitmap_merge};
@ -66,15 +66,15 @@ pub struct Store<'s, A> {
chunk_compression_level: Option<u32>,
chunk_fusing_shrink_size: Option<u64>,
// MTBL sorters
main_sorter: Sorter<MergeFn>,
word_docids_sorter: Sorter<MergeFn>,
words_pairs_proximities_docids_sorter: Sorter<MergeFn>,
word_level_position_docids_sorter: Sorter<MergeFn>,
field_id_word_count_docids_sorter: Sorter<MergeFn>,
facet_field_numbers_docids_sorter: Sorter<MergeFn>,
facet_field_strings_docids_sorter: Sorter<MergeFn>,
field_id_docid_facet_numbers_sorter: Sorter<MergeFn>,
field_id_docid_facet_strings_sorter: Sorter<MergeFn>,
main_sorter: Sorter<MergeFn<Error>>,
word_docids_sorter: Sorter<MergeFn<Error>>,
words_pairs_proximities_docids_sorter: Sorter<MergeFn<Error>>,
word_level_position_docids_sorter: Sorter<MergeFn<Error>>,
field_id_word_count_docids_sorter: Sorter<MergeFn<Error>>,
facet_field_numbers_docids_sorter: Sorter<MergeFn<Error>>,
facet_field_strings_docids_sorter: Sorter<MergeFn<Error>>,
field_id_docid_facet_numbers_sorter: Sorter<MergeFn<Error>>,
field_id_docid_facet_strings_sorter: Sorter<MergeFn<Error>>,
// MTBL writers
docid_word_positions_writer: Writer<File>,
documents_writer: Writer<File>,
@ -93,7 +93,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
chunk_compression_level: Option<u32>,
chunk_fusing_shrink_size: Option<u64>,
stop_words: Option<&'s Set<A>>,
) -> anyhow::Result<Self>
) -> Result<Self>
{
// We divide the max memory by the number of sorters the Store has.
let max_memory = max_memory.map(|mm| cmp::max(ONE_KILOBYTE, mm / 5));
@ -221,7 +221,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
}
// Save the documents ids under the position and word where we have seen them.
fn insert_word_docid(&mut self, word: &str, id: DocumentId) -> anyhow::Result<()> {
fn insert_word_docid(&mut self, word: &str, id: DocumentId) -> Result<()> {
// if get_refresh finds the element it is assured to be at the end of the linked hash map.
match self.word_docids.get_refresh(word.as_bytes()) {
Some(old) => { old.insert(id); },
@ -246,7 +246,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
field_id: FieldId,
value: OrderedFloat<f64>,
id: DocumentId,
) -> anyhow::Result<()>
) -> Result<()>
{
let sorter = &mut self.field_id_docid_facet_numbers_sorter;
Self::write_field_id_docid_facet_number_value(sorter, field_id, id, value)?;
@ -279,7 +279,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
field_id: FieldId,
value: String,
id: DocumentId,
) -> anyhow::Result<()>
) -> Result<()>
{
let sorter = &mut self.field_id_docid_facet_strings_sorter;
Self::write_field_id_docid_facet_string_value(sorter, field_id, id, &value)?;
@ -311,7 +311,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
&mut self,
words_pairs_proximities: impl IntoIterator<Item=((&'a str, &'a str), u8)>,
id: DocumentId,
) -> anyhow::Result<()>
) -> Result<()>
{
for ((w1, w2), prox) in words_pairs_proximities {
let w1 = SmallVec32::from(w1.as_bytes());
@ -350,7 +350,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
facet_numbers_values: &mut HashMap<FieldId, Vec<f64>>,
facet_strings_values: &mut HashMap<FieldId, Vec<String>>,
record: &[u8],
) -> anyhow::Result<()>
) -> Result<()>
{
// We compute the list of words pairs proximities (self-join) and write it directly to disk.
let words_pair_proximities = compute_words_pair_proximities(&words_positions);
@ -385,10 +385,12 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(())
}
fn write_words_pairs_proximities(
sorter: &mut Sorter<MergeFn>,
fn write_words_pairs_proximities<E>(
sorter: &mut Sorter<MergeFn<E>>,
iter: impl IntoIterator<Item=((SmallVec32<u8>, SmallVec32<u8>, u8), RoaringBitmap)>,
) -> anyhow::Result<()>
) -> Result<()>
where
Error: From<E>,
{
let mut key = Vec::new();
let mut buffer = Vec::new();
@ -417,7 +419,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
writer: &mut Writer<File>,
id: DocumentId,
words_positions: &HashMap<String, SmallVec32<Position>>,
) -> anyhow::Result<()>
) -> Result<()>
{
// We prefix the words by the document id.
let mut key = id.to_be_bytes().to_vec();
@ -445,11 +447,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(())
}
fn write_word_position_docids(
writer: &mut Sorter<MergeFn>,
fn write_word_position_docids<E>(
writer: &mut Sorter<MergeFn<E>>,
document_id: DocumentId,
words_positions: &HashMap<String, SmallVec32<Position>>,
) -> anyhow::Result<()>
) -> Result<()>
where
Error: From<E>,
{
let mut key_buffer = Vec::new();
let mut data_buffer = Vec::new();
@ -480,11 +484,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(())
}
fn write_facet_field_string_docids<I>(
sorter: &mut Sorter<MergeFn>,
fn write_facet_field_string_docids<I, E>(
sorter: &mut Sorter<MergeFn<E>>,
iter: I,
) -> anyhow::Result<()>
where I: IntoIterator<Item=((FieldId, String), RoaringBitmap)>
) -> Result<()>
where
I: IntoIterator<Item=((FieldId, String), RoaringBitmap)>,
Error: From<E>,
{
let mut key_buffer = Vec::new();
let mut data_buffer = Vec::new();
@ -504,11 +510,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(())
}
fn write_facet_field_number_docids<I>(
sorter: &mut Sorter<MergeFn>,
fn write_facet_field_number_docids<I, E>(
sorter: &mut Sorter<MergeFn<E>>,
iter: I,
) -> anyhow::Result<()>
where I: IntoIterator<Item=((FieldId, OrderedFloat<f64>), RoaringBitmap)>
) -> Result<()>
where
I: IntoIterator<Item=((FieldId, OrderedFloat<f64>), RoaringBitmap)>,
Error: From<E>,
{
let mut data_buffer = Vec::new();
@ -517,7 +525,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *value, *value))
.map(Cow::into_owned)
.context("could not serialize facet level value key")?;
.ok_or(SerializationError::Encoding { db_name: Some("facet level value") })?;
CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer);
@ -529,16 +537,18 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(())
}
fn write_field_id_docid_facet_number_value(
sorter: &mut Sorter<MergeFn>,
fn write_field_id_docid_facet_number_value<E>(
sorter: &mut Sorter<MergeFn<E>>,
field_id: FieldId,
document_id: DocumentId,
value: OrderedFloat<f64>,
) -> anyhow::Result<()>
) -> Result<()>
where
Error: From<E>,
{
let key = FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, *value))
.map(Cow::into_owned)
.context("could not serialize facet level value key")?;
.ok_or(SerializationError::Encoding { db_name: Some("facet level value") })?;
if lmdb_key_valid_size(&key) {
sorter.insert(&key, &[])?;
@ -547,12 +557,14 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(())
}
fn write_field_id_docid_facet_string_value(
sorter: &mut Sorter<MergeFn>,
fn write_field_id_docid_facet_string_value<E>(
sorter: &mut Sorter<MergeFn<E>>,
field_id: FieldId,
document_id: DocumentId,
value: &str,
) -> anyhow::Result<()>
) -> Result<()>
where
Error: From<E>,
{
let mut buffer = Vec::new();
@ -565,8 +577,10 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(())
}
fn write_word_docids<I>(sorter: &mut Sorter<MergeFn>, iter: I) -> anyhow::Result<()>
where I: IntoIterator<Item=(SmallVec32<u8>, RoaringBitmap)>
fn write_word_docids<I, E>(sorter: &mut Sorter<MergeFn<E>>, iter: I) -> Result<()>
where
I: IntoIterator<Item=(SmallVec32<u8>, RoaringBitmap)>,
Error: From<E>,
{
let mut key = Vec::new();
let mut buffer = Vec::new();
@ -596,7 +610,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
num_threads: usize,
log_every_n: Option<usize>,
mut progress_callback: F,
) -> anyhow::Result<Readers>
) -> Result<Readers>
where F: FnMut(UpdateIndexingStep),
{
debug!("{:?}: Indexing in a Store...", thread_index);
@ -625,7 +639,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
for (attr, content) in document.iter() {
if self.faceted_fields.contains(&attr) || self.searchable_fields.contains(&attr) {
let value = serde_json::from_slice(content)?;
let value = serde_json::from_slice(content).map_err(InternalError::SerdeJson)?;
let (facet_numbers, facet_strings) = extract_facet_values(&value);
facet_numbers_values.entry(attr).or_insert_with(Vec::new).extend(facet_numbers);
@ -679,7 +693,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(readers)
}
fn finish(mut self) -> anyhow::Result<Readers> {
fn finish(mut self) -> Result<Readers> {
let comp_type = self.chunk_compression_type;
let comp_level = self.chunk_compression_level;
let shrink_size = self.chunk_fusing_shrink_size;
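In the store, heed codec encodings that fail (they return `None`) are now mapped to `SerializationError::Encoding { db_name }` instead of an `anyhow` context string. A condensed illustration of the pattern, using a hypothetical `encode_facet_key` function:

use std::borrow::Cow;

use heed::BytesEncode;

use crate::error::SerializationError;
use crate::heed_codec::facet::FacetLevelValueF64Codec;
use crate::Result;

// Illustration only: a failed key encoding now surfaces a typed error,
// relying on the assumed `From<SerializationError> for Error` conversion.
fn encode_facet_key(field_id: u8, value: f64) -> Result<Vec<u8>> {
    let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, value, value))
        .map(Cow::into_owned)
        .ok_or(SerializationError::Encoding { db_name: Some("facet level value") })?;
    Ok(key)
}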

View File

@ -2,17 +2,19 @@ use std::borrow::Cow;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use std::iter::Peekable;
use std::result::Result as StdResult;
use std::time::Instant;
use anyhow::{anyhow, Context};
use grenad::CompressionType;
use log::info;
use roaring::RoaringBitmap;
use serde_json::{Map, Value};
use crate::error::{Error, UserError, InternalError};
use crate::update::index_documents::merge_function::{merge_obkvs, keep_latest_obkv};
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution};
use crate::{BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution};
use crate::{Index, Result};
use super::merge_function::merge_two_obkvs;
use super::{create_writer, create_sorter, IndexDocumentsMethod};
@ -53,7 +55,7 @@ fn is_primary_key(field: impl AsRef<str>) -> bool {
}
impl Transform<'_, '_> {
pub fn output_from_json<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<TransformOutput>
pub fn output_from_json<R, F>(self, reader: R, progress_callback: F) -> Result<TransformOutput>
where
R: Read,
F: Fn(UpdateIndexingStep) + Sync,
@ -61,7 +63,7 @@ impl Transform<'_, '_> {
self.output_from_generic_json(reader, false, progress_callback)
}
pub fn output_from_json_stream<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<TransformOutput>
pub fn output_from_json_stream<R, F>(self, reader: R, progress_callback: F) -> Result<TransformOutput>
where
R: Read,
F: Fn(UpdateIndexingStep) + Sync,
@ -74,7 +76,7 @@ impl Transform<'_, '_> {
reader: R,
is_stream: bool,
progress_callback: F,
) -> anyhow::Result<TransformOutput>
) -> Result<TransformOutput>
where
R: Read,
F: Fn(UpdateIndexingStep) + Sync,
@ -88,7 +90,7 @@ impl Transform<'_, '_> {
let iter = Box::new(iter) as Box<dyn Iterator<Item=_>>;
iter.peekable()
} else {
let vec: Vec<_> = serde_json::from_reader(reader)?;
let vec: Vec<_> = serde_json::from_reader(reader).map_err(UserError::SerdeJson)?;
let iter = vec.into_iter().map(Ok);
let iter = Box::new(iter) as Box<dyn Iterator<Item=_>>;
iter.peekable()
@ -96,9 +98,12 @@ impl Transform<'_, '_> {
// We extract the primary key from the first document in
// the batch if it hasn't already been defined in the index
let first = match documents.peek().map(Result::as_ref).transpose() {
let first = match documents.peek().map(StdResult::as_ref).transpose() {
Ok(first) => first,
Err(_) => return Err(documents.next().unwrap().unwrap_err().into()),
Err(_) => {
let error = documents.next().unwrap().unwrap_err();
return Err(UserError::SerdeJson(error).into());
},
};
let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned());
@ -145,7 +150,7 @@ impl Transform<'_, '_> {
let mut documents_count = 0;
for result in documents {
let document = result?;
let document = result.map_err(UserError::SerdeJson)?;
if self.log_every_n.map_or(false, |len| documents_count % len == 0) {
progress_callback(UpdateIndexingStep::TransformFromUserIntoGenericFormat {
@ -158,7 +163,7 @@ impl Transform<'_, '_> {
// We prepare the fields ids map with the documents keys.
for (key, _value) in &document {
fields_ids_map.insert(&key).context("field id limit reached")?;
fields_ids_map.insert(&key).ok_or(UserError::AttributeLimitReached)?;
}
// We retrieve the user id from the document based on the primary key name,
@ -167,11 +172,13 @@ impl Transform<'_, '_> {
Some(value) => match value {
Value::String(string) => Cow::Borrowed(string.as_str()),
Value::Number(number) => Cow::Owned(number.to_string()),
_ => return Err(anyhow!("documents ids must be either strings or numbers")),
content => return Err(UserError::InvalidDocumentId {
document_id: content.clone(),
}.into()),
},
None => {
if !self.autogenerate_docids {
return Err(anyhow!("missing primary key"));
return Err(UserError::MissingPrimaryKey.into());
}
let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer);
Cow::Borrowed(uuid)
@ -186,13 +193,15 @@ impl Transform<'_, '_> {
// and this should be the document id we return the one we generated.
if let Some(value) = document.get(name) {
// We serialize the attribute values.
serde_json::to_writer(&mut json_buffer, value)?;
serde_json::to_writer(&mut json_buffer, value).map_err(InternalError::SerdeJson)?;
writer.insert(field_id, &json_buffer)?;
}
// We validate the document id [a-zA-Z0-9\-_].
if field_id == primary_key_id && validate_document_id(&external_id).is_none() {
return Err(anyhow!("invalid document id: {:?}", external_id));
return Err(UserError::InvalidDocumentId {
document_id: Value::from(external_id),
}.into());
}
}
@ -217,7 +226,7 @@ impl Transform<'_, '_> {
)
}
pub fn output_from_csv<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<TransformOutput>
pub fn output_from_csv<R, F>(self, reader: R, progress_callback: F) -> Result<TransformOutput>
where
R: Read,
F: Fn(UpdateIndexingStep) + Sync,
@ -226,12 +235,12 @@ impl Transform<'_, '_> {
let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap();
let mut csv = csv::Reader::from_reader(reader);
let headers = csv.headers()?;
let headers = csv.headers().map_err(UserError::Csv)?;
let mut fields_ids = Vec::new();
// Generate the new fields ids based on the current fields ids and these CSV headers.
for (i, header) in headers.iter().enumerate() {
let id = fields_ids_map.insert(header).context("field id limit reached)")?;
let id = fields_ids_map.insert(header).ok_or(UserError::AttributeLimitReached)?;
fields_ids.push((id, i));
}
@ -281,7 +290,7 @@ impl Transform<'_, '_> {
let mut documents_count = 0;
let mut record = csv::StringRecord::new();
while csv.read_record(&mut record)? {
while csv.read_record(&mut record).map_err(UserError::Csv)? {
obkv_buffer.clear();
let mut writer = obkv::KvWriter::new(&mut obkv_buffer);
@ -298,7 +307,9 @@ impl Transform<'_, '_> {
// We validate the document id [a-zA-Z0-9\-_].
match validate_document_id(&external_id) {
Some(valid) => valid,
None => return Err(anyhow!("invalid document id: {:?}", external_id)),
None => return Err(UserError::InvalidDocumentId {
document_id: Value::from(external_id),
}.into()),
}
},
None => uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer),
@ -316,7 +327,7 @@ impl Transform<'_, '_> {
for (field_id, field) in iter {
// We serialize the attribute values as JSON strings.
json_buffer.clear();
serde_json::to_writer(&mut json_buffer, &field)?;
serde_json::to_writer(&mut json_buffer, &field).map_err(InternalError::SerdeJson)?;
writer.insert(*field_id, &json_buffer)?;
}
@ -344,17 +355,18 @@ impl Transform<'_, '_> {
/// Generate the `TransformOutput` based on the given sorter that can be generated from any
/// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document
/// id for the user side and the value must be an obkv where keys are valid fields ids.
fn output_from_sorter<F>(
fn output_from_sorter<F, E>(
self,
sorter: grenad::Sorter<MergeFn>,
sorter: grenad::Sorter<MergeFn<E>>,
primary_key: String,
fields_ids_map: FieldsIdsMap,
approximate_number_of_documents: usize,
mut external_documents_ids: ExternalDocumentsIds<'_>,
progress_callback: F,
) -> anyhow::Result<TransformOutput>
) -> Result<TransformOutput>
where
F: Fn(UpdateIndexingStep) + Sync,
Error: From<E>,
{
let documents_ids = self.index.documents_ids(self.rtxn)?;
let mut fields_distribution = self.index.fields_distribution(self.rtxn)?;
@ -362,7 +374,7 @@ impl Transform<'_, '_> {
// Once we have sorted and deduplicated the documents we write them into a final file.
let mut final_sorter = create_sorter(
|_docid, _obkvs| Err(anyhow!("cannot merge two documents")),
|_id, _obkvs| Err(InternalError::IndexingMergingKeys { process: "merging documents" }),
self.chunk_compression_type,
self.chunk_compression_level,
self.chunk_fusing_shrink_size,
@ -398,7 +410,10 @@ impl Transform<'_, '_> {
IndexDocumentsMethod::UpdateDocuments => {
let key = BEU32::new(docid);
let base_obkv = self.index.documents.get(&self.rtxn, &key)?
.context("document not found")?;
.ok_or(InternalError::DatabaseMissingEntry {
db_name: "documents",
key: None,
})?;
let update_obkv = obkv::KvReader::new(update_obkv);
merge_two_obkvs(base_obkv, update_obkv, &mut obkv_buffer);
(docid, obkv_buffer.as_slice())
@ -409,7 +424,7 @@ impl Transform<'_, '_> {
// If this user id is new we add it to the external documents ids map
// for new ids and into the list of new documents.
let new_docid = available_documents_ids.next()
.context("no more available documents ids")?;
.ok_or(UserError::DocumentLimitReached)?;
new_external_documents_ids_builder.insert(external_id, new_docid as u64)?;
new_documents_ids.insert(new_docid);
(new_docid, update_obkv)
@ -469,7 +484,7 @@ impl Transform<'_, '_> {
primary_key: String,
old_fields_ids_map: FieldsIdsMap,
new_fields_ids_map: FieldsIdsMap,
) -> anyhow::Result<TransformOutput>
) -> Result<TransformOutput>
{
let fields_distribution = self.index.fields_distribution(self.rtxn)?;
let external_documents_ids = self.index.external_documents_ids(self.rtxn)?;
@ -529,10 +544,10 @@ fn compute_primary_key_pair(
fields_ids_map: &mut FieldsIdsMap,
alternative_name: Option<String>,
autogenerate_docids: bool,
) -> anyhow::Result<(FieldId, String)> {
) -> Result<(FieldId, String)> {
match primary_key {
Some(primary_key) => {
let id = fields_ids_map.insert(primary_key).ok_or(anyhow!("Maximum number of fields exceeded"))?;
let id = fields_ids_map.insert(primary_key).ok_or(UserError::AttributeLimitReached)?;
Ok((id, primary_key.to_string()))
}
None => {
@ -542,12 +557,12 @@ fn compute_primary_key_pair(
if !autogenerate_docids {
// If there is no primary key in the current document batch, we must
// return an error and not automatically generate any document id.
anyhow::bail!("missing primary key")
return Err(UserError::MissingPrimaryKey.into());
}
DEFAULT_PRIMARY_KEY_NAME.to_string()
},
};
let id = fields_ids_map.insert(&name).context("field id limit reached")?;
let id = fields_ids_map.insert(&name).ok_or(UserError::AttributeLimitReached)?;
Ok((id, name))
},
}
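Throughout the transform (and the settings update below), `Option`-returning APIs such as `FieldsIdsMap::insert` and `AvailableDocumentsIds::next` are now turned into typed errors with `ok_or(...)` rather than `anyhow::Context`. A condensed sketch of the pattern, with a hypothetical `register_field` helper:

use crate::error::UserError;
use crate::{FieldId, FieldsIdsMap, Result};

// Hypothetical helper: register a field and fail with a typed user error
// once the field id space is exhausted, instead of an ad-hoc anyhow message.
fn register_field(fields_ids_map: &mut FieldsIdsMap, name: &str) -> Result<FieldId> {
    // `FieldsIdsMap::insert` returns `None` when the field id limit is reached;
    // `?` then converts `UserError` into `Error` via the assumed `From` impl.
    let field_id = fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
    Ok(field_id)
}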

View File

@ -1,6 +1,6 @@
use std::collections::{BTreeSet, HashMap, HashSet};
use std::result::Result as StdResult;
use anyhow::Context;
use chrono::Utc;
use grenad::CompressionType;
use itertools::Itertools;
@ -9,9 +9,10 @@ use rayon::ThreadPool;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use crate::criterion::Criterion;
use crate::error::UserError;
use crate::update::index_documents::{IndexDocumentsMethod, Transform};
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
use crate::{FieldsIdsMap, Index};
use crate::{FieldsIdsMap, Index, Result};
#[derive(Debug, Clone, PartialEq)]
pub enum Setting<T> {
@ -33,7 +34,7 @@ impl<T> Setting<T> {
}
impl<T: Serialize> Serialize for Setting<T> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer {
fn serialize<S>(&self, serializer: S) -> StdResult<S::Ok, S::Error> where S: Serializer {
match self {
Self::Set(value) => Some(value),
// Usually not_set isn't serialized by setting skip_serializing_if field attribute
@ -43,7 +44,7 @@ impl<T: Serialize> Serialize for Setting<T> {
}
impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: Deserializer<'de> {
fn deserialize<D>(deserializer: D) -> StdResult<Self, D::Error> where D: Deserializer<'de> {
Deserialize::deserialize(deserializer).map(|x| match x {
Some(x) => Self::Set(x),
None => Self::Reset, // Reset is forced by sending null value
@ -165,7 +166,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
}
}
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()>
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
where
F: Fn(UpdateIndexingStep, u64) + Sync
{
@ -192,7 +193,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
};
// There has already been a document addition, so the primary key should be set by now.
let primary_key = self.index.primary_key(&self.wtxn)?.context("Index must have a primary key")?;
let primary_key = self.index.primary_key(&self.wtxn)?.ok_or(UserError::MissingPrimaryKey)?;
// We remap the documents fields based on the new `FieldsIdsMap`.
let output = transform.remap_index_documents(
@ -220,7 +221,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(())
}
fn update_displayed(&mut self) -> anyhow::Result<bool> {
fn update_displayed(&mut self) -> Result<bool> {
match self.displayed_fields {
Setting::Set(ref fields) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
@ -234,7 +235,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
for name in names.iter() {
fields_ids_map
.insert(name)
.context("field id limit exceeded")?;
.ok_or(UserError::AttributeLimitReached)?;
}
self.index.put_displayed_fields(self.wtxn, &names)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
@ -245,13 +246,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(true)
}
fn update_distinct_field(&mut self) -> anyhow::Result<bool> {
fn update_distinct_field(&mut self) -> Result<bool> {
match self.distinct_field {
Setting::Set(ref attr) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
fields_ids_map
.insert(attr)
.context("field id limit exceeded")?;
.ok_or(UserError::AttributeLimitReached)?;
self.index.put_distinct_field(self.wtxn, &attr)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
@ -264,7 +265,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
/// Updates the index's searchable attributes. This causes the field map to be recomputed to
/// reflect the order of the searchable attributes.
fn update_searchable(&mut self) -> anyhow::Result<bool> {
fn update_searchable(&mut self) -> Result<bool> {
match self.searchable_fields {
Setting::Set(ref fields) => {
// every time the searchable attributes are updated, we need to update the
@ -285,13 +286,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
for name in names.iter() {
new_fields_ids_map
.insert(&name)
.context("field id limit exceeded")?;
.ok_or(UserError::AttributeLimitReached)?;
}
for (_, name) in old_fields_ids_map.iter() {
new_fields_ids_map
.insert(&name)
.context("field id limit exceeded")?;
.ok_or(UserError::AttributeLimitReached)?;
}
self.index.put_searchable_fields(self.wtxn, &names)?;
@ -303,7 +304,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(true)
}
fn update_stop_words(&mut self) -> anyhow::Result<bool> {
fn update_stop_words(&mut self) -> Result<bool> {
match self.stop_words {
Setting::Set(ref stop_words) => {
let current = self.index.stop_words(self.wtxn)?;
@ -325,7 +326,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
}
}
fn update_synonyms(&mut self) -> anyhow::Result<bool> {
fn update_synonyms(&mut self) -> Result<bool> {
match self.synonyms {
Setting::Set(ref synonyms) => {
fn normalize(analyzer: &Analyzer<&[u8]>, text: &str) -> Vec<String> {
@ -383,13 +384,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
}
}
fn update_filterable(&mut self) -> anyhow::Result<()> {
fn update_filterable(&mut self) -> Result<()> {
match self.filterable_fields {
Setting::Set(ref fields) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let mut new_facets = HashSet::new();
for name in fields {
fields_ids_map.insert(name).context("field id limit exceeded")?;
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
new_facets.insert(name.clone());
}
self.index.put_filterable_fields(self.wtxn, &new_facets)?;
@ -401,7 +402,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(())
}
fn update_criteria(&mut self) -> anyhow::Result<()> {
fn update_criteria(&mut self) -> Result<()> {
match self.criteria {
Setting::Set(ref fields) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
@ -409,7 +410,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
for name in fields {
let criterion: Criterion = name.parse()?;
if let Some(name) = criterion.field_name() {
fields_ids_map.insert(name).context("field id limit exceeded")?;
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
}
new_criteria.push(criterion);
}
@ -422,7 +423,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(())
}
pub fn execute<F>(mut self, progress_callback: F) -> anyhow::Result<()>
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
where
F: Fn(UpdateIndexingStep, u64) + Sync
{

View File

@ -1,7 +1,7 @@
use grenad::CompressionType;
use rayon::ThreadPool;
use crate::Index;
use crate::{Index, Result};
use super::{ClearDocuments, DeleteDocuments, IndexDocuments, Settings, Facets};
pub struct UpdateBuilder<'a> {
@ -76,7 +76,7 @@ impl<'a> UpdateBuilder<'a> {
self,
wtxn: &'t mut heed::RwTxn<'i, 'u>,
index: &'i Index,
) -> anyhow::Result<DeleteDocuments<'t, 'u, 'i>>
) -> Result<DeleteDocuments<'t, 'u, 'i>>
{
DeleteDocuments::new(wtxn, index, self.update_id)
}

View File

@ -5,6 +5,7 @@ use fst::Streamer;
use grenad::CompressionType;
use heed::types::ByteSlice;
use crate::Result;
use crate::update::index_documents::WriteMethod;
use crate::update::index_documents::{
create_sorter, roaring_bitmap_merge, sorter_into_lmdb_database,
@ -33,7 +34,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
}
}
pub fn execute(self) -> anyhow::Result<()> {
pub fn execute(self) -> Result<()> {
// Clear the word prefix docids database.
self.index.word_prefix_docids.clear(self.wtxn)?;

View File

@ -7,7 +7,7 @@ use heed::BytesEncode;
use heed::types::ByteSlice;
use log::debug;
use crate::Index;
use crate::{Index, Result};
use crate::heed_codec::StrStrU8Codec;
use crate::update::index_documents::{
WriteMethod, create_sorter, sorter_into_lmdb_database,
@ -41,7 +41,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
}
}
pub fn execute(self) -> anyhow::Result<()> {
pub fn execute(self) -> Result<()> {
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
self.index.word_prefix_pair_proximity_docids.clear(self.wtxn)?;

View File

@ -11,7 +11,9 @@ use heed::{BytesEncode, Error};
use log::debug;
use roaring::RoaringBitmap;
use crate::error::InternalError;
use crate::heed_codec::{StrLevelPositionCodec, CboRoaringBitmapCodec};
use crate::Result;
use crate::update::index_documents::WriteMethod;
use crate::update::index_documents::{
create_writer, create_sorter, writer_into_reader, write_into_lmdb_database,
@ -56,7 +58,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> {
self
}
pub fn execute(self) -> anyhow::Result<()> {
pub fn execute(self) -> Result<()> {
debug!("Computing and writing the word levels positions docids into LMDB on disk...");
let entries = compute_positions_levels(
@ -78,7 +80,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> {
self.wtxn,
*self.index.word_level_position_docids.as_polymorph(),
entries,
|_, _| anyhow::bail!("invalid word level position merging"),
|_, _| Err(InternalError::IndexingMergingKeys { process: "word level position" }),
WriteMethod::Append,
)?;
@ -142,7 +144,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> {
self.wtxn,
*self.index.word_prefix_level_position_docids.as_polymorph(),
entries,
|_, _| anyhow::bail!("invalid word prefix level position merging"),
|_, _| Err(InternalError::IndexingMergingKeys { process: "word prefix level position" }),
WriteMethod::Append,
)?;
@ -174,7 +176,7 @@ fn compute_positions_levels(
shrink_size: Option<u64>,
level_group_size: NonZeroU32,
min_level_size: NonZeroU32,
) -> anyhow::Result<Reader<FileFuse>>
) -> Result<Reader<FileFuse>>
{
// It is forbidden to keep a cursor and write in a database at the same time with LMDB
// therefore we write the level entries into a grenad file before transferring them.
@ -251,7 +253,7 @@ fn write_level_entry(
left: u32,
right: u32,
ids: &RoaringBitmap,
) -> anyhow::Result<()>
) -> Result<()>
{
let key = (word, level, left, right);
let key = StrLevelPositionCodec::bytes_encode(&key).ok_or(Error::Encoding)?;

View File

@ -2,7 +2,7 @@ use std::iter::FromIterator;
use std::str;
use fst::Streamer;
use crate::{Index, SmallString32};
use crate::{Index, SmallString32, Result};
pub struct WordsPrefixesFst<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -48,7 +48,7 @@ impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> {
self
}
pub fn execute(self) -> anyhow::Result<()> {
pub fn execute(self) -> Result<()> {
let words_fst = self.index.words_fst(&self.wtxn)?;
let number_of_words = words_fst.len();
let min_number_of_words = (number_of_words as f64 * self.threshold) as usize;