squash-me

This commit is contained in:
qdequele
2020-01-10 18:20:30 +01:00
parent 2ee90a891c
commit bbe1845f66
20 changed files with 1118 additions and 676 deletions

View File

@@ -1,7 +1,7 @@
use std::cmp::Ordering;
use std::error::Error;
use std::fmt;
use meilisearch_schema::{Schema, SchemaAttr};
use meilisearch_schema::{Schema, FieldId};
use crate::{RankedMap, RawDocument};
use super::{Criterion, Context};
@@ -41,7 +41,7 @@ use super::{Criterion, Context};
/// ```
pub struct SortByAttr<'a> {
ranked_map: &'a RankedMap,
attr: SchemaAttr,
field_id: FieldId,
reversed: bool,
}
@@ -68,18 +68,18 @@ impl<'a> SortByAttr<'a> {
attr_name: &str,
reversed: bool,
) -> Result<SortByAttr<'a>, SortByAttrError> {
let attr = match schema.attribute(attr_name) {
Some(attr) => attr,
let field_id = match schema.get_id(attr_name) {
Some(field_id) => *field_id,
None => return Err(SortByAttrError::AttributeNotFound),
};
if !schema.props(attr).is_ranked() {
if !schema.id_is_ranked(field_id) {
return Err(SortByAttrError::AttributeNotRegisteredForRanking);
}
Ok(SortByAttr {
ranked_map,
attr,
field_id,
reversed,
})
}
@@ -91,8 +91,8 @@ impl Criterion for SortByAttr<'_> {
}
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
let lhs = self.ranked_map.get(lhs.id, self.attr);
let rhs = self.ranked_map.get(rhs.id, self.attr);
let lhs = self.ranked_map.get(lhs.id, self.field_id);
let rhs = self.ranked_map.get(rhs.id, self.field_id);
match (lhs, rhs) {
(Some(lhs), Some(rhs)) => {

View File

@@ -12,6 +12,7 @@ pub enum Error {
SchemaMissing,
WordIndexMissing,
MissingDocumentId,
MaxFieldsLimitExceeded,
Zlmdb(heed::Error),
Fst(fst::Error),
SerdeJson(SerdeJsonError),

View File

@@ -0,0 +1,94 @@
use std::io::{Read, Write};
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use crate::{MResult, Error};
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FieldsMap {
name_map: HashMap<String, u16>,
id_map: HashMap<u16, String>,
next_id: u16
}
impl FieldsMap {
pub fn len(&self) -> usize {
self.name_map.len()
}
pub fn is_empty(&self) -> bool {
self.name_map.is_empty()
}
pub fn insert<T: ToString>(&mut self, name: T) -> MResult<u16> {
let name = name.to_string();
if let Some(id) = self.name_map.get(&name) {
return Ok(*id)
}
let id = self.next_id;
if self.next_id.checked_add(1).is_none() {
return Err(Error::MaxFieldsLimitExceeded)
} else {
self.next_id += 1;
}
self.name_map.insert(name.clone(), id);
self.id_map.insert(id, name);
Ok(id)
}
pub fn remove<T: ToString>(&mut self, name: T) {
let name = name.to_string();
if let Some(id) = self.name_map.get(&name) {
self.id_map.remove(&id);
}
self.name_map.remove(&name);
}
pub fn get_id<T: ToString>(&self, name: T) -> Option<&u16> {
let name = name.to_string();
self.name_map.get(&name)
}
pub fn get_name(&self, id: u16) -> Option<&String> {
self.id_map.get(&id)
}
pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<FieldsMap> {
bincode::deserialize_from(reader)
}
pub fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
bincode::serialize_into(writer, &self)
}
}
#[cfg(test)]
mod tests {
use super::*;
// Walks a single map through insertion, lookups in both directions and removal.
// The misspelled "descritpion" is just an arbitrary key, used consistently below.
#[test]
fn fields_map() {
let mut fields_map = FieldsMap::default();
// Previously unseen names receive consecutive ids starting at 0.
assert_eq!(fields_map.insert("id").unwrap(), 0);
assert_eq!(fields_map.insert("title").unwrap(), 1);
assert_eq!(fields_map.insert("descritpion").unwrap(), 2);
// Inserting a known name again returns the id it already owns.
assert_eq!(fields_map.insert("id").unwrap(), 0);
assert_eq!(fields_map.insert("title").unwrap(), 1);
assert_eq!(fields_map.insert("descritpion").unwrap(), 2);
// Name -> id lookups; a name that was never inserted yields `None`.
assert_eq!(fields_map.get_id("id"), Some(&0));
assert_eq!(fields_map.get_id("title"), Some(&1));
assert_eq!(fields_map.get_id("descritpion"), Some(&2));
assert_eq!(fields_map.get_id("date"), None);
assert_eq!(fields_map.len(), 3);
// Id -> name lookups; an id that was never allocated yields `None`.
assert_eq!(fields_map.get_name(0), Some(&"id".to_owned()));
assert_eq!(fields_map.get_name(1), Some(&"title".to_owned()));
assert_eq!(fields_map.get_name(2), Some(&"descritpion".to_owned()));
assert_eq!(fields_map.get_name(4), None);
// Removing a name makes it unknown again ...
fields_map.remove("title");
assert_eq!(fields_map.get_id("title"), None);
// ... and re-inserting it allocates a new id (3) rather than reusing the old one (1).
assert_eq!(fields_map.insert("title").unwrap(), 3);
assert_eq!(fields_map.len(), 3);
}
}

View File

@@ -16,7 +16,8 @@ mod ranked_map;
mod raw_document;
mod reordered_attrs;
mod update;
mod settings;
// mod fields_map;
pub mod settings;
pub mod criterion;
pub mod raw_indexer;
pub mod serde;
@@ -26,6 +27,7 @@ pub use self::database::{BoxUpdateFn, Database, MainT, UpdateT};
pub use self::error::{Error, MResult};
pub use self::number::{Number, ParseNumberError};
pub use self::ranked_map::RankedMap;
// pub use self::fields_map::FieldsMap;
pub use self::raw_document::RawDocument;
pub use self::store::Index;
pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};

View File

@@ -1,14 +1,14 @@
use std::io::{Read, Write};
use hashbrown::HashMap;
use meilisearch_schema::SchemaAttr;
use meilisearch_schema::FieldId;
use serde::{Deserialize, Serialize};
use crate::{DocumentId, Number};
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(transparent)]
pub struct RankedMap(HashMap<(DocumentId, SchemaAttr), Number>);
pub struct RankedMap(HashMap<(DocumentId, FieldId), Number>);
impl RankedMap {
pub fn len(&self) -> usize {
@@ -19,15 +19,15 @@ impl RankedMap {
self.0.is_empty()
}
pub fn insert(&mut self, document: DocumentId, attribute: SchemaAttr, number: Number) {
pub fn insert(&mut self, document: DocumentId, attribute: FieldId, number: Number) {
self.0.insert((document, attribute), number);
}
pub fn remove(&mut self, document: DocumentId, attribute: SchemaAttr) {
pub fn remove(&mut self, document: DocumentId, attribute: FieldId) {
self.0.remove(&(document, attribute));
}
pub fn get(&self, document: DocumentId, attribute: SchemaAttr) -> Option<Number> {
pub fn get(&self, document: DocumentId, attribute: FieldId) -> Option<Number> {
self.0.get(&(document, attribute)).cloned()
}

View File

@@ -3,7 +3,7 @@ use std::convert::TryFrom;
use crate::{DocIndex, DocumentId};
use deunicode::deunicode_with_tofu;
use meilisearch_schema::SchemaAttr;
use meilisearch_schema::IndexedPos;
use meilisearch_tokenizer::{is_cjk, SeqTokenizer, Token, Tokenizer};
use sdset::SetBuf;
@@ -37,14 +37,14 @@ impl RawIndexer {
}
}
pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) -> usize {
pub fn index_text(&mut self, id: DocumentId, indexed_pos: IndexedPos, text: &str) -> usize {
let mut number_of_words = 0;
for token in Tokenizer::new(text) {
let must_continue = index_token(
token,
id,
attr,
indexed_pos,
self.word_limit,
&self.stop_words,
&mut self.words_doc_indexes,
@@ -61,7 +61,7 @@ impl RawIndexer {
number_of_words
}
pub fn index_text_seq<'a, I>(&mut self, id: DocumentId, attr: SchemaAttr, iter: I)
pub fn index_text_seq<'a, I>(&mut self, id: DocumentId, indexed_pos: IndexedPos, iter: I)
where
I: IntoIterator<Item = &'a str>,
{
@@ -70,7 +70,7 @@ impl RawIndexer {
let must_continue = index_token(
token,
id,
attr,
indexed_pos,
self.word_limit,
&self.stop_words,
&mut self.words_doc_indexes,
@@ -110,7 +110,7 @@ impl RawIndexer {
fn index_token(
token: Token,
id: DocumentId,
attr: SchemaAttr,
indexed_pos: IndexedPos,
word_limit: usize,
stop_words: &fst::Set,
words_doc_indexes: &mut BTreeMap<Word, Vec<DocIndex>>,
@@ -127,7 +127,7 @@ fn index_token(
};
if !stop_words.contains(&token.word) {
match token_to_docindex(id, attr, token) {
match token_to_docindex(id, indexed_pos, token) {
Some(docindex) => {
let word = Vec::from(token.word);
@@ -160,14 +160,14 @@ fn index_token(
true
}
fn token_to_docindex(id: DocumentId, attr: SchemaAttr, token: Token) -> Option<DocIndex> {
fn token_to_docindex(id: DocumentId, indexed_pos: IndexedPos, token: Token) -> Option<DocIndex> {
let word_index = u16::try_from(token.word_index).ok()?;
let char_index = u16::try_from(token.char_index).ok()?;
let char_length = u16::try_from(token.word.chars().count()).ok()?;
let docindex = DocIndex {
document_id: id,
attribute: attr.0,
attribute: indexed_pos.0,
word_index,
char_index,
char_length,
@@ -178,7 +178,9 @@ fn token_to_docindex(id: DocumentId, attr: SchemaAttr, token: Token) -> Option<D
#[cfg(test)]
mod tests {
use super::*;
use meilisearch_schema::SchemaAttr;
#[test]
fn strange_apostrophe() {

View File

@@ -2,7 +2,7 @@ use std::collections::HashSet;
use std::io::Cursor;
use std::{error::Error, fmt};
use meilisearch_schema::{Schema, SchemaAttr};
use meilisearch_schema::{Schema, FieldId};
use serde::{de, forward_to_deserialize_any};
use serde_json::de::IoRead as SerdeJsonIoRead;
use serde_json::Deserializer as SerdeJsonDeserializer;
@@ -54,7 +54,7 @@ pub struct Deserializer<'a> {
pub reader: &'a heed::RoTxn<MainT>,
pub documents_fields: DocumentsFields,
pub schema: &'a Schema,
pub attributes: Option<&'a HashSet<SchemaAttr>>,
pub attributes: Option<&'a HashSet<FieldId>>,
}
impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
@@ -92,15 +92,17 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
}
};
let is_displayed = self.schema.props(attr).is_displayed();
let is_displayed = self.schema.id_is_displayed(attr);
if is_displayed && self.attributes.map_or(true, |f| f.contains(&attr)) {
let attribute_name = self.schema.attribute_name(attr);
if let Some(attribute_name) = self.schema.get_name(attr) {
let cursor = Cursor::new(value.to_owned());
let ioread = SerdeJsonIoRead::new(cursor);
let value = Value(SerdeJsonDeserializer::new(ioread));
Some((attribute_name, value))
Some((*attribute_name, value))
} else {
None
}
} else {
None
}

View File

@@ -1,4 +1,4 @@
use meilisearch_schema::SchemaAttr;
use meilisearch_schema::{IndexedPos};
use serde::ser;
use serde::Serialize;
@@ -7,7 +7,7 @@ use crate::raw_indexer::RawIndexer;
use crate::DocumentId;
pub struct Indexer<'a> {
pub attribute: SchemaAttr,
pub pos: IndexedPos,
pub indexer: &'a mut RawIndexer,
pub document_id: DocumentId,
}
@@ -85,7 +85,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
fn serialize_str(self, text: &str) -> Result<Self::Ok, Self::Error> {
let number_of_words = self
.indexer
.index_text(self.document_id, self.attribute, text);
.index_text(self.document_id, self.pos, text);
Ok(Some(number_of_words))
}
@@ -104,7 +104,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
let text = value.serialize(ConvertToString)?;
let number_of_words = self
.indexer
.index_text(self.document_id, self.attribute, &text);
.index_text(self.document_id, self.pos, &text);
Ok(Some(number_of_words))
}
@@ -153,7 +153,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
let indexer = SeqIndexer {
attribute: self.attribute,
pos: self.pos,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
@@ -164,7 +164,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
let indexer = TupleIndexer {
attribute: self.attribute,
pos: self.pos,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
@@ -197,7 +197,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
let indexer = MapIndexer {
attribute: self.attribute,
pos: self.pos,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
@@ -212,7 +212,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
_len: usize,
) -> Result<Self::SerializeStruct, Self::Error> {
let indexer = StructIndexer {
attribute: self.attribute,
pos: self.pos,
document_id: self.document_id,
indexer: self.indexer,
texts: Vec::new(),
@@ -235,7 +235,7 @@ impl<'a> ser::Serializer for Indexer<'a> {
}
pub struct SeqIndexer<'a> {
attribute: SchemaAttr,
pos: IndexedPos,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
@@ -257,13 +257,13 @@ impl<'a> ser::SerializeSeq for SeqIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer
.index_text_seq(self.document_id, self.attribute, texts);
.index_text_seq(self.document_id, self.pos, texts);
Ok(None)
}
}
pub struct MapIndexer<'a> {
attribute: SchemaAttr,
pos: IndexedPos,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
@@ -294,13 +294,13 @@ impl<'a> ser::SerializeMap for MapIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer
.index_text_seq(self.document_id, self.attribute, texts);
.index_text_seq(self.document_id, self.pos, texts);
Ok(None)
}
}
pub struct StructIndexer<'a> {
attribute: SchemaAttr,
pos: IndexedPos,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
@@ -328,13 +328,13 @@ impl<'a> ser::SerializeStruct for StructIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer
.index_text_seq(self.document_id, self.attribute, texts);
.index_text_seq(self.document_id, self.pos, texts);
Ok(None)
}
}
pub struct TupleIndexer<'a> {
attribute: SchemaAttr,
pos: IndexedPos,
document_id: DocumentId,
indexer: &'a mut RawIndexer,
texts: Vec<String>,
@@ -356,7 +356,7 @@ impl<'a> ser::SerializeTuple for TupleIndexer<'a> {
fn end(self) -> Result<Self::Ok, Self::Error> {
let texts = self.texts.iter().map(String::as_str);
self.indexer
.index_text_seq(self.document_id, self.attribute, texts);
.index_text_seq(self.document_id, self.pos, texts);
Ok(None)
}
}

View File

@@ -26,6 +26,7 @@ use std::{error::Error, fmt};
use serde::ser;
use serde_json::Error as SerdeJsonError;
use meilisearch_schema::Error as SchemaError;
use crate::ParseNumberError;
@@ -36,6 +37,7 @@ pub enum SerializerError {
Zlmdb(heed::Error),
SerdeJson(SerdeJsonError),
ParseNumber(ParseNumberError),
Schema(SchemaError),
UnserializableType { type_name: &'static str },
UnindexableType { type_name: &'static str },
UnrankableType { type_name: &'static str },
@@ -62,6 +64,7 @@ impl fmt::Display for SerializerError {
SerializerError::ParseNumber(e) => {
write!(f, "error while trying to parse a number: {}", e)
}
SerializerError::Schema(e) => write!(f, "impossible to update schema: {}", e),
SerializerError::UnserializableType { type_name } => {
write!(f, "{} is not a serializable type", type_name)
}
@@ -101,3 +104,9 @@ impl From<ParseNumberError> for SerializerError {
SerializerError::ParseNumber(error)
}
}
impl From<SchemaError> for SerializerError {
fn from(error: SchemaError) -> SerializerError {
SerializerError::Schema(error)
}
}

View File

@@ -1,4 +1,4 @@
use meilisearch_schema::{Schema, SchemaAttr, SchemaProps};
use meilisearch_schema::{Schema, FieldsMap};
use serde::ser;
use crate::database::MainT;
@@ -15,6 +15,7 @@ pub struct Serializer<'a, 'b> {
pub document_fields_counts: DocumentsFieldsCounts,
pub indexer: &'a mut RawIndexer,
pub ranked_map: &'a mut RankedMap,
pub fields_map: &'a mut FieldsMap,
pub document_id: DocumentId,
}
@@ -158,6 +159,7 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
document_fields_counts: self.document_fields_counts,
indexer: self.indexer,
ranked_map: self.ranked_map,
fields_map: self.fields_map,
current_key_name: None,
})
}
@@ -175,6 +177,7 @@ impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
document_fields_counts: self.document_fields_counts,
indexer: self.indexer,
ranked_map: self.ranked_map,
fields_map: self.fields_map,
})
}
@@ -199,6 +202,7 @@ pub struct MapSerializer<'a, 'b> {
document_fields_counts: DocumentsFieldsCounts,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
fields_map: &'a mut FieldsMap,
current_key_name: Option<String>,
}
@@ -243,6 +247,7 @@ impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {
self.document_fields_counts,
self.indexer,
self.ranked_map,
self.fields_map,
value,
),
None => Ok(()),
@@ -262,6 +267,7 @@ pub struct StructSerializer<'a, 'b> {
document_fields_counts: DocumentsFieldsCounts,
indexer: &'a mut RawIndexer,
ranked_map: &'a mut RankedMap,
fields_map: &'a mut FieldsMap,
}
impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
@@ -276,8 +282,16 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
where
T: ser::Serialize,
{
match self.schema.attribute(key) {
Some(attribute) => serialize_value(
// let id = fields_map.insert(key)?;
// let attribute = match self.schema.attribute(id) {
// Some(attribute) => attribute,
// None => {
// },
// }
serialize_value(
self.txn,
attribute,
self.schema.props(attribute),
@@ -287,9 +301,7 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
self.indexer,
self.ranked_map,
value,
),
None => Ok(()),
}
)
}
fn end(self) -> Result<Self::Ok, Self::Error> {
@@ -297,10 +309,10 @@ impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
}
}
pub fn serialize_value<T: ?Sized>(
pub fn serialize_value<'a, T: ?Sized>(
txn: &mut heed::RwTxn<MainT>,
attribute: SchemaAttr,
props: SchemaProps,
attribute: &'static str,
schema: &'a Schema,
document_id: DocumentId,
document_store: DocumentsFields,
documents_fields_counts: DocumentsFieldsCounts,
@@ -312,11 +324,12 @@ where
T: ser::Serialize,
{
let serialized = serde_json::to_vec(value)?;
document_store.put_document_field(txn, document_id, attribute, &serialized)?;
let field_id = schema.get_or_create(attribute)?;
document_store.put_document_field(txn, document_id, field_id, &serialized)?;
if props.is_indexed() {
if let Some(indexed_pos) = schema.id_is_indexed(field_id) {
let indexer = Indexer {
attribute,
field_id,
indexer,
document_id,
};
@@ -324,15 +337,15 @@ where
documents_fields_counts.put_document_field_count(
txn,
document_id,
attribute,
field_id,
number_of_words as u16,
)?;
}
}
if props.is_ranked() {
if let Some(field_id) = schema.id_is_ranked(field_id) {
let number = value.serialize(ConvertToNumber)?;
ranked_map.insert(document_id, attribute, number);
ranked_map.insert(document_id, field_id, number);
}
Ok(())

View File

@@ -3,7 +3,7 @@ use crate::database::MainT;
use crate::DocumentId;
use heed::types::OwnedType;
use heed::Result as ZResult;
use meilisearch_schema::SchemaAttr;
use meilisearch_schema::FieldId;
#[derive(Copy, Clone)]
pub struct DocumentsFieldsCounts {
@@ -15,7 +15,7 @@ impl DocumentsFieldsCounts {
self,
writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId,
attribute: SchemaAttr,
attribute: FieldId,
value: u16,
) -> ZResult<()> {
let key = DocumentAttrKey::new(document_id, attribute);
@@ -27,8 +27,8 @@ impl DocumentsFieldsCounts {
writer: &mut heed::RwTxn<MainT>,
document_id: DocumentId,
) -> ZResult<usize> {
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
let start = DocumentAttrKey::new(document_id, FieldId::min());
let end = DocumentAttrKey::new(document_id, FieldId::max());
self.documents_fields_counts
.delete_range(writer, &(start..=end))
}
@@ -41,7 +41,7 @@ impl DocumentsFieldsCounts {
self,
reader: &heed::RoTxn<MainT>,
document_id: DocumentId,
attribute: SchemaAttr,
attribute: FieldId,
) -> ZResult<Option<u16>> {
let key = DocumentAttrKey::new(document_id, attribute);
match self.documents_fields_counts.get(reader, &key)? {
@@ -55,8 +55,8 @@ impl DocumentsFieldsCounts {
reader: &'txn heed::RoTxn<MainT>,
document_id: DocumentId,
) -> ZResult<DocumentFieldsCountsIter<'txn>> {
let start = DocumentAttrKey::new(document_id, SchemaAttr::min());
let end = DocumentAttrKey::new(document_id, SchemaAttr::max());
let start = DocumentAttrKey::new(document_id, FieldId::min());
let end = DocumentAttrKey::new(document_id, FieldId::max());
let iter = self.documents_fields_counts.range(reader, &(start..=end))?;
Ok(DocumentFieldsCountsIter { iter })
}
@@ -83,12 +83,12 @@ pub struct DocumentFieldsCountsIter<'txn> {
}
impl Iterator for DocumentFieldsCountsIter<'_> {
type Item = ZResult<(SchemaAttr, u16)>;
type Item = ZResult<(FieldId, u16)>;
fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() {
Some(Ok((key, count))) => {
let attr = SchemaAttr(key.attr.get());
let attr = FieldId(key.attr.get());
Some(Ok((attr, count)))
}
Some(Err(e)) => Some(Err(e)),
@@ -127,13 +127,13 @@ pub struct AllDocumentsFieldsCountsIter<'txn> {
}
impl Iterator for AllDocumentsFieldsCountsIter<'_> {
type Item = ZResult<(DocumentId, SchemaAttr, u16)>;
type Item = ZResult<(DocumentId, FieldId, u16)>;
fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() {
Some(Ok((key, count))) => {
let docid = DocumentId(key.docid.get());
let attr = SchemaAttr(key.attr.get());
let attr = FieldId(key.attr.get());
Some(Ok((docid, attr, count)))
}
Some(Err(e)) => Some(Err(e)),

View File

@@ -1,3 +1,4 @@
use crate::fields_map::FieldsMap;
use crate::database::MainT;
use crate::RankedMap;
use chrono::{DateTime, Utc};
@@ -17,6 +18,7 @@ const FIELDS_FREQUENCY_KEY: &str = "fields-frequency";
const NAME_KEY: &str = "name";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
const RANKED_MAP_KEY: &str = "ranked-map";
const FIELDS_MAP_KEY: &str = "fields-map";
const SCHEMA_KEY: &str = "schema";
const UPDATED_AT_KEY: &str = "updated-at";
const WORDS_KEY: &str = "words";
@@ -112,6 +114,16 @@ impl Main {
.get::<_, Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY)
}
/// Stores `fields_map` in the main store under `FIELDS_MAP_KEY`, encoded with
/// the `SerdeBincode` codec, as part of the `writer` transaction.
pub fn put_fields_map(self, writer: &mut heed::RwTxn<MainT>, fields_map: &FieldsMap) -> ZResult<()> {
self.main
.put::<_, Str, SerdeBincode<FieldsMap>>(writer, FIELDS_MAP_KEY, &fields_map)
}
/// Reads the value stored under `FIELDS_MAP_KEY` and decodes it with the
/// `SerdeBincode` codec.
///
/// NOTE(review): presumably yields `Ok(None)` when nothing has been stored
/// under the key yet — confirm against heed's `get`.
pub fn fields_map(self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<FieldsMap>> {
self.main
.get::<_, Str, SerdeBincode<FieldsMap>>(reader, FIELDS_MAP_KEY)
}
pub fn put_synonyms_fst(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set) -> ZResult<()> {
let bytes = fst.as_fst().as_bytes();
self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)

View File

@@ -206,11 +206,10 @@ impl Index {
let schema = self.main.schema(reader)?;
let schema = schema.ok_or(Error::SchemaMissing)?;
// let attributes = attributes.map(|a| a.iter().filter_map(|name| schema.get_id(*name)).collect());
let attributes = match attributes {
Some(attributes) => attributes
.iter()
.map(|name| schema.attribute(name))
.collect(),
Some(attributes) => Some(attributes.iter().filter_map(|name| schema.get_id(*name)).collect()),
None => None,
};

View File

@@ -147,6 +147,8 @@ pub fn apply_documents_addition<'a, 'b>(
None => fst::Set::default(),
};
let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
// 3. index the documents fields in the stores
let mut indexer = RawIndexer::new(stop_words);
@@ -158,6 +160,7 @@ pub fn apply_documents_addition<'a, 'b>(
document_fields_counts: index.documents_fields_counts,
indexer: &mut indexer,
ranked_map: &mut ranked_map,
fields_map: &mut fields_map,
document_id,
};
@@ -238,6 +241,8 @@ pub fn apply_documents_partial_addition<'a, 'b>(
None => fst::Set::default(),
};
let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
// 3. index the documents fields in the stores
let mut indexer = RawIndexer::new(stop_words);
@@ -249,6 +254,7 @@ pub fn apply_documents_partial_addition<'a, 'b>(
document_fields_counts: index.documents_fields_counts,
indexer: &mut indexer,
ranked_map: &mut ranked_map,
fields_map: &mut fields_map,
document_id,
};
@@ -275,6 +281,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
};
let mut ranked_map = RankedMap::default();
let mut fields_map = main_store.fields_map(writer)?.unwrap_or_default();
// 1. retrieve all documents ids
let mut documents_ids_to_reindex = Vec::new();
@@ -318,6 +325,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
index.documents_fields_counts,
&mut indexer,
&mut ranked_map,
&mut fields_map,
&value,
)?;
}

View File

@@ -1,6 +1,9 @@
use std::collections::BTreeSet;
use http::StatusCode;
use tide::response::IntoResponse;
use tide::{Context, Response};
use meilisearch_core::settings::{SettingsUpdate, UpdateState};
use crate::error::{ResponseError, SResult};
use crate::helpers::tide::ContextExt;
@@ -33,18 +36,17 @@ pub async fn update(mut ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(SettingsRead)?;
let index = ctx.index()?;
let data: Vec<String> = ctx.body_json().await.map_err(ResponseError::bad_request)?;
let data: BTreeSet<String> = ctx.body_json().await.map_err(ResponseError::bad_request)?;
let db = &ctx.state().db;
let mut writer = db.update_write_txn().map_err(ResponseError::internal)?;
let mut stop_words_update = index.stop_words_update();
for stop_word in data {
stop_words_update.add_stop_word(stop_word);
}
let settings = SettingsUpdate {
stop_words: UpdateState::Update(data),
.. SettingsUpdate::default()
};
let update_id = stop_words_update
.finalize(&mut writer)
let update_id = index.settings_update(&mut writer, settings)
.map_err(ResponseError::internal)?;
writer.commit().map_err(ResponseError::internal)?;
@@ -62,10 +64,12 @@ pub async fn delete(ctx: Context<Data>) -> SResult<Response> {
let db = &ctx.state().db;
let mut writer = db.update_write_txn().map_err(ResponseError::internal)?;
let stop_words_deletion = index.stop_words_update();
let settings = SettingsUpdate {
stop_words: UpdateState::Clear,
.. SettingsUpdate::default()
};
let update_id = stop_words_deletion
.finalize(&mut writer)
let update_id = index.settings_update(&mut writer, settings)
.map_err(ResponseError::internal)?;
writer.commit().map_err(ResponseError::internal)?;

View File

@@ -1,9 +1,10 @@
use std::collections::HashMap;
use std::collections::BTreeMap;
use http::StatusCode;
use tide::response::IntoResponse;
use tide::{Context, Response};
use indexmap::IndexMap;
use meilisearch_core::settings::{SettingsUpdate, UpdateState};
use crate::error::{ResponseError, SResult};
use crate::helpers::tide::ContextExt;
@@ -47,21 +48,19 @@ pub async fn get(ctx: Context<Data>) -> SResult<Response> {
pub async fn update(mut ctx: Context<Data>) -> SResult<Response> {
ctx.is_allowed(SettingsWrite)?;
let data: HashMap<String, Vec<String>> = ctx.body_json().await.map_err(ResponseError::bad_request)?;
let data: BTreeMap<String, Vec<String>> = ctx.body_json().await.map_err(ResponseError::bad_request)?;
let index = ctx.index()?;
let db = &ctx.state().db;
let mut writer = db.update_write_txn().map_err(ResponseError::internal)?;
let mut synonyms_update = index.synonyms_update();
let settings = SettingsUpdate {
synonyms: UpdateState::Update(data),
.. SettingsUpdate::default()
};
for (input, synonyms) in data {
synonyms_update.add_synonym(input, synonyms.into_iter());
}
let update_id = synonyms_update
.finalize(&mut writer)
let update_id = index.settings_update(&mut writer, settings)
.map_err(ResponseError::internal)?;
writer.commit().map_err(ResponseError::internal)?;
@@ -86,10 +85,7 @@ pub async fn delete(ctx: Context<Data>) -> SResult<Response> {
.. SettingsUpdate::default()
};
let synonyms_update = index.synonyms_update();
let update_id = synonyms_update
.finalize(&mut writer)
let update_id = index.settings_update(&mut writer, settings)
.map_err(ResponseError::internal)?;
writer.commit().map_err(ResponseError::internal)?;

View File

@@ -0,0 +1,20 @@
use std::{error, fmt};
/// `Result` whose error type is [`Error`].
pub type SResult<T> = Result<T, Error>;

/// Failures reported by this module.
#[derive(Debug)]
pub enum Error {
    /// No further field id can be allocated.
    MaxFieldsLimitExceeded,
}

impl fmt::Display for Error {
    /// Writes the human-readable message attached to each variant.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match self {
            Error::MaxFieldsLimitExceeded => {
                "The maximum of possible reatributed field id has been reached"
            }
        };
        f.write_str(message)
    }
}

impl error::Error for Error {}

View File

@@ -0,0 +1,91 @@
use std::io::{Read, Write};
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use crate::{SResult, SchemaAttr};
/// Identifier attached to a field name.
pub type FieldId = SchemaAttr;

/// Two-way association between field names and the [`FieldId`]s allocated for them.
///
/// Ids are taken from `next_id`, which is advanced on every new name; ids of
/// removed names are not handed out again.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FieldsMap {
    // Field name -> id.
    name_map: HashMap<String, FieldId>,
    // Id -> field name.
    id_map: HashMap<FieldId, String>,
    // Id that the next previously unseen name will receive.
    next_id: FieldId
}

impl FieldsMap {
    /// Number of names currently registered.
    pub fn len(&self) -> usize {
        self.name_map.len()
    }

    /// `true` when no name is registered.
    pub fn is_empty(&self) -> bool {
        self.name_map.is_empty()
    }

    /// Returns the id of `name`, allocating a new one if the name is unknown.
    ///
    /// # Errors
    ///
    /// Propagates the error of `FieldId::next` when the id counter cannot be
    /// advanced; nothing is registered in that case.
    pub fn insert<S: Into<String>>(&mut self, name: S) -> SResult<FieldId> {
        let key: String = name.into();
        if let Some(&known) = self.name_map.get(&key) {
            return Ok(known);
        }

        // Advance the counter first so a failure leaves both maps untouched.
        let fresh: FieldId = self.next_id;
        let following = self.next_id.next()?;
        self.next_id = following;

        self.id_map.insert(fresh, key.clone());
        self.name_map.insert(key, fresh);
        Ok(fresh)
    }

    /// Forgets `name` and the id attached to it; unknown names are ignored.
    pub fn remove<S: Into<String>>(&mut self, name: S) {
        let key: String = name.into();
        if let Some(id) = self.name_map.remove(&key) {
            self.id_map.remove(&id);
        }
    }

    /// Looks up the id registered for `name`.
    pub fn get_id<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
        let key: String = name.into();
        self.name_map.get(&key)
    }

    /// Looks up the name registered for `id`.
    pub fn get_name<I: Into<SchemaAttr>>(&self, id: I) -> Option<&String> {
        let key: SchemaAttr = id.into();
        self.id_map.get(&key)
    }

    /// Decodes a map from its bincode representation.
    pub fn read_from_bin<R: Read>(reader: R) -> bincode::Result<FieldsMap> {
        bincode::deserialize_from(reader)
    }

    /// Encodes the map with bincode into `writer`.
    pub fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
        bincode::serialize_into(writer, self)
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Walks a single map through insertion, lookups in both directions and removal.
// The misspelled "descritpion" is just an arbitrary key, used consistently below.
#[test]
fn fields_map() {
let mut fields_map = FieldsMap::default();
// Previously unseen names receive consecutive ids starting at 0.
assert_eq!(fields_map.insert("id").unwrap(), 0.into());
assert_eq!(fields_map.insert("title").unwrap(), 1.into());
assert_eq!(fields_map.insert("descritpion").unwrap(), 2.into());
// Inserting a known name again returns the id it already owns.
assert_eq!(fields_map.insert("id").unwrap(), 0.into());
assert_eq!(fields_map.insert("title").unwrap(), 1.into());
assert_eq!(fields_map.insert("descritpion").unwrap(), 2.into());
// Name -> id lookups; a name that was never inserted yields `None`.
assert_eq!(fields_map.get_id("id"), Some(&0.into()));
assert_eq!(fields_map.get_id("title"), Some(&1.into()));
assert_eq!(fields_map.get_id("descritpion"), Some(&2.into()));
assert_eq!(fields_map.get_id("date"), None);
assert_eq!(fields_map.len(), 3);
// Id -> name lookups; an id that was never allocated yields `None`.
assert_eq!(fields_map.get_name(0), Some(&"id".to_owned()));
assert_eq!(fields_map.get_name(1), Some(&"title".to_owned()));
assert_eq!(fields_map.get_name(2), Some(&"descritpion".to_owned()));
assert_eq!(fields_map.get_name(4), None);
// Removing a name makes it unknown again ...
fields_map.remove("title");
assert_eq!(fields_map.get_id("title"), None);
// ... and re-inserting it allocates a new id (3) rather than reusing the old one (1).
assert_eq!(fields_map.insert("title").unwrap(), 3.into());
assert_eq!(fields_map.len(), 3);
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,141 @@
use std::collections::{HashMap, HashSet};
use crate::{FieldsMap, FieldId, SResult, SchemaAttr};
/// Position of a field inside the ordered list of indexed fields.
pub type IndexedPos = SchemaAttr;

/// Records which fields exist and which of them are ranked, displayed and indexed.
#[derive(Default)]
pub struct Schema {
    /// Association between field names and their ids.
    fields_map: FieldsMap,
    /// Id of the field named in [`Schema::with_identifier`].
    identifier: FieldId,
    /// Ids of the fields flagged as ranked.
    ranked: HashSet<FieldId>,
    /// Ids of the fields flagged as displayed.
    displayed: HashSet<FieldId>,
    /// Ids of the indexed fields, in registration order (each id at most once).
    indexed: Vec<FieldId>,
    /// Position of every indexed field inside `indexed`.
    indexed_map: HashMap<FieldId, IndexedPos>,
}

impl Schema {
    /// Builds an otherwise empty schema whose identifier field is `name`.
    pub fn with_identifier<S: Into<String>>(name: S) -> Schema {
        let mut schema = Schema::default();
        // First insertion into a freshly created map.
        let field_id = schema.fields_map.insert(name.into()).unwrap();
        schema.identifier = field_id;
        schema
    }

    /// Name of the identifier field.
    ///
    /// # Panics
    ///
    /// Panics when the identifier id has no name in the fields map, e.g. on a
    /// `Schema::default()` that never went through [`Schema::with_identifier`].
    pub fn identifier(&self) -> String {
        self.fields_map.get_name(self.identifier).unwrap().to_string()
    }

    /// Id registered for `name`, if any.
    pub fn get_id<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
        self.fields_map.get_id(name)
    }

    /// Name registered for `id`, if any.
    pub fn get_name<I: Into<SchemaAttr>>(&self, id: I) -> Option<&String> {
        self.fields_map.get_name(id)
    }

    /// Whether `name` is a known field.
    pub fn contains<S: Into<String>>(&self, name: S) -> bool {
        self.fields_map.get_id(name.into()).is_some()
    }

    /// Returns the id of `name`, registering it without any flag when unknown.
    pub fn get_or_create_empty<S: Into<String>>(&mut self, name: S) -> SResult<FieldId> {
        self.fields_map.insert(name)
    }

    /// Returns the id of `name`; an unknown name is registered as both indexed
    /// and displayed.
    pub fn get_or_create<S: Into<String> + std::clone::Clone>(&mut self, name: S) -> SResult<FieldId> {
        if let Some(&id) = self.fields_map.get_id(name.clone()) {
            return Ok(id);
        }
        self.set_indexed(name.clone())?;
        self.set_displayed(name)
    }

    /// Flags `name` as ranked, registering the field if needed.
    pub fn set_ranked<S: Into<String>>(&mut self, name: S) -> SResult<FieldId> {
        let id = self.fields_map.insert(name.into())?;
        self.ranked.insert(id);
        Ok(id)
    }

    /// Flags `name` as displayed, registering the field if needed.
    pub fn set_displayed<S: Into<String>>(&mut self, name: S) -> SResult<FieldId> {
        let id = self.fields_map.insert(name.into())?;
        self.displayed.insert(id);
        Ok(id)
    }

    /// Flags `name` as indexed, registering the field if needed, and returns
    /// its id together with its position among the indexed fields.
    ///
    /// Calling this again for a field that is already indexed returns the
    /// position it already has instead of appending it a second time.
    pub fn set_indexed<S: Into<String>>(&mut self, name: S) -> SResult<(FieldId, IndexedPos)> {
        let id = self.fields_map.insert(name.into())?;

        // Already indexed: keep the existing position so `indexed` never holds
        // duplicates and previously returned positions stay valid.
        if let Some(&pos) = self.indexed_map.get(&id) {
            return Ok((id, pos));
        }

        // NOTE(review): the cast assumes fewer than 65 536 indexed fields,
        // presumably guaranteed by the field id limit — confirm.
        let pos = self.indexed.len() as u16;
        self.indexed.push(id);
        self.indexed_map.insert(id, pos.into());
        Ok((id, pos.into()))
    }

    /// Id of `name` when the field exists and is ranked.
    pub fn is_ranked<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
        self.fields_map
            .get_id(name.into())
            .and_then(|id| self.ranked.get(id))
    }

    /// Id of `name` when the field exists and is displayed.
    pub fn is_displayed<S: Into<String>>(&self, name: S) -> Option<&FieldId> {
        self.fields_map
            .get_id(name.into())
            .and_then(|id| self.displayed.get(id))
    }

    /// Indexed position of `name` when the field exists and is indexed.
    pub fn is_indexed<S: Into<String>>(&self, name: S) -> Option<&IndexedPos> {
        self.fields_map
            .get_id(name.into())
            .and_then(|id| self.indexed_map.get(id))
    }

    /// Whether the field `id` is ranked.
    pub fn id_is_ranked(&self, id: FieldId) -> bool {
        self.ranked.contains(&id)
    }

    /// Whether the field `id` is displayed.
    pub fn id_is_displayed(&self, id: FieldId) -> bool {
        self.displayed.contains(&id)
    }

    /// Indexed position of the field `id`, if it is indexed.
    pub fn id_is_indexed(&self, id: FieldId) -> Option<&IndexedPos> {
        self.indexed_map.get(&id)
    }

    /// Replaces the set of ranked fields with `data`.
    ///
    /// If registering a name fails, the names processed before it stay ranked.
    pub fn update_ranked<S: Into<String>>(&mut self, data: impl IntoIterator<Item = S>) -> SResult<()> {
        self.ranked.clear();
        for name in data {
            self.set_ranked(name)?;
        }
        Ok(())
    }

    /// Replaces the set of displayed fields with `data`.
    ///
    /// If registering a name fails, the names processed before it stay displayed.
    pub fn update_displayed<S: Into<String>>(&mut self, data: impl IntoIterator<Item = S>) -> SResult<()> {
        self.displayed.clear();
        for name in data {
            self.set_displayed(name)?;
        }
        Ok(())
    }

    /// Replaces the indexed fields with `data`; positions follow the order of `data`.
    ///
    /// If registering a name fails, the names processed before it stay indexed.
    pub fn update_indexed<S: Into<String>>(&mut self, data: Vec<S>) -> SResult<()> {
        self.indexed.clear();
        self.indexed_map.clear();
        for name in data {
            self.set_indexed(name)?;
        }
        Ok(())
    }
}