mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Move the helper functions to their own module
This commit is contained in:
		| @@ -1,25 +1,20 @@ | ||||
| use std::collections::HashMap; | ||||
| use std::fmt::Write as _; | ||||
| use std::hash::{Hash, Hasher}; | ||||
|  | ||||
| use fst::{set::OpBuilder, SetBuilder}; | ||||
| use indexmap::IndexMap; | ||||
| use sdset::{duo::Union, SetOperation}; | ||||
| use serde::Deserialize; | ||||
| use serde_json::Value; | ||||
| use siphasher::sip::SipHasher; | ||||
|  | ||||
| use meilisearch_types::DocumentId; | ||||
| use meilisearch_schema::IndexedPos; | ||||
|  | ||||
| use crate::database::{MainT, UpdateT}; | ||||
| use crate::database::{UpdateEvent, UpdateEventsEmitter}; | ||||
| use crate::facets; | ||||
| use crate::raw_indexer::RawIndexer; | ||||
| use crate::serde::{Deserializer, SerializerError}; | ||||
| use crate::serde::Deserializer; | ||||
| use crate::store; | ||||
| use crate::update::helpers::{index_value, value_to_number, extract_document_id}; | ||||
| use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update}; | ||||
| use crate::{Error, Number, MResult, RankedMap}; | ||||
| use crate::{Error, MResult, RankedMap}; | ||||
|  | ||||
| pub struct DocumentsAddition<D> { | ||||
|     updates_store: store::Updates, | ||||
| @@ -111,121 +106,6 @@ pub fn push_documents_addition<D: serde::Serialize>( | ||||
|     Ok(last_update_id) | ||||
| } | ||||
|  | ||||
| // TODO move this helper functions elsewhere | ||||
| /// Returns the number of words indexed or `None` if the type is unindexable. | ||||
| fn index_value( | ||||
|     indexer: &mut RawIndexer, | ||||
|     document_id: DocumentId, | ||||
|     indexed_pos: IndexedPos, | ||||
|     value: &Value, | ||||
| ) -> Option<usize> | ||||
| { | ||||
|     match value { | ||||
|         Value::Null => None, | ||||
|         Value::Bool(boolean) => { | ||||
|             let text = boolean.to_string(); | ||||
|             let number_of_words = indexer.index_text(document_id, indexed_pos, &text); | ||||
|             Some(number_of_words) | ||||
|         }, | ||||
|         Value::Number(number) => { | ||||
|             let text = number.to_string(); | ||||
|             let number_of_words = indexer.index_text(document_id, indexed_pos, &text); | ||||
|             Some(number_of_words) | ||||
|         }, | ||||
|         Value::String(string) => { | ||||
|             let number_of_words = indexer.index_text(document_id, indexed_pos, &string); | ||||
|             Some(number_of_words) | ||||
|         }, | ||||
|         Value::Array(_) => { | ||||
|             let text = value_to_string(value); | ||||
|             let number_of_words = indexer.index_text(document_id, indexed_pos, &text); | ||||
|             Some(number_of_words) | ||||
|         }, | ||||
|         Value::Object(_) => { | ||||
|             let text = value_to_string(value); | ||||
|             let number_of_words = indexer.index_text(document_id, indexed_pos, &text); | ||||
|             Some(number_of_words) | ||||
|         }, | ||||
|     } | ||||
| } | ||||
|  | ||||
| // TODO move this helper functions elsewhere | ||||
| pub fn value_to_string(value: &Value) -> String { | ||||
|     fn internal_value_to_string(string: &mut String, value: &Value) { | ||||
|         match value { | ||||
|             Value::Null => (), | ||||
|             Value::Bool(boolean) => { let _ = write!(string, "{}", &boolean); }, | ||||
|             Value::Number(number) => { let _ = write!(string, "{}", &number); }, | ||||
|             Value::String(text) => string.push_str(&text), | ||||
|             Value::Array(array) => { | ||||
|                 for value in array { | ||||
|                     internal_value_to_string(string, value); | ||||
|                     let _ = string.write_str(". "); | ||||
|                 } | ||||
|             }, | ||||
|             Value::Object(object) => { | ||||
|                 for (key, value) in object { | ||||
|                     string.push_str(key); | ||||
|                     let _ = string.write_str(". "); | ||||
|                     internal_value_to_string(string, value); | ||||
|                     let _ = string.write_str(". "); | ||||
|                 } | ||||
|             }, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     let mut string = String::new(); | ||||
|     internal_value_to_string(&mut string, value); | ||||
|     string | ||||
| } | ||||
|  | ||||
| // TODO move this helper functions elsewhere | ||||
| fn value_to_number(value: &Value) -> Option<Number> { | ||||
|     use std::str::FromStr; | ||||
|  | ||||
|     match value { | ||||
|         Value::Null => None, | ||||
|         Value::Bool(boolean) => Some(Number::Unsigned(*boolean as u64)), | ||||
|         Value::Number(number) => Number::from_str(&number.to_string()).ok(), // TODO improve that | ||||
|         Value::String(string) => Number::from_str(string).ok(), | ||||
|         Value::Array(_array) => None, | ||||
|         Value::Object(_object) => None, | ||||
|     } | ||||
| } | ||||
|  | ||||
| // TODO move this helper functions elsewhere | ||||
| pub fn compute_document_id<H: Hash>(t: H) -> DocumentId { | ||||
|     let mut s = SipHasher::new(); | ||||
|     t.hash(&mut s); | ||||
|     let hash = s.finish(); | ||||
|     DocumentId(hash) | ||||
| } | ||||
|  | ||||
| // TODO move this helper functions elsewhere | ||||
| pub fn extract_document_id(primary_key: &str, document: &IndexMap<String, Value>) -> Result<DocumentId, SerializerError> { | ||||
|  | ||||
|     fn validate_document_id(string: &str) -> bool { | ||||
|         string.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') | ||||
|     } | ||||
|  | ||||
|     match document.get(primary_key) { | ||||
|         Some(value) => { | ||||
|             let string = match value { | ||||
|                 Value::Number(number) => number.to_string(), | ||||
|                 Value::String(string) => string.clone(), | ||||
|                 _ => return Err(SerializerError::InvalidDocumentIdFormat), | ||||
|             }; | ||||
|  | ||||
|             if validate_document_id(&string) { | ||||
|                 Ok(compute_document_id(string)) | ||||
|             } else { | ||||
|                 Err(SerializerError::InvalidDocumentIdFormat) | ||||
|             } | ||||
|         } | ||||
|         None => Err(SerializerError::DocumentIdNotFound), | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub fn apply_addition<'a, 'b>( | ||||
|     writer: &'a mut heed::RwTxn<'b, MainT>, | ||||
|     index: &store::Index, | ||||
|   | ||||
							
								
								
									
										122
									
								
								meilisearch-core/src/update/helpers.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								meilisearch-core/src/update/helpers.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,122 @@ | ||||
| use std::fmt::Write as _; | ||||
| use std::hash::{Hash, Hasher}; | ||||
|  | ||||
| use indexmap::IndexMap; | ||||
| use meilisearch_schema::IndexedPos; | ||||
| use meilisearch_types::DocumentId; | ||||
| use serde_json::Value; | ||||
| use siphasher::sip::SipHasher; | ||||
|  | ||||
| use crate::raw_indexer::RawIndexer; | ||||
| use crate::serde::SerializerError; | ||||
| use crate::Number; | ||||
|  | ||||
| /// Returns the number of words indexed or `None` if the type is unindexable. | ||||
| pub fn index_value( | ||||
|     indexer: &mut RawIndexer, | ||||
|     document_id: DocumentId, | ||||
|     indexed_pos: IndexedPos, | ||||
|     value: &Value, | ||||
| ) -> Option<usize> | ||||
| { | ||||
|     match value { | ||||
|         Value::Null => None, | ||||
|         Value::Bool(boolean) => { | ||||
|             let text = boolean.to_string(); | ||||
|             let number_of_words = indexer.index_text(document_id, indexed_pos, &text); | ||||
|             Some(number_of_words) | ||||
|         }, | ||||
|         Value::Number(number) => { | ||||
|             let text = number.to_string(); | ||||
|             Some(indexer.index_text(document_id, indexed_pos, &text)) | ||||
|         }, | ||||
|         Value::String(string) => { | ||||
|             Some(indexer.index_text(document_id, indexed_pos, &string)) | ||||
|         }, | ||||
|         Value::Array(_) => { | ||||
|             let text = value_to_string(value); | ||||
|             Some(indexer.index_text(document_id, indexed_pos, &text)) | ||||
|         }, | ||||
|         Value::Object(_) => { | ||||
|             let text = value_to_string(value); | ||||
|             Some(indexer.index_text(document_id, indexed_pos, &text)) | ||||
|         }, | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Transforms the JSON Value type into a String. | ||||
| pub fn value_to_string(value: &Value) -> String { | ||||
|     fn internal_value_to_string(string: &mut String, value: &Value) { | ||||
|         match value { | ||||
|             Value::Null => (), | ||||
|             Value::Bool(boolean) => { let _ = write!(string, "{}", &boolean); }, | ||||
|             Value::Number(number) => { let _ = write!(string, "{}", &number); }, | ||||
|             Value::String(text) => string.push_str(&text), | ||||
|             Value::Array(array) => { | ||||
|                 for value in array { | ||||
|                     internal_value_to_string(string, value); | ||||
|                     let _ = string.write_str(". "); | ||||
|                 } | ||||
|             }, | ||||
|             Value::Object(object) => { | ||||
|                 for (key, value) in object { | ||||
|                     string.push_str(key); | ||||
|                     let _ = string.write_str(". "); | ||||
|                     internal_value_to_string(string, value); | ||||
|                     let _ = string.write_str(". "); | ||||
|                 } | ||||
|             }, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     let mut string = String::new(); | ||||
|     internal_value_to_string(&mut string, value); | ||||
|     string | ||||
| } | ||||
|  | ||||
| /// Transforms the JSON Value type into a Number. | ||||
| pub fn value_to_number(value: &Value) -> Option<Number> { | ||||
|     use std::str::FromStr; | ||||
|  | ||||
|     match value { | ||||
|         Value::Null => None, | ||||
|         Value::Bool(boolean) => Some(Number::Unsigned(*boolean as u64)), | ||||
|         Value::Number(number) => Number::from_str(&number.to_string()).ok(), // TODO improve that | ||||
|         Value::String(string) => Number::from_str(string).ok(), | ||||
|         Value::Array(_array) => None, | ||||
|         Value::Object(_object) => None, | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Compute the hash of the given type, this is the way we produce documents ids. | ||||
| pub fn compute_document_id<H: Hash>(t: H) -> DocumentId { | ||||
|     let mut s = SipHasher::new(); | ||||
|     t.hash(&mut s); | ||||
|     let hash = s.finish(); | ||||
|     DocumentId(hash) | ||||
| } | ||||
|  | ||||
/// Validates a string representation to be a correct document id.
///
/// A valid id is non-empty and contains only ASCII letters, ASCII digits,
/// `-` and `_`.
pub fn validate_document_id(string: &str) -> bool {
    // `Iterator::all` is vacuously true on an empty iterator, so the empty
    // string has to be rejected explicitly.
    !string.is_empty()
        && string
            .chars()
            .all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
}
|  | ||||
| /// Extracts and validates the document id of a document. | ||||
| pub fn extract_document_id(primary_key: &str, document: &IndexMap<String, Value>) -> Result<DocumentId, SerializerError> { | ||||
|     match document.get(primary_key) { | ||||
|         Some(value) => { | ||||
|             let string = match value { | ||||
|                 Value::Number(number) => number.to_string(), | ||||
|                 Value::String(string) => string.clone(), | ||||
|                 _ => return Err(SerializerError::InvalidDocumentIdFormat), | ||||
|             }; | ||||
|  | ||||
|             if validate_document_id(&string) { | ||||
|                 Ok(compute_document_id(string)) | ||||
|             } else { | ||||
|                 Err(SerializerError::InvalidDocumentIdFormat) | ||||
|             } | ||||
|         } | ||||
|         None => Err(SerializerError::DocumentIdNotFound), | ||||
|     } | ||||
| } | ||||
| @@ -3,14 +3,13 @@ mod customs_update; | ||||
| mod documents_addition; | ||||
| mod documents_deletion; | ||||
| mod settings_update; | ||||
| mod helpers; | ||||
|  | ||||
| pub use self::clear_all::{apply_clear_all, push_clear_all}; | ||||
| pub use self::customs_update::{apply_customs_update, push_customs_update}; | ||||
| pub use self::documents_addition::{ | ||||
|     apply_documents_addition, apply_documents_partial_addition, DocumentsAddition, | ||||
|     value_to_string, compute_document_id, extract_document_id, | ||||
| }; | ||||
| pub use self::documents_addition::{apply_documents_addition, apply_documents_partial_addition, DocumentsAddition}; | ||||
| pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion}; | ||||
| pub use self::helpers::{index_value, value_to_string, value_to_number, compute_document_id, extract_document_id, validate_document_id}; | ||||
| pub use self::settings_update::{apply_settings_update, push_settings_update}; | ||||
|  | ||||
| use std::cmp; | ||||
| @@ -23,6 +22,7 @@ use indexmap::IndexMap; | ||||
| use log::debug; | ||||
| use sdset::Set; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use serde_json::Value; | ||||
|  | ||||
| use crate::{store, DocumentId, MResult}; | ||||
| use crate::database::{MainT, UpdateT}; | ||||
| @@ -49,14 +49,14 @@ impl Update { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn documents_addition(data: Vec<IndexMap<String, serde_json::Value>>) -> Update { | ||||
|     fn documents_addition(data: Vec<IndexMap<String, Value>>) -> Update { | ||||
|         Update { | ||||
|             data: UpdateData::DocumentsAddition(data), | ||||
|             enqueued_at: Utc::now(), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn documents_partial(data: Vec<IndexMap<String, serde_json::Value>>) -> Update { | ||||
|     fn documents_partial(data: Vec<IndexMap<String, Value>>) -> Update { | ||||
|         Update { | ||||
|             data: UpdateData::DocumentsPartial(data), | ||||
|             enqueued_at: Utc::now(), | ||||
| @@ -82,8 +82,8 @@ impl Update { | ||||
| pub enum UpdateData { | ||||
|     ClearAll, | ||||
|     Customs(Vec<u8>), | ||||
|     DocumentsAddition(Vec<IndexMap<String, serde_json::Value>>), | ||||
|     DocumentsPartial(Vec<IndexMap<String, serde_json::Value>>), | ||||
|     DocumentsAddition(Vec<IndexMap<String, Value>>), | ||||
|     DocumentsPartial(Vec<IndexMap<String, Value>>), | ||||
|     DocumentsDeletion(Vec<DocumentId>), | ||||
|     Settings(SettingsUpdate) | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user