mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 21:46:27 +00:00 
			
		
		
		
	Move the helper functions to their own module
This commit is contained in:
		| @@ -1,25 +1,20 @@ | |||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
| use std::fmt::Write as _; |  | ||||||
| use std::hash::{Hash, Hasher}; |  | ||||||
|  |  | ||||||
| use fst::{set::OpBuilder, SetBuilder}; | use fst::{set::OpBuilder, SetBuilder}; | ||||||
| use indexmap::IndexMap; | use indexmap::IndexMap; | ||||||
| use sdset::{duo::Union, SetOperation}; | use sdset::{duo::Union, SetOperation}; | ||||||
| use serde::Deserialize; | use serde::Deserialize; | ||||||
| use serde_json::Value; | use serde_json::Value; | ||||||
| use siphasher::sip::SipHasher; |  | ||||||
|  |  | ||||||
| use meilisearch_types::DocumentId; |  | ||||||
| use meilisearch_schema::IndexedPos; |  | ||||||
|  |  | ||||||
| use crate::database::{MainT, UpdateT}; | use crate::database::{MainT, UpdateT}; | ||||||
| use crate::database::{UpdateEvent, UpdateEventsEmitter}; | use crate::database::{UpdateEvent, UpdateEventsEmitter}; | ||||||
| use crate::facets; | use crate::facets; | ||||||
| use crate::raw_indexer::RawIndexer; | use crate::raw_indexer::RawIndexer; | ||||||
| use crate::serde::{Deserializer, SerializerError}; | use crate::serde::Deserializer; | ||||||
| use crate::store; | use crate::store; | ||||||
|  | use crate::update::helpers::{index_value, value_to_number, extract_document_id}; | ||||||
| use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update}; | use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update}; | ||||||
| use crate::{Error, Number, MResult, RankedMap}; | use crate::{Error, MResult, RankedMap}; | ||||||
|  |  | ||||||
| pub struct DocumentsAddition<D> { | pub struct DocumentsAddition<D> { | ||||||
|     updates_store: store::Updates, |     updates_store: store::Updates, | ||||||
| @@ -111,121 +106,6 @@ pub fn push_documents_addition<D: serde::Serialize>( | |||||||
|     Ok(last_update_id) |     Ok(last_update_id) | ||||||
| } | } | ||||||
|  |  | ||||||
| // TODO move this helper functions elsewhere |  | ||||||
| /// Returns the number of words indexed or `None` if the type |  | ||||||
| fn index_value( |  | ||||||
|     indexer: &mut RawIndexer, |  | ||||||
|     document_id: DocumentId, |  | ||||||
|     indexed_pos: IndexedPos, |  | ||||||
|     value: &Value, |  | ||||||
| ) -> Option<usize> |  | ||||||
| { |  | ||||||
|     match value { |  | ||||||
|         Value::Null => None, |  | ||||||
|         Value::Bool(boolean) => { |  | ||||||
|             let text = boolean.to_string(); |  | ||||||
|             let number_of_words = indexer.index_text(document_id, indexed_pos, &text); |  | ||||||
|             Some(number_of_words) |  | ||||||
|         }, |  | ||||||
|         Value::Number(number) => { |  | ||||||
|             let text = number.to_string(); |  | ||||||
|             let number_of_words = indexer.index_text(document_id, indexed_pos, &text); |  | ||||||
|             Some(number_of_words) |  | ||||||
|         }, |  | ||||||
|         Value::String(string) => { |  | ||||||
|             let number_of_words = indexer.index_text(document_id, indexed_pos, &string); |  | ||||||
|             Some(number_of_words) |  | ||||||
|         }, |  | ||||||
|         Value::Array(_) => { |  | ||||||
|             let text = value_to_string(value); |  | ||||||
|             let number_of_words = indexer.index_text(document_id, indexed_pos, &text); |  | ||||||
|             Some(number_of_words) |  | ||||||
|         }, |  | ||||||
|         Value::Object(_) => { |  | ||||||
|             let text = value_to_string(value); |  | ||||||
|             let number_of_words = indexer.index_text(document_id, indexed_pos, &text); |  | ||||||
|             Some(number_of_words) |  | ||||||
|         }, |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // TODO move this helper functions elsewhere |  | ||||||
| pub fn value_to_string(value: &Value) -> String { |  | ||||||
|     fn internal_value_to_string(string: &mut String, value: &Value) { |  | ||||||
|         match value { |  | ||||||
|             Value::Null => (), |  | ||||||
|             Value::Bool(boolean) => { let _ = write!(string, "{}", &boolean); }, |  | ||||||
|             Value::Number(number) => { let _ = write!(string, "{}", &number); }, |  | ||||||
|             Value::String(text) => string.push_str(&text), |  | ||||||
|             Value::Array(array) => { |  | ||||||
|                 for value in array { |  | ||||||
|                     internal_value_to_string(string, value); |  | ||||||
|                     let _ = string.write_str(". "); |  | ||||||
|                 } |  | ||||||
|             }, |  | ||||||
|             Value::Object(object) => { |  | ||||||
|                 for (key, value) in object { |  | ||||||
|                     string.push_str(key); |  | ||||||
|                     let _ = string.write_str(". "); |  | ||||||
|                     internal_value_to_string(string, value); |  | ||||||
|                     let _ = string.write_str(". "); |  | ||||||
|                 } |  | ||||||
|             }, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     let mut string = String::new(); |  | ||||||
|     internal_value_to_string(&mut string, value); |  | ||||||
|     string |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // TODO move this helper functions elsewhere |  | ||||||
| fn value_to_number(value: &Value) -> Option<Number> { |  | ||||||
|     use std::str::FromStr; |  | ||||||
|  |  | ||||||
|     match value { |  | ||||||
|         Value::Null => None, |  | ||||||
|         Value::Bool(boolean) => Some(Number::Unsigned(*boolean as u64)), |  | ||||||
|         Value::Number(number) => Number::from_str(&number.to_string()).ok(), // TODO improve that |  | ||||||
|         Value::String(string) => Number::from_str(string).ok(), |  | ||||||
|         Value::Array(_array) => None, |  | ||||||
|         Value::Object(_object) => None, |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // TODO move this helper functions elsewhere |  | ||||||
| pub fn compute_document_id<H: Hash>(t: H) -> DocumentId { |  | ||||||
|     let mut s = SipHasher::new(); |  | ||||||
|     t.hash(&mut s); |  | ||||||
|     let hash = s.finish(); |  | ||||||
|     DocumentId(hash) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // TODO move this helper functions elsewhere |  | ||||||
| pub fn extract_document_id(primary_key: &str, document: &IndexMap<String, Value>) -> Result<DocumentId, SerializerError> { |  | ||||||
|  |  | ||||||
|     fn validate_document_id(string: &str) -> bool { |  | ||||||
|         string.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     match document.get(primary_key) { |  | ||||||
|         Some(value) => { |  | ||||||
|             let string = match value { |  | ||||||
|                 Value::Number(number) => number.to_string(), |  | ||||||
|                 Value::String(string) => string.clone(), |  | ||||||
|                 _ => return Err(SerializerError::InvalidDocumentIdFormat), |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             if validate_document_id(&string) { |  | ||||||
|                 Ok(compute_document_id(string)) |  | ||||||
|             } else { |  | ||||||
|                 Err(SerializerError::InvalidDocumentIdFormat) |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         None => Err(SerializerError::DocumentIdNotFound), |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| pub fn apply_addition<'a, 'b>( | pub fn apply_addition<'a, 'b>( | ||||||
|     writer: &'a mut heed::RwTxn<'b, MainT>, |     writer: &'a mut heed::RwTxn<'b, MainT>, | ||||||
|     index: &store::Index, |     index: &store::Index, | ||||||
|   | |||||||
							
								
								
									
										122
									
								
								meilisearch-core/src/update/helpers.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								meilisearch-core/src/update/helpers.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,122 @@ | |||||||
|  | use std::fmt::Write as _; | ||||||
|  | use std::hash::{Hash, Hasher}; | ||||||
|  |  | ||||||
|  | use indexmap::IndexMap; | ||||||
|  | use meilisearch_schema::IndexedPos; | ||||||
|  | use meilisearch_types::DocumentId; | ||||||
|  | use serde_json::Value; | ||||||
|  | use siphasher::sip::SipHasher; | ||||||
|  |  | ||||||
|  | use crate::raw_indexer::RawIndexer; | ||||||
|  | use crate::serde::SerializerError; | ||||||
|  | use crate::Number; | ||||||
|  |  | ||||||
|  | /// Returns the number of words indexed or `None` if the type is unindexable. | ||||||
|  | pub fn index_value( | ||||||
|  |     indexer: &mut RawIndexer, | ||||||
|  |     document_id: DocumentId, | ||||||
|  |     indexed_pos: IndexedPos, | ||||||
|  |     value: &Value, | ||||||
|  | ) -> Option<usize> | ||||||
|  | { | ||||||
|  |     match value { | ||||||
|  |         Value::Null => None, | ||||||
|  |         Value::Bool(boolean) => { | ||||||
|  |             let text = boolean.to_string(); | ||||||
|  |             let number_of_words = indexer.index_text(document_id, indexed_pos, &text); | ||||||
|  |             Some(number_of_words) | ||||||
|  |         }, | ||||||
|  |         Value::Number(number) => { | ||||||
|  |             let text = number.to_string(); | ||||||
|  |             Some(indexer.index_text(document_id, indexed_pos, &text)) | ||||||
|  |         }, | ||||||
|  |         Value::String(string) => { | ||||||
|  |             Some(indexer.index_text(document_id, indexed_pos, &string)) | ||||||
|  |         }, | ||||||
|  |         Value::Array(_) => { | ||||||
|  |             let text = value_to_string(value); | ||||||
|  |             Some(indexer.index_text(document_id, indexed_pos, &text)) | ||||||
|  |         }, | ||||||
|  |         Value::Object(_) => { | ||||||
|  |             let text = value_to_string(value); | ||||||
|  |             Some(indexer.index_text(document_id, indexed_pos, &text)) | ||||||
|  |         }, | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Transforms the JSON Value type into a String. | ||||||
|  | pub fn value_to_string(value: &Value) -> String { | ||||||
|  |     fn internal_value_to_string(string: &mut String, value: &Value) { | ||||||
|  |         match value { | ||||||
|  |             Value::Null => (), | ||||||
|  |             Value::Bool(boolean) => { let _ = write!(string, "{}", &boolean); }, | ||||||
|  |             Value::Number(number) => { let _ = write!(string, "{}", &number); }, | ||||||
|  |             Value::String(text) => string.push_str(&text), | ||||||
|  |             Value::Array(array) => { | ||||||
|  |                 for value in array { | ||||||
|  |                     internal_value_to_string(string, value); | ||||||
|  |                     let _ = string.write_str(". "); | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             Value::Object(object) => { | ||||||
|  |                 for (key, value) in object { | ||||||
|  |                     string.push_str(key); | ||||||
|  |                     let _ = string.write_str(". "); | ||||||
|  |                     internal_value_to_string(string, value); | ||||||
|  |                     let _ = string.write_str(". "); | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     let mut string = String::new(); | ||||||
|  |     internal_value_to_string(&mut string, value); | ||||||
|  |     string | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Transforms the JSON Value type into a Number. | ||||||
|  | pub fn value_to_number(value: &Value) -> Option<Number> { | ||||||
|  |     use std::str::FromStr; | ||||||
|  |  | ||||||
|  |     match value { | ||||||
|  |         Value::Null => None, | ||||||
|  |         Value::Bool(boolean) => Some(Number::Unsigned(*boolean as u64)), | ||||||
|  |         Value::Number(number) => Number::from_str(&number.to_string()).ok(), // TODO improve that | ||||||
|  |         Value::String(string) => Number::from_str(string).ok(), | ||||||
|  |         Value::Array(_array) => None, | ||||||
|  |         Value::Object(_object) => None, | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Computes the hash of the given value; this is how document ids are produced. | ||||||
|  | pub fn compute_document_id<H: Hash>(t: H) -> DocumentId { | ||||||
|  |     let mut s = SipHasher::new(); | ||||||
|  |     t.hash(&mut s); | ||||||
|  |     let hash = s.finish(); | ||||||
|  |     DocumentId(hash) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Validates a string representation to be a correct document id. | ||||||
|  | pub fn validate_document_id(string: &str) -> bool { | ||||||
|  |     string.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Extracts and validates the document id of a document. | ||||||
|  | pub fn extract_document_id(primary_key: &str, document: &IndexMap<String, Value>) -> Result<DocumentId, SerializerError> { | ||||||
|  |     match document.get(primary_key) { | ||||||
|  |         Some(value) => { | ||||||
|  |             let string = match value { | ||||||
|  |                 Value::Number(number) => number.to_string(), | ||||||
|  |                 Value::String(string) => string.clone(), | ||||||
|  |                 _ => return Err(SerializerError::InvalidDocumentIdFormat), | ||||||
|  |             }; | ||||||
|  |  | ||||||
|  |             if validate_document_id(&string) { | ||||||
|  |                 Ok(compute_document_id(string)) | ||||||
|  |             } else { | ||||||
|  |                 Err(SerializerError::InvalidDocumentIdFormat) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         None => Err(SerializerError::DocumentIdNotFound), | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -3,14 +3,13 @@ mod customs_update; | |||||||
| mod documents_addition; | mod documents_addition; | ||||||
| mod documents_deletion; | mod documents_deletion; | ||||||
| mod settings_update; | mod settings_update; | ||||||
|  | mod helpers; | ||||||
|  |  | ||||||
| pub use self::clear_all::{apply_clear_all, push_clear_all}; | pub use self::clear_all::{apply_clear_all, push_clear_all}; | ||||||
| pub use self::customs_update::{apply_customs_update, push_customs_update}; | pub use self::customs_update::{apply_customs_update, push_customs_update}; | ||||||
| pub use self::documents_addition::{ | pub use self::documents_addition::{apply_documents_addition, apply_documents_partial_addition, DocumentsAddition}; | ||||||
|     apply_documents_addition, apply_documents_partial_addition, DocumentsAddition, |  | ||||||
|     value_to_string, compute_document_id, extract_document_id, |  | ||||||
| }; |  | ||||||
| pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion}; | pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion}; | ||||||
|  | pub use self::helpers::{index_value, value_to_string, value_to_number, compute_document_id, extract_document_id, validate_document_id}; | ||||||
| pub use self::settings_update::{apply_settings_update, push_settings_update}; | pub use self::settings_update::{apply_settings_update, push_settings_update}; | ||||||
|  |  | ||||||
| use std::cmp; | use std::cmp; | ||||||
| @@ -23,6 +22,7 @@ use indexmap::IndexMap; | |||||||
| use log::debug; | use log::debug; | ||||||
| use sdset::Set; | use sdset::Set; | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
|  | use serde_json::Value; | ||||||
|  |  | ||||||
| use crate::{store, DocumentId, MResult}; | use crate::{store, DocumentId, MResult}; | ||||||
| use crate::database::{MainT, UpdateT}; | use crate::database::{MainT, UpdateT}; | ||||||
| @@ -49,14 +49,14 @@ impl Update { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn documents_addition(data: Vec<IndexMap<String, serde_json::Value>>) -> Update { |     fn documents_addition(data: Vec<IndexMap<String, Value>>) -> Update { | ||||||
|         Update { |         Update { | ||||||
|             data: UpdateData::DocumentsAddition(data), |             data: UpdateData::DocumentsAddition(data), | ||||||
|             enqueued_at: Utc::now(), |             enqueued_at: Utc::now(), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn documents_partial(data: Vec<IndexMap<String, serde_json::Value>>) -> Update { |     fn documents_partial(data: Vec<IndexMap<String, Value>>) -> Update { | ||||||
|         Update { |         Update { | ||||||
|             data: UpdateData::DocumentsPartial(data), |             data: UpdateData::DocumentsPartial(data), | ||||||
|             enqueued_at: Utc::now(), |             enqueued_at: Utc::now(), | ||||||
| @@ -82,8 +82,8 @@ impl Update { | |||||||
| pub enum UpdateData { | pub enum UpdateData { | ||||||
|     ClearAll, |     ClearAll, | ||||||
|     Customs(Vec<u8>), |     Customs(Vec<u8>), | ||||||
|     DocumentsAddition(Vec<IndexMap<String, serde_json::Value>>), |     DocumentsAddition(Vec<IndexMap<String, Value>>), | ||||||
|     DocumentsPartial(Vec<IndexMap<String, serde_json::Value>>), |     DocumentsPartial(Vec<IndexMap<String, Value>>), | ||||||
|     DocumentsDeletion(Vec<DocumentId>), |     DocumentsDeletion(Vec<DocumentId>), | ||||||
|     Settings(SettingsUpdate) |     Settings(SettingsUpdate) | ||||||
| } | } | ||||||
|   | |||||||
| @@ -6,6 +6,9 @@ use indexmap::IndexMap; | |||||||
| use serde::Deserialize; | use serde::Deserialize; | ||||||
| use serde_json::Value; | use serde_json::Value; | ||||||
|  |  | ||||||
|  | use meilisearch_core::{Error, serde::SerializerError}; | ||||||
|  | use meilisearch_core::update; | ||||||
|  |  | ||||||
| use crate::error::ResponseError; | use crate::error::ResponseError; | ||||||
| use crate::helpers::Authentication; | use crate::helpers::Authentication; | ||||||
| use crate::routes::{IndexParam, IndexUpdateResponse}; | use crate::routes::{IndexParam, IndexUpdateResponse}; | ||||||
| @@ -42,8 +45,11 @@ async fn get_document( | |||||||
|         .open_index(&path.index_uid) |         .open_index(&path.index_uid) | ||||||
|         .ok_or(ResponseError::index_not_found(&path.index_uid))?; |         .ok_or(ResponseError::index_not_found(&path.index_uid))?; | ||||||
|  |  | ||||||
|     let document_id = meilisearch_core::update::compute_document_id(&path.document_id); |     if !update::validate_document_id(&path.document_id) { | ||||||
|  |         return Err(Error::Serializer(SerializerError::InvalidDocumentIdFormat).into()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     let document_id = update::compute_document_id(&path.document_id); | ||||||
|     let reader = data.db.main_read_txn()?; |     let reader = data.db.main_read_txn()?; | ||||||
|  |  | ||||||
|     let response: Document = index |     let response: Document = index | ||||||
| @@ -65,7 +71,12 @@ async fn delete_document( | |||||||
|         .db |         .db | ||||||
|         .open_index(&path.index_uid) |         .open_index(&path.index_uid) | ||||||
|         .ok_or(ResponseError::index_not_found(&path.index_uid))?; |         .ok_or(ResponseError::index_not_found(&path.index_uid))?; | ||||||
|     let document_id = meilisearch_core::update::compute_document_id(&path.document_id); |  | ||||||
|  |     if !update::validate_document_id(&path.document_id) { | ||||||
|  |         return Err(Error::Serializer(SerializerError::InvalidDocumentIdFormat).into()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     let document_id = update::compute_document_id(&path.document_id); | ||||||
|  |  | ||||||
|     let mut update_writer = data.db.update_write_txn()?; |     let mut update_writer = data.db.update_write_txn()?; | ||||||
|  |  | ||||||
| @@ -237,8 +248,11 @@ async fn delete_documents( | |||||||
|     let mut documents_deletion = index.documents_deletion(); |     let mut documents_deletion = index.documents_deletion(); | ||||||
|  |  | ||||||
|     for document_id in body.into_inner() { |     for document_id in body.into_inner() { | ||||||
|         let document_id_string = meilisearch_core::update::value_to_string(&document_id); |         let document_id_string = update::value_to_string(&document_id); | ||||||
|         let document_id = meilisearch_core::update::compute_document_id(document_id_string); |         if !update::validate_document_id(&document_id_string) { | ||||||
|  |             return Err(Error::Serializer(SerializerError::InvalidDocumentIdFormat).into()) | ||||||
|  |         } | ||||||
|  |         let document_id = update::compute_document_id(document_id_string); | ||||||
|         documents_deletion.delete_document_by_id(document_id); |         documents_deletion.delete_document_by_id(document_id); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user