mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Improve the highlight formatted outputs
This commit is contained in:
		| @@ -17,6 +17,7 @@ pub enum ResponseError { | |||||||
|     DocumentNotFound(String), |     DocumentNotFound(String), | ||||||
|     MissingHeader(String), |     MissingHeader(String), | ||||||
|     BadParameter(String, String), |     BadParameter(String, String), | ||||||
|  |     OpenIndex(String), | ||||||
|     CreateIndex(String), |     CreateIndex(String), | ||||||
|     Maintenance, |     Maintenance, | ||||||
| } | } | ||||||
| @@ -54,6 +55,10 @@ impl ResponseError { | |||||||
|         ResponseError::BadParameter(name.to_string(), message.to_string()) |         ResponseError::BadParameter(name.to_string(), message.to_string()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn open_index(message: impl Display) -> ResponseError { | ||||||
|  |         ResponseError::OpenIndex(message.to_string()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn create_index(message: impl Display) -> ResponseError { |     pub fn create_index(message: impl Display) -> ResponseError { | ||||||
|         ResponseError::CreateIndex(message.to_string()) |         ResponseError::CreateIndex(message.to_string()) | ||||||
|     } |     } | ||||||
| @@ -96,6 +101,10 @@ impl IntoResponse for ResponseError { | |||||||
|                 format!("Impossible to create index; {}", err), |                 format!("Impossible to create index; {}", err), | ||||||
|                 StatusCode::BAD_REQUEST, |                 StatusCode::BAD_REQUEST, | ||||||
|             ), |             ), | ||||||
|  |             ResponseError::OpenIndex(err) => error( | ||||||
|  |                 format!("Impossible to open index; {}", err), | ||||||
|  |                 StatusCode::BAD_REQUEST, | ||||||
|  |             ), | ||||||
|             ResponseError::Maintenance => error( |             ResponseError::Maintenance => error( | ||||||
|                 String::from("Server is in maintenance, please try again later"), |                 String::from("Server is in maintenance, please try again later"), | ||||||
|                 StatusCode::SERVICE_UNAVAILABLE, |                 StatusCode::SERVICE_UNAVAILABLE, | ||||||
|   | |||||||
| @@ -235,43 +235,35 @@ impl<'a> SearchBuilder<'a> { | |||||||
|                 } |                 } | ||||||
|                 fields = Some(set); |                 fields = Some(set); | ||||||
|             } |             } | ||||||
|             let mut document: IndexMap<String, Value> = self |  | ||||||
|  |             let document: IndexMap<String, Value> = self | ||||||
|                 .index |                 .index | ||||||
|                 .document(reader, fields.as_ref(), doc.id) |                 .document(reader, fields.as_ref(), doc.id) | ||||||
|                 .map_err(|e| Error::RetrieveDocument(doc.id.0, e.to_string()))? |                 .map_err(|e| Error::RetrieveDocument(doc.id.0, e.to_string()))? | ||||||
|                 .ok_or(Error::DocumentNotFound(doc.id.0))?; |                 .ok_or(Error::DocumentNotFound(doc.id.0))?; | ||||||
|  |  | ||||||
|  |             let mut formatted = document.clone(); | ||||||
|             let mut matches = doc.highlights.clone(); |             let mut matches = doc.highlights.clone(); | ||||||
|  |  | ||||||
|             // Crops fields if needed |             // Crops fields if needed | ||||||
|             if let Some(fields) = self.attributes_to_crop.clone() { |             if let Some(fields) = &self.attributes_to_crop { | ||||||
|                 for (field, length) in fields { |                 crop_document(&mut formatted, &mut matches, &schema, fields); | ||||||
|                     let _ = crop_document(&mut document, &mut matches, &schema, &field, length); |  | ||||||
|                 } |  | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // Transform to readable matches |             // Transform to readable matches | ||||||
|             let matches = calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema); |             let matches = calculate_matches(matches, self.attributes_to_retrieve.clone(), &schema); | ||||||
|  |  | ||||||
|             if !self.matches { |             if !self.matches { | ||||||
|                 if let Some(attributes_to_highlight) = self.attributes_to_highlight.clone() { |                 if let Some(attributes_to_highlight) = &self.attributes_to_highlight { | ||||||
|                     let highlights = calculate_highlights( |                     formatted = calculate_highlights(&formatted, &matches, attributes_to_highlight); | ||||||
|                         document.clone(), |  | ||||||
|                         matches.clone(), |  | ||||||
|                         attributes_to_highlight, |  | ||||||
|                     ); |  | ||||||
|                     for (key, value) in highlights { |  | ||||||
|                         if let Some(content) = document.get_mut(&key) { |  | ||||||
|                             *content = value; |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             let matches_info = if self.matches { Some(matches) } else { None }; |             let matches_info = if self.matches { Some(matches) } else { None }; | ||||||
|  |  | ||||||
|             let hit = SearchHit { |             let hit = SearchHit { | ||||||
|                 hit: document, |                 document, | ||||||
|  |                 formatted, | ||||||
|                 matches_info, |                 matches_info, | ||||||
|             }; |             }; | ||||||
|  |  | ||||||
| @@ -388,7 +380,9 @@ pub type MatchesInfos = HashMap<String, Vec<MatchPosition>>; | |||||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | #[derive(Debug, Clone, Serialize, Deserialize)] | ||||||
| pub struct SearchHit { | pub struct SearchHit { | ||||||
|     #[serde(flatten)] |     #[serde(flatten)] | ||||||
|     pub hit: IndexMap<String, Value>, |     pub document: IndexMap<String, Value>, | ||||||
|  |     #[serde(rename = "_formatted", skip_serializing_if = "IndexMap::is_empty")] | ||||||
|  |     pub formatted: IndexMap<String, Value>, | ||||||
|     #[serde(rename = "_matchesInfo", skip_serializing_if = "Option::is_none")] |     #[serde(rename = "_matchesInfo", skip_serializing_if = "Option::is_none")] | ||||||
|     pub matches_info: Option<MatchesInfos>, |     pub matches_info: Option<MatchesInfos>, | ||||||
| } | } | ||||||
| @@ -431,32 +425,31 @@ fn crop_document( | |||||||
|     document: &mut IndexMap<String, Value>, |     document: &mut IndexMap<String, Value>, | ||||||
|     matches: &mut Vec<Highlight>, |     matches: &mut Vec<Highlight>, | ||||||
|     schema: &Schema, |     schema: &Schema, | ||||||
|     field: &str, |     fields: &HashMap<String, usize>, | ||||||
|     length: usize, | ) { | ||||||
| ) -> Result<(), Error> { |  | ||||||
|     matches.sort_unstable_by_key(|m| (m.char_index, m.char_length)); |     matches.sort_unstable_by_key(|m| (m.char_index, m.char_length)); | ||||||
|  |  | ||||||
|     let attribute = schema |     for (field, length) in fields { | ||||||
|         .attribute(field) |         let attribute = match schema.attribute(field) { | ||||||
|         .ok_or(Error::AttributeNotFoundOnSchema(field.to_string()))?; |             Some(attribute) => attribute, | ||||||
|     let selected_matches = matches |             None => continue, | ||||||
|         .iter() |         }; | ||||||
|         .filter(|m| SchemaAttr::new(m.attribute) == attribute) |  | ||||||
|         .cloned(); |  | ||||||
|     let original_text = match document.get(field) { |  | ||||||
|         Some(Value::String(text)) => text, |  | ||||||
|         Some(_) => return Err(Error::CropFieldWrongType(field.to_string())), |  | ||||||
|         None => return Err(Error::AttributeNotFoundOnDocument(field.to_string())), |  | ||||||
|     }; |  | ||||||
|     let (cropped_text, cropped_matches) = crop_text(&original_text, selected_matches, length); |  | ||||||
|  |  | ||||||
|     document.insert( |         let selected_matches = matches | ||||||
|         field.to_string(), |             .iter() | ||||||
|         serde_json::value::Value::String(cropped_text), |             .filter(|m| SchemaAttr::new(m.attribute) == attribute) | ||||||
|     ); |             .cloned(); | ||||||
|     matches.retain(|m| SchemaAttr::new(m.attribute) != attribute); |  | ||||||
|     matches.extend_from_slice(&cropped_matches); |         if let Some(Value::String(ref mut original_text)) = document.get_mut(field) { | ||||||
|     Ok(()) |             let (cropped_text, cropped_matches) = | ||||||
|  |                 crop_text(original_text, selected_matches, *length); | ||||||
|  |  | ||||||
|  |             *original_text = cropped_text; | ||||||
|  |  | ||||||
|  |             matches.retain(|m| SchemaAttr::new(m.attribute) != attribute); | ||||||
|  |             matches.extend_from_slice(&cropped_matches); | ||||||
|  |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| fn calculate_matches( | fn calculate_matches( | ||||||
| @@ -496,13 +489,14 @@ fn calculate_matches( | |||||||
| } | } | ||||||
|  |  | ||||||
| fn calculate_highlights( | fn calculate_highlights( | ||||||
|     document: IndexMap<String, Value>, |     document: &IndexMap<String, Value>, | ||||||
|     matches: MatchesInfos, |     matches: &MatchesInfos, | ||||||
|     attributes_to_highlight: HashSet<String>, |     attributes_to_highlight: &HashSet<String>, | ||||||
| ) -> HighlightInfos { | ) -> IndexMap<String, Value> { | ||||||
|     let mut highlight_result: HashMap<String, Value> = HashMap::new(); |     let mut highlight_result = IndexMap::new(); | ||||||
|  |  | ||||||
|     for (attribute, matches) in matches.iter() { |     for (attribute, matches) in matches.iter() { | ||||||
|         if attributes_to_highlight.contains("*") || attributes_to_highlight.contains(attribute) { |         if attributes_to_highlight.contains(attribute) { | ||||||
|             if let Some(Value::String(value)) = document.get(attribute) { |             if let Some(Value::String(value)) = document.get(attribute) { | ||||||
|                 let value: Vec<_> = value.chars().collect(); |                 let value: Vec<_> = value.chars().collect(); | ||||||
|                 let mut highlighted_value = String::new(); |                 let mut highlighted_value = String::new(); | ||||||
| @@ -527,6 +521,7 @@ fn calculate_highlights( | |||||||
|             }; |             }; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     highlight_result |     highlight_result | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -543,9 +538,10 @@ mod tests { | |||||||
|  |  | ||||||
|         let document: IndexMap<String, Value> = serde_json::from_str(data).unwrap(); |         let document: IndexMap<String, Value> = serde_json::from_str(data).unwrap(); | ||||||
|         let mut attributes_to_highlight = HashSet::new(); |         let mut attributes_to_highlight = HashSet::new(); | ||||||
|         attributes_to_highlight.insert("*".to_string()); |         attributes_to_highlight.insert("title".to_string()); | ||||||
|  |         attributes_to_highlight.insert("description".to_string()); | ||||||
|  |  | ||||||
|         let mut matches: HashMap<String, Vec<MatchPosition>> = HashMap::new(); |         let mut matches = HashMap::new(); | ||||||
|  |  | ||||||
|         let mut m = Vec::new(); |         let mut m = Vec::new(); | ||||||
|         m.push(MatchPosition { |         m.push(MatchPosition { | ||||||
| @@ -560,9 +556,9 @@ mod tests { | |||||||
|             length: 9, |             length: 9, | ||||||
|         }); |         }); | ||||||
|         matches.insert("description".to_string(), m); |         matches.insert("description".to_string(), m); | ||||||
|         let result = super::calculate_highlights(document, matches, attributes_to_highlight); |         let result = super::calculate_highlights(&document, &matches, &attributes_to_highlight); | ||||||
|  |  | ||||||
|         let mut result_expected = HashMap::new(); |         let mut result_expected = IndexMap::new(); | ||||||
|         result_expected.insert( |         result_expected.insert( | ||||||
|             "title".to_string(), |             "title".to_string(), | ||||||
|             Value::String("<em>Fondation</em> (Isaac ASIMOV)".to_string()), |             Value::String("<em>Fondation</em> (Isaac ASIMOV)".to_string()), | ||||||
|   | |||||||
| @@ -36,6 +36,12 @@ pub async fn search_with_url_query(ctx: Context<Data>) -> SResult<Response> { | |||||||
|     let env = &ctx.state().db.env; |     let env = &ctx.state().db.env; | ||||||
|     let reader = env.read_txn().map_err(ResponseError::internal)?; |     let reader = env.read_txn().map_err(ResponseError::internal)?; | ||||||
|  |  | ||||||
|  |     let schema = index | ||||||
|  |         .main | ||||||
|  |         .schema(&reader) | ||||||
|  |         .map_err(ResponseError::internal)? | ||||||
|  |         .ok_or(ResponseError::open_index("No Schema found"))?; | ||||||
|  |  | ||||||
|     let query: SearchQuery = ctx |     let query: SearchQuery = ctx | ||||||
|         .url_query() |         .url_query() | ||||||
|         .map_err(|_| ResponseError::bad_request("invalid query parameter"))?; |         .map_err(|_| ResponseError::bad_request("invalid query parameter"))?; | ||||||
| @@ -61,18 +67,31 @@ pub async fn search_with_url_query(ctx: Context<Data>) -> SResult<Response> { | |||||||
|     } |     } | ||||||
|     if let Some(attributes_to_crop) = query.attributes_to_crop { |     if let Some(attributes_to_crop) = query.attributes_to_crop { | ||||||
|         let crop_length = query.crop_length.unwrap_or(200); |         let crop_length = query.crop_length.unwrap_or(200); | ||||||
|         let attributes_to_crop = attributes_to_crop |         if attributes_to_crop == "*" { | ||||||
|             .split(',') |             let attributes_to_crop = schema | ||||||
|             .map(|r| (r.to_string(), crop_length)) |                 .iter() | ||||||
|             .collect(); |                 .map(|(attr, ..)| (attr.to_string(), crop_length)) | ||||||
|         search_builder.attributes_to_crop(attributes_to_crop); |                 .collect(); | ||||||
|  |             search_builder.attributes_to_crop(attributes_to_crop); | ||||||
|  |         } else { | ||||||
|  |             let attributes_to_crop = attributes_to_crop | ||||||
|  |                 .split(',') | ||||||
|  |                 .map(|r| (r.to_string(), crop_length)) | ||||||
|  |                 .collect(); | ||||||
|  |             search_builder.attributes_to_crop(attributes_to_crop); | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if let Some(attributes_to_highlight) = query.attributes_to_highlight { |     if let Some(attributes_to_highlight) = query.attributes_to_highlight { | ||||||
|         let attributes_to_highlight = attributes_to_highlight |         let attributes_to_highlight = if attributes_to_highlight == "*" { | ||||||
|             .split(',') |             schema.iter().map(|(attr, ..)| attr.to_string()).collect() | ||||||
|             .map(ToString::to_string) |         } else { | ||||||
|             .collect(); |             attributes_to_highlight | ||||||
|  |                 .split(',') | ||||||
|  |                 .map(ToString::to_string) | ||||||
|  |                 .collect() | ||||||
|  |         }; | ||||||
|  |  | ||||||
|         search_builder.attributes_to_highlight(attributes_to_highlight); |         search_builder.attributes_to_highlight(attributes_to_highlight); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user