mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	write the dump export
This commit is contained in:
		| @@ -9,5 +9,7 @@ pub mod tasks; | ||||
| pub use milli; | ||||
| pub use milli::heed; | ||||
| pub use milli::Index; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| pub type Document = serde_json::Map<String, serde_json::Value>; | ||||
| pub type InstanceUid = Uuid; | ||||
|   | ||||
| @@ -2,9 +2,15 @@ use std::collections::{BTreeMap, BTreeSet}; | ||||
| use std::marker::PhantomData; | ||||
| use std::num::NonZeroUsize; | ||||
|  | ||||
| use fst::IntoStreamer; | ||||
| use milli::update::Setting; | ||||
| use milli::{Index, DEFAULT_VALUES_PER_FACET}; | ||||
| use serde::{Deserialize, Serialize, Serializer}; | ||||
|  | ||||
| /// The maximum number of results that the engine | ||||
| /// will be able to return in one search call. | ||||
| pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000; | ||||
|  | ||||
| fn serialize_with_wildcard<S>( | ||||
|     field: &Setting<Vec<String>>, | ||||
|     s: S, | ||||
| @@ -366,6 +372,114 @@ pub fn apply_settings_to_builder( | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub fn settings( | ||||
|     index: &Index, | ||||
|     rtxn: &crate::heed::RoTxn, | ||||
| ) -> Result<Settings<Checked>, milli::Error> { | ||||
|     let displayed_attributes = index | ||||
|         .displayed_fields(rtxn)? | ||||
|         .map(|fields| fields.into_iter().map(String::from).collect()); | ||||
|  | ||||
|     let searchable_attributes = index | ||||
|         .user_defined_searchable_fields(rtxn)? | ||||
|         .map(|fields| fields.into_iter().map(String::from).collect()); | ||||
|  | ||||
|     let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect(); | ||||
|  | ||||
|     let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect(); | ||||
|  | ||||
|     let criteria = index | ||||
|         .criteria(rtxn)? | ||||
|         .into_iter() | ||||
|         .map(|c| c.to_string()) | ||||
|         .collect(); | ||||
|  | ||||
|     let stop_words = index | ||||
|         .stop_words(rtxn)? | ||||
|         .map(|stop_words| -> Result<BTreeSet<_>, milli::Error> { | ||||
|             Ok(stop_words.stream().into_strs()?.into_iter().collect()) | ||||
|         }) | ||||
|         .transpose()? | ||||
|         .unwrap_or_default(); | ||||
|     let distinct_field = index.distinct_field(rtxn)?.map(String::from); | ||||
|  | ||||
|     // In milli, each word in the synonyms map was split on its separator. Since we lost | ||||
|     // this information, we are going to put a space between words. | ||||
|     let synonyms = index | ||||
|         .synonyms(rtxn)? | ||||
|         .iter() | ||||
|         .map(|(key, values)| { | ||||
|             ( | ||||
|                 key.join(" "), | ||||
|                 values.iter().map(|value| value.join(" ")).collect(), | ||||
|             ) | ||||
|         }) | ||||
|         .collect(); | ||||
|  | ||||
|     let min_typo_word_len = MinWordSizeTyposSetting { | ||||
|         one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?), | ||||
|         two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?), | ||||
|     }; | ||||
|  | ||||
|     let disabled_words = match index.exact_words(rtxn)? { | ||||
|         Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(), | ||||
|         None => BTreeSet::new(), | ||||
|     }; | ||||
|  | ||||
|     let disabled_attributes = index | ||||
|         .exact_attributes(rtxn)? | ||||
|         .into_iter() | ||||
|         .map(String::from) | ||||
|         .collect(); | ||||
|  | ||||
|     let typo_tolerance = TypoSettings { | ||||
|         enabled: Setting::Set(index.authorize_typos(rtxn)?), | ||||
|         min_word_size_for_typos: Setting::Set(min_typo_word_len), | ||||
|         disable_on_words: Setting::Set(disabled_words), | ||||
|         disable_on_attributes: Setting::Set(disabled_attributes), | ||||
|     }; | ||||
|  | ||||
|     let faceting = FacetingSettings { | ||||
|         max_values_per_facet: Setting::Set( | ||||
|             index | ||||
|                 .max_values_per_facet(rtxn)? | ||||
|                 .unwrap_or(DEFAULT_VALUES_PER_FACET), | ||||
|         ), | ||||
|     }; | ||||
|  | ||||
|     let pagination = PaginationSettings { | ||||
|         max_total_hits: Setting::Set( | ||||
|             index | ||||
|                 .pagination_max_total_hits(rtxn)? | ||||
|                 .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS), | ||||
|         ), | ||||
|     }; | ||||
|  | ||||
|     Ok(Settings { | ||||
|         displayed_attributes: match displayed_attributes { | ||||
|             Some(attrs) => Setting::Set(attrs), | ||||
|             None => Setting::Reset, | ||||
|         }, | ||||
|         searchable_attributes: match searchable_attributes { | ||||
|             Some(attrs) => Setting::Set(attrs), | ||||
|             None => Setting::Reset, | ||||
|         }, | ||||
|         filterable_attributes: Setting::Set(filterable_attributes), | ||||
|         sortable_attributes: Setting::Set(sortable_attributes), | ||||
|         ranking_rules: Setting::Set(criteria), | ||||
|         stop_words: Setting::Set(stop_words), | ||||
|         distinct_attribute: match distinct_field { | ||||
|             Some(field) => Setting::Set(field), | ||||
|             None => Setting::Reset, | ||||
|         }, | ||||
|         synonyms: Setting::Set(synonyms), | ||||
|         typo_tolerance: Setting::Set(typo_tolerance), | ||||
|         faceting: Setting::Set(faceting), | ||||
|         pagination: Setting::Set(pagination), | ||||
|         _kind: PhantomData, | ||||
|     }) | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| pub(crate) mod test { | ||||
|     use proptest::prelude::*; | ||||
|   | ||||
| @@ -3,7 +3,6 @@ use roaring::RoaringBitmap; | ||||
| use serde::{Deserialize, Serialize, Serializer}; | ||||
| use std::{ | ||||
|     fmt::{Display, Write}, | ||||
|     path::PathBuf, | ||||
|     str::FromStr, | ||||
| }; | ||||
| use time::{Duration, OffsetDateTime}; | ||||
| @@ -11,7 +10,9 @@ use uuid::Uuid; | ||||
|  | ||||
| use crate::{ | ||||
|     error::{Code, ResponseError}, | ||||
|     keys::Key, | ||||
|     settings::{Settings, Unchecked}, | ||||
|     InstanceUid, | ||||
| }; | ||||
|  | ||||
| pub type TaskId = u32; | ||||
| @@ -71,6 +72,26 @@ impl Task { | ||||
|             IndexSwap { lhs, rhs } => Some(vec![lhs, rhs]), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Return the content-uuid if there is one | ||||
|     pub fn content_uuid(&self) -> Option<&Uuid> { | ||||
|         match self.kind { | ||||
|             KindWithContent::DocumentImport { | ||||
|                 ref content_file, .. | ||||
|             } => Some(content_file), | ||||
|             KindWithContent::DocumentDeletion { .. } | ||||
|             | KindWithContent::DocumentClear { .. } | ||||
|             | KindWithContent::Settings { .. } | ||||
|             | KindWithContent::IndexDeletion { .. } | ||||
|             | KindWithContent::IndexCreation { .. } | ||||
|             | KindWithContent::IndexUpdate { .. } | ||||
|             | KindWithContent::IndexSwap { .. } | ||||
|             | KindWithContent::CancelTask { .. } | ||||
|             | KindWithContent::DeleteTasks { .. } | ||||
|             | KindWithContent::DumpExport { .. } | ||||
|             | KindWithContent::Snapshot => None, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] | ||||
| @@ -120,7 +141,9 @@ pub enum KindWithContent { | ||||
|         tasks: RoaringBitmap, | ||||
|     }, | ||||
|     DumpExport { | ||||
|         output: PathBuf, | ||||
|         dump_uid: String, | ||||
|         keys: Vec<Key>, | ||||
|         instance_uid: Option<InstanceUid>, | ||||
|     }, | ||||
|     Snapshot, | ||||
| } | ||||
| @@ -167,7 +190,7 @@ impl KindWithContent { | ||||
|                 documents_count, .. | ||||
|             } => Some(Details::DocumentAddition { | ||||
|                 received_documents: *documents_count, | ||||
|                 indexed_documents: 0, | ||||
|                 indexed_documents: Some(0), | ||||
|             }), | ||||
|             KindWithContent::DocumentDeletion { | ||||
|                 index_uid: _, | ||||
| @@ -204,6 +227,38 @@ impl KindWithContent { | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<&KindWithContent> for Option<Details> { | ||||
|     fn from(kind: &KindWithContent) -> Self { | ||||
|         match kind { | ||||
|             KindWithContent::DocumentImport { | ||||
|                 documents_count, .. | ||||
|             } => Some(Details::DocumentAddition { | ||||
|                 received_documents: *documents_count, | ||||
|                 indexed_documents: None, | ||||
|             }), | ||||
|             KindWithContent::DocumentDeletion { .. } => None, | ||||
|             KindWithContent::DocumentClear { .. } => None, | ||||
|             KindWithContent::Settings { new_settings, .. } => Some(Details::Settings { | ||||
|                 settings: new_settings.clone(), | ||||
|             }), | ||||
|             KindWithContent::IndexDeletion { .. } => None, | ||||
|             KindWithContent::IndexCreation { primary_key, .. } => Some(Details::IndexInfo { | ||||
|                 primary_key: primary_key.clone(), | ||||
|             }), | ||||
|             KindWithContent::IndexUpdate { primary_key, .. } => Some(Details::IndexInfo { | ||||
|                 primary_key: primary_key.clone(), | ||||
|             }), | ||||
|             KindWithContent::IndexSwap { .. } => None, | ||||
|             KindWithContent::CancelTask { .. } => None, | ||||
|             KindWithContent::DeleteTasks { .. } => todo!(), | ||||
|             KindWithContent::DumpExport { dump_uid, .. } => Some(Details::Dump { | ||||
|                 dump_uid: dump_uid.clone(), | ||||
|             }), | ||||
|             KindWithContent::Snapshot => None, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub enum Status { | ||||
| @@ -289,7 +344,7 @@ impl FromStr for Kind { | ||||
| pub enum Details { | ||||
|     DocumentAddition { | ||||
|         received_documents: u64, | ||||
|         indexed_documents: u64, | ||||
|         indexed_documents: Option<u64>, | ||||
|     }, | ||||
|     Settings { | ||||
|         settings: Settings<Unchecked>, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user