mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Merge pull request #99 from meilisearch/infos-missing-db-names
Add missing databases to the infos subcommand
This commit is contained in:
		| @@ -19,6 +19,8 @@ const WORD_DOCIDS_DB_NAME: &str = "word-docids"; | |||||||
| const WORD_PREFIX_DOCIDS_DB_NAME: &str = "word-prefix-docids"; | const WORD_PREFIX_DOCIDS_DB_NAME: &str = "word-prefix-docids"; | ||||||
| const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions"; | const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions"; | ||||||
| const WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-pair-proximity-docids"; | const WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-pair-proximity-docids"; | ||||||
|  | const FACET_FIELD_ID_VALUE_DOCIDS_NAME: &str = "facet-field-id-value-docids"; | ||||||
|  | const FIELD_ID_DOCID_FACET_VALUES_NAME: &str = "field-id-docid-facet-values"; | ||||||
| const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-prefix-pair-proximity-docids"; | const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-prefix-pair-proximity-docids"; | ||||||
| const DOCUMENTS_DB_NAME: &str = "documents"; | const DOCUMENTS_DB_NAME: &str = "documents"; | ||||||
|  |  | ||||||
| @@ -29,6 +31,8 @@ const ALL_DATABASE_NAMES: &[&str] = &[ | |||||||
|     DOCID_WORD_POSITIONS_DB_NAME, |     DOCID_WORD_POSITIONS_DB_NAME, | ||||||
|     WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME, |     WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME, | ||||||
|     WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME, |     WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME, | ||||||
|  |     FACET_FIELD_ID_VALUE_DOCIDS_NAME, | ||||||
|  |     FIELD_ID_DOCID_FACET_VALUES_NAME, | ||||||
|     DOCUMENTS_DB_NAME, |     DOCUMENTS_DB_NAME, | ||||||
| ]; | ]; | ||||||
|  |  | ||||||
| @@ -110,15 +114,22 @@ enum Command { | |||||||
|         field_name: String, |         field_name: String, | ||||||
|     }, |     }, | ||||||
|  |  | ||||||
|  |     /// Outputs a CSV with the documents ids, words and the positions where this word appears. | ||||||
|  |     DocidsWordsPositions { | ||||||
|  |         /// Display the whole positions in detail. | ||||||
|  |         #[structopt(long)] | ||||||
|  |         full_display: bool, | ||||||
|  |  | ||||||
|  |         /// If defined, only retrieve the documents that corresponds to these internal ids. | ||||||
|  |         internal_documents_ids: Vec<u32>, | ||||||
|  |     }, | ||||||
|  |  | ||||||
|     /// Outputs some facets statistics for the given facet name. |     /// Outputs some facets statistics for the given facet name. | ||||||
|     FacetStats { |     FacetStats { | ||||||
|         /// The field name in the document. |         /// The field name in the document. | ||||||
|         field_name: String, |         field_name: String, | ||||||
|     }, |     }, | ||||||
|  |  | ||||||
|     /// Outputs the total size of all the docid-word-positions keys and values. |  | ||||||
|     TotalDocidWordPositionsSize, |  | ||||||
|  |  | ||||||
|     /// Outputs the average number of *different* words by document. |     /// Outputs the average number of *different* words by document. | ||||||
|     AverageNumberOfWordsByDoc, |     AverageNumberOfWordsByDoc, | ||||||
|  |  | ||||||
| @@ -132,10 +143,12 @@ enum Command { | |||||||
|         database: String, |         database: String, | ||||||
|     }, |     }, | ||||||
|  |  | ||||||
|     /// Outputs the size in bytes of the specified database. |     /// Outputs the size in bytes of the specified databases names. | ||||||
|     SizeOfDatabase { |     SizeOfDatabase { | ||||||
|  |         /// The name of the database to measure the size of, if not specified it's equivalent | ||||||
|  |         /// to specifying all the databases names. | ||||||
|         #[structopt(possible_values = ALL_DATABASE_NAMES)] |         #[structopt(possible_values = ALL_DATABASE_NAMES)] | ||||||
|         database: String, |         databases: Vec<String>, | ||||||
|     }, |     }, | ||||||
|  |  | ||||||
|     /// Outputs a CSV with the proximities for the two specidied words and |     /// Outputs a CSV with the proximities for the two specidied words and | ||||||
| @@ -208,13 +221,15 @@ fn main() -> anyhow::Result<()> { | |||||||
|         FacetValuesDocids { full_display, field_name } => { |         FacetValuesDocids { full_display, field_name } => { | ||||||
|             facet_values_docids(&index, &rtxn, !full_display, field_name) |             facet_values_docids(&index, &rtxn, !full_display, field_name) | ||||||
|         }, |         }, | ||||||
|  |         DocidsWordsPositions { full_display, internal_documents_ids } => { | ||||||
|  |             docids_words_positions(&index, &rtxn, !full_display, internal_documents_ids) | ||||||
|  |         }, | ||||||
|         FacetStats { field_name } => facet_stats(&index, &rtxn, field_name), |         FacetStats { field_name } => facet_stats(&index, &rtxn, field_name), | ||||||
|         TotalDocidWordPositionsSize => total_docid_word_positions_size(&index, &rtxn), |  | ||||||
|         AverageNumberOfWordsByDoc => average_number_of_words_by_doc(&index, &rtxn), |         AverageNumberOfWordsByDoc => average_number_of_words_by_doc(&index, &rtxn), | ||||||
|         AverageNumberOfPositionsByWord => { |         AverageNumberOfPositionsByWord => { | ||||||
|             average_number_of_positions_by_word(&index, &rtxn) |             average_number_of_positions_by_word(&index, &rtxn) | ||||||
|         }, |         }, | ||||||
|         SizeOfDatabase { database } => size_of_database(&index, &rtxn, &database), |         SizeOfDatabase { databases } => size_of_databases(&index, &rtxn, databases), | ||||||
|         DatabaseStats { database } => database_stats(&index, &rtxn, &database), |         DatabaseStats { database } => database_stats(&index, &rtxn, &database), | ||||||
|         WordPairProximitiesDocids { full_display, word1, word2 } => { |         WordPairProximitiesDocids { full_display, word1, word2 } => { | ||||||
|             word_pair_proximities_docids(&index, &rtxn, !full_display, word1, word2) |             word_pair_proximities_docids(&index, &rtxn, !full_display, word1, word2) | ||||||
| @@ -525,6 +540,39 @@ fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_nam | |||||||
|     Ok(wtr.flush()?) |     Ok(wtr.flush()?) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | fn docids_words_positions( | ||||||
|  |     index: &Index, | ||||||
|  |     rtxn: &heed::RoTxn, | ||||||
|  |     debug: bool, | ||||||
|  |     internal_ids: Vec<u32>, | ||||||
|  | ) -> anyhow::Result<()> | ||||||
|  | { | ||||||
|  |     let stdout = io::stdout(); | ||||||
|  |     let mut wtr = csv::Writer::from_writer(stdout.lock()); | ||||||
|  |     wtr.write_record(&["document_id", "word", "positions"])?; | ||||||
|  |  | ||||||
|  |     let iter: Box<dyn Iterator<Item = _>> = if internal_ids.is_empty() { | ||||||
|  |         Box::new(index.docid_word_positions.iter(rtxn)?) | ||||||
|  |     } else { | ||||||
|  |         let vec: heed::Result<Vec<_>> = internal_ids.into_iter().map(|id| { | ||||||
|  |             index.docid_word_positions.prefix_iter(rtxn, &(id, "")) | ||||||
|  |         }).collect(); | ||||||
|  |         Box::new(vec?.into_iter().flatten()) | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     for result in iter { | ||||||
|  |         let ((id, word), positions) = result?; | ||||||
|  |         let positions = if debug { | ||||||
|  |             format!("{:?}", positions) | ||||||
|  |         } else { | ||||||
|  |             format!("{:?}", positions.iter().collect::<Vec<_>>()) | ||||||
|  |         }; | ||||||
|  |         wtr.write_record(&[&id.to_string(), word, &positions])?; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     Ok(wtr.flush()?) | ||||||
|  | } | ||||||
|  |  | ||||||
| fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow::Result<()> { | fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow::Result<()> { | ||||||
|     let fields_ids_map = index.fields_ids_map(&rtxn)?; |     let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||||
|     let faceted_fields = index.faceted_fields_ids(&rtxn)?; |     let faceted_fields = index.faceted_fields_ids(&rtxn)?; | ||||||
| @@ -620,28 +668,6 @@ fn export_documents(index: &Index, rtxn: &heed::RoTxn, internal_ids: Vec<u32>) - | |||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
| fn total_docid_word_positions_size(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { |  | ||||||
|     use heed::types::ByteSlice; |  | ||||||
|  |  | ||||||
|     let mut total_key_size = 0; |  | ||||||
|     let mut total_val_size = 0; |  | ||||||
|     let mut count = 0; |  | ||||||
|  |  | ||||||
|     let iter = index.docid_word_positions.as_polymorph().iter::<_, ByteSlice, ByteSlice>(rtxn)?; |  | ||||||
|     for result in iter { |  | ||||||
|         let (key, val) = result?; |  | ||||||
|         total_key_size += key.len(); |  | ||||||
|         total_val_size += val.len(); |  | ||||||
|         count += 1; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     println!("number of keys: {}", count); |  | ||||||
|     println!("total key size: {}", total_key_size); |  | ||||||
|     println!("total value size: {}", total_val_size); |  | ||||||
|  |  | ||||||
|     Ok(()) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { | fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { | ||||||
|     use heed::types::DecodeIgnore; |     use heed::types::DecodeIgnore; | ||||||
|     use milli::{DocumentId, BEU32StrCodec}; |     use milli::{DocumentId, BEU32StrCodec}; | ||||||
| @@ -703,32 +729,42 @@ fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> any | |||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
| fn size_of_database(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Result<()> { | fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec<String>) -> anyhow::Result<()> { | ||||||
|     use heed::types::ByteSlice; |     use heed::types::ByteSlice; | ||||||
|  |  | ||||||
|     let database = match name { |     let names = if names.is_empty() { | ||||||
|         MAIN_DB_NAME => &index.main, |         ALL_DATABASE_NAMES.iter().map(|s| s.to_string()).collect() | ||||||
|         WORD_PREFIX_DOCIDS_DB_NAME => index.word_prefix_docids.as_polymorph(), |     } else { | ||||||
|         WORD_DOCIDS_DB_NAME => index.word_docids.as_polymorph(), |         names | ||||||
|         DOCID_WORD_POSITIONS_DB_NAME => index.docid_word_positions.as_polymorph(), |  | ||||||
|         WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => index.word_pair_proximity_docids.as_polymorph(), |  | ||||||
|         WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME => index.word_prefix_pair_proximity_docids.as_polymorph(), |  | ||||||
|         DOCUMENTS_DB_NAME => index.documents.as_polymorph(), |  | ||||||
|         unknown => anyhow::bail!("unknown database {:?}", unknown), |  | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     let mut key_size: u64 = 0; |     for name in names { | ||||||
|     let mut val_size: u64 = 0; |         let database = match name.as_str() { | ||||||
|     for result in database.iter::<_, ByteSlice, ByteSlice>(rtxn)? { |             MAIN_DB_NAME => &index.main, | ||||||
|         let (k, v) = result?; |             WORD_PREFIX_DOCIDS_DB_NAME => index.word_prefix_docids.as_polymorph(), | ||||||
|         key_size += k.len() as u64; |             WORD_DOCIDS_DB_NAME => index.word_docids.as_polymorph(), | ||||||
|         val_size += v.len() as u64; |             DOCID_WORD_POSITIONS_DB_NAME => index.docid_word_positions.as_polymorph(), | ||||||
|     } |             WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => index.word_pair_proximity_docids.as_polymorph(), | ||||||
|  |             WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME => index.word_prefix_pair_proximity_docids.as_polymorph(), | ||||||
|  |             FACET_FIELD_ID_VALUE_DOCIDS_NAME => index.facet_field_id_value_docids.as_polymorph(), | ||||||
|  |             FIELD_ID_DOCID_FACET_VALUES_NAME => index.field_id_docid_facet_values.as_polymorph(), | ||||||
|  |             DOCUMENTS_DB_NAME => index.documents.as_polymorph(), | ||||||
|  |             unknown => anyhow::bail!("unknown database {:?}", unknown), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|     println!("The {} database weigh:", name); |         let mut key_size: u64 = 0; | ||||||
|     println!("\ttotal key size: {} bytes", key_size); |         let mut val_size: u64 = 0; | ||||||
|     println!("\ttotal val size: {} bytes", val_size); |         for result in database.iter::<_, ByteSlice, ByteSlice>(rtxn)? { | ||||||
|     println!("\ttotal size: {} bytes", key_size + val_size); |             let (k, v) = result?; | ||||||
|  |             key_size += k.len() as u64; | ||||||
|  |             val_size += v.len() as u64; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         println!("The {} database weigh:", name); | ||||||
|  |         println!("\ttotal key size: {}", Byte::from(key_size).get_appropriate_unit(true)); | ||||||
|  |         println!("\ttotal val size: {}", Byte::from(val_size).get_appropriate_unit(true)); | ||||||
|  |         println!("\ttotal size: {}", Byte::from(key_size + val_size).get_appropriate_unit(true)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
| @@ -782,9 +818,9 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu | |||||||
|         println!("\tminimum: {}", minimum); |         println!("\tminimum: {}", minimum); | ||||||
|         println!("\tmaximum: {}", maximum); |         println!("\tmaximum: {}", maximum); | ||||||
|         println!("\taverage: {}", sum as f64 / count as f64); |         println!("\taverage: {}", sum as f64 / count as f64); | ||||||
|         println!("\ttotal key size: {} bytes", key_size); |         println!("\ttotal key size: {}", Byte::from(key_size).get_appropriate_unit(true)); | ||||||
|         println!("\ttotal val size: {} bytes", val_size); |         println!("\ttotal val size: {}", Byte::from(val_size).get_appropriate_unit(true)); | ||||||
|         println!("\ttotal size: {} bytes", key_size + val_size); |         println!("\ttotal size: {}", Byte::from(key_size + val_size).get_appropriate_unit(true)); | ||||||
|  |  | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user