Introduce an infos command to extract the words prefixes fst

This commit is contained in:
Kerollmops
2021-02-10 12:18:56 +01:00
committed by Clément Renault
parent a4a48be923
commit 7a0f86a04f

View File

@@ -153,6 +153,12 @@ enum Command {
/// you can install it using `cargo install fst-bin`. /// you can install it using `cargo install fst-bin`.
ExportWordsFst, ExportWordsFst,
/// Outputs the words prefix FST to standard output.
///
/// One can use the FST binary helper to dissect and analyze it,
/// you can install it using `cargo install fst-bin`.
ExportWordsPrefixFst,
/// Outputs the documents as JSON lines to the standard output. /// Outputs the documents as JSON lines to the standard output.
/// ///
/// All of the fields are extracted, not just the displayed ones. /// All of the fields are extracted, not just the displayed ones.
@@ -207,6 +213,7 @@ fn run(opt: Opt) -> anyhow::Result<()> {
word_pair_proximities_docids(&index, &rtxn, !full_display, word1, word2) word_pair_proximities_docids(&index, &rtxn, !full_display, word1, word2)
}, },
ExportWordsFst => export_words_fst(&index, &rtxn), ExportWordsFst => export_words_fst(&index, &rtxn),
ExportWordsPrefixFst => export_words_prefix_fst(&index, &rtxn),
ExportDocuments => export_documents(&index, &rtxn), ExportDocuments => export_documents(&index, &rtxn),
PatchToNewExternalIds => { PatchToNewExternalIds => {
drop(rtxn); drop(rtxn);
@@ -548,6 +555,16 @@ fn export_words_fst(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
Ok(()) Ok(())
} }
/// Writes the raw bytes of the index's words-prefixes FST to standard output.
///
/// The bytes are emitted exactly as returned by `as_fst().as_bytes()`; nothing
/// else is written, so the output can be redirected to a file or piped into
/// another tool.
///
/// # Errors
///
/// Returns an error if the words-prefixes FST cannot be fetched from the
/// index, or if writing to or flushing standard output fails.
fn export_words_prefix_fst(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
    use std::io::Write as _;

    let mut stdout = io::stdout();
    let words_prefixes_fst = index.words_prefixes_fst(rtxn)?;
    stdout.write_all(words_prefixes_fst.as_fst().as_bytes())?;
    // Standard output is buffered: flush explicitly so that a failure while
    // emitting the last bytes is reported to the caller instead of being
    // dropped when the process exits.
    stdout.flush()?;

    Ok(())
}
fn export_documents(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { fn export_documents(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use std::io::{BufWriter, Write as _}; use std::io::{BufWriter, Write as _};
use milli::obkv_to_json; use milli::obkv_to_json;