mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-18 20:30:47 +00:00
Compare commits
5 Commits
binary-qua
...
prototype-
Author | SHA1 | Date | |
---|---|---|---|
00caf25ea9 | |||
dc2546af21 | |||
077797b50a | |||
cde36923c5 | |||
01bacf500e |
178
meilisearch/src/routes/indexes/facet_search.rs
Normal file
178
meilisearch/src/routes/indexes/facet_search.rs
Normal file
@ -0,0 +1,178 @@
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::facet;
|
||||
use meilisearch_types::serde_cs::vec::CS;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::analytics::{Analytics, SearchAggregator};
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(search)));
|
||||
}
|
||||
|
||||
// #[derive(Debug, deserr::Deserr)]
|
||||
// #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
// pub struct FacetSearchQuery {
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchQ>)]
|
||||
// facetQuery: Option<String>,
|
||||
// #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSearchOffset>)]
|
||||
// offset: Param<usize>,
|
||||
// #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSearchLimit>)]
|
||||
// limit: Param<usize>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchPage>)]
|
||||
// page: Option<Param<usize>>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchHitsPerPage>)]
|
||||
// hits_per_page: Option<Param<usize>>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)]
|
||||
// attributes_to_retrieve: Option<CS<String>>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)]
|
||||
// attributes_to_crop: Option<CS<String>>,
|
||||
// #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)]
|
||||
// crop_length: Param<usize>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToHighlight>)]
|
||||
// attributes_to_highlight: Option<CS<String>>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchFilter>)]
|
||||
// filter: Option<String>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
|
||||
// sort: Option<String>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
|
||||
// show_matches_position: Param<bool>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchFacets>)]
|
||||
// facets: Option<CS<String>>,
|
||||
// #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPreTag>)]
|
||||
// highlight_pre_tag: String,
|
||||
// #[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPostTag>)]
|
||||
// highlight_post_tag: String,
|
||||
// #[deserr(default = DEFAULT_CROP_MARKER(), error = DeserrQueryParamError<InvalidSearchCropMarker>)]
|
||||
// crop_marker: String,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchMatchingStrategy>)]
|
||||
// matching_strategy: MatchingStrategy,
|
||||
// }
|
||||
|
||||
// TODO improve the error messages
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct FacetSearchQuery {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
|
||||
pub facet_query: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
|
||||
pub facet_name: String,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
|
||||
pub q: Option<String>,
|
||||
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
||||
pub offset: usize,
|
||||
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
|
||||
pub limit: usize,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
|
||||
pub page: Option<usize>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
|
||||
pub hits_per_page: Option<usize>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
|
||||
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
|
||||
pub attributes_to_crop: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
|
||||
pub crop_length: usize,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
|
||||
pub attributes_to_highlight: Option<HashSet<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
|
||||
pub show_matches_position: bool,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
|
||||
pub filter: Option<Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
|
||||
pub sort: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
|
||||
pub facets: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
|
||||
pub highlight_pre_tag: String,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
|
||||
pub highlight_post_tag: String,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
|
||||
pub crop_marker: String,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
|
||||
pub matching_strategy: MatchingStrategy,
|
||||
}
|
||||
|
||||
pub async fn search(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebJson<FacetSearchQuery, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let query = params.into_inner();
|
||||
debug!("facet search called with params: {:?}", query);
|
||||
|
||||
let facet_query = query.facet_query.clone();
|
||||
let facet_name = query.facet_name.clone();
|
||||
let mut search_query = SearchQuery::from(query);
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||
add_search_rules(&mut search_query, search_rules);
|
||||
}
|
||||
|
||||
// TODO log stuff
|
||||
// let mut aggregate = SearchAggregator::from_query(&query, &req);
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_facet_search(&index, search_query, facet_query, facet_name)
|
||||
})
|
||||
.await?;
|
||||
|
||||
// TODO log stuff
|
||||
// if let Ok(ref search_result) = search_result {
|
||||
// aggregate.succeed(search_result);
|
||||
// }
|
||||
// TODO analytics
|
||||
// analytics.post_search(aggregate);
|
||||
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!("returns: {:?}", search_result);
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
impl From<FacetSearchQuery> for SearchQuery {
|
||||
fn from(value: FacetSearchQuery) -> Self {
|
||||
SearchQuery {
|
||||
q: value.q,
|
||||
offset: value.offset,
|
||||
limit: value.limit,
|
||||
page: value.page,
|
||||
hits_per_page: value.hits_per_page,
|
||||
attributes_to_retrieve: value.attributes_to_retrieve,
|
||||
attributes_to_crop: value.attributes_to_crop,
|
||||
crop_length: value.crop_length,
|
||||
attributes_to_highlight: value.attributes_to_highlight,
|
||||
show_matches_position: value.show_matches_position,
|
||||
filter: value.filter,
|
||||
sort: value.sort,
|
||||
facets: value.facets,
|
||||
highlight_pre_tag: value.highlight_pre_tag,
|
||||
highlight_post_tag: value.highlight_post_tag,
|
||||
crop_marker: value.crop_marker,
|
||||
matching_strategy: value.matching_strategy,
|
||||
}
|
||||
}
|
||||
}
|
@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
|
||||
pub mod documents;
|
||||
pub mod facet_search;
|
||||
pub mod search;
|
||||
pub mod settings;
|
||||
|
||||
@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
|
||||
.service(web::scope("/documents").configure(documents::configure))
|
||||
.service(web::scope("/search").configure(search::configure))
|
||||
.service(web::scope("/facet-search").configure(facet_search::configure))
|
||||
.service(web::scope("/settings").configure(settings::configure)),
|
||||
);
|
||||
}
|
||||
|
@ -3,12 +3,15 @@ use std::collections::{BTreeMap, BTreeSet, HashSet};
|
||||
use std::str::FromStr;
|
||||
use std::time::Instant;
|
||||
|
||||
use actix_http::header::q;
|
||||
use deserr::Deserr;
|
||||
use either::Either;
|
||||
use meilisearch_auth::IndexSearchRules;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::{facet, FacetSearchResult, SearchForFacetValue};
|
||||
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||
use meilisearch_types::{milli, Document};
|
||||
use milli::tokenizer::TokenizerBuilder;
|
||||
@ -21,6 +24,7 @@ use serde::Serialize;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::routes::indexes::facet_search::FacetSearchQuery;
|
||||
|
||||
type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
|
||||
|
||||
@ -170,7 +174,7 @@ impl SearchQueryWithIndex {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
|
||||
#[deserr(rename_all = camelCase)]
|
||||
pub enum MatchingStrategy {
|
||||
/// Remove query words from last to first
|
||||
@ -261,13 +265,11 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn perform_search(
|
||||
index: &Index,
|
||||
query: SearchQuery,
|
||||
) -> Result<SearchResult, MeilisearchHttpError> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
fn prepare_search<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t RoTxn,
|
||||
query: &'t SearchQuery,
|
||||
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
|
||||
let mut search = index.search(&rtxn);
|
||||
|
||||
if let Some(ref query) = query.q {
|
||||
@ -320,6 +322,19 @@ pub fn perform_search(
|
||||
search.sort_criteria(sort);
|
||||
}
|
||||
|
||||
Ok((search, is_finite_pagination, max_total_hits, offset))
|
||||
}
|
||||
|
||||
pub fn perform_search(
|
||||
index: &Index,
|
||||
query: SearchQuery,
|
||||
) -> Result<SearchResult, MeilisearchHttpError> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
let (search, is_finite_pagination, max_total_hits, offset) =
|
||||
prepare_search(index, &rtxn, &query)?;
|
||||
|
||||
let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
@ -473,6 +488,24 @@ pub fn perform_search(
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn perform_facet_search(
|
||||
index: &Index,
|
||||
search_query: SearchQuery,
|
||||
mut facet_query: Option<String>,
|
||||
facet_name: String,
|
||||
) -> Result<Vec<FacetSearchResult>, MeilisearchHttpError> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query)?;
|
||||
let mut facet_search = SearchForFacetValue::new(facet_name, search);
|
||||
if let Some(facet_query) = facet_query.take() {
|
||||
facet_search.query(facet_query);
|
||||
}
|
||||
|
||||
facet_search.execute().map_err(Into::into)
|
||||
}
|
||||
|
||||
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
|
||||
lazy_static::lazy_static! {
|
||||
static ref GEO_REGEX: Regex =
|
||||
|
23
milli/src/heed_codec/fst_set_codec.rs
Normal file
23
milli/src/heed_codec/fst_set_codec.rs
Normal file
@ -0,0 +1,23 @@
|
||||
use fst::Set;
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
|
||||
/// A codec for values of type `Set<&[u8]>`.
|
||||
pub struct FstSetCodec;
|
||||
|
||||
impl<'a> BytesEncode<'a> for FstSetCodec {
|
||||
type EItem = Set<Vec<u8>>;
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item.as_fst().as_bytes()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for FstSetCodec {
|
||||
type DItem = Set<&'a [u8]>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
Some(Set::new(bytes).ok()?)
|
||||
}
|
||||
}
|
@ -2,6 +2,7 @@ mod beu32_str_codec;
|
||||
mod byte_slice_ref;
|
||||
pub mod facet;
|
||||
mod field_id_word_count_codec;
|
||||
mod fst_set_codec;
|
||||
mod obkv_codec;
|
||||
mod roaring_bitmap;
|
||||
mod roaring_bitmap_length;
|
||||
@ -15,6 +16,7 @@ pub use str_ref::StrRefCodec;
|
||||
|
||||
pub use self::beu32_str_codec::BEU32StrCodec;
|
||||
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
|
||||
pub use self::fst_set_codec::FstSetCodec;
|
||||
pub use self::obkv_codec::ObkvCodec;
|
||||
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
|
||||
pub use self::roaring_bitmap_length::{
|
||||
|
@ -19,7 +19,7 @@ use crate::heed_codec::facet::{
|
||||
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||
FieldIdCodec, OrderedF64Codec,
|
||||
};
|
||||
use crate::heed_codec::{ScriptLanguageCodec, StrRefCodec};
|
||||
use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrRefCodec};
|
||||
use crate::{
|
||||
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
||||
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
|
||||
@ -81,6 +81,7 @@ pub mod db_name {
|
||||
pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
|
||||
pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
|
||||
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
|
||||
pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
|
||||
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
|
||||
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
||||
pub const DOCUMENTS: &str = "documents";
|
||||
@ -134,6 +135,8 @@ pub struct Index {
|
||||
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||
/// Maps the facet field id and ranges of strings with the docids that corresponds to them.
|
||||
pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
|
||||
/// Maps the facet field id of the string facets with an FST containing all the facets values.
|
||||
pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,
|
||||
|
||||
/// Maps the document id, the facet field id and the numbers.
|
||||
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
|
||||
@ -153,7 +156,7 @@ impl Index {
|
||||
) -> Result<Index> {
|
||||
use db_name::*;
|
||||
|
||||
options.max_dbs(19);
|
||||
options.max_dbs(20);
|
||||
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
|
||||
|
||||
let env = options.open(path)?;
|
||||
@ -174,6 +177,7 @@ impl Index {
|
||||
let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?;
|
||||
let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
|
||||
let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
|
||||
let facet_id_string_fst = env.create_database(Some(FACET_ID_STRING_FST))?;
|
||||
let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
|
||||
|
||||
let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
|
||||
@ -200,6 +204,7 @@ impl Index {
|
||||
field_id_word_count_docids,
|
||||
facet_id_f64_docids,
|
||||
facet_id_string_docids,
|
||||
facet_id_string_fst,
|
||||
facet_id_exists_docids,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
|
@ -43,9 +43,9 @@ pub use self::heed_codec::{
|
||||
};
|
||||
pub use self::index::Index;
|
||||
pub use self::search::{
|
||||
CriterionImplementationStrategy, FacetDistribution, Filter, FormatOptions, MatchBounds,
|
||||
MatcherBuilder, MatchingWord, MatchingWords, Search, SearchResult, TermsMatchingStrategy,
|
||||
DEFAULT_VALUES_PER_FACET,
|
||||
CriterionImplementationStrategy, FacetDistribution, FacetSearchResult, Filter, FormatOptions,
|
||||
MatchBounds, MatcherBuilder, MatchingWord, MatchingWords, Search, SearchForFacetValue,
|
||||
SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
||||
};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||
|
@ -22,15 +22,20 @@ pub use self::matches::{
|
||||
};
|
||||
use self::query_tree::QueryTreeBuilder;
|
||||
use crate::error::UserError;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
|
||||
use crate::index::db_name::EXACT_WORD_DOCIDS;
|
||||
use crate::search::criteria::r#final::{Final, FinalResult};
|
||||
use crate::search::criteria::InitialCandidates;
|
||||
use crate::{AscDesc, Criterion, DocumentId, Index, Member, Result};
|
||||
use crate::{fields_ids_map, AscDesc, Criterion, DocumentId, Index, Member, Result, BEU16};
|
||||
|
||||
// Building these factories is not free.
|
||||
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
||||
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
|
||||
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
|
||||
|
||||
/// The maximum number of facets returned by the facet search route.
|
||||
const MAX_NUMBER_OF_FACETS: usize = 1000;
|
||||
|
||||
mod criteria;
|
||||
mod distinct;
|
||||
pub mod facet;
|
||||
@ -446,6 +451,114 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SearchForFacetValue<'a> {
|
||||
query: Option<String>,
|
||||
facet: String,
|
||||
search_query: Search<'a>,
|
||||
}
|
||||
|
||||
impl<'a> SearchForFacetValue<'a> {
|
||||
pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValue<'a> {
|
||||
SearchForFacetValue { query: None, facet, search_query }
|
||||
}
|
||||
|
||||
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
|
||||
self.query = Some(query.into());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn execute(&self) -> Result<Vec<FacetSearchResult>> {
|
||||
let index = self.search_query.index;
|
||||
let rtxn = self.search_query.rtxn;
|
||||
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
if !filterable_fields.contains(&self.facet) {
|
||||
// TODO create a new type of error
|
||||
return Err(UserError::InvalidSortableAttribute {
|
||||
field: self.facet.clone(),
|
||||
valid_fields: filterable_fields.into_iter().collect(),
|
||||
})?;
|
||||
}
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let (field_id, fst) = match fields_ids_map.id(&self.facet) {
|
||||
Some(fid) => {
|
||||
match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? {
|
||||
Some(fst) => (fid, fst),
|
||||
None => todo!("return an error, is the user trying to search in numbers?"),
|
||||
}
|
||||
}
|
||||
None => todo!("return an internal error bug"),
|
||||
};
|
||||
|
||||
let search_candidates = self.search_query.execute()?.candidates;
|
||||
|
||||
match self.query.as_ref() {
|
||||
Some(query) => {
|
||||
let is_prefix = true;
|
||||
let starts = StartsWith(Str::new(get_first(query)));
|
||||
let first = Intersection(build_dfa(query, 1, is_prefix), Complement(&starts));
|
||||
let second_dfa = build_dfa(query, 2, is_prefix);
|
||||
let second = Intersection(&second_dfa, &starts);
|
||||
let automaton = Union(first, &second);
|
||||
|
||||
let mut stream = fst.search(automaton).into_stream();
|
||||
let mut result = vec![];
|
||||
let mut length = 0;
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
let key = FacetGroupKey { field_id, level: 0, left_bound: value };
|
||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||
None => todo!("return an internal error"),
|
||||
};
|
||||
let count = search_candidates.intersection_len(&docids);
|
||||
if count != 0 {
|
||||
result.push(FacetSearchResult { value: value.to_string(), count });
|
||||
length += 1;
|
||||
}
|
||||
if length >= MAX_NUMBER_OF_FACETS {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
None => {
|
||||
let mut stream = fst.stream();
|
||||
let mut result = vec![];
|
||||
let mut length = 0;
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
let key = FacetGroupKey { field_id, level: 0, left_bound: value };
|
||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||
None => todo!("return an internal error"),
|
||||
};
|
||||
let count = search_candidates.intersection_len(&docids);
|
||||
if count != 0 {
|
||||
result.push(FacetSearchResult { value: value.to_string(), count });
|
||||
length += 1;
|
||||
}
|
||||
if length >= MAX_NUMBER_OF_FACETS {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Serialize)]
|
||||
pub struct FacetSearchResult {
|
||||
/// The original facet value
|
||||
pub value: String,
|
||||
/// The number of documents associated to this facet
|
||||
pub count: u64,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
@ -33,6 +33,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
||||
script_language_docids,
|
||||
facet_id_f64_docids,
|
||||
facet_id_string_docids,
|
||||
facet_id_string_fst: _,
|
||||
facet_id_exists_docids,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
|
@ -241,6 +241,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
word_prefix_position_docids,
|
||||
facet_id_f64_docids: _,
|
||||
facet_id_string_docids: _,
|
||||
facet_id_string_fst: _,
|
||||
field_id_docid_facet_f64s: _,
|
||||
field_id_docid_facet_strings: _,
|
||||
script_language_docids,
|
||||
|
@ -78,15 +78,16 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
|
||||
|
||||
use std::fs::File;
|
||||
|
||||
use heed::types::DecodeIgnore;
|
||||
use log::debug;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use self::incremental::FacetsUpdateIncremental;
|
||||
use super::FacetsUpdateBulk;
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::{Index, Result};
|
||||
use crate::{Index, Result, BEU16};
|
||||
|
||||
pub mod bulk;
|
||||
pub mod delete;
|
||||
@ -157,6 +158,43 @@ impl<'i> FacetsUpdate<'i> {
|
||||
);
|
||||
incremental_update.execute(wtxn)?;
|
||||
}
|
||||
|
||||
// We compute one FST by string facet
|
||||
let mut text_fsts = vec![];
|
||||
let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
|
||||
let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
|
||||
for result in database.iter(&wtxn)? {
|
||||
let (facet_group_key, _) = result?;
|
||||
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
|
||||
current_fst = match current_fst.take() {
|
||||
Some((fid, fst_builder)) if fid != field_id => {
|
||||
let fst = fst_builder.into_set();
|
||||
text_fsts.push((field_id, fst));
|
||||
Some((field_id, fst::SetBuilder::memory()))
|
||||
}
|
||||
Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
|
||||
None => Some((field_id, fst::SetBuilder::memory())),
|
||||
};
|
||||
|
||||
if let Some((_, fst_builder)) = current_fst.as_mut() {
|
||||
fst_builder.insert(left_bound)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some((field_id, fst_builder)) = current_fst {
|
||||
let fst = fst_builder.into_set();
|
||||
text_fsts.push((field_id, fst));
|
||||
}
|
||||
|
||||
// We remove all of the previous FSTs that were in this database
|
||||
self.index.facet_id_string_fst.clear(wtxn)?;
|
||||
|
||||
// We write those FSTs in LMDB now
|
||||
for (field_id, fst) in text_fsts {
|
||||
self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -1 +0,0 @@
|
||||
|
Reference in New Issue
Block a user