Compare commits

...

20 Commits

SHA1 Message Date
25d49f5811 Use the minWordSizeForTypos index settings 2023-06-06 10:48:43 +02:00
e9af506591 Format the code 2023-06-06 10:48:43 +02:00
6ee4f4b544 Fix compilation issues 2023-06-06 10:48:42 +02:00
e92576e0d4 Simplify the placeholder search of the facet-search route 2023-06-06 10:48:08 +02:00
7e1a49e7fa Use the disableOnAttributes parameter on the facet-search route 2023-06-06 10:48:08 +02:00
17e86e9c42 Use the disableOnWords parameter on the facet-search route 2023-06-06 10:48:08 +02:00
f4f5ae70d6 Support the typoTolerant.enabled parameter 2023-06-06 10:48:08 +02:00
edf3031dae Log an error when a facet value is missing from the database 2023-06-06 10:48:08 +02:00
09d440a427 Rename the SearchForFacetValues struct 2023-06-06 10:48:08 +02:00
8b66318a6b Return an internal error when a field id is missing 2023-06-06 10:48:08 +02:00
196a2b3d58 Make clippy happy 2023-06-06 10:48:07 +02:00
c153cbc593 Improve the returned errors from the facet-search route 2023-06-06 10:48:07 +02:00
e731f1c8ba Fix the max number of facets to be returned to 100 2023-06-06 10:48:07 +02:00
c39d830ff8 Return the correct response JSON object from the facet-search route 2023-06-06 10:48:07 +02:00
2dca4d82d8 Send analytics about the facet-search route 2023-06-06 10:48:07 +02:00
ce87ee8ea0 Make the search for facet work 2023-06-06 10:37:27 +02:00
f06bb445a6 Introduce the facet search route 2023-06-06 10:37:26 +02:00
81792eb5f7 Restrict the number of facet search results to 1000 2023-06-06 10:37:26 +02:00
7a49bbc8df Introduce the SearchForFacetValue struct 2023-06-06 10:37:26 +02:00
ca16aaaa30 Store the facet string values in multiple FSTs 2023-06-06 10:37:26 +02:00
17 changed files with 677 additions and 51 deletions

View File

@@ -239,8 +239,11 @@ InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacet , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
@@ -330,6 +333,7 @@ impl ErrorCode for milli::Error {
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidSearchFacet { .. } => Code::InvalidSearchFacet,
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::SortError(_) => Code::InvalidSearchSort,

View File

@@ -38,6 +38,18 @@ impl MultiSearchAggregator {
pub fn succeed(&mut self) {}
}
#[derive(Default)]
pub struct FacetSearchAggregator;
#[allow(dead_code)]
impl FacetSearchAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self::default()
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
impl MockAnalytics {
#[allow(clippy::new_ret_no_self)]
pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
@@ -56,6 +68,7 @@ impl Analytics for MockAnalytics {
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
fn add_documents(
&self,
_documents_query: &UpdateDocumentsQuery,

View File

@@ -25,6 +25,8 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics;
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
// if we are in release mode and the feature analytics was enabled
// we use the real analytics
@@ -34,6 +36,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
@@ -88,6 +92,9 @@ pub trait Analytics: Sync + Send {
/// This method should be called to aggregate a POSTed array of searches
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
/// This method should be called to aggregate POSTed facet-values searches
fn post_facet_search(&self, aggregate: FacetSearchAggregator);
// This method should be called to aggregate an add documents request
fn add_documents(
&self,

View File

@@ -1,5 +1,6 @@
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::fs;
use std::mem::take;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};
@@ -29,11 +30,13 @@ use super::{
use crate::analytics::Analytics;
use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::indexes::facet_search::FacetSearchQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
use crate::Opt;
@@ -71,6 +74,7 @@ pub enum AnalyticsMsg {
AggregateGetSearch(SearchAggregator),
AggregatePostSearch(SearchAggregator),
AggregatePostMultiSearch(MultiSearchAggregator),
AggregatePostFacetSearch(FacetSearchAggregator),
AggregateAddDocuments(DocumentsAggregator),
AggregateDeleteDocuments(DocumentsDeletionAggregator),
AggregateUpdateDocuments(DocumentsAggregator),
@@ -139,6 +143,7 @@ impl SegmentAnalytics {
batcher,
post_search_aggregator: SearchAggregator::default(),
post_multi_search_aggregator: MultiSearchAggregator::default(),
post_facet_search_aggregator: FacetSearchAggregator::default(),
get_search_aggregator: SearchAggregator::default(),
add_documents_aggregator: DocumentsAggregator::default(),
delete_documents_aggregator: DocumentsDeletionAggregator::default(),
@@ -182,6 +187,10 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
}
fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate));
}
fn post_multi_search(&self, aggregate: MultiSearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate));
}
@@ -354,6 +363,7 @@ pub struct Segment {
get_search_aggregator: SearchAggregator,
post_search_aggregator: SearchAggregator,
post_multi_search_aggregator: MultiSearchAggregator,
post_facet_search_aggregator: FacetSearchAggregator,
add_documents_aggregator: DocumentsAggregator,
delete_documents_aggregator: DocumentsDeletionAggregator,
update_documents_aggregator: DocumentsAggregator,
@@ -418,6 +428,7 @@ impl Segment {
Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
@@ -461,55 +472,72 @@ impl Segment {
})
.await;
}
let get_search = std::mem::take(&mut self.get_search_aggregator)
.into_event(&self.user, "Documents Searched GET");
let post_search = std::mem::take(&mut self.post_search_aggregator)
.into_event(&self.user, "Documents Searched POST");
let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator)
.into_event(&self.user, "Documents Searched by Multi-Search POST");
let add_documents = std::mem::take(&mut self.add_documents_aggregator)
.into_event(&self.user, "Documents Added");
let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)
.into_event(&self.user, "Documents Deleted");
let update_documents = std::mem::take(&mut self.update_documents_aggregator)
.into_event(&self.user, "Documents Updated");
let get_fetch_documents = std::mem::take(&mut self.get_fetch_documents_aggregator)
.into_event(&self.user, "Documents Fetched GET");
let post_fetch_documents = std::mem::take(&mut self.post_fetch_documents_aggregator)
.into_event(&self.user, "Documents Fetched POST");
let get_tasks =
std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
let health =
std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen");
if let Some(get_search) = get_search {
let Segment {
inbox: _,
opt: _,
batcher: _,
user,
get_search_aggregator,
post_search_aggregator,
post_multi_search_aggregator,
post_facet_search_aggregator,
add_documents_aggregator,
delete_documents_aggregator,
update_documents_aggregator,
get_fetch_documents_aggregator,
post_fetch_documents_aggregator,
get_tasks_aggregator,
health_aggregator,
} = self;
if let Some(get_search) =
take(get_search_aggregator).into_event(&user, "Documents Searched GET")
{
let _ = self.batcher.push(get_search).await;
}
if let Some(post_search) = post_search {
if let Some(post_search) =
take(post_search_aggregator).into_event(&user, "Documents Searched POST")
{
let _ = self.batcher.push(post_search).await;
}
if let Some(post_multi_search) = post_multi_search {
if let Some(post_multi_search) = take(post_multi_search_aggregator)
.into_event(&user, "Documents Searched by Multi-Search POST")
{
let _ = self.batcher.push(post_multi_search).await;
}
if let Some(add_documents) = add_documents {
if let Some(post_facet_search) = take(post_facet_search_aggregator)
.into_event(&user, "Documents Searched by Facet-Search POST")
{
let _ = self.batcher.push(post_facet_search).await;
}
if let Some(add_documents) =
take(add_documents_aggregator).into_event(&user, "Documents Added")
{
let _ = self.batcher.push(add_documents).await;
}
if let Some(delete_documents) = delete_documents {
if let Some(delete_documents) =
take(delete_documents_aggregator).into_event(&user, "Documents Deleted")
{
let _ = self.batcher.push(delete_documents).await;
}
if let Some(update_documents) = update_documents {
if let Some(update_documents) =
take(update_documents_aggregator).into_event(&user, "Documents Updated")
{
let _ = self.batcher.push(update_documents).await;
}
if let Some(get_fetch_documents) = get_fetch_documents {
if let Some(get_fetch_documents) =
take(get_fetch_documents_aggregator).into_event(&user, "Documents Fetched GET") {
let _ = self.batcher.push(get_fetch_documents).await;
}
if let Some(post_fetch_documents) = post_fetch_documents {
if let Some(post_fetch_documents) =
take(post_fetch_documents_aggregator).into_event(&user, "Documents Fetched POST") {
let _ = self.batcher.push(post_fetch_documents).await;
}
if let Some(get_tasks) = get_tasks {
if let Some(get_tasks) = take(get_tasks_aggregator).into_event(&user, "Tasks Seen") {
let _ = self.batcher.push(get_tasks).await;
}
if let Some(health) = health {
if let Some(health) = take(health_aggregator).into_event(&user, "Health Seen") {
let _ = self.batcher.push(health).await;
}
let _ = self.batcher.flush().await;
@@ -886,6 +914,144 @@ impl MultiSearchAggregator {
}
}
#[derive(Default)]
pub struct FacetSearchAggregator {
timestamp: Option<OffsetDateTime>,
// context
user_agents: HashSet<String>,
// requests
total_received: usize,
total_succeeded: usize,
time_spent: BinaryHeap<usize>,
// The set of all facetNames that were used
facet_names: HashSet<String>,
// Has any parameter other than facetName or facetQuery been provided?
additional_search_parameters_provided: bool,
}
impl FacetSearchAggregator {
pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self {
let FacetSearchQuery {
facet_query: _,
facet_name,
q,
offset,
limit,
page,
hits_per_page,
attributes_to_retrieve,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_matches_position,
filter,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
} = query;
let mut ret = Self::default();
ret.timestamp = Some(OffsetDateTime::now_utc());
ret.total_received = 1;
ret.user_agents = extract_user_agents(request).into_iter().collect();
ret.facet_names = Some(facet_name.clone()).into_iter().collect();
ret.additional_search_parameters_provided = q.is_some()
|| *offset != DEFAULT_SEARCH_OFFSET()
|| *limit != DEFAULT_SEARCH_LIMIT()
|| page.is_some()
|| hits_per_page.is_some()
|| attributes_to_retrieve.is_some()
|| attributes_to_crop.is_some()
|| *crop_length != DEFAULT_CROP_LENGTH()
|| attributes_to_highlight.is_some()
|| *show_matches_position
|| filter.is_some()
|| sort.is_some()
|| facets.is_some()
|| *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG()
|| *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG()
|| *crop_marker != DEFAULT_CROP_MARKER()
|| *matching_strategy != MatchingStrategy::default();
ret
}
pub fn succeed(&mut self, result: &FacetSearchResult) {
self.total_succeeded = self.total_succeeded.saturating_add(1);
self.time_spent.push(result.processing_time_ms as usize);
}
/// Aggregate one [FacetSearchAggregator] into another.
pub fn aggregate(&mut self, mut other: Self) {
if self.timestamp.is_none() {
self.timestamp = other.timestamp;
}
// context
for user_agent in other.user_agents.into_iter() {
self.user_agents.insert(user_agent);
}
// request
self.total_received = self.total_received.saturating_add(other.total_received);
self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded);
self.time_spent.append(&mut other.time_spent);
// facet_names
for facet_name in other.facet_names.into_iter() {
self.facet_names.insert(facet_name);
}
// additional_search_parameters_provided
self.additional_search_parameters_provided = self.additional_search_parameters_provided
| other.additional_search_parameters_provided;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
if self.total_received == 0 {
None
} else {
// the index of the 99th percentile value
let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.;
// we get all the values in a sorted manner
let time_spent = self.time_spent.into_sorted_vec();
// We are only interested in the slowest value of the 99% fastest results
let time_spent = time_spent.get(percentile_99th as usize);
let properties = json!({
"user-agent": self.user_agents,
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": self.total_succeeded,
"total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics
"total_received": self.total_received,
},
"facets": {
"total_distinct_facet_count": self.facet_names.len(),
},
"additional_search_parameters_provided": self.additional_search_parameters_provided,
});
Some(Track {
timestamp: self.timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,
..Default::default()
})
}
}
}
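To make the percentile arithmetic above concrete, here is a small check of the formula (a sketch; 200 is an arbitrary request count):

let total_succeeded = 200_usize;
// 0.99 * (200 - 1) + 1 = 198.01, truncated to index 198 of the ascending
// sorted vector, i.e. the smallest of the 1% slowest timings.
let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) + 1.;
assert_eq!(percentile_99th as usize, 198);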
#[derive(Default)]
pub struct DocumentsAggregator {
timestamp: Option<OffsetDateTime>,

View File

@@ -0,0 +1,133 @@
use std::collections::{BTreeSet, HashSet};
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use serde_json::Value;
use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::search::{
add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(search)));
}
// TODO improve the error messages
#[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FacetSearchQuery {
#[deserr(default, error = DeserrJsonError<InvalidFacetSearchQuery>)]
pub facet_query: Option<String>,
#[deserr(error = DeserrJsonError<InvalidFacetSearchName>)]
pub facet_name: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub q: Option<String>,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
pub page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
pub hits_per_page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
pub crop_length: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
pub highlight_pre_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
pub highlight_post_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
pub crop_marker: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
pub matching_strategy: MatchingStrategy,
}
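For illustration, a JSON body accepted by this struct might look like the following (a sketch with hypothetical facet, query, and filter values; field names follow the camelCase rename above):

use serde_json::json;

// Hypothetical body for POST /indexes/{index_uid}/facet-search.
let body = json!({
    "facetName": "genres",   // required: the facet whose values are searched
    "facetQuery": "c",       // optional: matched against the facet values
    "q": "adventure",        // optional: restricts the candidate documents
    "filter": "rating > 3"   // any regular search parameter is also accepted
});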
pub async fn search(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebJson<FacetSearchQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.into_inner();
debug!("facet search called with params: {:?}", query);
let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
let facet_query = query.facet_query.clone();
let facet_name = query.facet_name.clone();
let mut search_query = SearchQuery::from(query);
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut search_query, search_rules);
}
let index = index_scheduler.index(&index_uid)?;
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(&index, search_query, facet_query, facet_name)
})
.await?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
analytics.post_facet_search(aggregate);
let search_result = search_result?;
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}
impl From<FacetSearchQuery> for SearchQuery {
fn from(value: FacetSearchQuery) -> Self {
SearchQuery {
q: value.q,
offset: value.offset,
limit: value.limit,
page: value.page,
hits_per_page: value.hits_per_page,
attributes_to_retrieve: value.attributes_to_retrieve,
attributes_to_crop: value.attributes_to_crop,
crop_length: value.crop_length,
attributes_to_highlight: value.attributes_to_highlight,
show_matches_position: value.show_matches_position,
filter: value.filter,
sort: value.sort,
facets: value.facets,
highlight_pre_tag: value.highlight_pre_tag,
highlight_post_tag: value.highlight_post_tag,
crop_marker: value.crop_marker,
matching_strategy: value.matching_strategy,
}
}
}

View File

@@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
pub mod documents;
pub mod facet_search;
pub mod search;
pub mod settings;
@@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
.service(web::scope("/documents").configure(documents::configure))
.service(web::scope("/search").configure(search::configure))
.service(web::scope("/facet-search").configure(facet_search::configure))
.service(web::scope("/settings").configure(settings::configure)),
);
}

View File

@@ -8,7 +8,9 @@ use either::Either;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::{FacetValueHit, SearchForFacetValues};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
@@ -170,7 +172,7 @@ impl SearchQueryWithIndex {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
#[deserr(rename_all = camelCase)]
pub enum MatchingStrategy {
/// Remove query words from last to first
@@ -241,6 +243,14 @@ pub struct FacetStats {
pub max: f64,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct FacetSearchResult {
pub hits: Vec<FacetValueHit>,
pub query: Option<String>,
pub processing_time_ms: u128,
}
/// Incorporate search rules in search query
pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
@@ -261,14 +271,12 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
}
}
pub fn perform_search(
index: &Index,
query: SearchQuery,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let mut search = index.search(&rtxn);
fn prepare_search<'t>(
index: &'t Index,
rtxn: &'t RoTxn,
query: &'t SearchQuery,
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
let mut search = index.search(rtxn);
if let Some(ref query) = query.q {
search.query(query);
@@ -278,7 +286,7 @@ pub fn perform_search(
search.terms_matching_strategy(query.matching_strategy.into());
let max_total_hits = index
.pagination_max_total_hits(&rtxn)
.pagination_max_total_hits(rtxn)
.map_err(milli::Error::from)?
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
@@ -320,6 +328,19 @@ pub fn perform_search(
search.sort_criteria(sort);
}
Ok((search, is_finite_pagination, max_total_hits, offset))
}
pub fn perform_search(
index: &Index,
query: SearchQuery,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query)?;
let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -473,6 +494,30 @@ pub fn perform_search(
Ok(result)
}
pub fn perform_facet_search(
index: &Index,
search_query: SearchQuery,
facet_query: Option<String>,
facet_name: String,
) -> Result<FacetSearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query)?;
let mut facet_search = SearchForFacetValues::new(facet_name, search);
if let Some(facet_query) = &facet_query {
facet_search.query(facet_query);
}
let hits = facet_search.execute()?;
Ok(FacetSearchResult {
hits,
query: facet_query,
processing_time_ms: before_search.elapsed().as_millis(),
})
}
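A minimal call sketch for this helper, reusing the From<FacetSearchQuery> conversion defined in the facet-search route above (hypothetical index handle and facet name; error handling elided):

let search_query = SearchQuery::from(FacetSearchQuery {
    facet_name: "genres".to_string(),
    facet_query: Some("c".to_string()),
    ..Default::default()
});
let result =
    perform_facet_search(&index, search_query, Some("c".to_string()), "genres".to_string())?;
println!("{} facet values in {}ms", result.hits.len(), result.processing_time_ms);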
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
lazy_static::lazy_static! {
static ref GEO_REGEX: Regex =

View File

@@ -124,6 +124,16 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores
}
)]
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
#[error("Attribute `{}` is not filterable. {}",
.field,
match .valid_fields.is_empty() {
true => "This index does not have configured filterable attributes.".to_string(),
false => format!("Available filterable attributes are: `{}`.",
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
),
}
)]
InvalidSearchFacet { field: String, valid_fields: BTreeSet<String> },
#[error("{}", HeedError::BadOpenOptions)]
InvalidLmdbOpenOptions,
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]

View File

@@ -0,0 +1,23 @@
use std::borrow::Cow;
use fst::Set;
use heed::{BytesDecode, BytesEncode};
/// A codec for values of type `Set<&[u8]>`.
pub struct FstSetCodec;
impl<'a> BytesEncode<'a> for FstSetCodec {
type EItem = Set<Vec<u8>>;
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
Some(Cow::Borrowed(item.as_fst().as_bytes()))
}
}
impl<'a> BytesDecode<'a> for FstSetCodec {
type DItem = Set<&'a [u8]>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
Set::new(bytes).ok()
}
}
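A hedged roundtrip sketch for this codec (fst requires keys to be inserted in lexicographic order; the values here are hypothetical):

use fst::SetBuilder;
use heed::{BytesDecode, BytesEncode};

let mut builder = SetBuilder::memory();
builder.insert("action").unwrap();
builder.insert("adventure").unwrap();
let set = builder.into_set();

// Encode to the raw FST bytes as LMDB would store them, then decode back.
let bytes = FstSetCodec::bytes_encode(&set).unwrap();
let decoded = FstSetCodec::bytes_decode(&bytes).unwrap();
assert!(decoded.contains("action"));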

View File

@@ -2,6 +2,7 @@ mod beu32_str_codec;
mod byte_slice_ref;
pub mod facet;
mod field_id_word_count_codec;
mod fst_set_codec;
mod obkv_codec;
mod roaring_bitmap;
mod roaring_bitmap_length;
@@ -15,6 +16,7 @@ pub use str_ref::StrRefCodec;
pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap_length::{

View File

@@ -19,7 +19,7 @@ use crate::heed_codec::facet::{
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::{
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
@@ -85,6 +85,7 @@ pub mod db_name {
pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
pub const DOCUMENTS: &str = "documents";
@@ -147,6 +148,8 @@ pub struct Index {
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
/// Maps the facet field id and ranges of strings with the docids that corresponds to them.
pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
/// Maps the facet field id of the string facets with an FST containing all the facets values.
pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,
/// Maps the document id, the facet field id and the numbers.
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
@@ -166,7 +169,7 @@ impl Index {
) -> Result<Index> {
use db_name::*;
options.max_dbs(23);
options.max_dbs(24);
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?;
@@ -197,13 +200,13 @@ impl Index {
let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
let facet_id_string_docids =
env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
let facet_id_string_fst = env.create_database(&mut wtxn, Some(FACET_ID_STRING_FST))?;
let facet_id_exists_docids =
env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
let facet_id_is_null_docids =
env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?;
let facet_id_is_empty_docids =
env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?;
let field_id_docid_facet_f64s =
env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
let field_id_docid_facet_strings =
@@ -232,6 +235,7 @@ impl Index {
field_id_word_count_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_string_fst,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,

View File

@@ -99,8 +99,9 @@ pub use self::heed_codec::{
};
pub use self::index::Index;
pub use self::search::{
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search,
SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder,
MatchingWords, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy,
DEFAULT_VALUES_PER_FACET,
};
pub type Result<T> = std::result::Result<T, error::Error>;

View File

@@ -1,14 +1,20 @@
use std::fmt;
use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use log::error;
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
use self::new::PartialSearchResult;
use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::{
execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldIdMapMissingEntry, Index,
Result, SearchContext, BEU16,
};
// Building these factories is not free.
@@ -16,6 +22,9 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
/// The maximum number of facets returned by the facet search route.
const MAX_NUMBER_OF_FACETS: usize = 100;
pub mod facet;
mod fst_utils;
pub mod new;
@@ -199,6 +208,174 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
}
}
pub struct SearchForFacetValues<'a> {
query: Option<String>,
facet: String,
search_query: Search<'a>,
}
impl<'a> SearchForFacetValues<'a> {
pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValues<'a> {
SearchForFacetValues { query: None, facet, search_query }
}
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
self.query = Some(query.into());
self
}
pub fn execute(&self) -> Result<Vec<FacetValueHit>> {
let index = self.search_query.index;
let rtxn = self.search_query.rtxn;
let filterable_fields = index.filterable_fields(rtxn)?;
if !filterable_fields.contains(&self.facet) {
return Err(UserError::InvalidSearchFacet {
field: self.facet.clone(),
valid_fields: filterable_fields.into_iter().collect(),
}
.into());
}
let fields_ids_map = index.fields_ids_map(rtxn)?;
let fid = match fields_ids_map.id(&self.facet) {
Some(fid) => fid,
None => {
return Err(FieldIdMapMissingEntry::FieldName {
field_name: self.facet.clone(),
process: "search for facet values",
}
.into());
}
};
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? {
Some(fst) => fst,
None => return Ok(vec![]),
};
let search_candidates = self.search_query.execute()?.candidates;
match self.query.as_ref() {
Some(query) => {
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
let field_authorizes_typos =
!self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);
if authorize_typos && field_authorizes_typos {
let mut results = vec![];
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
let key =
FacetGroupKey { field_id: fid, level: 0, left_bound: query.as_ref() };
if let Some(FacetGroupValue { bitmap, .. }) =
index.facet_id_string_docids.get(rtxn, &key)?
{
let count = search_candidates.intersection_len(&bitmap);
if count != 0 {
results.push(FacetValueHit { value: query.to_string(), count });
}
}
} else {
let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;
let is_prefix = true;
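// Pick the typo budget from the minWordSizeForTypos settings: queries shorter
// than `one_typo` characters get no typo, queries shorter than `two_typos`
// get one, and longer queries get two.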
let automaton = if query.len() < one_typo as usize {
build_dfa(query, 0, is_prefix)
} else if query.len() < two_typos as usize {
build_dfa(query, 1, is_prefix)
} else {
build_dfa(query, 2, is_prefix)
};
let mut stream = fst.search(automaton).into_stream();
let mut length = 0;
while let Some(facet_value) = stream.next() {
let value = std::str::from_utf8(facet_value)?;
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
Some(FacetGroupValue { bitmap, .. }) => bitmap,
None => {
error!(
"the facet value is missing from the facet database: {key:?}"
);
continue;
}
};
let count = search_candidates.intersection_len(&docids);
if count != 0 {
results.push(FacetValueHit { value: value.to_string(), count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
}
Ok(results)
} else {
let automaton = Str::new(query).starts_with();
let mut stream = fst.search(automaton).into_stream();
let mut results = vec![];
let mut length = 0;
while let Some(facet_value) = stream.next() {
let value = std::str::from_utf8(facet_value)?;
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
Some(FacetGroupValue { bitmap, .. }) => bitmap,
None => {
error!(
"the facet value is missing from the facet database: {key:?}"
);
continue;
}
};
let count = search_candidates.intersection_len(&docids);
if count != 0 {
results.push(FacetValueHit { value: value.to_string(), count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
Ok(results)
}
}
None => {
let mut results = vec![];
let mut length = 0;
let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
result?;
let count = search_candidates.intersection_len(&bitmap);
if count != 0 {
results.push(FacetValueHit { value: left_bound.to_string(), count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
Ok(results)
}
}
}
}
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
pub struct FacetValueHit {
/// The original facet value
pub value: String,
/// The number of documents associated with this facet
pub count: u64,
}
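A usage sketch at the milli level, mirroring what perform_facet_search does in the HTTP crate above (assuming an open index and read transaction; `genres` is a hypothetical facet; error handling elided):

let rtxn = index.read_txn()?;
let mut facet_search = SearchForFacetValues::new("genres".to_string(), index.search(&rtxn));
facet_search.query("c");
for FacetValueHit { value, count } in facet_search.execute()? {
    println!("{value} ({count} documents)");
}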
#[cfg(test)]
mod test {
#[allow(unused_imports)]

View File

@@ -35,6 +35,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
script_language_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_string_fst: _,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,

View File

@@ -243,6 +243,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
word_prefix_fid_docids,
facet_id_f64_docids: _,
facet_id_string_docids: _,
facet_id_string_fst: _,
field_id_docid_facet_f64s: _,
field_id_docid_facet_strings: _,
script_language_docids,

View File

@@ -78,15 +78,16 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
use std::fs::File;
use heed::types::DecodeIgnore;
use log::debug;
use time::OffsetDateTime;
use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::{Index, Result};
use crate::{Index, Result, BEU16};
pub mod bulk;
pub mod delete;
@@ -157,6 +158,43 @@ impl<'i> FacetsUpdate<'i> {
);
incremental_update.execute(wtxn)?;
}
// We compute one FST per string facet field
let mut text_fsts = vec![];
let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
for result in database.iter(wtxn)? {
let (facet_group_key, _) = result?;
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
current_fst = match current_fst.take() {
Some((fid, fst_builder)) if fid != field_id => {
let fst = fst_builder.into_set();
text_fsts.push((field_id, fst));
Some((field_id, fst::SetBuilder::memory()))
}
Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
None => Some((field_id, fst::SetBuilder::memory())),
};
if let Some((_, fst_builder)) = current_fst.as_mut() {
fst_builder.insert(left_bound)?;
}
}
}
if let Some((field_id, fst_builder)) = current_fst {
let fst = fst_builder.into_set();
text_fsts.push((field_id, fst));
}
// We remove all of the previous FSTs that were in this database
self.index.facet_id_string_fst.clear(wtxn)?;
// We write those FSTs in LMDB now
for (field_id, fst) in text_fsts {
self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?;
}
Ok(())
}
}
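Note that fst::SetBuilder requires keys in lexicographic order; the iteration above yields level-0 keys sorted by (field_id, level, left_bound), so each field's values arrive as a single ordered run and can be streamed straight into a builder. A toy illustration of that ordering constraint:

use fst::SetBuilder;

let mut builder = SetBuilder::memory();
builder.insert("action").unwrap();
builder.insert("drama").unwrap();
// Out-of-order keys are rejected by the builder.
assert!(builder.insert("comedy").is_err());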

View File

@@ -1 +0,0 @@