Compare commits

...

45 Commits

Author SHA1 Message Date
Kerollmops
54d2a7846e Return one of the original facet values when doing a facet search 2023-06-12 11:39:31 +02:00
Kerollmops
0513a0bafa Make sure the facet queries are normalized 2023-06-12 11:13:34 +02:00
Kerollmops
51891c898d Remove useless InvalidSearchFacet error 2023-06-07 17:58:05 +02:00
Kerollmops
6147688fe4 Make rustfmt happy 2023-06-07 17:02:41 +02:00
Kerollmops
681920ba3e Rename the hits and query output into facetHits and facetQuery respectively 2023-06-07 11:20:46 +02:00
Kerollmops
975a88a093 Fix the error code returned when the facetName field is missing 2023-06-07 11:14:14 +02:00
Kerollmops
ef0fe1195f Introduce a new invalid_facet_search_facet_name error code 2023-06-07 11:04:53 +02:00
Kerollmops
377ebb8a52 Use the right field id to write the string facet values FST 2023-06-07 10:52:35 +02:00
Kerollmops
3b174c6f15 Return an empty list of results if attribute is set as filterable 2023-06-07 10:33:11 +02:00
Clément Renault
25d49f5811 Use the minWordSizeForTypos index settings 2023-06-06 10:48:43 +02:00
Clément Renault
e9af506591 Format the code 2023-06-06 10:48:43 +02:00
Clément Renault
6ee4f4b544 Fix compilation issues 2023-06-06 10:48:42 +02:00
Clément Renault
e92576e0d4 Simplify the placeholder search of the facet-search route 2023-06-06 10:48:08 +02:00
Clément Renault
7e1a49e7fa Use the disableOnAttributes parameter on the facet-search route 2023-06-06 10:48:08 +02:00
Clément Renault
17e86e9c42 Use the disableOnWords parameter on the facet-search route 2023-06-06 10:48:08 +02:00
Clément Renault
f4f5ae70d6 Support the typoTolerant.enabled parameter 2023-06-06 10:48:08 +02:00
Clément Renault
edf3031dae Log an error when a facet value is missing from the database 2023-06-06 10:48:08 +02:00
Clément Renault
09d440a427 Rename the SearchForFacetValues struct 2023-06-06 10:48:08 +02:00
Clément Renault
8b66318a6b Return an internal error when a field id is missing 2023-06-06 10:48:08 +02:00
Clément Renault
196a2b3d58 Make clippy happy 2023-06-06 10:48:07 +02:00
Clément Renault
c153cbc593 Improve the returned errors from the facet-search route 2023-06-06 10:48:07 +02:00
Clément Renault
e731f1c8ba Fix the max number of facets to be returned to 100 2023-06-06 10:48:07 +02:00
Clément Renault
c39d830ff8 Return the correct response JSON object from the facet-search route 2023-06-06 10:48:07 +02:00
Clément Renault
2dca4d82d8 Send analytics about the facet-search route 2023-06-06 10:48:07 +02:00
Clément Renault
ce87ee8ea0 Make the search for facet work 2023-06-06 10:37:27 +02:00
Kerollmops
f06bb445a6 Introduce the facet search route 2023-06-06 10:37:26 +02:00
Kerollmops
81792eb5f7 Restrict the number of facet search results to 1000 2023-06-06 10:37:26 +02:00
Kerollmops
7a49bbc8df Introduce the SearchForFacetValue struct 2023-06-06 10:37:26 +02:00
Clément Renault
ca16aaaa30 Store the facet string values in multiple FSTs 2023-06-06 10:37:26 +02:00
meili-bors[bot]
d963b5f85a Merge #3792
3792: fix the type of the document deletion by filter tasks r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3791

## What does this PR do?
- Hide the deleteDocumentByFilter internal type from the users.


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-05-30 18:20:28 +00:00
Tamo
2acc3ec5ee fix the type of the document deletion by filter tasks 2023-05-30 15:18:52 +02:00
meili-bors[bot]
0a7817a002 Merge #3786
3786: Consistently use wrapping add to avoid overflow in debug when query s… r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3785

## What does this PR do?
- Some of the code paths would erroneously use the default addition operator, whose semantics are "overflow is an error, checked at runtime in debug", instead of the intended "overflow is expected" semantics that this code relies on (it uses `u16::MAX` as a sentinel). This PR makes it so the wrapping add operator is used everywhere.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-29 12:39:54 +00:00
Louis Dureuil
1dfc4038ab Add test that fails before PR and passes now 2023-05-29 11:58:26 +02:00
Louis Dureuil
73198179f1 Consistently use wrapping add to avoid overflow in debug when query starts with a separator 2023-05-29 11:54:12 +02:00
meili-bors[bot]
087866d59f Merge #3775
3775: Last error code changes on the new get/delete documents routes r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes #3774

## What does this PR do?
Following the specification: https://github.com/meilisearch/specifications/pull/236

1. Get rid of the `invalid_document_delete_filter` and always use the `invalid_document_filter`
2. Introduce a new `missing_document_filter` instead of returning `invalid_document_delete_filter` (that’s consistent with all the other routes that have a mandatory parameter)
3. Always return the `original_filter` in the details (potentially set to `null`) instead of hiding it if it wasn’t used


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-05-24 10:07:41 +00:00
Tamo
9111f5176f get rid of the invalid document delete filter in favor of the invalid document filter 2023-05-24 11:53:16 +02:00
Tamo
b9dd092a62 make the details return null in the originalFilter field if no filter was provided + add a big test on the details 2023-05-24 11:48:22 +02:00
Tamo
ca99bc3188 implement the missing document filter error code when deleting documents 2023-05-24 11:29:20 +02:00
meili-bors[bot]
2e49d6aec1 Merge #3768
3768: Fix bugs in graph-based ranking rules + make `words` a graph-based ranking rule r=dureuill a=loiclec

This PR contains three changes:

## 1. Don't call the `words` ranking rule if the term matching strategy is `All`

This is because the purpose of `words` is only to remove nodes from the query graph. It would never do any useful work when the matching strategy was `All`. Remember that the universe was already computed before by computing all the docids corresponding to the "maximally reduced" query graph, which, in the case of `All`, is equal to the original graph.

## 2. The `words` ranking rule is replaced by a graph-based ranking rule. 

This is for three reasons:

1. **performance**: graph-based ranking rules benefit from a lot of optimisations by default, which ensures that they are never too slow. The previous implementation of `words` could call `compute_query_graph_docids` many times if some words had to be removed from the query, which would be quite expensive. I was especially worried about its performance in cases where it is placed right after the `sort` ranking rule. Furthermore, `compute_query_graph_docids` would clone a lot of bitmaps many times unnecessarily.

2. **consistency**: every other ranking rule (except `sort`) is graph-based. It makes sense to implement `words` like that as well. It will automatically benefit from all the features, optimisations, and bug fixes that all the other ranking rules get.

3. **surfacing bugs**: as the first ranking rule to be called (most of the time), I'd like `words` to behave the same as the other ranking rules so that we can quickly detect bugs in our graph algorithms. This actually already happened, which is why this PR also contains a bug fix.

## 3. Fix the `update_all_costs_before_nodes` function

It is a bit difficult to explain what was wrong, but I'll try. The bug happened when we had graphs like:
<img width="730" alt="Screenshot 2023-05-16 at 10 58 57" src="https://github.com/meilisearch/meilisearch/assets/6040237/40db1a68-d852-4e89-99d5-0d65757242a7">
and we gave the node `is` as argument.

Then, we'd walk backwards from the node breadth-first. We'd update the costs of:
1. `sun`
2. `thesun`
3. `start`
4. `the`

which is an incorrect order. The correct order is:

1. `sun`
2. `thesun`
3. `the`
4. `start`

That is, we can only update the cost of a node when all of its successors have either already been visited or were not affected by the update to the node passed as argument. To solve this bug, I factored out the graph-traversal logic into a `traverse_breadth_first_backward` function.


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-23 13:28:08 +00:00
Louis Dureuil
51043f78f0 Remove trailing whitespace 2023-05-23 15:27:25 +02:00
Louis Dureuil
a490a11325 Add explanatory comment on the way we're recomputing costs 2023-05-23 15:24:24 +02:00
Loïc Lecrenier
ec8f685d84 Fix bug in cheapest path algorithm 2023-05-16 17:01:30 +02:00
Loïc Lecrenier
5758268866 Don't compute split_words for phrases 2023-05-16 17:01:18 +02:00
Loïc Lecrenier
a37da36766 Implement words as a graph-based ranking rule and fix some bugs 2023-05-16 10:42:11 +02:00
Loïc Lecrenier
85d96d35a8 Highlight ngram matches as well 2023-05-16 10:39:36 +02:00
37 changed files with 1068 additions and 241 deletions

View File

@@ -466,7 +466,7 @@ impl IndexScheduler {
}
}
Details::DocumentDeletionByFilter { deleted_documents, original_filter: _ } => {
assert_eq!(kind.as_kind(), Kind::DocumentDeletionByFilter);
assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
let (index_uid, _) = if let KindWithContent::DocumentDeletionByFilter {
ref index_uid,
ref filter_expr,

View File

@@ -150,6 +150,11 @@ make_missing_field_convenience_builder!(MissingApiKeyActions, missing_api_key_ac
make_missing_field_convenience_builder!(MissingApiKeyExpiresAt, missing_api_key_expires_at);
make_missing_field_convenience_builder!(MissingApiKeyIndexes, missing_api_key_indexes);
make_missing_field_convenience_builder!(MissingSwapIndexes, missing_swap_indexes);
make_missing_field_convenience_builder!(MissingDocumentFilter, missing_document_filter);
make_missing_field_convenience_builder!(
MissingFacetSearchFacetName,
missing_facet_search_facet_name
);
// Integrate a sub-error into a [`DeserrError`] by taking its error message but using
// the default error code (C) from `Self`

View File

@@ -214,12 +214,12 @@ InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ;
InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
InvalidDocumentDeleteFilter , InvalidRequest , BAD_REQUEST ;
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
@@ -230,6 +230,7 @@ InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ;
@@ -239,6 +240,8 @@ InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
@@ -277,6 +280,7 @@ MissingApiKeyIndexes , InvalidRequest , BAD_REQUEST ;
MissingAuthorizationHeader , Auth , UNAUTHORIZED ;
MissingContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
MissingDocumentId , InvalidRequest , BAD_REQUEST ;
MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
MissingIndexUid , InvalidRequest , BAD_REQUEST ;
MissingMasterKey , Auth , UNAUTHORIZED ;
MissingPayload , InvalidRequest , BAD_REQUEST ;
@@ -330,6 +334,9 @@ impl ErrorCode for milli::Error {
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidFacetSearchFacetName { .. } => {
Code::InvalidFacetSearchFacetName
}
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::SortError(_) => Code::InvalidSearchSort,

View File

@@ -395,7 +395,6 @@ impl std::error::Error for ParseTaskStatusError {}
pub enum Kind {
DocumentAdditionOrUpdate,
DocumentDeletion,
DocumentDeletionByFilter,
SettingsUpdate,
IndexCreation,
IndexDeletion,
@@ -412,7 +411,6 @@ impl Kind {
match self {
Kind::DocumentAdditionOrUpdate
| Kind::DocumentDeletion
| Kind::DocumentDeletionByFilter
| Kind::SettingsUpdate
| Kind::IndexCreation
| Kind::IndexDeletion
@@ -430,7 +428,6 @@ impl Display for Kind {
match self {
Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"),
Kind::DocumentDeletion => write!(f, "documentDeletion"),
Kind::DocumentDeletionByFilter => write!(f, "documentDeletionByFilter"),
Kind::SettingsUpdate => write!(f, "settingsUpdate"),
Kind::IndexCreation => write!(f, "indexCreation"),
Kind::IndexDeletion => write!(f, "indexDeletion"),

View File

@@ -38,6 +38,18 @@ impl MultiSearchAggregator {
pub fn succeed(&mut self) {}
}
/// Stateless facet-search aggregator whose methods all discard their arguments.
#[derive(Default)]
pub struct FacetSearchAggregator;

#[allow(dead_code)]
impl FacetSearchAggregator {
    /// Returns a new aggregator; both arguments are ignored.
    pub fn from_query(_query: &dyn Any, _request: &dyn Any) -> Self {
        Self
    }

    /// Does nothing; the argument is ignored.
    pub fn succeed(&mut self, _result: &dyn Any) {}
}
impl MockAnalytics {
#[allow(clippy::new_ret_no_self)]
pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
@@ -56,6 +68,7 @@ impl Analytics for MockAnalytics {
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
// Mock implementation: the facet-search aggregate is accepted and dropped.
fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
fn add_documents(
&self,
_documents_query: &UpdateDocumentsQuery,

View File

@@ -25,6 +25,8 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics;
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
// if we are in release mode and the feature analytics was enabled
// we use the real analytics
@@ -34,6 +36,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
@@ -88,6 +92,9 @@ pub trait Analytics: Sync + Send {
/// This method should be called to aggregate a post array of searches
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
/// This method should be called to aggregate post facet values searches
fn post_facet_search(&self, aggregate: FacetSearchAggregator);
// this method should be called to aggregate a add documents request
fn add_documents(
&self,

View File

@@ -1,5 +1,6 @@
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::fs;
use std::mem::take;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};
@@ -29,11 +30,13 @@ use super::{
use crate::analytics::Analytics;
use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::indexes::facet_search::FacetSearchQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
use crate::Opt;
@@ -71,6 +74,7 @@ pub enum AnalyticsMsg {
AggregateGetSearch(SearchAggregator),
AggregatePostSearch(SearchAggregator),
AggregatePostMultiSearch(MultiSearchAggregator),
AggregatePostFacetSearch(FacetSearchAggregator),
AggregateAddDocuments(DocumentsAggregator),
AggregateDeleteDocuments(DocumentsDeletionAggregator),
AggregateUpdateDocuments(DocumentsAggregator),
@@ -139,6 +143,7 @@ impl SegmentAnalytics {
batcher,
post_search_aggregator: SearchAggregator::default(),
post_multi_search_aggregator: MultiSearchAggregator::default(),
post_facet_search_aggregator: FacetSearchAggregator::default(),
get_search_aggregator: SearchAggregator::default(),
add_documents_aggregator: DocumentsAggregator::default(),
delete_documents_aggregator: DocumentsDeletionAggregator::default(),
@@ -182,6 +187,10 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
}
/// Wraps the facet-search aggregate in an [`AnalyticsMsg`] and hands it to the
/// analytics channel without blocking.
fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
    let message = AnalyticsMsg::AggregatePostFacetSearch(aggregate);
    // The outcome of `try_send` is discarded: a send failure is not reported.
    let _ = self.sender.try_send(message);
}
fn post_multi_search(&self, aggregate: MultiSearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate));
}
@@ -354,6 +363,7 @@ pub struct Segment {
get_search_aggregator: SearchAggregator,
post_search_aggregator: SearchAggregator,
post_multi_search_aggregator: MultiSearchAggregator,
post_facet_search_aggregator: FacetSearchAggregator,
add_documents_aggregator: DocumentsAggregator,
delete_documents_aggregator: DocumentsDeletionAggregator,
update_documents_aggregator: DocumentsAggregator,
@@ -418,6 +428,7 @@ impl Segment {
Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
@@ -461,55 +472,74 @@ impl Segment {
})
.await;
}
let get_search = std::mem::take(&mut self.get_search_aggregator)
.into_event(&self.user, "Documents Searched GET");
let post_search = std::mem::take(&mut self.post_search_aggregator)
.into_event(&self.user, "Documents Searched POST");
let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator)
.into_event(&self.user, "Documents Searched by Multi-Search POST");
let add_documents = std::mem::take(&mut self.add_documents_aggregator)
.into_event(&self.user, "Documents Added");
let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)
.into_event(&self.user, "Documents Deleted");
let update_documents = std::mem::take(&mut self.update_documents_aggregator)
.into_event(&self.user, "Documents Updated");
let get_fetch_documents = std::mem::take(&mut self.get_fetch_documents_aggregator)
.into_event(&self.user, "Documents Fetched GET");
let post_fetch_documents = std::mem::take(&mut self.post_fetch_documents_aggregator)
.into_event(&self.user, "Documents Fetched POST");
let get_tasks =
std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
let health =
std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen");
if let Some(get_search) = get_search {
let Segment {
inbox: _,
opt: _,
batcher: _,
user,
get_search_aggregator,
post_search_aggregator,
post_multi_search_aggregator,
post_facet_search_aggregator,
add_documents_aggregator,
delete_documents_aggregator,
update_documents_aggregator,
get_fetch_documents_aggregator,
post_fetch_documents_aggregator,
get_tasks_aggregator,
health_aggregator,
} = self;
if let Some(get_search) =
take(get_search_aggregator).into_event(&user, "Documents Searched GET")
{
let _ = self.batcher.push(get_search).await;
}
if let Some(post_search) = post_search {
if let Some(post_search) =
take(post_search_aggregator).into_event(&user, "Documents Searched POST")
{
let _ = self.batcher.push(post_search).await;
}
if let Some(post_multi_search) = post_multi_search {
if let Some(post_multi_search) = take(post_multi_search_aggregator)
.into_event(&user, "Documents Searched by Multi-Search POST")
{
let _ = self.batcher.push(post_multi_search).await;
}
if let Some(add_documents) = add_documents {
if let Some(post_facet_search) = take(post_facet_search_aggregator)
.into_event(&user, "Documents Searched by Facet-Search POST")
{
let _ = self.batcher.push(post_facet_search).await;
}
if let Some(add_documents) =
take(add_documents_aggregator).into_event(&user, "Documents Added")
{
let _ = self.batcher.push(add_documents).await;
}
if let Some(delete_documents) = delete_documents {
if let Some(delete_documents) =
take(delete_documents_aggregator).into_event(&user, "Documents Deleted")
{
let _ = self.batcher.push(delete_documents).await;
}
if let Some(update_documents) = update_documents {
if let Some(update_documents) =
take(update_documents_aggregator).into_event(&user, "Documents Updated")
{
let _ = self.batcher.push(update_documents).await;
}
if let Some(get_fetch_documents) = get_fetch_documents {
if let Some(get_fetch_documents) =
take(get_fetch_documents_aggregator).into_event(&user, "Documents Fetched GET")
{
let _ = self.batcher.push(get_fetch_documents).await;
}
if let Some(post_fetch_documents) = post_fetch_documents {
if let Some(post_fetch_documents) =
take(post_fetch_documents_aggregator).into_event(&user, "Documents Fetched POST")
{
let _ = self.batcher.push(post_fetch_documents).await;
}
if let Some(get_tasks) = get_tasks {
if let Some(get_tasks) = take(get_tasks_aggregator).into_event(&user, "Tasks Seen") {
let _ = self.batcher.push(get_tasks).await;
}
if let Some(health) = health {
if let Some(health) = take(health_aggregator).into_event(&user, "Health Seen") {
let _ = self.batcher.push(health).await;
}
let _ = self.batcher.flush().await;
@@ -886,6 +916,144 @@ impl MultiSearchAggregator {
}
}
/// Accumulates statistics about facet-search requests until they are turned
/// into a single analytics event.
#[derive(Default)]
pub struct FacetSearchAggregator {
// Time at which the first aggregated request was recorded; `None` while empty.
timestamp: Option<OffsetDateTime>,
// context
// Distinct user agents extracted from the aggregated requests.
user_agents: HashSet<String>,
// requests
// Number of requests received, successful or not.
total_received: usize,
// Number of requests that completed successfully.
total_succeeded: usize,
// Processing time, in milliseconds, of every successful request.
time_spent: BinaryHeap<usize>,
// The set of all facetNames that were used
facet_names: HashSet<String>,
// Has there been any parameter other than the facetName or facetQuery ones?
additional_search_parameters_provided: bool,
}
impl FacetSearchAggregator {
    /// Builds an aggregator describing one received facet-search request.
    ///
    /// Records the current UTC time, the user agents of `request`, the requested
    /// facet name, and whether any parameter other than `facetName` / `facetQuery`
    /// differs from its default value.
    pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self {
        // Exhaustive destructuring on purpose: adding a field to `FacetSearchQuery`
        // becomes a compile error here until it is taken into account below.
        let FacetSearchQuery {
            facet_query: _,
            facet_name,
            q,
            offset,
            limit,
            page,
            hits_per_page,
            attributes_to_retrieve,
            attributes_to_crop,
            crop_length,
            attributes_to_highlight,
            show_matches_position,
            filter,
            sort,
            facets,
            highlight_pre_tag,
            highlight_post_tag,
            crop_marker,
            matching_strategy,
        } = query;

        Self {
            timestamp: Some(OffsetDateTime::now_utc()),
            total_received: 1,
            user_agents: extract_user_agents(request).into_iter().collect(),
            facet_names: HashSet::from([facet_name.clone()]),
            additional_search_parameters_provided: q.is_some()
                || *offset != DEFAULT_SEARCH_OFFSET()
                || *limit != DEFAULT_SEARCH_LIMIT()
                || page.is_some()
                || hits_per_page.is_some()
                || attributes_to_retrieve.is_some()
                || attributes_to_crop.is_some()
                || *crop_length != DEFAULT_CROP_LENGTH()
                || attributes_to_highlight.is_some()
                || *show_matches_position
                || filter.is_some()
                || sort.is_some()
                || facets.is_some()
                || *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG()
                || *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG()
                || *crop_marker != DEFAULT_CROP_MARKER()
                || *matching_strategy != MatchingStrategy::default(),
            ..Self::default()
        }
    }

    /// Marks the request as successful and records its processing time.
    pub fn succeed(&mut self, result: &FacetSearchResult) {
        self.total_succeeded = self.total_succeeded.saturating_add(1);
        self.time_spent.push(result.processing_time_ms as usize);
    }

    /// Aggregate one [`FacetSearchAggregator`] into another.
    ///
    /// Counters are summed (saturating), sets are unioned, timings are merged and
    /// the earliest-set timestamp of `self` is kept when present.
    pub fn aggregate(&mut self, mut other: Self) {
        if self.timestamp.is_none() {
            self.timestamp = other.timestamp;
        }

        // context
        self.user_agents.extend(other.user_agents);

        // request
        self.total_received = self.total_received.saturating_add(other.total_received);
        self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded);
        self.time_spent.append(&mut other.time_spent);

        // facet_names
        self.facet_names.extend(other.facet_names);

        // additional_search_parameters_provided
        self.additional_search_parameters_provided |= other.additional_search_parameters_provided;
    }

    /// Converts the aggregated data into a [`Track`] event named `event_name`.
    ///
    /// Returns `None` when no request was received since the last flush.
    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        if self.total_received == 0 {
            return None;
        }

        // Zero-based index of the 99th percentile in the ascending timings.
        // `n * 99 / 100` is always smaller than `n` for `n >= 1`, so a single
        // successful request maps to index 0 instead of falling out of range.
        let percentile_99th = self.total_succeeded.saturating_mul(99) / 100;
        // we get all the values in a sorted manner
        let time_spent = self.time_spent.into_sorted_vec();
        // `get` still guards the case where no request succeeded at all
        let time_spent = time_spent.get(percentile_99th);

        let properties = json!({
            "user-agent": self.user_agents,
            "requests": {
                "99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
                "total_succeeded": self.total_succeeded,
                "total_failed": self.total_received.saturating_sub(self.total_succeeded), // saturating, so this can never panic
                "total_received": self.total_received,
            },
            "facets": {
                "total_distinct_facet_count": self.facet_names.len(),
            },
            "additional_search_parameters_provided": self.additional_search_parameters_provided,
        });

        Some(Track {
            timestamp: self.timestamp,
            user: user.clone(),
            event: event_name.to_string(),
            properties,
            ..Default::default()
        })
    }
}
#[derive(Default)]
pub struct DocumentsAggregator {
timestamp: Option<OffsetDateTime>,

View File

@@ -61,7 +61,7 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,
MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentDeleteFilter,
MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentFilter,
MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge,
MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,

View File

@@ -486,7 +486,7 @@ pub async fn delete_documents_batch(
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct DocumentDeletionByFilter {
#[deserr(error = DeserrJsonError<InvalidDocumentDeleteFilter>)]
#[deserr(error = DeserrJsonError<InvalidDocumentFilter>, missing_field_error = DeserrJsonError::missing_document_filter)]
filter: Value,
}
@@ -508,8 +508,8 @@ pub async fn delete_documents_by_filter(
|| -> Result<_, ResponseError> {
Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
}()
// and whatever was the error, the error code should always be an InvalidDocumentDeleteFilter
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentDeleteFilter))?;
// and whatever was the error, the error code should always be an InvalidDocumentFilter
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
let task: SummarizedTaskView =

View File

@@ -0,0 +1,133 @@
use std::collections::{BTreeSet, HashSet};
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use serde_json::Value;
use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::search::{
add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
/// Registers the facet-search handler: a `POST` on the empty resource path of
/// this service is routed to [`search`].
pub fn configure(cfg: &mut web::ServiceConfig) {
    let resource = web::resource("").route(web::post().to(search));
    cfg.service(resource);
}
// TODO improve the error messages
#[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FacetSearchQuery {
#[deserr(default, error = DeserrJsonError<InvalidFacetSearchQuery>)]
pub facet_query: Option<String>,
#[deserr(error = DeserrJsonError<MissingFacetSearchFacetName>, missing_field_error = DeserrJsonError::missing_facet_search_facet_name)]
pub facet_name: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub q: Option<String>,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
pub page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
pub hits_per_page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
pub crop_length: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
pub highlight_pre_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
pub highlight_post_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
pub crop_marker: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
pub matching_strategy: MatchingStrategy,
}
/// Handles a facet-search request on the index named in the path.
///
/// The request body is turned into a regular search query (with the tenant
/// token search rules applied when there are some), and the facet search itself
/// runs on a blocking thread. Once the search has run, the analytics aggregate
/// is posted whether it succeeded or failed; failures that happen earlier
/// (invalid index uid, index lookup, task join) return before that point.
pub async fn search(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: AwebJson<FacetSearchQuery, DeserrJsonError>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let query = params.into_inner();
    debug!("facet search called with params: {:?}", query);

    let mut aggregate = FacetSearchAggregator::from_query(&query, &req);

    // Keep the facet-specific parameters aside: the conversion below consumes `query`.
    let (facet_query, facet_name) = (query.facet_query.clone(), query.facet_name.clone());
    let mut search_query = SearchQuery::from(query);

    // Tenant token search_rules.
    if let Some(rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
        add_search_rules(&mut search_query, rules);
    }

    let index = index_scheduler.index(&index_uid)?;
    let outcome = tokio::task::spawn_blocking(move || {
        perform_facet_search(&index, search_query, facet_query, facet_name)
    })
    .await?;

    // Only successful searches are counted as such, but every search is reported.
    if let Ok(facet_result) = &outcome {
        aggregate.succeed(facet_result);
    }
    analytics.post_facet_search(aggregate);

    let outcome = outcome?;
    debug!("returns: {:?}", outcome);
    Ok(HttpResponse::Ok().json(outcome))
}
impl From<FacetSearchQuery> for SearchQuery {
fn from(value: FacetSearchQuery) -> Self {
SearchQuery {
q: value.q,
offset: value.offset,
limit: value.limit,
page: value.page,
hits_per_page: value.hits_per_page,
attributes_to_retrieve: value.attributes_to_retrieve,
attributes_to_crop: value.attributes_to_crop,
crop_length: value.crop_length,
attributes_to_highlight: value.attributes_to_highlight,
show_matches_position: value.show_matches_position,
filter: value.filter,
sort: value.sort,
facets: value.facets,
highlight_pre_tag: value.highlight_pre_tag,
highlight_post_tag: value.highlight_post_tag,
crop_marker: value.crop_marker,
matching_strategy: value.matching_strategy,
}
}
}

View File

@@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
pub mod documents;
pub mod facet_search;
pub mod search;
pub mod settings;
@@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
.service(web::scope("/documents").configure(documents::configure))
.service(web::scope("/search").configure(search::configure))
.service(web::scope("/facet-search").configure(facet_search::configure))
.service(web::scope("/settings").configure(settings::configure)),
);
}

View File

@@ -99,7 +99,7 @@ pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_filter: Option<String>,
pub original_filter: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
@@ -131,12 +131,13 @@ impl From<Details> for DetailsView {
} => DetailsView {
provided_ids: Some(received_document_ids),
deleted_documents: Some(deleted_documents),
original_filter: Some(None),
..DetailsView::default()
},
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
DetailsView {
provided_ids: Some(0),
original_filter: Some(original_filter),
original_filter: Some(Some(original_filter)),
deleted_documents: Some(deleted_documents),
..DetailsView::default()
}
@@ -148,7 +149,7 @@ impl From<Details> for DetailsView {
DetailsView {
matched_tasks: Some(matched_tasks),
canceled_tasks: Some(canceled_tasks),
original_filter: Some(original_filter),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
@@ -156,7 +157,7 @@ impl From<Details> for DetailsView {
DetailsView {
matched_tasks: Some(matched_tasks),
deleted_tasks: Some(deleted_tasks),
original_filter: Some(original_filter),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
@@ -729,7 +730,7 @@ mod tests {
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"

View File

@@ -8,7 +8,9 @@ use either::Either;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::{FacetValueHit, SearchForFacetValues};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
@@ -170,7 +172,7 @@ impl SearchQueryWithIndex {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
#[deserr(rename_all = camelCase)]
pub enum MatchingStrategy {
/// Remove query words from last to first
@@ -241,6 +243,14 @@ pub struct FacetStats {
pub max: f64,
}
/// Result of a facet search as produced by `perform_facet_search`.
///
/// Field names are serialized in camelCase.
#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct FacetSearchResult {
/// The facet values returned by the facet search, each with its document count.
pub facet_hits: Vec<FacetValueHit>,
/// The facet query this result was computed for, if one was provided.
pub facet_query: Option<String>,
/// Elapsed time of the facet search, in milliseconds.
pub processing_time_ms: u128,
}
/// Incorporate search rules in search query
pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
@@ -261,14 +271,12 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
}
}
pub fn perform_search(
index: &Index,
query: SearchQuery,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let mut search = index.search(&rtxn);
fn prepare_search<'t>(
index: &'t Index,
rtxn: &'t RoTxn,
query: &'t SearchQuery,
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
let mut search = index.search(rtxn);
if let Some(ref query) = query.q {
search.query(query);
@@ -278,7 +286,7 @@ pub fn perform_search(
search.terms_matching_strategy(query.matching_strategy.into());
let max_total_hits = index
.pagination_max_total_hits(&rtxn)
.pagination_max_total_hits(rtxn)
.map_err(milli::Error::from)?
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
@@ -320,6 +328,19 @@ pub fn perform_search(
search.sort_criteria(sort);
}
Ok((search, is_finite_pagination, max_total_hits, offset))
}
pub fn perform_search(
index: &Index,
query: SearchQuery,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query)?;
let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -473,6 +494,28 @@ pub fn perform_search(
Ok(result)
}
/// Runs a facet search on `index`.
///
/// `search_query` is prepared exactly like a regular search and handed to a
/// `SearchForFacetValues` for the facet `facet_name`. When `facet_query` is
/// set, it is used as the text to match facet values against.
///
/// The returned `processing_time_ms` covers everything from opening the read
/// transaction to the end of the facet search.
///
/// # Errors
///
/// Propagates failures from opening the read transaction, preparing the
/// search, and executing the facet search.
pub fn perform_facet_search(
    index: &Index,
    search_query: SearchQuery,
    facet_query: Option<String>,
    facet_name: String,
) -> Result<FacetSearchResult, MeilisearchHttpError> {
    let started_at = Instant::now();
    let rtxn = index.read_txn()?;

    // Only the prepared search is needed; the pagination-related values are ignored.
    let (search, ..) = prepare_search(index, &rtxn, &search_query)?;

    let mut facet_search = SearchForFacetValues::new(facet_name, search);
    if let Some(text) = facet_query.as_ref() {
        facet_search.query(text);
    }

    let facet_hits = facet_search.execute()?;
    let processing_time_ms = started_at.elapsed().as_millis();

    Ok(FacetSearchResult { facet_hits, facet_query, processing_time_ms })
}
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
lazy_static::lazy_static! {
static ref GEO_REGEX: Regex =

View File

@@ -547,9 +547,9 @@ async fn delete_document_by_filter() {
snapshot!(json_string!(response), @r###"
{
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
"code": "invalid_document_delete_filter",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter"
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
@@ -559,9 +559,9 @@ async fn delete_document_by_filter() {
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"code": "invalid_document_delete_filter",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter"
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
@@ -571,9 +571,21 @@ async fn delete_document_by_filter() {
snapshot!(json_string!(response), @r###"
{
"message": "Sending an empty filter is forbidden.",
"code": "invalid_document_delete_filter",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter"
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
// do not send any filter
let (response, code) = index.delete_document_by_filter(json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Missing field `filter`",
"code": "missing_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_document_filter"
}
"###);

View File

@@ -97,7 +97,7 @@ async fn task_bad_types() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
@@ -108,7 +108,7 @@ async fn task_bad_types() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
@@ -119,7 +119,7 @@ async fn task_bad_types() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"

View File

@@ -413,7 +413,7 @@ async fn test_summarized_document_addition_or_update() {
}
#[actix_web::test]
async fn test_summarized_delete_batch() {
async fn test_summarized_delete_documents_by_batch() {
let server = Server::new().await;
let index = server.index("test");
index.delete_batch(vec![1, 2, 3]).await;
@@ -430,7 +430,8 @@ async fn test_summarized_delete_batch() {
"canceledBy": null,
"details": {
"providedIds": 3,
"deletedDocuments": 0
"deletedDocuments": 0,
"originalFilter": null
},
"error": {
"message": "Index `test` not found.",
@@ -460,7 +461,8 @@ async fn test_summarized_delete_batch() {
"canceledBy": null,
"details": {
"providedIds": 1,
"deletedDocuments": 0
"deletedDocuments": 0,
"originalFilter": null
},
"error": null,
"duration": "[duration]",
@@ -472,7 +474,100 @@ async fn test_summarized_delete_batch() {
}
#[actix_web::test]
async fn test_summarized_delete_document() {
// Checks the summarized task view of a delete-documents-by-filter request in
// three situations: the index does not exist, the index exists but `doggo` is
// not filterable, and `doggo` has been declared filterable.
// In every snapshot the `originalFilter` detail echoes the submitted filter.
async fn test_summarized_delete_documents_by_filter() {
let server = Server::new().await;
let index = server.index("test");
// Task 0: nothing has created the index yet, so the snapshot expects a failed
// task with an `index_not_found` error.
index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
index.wait_task(0).await;
let (task, _) = index.get_task(0).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 0,
"indexUid": "test",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"doggo = bernese\""
},
"error": {
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Task 2 (presumably task 1 is the index creation just below): the index
// exists but has no filterable attributes, so the snapshot expects a failed
// task with an `invalid_document_filter` error.
index.create(None).await;
index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
index.wait_task(2).await;
let (task, _) = index.get_task(2).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 2,
"indexUid": "test",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"doggo = bernese\""
},
"error": {
"message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Task 4 (presumably task 3 is the settings update just below): `doggo` is now
// filterable, so the snapshot expects a succeeded task with zero deleted
// documents.
index.update_settings(json!({ "filterableAttributes": ["doggo"] })).await;
index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
index.wait_task(4).await;
let (task, _) = index.get_task(4).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 4,
"indexUid": "test",
"status": "succeeded",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"doggo = bernese\""
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_web::test]
async fn test_summarized_delete_document_by_id() {
let server = Server::new().await;
let index = server.index("test");
index.delete_document(1).await;
@@ -489,7 +584,8 @@ async fn test_summarized_delete_document() {
"canceledBy": null,
"details": {
"providedIds": 1,
"deletedDocuments": 0
"deletedDocuments": 0,
"originalFilter": null
},
"error": {
"message": "Index `test` not found.",
@@ -519,7 +615,8 @@ async fn test_summarized_delete_document() {
"canceledBy": null,
"details": {
"providedIds": 1,
"deletedDocuments": 0
"deletedDocuments": 0,
"originalFilter": null
},
"error": null,
"duration": "[duration]",

View File

@@ -124,6 +124,16 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
}
)]
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
#[error("Attribute `{}` is not filterable. {}",
.field,
match .valid_fields.is_empty() {
true => "This index does not have configured filterable attributes.".to_string(),
false => format!("Available filterable attributes are: `{}`.",
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
),
}
)]
InvalidFacetSearchFacetName { field: String, valid_fields: BTreeSet<String> },
#[error("{}", HeedError::BadOpenOptions)]
InvalidLmdbOpenOptions,
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]

View File

@@ -0,0 +1,23 @@
use std::borrow::Cow;
use fst::Set;
use heed::{BytesDecode, BytesEncode};
/// A codec for FST sets.
///
/// Encoding takes an owned `Set<Vec<u8>>` and exposes its underlying bytes;
/// decoding yields a `Set<&[u8]>` built over the given bytes.
pub struct FstSetCodec;
impl<'a> BytesEncode<'a> for FstSetCodec {
    type EItem = Set<Vec<u8>>;

    /// Exposes the bytes backing the set's FST as a borrowed slice.
    /// This implementation always returns `Some`.
    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        let raw: &'a [u8] = item.as_fst().as_bytes();
        Some(Cow::Borrowed(raw))
    }
}
impl<'a> BytesDecode<'a> for FstSetCodec {
    type DItem = Set<&'a [u8]>;

    /// Builds a set over `bytes`; returns `None` when `Set::new` rejects them.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        match Set::new(bytes) {
            Ok(set) => Some(set),
            Err(_) => None,
        }
    }
}

View File

@@ -2,6 +2,7 @@ mod beu32_str_codec;
mod byte_slice_ref;
pub mod facet;
mod field_id_word_count_codec;
mod fst_set_codec;
mod obkv_codec;
mod roaring_bitmap;
mod roaring_bitmap_length;
@@ -15,6 +16,7 @@ pub use str_ref::StrRefCodec;
pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap_length::{

View File

@@ -19,7 +19,7 @@ use crate::heed_codec::facet::{
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::{
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
@@ -85,6 +85,7 @@ pub mod db_name {
pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
pub const DOCUMENTS: &str = "documents";
@@ -147,6 +148,8 @@ pub struct Index {
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
/// Maps the facet field id and ranges of strings with the docids that corresponds to them.
pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
/// Maps the field id of each string facet to an FST containing all of its facet values.
pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,
/// Maps the document id, the facet field id and the numbers.
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
@@ -166,7 +169,7 @@ impl Index {
) -> Result<Index> {
use db_name::*;
options.max_dbs(23);
options.max_dbs(24);
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?;
@@ -197,13 +200,13 @@ impl Index {
let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
let facet_id_string_docids =
env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
let facet_id_string_fst = env.create_database(&mut wtxn, Some(FACET_ID_STRING_FST))?;
let facet_id_exists_docids =
env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
let facet_id_is_null_docids =
env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?;
let facet_id_is_empty_docids =
env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?;
let field_id_docid_facet_f64s =
env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
let field_id_docid_facet_strings =
@@ -232,6 +235,7 @@ impl Index {
field_id_word_count_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_string_fst,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,

View File

@@ -99,8 +99,9 @@ pub use self::heed_codec::{
};
pub use self::index::Index;
pub use self::search::{
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search,
SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder,
MatchingWords, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy,
DEFAULT_VALUES_PER_FACET,
};
pub type Result<T> = std::result::Result<T, error::Error>;

View File

@@ -1,14 +1,20 @@
use std::fmt;
use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use log::error;
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
use self::new::PartialSearchResult;
use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::{
execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
execute_search, normalize_facet, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index,
Result, SearchContext, BEU16,
};
// Building these factories is not free.
@@ -16,6 +22,9 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
/// The maximum number of facets returned by the facet search route.
const MAX_NUMBER_OF_FACETS: usize = 100;
pub mod facet;
mod fst_utils;
pub mod new;
@@ -199,6 +208,195 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
}
}
/// A search over the values of a single facet, restricted to the documents
/// matched by an underlying `Search`.
pub struct SearchForFacetValues<'a> {
/// Text to match facet values against; `None` until `query` is called.
query: Option<String>,
/// Name of the facet whose values are searched.
facet: String,
/// The search whose candidates are intersected with each facet value's documents.
search_query: Search<'a>,
}
impl<'a> SearchForFacetValues<'a> {
pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValues<'a> {
SearchForFacetValues { query: None, facet, search_query }
}
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
self.query = Some(query.into());
self
}
fn one_original_value_of(
&self,
field_id: FieldId,
facet_str: &str,
any_docid: DocumentId,
) -> Result<Option<String>> {
let index = self.search_query.index;
let rtxn = self.search_query.rtxn;
let key: (FieldId, _, &str) = (field_id, any_docid, facet_str);
Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned()))
}
pub fn execute(&self) -> Result<Vec<FacetValueHit>> {
let index = self.search_query.index;
let rtxn = self.search_query.rtxn;
let filterable_fields = index.filterable_fields(rtxn)?;
if !filterable_fields.contains(&self.facet) {
return Err(UserError::InvalidFacetSearchFacetName {
field: self.facet.clone(),
valid_fields: filterable_fields.into_iter().collect(),
}
.into());
}
let fields_ids_map = index.fields_ids_map(rtxn)?;
let fid = match fields_ids_map.id(&self.facet) {
Some(fid) => fid,
// we return an empty list of results when the attribute has been
// set as filterable but no document contains this field (yet).
None => return Ok(Vec::new()),
};
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? {
Some(fst) => fst,
None => return Ok(vec![]),
};
let search_candidates = self.search_query.execute()?.candidates;
match self.query.as_ref() {
Some(query) => {
let query = normalize_facet(query);
let query = query.as_str();
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
let field_authorizes_typos =
!self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);
if authorize_typos && field_authorizes_typos {
let mut results = vec![];
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: query };
if let Some(FacetGroupValue { bitmap, .. }) =
index.facet_id_string_docids.get(rtxn, &key)?
{
let count = search_candidates.intersection_len(&bitmap);
if count != 0 {
let value = self
.one_original_value_of(fid, query, bitmap.min().unwrap())?
.unwrap_or_else(|| query.to_string());
results.push(FacetValueHit { value, count });
}
}
} else {
let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;
let is_prefix = true;
let automaton = if query.len() < one_typo as usize {
build_dfa(query, 0, is_prefix)
} else if query.len() < two_typos as usize {
build_dfa(query, 1, is_prefix)
} else {
build_dfa(query, 2, is_prefix)
};
let mut stream = fst.search(automaton).into_stream();
let mut length = 0;
while let Some(facet_value) = stream.next() {
let value = std::str::from_utf8(facet_value)?;
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
Some(FacetGroupValue { bitmap, .. }) => bitmap,
None => {
error!(
"the facet value is missing from the facet database: {key:?}"
);
continue;
}
};
let count = search_candidates.intersection_len(&docids);
if count != 0 {
let value = self
.one_original_value_of(fid, value, docids.min().unwrap())?
.unwrap_or_else(|| query.to_string());
results.push(FacetValueHit { value, count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
}
Ok(results)
} else {
let automaton = Str::new(query).starts_with();
let mut stream = fst.search(automaton).into_stream();
let mut results = vec![];
let mut length = 0;
while let Some(facet_value) = stream.next() {
let value = std::str::from_utf8(facet_value)?;
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
Some(FacetGroupValue { bitmap, .. }) => bitmap,
None => {
error!(
"the facet value is missing from the facet database: {key:?}"
);
continue;
}
};
let count = search_candidates.intersection_len(&docids);
if count != 0 {
let value = self
.one_original_value_of(fid, value, docids.min().unwrap())?
.unwrap_or_else(|| query.to_string());
results.push(FacetValueHit { value, count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
Ok(results)
}
}
None => {
let mut results = vec![];
let mut length = 0;
let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
result?;
let count = search_candidates.intersection_len(&bitmap);
if count != 0 {
let value = self
.one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
.unwrap_or_else(|| left_bound.to_string());
results.push(FacetValueHit { value, count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
Ok(results)
}
}
}
}
/// One facet value returned by `SearchForFacetValues::execute`, together with
/// the number of search candidates that carry it.
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
pub struct FacetValueHit {
/// The original facet value
pub value: String,
/// The number of documents associated to this facet
pub count: u64,
}
#[cfg(test)]
mod test {
#[allow(unused_imports)]

View File

@@ -46,7 +46,7 @@ use super::logger::SearchLogger;
use super::query_graph::QueryNode;
use super::ranking_rule_graph::{
ConditionDocIdsCache, DeadEndsCache, ExactnessGraph, FidGraph, PositionGraph, ProximityGraph,
RankingRuleGraph, RankingRuleGraphTrait, TypoGraph,
RankingRuleGraph, RankingRuleGraphTrait, TypoGraph, WordsGraph,
};
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
@@ -54,6 +54,12 @@ use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::PathVisitor;
use crate::{Result, TermsMatchingStrategy};
/// The graph-based ranking rule specialised for `WordsGraph`.
pub type Words = GraphBasedRankingRule<WordsGraph>;
impl GraphBasedRankingRule<WordsGraph> {
    /// Creates the rule under the id `"words"` with the given terms matching
    /// strategy.
    pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
        let id = String::from("words");
        Self::new_with_id(id, Some(terms_matching_strategy))
    }
}
pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
impl GraphBasedRankingRule<ProximityGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {

View File

@@ -4,7 +4,6 @@ use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};
use std::time::Instant;
// use rand::random;
use roaring::RoaringBitmap;
use crate::search::new::interner::Interned;
@@ -13,6 +12,7 @@ use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::{
Edge, FidCondition, FidGraph, PositionCondition, PositionGraph, ProximityCondition,
ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, TypoCondition, TypoGraph,
WordsCondition, WordsGraph,
};
use crate::search::new::ranking_rules::BoxRankingRule;
use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger};
@@ -24,11 +24,12 @@ pub enum SearchEvents {
RankingRuleSkipBucket { ranking_rule_idx: usize, bucket_len: u64 },
RankingRuleEndIteration { ranking_rule_idx: usize, universe_len: u64 },
ExtendResults { new: Vec<u32> },
WordsGraph { query_graph: QueryGraph },
ProximityGraph { graph: RankingRuleGraph<ProximityGraph> },
ProximityPaths { paths: Vec<Vec<Interned<ProximityCondition>>> },
TypoGraph { graph: RankingRuleGraph<TypoGraph> },
TypoPaths { paths: Vec<Vec<Interned<TypoCondition>>> },
WordsGraph { graph: RankingRuleGraph<WordsGraph> },
WordsPaths { paths: Vec<Vec<Interned<WordsCondition>>> },
FidGraph { graph: RankingRuleGraph<FidGraph> },
FidPaths { paths: Vec<Vec<Interned<FidCondition>>> },
PositionGraph { graph: RankingRuleGraph<PositionGraph> },
@@ -139,8 +140,11 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
let Some(location) = self.location.last() else { return };
match location {
Location::Words => {
if let Some(query_graph) = state.downcast_ref::<QueryGraph>() {
self.events.push(SearchEvents::WordsGraph { query_graph: query_graph.clone() });
if let Some(graph) = state.downcast_ref::<RankingRuleGraph<WordsGraph>>() {
self.events.push(SearchEvents::WordsGraph { graph: graph.clone() });
}
if let Some(paths) = state.downcast_ref::<Vec<Vec<Interned<WordsCondition>>>>() {
self.events.push(SearchEvents::WordsPaths { paths: paths.clone() });
}
}
Location::Typo => {
@@ -329,7 +333,6 @@ impl<'ctx> DetailedLoggerFinish<'ctx> {
SearchEvents::ExtendResults { new } => {
self.write_extend_results(new)?;
}
SearchEvents::WordsGraph { query_graph } => self.write_words_graph(query_graph)?,
SearchEvents::ProximityGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::ProximityPaths { paths } => {
self.write_rr_graph_paths::<ProximityGraph>(paths)?;
@@ -338,6 +341,10 @@ impl<'ctx> DetailedLoggerFinish<'ctx> {
SearchEvents::TypoPaths { paths } => {
self.write_rr_graph_paths::<TypoGraph>(paths)?;
}
SearchEvents::WordsGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::WordsPaths { paths } => {
self.write_rr_graph_paths::<WordsGraph>(paths)?;
}
SearchEvents::FidGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::FidPaths { paths } => {
self.write_rr_graph_paths::<FidGraph>(paths)?;
@@ -455,7 +462,7 @@ fill: \"#B6E2D3\"
shape: class
max_nbr_typo: {}",
term_subset.description(ctx),
term_subset.max_nbr_typos(ctx)
term_subset.max_typo_cost(ctx)
)?;
for w in term_subset.all_single_words_except_prefix_db(ctx)? {
@@ -482,13 +489,6 @@ fill: \"#B6E2D3\"
}
Ok(())
}
/// Writes the query graph that was logged for the `words` ranking rule.
///
/// First calls `make_new_file_for_internal_state_if_needed`, then renders `qg`
/// through `write_query_graph`. An error from either call is returned to the caller.
fn write_words_graph(&mut self, qg: QueryGraph) -> Result<()> {
self.make_new_file_for_internal_state_if_needed()?;
self.write_query_graph(&qg)?;
Ok(())
}
fn write_rr_graph<R: RankingRuleGraphTrait>(
&mut self,
graph: &RankingRuleGraph<R>,

View File

@@ -15,11 +15,7 @@ mod resolve_query_graph;
mod small_bitmap;
mod exact_attribute;
// TODO: documentation + comments
// implementation is currently an adaptation of the previous implementation to fit with the new model
mod sort;
// TODO: documentation + comments
mod words;
#[cfg(test)]
mod tests;
@@ -43,10 +39,10 @@ use ranking_rules::{
use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
use roaring::RoaringBitmap;
use sort::Sort;
use words::Words;
use self::geo_sort::GeoSort;
pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use crate::search::new::distinct::apply_distinct_rule;
use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
@@ -202,6 +198,11 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
let mut sorted_fields = HashSet::new();
let mut geo_sorted = false;
// Don't add the `words` ranking rule if the term matching strategy is `All`
if matches!(terms_matching_strategy, TermsMatchingStrategy::All) {
words = true;
}
let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = vec![];
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {

View File

@@ -28,14 +28,14 @@ pub enum ZeroOrOneTypo {
impl Interned<QueryTerm> {
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext) -> Result<()> {
let s = ctx.term_interner.get_mut(self);
if s.max_nbr_typos <= 1 && s.one_typo.is_uninit() {
if s.max_levenshtein_distance <= 1 && s.one_typo.is_uninit() {
assert!(s.two_typo.is_uninit());
// Initialize one_typo subterm even if max_nbr_typo is 0 because of split words
self.initialize_one_typo_subterm(ctx)?;
let s = ctx.term_interner.get_mut(self);
assert!(s.one_typo.is_init());
s.two_typo = Lazy::Init(TwoTypoTerm::default());
} else if s.max_nbr_typos > 1 && s.two_typo.is_uninit() {
} else if s.max_levenshtein_distance > 1 && s.two_typo.is_uninit() {
assert!(s.two_typo.is_uninit());
self.initialize_one_and_two_typo_subterm(ctx)?;
let s = ctx.term_interner.get_mut(self);
@@ -185,7 +185,7 @@ pub fn partially_initialized_term_from_word(
original: ctx.word_interner.insert(word.to_owned()),
ngram_words: None,
is_prefix: false,
max_nbr_typos: 0,
max_levenshtein_distance: 0,
zero_typo: <_>::default(),
one_typo: Lazy::Init(<_>::default()),
two_typo: Lazy::Init(<_>::default()),
@@ -256,7 +256,7 @@ pub fn partially_initialized_term_from_word(
Ok(QueryTerm {
original: word_interned,
ngram_words: None,
max_nbr_typos: max_typo,
max_levenshtein_distance: max_typo,
is_prefix,
zero_typo,
one_typo: Lazy::Uninit,
@@ -275,7 +275,16 @@ fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Intern
impl Interned<QueryTerm> {
fn initialize_one_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
let self_mut = ctx.term_interner.get_mut(self);
let QueryTerm { original, is_prefix, one_typo, max_nbr_typos, .. } = self_mut;
let allows_split_words = self_mut.allows_split_words();
let QueryTerm {
original,
is_prefix,
one_typo,
max_levenshtein_distance: max_nbr_typos,
..
} = self_mut;
let original = *original;
let is_prefix = *is_prefix;
// let original_str = ctx.word_interner.get(*original).to_owned();
@@ -300,13 +309,17 @@ impl Interned<QueryTerm> {
})?;
}
let original_str = ctx.word_interner.get(original).to_owned();
let split_words = find_split_words(ctx, original_str.as_str())?;
let split_words = if allows_split_words {
let original_str = ctx.word_interner.get(original).to_owned();
find_split_words(ctx, original_str.as_str())?
} else {
None
};
let self_mut = ctx.term_interner.get_mut(self);
// Only add the split words to the derivations if:
// 1. the term is not an ngram; OR
// 1. the term is neither an ngram nor a phrase; OR
// 2. the term is an ngram, but the split words are different from the ngram's component words
let split_words = if let Some((ngram_words, split_words)) =
self_mut.ngram_words.as_ref().zip(split_words.as_ref())
@@ -328,7 +341,13 @@ impl Interned<QueryTerm> {
}
fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
let self_mut = ctx.term_interner.get_mut(self);
let QueryTerm { original, is_prefix, two_typo, max_nbr_typos, .. } = self_mut;
let QueryTerm {
original,
is_prefix,
two_typo,
max_levenshtein_distance: max_nbr_typos,
..
} = self_mut;
let original_str = ctx.word_interner.get(*original).to_owned();
if two_typo.is_init() {
return Ok(());

View File

@@ -43,7 +43,7 @@ pub struct QueryTermSubset {
pub struct QueryTerm {
original: Interned<String>,
ngram_words: Option<Vec<Interned<String>>>,
max_nbr_typos: u8,
max_levenshtein_distance: u8,
is_prefix: bool,
zero_typo: ZeroTypoTerm,
// May not be computed yet
@@ -342,10 +342,16 @@ impl QueryTermSubset {
}
None
}
pub fn max_nbr_typos(&self, ctx: &SearchContext) -> u8 {
pub fn max_typo_cost(&self, ctx: &SearchContext) -> u8 {
let t = ctx.term_interner.get(self.original);
match t.max_nbr_typos {
0 => 0,
match t.max_levenshtein_distance {
0 => {
if t.allows_split_words() {
1
} else {
0
}
}
1 => {
if self.one_typo_subset.is_empty() {
0
@@ -438,6 +444,9 @@ impl QueryTerm {
self.zero_typo.is_empty() && one_typo.is_empty() && two_typo.is_empty()
}
/// Returns `true` when `zero_typo.phrase` is `None`, i.e. the term is not a phrase.
///
/// Split-word derivations are only looked up for terms for which this returns `true`.
fn allows_split_words(&self) -> bool {
self.zero_typo.phrase.is_none()
}
}
impl Interned<QueryTerm> {

View File

@@ -77,13 +77,9 @@ pub fn located_query_terms_from_tokens(
}
}
TokenKind::Separator(separator_kind) => {
match separator_kind {
SeparatorKind::Hard => {
position += 1;
}
SeparatorKind::Soft => {
position += 0;
}
// add penalty for hard separators
if let SeparatorKind::Hard = separator_kind {
position = position.wrapping_add(1);
}
phrase = 'phrase: {
@@ -217,7 +213,7 @@ pub fn make_ngram(
original: ngram_str_interned,
ngram_words: Some(words_interned),
is_prefix,
max_nbr_typos,
max_levenshtein_distance: max_nbr_typos,
zero_typo: term.zero_typo,
one_typo: Lazy::Uninit,
two_typo: Lazy::Uninit,
@@ -271,7 +267,7 @@ impl PhraseBuilder {
QueryTerm {
original: ctx.word_interner.insert(phrase_desc),
ngram_words: None,
max_nbr_typos: 0,
max_levenshtein_distance: 0,
is_prefix: false,
zero_typo: ZeroTypoTerm {
phrase: Some(phrase),
@@ -288,3 +284,36 @@ impl PhraseBuilder {
})
}
}
#[cfg(test)]
mod tests {
    use charabia::TokenizerBuilder;

    use super::*;
    use crate::index::tests::TempIndex;

    /// Creates a temporary index and fills it with three small documents.
    fn temp_index_with_documents() -> TempIndex {
        let index = TempIndex::new();
        index
            .add_documents(documents!([
                { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
                { "id": 2, "name": "Westfália" },
                { "id": 3, "name": "Ŵôřlḑôle" },
            ]))
            .unwrap();
        index
    }

    /// A query made of a single hard separator must produce no located query term.
    #[test]
    fn start_with_hard_separator() -> Result<()> {
        // Tokenize a query that contains nothing but a hard separator.
        let tokenizer = TokenizerBuilder::new().build();
        let tokens = tokenizer.tokenize(".");

        let index = temp_index_with_documents();
        let rtxn = index.read_txn()?;
        let mut ctx = SearchContext::new(&index, &rtxn);

        // panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
        let terms = located_query_terms_from_tokens(&mut ctx, tokens, None)?;
        assert!(terms.is_empty());

        Ok(())
    }
}

View File

@@ -205,18 +205,12 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
pub fn find_all_costs_to_end(&self) -> MappedInterner<QueryNode, Vec<u64>> {
let mut costs_to_end = self.query_graph.nodes.map(|_| vec![]);
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
let mut node_stack = VecDeque::new();
*costs_to_end.get_mut(self.query_graph.end_node) = vec![0];
for prev_node in self.query_graph.nodes.get(self.query_graph.end_node).predecessors.iter() {
node_stack.push_back(prev_node);
enqueued.insert(prev_node);
}
while let Some(cur_node) = node_stack.pop_front() {
self.traverse_breadth_first_backward(self.query_graph.end_node, |cur_node| {
if cur_node == self.query_graph.end_node {
*costs_to_end.get_mut(self.query_graph.end_node) = vec![0];
return;
}
let mut self_costs = Vec::<u64>::new();
let cur_node_edges = &self.edges_of_node.get(cur_node);
@@ -232,13 +226,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
self_costs.dedup();
*costs_to_end.get_mut(cur_node) = self_costs;
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
if !enqueued.contains(prev_node) {
node_stack.push_back(prev_node);
enqueued.insert(prev_node);
}
}
}
});
costs_to_end
}
@@ -247,17 +235,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
node_with_removed_outgoing_conditions: Interned<QueryNode>,
costs: &mut MappedInterner<QueryNode, Vec<u64>>,
) {
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
let mut node_stack = VecDeque::new();
enqueued.insert(node_with_removed_outgoing_conditions);
node_stack.push_back(node_with_removed_outgoing_conditions);
'main_loop: while let Some(cur_node) = node_stack.pop_front() {
// Traverse the graph backward from the target node, recomputing the cost for each of its predecessors.
// We first check that no other node is contributing the same total cost to a predecessor before removing
// the cost from the predecessor.
self.traverse_breadth_first_backward(node_with_removed_outgoing_conditions, |cur_node| {
let mut costs_to_remove = FxHashSet::default();
for c in costs.get(cur_node) {
costs_to_remove.insert(*c);
}
costs_to_remove.extend(costs.get(cur_node).iter().copied());
let cur_node_edges = &self.edges_of_node.get(cur_node);
for edge_idx in cur_node_edges.iter() {
@@ -265,22 +248,75 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
for cost in costs.get(edge.dest_node).iter() {
costs_to_remove.remove(&(*cost + edge.cost as u64));
if costs_to_remove.is_empty() {
continue 'main_loop;
return;
}
}
}
if costs_to_remove.is_empty() {
continue 'main_loop;
return;
}
let mut new_costs = BTreeSet::from_iter(costs.get(cur_node).iter().copied());
for c in costs_to_remove {
new_costs.remove(&c);
}
*costs.get_mut(cur_node) = new_costs.into_iter().collect();
});
}
/// Traverse the graph backwards from the given node such that every time
/// a node is visited, we are guaranteed that all its successors either:
/// 1. have already been visited; OR
/// 2. were not reachable from the given node
pub fn traverse_breadth_first_backward(
&self,
from: Interned<QueryNode>,
mut visit: impl FnMut(Interned<QueryNode>),
) {
let mut reachable = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
{
// go backward to get the set of all reachable nodes from the given node
// the nodes that are not reachable will be set as `visited`
let mut stack = VecDeque::new();
let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
enqueued.insert(from);
stack.push_back(from);
while let Some(n) = stack.pop_front() {
if reachable.contains(n) {
continue;
}
reachable.insert(n);
for prev_node in self.query_graph.nodes.get(n).predecessors.iter() {
if !enqueued.contains(prev_node) && !reachable.contains(prev_node) {
stack.push_back(prev_node);
enqueued.insert(prev_node);
}
}
}
};
let mut unreachable_or_visited =
SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
for (n, _) in self.query_graph.nodes.iter() {
if !reachable.contains(n) {
unreachable_or_visited.insert(n);
}
}
let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
let mut stack = VecDeque::new();
enqueued.insert(from);
stack.push_back(from);
while let Some(cur_node) = stack.pop_front() {
if !self.query_graph.nodes.get(cur_node).successors.is_subset(&unreachable_or_visited) {
stack.push_back(cur_node);
continue;
}
unreachable_or_visited.insert(cur_node);
visit(cur_node);
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
if !enqueued.contains(prev_node) {
node_stack.push_back(prev_node);
if !enqueued.contains(prev_node) && !unreachable_or_visited.contains(prev_node) {
stack.push_back(prev_node);
enqueued.insert(prev_node);
}
}

View File

@@ -20,6 +20,8 @@ mod position;
mod proximity;
/// Implementation of the `typo` ranking rule
mod typo;
/// Implementation of the `words` ranking rule
mod words;
use std::collections::BTreeSet;
use std::hash::Hash;
@@ -33,6 +35,7 @@ pub use position::{PositionCondition, PositionGraph};
pub use proximity::{ProximityCondition, ProximityGraph};
use roaring::RoaringBitmap;
pub use typo::{TypoCondition, TypoGraph};
pub use words::{WordsCondition, WordsGraph};
use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner};
use super::query_term::LocatedQueryTermSubset;

View File

@@ -50,7 +50,7 @@ impl RankingRuleGraphTrait for TypoGraph {
// 3-gram -> equivalent to 2 typos
let base_cost = if term.term_ids.len() == 1 { 0 } else { term.term_ids.len() as u32 };
for nbr_typos in 0..=term.term_subset.max_nbr_typos(ctx) {
for nbr_typos in 0..=term.term_subset.max_typo_cost(ctx) {
let mut term = term.clone();
match nbr_typos {
0 => {

View File

@@ -0,0 +1,49 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
use crate::search::new::SearchContext;
use crate::Result;
/// Condition carried by an edge of the `words` ranking-rule graph.
///
/// It only wraps the located query term subset that the edge leads to.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct WordsCondition {
// Destination term of the edge; resolved to document ids in `resolve_condition`.
term: LocatedQueryTermSubset,
}
/// Variant-less marker type on which `RankingRuleGraphTrait` is implemented
/// for the `words` ranking rule. It has no variants, so no value of it can be created.
pub enum WordsGraph {}
impl RankingRuleGraphTrait for WordsGraph {
    type Condition = WordsCondition;

    /// Computes the documents matching the term held by `condition`, restricted to `universe`.
    ///
    /// The returned condition has no start term subset; its end term subset is a
    /// clone of the condition's term.
    fn resolve_condition(
        ctx: &mut SearchContext,
        condition: &Self::Condition,
        universe: &RoaringBitmap,
    ) -> Result<ComputedCondition> {
        let located_term = &condition.term;

        // maybe compute_query_term_subset_docids should accept a universe as argument
        let mut matching = compute_query_term_subset_docids(ctx, &located_term.term_subset)?;
        matching &= universe;

        Ok(ComputedCondition {
            docids: matching,
            universe_len: universe.len(),
            start_term_subset: None,
            end_term_subset: located_term.clone(),
        })
    }

    /// Builds the single edge leading to `to_term`.
    ///
    /// The edge cost is the number of term ids covered by `to_term`, and its
    /// condition is the interned `WordsCondition` wrapping a clone of `to_term`.
    fn build_edges(
        _ctx: &mut SearchContext,
        conditions_interner: &mut DedupInterner<Self::Condition>,
        _from: Option<&LocatedQueryTermSubset>,
        to_term: &LocatedQueryTermSubset,
    ) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
        let cost = to_term.term_ids.len() as u32;
        let condition = conditions_interner.insert(WordsCondition { term: to_term.clone() });
        Ok(vec![(cost, condition)])
    }
}

View File

@@ -1,87 +0,0 @@
use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::query_graph::QueryNode;
use super::resolve_query_graph::compute_query_graph_docids;
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::{Result, TermsMatchingStrategy};
/// State of the `words` ranking rule (its `id()` returns `"words"`).
pub struct Words {
// When `true`, `next_bucket` returns `Ok(None)` immediately.
// Set to `false` by `start_iteration`, back to `true` by `end_iteration`
// or once `nodes_to_remove` has been emptied.
exhausted: bool, // TODO: remove
// Working copy of the parent query graph; `None` outside of an iteration.
query_graph: Option<QueryGraph>,
// Sets of nodes still to be removed from `query_graph`; one set is popped
// from the end after each bucket.
nodes_to_remove: Vec<SmallBitmap<QueryNode>>,
// Strategy selecting how `nodes_to_remove` is filled in `start_iteration`.
terms_matching_strategy: TermsMatchingStrategy,
}
impl Words {
pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
Self {
exhausted: true,
query_graph: None,
nodes_to_remove: vec![],
terms_matching_strategy,
}
}
}
impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
    /// Identifier of this ranking rule.
    fn id(&self) -> String {
        String::from("words")
    }

    /// Starts an iteration from a copy of `parent_query_graph`.
    ///
    /// With the `Last` strategy, the removal order given by the query graph is
    /// stored reversed so that sets can be popped from the end. With `All`,
    /// nothing is scheduled for removal.
    fn start_iteration(
        &mut self,
        ctx: &mut SearchContext<'ctx>,
        _logger: &mut dyn SearchLogger<QueryGraph>,
        _universe: &RoaringBitmap,
        parent_query_graph: &QueryGraph,
    ) -> Result<()> {
        let pending_removals = match self.terms_matching_strategy {
            TermsMatchingStrategy::All => Vec::new(),
            TermsMatchingStrategy::Last => {
                let mut removal_order =
                    parent_query_graph.removal_order_for_terms_matching_strategy_last(ctx);
                removal_order.reverse();
                removal_order
            }
        };

        self.nodes_to_remove = pending_removals;
        self.query_graph = Some(parent_query_graph.clone());
        self.exhausted = false;
        Ok(())
    }

    /// Produces the next bucket: the documents matching the current query graph
    /// within `universe`, together with a copy of that graph.
    ///
    /// Afterwards the next set of nodes is removed from the stored graph, or the
    /// rule is flagged as exhausted when no set is left.
    fn next_bucket(
        &mut self,
        ctx: &mut SearchContext<'ctx>,
        logger: &mut dyn SearchLogger<QueryGraph>,
        universe: &RoaringBitmap,
    ) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
        if self.exhausted {
            return Ok(None);
        }
        let Some(graph) = self.query_graph.as_mut() else { panic!() };
        logger.log_internal_state(graph);

        let candidates = compute_query_graph_docids(ctx, graph, universe)?;
        // Snapshot the graph before it is shrunk for the following bucket.
        let query = graph.clone();

        match self.nodes_to_remove.pop() {
            Some(nodes) => {
                let nodes: Vec<_> = nodes.iter().collect();
                graph.remove_nodes_keep_edges(&nodes);
            }
            None => {
                self.exhausted = true;
            }
        }

        Ok(Some(RankingRuleOutput { query, candidates }))
    }

    /// Resets the rule to its idle state.
    fn end_iteration(
        &mut self,
        _ctx: &mut SearchContext<'ctx>,
        _logger: &mut dyn SearchLogger<QueryGraph>,
    ) {
        self.query_graph = None;
        self.nodes_to_remove = Vec::new();
        self.exhausted = true;
    }
}

View File

@@ -35,6 +35,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
script_language_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_string_fst: _,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,

View File

@@ -243,6 +243,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
word_prefix_fid_docids,
facet_id_f64_docids: _,
facet_id_string_docids: _,
facet_id_string_fst: _,
field_id_docid_facet_f64s: _,
field_id_docid_facet_strings: _,
script_language_docids,

View File

@@ -78,15 +78,16 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
use std::fs::File;
use heed::types::DecodeIgnore;
use log::debug;
use time::OffsetDateTime;
use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::{Index, Result};
use crate::{Index, Result, BEU16};
pub mod bulk;
pub mod delete;
@@ -157,6 +158,43 @@ impl<'i> FacetsUpdate<'i> {
);
incremental_update.execute(wtxn)?;
}
// We compute one FST per string facet
let mut text_fsts = vec![];
let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
for result in database.iter(wtxn)? {
let (facet_group_key, _) = result?;
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
current_fst = match current_fst.take() {
Some((fid, fst_builder)) if fid != field_id => {
let fst = fst_builder.into_set();
text_fsts.push((fid, fst));
Some((field_id, fst::SetBuilder::memory()))
}
Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
None => Some((field_id, fst::SetBuilder::memory())),
};
if let Some((_, fst_builder)) = current_fst.as_mut() {
fst_builder.insert(left_bound)?;
}
}
}
if let Some((field_id, fst_builder)) = current_fst {
let fst = fst_builder.into_set();
text_fsts.push((field_id, fst));
}
// We remove all of the previous FSTs that were in this database
self.index.facet_id_string_fst.clear(wtxn)?;
// We write those FSTs in LMDB now
for (field_id, fst) in text_fsts {
self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?;
}
Ok(())
}
}

View File

@@ -1 +0,0 @@