Compare commits


13 Commits

Author SHA1 Message Date
Louis Dureuil
2d1b25388c Fix test again 2023-06-09 08:38:26 +02:00
Louis Dureuil
fc0eb3901d Gate _rankingScore behind showRankingScore query parameter 2023-06-08 17:03:04 +02:00
Louis Dureuil
4e740f4c5f Introduce linear scale instead of a unit one for scores 2023-06-08 17:03:04 +02:00
Louis Dureuil
efc3371b6f Use linear scale in search 2023-06-08 17:03:03 +02:00
Louis Dureuil
73085d6b03 Fix tests 2023-06-08 17:03:03 +02:00
Louis Dureuil
0ee35ede86 Expose the scores and detailed scores in the API 2023-06-08 17:03:03 +02:00
Louis Dureuil
16898c661e Store the scores for each bucket 2023-06-08 17:03:03 +02:00
Louis Dureuil
4a2a6dc529 Compute score for the ranking rules 2023-06-08 17:03:03 +02:00
Louis Dureuil
63ddea8ae4 Add score_details 2023-06-08 17:03:03 +02:00
Louis Dureuil
df749d424c Add virtual conditions to fid and position to always have the max cost 2023-06-08 17:03:03 +02:00
Louis Dureuil
0cfecf4e9a Remove optimization where ranking rules are not executed on buckets of a single document 2023-06-08 17:03:03 +02:00
Louis Dureuil
b8f4e2b3e4 Change how the cost of removing words is computed 2023-06-08 17:03:03 +02:00
Louis Dureuil
daafbc88d6 Fix sort id 2023-06-08 17:03:03 +02:00
38 changed files with 1004 additions and 924 deletions
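
Taken together, these commits add two opt-in search parameters, `showRankingScore` and `showRankingScoreDetails`, both defaulting to `false`. A minimal sketch of a POST search body exercising them (the query text is illustrative):

```rust
use serde_json::json;

// Hypothetical request body: both flags default to false, so clients that
// do not opt in see no change in the response shape.
let request = json!({
    "q": "Captain Marvel",
    "showRankingScore": true,
    "showRankingScoreDetails": true
});
```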

View File

@@ -16,23 +16,8 @@ env:
MEILI_NO_ANALYTICS: 'true'
jobs:
define-docker-image:
runs-on: ubuntu-latest
outputs:
docker-image: ${{ steps.define-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v3
- name: Define the Docker image we need to use
id: define-image
run: |
event=${{ github.event.action }}
echo "docker-image=nightly" >> $GITHUB_OUTPUT
if [[ $event == 'workflow_dispatch' ]]; then
echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT
fi
meilisearch-js-tests:
needs: define-docker-image
name: JS SDK tests
runs-on: ubuntu-latest
services:
@@ -67,7 +52,6 @@ jobs:
run: yarn test:env:browser
instant-meilisearch-tests:
needs: define-docker-image
name: instant-meilisearch tests
runs-on: ubuntu-latest
services:
@@ -94,7 +78,6 @@ jobs:
run: yarn build
meilisearch-php-tests:
needs: define-docker-image
name: PHP SDK tests
runs-on: ubuntu-latest
services:
@@ -125,7 +108,6 @@ jobs:
composer remove --dev guzzlehttp/guzzle http-interop/http-factory-guzzle
meilisearch-python-tests:
needs: define-docker-image
name: Python SDK tests
runs-on: ubuntu-latest
services:
@@ -150,7 +132,6 @@ jobs:
run: pipenv run pytest
meilisearch-go-tests:
needs: define-docker-image
name: Go SDK tests
runs-on: ubuntu-latest
services:
@@ -180,7 +161,6 @@ jobs:
run: go test -v ./...
meilisearch-ruby-tests:
needs: define-docker-image
name: Ruby SDK tests
runs-on: ubuntu-latest
services:
@@ -205,7 +185,6 @@ jobs:
run: bundle exec rspec
meilisearch-rust-tests:
needs: define-docker-image
name: Rust SDK tests
runs-on: ubuntu-latest
services:

View File

@@ -224,7 +224,6 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
@@ -241,6 +240,8 @@ InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
@@ -331,9 +332,6 @@ impl ErrorCode for milli::Error {
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidSearchableAttribute { .. } => {
Code::InvalidAttributesToSearchOn
}
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::SortError(_) => Code::InvalidSearchSort,
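
The two new codes follow the existing `invalid_search_*` deserr convention, so a malformed value should surface as a 400 response. A hedged sketch of the expected payload; the exact message wording is an assumption, while the `code`/`type`/`link` structure follows the registry above:

```rust
use serde_json::json;

// Assumed error body for a request such as `showRankingScore=potato`;
// only the code/type/link convention is taken from the registry above.
let expected = json!({
    "message": "Invalid value in parameter `showRankingScore`: could not parse `potato` as a boolean",
    "code": "invalid_search_show_ranking_score",
    "type": "invalid_request",
    "link": "https://docs.meilisearch.com/errors#invalid_search_show_ranking_score"
});
```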

View File

@@ -56,6 +56,10 @@ pub struct SearchQueryGet {
sort: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
show_matches_position: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
show_ranking_score: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScoreDetails>)]
show_ranking_score_details: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchFacets>)]
facets: Option<CS<String>>,
#[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPreTag>)]
@@ -66,8 +70,6 @@ pub struct SearchQueryGet {
crop_marker: String,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchMatchingStrategy>)]
matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrQueryParamError<InvalidAttributesToSearchOn>)]
pub attributes_to_search_on: Option<CS<String>>,
}
impl From<SearchQueryGet> for SearchQuery {
@@ -93,12 +95,13 @@ impl From<SearchQueryGet> for SearchQuery {
filter,
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
show_matches_position: other.show_matches_position.0,
show_ranking_score: other.show_ranking_score.0,
show_ranking_score_details: other.show_ranking_score_details.0,
facets: other.facets.map(|o| o.into_iter().collect()),
highlight_pre_tag: other.highlight_pre_tag,
highlight_post_tag: other.highlight_post_tag,
crop_marker: other.crop_marker,
matching_strategy: other.matching_strategy,
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
}
}
}
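
Because the flags are deserialized as `Param<bool>`, the GET route accepts them as textual booleans in the query string. A small sketch of an equivalent GET request, with the URL shape assumed from the usual search route:

```rust
// Hypothetical GET equivalent of a JSON body with both flags enabled.
let query = "q=Captain%20Marvel&showRankingScore=true&showRankingScoreDetails=true";
let url = format!("/indexes/movies/search?{query}");
assert!(url.ends_with("showRankingScoreDetails=true"));
```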

View File

@@ -9,6 +9,7 @@ use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::ScoreDetails;
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
@@ -54,6 +55,10 @@ pub struct SearchQuery {
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScore>, default)]
pub show_ranking_score: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScoreDetails>, default)]
pub show_ranking_score_details: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
@@ -68,8 +73,6 @@ pub struct SearchQuery {
pub crop_marker: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
}
impl SearchQuery {
@@ -105,6 +108,10 @@ pub struct SearchQueryWithIndex {
pub crop_length: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScore>, default)]
pub show_ranking_score: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScoreDetails>, default)]
pub show_ranking_score_details: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
@@ -121,8 +128,6 @@ pub struct SearchQueryWithIndex {
pub crop_marker: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
}
impl SearchQueryWithIndex {
@@ -138,6 +143,8 @@ impl SearchQueryWithIndex {
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_ranking_score,
show_ranking_score_details,
show_matches_position,
filter,
sort,
@@ -146,7 +153,6 @@ impl SearchQueryWithIndex {
highlight_post_tag,
crop_marker,
matching_strategy,
attributes_to_search_on,
} = self;
(
index_uid,
@@ -160,6 +166,8 @@ impl SearchQueryWithIndex {
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_ranking_score,
show_ranking_score_details,
show_matches_position,
filter,
sort,
@@ -168,7 +176,6 @@ impl SearchQueryWithIndex {
highlight_post_tag,
crop_marker,
matching_strategy,
attributes_to_search_on,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
@@ -200,7 +207,7 @@ impl From<MatchingStrategy> for TermsMatchingStrategy {
}
}
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, PartialEq)]
pub struct SearchHit {
#[serde(flatten)]
pub document: Document,
@@ -208,6 +215,10 @@ pub struct SearchHit {
pub formatted: Document,
#[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
pub matches_position: Option<MatchesPosition>,
#[serde(rename = "_rankingScore", skip_serializing_if = "Option::is_none")]
pub ranking_score: Option<u64>,
#[serde(rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")]
pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
}
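
With both flags set, the serde renames above place the scores next to the other underscore-prefixed metadata on each hit. An illustrative serialized hit; the concrete values and detail entries are assumptions:

```rust
use serde_json::json;

// Sketch of a SearchHit serialization: `_rankingScore` is the global score
// on the 0..=1000 linear scale, `_rankingScoreDetails` the per-rule map.
let hit = json!({
    "id": 852,
    "title": "Captain Marvel",
    "_rankingScore": 889,
    "_rankingScoreDetails": {
        "words": { "order": 0, "matchingWords": 2, "maxMatchingWords": 2, "score": 1000 },
        "typo": { "order": 1, "typoCount": 0, "maxTypoCount": 2, "score": 1000 }
    }
});
```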
#[derive(Serialize, Debug, Clone, PartialEq)]
@@ -280,10 +291,6 @@ pub fn perform_search(
search.query(query);
}
if let Some(ref searchable) = query.attributes_to_search_on {
search.searchable_attributes(searchable);
}
let is_finite_pagination = query.is_finite_pagination();
search.terms_matching_strategy(query.matching_strategy.into());
@@ -330,7 +337,8 @@ pub fn perform_search(
search.sort_criteria(sort);
}
let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;
let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
search.execute()?;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -402,7 +410,7 @@ pub fn perform_search(
let documents_iter = index.documents(&rtxn, documents_ids)?;
for (_id, obkv) in documents_iter {
for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
// First generate a document with all the displayed fields
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
@@ -426,7 +434,18 @@ pub fn perform_search(
insert_geo_distance(sort, &mut document);
}
let hit = SearchHit { document, formatted, matches_position };
let ranking_score =
query.show_ranking_score.then(|| ScoreDetails::global_score_linear_scale(score.iter()));
let ranking_score_details =
query.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
let hit = SearchHit {
document,
formatted,
matches_position,
ranking_score_details,
ranking_score,
};
documents.push(hit);
}
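
Both fields are computed only on demand through `bool::then`, so requests that do not opt in skip the score aggregation entirely. A minimal sketch of that gating:

```rust
// `bool::then` evaluates the closure only when the flag is true.
let show_ranking_score = false;
let ranking_score: Option<u64> = show_ranking_score.then(|| 889); // stand-in for the real computation
assert_eq!(ranking_score, None);
```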

View File

@@ -963,27 +963,3 @@ async fn sort_unset_ranking_rule() {
)
.await;
}
#[actix_rt::test]
async fn search_on_unknown_field() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}),
|response, code| {
assert_eq!(400, code, "{}", response);
assert_eq!(response, json!({
"message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
"code": "invalid_attributes_to_search_on",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_attributes_to_search_on"
}));
},
)
.await;
}

View File

@@ -1,3 +1,4 @@
use insta::{allow_duplicates, assert_json_snapshot};
use serde_json::json;
use super::*;
@@ -18,30 +19,43 @@ async fn formatted_contain_wildcard() {
|response, code|
{
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
"cattos": "<em>pésti</em>",
},
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
})
);
}
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"id": "852",
"cattos": "<em>pésti</em>"
},
"_matchesPosition": {
"cattos": [
{
"start": 0,
"length": 5
}
]
}
}
"###);
}
}
)
.await;
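
The `._rankingScore` selector redacts the new volatile field so the inline snapshots stay stable as scoring evolves, and `allow_duplicates!` permits the same inline snapshot to be asserted from a closure that runs more than once. A self-contained sketch of the pattern, assuming `insta` and `serde_json` as dev-dependencies (note the alphabetical key order of serde_json's default map):

```rust
use insta::{allow_duplicates, assert_json_snapshot};
use serde_json::json;

// The redaction replaces whatever `_rankingScore` holds with "[score]".
let hit = json!({ "_rankingScore": 889, "id": 852 });
allow_duplicates! {
    assert_json_snapshot!(hit, { "._rankingScore" => "[score]" }, @r###"
    {
      "_rankingScore": "[score]",
      "id": 852
    }
    "###);
}
```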
index
.search(json!({ "q": "pésti", "attributesToRetrieve": ["*"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pésti",
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"cattos": "pésti"
}
"###)
}
})
.await;
@@ -50,20 +64,29 @@ async fn formatted_contain_wildcard() {
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"], "showMatchesPosition": true }),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti",
},
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
})
);
}
)
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti"
},
"_matchesPosition": {
"cattos": [
{
"start": 0,
"length": 5
}
]
}
}
"###)
}
})
.await;
index
@@ -71,17 +94,20 @@ async fn formatted_contain_wildcard() {
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti",
}
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti"
}
}
"###);
}
},
)
.await;
@@ -89,17 +115,20 @@ async fn formatted_contain_wildcard() {
index
.search(json!({ "q": "pésti", "attributesToCrop": ["*"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti",
}
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti"
}
}
"###)
}
})
.await;
}
@@ -116,21 +145,24 @@ async fn format_nested() {
index
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
"age": 2,
},
{
"name": "buddy",
"age": 4,
},
],
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
]
}
"###)
}
})
.await;
@@ -139,19 +171,22 @@ async fn format_nested() {
json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
]
}
"###)
}
},
)
.await;
@@ -161,20 +196,30 @@ async fn format_nested() {
json!({ "q": "bobby", "attributesToRetrieve": ["doggos.name"], "showMatchesPosition": true }),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
"_matchesPosition": {"doggos.name": [{"start": 0, "length": 5}]},
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
],
"_matchesPosition": {
"doggos.name": [
{
"start": 0,
"length": 5
}
]
}
}
"###)
}
}
)
.await;
@@ -183,21 +228,24 @@ async fn format_nested() {
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
},
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
]
}
}
"###)
}
})
.await;
@@ -205,21 +253,24 @@ async fn format_nested() {
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
},
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
]
}
}
"###)
}
})
.await;
@@ -227,55 +278,61 @@ async fn format_nested() {
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2",
},
{
"name": "buddy",
"age": "4",
},
],
},
})
);
})
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
],
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2"
},
{
"name": "buddy",
"age": "4"
}
]
}
}
"###)
}
})
.await;
index
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2",
},
{
"name": "buddy",
"age": "4",
},
],
},
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2"
},
{
"name": "buddy",
"age": "4"
}
]
}
}
"###)
}
}
)
.await;
@@ -297,54 +354,66 @@ async fn displayedattr_2_smol() {
.search(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852
}
"###)
}
})
.await;
index
.search(json!({ "attributesToRetrieve": ["id"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852
}
"###)
}
})
.await;
index
.search(json!({ "attributesToHighlight": ["id"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"_formatted": {
"id": "852"
}
}
"###)
}
})
.await;
index
.search(json!({ "attributesToCrop": ["id"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"_formatted": {
"id": "852"
}
}
"###)
}
})
.await;
@@ -353,15 +422,18 @@ async fn displayedattr_2_smol() {
json!({ "attributesToHighlight": ["id"], "attributesToCrop": ["id"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"_formatted": {
"id": "852"
}
}
"###)
}
},
)
.await;
@@ -369,31 +441,41 @@ async fn displayedattr_2_smol() {
index
.search(json!({ "attributesToHighlight": ["cattos"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852
}
"###)
}
})
.await;
index
.search(json!({ "attributesToCrop": ["cattos"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852
}
"###)
}
})
.await;
index
.search(json!({ "attributesToRetrieve": ["cattos"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"][0], json!({}));
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@"{}")
}
})
.await;
@@ -402,7 +484,11 @@ async fn displayedattr_2_smol() {
json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["cattos"], "attributesToCrop": ["cattos"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"][0], json!({}));
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@"{}")
}
}
)
@@ -413,14 +499,17 @@ async fn displayedattr_2_smol() {
json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["id"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
}
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"id": "852"
}
}
"###)
}
},
)
.await;
@@ -430,14 +519,17 @@ async fn displayedattr_2_smol() {
json!({ "attributesToRetrieve": ["cattos"], "attributesToCrop": ["id"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
}
})
);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"id": "852"
}
}
"###)
}
},
)
.await;

View File

@@ -5,7 +5,6 @@ mod errors;
mod formatted;
mod multi;
mod pagination;
mod restrict_searchable;
use once_cell::sync::Lazy;
use serde_json::{json, Value};

View File

@@ -65,7 +65,7 @@ async fn simple_search_single_index() {
]}))
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###"
[
{
"indexUid": "test",
@@ -170,7 +170,7 @@ async fn simple_search_two_indexes() {
]}))
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###"
[
{
"indexUid": "test",

View File

@@ -1,241 +0,0 @@
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use crate::common::index::Index;
use crate::common::Server;
async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
let index = server.index("test");
index.add_documents(documents.clone(), None).await;
index.wait_task(0).await;
index
}
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
},
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
}])
});
#[actix_rt::test]
async fn simple_search_on_title() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
// simple search should return 2 documents (ids: 2 and 3).
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"]}),
|response, code| {
assert_eq!(200, code, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
},
)
.await;
}
#[actix_rt::test]
async fn simple_prefix_search_on_title() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
// simple search should return 2 documents (ids: 2 and 3).
index
.search(json!({"q": "Captain Mar", "attributesToSearchOn": ["title"]}), |response, code| {
assert_eq!(200, code, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
})
.await;
}
#[actix_rt::test]
async fn simple_search_on_title_matching_strategy_all() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
// simple search with matching strategy all should only return 1 document (id: 2).
index
.search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "matchingStrategy": "all"}), |response, code| {
assert_eq!(200, code, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
})
.await;
}
#[actix_rt::test]
async fn simple_search_on_no_field() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
// simple search on no field shouldn't return any document.
index
.search(json!({"q": "Captain Marvel", "attributesToSearchOn": []}), |response, code| {
assert_eq!(200, code, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 0);
})
.await;
}
#[actix_rt::test]
async fn word_ranking_rule_order() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
// Document 3 should appear before document 2.
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}),
|response, code| {
assert_eq!(200, code, "{}", response);
assert_eq!(
response["hits"],
json!([
{"id": "3"},
{"id": "2"},
])
);
},
)
.await;
}
#[actix_rt::test]
async fn word_ranking_rule_order_exact_words() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
index.update_settings_typo_tolerance(json!({"disableOnWords": ["Captain", "Marvel"]})).await;
index.wait_task(1).await;
// simple search should return 2 documents (ids: 2 and 3).
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}),
|response, code| {
assert_eq!(200, code, "{}", response);
assert_eq!(
response["hits"],
json!([
{"id": "3"},
{"id": "2"},
])
);
},
)
.await;
}
#[actix_rt::test]
async fn typo_ranking_rule_order() {
let server = Server::new().await;
let index = index_with_documents(
&server,
&json!([
{
"title": "Capitain Marivel",
"desc": "Captain Marvel",
"id": "1",
},
{
"title": "Captain Marivel",
"desc": "a Shazam ersatz",
"id": "2",
}]),
)
.await;
// Document 2 should appear before document 1.
index
.search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| {
assert_eq!(200, code, "{}", response);
assert_eq!(
response["hits"],
json!([
{"id": "2"},
{"id": "1"},
])
);
})
.await;
}
#[actix_rt::test]
async fn attributes_ranking_rule_order() {
let server = Server::new().await;
let index = index_with_documents(
&server,
&json!([
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"footer": "The story of Captain Marvel",
"id": "1",
},
{
"title": "The Avengers",
"desc": "Captain Marvel is far from the earth",
"footer": "A super hero team",
"id": "2",
}]),
)
.await;
// Document 2 should appear before document 1.
index
.search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["desc", "footer"], "attributesToRetrieve": ["id"]}), |response, code| {
assert_eq!(200, code, "{}", response);
assert_eq!(
response["hits"],
json!([
{"id": "2"},
{"id": "1"},
])
);
})
.await;
}
#[actix_rt::test]
async fn exactness_ranking_rule_order() {
let server = Server::new().await;
let index = index_with_documents(
&server,
&json!([
{
"title": "Captain Marvel",
"desc": "Captain Marivel",
"id": "1",
},
{
"title": "Captain Marvel",
"desc": "CaptainMarvel",
"id": "2",
}]),
)
.await;
// Document 2 should appear before document 1.
index
.search(json!({"q": "Captain Marvel", "attributesToRetrieve": ["id"], "attributesToSearchOn": ["desc"]}), |response, code| {
assert_eq!(200, code, "{}", response);
assert_eq!(
response["hits"],
json!([
{"id": "2"},
{"id": "1"},
])
);
})
.await;
}

View File

@@ -124,16 +124,6 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
}
)]
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
#[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.",
.field,
.valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
)]
InvalidSearchableAttribute {
field: String,
valid_fields: BTreeSet<String>,
hidden_fields: bool,
},
#[error("{}", HeedError::BadOpenOptions)]
InvalidLmdbOpenOptions,
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]

View File

@@ -23,9 +23,3 @@ pub use self::roaring_bitmap_length::{
pub use self::script_language_codec::ScriptLanguageCodec;
pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
pub trait BytesDecodeOwned {
type DItem;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem>;
}

View File

@@ -2,11 +2,8 @@ use std::borrow::Cow;
use std::convert::TryInto;
use std::mem::size_of;
use heed::BytesDecode;
use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned;
pub struct BoRoaringBitmapCodec;
impl BoRoaringBitmapCodec {
@@ -16,7 +13,7 @@ impl BoRoaringBitmapCodec {
}
}
impl BytesDecode<'_> for BoRoaringBitmapCodec {
impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
@@ -31,14 +28,6 @@ impl BytesDecode<'_> for BoRoaringBitmapCodec {
}
}
impl BytesDecodeOwned for BoRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
Self::bytes_decode(bytes)
}
}
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
type EItem = RoaringBitmap;

View File

@@ -5,8 +5,6 @@ use std::mem::size_of;
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned;
/// This is the limit where using a byteorder becomes less size-efficient
/// than using a direct roaring encoding; it is also the point where we are able
/// to determine the encoding used only from the length of the byte array.
@@ -105,14 +103,6 @@ impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
}
}
impl BytesDecodeOwned for CboRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
Self::deserialize_from(bytes).ok()
}
}
impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
type EItem = RoaringBitmap;

View File

@@ -2,8 +2,6 @@ use std::borrow::Cow;
use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned;
pub struct RoaringBitmapCodec;
impl heed::BytesDecode<'_> for RoaringBitmapCodec {
@@ -14,14 +12,6 @@ impl heed::BytesDecode<'_> for RoaringBitmapCodec {
}
}
impl BytesDecodeOwned for RoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
RoaringBitmap::deserialize_from(bytes).ok()
}
}
impl heed::BytesEncode<'_> for RoaringBitmapCodec {
type EItem = RoaringBitmap;

View File

@@ -1,23 +1,11 @@
use std::mem;
use heed::BytesDecode;
use crate::heed_codec::BytesDecodeOwned;
pub struct BoRoaringBitmapLenCodec;
impl BytesDecode<'_> for BoRoaringBitmapLenCodec {
impl heed::BytesDecode<'_> for BoRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
Some((bytes.len() / mem::size_of::<u32>()) as u64)
}
}
impl BytesDecodeOwned for BoRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
Self::bytes_decode(bytes)
}
}

View File

@@ -1,14 +1,11 @@
use std::mem;
use heed::BytesDecode;
use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
use crate::heed_codec::BytesDecodeOwned;
pub struct CboRoaringBitmapLenCodec;
impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
impl heed::BytesDecode<'_> for CboRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
@@ -23,11 +20,3 @@ impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
}
}
}
impl BytesDecodeOwned for CboRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
Self::bytes_decode(bytes)
}
}

View File

@@ -3,8 +3,6 @@ use std::mem;
use byteorder::{LittleEndian, ReadBytesExt};
use crate::heed_codec::BytesDecodeOwned;
const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
const SERIAL_COOKIE: u16 = 12347;
@@ -61,14 +59,6 @@ impl heed::BytesDecode<'_> for RoaringBitmapLenCodec {
}
}
impl BytesDecodeOwned for RoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok()
}
}
#[cfg(test)]
mod tests {
use heed::BytesEncode;

View File

@@ -2488,8 +2488,12 @@ pub(crate) mod tests {
let rtxn = index.read_txn().unwrap();
let search = Search::new(&rtxn, &index);
let SearchResult { matching_words: _, candidates: _, mut documents_ids } =
search.execute().unwrap();
let SearchResult {
matching_words: _,
candidates: _,
document_scores: _,
mut documents_ids,
} = search.execute().unwrap();
let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
documents_ids.sort_unstable();
let docs = index.documents(&rtxn, documents_ids).unwrap();

View File

@@ -17,6 +17,7 @@ mod fields_ids_map;
pub mod heed_codec;
pub mod index;
pub mod proximity;
pub mod score_details;
mod search;
pub mod update;

milli/src/score_details.rs (new file, 295 lines)
View File

@@ -0,0 +1,295 @@
use serde::Serialize;
use crate::distance_between_two_points;
#[derive(Debug, Clone, PartialEq)]
pub enum ScoreDetails {
Words(Words),
Typo(Typo),
Proximity(Rank),
Fid(Rank),
Position(Rank),
ExactAttribute(ExactAttribute),
Exactness(Rank),
Sort(Sort),
GeoSort(GeoSort),
}
impl ScoreDetails {
pub fn local_score(&self) -> Option<f64> {
self.rank().map(Rank::local_score)
}
pub fn rank(&self) -> Option<Rank> {
match self {
ScoreDetails::Words(details) => Some(details.rank()),
ScoreDetails::Typo(details) => Some(details.rank()),
ScoreDetails::Proximity(details) => Some(*details),
ScoreDetails::Fid(details) => Some(*details),
ScoreDetails::Position(details) => Some(*details),
ScoreDetails::ExactAttribute(details) => Some(details.rank()),
ScoreDetails::Exactness(details) => Some(*details),
ScoreDetails::Sort(_) => None,
ScoreDetails::GeoSort(_) => None,
}
}
pub fn global_score<'a>(details: impl Iterator<Item = &'a Self>) -> f64 {
Rank::global_score(details.filter_map(Self::rank))
}
pub fn global_score_linear_scale<'a>(details: impl Iterator<Item = &'a Self>) -> u64 {
(Self::global_score(details) * LINEAR_SCALE_FACTOR).round() as u64
}
/// Panics
///
/// - If Position is not preceded by Fid
/// - If Exactness is not preceded by ExactAttribute
/// - If a sort fid is not contained in the passed `fields_ids_map`.
pub fn to_json_map<'a>(
details: impl Iterator<Item = &'a Self>,
) -> serde_json::Map<String, serde_json::Value> {
let mut order = 0;
let mut details_map = serde_json::Map::default();
for details in details {
match details {
ScoreDetails::Words(words) => {
let words_details = serde_json::json!({
"order": order,
"matchingWords": words.matching_words,
"maxMatchingWords": words.max_matching_words,
"score": words.rank().local_score_linear_scale(),
});
details_map.insert("words".into(), words_details);
order += 1;
}
ScoreDetails::Typo(typo) => {
let typo_details = serde_json::json!({
"order": order,
"typoCount": typo.typo_count,
"maxTypoCount": typo.max_typo_count,
"score": typo.rank().local_score_linear_scale(),
});
details_map.insert("typo".into(), typo_details);
order += 1;
}
ScoreDetails::Proximity(proximity) => {
let proximity_details = serde_json::json!({
"order": order,
"score": proximity.local_score_linear_scale(),
});
details_map.insert("proximity".into(), proximity_details);
order += 1;
}
ScoreDetails::Fid(fid) => {
// For now, fid is a virtual rule always followed by the "position" rule
let fid_details = serde_json::json!({
"order": order,
"attributes_ranking_order": fid.local_score_linear_scale(),
});
details_map.insert("attribute".into(), fid_details);
order += 1;
}
ScoreDetails::Position(position) => {
// For now, position is a virtual rule always preceded by the "fid" rule
let attribute_details = details_map
.get_mut("attribute")
.expect("position not preceded by attribute");
let attribute_details = attribute_details
.as_object_mut()
.expect("attribute details was not an object");
attribute_details.insert(
"attributes_query_word_order".into(),
position.local_score_linear_scale().into(),
);
// do not update the order since this was already done by fid
}
ScoreDetails::ExactAttribute(exact_attribute) => {
let exactness_details = serde_json::json!({
"order": order,
"exactIn": exact_attribute,
"score": exact_attribute.rank().local_score_linear_scale(),
});
details_map.insert("exactness".into(), exactness_details);
order += 1;
}
ScoreDetails::Exactness(details) => {
// For now, exactness is a virtual rule always preceded by the "ExactAttribute" rule
let exactness_details = details_map
.get_mut("exactness")
.expect("Exactness not preceded by exactAttribute");
let exactness_details = exactness_details
.as_object_mut()
.expect("exactness details was not an object");
if exactness_details.get("exactIn").expect("missing 'exactIn'")
== &serde_json::json!(ExactAttribute::NoExactMatch)
{
let score = Rank::global_score_linear_scale(
[ExactAttribute::NoExactMatch.rank(), *details].iter().copied(),
);
*exactness_details.get_mut("score").expect("missing score") = score.into();
}
// do not update the order since this was already done by exactAttribute
}
ScoreDetails::Sort(details) => {
let sort = format!(
"{}:{}",
details.field_name,
if details.ascending { "asc" } else { "desc" }
);
let sort_details = serde_json::json!({
"order": order,
"value": details.value,
});
details_map.insert(sort, sort_details);
order += 1;
}
ScoreDetails::GeoSort(details) => {
let sort = format!(
"_geoPoint({}, {}):{}",
details.target_point[0],
details.target_point[1],
if details.ascending { "asc" } else { "desc" }
);
let point = if let Some(value) = details.value {
serde_json::json!({ "lat": value[0], "lng": value[1]})
} else {
serde_json::Value::Null
};
let sort_details = serde_json::json!({
"order": order,
"value": point,
"distance": details.distance(),
});
details_map.insert(sort, sort_details);
order += 1;
}
}
}
details_map
}
}
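
Note how `Fid` and `Position` (and likewise `ExactAttribute` and `Exactness`) are folded into a single JSON entry while `order` advances only once. An illustrative fragment of the resulting map; the numeric values are assumptions:

```rust
use serde_json::json;

// Sketch of the merged "attribute" entry produced when the virtual fid rule
// runs and the position rule then adds its sub-score to the same object.
let attribute_entry = json!({
    "attribute": {
        "order": 2,
        "attributes_ranking_order": 1000,
        "attributes_query_word_order": 750
    }
});
```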
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Words {
pub matching_words: u32,
pub max_matching_words: u32,
}
impl Words {
pub fn rank(&self) -> Rank {
Rank { rank: self.matching_words, max_rank: self.max_matching_words }
}
pub(crate) fn from_rank(rank: Rank) -> Words {
Words { matching_words: rank.rank, max_matching_words: rank.max_rank }
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Typo {
pub typo_count: u32,
pub max_typo_count: u32,
}
impl Typo {
pub fn rank(&self) -> Rank {
Rank {
rank: self.max_typo_count - self.typo_count + 1,
max_rank: (self.max_typo_count + 1),
}
}
// max_rank = max_typo + 1
// max_typo = max_rank - 1
//
// rank = max_typo - typo + 1
// rank = max_rank - 1 - typo + 1
// rank + typo = max_rank
// typo = max_rank - rank
pub fn from_rank(rank: Rank) -> Typo {
Typo { typo_count: rank.max_rank - rank.rank, max_typo_count: rank.max_rank - 1 }
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Rank {
/// The ordinal rank, such that `max_rank` is the first rank, and 0 is the last rank.
///
/// The higher the better. Documents with a rank of 0 have a score of 0 and are typically never returned
/// (they don't match the query).
pub rank: u32,
/// The maximum possible rank. Documents with this rank have a score of 1.
///
/// The max rank should not be 0.
pub max_rank: u32,
}
impl Rank {
pub fn local_score(self) -> f64 {
self.rank as f64 / self.max_rank as f64
}
pub fn local_score_linear_scale(self) -> u64 {
(self.local_score() * LINEAR_SCALE_FACTOR).round() as u64
}
pub fn global_score(details: impl Iterator<Item = Self>) -> f64 {
let mut rank = Rank { rank: 1, max_rank: 1 };
for inner_rank in details {
rank.rank -= 1;
rank.rank *= inner_rank.max_rank;
rank.max_rank *= inner_rank.max_rank;
rank.rank += inner_rank.rank;
}
rank.local_score()
}
pub fn global_score_linear_scale(details: impl Iterator<Item = Self>) -> u64 {
(Self::global_score(details) * LINEAR_SCALE_FACTOR).round() as u64
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum ExactAttribute {
MatchesFull,
MatchesStart,
NoExactMatch,
}
impl ExactAttribute {
pub fn rank(&self) -> Rank {
let rank = match self {
ExactAttribute::MatchesFull => 3,
ExactAttribute::MatchesStart => 2,
ExactAttribute::NoExactMatch => 1,
};
Rank { rank, max_rank: 3 }
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct Sort {
pub field_name: String,
pub ascending: bool,
pub value: serde_json::Value,
}
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub struct GeoSort {
pub target_point: [f64; 2],
pub ascending: bool,
pub value: Option<[f64; 2]>,
}
impl GeoSort {
pub fn distance(&self) -> Option<f64> {
self.value.map(|value| distance_between_two_points(&self.target_point, &value))
}
}
const LINEAR_SCALE_FACTOR: f64 = 1000.0;
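
`Rank::global_score` folds the per-rule ranks lexicographically, as in a mixed-radix number: each step multiplies the accumulated rank by the next rule's `max_rank` before adding that rule's `rank`. A worked sketch, assuming the `milli::score_details` layout introduced above:

```rust
use milli::score_details::{ScoreDetails, Typo, Words};

// Words: all 3 query words matched (rank 3/3).
// Typo: 1 typo out of a 2-typo budget (rank 2/3 after Typo::rank()).
let details = [
    ScoreDetails::Words(Words { matching_words: 3, max_matching_words: 3 }),
    ScoreDetails::Typo(Typo { typo_count: 1, max_typo_count: 2 }),
];
// Lexicographic combination: ((3 - 1) * 3 + 2) / (3 * 3) = 8 / 9.
assert_eq!(ScoreDetails::global_score(details.iter()), 8.0 / 9.0);
// On the linear scale (LINEAR_SCALE_FACTOR = 1000.0) this rounds to 889.
assert_eq!(ScoreDetails::global_score_linear_scale(details.iter()), 889);
```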

View File

@@ -7,6 +7,7 @@ use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
use self::new::PartialSearchResult;
use crate::score_details::ScoreDetails;
use crate::{
execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
};
@@ -27,7 +28,6 @@ pub struct Search<'a> {
offset: usize,
limit: usize,
sort_criteria: Option<Vec<AscDesc>>,
searchable_attributes: Option<&'a [String]>,
geo_strategy: new::GeoSortStrategy,
terms_matching_strategy: TermsMatchingStrategy,
words_limit: usize,
@@ -44,7 +44,6 @@ impl<'a> Search<'a> {
offset: 0,
limit: 20,
sort_criteria: None,
searchable_attributes: None,
geo_strategy: new::GeoSortStrategy::default(),
terms_matching_strategy: TermsMatchingStrategy::default(),
exhaustive_number_hits: false,
@@ -74,11 +73,6 @@ impl<'a> Search<'a> {
self
}
pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> {
self.searchable_attributes = Some(searchable);
self
}
pub fn terms_matching_strategy(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> {
self.terms_matching_strategy = value;
self
@@ -100,7 +94,7 @@ impl<'a> Search<'a> {
self
}
/// Force the search to exhastivelly compute the number of candidates,
/// Forces the search to exhaustively compute the number of candidates,
/// this will increase the search time but allows finite pagination.
pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> {
self.exhaustive_number_hits = exhaustive_number_hits;
@@ -109,12 +103,7 @@ impl<'a> Search<'a> {
pub fn execute(&self) -> Result<SearchResult> {
let mut ctx = SearchContext::new(self.index, self.rtxn);
if let Some(searchable_attributes) = self.searchable_attributes {
ctx.searchable_attributes(searchable_attributes)?;
}
let PartialSearchResult { located_query_terms, candidates, documents_ids } =
let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } =
execute_search(
&mut ctx,
&self.query,
@@ -136,7 +125,7 @@ impl<'a> Search<'a> {
None => MatchingWords::default(),
};
Ok(SearchResult { matching_words, candidates, documents_ids })
Ok(SearchResult { matching_words, candidates, document_scores, documents_ids })
}
}
@@ -148,7 +137,6 @@ impl fmt::Debug for Search<'_> {
offset,
limit,
sort_criteria,
searchable_attributes,
geo_strategy: _,
terms_matching_strategy,
words_limit,
@@ -162,7 +150,6 @@ impl fmt::Debug for Search<'_> {
.field("offset", offset)
.field("limit", limit)
.field("sort_criteria", sort_criteria)
.field("searchable_attributes", searchable_attributes)
.field("terms_matching_strategy", terms_matching_strategy)
.field("exhaustive_number_hits", exhaustive_number_hits)
.field("words_limit", words_limit)
@@ -174,8 +161,8 @@ impl fmt::Debug for Search<'_> {
pub struct SearchResult {
pub matching_words: MatchingWords,
pub candidates: RoaringBitmap,
// TODO those documents ids should be associated with their criteria scores.
pub documents_ids: Vec<DocumentId>,
pub document_scores: Vec<Vec<ScoreDetails>>,
}
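
Callers consume the two parallel vectors in lockstep: `documents_ids[i]` is scored by `document_scores[i]`. A hypothetical consumer:

```rust
use milli::score_details::ScoreDetails;
use milli::SearchResult;

// Sketch: walk the results with their score paths side by side.
fn print_scores(result: &SearchResult) {
    for (docid, scores) in result.documents_ids.iter().zip(result.document_scores.iter()) {
        let global = ScoreDetails::global_score(scores.iter());
        println!("doc {docid}: global score {global:.3}");
    }
}
```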
#[derive(Debug, Clone, Copy, PartialEq, Eq)]

View File

@@ -3,11 +3,13 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
use super::SearchContext;
use crate::score_details::ScoreDetails;
use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput};
use crate::Result;
pub struct BucketSortOutput {
pub docids: Vec<u32>,
pub scores: Vec<Vec<ScoreDetails>>,
pub all_candidates: RoaringBitmap,
}
@@ -31,7 +33,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
};
if universe.len() < from as u64 {
return Ok(BucketSortOutput { docids: vec![], all_candidates: universe.clone() });
return Ok(BucketSortOutput {
docids: vec![],
scores: vec![],
all_candidates: universe.clone(),
});
}
if ranking_rules.is_empty() {
if let Some(distinct_fid) = distinct_fid {
@@ -49,22 +55,32 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
}
let mut all_candidates = universe - excluded;
all_candidates.extend(results.iter().copied());
return Ok(BucketSortOutput { docids: results, all_candidates });
return Ok(BucketSortOutput {
scores: vec![Default::default(); results.len()],
docids: results,
all_candidates,
});
} else {
let docids = universe.iter().skip(from).take(length).collect();
return Ok(BucketSortOutput { docids, all_candidates: universe.clone() });
let docids: Vec<u32> = universe.iter().skip(from).take(length).collect();
return Ok(BucketSortOutput {
scores: vec![Default::default(); docids.len()],
docids,
all_candidates: universe.clone(),
});
};
}
let ranking_rules_len = ranking_rules.len();
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);
ranking_rules[0].start_iteration(ctx, logger, universe, query)?;
let mut ranking_rule_scores: Vec<ScoreDetails> = vec![];
let mut ranking_rule_universes: Vec<RoaringBitmap> =
vec![RoaringBitmap::default(); ranking_rules_len];
ranking_rule_universes[0] = universe.clone();
let mut cur_ranking_rule_index = 0;
/// Finish iterating over the current ranking rule, yielding
@@ -89,11 +105,16 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
} else {
cur_ranking_rule_index -= 1;
}
// FIXME: check off by one
if ranking_rule_scores.len() > cur_ranking_rule_index {
ranking_rule_scores.pop();
}
};
}
let mut all_candidates = universe.clone();
let mut valid_docids = vec![];
let mut valid_scores = vec![];
let mut cur_offset = 0usize;
macro_rules! maybe_add_to_results {
@@ -104,23 +125,23 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
length,
logger,
&mut valid_docids,
&mut valid_scores,
&mut all_candidates,
&mut ranking_rule_universes,
&mut ranking_rules,
cur_ranking_rule_index,
&mut cur_offset,
distinct_fid,
&ranking_rule_scores,
$candidates,
)?;
};
}
while valid_docids.len() < length {
// The universe for this bucket is zero or one element, so we don't need to sort
// anything, just extend the results and go back to the parent ranking rule.
if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 {
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
maybe_add_to_results!(bucket);
// The universe for this bucket is zero, so we don't need to sort
// anything, just go back to the parent ranking rule.
if ranking_rule_universes[cur_ranking_rule_index].is_empty() {
back!();
continue;
}
@@ -130,6 +151,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
continue;
};
ranking_rule_scores.push(next_bucket.score);
logger.next_bucket_ranking_rule(
cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(),
@@ -143,10 +166,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
if cur_ranking_rule_index == ranking_rules_len - 1
|| next_bucket.candidates.len() <= 1
|| cur_offset + (next_bucket.candidates.len() as usize) < from
{
maybe_add_to_results!(next_bucket.candidates);
// FIXME: use index based logic like all the other rules so that you don't have to maintain the pop/push?
ranking_rule_scores.pop();
continue;
}
@@ -166,7 +190,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
)?;
}
Ok(BucketSortOutput { docids: valid_docids, all_candidates })
Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates })
}
/// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset`
@@ -179,14 +203,18 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
logger: &mut dyn SearchLogger<Q>,
valid_docids: &mut Vec<u32>,
valid_scores: &mut Vec<Vec<ScoreDetails>>,
all_candidates: &mut RoaringBitmap,
ranking_rule_universes: &mut [RoaringBitmap],
ranking_rules: &mut [BoxRankingRule<'ctx, Q>],
cur_ranking_rule_index: usize,
cur_offset: &mut usize,
distinct_fid: Option<u16>,
ranking_rule_scores: &[ScoreDetails],
candidates: RoaringBitmap,
) -> Result<()> {
// First apply the distinct rule on the candidates, reducing the universes if necessary
@@ -231,13 +259,17 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
let candidates =
candidates.iter().take(length - valid_docids.len()).copied().collect::<Vec<_>>();
logger.add_to_results(&candidates);
valid_docids.extend(&candidates);
valid_docids.extend_from_slice(&candidates);
valid_scores
.extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
}
} else {
// if we have passed the offset already, add some of the documents (up to the limit)
let candidates = candidates.iter().take(length - valid_docids.len()).collect::<Vec<u32>>();
logger.add_to_results(&candidates);
valid_docids.extend(&candidates);
valid_docids.extend_from_slice(&candidates);
valid_scores
.extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
}
*cur_offset += candidates.len() as usize;
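
Every document accepted from the same bucket shares the score prefix accumulated so far, which is why the slice of `ScoreDetails` is cloned once per docid, keeping `valid_docids` and `valid_scores` in lockstep. A minimal standalone sketch of that bookkeeping, with strings standing in for `ScoreDetails`:

```rust
// Stand-ins for the accumulated ranking_rule_scores of the current bucket.
let ranking_rule_scores = vec!["words: 3/3", "typo: 2/3"];
let candidates = [7u32, 12, 31];

let mut valid_docids: Vec<u32> = Vec::new();
let mut valid_scores: Vec<Vec<&str>> = Vec::new();

valid_docids.extend_from_slice(&candidates);
// One clone of the score path per accepted document.
valid_scores.extend(std::iter::repeat(ranking_rule_scores.clone()).take(candidates.len()));

assert_eq!(valid_docids.len(), valid_scores.len());
```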

View File

@@ -4,13 +4,12 @@ use std::hash::Hash;
use fxhash::FxHashMap;
use heed::types::ByteSlice;
use heed::{BytesEncode, Database, RoTxn};
use heed::{BytesDecode, BytesEncode, Database, RoTxn};
use roaring::RoaringBitmap;
use super::interner::Interned;
use super::Word;
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
use crate::heed_codec::StrBEU16Codec;
use crate::{
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
};
@@ -23,110 +22,50 @@ use crate::{
#[derive(Default)]
pub struct DatabaseCache<'ctx> {
pub word_pair_proximity_docids:
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
pub word_prefix_pair_proximity_docids:
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
pub prefix_word_pair_proximity_docids:
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
pub word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
pub exact_word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
pub word_prefix_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
pub exact_word_prefix_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
pub word_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
pub exact_word_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
pub word_prefix_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
pub exact_word_prefix_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
pub words_fst: Option<fst::Set<Cow<'ctx, [u8]>>>,
pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
pub word_prefix_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
pub word_prefix_position_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
pub word_positions: FxHashMap<Interned<String>, Vec<u16>>,
pub word_prefix_positions: FxHashMap<Interned<String>, Vec<u16>>,
pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
pub word_fids: FxHashMap<Interned<String>, Vec<u16>>,
pub word_prefix_fids: FxHashMap<Interned<String>, Vec<u16>>,
}
impl<'ctx> DatabaseCache<'ctx> {
fn get_value<'v, K1, KC, DC>(
fn get_value<'v, K1, KC>(
txn: &'ctx RoTxn,
cache_key: K1,
db_key: &'v KC::EItem,
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
cache: &mut FxHashMap<K1, Option<&'ctx [u8]>>,
db: Database<KC, ByteSlice>,
) -> Result<Option<DC::DItem>>
) -> Result<Option<&'ctx [u8]>>
where
K1: Copy + Eq + Hash,
KC: BytesEncode<'v>,
DC: BytesDecodeOwned,
{
match cache.entry(cache_key) {
Entry::Occupied(_) => {}
let bitmap_ptr = match cache.entry(cache_key) {
Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(),
Entry::Vacant(entry) => {
let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed);
entry.insert(bitmap_ptr);
}
}
match cache.get(&cache_key).unwrap() {
Some(Cow::Borrowed(bytes)) => {
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
}
Some(Cow::Owned(bytes)) => {
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
}
None => Ok(None),
}
}
fn get_value_from_keys<'v, K1, KC, DC>(
txn: &'ctx RoTxn,
cache_key: K1,
db_keys: &'v [KC::EItem],
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
db: Database<KC, ByteSlice>,
merger: MergeFn,
) -> Result<Option<DC::DItem>>
where
K1: Copy + Eq + Hash,
KC: BytesEncode<'v>,
DC: BytesDecodeOwned,
KC::EItem: Sized,
{
match cache.entry(cache_key) {
Entry::Occupied(_) => {}
Entry::Vacant(entry) => {
let bitmap_ptr: Option<Cow<'ctx, [u8]>> = match db_keys {
[] => None,
[key] => db.get(txn, key)?.map(Cow::Borrowed),
keys => {
let bitmaps = keys
.iter()
.filter_map(|key| db.get(txn, key).transpose())
.map(|v| v.map(Cow::Borrowed))
.collect::<std::result::Result<Vec<Cow<[u8]>>, _>>()?;
if bitmaps.is_empty() {
None
} else {
Some(merger(&[], &bitmaps[..])?)
}
}
};
let bitmap_ptr = db.get(txn, db_key)?;
entry.insert(bitmap_ptr);
bitmap_ptr
}
};
match cache.get(&cache_key).unwrap() {
Some(Cow::Borrowed(bytes)) => {
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
}
Some(Cow::Owned(bytes)) => {
DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
}
None => Ok(None),
}
Ok(bitmap_ptr)
}
}
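
After this refactor the cache stores raw `&'ctx [u8]` slices from LMDB and each call site decodes with the codec it needs, instead of `get_value` being generic over a `BytesDecodeOwned` type. A sketch of the resulting call-site shape, using `milli::RoaringBitmapCodec` as re-exported above:

```rust
use heed::BytesDecode;
use roaring::RoaringBitmap;

// Hypothetical helper mirroring the `.map(...).transpose()` pattern used at
// every call site below: raw cached bytes in, decoded bitmap (or error) out.
fn decode_docids(bytes: Option<&[u8]>) -> heed::Result<Option<RoaringBitmap>> {
    bytes
        .map(|bytes| milli::RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding))
        .transpose()
}
```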
impl<'ctx> SearchContext<'ctx> {
pub fn get_words_fst(&mut self) -> Result<fst::Set<Cow<'ctx, [u8]>>> {
if let Some(fst) = self.db_cache.words_fst.clone() {
@@ -160,41 +99,30 @@ impl<'ctx> SearchContext<'ctx> {
/// Retrieve or insert the given value in the `word_docids` database.
fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> {
match &self.restricted_fids {
Some(restricted_fids) => {
let interned = self.word_interner.get(word).as_str();
let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
word,
&keys[..],
&mut self.db_cache.word_docids,
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
self.txn,
word,
self.word_interner.get(word).as_str(),
&mut self.db_cache.word_docids,
self.index.word_docids.remap_data_type::<ByteSlice>(),
),
}
DatabaseCache::get_value(
self.txn,
word,
self.word_interner.get(word).as_str(),
&mut self.db_cache.word_docids,
self.index.word_docids.remap_data_type::<ByteSlice>(),
)?
.map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
.transpose()
}
fn get_db_exact_word_docids(
&mut self,
word: Interned<String>,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
DatabaseCache::get_value(
self.txn,
word,
self.word_interner.get(word).as_str(),
&mut self.db_cache.exact_word_docids,
self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
)
)?
.map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
.transpose()
}
pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> {
@@ -222,41 +150,30 @@ impl<'ctx> SearchContext<'ctx> {
&mut self,
prefix: Interned<String>,
) -> Result<Option<RoaringBitmap>> {
match &self.restricted_fids {
Some(restricted_fids) => {
let interned = self.word_interner.get(prefix).as_str();
let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
prefix,
&keys[..],
&mut self.db_cache.word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
&mut self.db_cache.word_prefix_docids,
self.index.word_prefix_docids.remap_data_type::<ByteSlice>(),
),
}
DatabaseCache::get_value(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
&mut self.db_cache.word_prefix_docids,
self.index.word_prefix_docids.remap_data_type::<ByteSlice>(),
)?
.map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
.transpose()
}
fn get_db_exact_word_prefix_docids(
&mut self,
prefix: Interned<String>,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
DatabaseCache::get_value(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
&mut self.db_cache.exact_word_prefix_docids,
self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
)
)?
.map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
.transpose()
}
pub fn get_db_word_pair_proximity_docids(
@@ -265,7 +182,7 @@ impl<'ctx> SearchContext<'ctx> {
word2: Interned<String>,
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
DatabaseCache::get_value(
self.txn,
(proximity, word1, word2),
&(
@@ -275,7 +192,9 @@ impl<'ctx> SearchContext<'ctx> {
),
&mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
)
)?
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
.transpose()
}
pub fn get_db_word_pair_proximity_docids_len(
@@ -284,7 +203,7 @@ impl<'ctx> SearchContext<'ctx> {
word2: Interned<String>,
proximity: u8,
) -> Result<Option<u64>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
DatabaseCache::get_value(
self.txn,
(proximity, word1, word2),
&(
@@ -294,7 +213,11 @@ impl<'ctx> SearchContext<'ctx> {
),
&mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
)
)?
.map(|bytes| {
CboRoaringBitmapLenCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())
})
.transpose()
}
pub fn get_db_word_prefix_pair_proximity_docids(
@@ -303,7 +226,7 @@ impl<'ctx> SearchContext<'ctx> {
prefix2: Interned<String>,
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
DatabaseCache::get_value(
self.txn,
(proximity, word1, prefix2),
&(
@@ -313,7 +236,9 @@ impl<'ctx> SearchContext<'ctx> {
),
&mut self.db_cache.word_prefix_pair_proximity_docids,
self.index.word_prefix_pair_proximity_docids.remap_data_type::<ByteSlice>(),
)
)?
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
.transpose()
}
pub fn get_db_prefix_word_pair_proximity_docids(
&mut self,
@@ -321,7 +246,7 @@ impl<'ctx> SearchContext<'ctx> {
right: Interned<String>,
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
DatabaseCache::get_value(
self.txn,
(proximity, left_prefix, right),
&(
@@ -331,7 +256,9 @@ impl<'ctx> SearchContext<'ctx> {
),
&mut self.db_cache.prefix_word_pair_proximity_docids,
self.index.prefix_word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
)
)?
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
.transpose()
}
pub fn get_db_word_fid_docids(
@@ -339,18 +266,15 @@ impl<'ctx> SearchContext<'ctx> {
word: Interned<String>,
fid: u16,
) -> Result<Option<RoaringBitmap>> {
// if the requested fid isn't in the restricted list, return None.
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
return Ok(None);
}
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
DatabaseCache::get_value(
self.txn,
(word, fid),
&(self.word_interner.get(word).as_str(), fid),
&mut self.db_cache.word_fid_docids,
self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
)
)?
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
.transpose()
}
pub fn get_db_word_prefix_fid_docids(
@@ -358,18 +282,15 @@ impl<'ctx> SearchContext<'ctx> {
word_prefix: Interned<String>,
fid: u16,
) -> Result<Option<RoaringBitmap>> {
// if the requested fid isn't in the restricted list, return None.
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
return Ok(None);
}
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
DatabaseCache::get_value(
self.txn,
(word_prefix, fid),
&(self.word_interner.get(word_prefix).as_str(), fid),
&mut self.db_cache.word_prefix_fid_docids,
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
)
)?
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
.transpose()
}
pub fn get_db_word_fids(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
@@ -388,7 +309,7 @@ impl<'ctx> SearchContext<'ctx> {
for result in remap_key_type {
let ((_, fid), value) = result?;
// filling other caches to avoid searching for them again
self.db_cache.word_fid_docids.insert((word, fid), Some(Cow::Borrowed(value)));
self.db_cache.word_fid_docids.insert((word, fid), Some(value));
fids.push(fid);
}
entry.insert(fids.clone());
@@ -414,9 +335,7 @@ impl<'ctx> SearchContext<'ctx> {
for result in remap_key_type {
let ((_, fid), value) = result?;
// filling other caches to avoid searching for them again
self.db_cache
.word_prefix_fid_docids
.insert((word_prefix, fid), Some(Cow::Borrowed(value)));
self.db_cache.word_prefix_fid_docids.insert((word_prefix, fid), Some(value));
fids.push(fid);
}
entry.insert(fids.clone());
@@ -431,13 +350,15 @@ impl<'ctx> SearchContext<'ctx> {
word: Interned<String>,
position: u16,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
DatabaseCache::get_value(
self.txn,
(word, position),
&(self.word_interner.get(word).as_str(), position),
&mut self.db_cache.word_position_docids,
self.index.word_position_docids.remap_data_type::<ByteSlice>(),
)
)?
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
.transpose()
}
pub fn get_db_word_prefix_position_docids(
@@ -445,13 +366,15 @@ impl<'ctx> SearchContext<'ctx> {
word_prefix: Interned<String>,
position: u16,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
DatabaseCache::get_value(
self.txn,
(word_prefix, position),
&(self.word_interner.get(word_prefix).as_str(), position),
&mut self.db_cache.word_prefix_position_docids,
self.index.word_prefix_position_docids.remap_data_type::<ByteSlice>(),
)
)?
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
.transpose()
}
pub fn get_db_word_positions(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
@@ -470,9 +393,7 @@ impl<'ctx> SearchContext<'ctx> {
for result in remap_key_type {
let ((_, position), value) = result?;
// filling other caches to avoid searching for them again
self.db_cache
.word_position_docids
.insert((word, position), Some(Cow::Borrowed(value)));
self.db_cache.word_position_docids.insert((word, position), Some(value));
positions.push(position);
}
entry.insert(positions.clone());
@@ -503,7 +424,7 @@ impl<'ctx> SearchContext<'ctx> {
// filling other caches to avoid searching for them again
self.db_cache
.word_prefix_position_docids
.insert((word_prefix, position), Some(Cow::Borrowed(value)));
.insert((word_prefix, position), Some(value));
positions.push(position);
}
entry.insert(positions.clone());
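The `get_db_word_fids` / `get_db_word_positions` hunks warm the `(word, fid)` and `(word, position)` docids caches as a side effect of the single prefix scan that collects the fids and positions ("filling other caches to avoid searching for them again"). A reduced sketch of that idea, with a `BTreeMap` range standing in for the LMDB prefix iterator and owned bytes standing in for the borrowed slices:

use std::collections::{BTreeMap, HashMap};

fn main() {
    // Toy stand-in for the `word_position_docids` table:
    // (word, position) -> encoded posting list.
    let mut table: BTreeMap<(String, u16), Vec<u8>> = BTreeMap::new();
    table.insert(("cat".into(), 0), vec![1]);
    table.insert(("cat".into(), 3), vec![2]);
    table.insert(("dog".into(), 1), vec![3]);

    let mut position_docids_cache: HashMap<(String, u16), Option<Vec<u8>>> = HashMap::new();
    let word = "cat".to_owned();

    // One range scan yields the word's positions *and* warms the docids
    // cache, so later lookups for (word, position) hit memory, not the DB.
    let mut positions = Vec::new();
    for ((w, position), value) in table.range((word.clone(), 0)..=(word.clone(), u16::MAX)) {
        position_docids_cache.insert((w.clone(), *position), Some(value.clone()));
        positions.push(*position);
    }
    assert_eq!(positions, vec![0, 3]);
    assert!(position_docids_cache.contains_key(&("cat".into(), 3)));
}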

View File

@@ -2,6 +2,7 @@ use roaring::{MultiOps, RoaringBitmap};
use super::query_graph::QueryGraph;
use super::ranking_rules::{RankingRule, RankingRuleOutput};
use crate::score_details::{self, ScoreDetails};
use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::ExactTerm;
use crate::{Result, SearchContext, SearchLogger};
@@ -244,7 +245,13 @@ impl State {
candidates &= universe;
(
State::AttributeStarts(query_graph.clone(), candidates_per_attribute),
Some(RankingRuleOutput { query: query_graph, candidates }),
Some(RankingRuleOutput {
query: query_graph,
candidates,
score: ScoreDetails::ExactAttribute(
score_details::ExactAttribute::MatchesFull,
),
}),
)
}
State::AttributeStarts(query_graph, candidates_per_attribute) => {
@@ -257,12 +264,24 @@ impl State {
candidates &= universe;
(
State::Empty(query_graph.clone()),
Some(RankingRuleOutput { query: query_graph, candidates }),
Some(RankingRuleOutput {
query: query_graph,
candidates,
score: ScoreDetails::ExactAttribute(
score_details::ExactAttribute::MatchesStart,
),
}),
)
}
State::Empty(query_graph) => (
State::Empty(query_graph.clone()),
Some(RankingRuleOutput { query: query_graph, candidates: universe.clone() }),
Some(RankingRuleOutput {
query: query_graph,
candidates: universe.clone(),
score: ScoreDetails::ExactAttribute(
score_details::ExactAttribute::NoExactMatch,
),
}),
),
};
(state, output)
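Every `RankingRuleOutput` now carries a `ScoreDetails`; for the exact-attribute rule the three states map directly onto three score variants. A stripped-down sketch of that state machine (the variant names mirror the diff, but the per-state candidate computation is elided):

/// Reduced stand-in for `score_details::ExactAttribute`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ExactAttribute {
    MatchesFull,
    MatchesStart,
    NoExactMatch,
}

/// Each bucket a ranking rule emits now carries the score of its candidates.
struct RankingRuleOutput {
    candidates: Vec<u32>,
    score: ExactAttribute,
}

/// Walk the buckets in relevancy order, like the exact_attribute rule:
/// whole-attribute matches, then attribute-prefix matches, then the rest.
fn next_bucket(state: &mut u8, universe: &[u32]) -> Option<RankingRuleOutput> {
    let score = match *state {
        0 => ExactAttribute::MatchesFull,
        1 => ExactAttribute::MatchesStart,
        2 => ExactAttribute::NoExactMatch,
        _ => return None,
    };
    *state += 1;
    // The real rule intersects per-state candidates with the universe here.
    Some(RankingRuleOutput { candidates: universe.to_vec(), score })
}

fn main() {
    let universe = [1, 2, 3];
    let mut state = 0;
    let mut scores = Vec::new();
    while let Some(bucket) = next_bucket(&mut state, &universe) {
        assert_eq!(bucket.candidates.len(), 3);
        scores.push(bucket.score);
    }
    assert_eq!(
        scores,
        [ExactAttribute::MatchesFull, ExactAttribute::MatchesStart, ExactAttribute::NoExactMatch]
    );
}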

View File

@@ -8,6 +8,7 @@ use rstar::RTree;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec};
use crate::score_details::{self, ScoreDetails};
use crate::{
distance_between_two_points, lat_lng_to_xyz, GeoPoint, Index, Result, SearchContext,
SearchLogger,
@@ -80,7 +81,7 @@ pub struct GeoSort<Q: RankingRuleQueryTrait> {
field_ids: Option<[u16; 2]>,
rtree: Option<RTree<GeoPoint>>,
cached_sorted_docids: VecDeque<u32>,
cached_sorted_docids: VecDeque<(u32, [f64; 2])>,
geo_candidates: RoaringBitmap,
}
@@ -130,7 +131,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
let point = lat_lng_to_xyz(&self.point);
for point in rtree.nearest_neighbor_iter(&point) {
if self.geo_candidates.contains(point.data.0) {
self.cached_sorted_docids.push_back(point.data.0);
self.cached_sorted_docids.push_back(point.data);
if self.cached_sorted_docids.len() >= cache_size {
break;
}
@@ -142,7 +143,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
let point = lat_lng_to_xyz(&opposite_of(self.point));
for point in rtree.nearest_neighbor_iter(&point) {
if self.geo_candidates.contains(point.data.0) {
self.cached_sorted_docids.push_front(point.data.0);
self.cached_sorted_docids.push_front(point.data);
if self.cached_sorted_docids.len() >= cache_size {
break;
}
@@ -177,7 +178,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
// computing the distance between two points is expensive, so we cache the result
documents
.sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize);
self.cached_sorted_docids.extend(documents.into_iter().map(|(doc_id, _)| doc_id));
self.cached_sorted_docids.extend(documents.into_iter());
};
Ok(())
@@ -220,12 +221,19 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<Q>>> {
assert!(universe.len() > 1);
let query = self.query.as_ref().unwrap().clone();
self.geo_candidates &= universe;
if self.geo_candidates.is_empty() {
return Ok(Some(RankingRuleOutput { query, candidates: universe.clone() }));
return Ok(Some(RankingRuleOutput {
query,
candidates: universe.clone(),
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: None,
}),
}));
}
let ascending = self.ascending;
@@ -236,11 +244,16 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
cache.pop_back()
}
};
while let Some(id) = next(&mut self.cached_sorted_docids) {
while let Some((id, point)) = next(&mut self.cached_sorted_docids) {
if self.geo_candidates.contains(id) {
return Ok(Some(RankingRuleOutput {
query,
candidates: RoaringBitmap::from_iter([id]),
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: Some(point),
}),
}));
}
}
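`GeoSort` now caches `(docid, [lat, lng])` pairs instead of bare docids, so the emitted `ScoreDetails::GeoSort` can report the matched point without a second lookup. A sketch of the cached-key sort and the coordinate-carrying cache (squared Euclidean distance stands in for `distance_between_two_points`, which is enough for ordering):

use std::collections::VecDeque;

fn distance2(a: &[f64; 2], b: &[f64; 2]) -> f64 {
    (a[0] - b[0]).powi(2) + (a[1] - b[1]).powi(2)
}

fn main() {
    let target = [0.0, 0.0];
    let mut documents: Vec<(u32, [f64; 2])> =
        vec![(10, [3.0, 4.0]), (11, [1.0, 1.0]), (12, [2.0, 0.0])];

    // Distance is expensive to recompute, so sort with a cached key
    // (cast to an integer because f64 is not Ord)...
    documents.sort_by_cached_key(|(_, p)| distance2(&target, p) as u64);

    // ...and keep the coordinates next to the docid: popping a bucket can
    // then report `value: Some(point)` in its score without another lookup.
    let cached: VecDeque<(u32, [f64; 2])> = documents.into_iter().collect();
    let (id, point) = cached.front().copied().unwrap();
    assert_eq!(id, 11);
    assert_eq!(point, [1.0, 1.0]);
}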

View File

@@ -50,6 +50,7 @@ use super::ranking_rule_graph::{
};
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::score_details::Rank;
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::PathVisitor;
use crate::{Result, TermsMatchingStrategy};
@@ -118,6 +119,8 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
all_costs: MappedInterner<QueryNode, Vec<u64>>,
/// An index in the first element of `all_costs`, giving the cost of the next bucket
cur_cost: u64,
/// One above the highest possible cost for this rule
next_max_cost: u64,
}
impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
@@ -139,13 +142,12 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
let mut forbidden_nodes =
SmallBitmap::for_interned_values_in(&query_graph.nodes);
let mut costs = query_graph.nodes.map(|_| None);
let mut cost = 100;
// FIXME: this works because only the `words` ranking rule uses `TermsMatchingStrategy` at the moment.
for ns in removal_order {
for n in ns.iter() {
*costs.get_mut(n) = Some((cost, forbidden_nodes.clone()));
*costs.get_mut(n) = Some((1, forbidden_nodes.clone()));
}
forbidden_nodes.union(&ns);
cost += 100;
}
costs
}
@@ -162,12 +164,16 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
// Then pre-compute the cost of all paths from each node to the end node
let all_costs = graph.find_all_costs_to_end();
let next_max_cost =
all_costs.get(graph.query_graph.root_node).iter().copied().max().unwrap_or(0) + 1;
let state = GraphBasedRankingRuleState {
graph,
conditions_cache: condition_docids_cache,
dead_ends_cache,
all_costs,
cur_cost: 0,
next_max_cost,
};
self.state = Some(state);
@@ -181,17 +187,13 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
logger: &mut dyn SearchLogger<QueryGraph>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
// If universe.len() <= 1, the bucket sort algorithm
// should not have called this function.
assert!(universe.len() > 1);
// Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`,
// should never happen
let mut state = self.state.take().unwrap();
let all_costs = state.all_costs.get(state.graph.query_graph.root_node);
// Retrieve the cost of the paths to compute
let Some(&cost) = state
.all_costs
.get(state.graph.query_graph.root_node)
let Some(&cost) = all_costs
.iter()
.find(|c| **c >= state.cur_cost) else {
self.state = None;
@@ -207,8 +209,12 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
dead_ends_cache,
all_costs,
cur_cost: _,
next_max_cost,
} = &mut state;
let rank = *next_max_cost - cost;
let score = G::rank_to_score(Rank { rank: rank as u32, max_rank: *next_max_cost as u32 });
let mut universe = universe.clone();
let mut used_conditions = SmallBitmap::for_interned_values_in(&graph.conditions_interner);
@@ -325,7 +331,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
self.state = Some(state);
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket }))
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket, score }))
}
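`next_max_cost` is precomputed as one above the highest path cost from the root node, and each bucket's rank is `next_max_cost - cost`: cheaper paths rank higher, and even the most expensive bucket keeps a rank of at least 1. The arithmetic from the hunk, in isolation:

/// Reduced stand-in for `crate::score_details::Rank`.
#[derive(Debug, PartialEq)]
struct Rank {
    rank: u32,
    max_rank: u32,
}

/// `next_max_cost` is one above the highest path cost, so the best bucket
/// (cost 0) gets `rank == max_rank` and the worst still gets rank >= 1.
fn score_for_cost(cost: u64, next_max_cost: u64) -> Rank {
    let rank = next_max_cost - cost;
    Rank { rank: rank as u32, max_rank: next_max_cost as u32 }
}

fn main() {
    let all_costs = [0u64, 1, 3];
    let next_max_cost = all_costs.iter().copied().max().unwrap_or(0) + 1; // 4
    assert_eq!(score_for_cost(0, next_max_cost), Rank { rank: 4, max_rank: 4 });
    assert_eq!(score_for_cost(3, next_max_cost), Rank { rank: 1, max_rank: 4 });
}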
fn end_iteration(

View File

@@ -20,7 +20,7 @@ mod sort;
#[cfg(test)]
mod tests;
use std::collections::{BTreeSet, HashSet};
use std::collections::HashSet;
use bucket_sort::{bucket_sort, BucketSortOutput};
use charabia::TokenizerBuilder;
@@ -44,7 +44,7 @@ use self::geo_sort::GeoSort;
pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use crate::error::FieldIdMapMissingEntry;
use crate::score_details::ScoreDetails;
use crate::search::new::distinct::apply_distinct_rule;
use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
@@ -57,7 +57,6 @@ pub struct SearchContext<'ctx> {
pub phrase_interner: DedupInterner<Phrase>,
pub term_interner: Interner<QueryTerm>,
pub phrase_docids: PhraseDocIdsCache,
pub restricted_fids: Option<Vec<u16>>,
}
impl<'ctx> SearchContext<'ctx> {
@@ -70,66 +69,8 @@ impl<'ctx> SearchContext<'ctx> {
phrase_interner: <_>::default(),
term_interner: <_>::default(),
phrase_docids: <_>::default(),
restricted_fids: None,
}
}
pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
let fids_map = self.index.fields_ids_map(self.txn)?;
let searchable_names = self.index.searchable_fields(self.txn)?;
let mut restricted_fids = Vec::new();
for field_name in searchable_attributes {
let searchable_contains_name =
searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name));
let fid = match (fids_map.id(field_name), searchable_contains_name) {
// The field id exists and the field is searchable
(Some(fid), Some(true)) | (Some(fid), None) => fid,
// The field is searchable but the field id doesn't exist => internal error
(None, Some(true)) => {
return Err(FieldIdMapMissingEntry::FieldName {
field_name: field_name.to_string(),
process: "search",
}
.into())
}
// The field is not searchable => User error
_otherwise => {
let mut valid_fields: BTreeSet<_> =
fids_map.names().map(String::from).collect();
// Filter by the searchable names
if let Some(sn) = searchable_names {
let searchable_names = sn.iter().map(|s| s.to_string()).collect();
valid_fields = &valid_fields & &searchable_names;
}
let searchable_count = valid_fields.len();
// Remove hidden fields
if let Some(dn) = self.index.displayed_fields(self.txn)? {
let displayable_names = dn.iter().map(|s| s.to_string()).collect();
valid_fields = &valid_fields & &displayable_names;
}
let hidden_fields = searchable_count > valid_fields.len();
let field = field_name.to_string();
return Err(UserError::InvalidSearchableAttribute {
field,
valid_fields,
hidden_fields,
}
.into());
}
};
restricted_fids.push(fid);
}
self.restricted_fids = Some(restricted_fids);
Ok(())
}
}
#[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
@@ -486,13 +427,15 @@ pub fn execute_search(
)?
};
let BucketSortOutput { docids, mut all_candidates } = bucket_sort_output;
let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output;
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
// The candidates are the universe unless the exhaustive number of hits
// is requested and a distinct attribute is set.
if exhaustive_number_hits {
if let Some(f) = ctx.index.distinct_field(ctx.txn)? {
if let Some(distinct_fid) = ctx.index.fields_ids_map(ctx.txn)?.id(f) {
if let Some(distinct_fid) = fields_ids_map.id(f) {
all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
}
}
@@ -500,6 +443,7 @@ pub fn execute_search(
Ok(PartialSearchResult {
candidates: all_candidates,
document_scores: scores,
documents_ids: docids,
located_query_terms,
})
@@ -551,4 +495,5 @@ pub struct PartialSearchResult {
pub located_query_terms: Option<Vec<LocatedQueryTerm>>,
pub candidates: RoaringBitmap,
pub documents_ids: Vec<DocumentId>,
pub document_scores: Vec<Vec<ScoreDetails>>,
}
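`PartialSearchResult` now returns `document_scores: Vec<Vec<ScoreDetails>>` alongside `documents_ids`, one vector of rule-by-rule details per returned document. How those details fold into the single `_rankingScore` is not shown in this hunk; the sketch below is one plausible linear fold (an average of per-rule `rank / max_rank`), purely illustrative and not taken from this diff:

/// Reduced per-rule detail: a rank out of `max_rank`, like `score_details::Rank`.
struct Rank {
    rank: u32,
    max_rank: u32,
}

/// One plausible fold of ranked details into a single score in [0, 1]:
/// the average of per-rule linear scores. Illustrative only; the scale
/// introduced by these commits may combine rules differently.
fn global_score(details: &[Rank]) -> f64 {
    let sum: f64 = details.iter().map(|d| d.rank as f64 / d.max_rank as f64).sum();
    sum / details.len() as f64
}

fn main() {
    // Best bucket on both rules (rank == max_rank) scores 1.0 ...
    assert_eq!(global_score(&[Rank { rank: 2, max_rank: 2 }, Rank { rank: 3, max_rank: 3 }]), 1.0);
    // ... and worse buckets land strictly between 0 and 1. An API layer can
    // then expose the result as `_rankingScore` when `showRankingScore` is set.
    let s = global_score(&[Rank { rank: 1, max_rank: 2 }, Rank { rank: 1, max_rank: 3 }]);
    assert!(s > 0.0 && s < 1.0);
}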

View File

@@ -49,10 +49,15 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
if let Some((cost_of_ignoring, forbidden_nodes)) =
cost_of_ignoring_node.get(dest_idx)
{
let dest = graph_nodes.get(dest_idx);
let dest_size = match &dest.data {
QueryNodeData::Term(term) => term.term_ids.len(),
_ => panic!(),
};
let new_edge_id = edges_store.insert(Some(Edge {
source_node: source_id,
dest_node: dest_idx,
cost: *cost_of_ignoring,
cost: *cost_of_ignoring * dest_size as u32,
condition: None,
nodes_to_skip: forbidden_nodes.clone(),
}));
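The skip edge's cost is now multiplied by the number of term ids in the destination node, so ignoring a node that covers several words costs proportionally more than ignoring a single word:

/// Skipping a query node now costs proportionally to how many terms it
/// covers, so dropping a two-term node outweighs dropping a one-term node.
fn skip_edge_cost(cost_of_ignoring: u32, dest_term_ids: usize) -> u32 {
    cost_of_ignoring * dest_term_ids as u32
}

fn main() {
    assert_eq!(skip_edge_cost(1, 1), 1); // single word
    assert_eq!(skip_edge_cost(1, 2), 2); // node covering two term ids
}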

View File

@@ -1,6 +1,7 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
@@ -84,4 +85,8 @@ impl RankingRuleGraphTrait for ExactnessGraph {
Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)])
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Exactness(rank)
}
}

View File

@@ -2,6 +2,7 @@ use fxhash::FxHashSet;
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
@@ -68,7 +69,7 @@ impl RankingRuleGraphTrait for FidGraph {
}
let mut edges = vec![];
for fid in all_fields {
for fid in all_fields.iter().copied() {
// TODO: We can improve performance and relevancy by storing
// the term subsets associated to each field ids fetched.
edges.push((
@@ -80,6 +81,35 @@ impl RankingRuleGraphTrait for FidGraph {
));
}
// always look up the max_fid if we haven't already, and add an artificial condition for max scoring
let max_fid: Option<u16> = {
if let Some(max_fid) = ctx
.index
.searchable_fields_ids(ctx.txn)?
.map(|field_ids| field_ids.into_iter().max())
{
max_fid
} else {
ctx.index.fields_ids_map(ctx.txn)?.ids().max()
}
};
if let Some(max_fid) = max_fid {
if !all_fields.contains(&max_fid) {
edges.push((
max_fid as u32 * term.term_ids.len() as u32, // TODO: improve the fid score, i.e. fid^10.
conditions_interner.insert(FidCondition {
term: term.clone(), // TODO remove this ugly clone
fid: max_fid,
}),
));
}
}
Ok(edges)
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Fid(rank)
}
}
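The virtual max-fid condition keeps the fid rule's cost ceiling (and therefore its `max_rank`) identical regardless of which fields actually matched, which keeps scores comparable across queries. A toy computation of the ceiling, with and without a real edge at the maximum fid:

/// Without the virtual condition, the max cost (hence `max_rank`) would
/// shrink whenever the highest field id produced no real edge, skewing scores.
fn max_edge_cost(real_edge_costs: &[u32], max_fid: u16, term_ids: usize) -> u32 {
    let virtual_cost = max_fid as u32 * term_ids as u32;
    real_edge_costs.iter().copied().chain(std::iter::once(virtual_cost)).max().unwrap()
}

fn main() {
    // Only fields 0 and 2 matched, but the schema's max fid is 5:
    // the virtual edge keeps the cost ceiling at 5 * term_ids.
    assert_eq!(max_edge_cost(&[0, 2], 5, 1), 5);
    assert_eq!(max_edge_cost(&[0, 4], 5, 2), 10);
}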

View File

@@ -41,6 +41,7 @@ use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner
use super::query_term::LocatedQueryTermSubset;
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext};
use crate::score_details::{Rank, ScoreDetails};
use crate::Result;
pub struct ComputedCondition {
@@ -110,6 +111,9 @@ pub trait RankingRuleGraphTrait: Sized + 'static {
source_node: Option<&LocatedQueryTermSubset>,
dest_node: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>>;
/// Convert the rank of a path to its corresponding score for the ranking rule
fn rank_to_score(rank: Rank) -> ScoreDetails;
}
/// The graph used by graph-based ranking rules.

View File

@@ -2,6 +2,7 @@ use fxhash::{FxHashMap, FxHashSet};
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_position;
@@ -105,8 +106,20 @@ impl RankingRuleGraphTrait for PositionGraph {
));
}
// artificial empty condition for computing the max cost
let max_cost = term.term_ids.len() as u32 * 10;
edges.push((
max_cost,
conditions_interner
.insert(PositionCondition { term: term.clone(), positions: Vec::default() }),
));
Ok(edges)
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Position(rank)
}
}
fn cost_from_position(sum_positions: u32) -> u32 {

View File

@@ -4,6 +4,7 @@ pub mod compute_docids;
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::SearchContext;
@@ -36,4 +37,8 @@ impl RankingRuleGraphTrait for ProximityGraph {
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
build::build_edges(ctx, conditions_interner, source_term, dest_term)
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Proximity(rank)
}
}

View File

@@ -1,6 +1,7 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{self, Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
@@ -75,4 +76,8 @@ impl RankingRuleGraphTrait for TypoGraph {
}
Ok(edges)
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Typo(score_details::Typo::from_rank(rank))
}
}
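Each graph gets a `rank_to_score` hook; for typos the generic `Rank` is converted through `Typo::from_rank`. The conversion below is a hypothetical reconstruction of that mapping (the field names and formula are assumptions, not taken from this diff):

/// Hypothetical reconstruction of a typo count from a `Rank`; the real
/// `score_details::Typo::from_rank` may differ in its details.
#[derive(Debug, PartialEq)]
struct Typo {
    typo_count: u32,
    max_typo_count: u32,
}

fn typo_from_rank(rank: u32, max_rank: u32) -> Typo {
    // rank == max_rank corresponds to the cheapest path: zero typos.
    Typo { typo_count: max_rank - rank, max_typo_count: max_rank - 1 }
}

fn main() {
    assert_eq!(typo_from_rank(3, 3), Typo { typo_count: 0, max_typo_count: 2 });
    assert_eq!(typo_from_rank(1, 3), Typo { typo_count: 2, max_typo_count: 2 });
}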

View File

@@ -1,6 +1,7 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{self, Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
@@ -41,9 +42,10 @@ impl RankingRuleGraphTrait for WordsGraph {
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
Ok(vec![(
to_term.term_ids.len() as u32,
conditions_interner.insert(WordsCondition { term: to_term.clone() }),
)])
Ok(vec![(0, conditions_interner.insert(WordsCondition { term: to_term.clone() }))])
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Words(score_details::Words::from_rank(rank))
}
}

View File

@@ -2,6 +2,7 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{QueryGraph, SearchContext};
use crate::score_details::ScoreDetails;
use crate::Result;
/// An internal trait implemented by only [`PlaceholderQuery`] and [`QueryGraph`]
@@ -66,4 +67,6 @@ pub struct RankingRuleOutput<Q> {
pub query: Q,
/// The allowed candidates for the child ranking rule
pub candidates: RoaringBitmap,
/// The score for the candidates of the current bucket
pub score: ScoreDetails,
}

View File

@@ -1,9 +1,11 @@
use heed::BytesDecode;
use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::ByteSliceRefCodec;
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
use crate::score_details::{self, ScoreDetails};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::{FieldId, Index, Result};
@@ -67,7 +69,7 @@ impl<'ctx, Query> Sort<'ctx, Query> {
impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, Query> {
fn id(&self) -> String {
let Self { field_name, is_ascending, .. } = self;
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc " })
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" })
}
fn start_iteration(
&mut self,
@@ -118,12 +120,43 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
};
let number_iter = number_iter.map(|r| -> Result<_> {
let (docids, bytes) = r?;
Ok((
docids,
serde_json::Value::Number(
serde_json::Number::from_f64(
OrderedF64Codec::bytes_decode(bytes).expect("some number"),
)
.expect("too big float"),
),
))
});
let string_iter = string_iter.map(|r| -> Result<_> {
let (docids, bytes) = r?;
Ok((
docids,
serde_json::Value::String(
StrRefCodec::bytes_decode(bytes).expect("some string").to_owned(),
),
))
});
let query_graph = parent_query.clone();
let ascending = self.is_ascending;
let field_name = self.field_name.clone();
RankingRuleOutputIterWrapper::new(Box::new(number_iter.chain(string_iter).map(
move |r| {
let (docids, _) = r?;
Ok(RankingRuleOutput { query: query_graph.clone(), candidates: docids })
let (docids, value) = r?;
Ok(RankingRuleOutput {
query: query_graph.clone(),
candidates: docids,
score: ScoreDetails::Sort(score_details::Sort {
field_name: field_name.clone(),
ascending,
value,
}),
})
},
)))
}
@@ -150,7 +183,15 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
Ok(Some(bucket))
} else {
let query = self.original_query.as_ref().unwrap().clone();
Ok(Some(RankingRuleOutput { query, candidates: universe.clone() }))
Ok(Some(RankingRuleOutput {
query,
candidates: universe.clone(),
score: ScoreDetails::Sort(score_details::Sort {
field_name: self.field_name.clone(),
ascending: self.is_ascending,
value: serde_json::Value::Null,
}),
}))
}
}
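The sort rule now decodes each facet group key into a `serde_json::Value` so the `Sort` score detail can expose the value being sorted on. A sketch of that mapping, assuming a `serde_json` dependency (the real code decodes raw LMDB bytes with `OrderedF64Codec` / `StrRefCodec` rather than matching an enum):

// Stand-in for the two facet value shapes the sort iterators yield.
enum FacetValue<'a> {
    Number(f64),
    Str(&'a str),
}

fn sort_value(v: FacetValue) -> serde_json::Value {
    match v {
        FacetValue::Number(n) => serde_json::Value::Number(
            serde_json::Number::from_f64(n).expect("finite float"),
        ),
        FacetValue::Str(s) => serde_json::Value::String(s.to_owned()),
    }
}

fn main() {
    assert_eq!(sort_value(FacetValue::Number(4.5)), serde_json::json!(4.5));
    assert_eq!(sort_value(FacetValue::Str("blue")), serde_json::json!("blue"));
    // Documents without the sorted field get `Value::Null` in their Sort detail.
    assert_eq!(serde_json::Value::Null, serde_json::json!(null));
}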

View File

@@ -4,8 +4,7 @@ pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDele
pub use self::facet::bulk::FacetsUpdateBulk;
pub use self::facet::incremental::FacetsUpdateIncrementalInner;
pub use self::index_documents::{
merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, DocumentAdditionResult, DocumentId,
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, MergeFn,
DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
};
pub use self::indexer_config::IndexerConfig;
pub use self::prefix_word_pairs::{