Compare commits


29 Commits

Author · SHA1 · Message · Date
Kerollmops · 54d2a7846e · Return one of the original facet values when doing a facet search · 2023-06-12 11:39:31 +02:00
Kerollmops · 0513a0bafa · Make sure the facet queries are normalized · 2023-06-12 11:13:34 +02:00
Kerollmops · 51891c898d · Remove useless InvalidSearchFacet error · 2023-06-07 17:58:05 +02:00
Kerollmops · 6147688fe4 · Make rustfmt happy · 2023-06-07 17:02:41 +02:00
Kerollmops · 681920ba3e · Rename the hits and query output into facetHits and facetQuery respectively · 2023-06-07 11:20:46 +02:00
Kerollmops · 975a88a093 · Fix the error code returned when the facetName field is missing · 2023-06-07 11:14:14 +02:00
Kerollmops · ef0fe1195f · Introduce a new invalid_facet_search_facet_name error code · 2023-06-07 11:04:53 +02:00
Kerollmops · 377ebb8a52 · Use the right field id to write the string facet values FST · 2023-06-07 10:52:35 +02:00
Kerollmops · 3b174c6f15 · Return an empty list of results if attribute is set as filterable · 2023-06-07 10:33:11 +02:00
Clément Renault · 25d49f5811 · Use the minWordSizeForTypos index settings · 2023-06-06 10:48:43 +02:00
Clément Renault · e9af506591 · Format the code · 2023-06-06 10:48:43 +02:00
Clément Renault · 6ee4f4b544 · Fix compilation issues · 2023-06-06 10:48:42 +02:00
Clément Renault · e92576e0d4 · Simplify the placeholder search of the facet-search route · 2023-06-06 10:48:08 +02:00
Clément Renault · 7e1a49e7fa · Use the disableOnAttributes parameter on the facet-search route · 2023-06-06 10:48:08 +02:00
Clément Renault · 17e86e9c42 · Use the disableOnWords parameter on the facet-search route · 2023-06-06 10:48:08 +02:00
Clément Renault · f4f5ae70d6 · Support the typoTolerant.enabled parameter · 2023-06-06 10:48:08 +02:00
Clément Renault · edf3031dae · Log an error when a facet value is missing from the database · 2023-06-06 10:48:08 +02:00
Clément Renault · 09d440a427 · Rename the SearchForFacetValues struct · 2023-06-06 10:48:08 +02:00
Clément Renault · 8b66318a6b · Return an internal error when a field id is missing · 2023-06-06 10:48:08 +02:00
Clément Renault · 196a2b3d58 · Make clippy happy · 2023-06-06 10:48:07 +02:00
Clément Renault · c153cbc593 · Improve the returned errors from the facet-search route · 2023-06-06 10:48:07 +02:00
Clément Renault · e731f1c8ba · Fix the max number of facets to be returned to 100 · 2023-06-06 10:48:07 +02:00
Clément Renault · c39d830ff8 · Return the correct response JSON object from the facet-search route · 2023-06-06 10:48:07 +02:00
Clément Renault · 2dca4d82d8 · Send analytics about the facet-search route · 2023-06-06 10:48:07 +02:00
Clément Renault · ce87ee8ea0 · Make the search for facet work · 2023-06-06 10:37:27 +02:00
Kerollmops · f06bb445a6 · Introduce the facet search route · 2023-06-06 10:37:26 +02:00
Kerollmops · 81792eb5f7 · Restrict the number of facet search results to 1000 · 2023-06-06 10:37:26 +02:00
Kerollmops · 7a49bbc8df · Introduce the SearchForFacetValue struct · 2023-06-06 10:37:26 +02:00
Clément Renault · ca16aaaa30 · Store the facet string values in multiple FSTs · 2023-06-06 10:37:26 +02:00
153 changed files with 2469 additions and 12455 deletions
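
Taken together, these commits add a `POST /indexes/{index_uid}/facet-search` route: given a required `facetName` and an optional, normalized `facetQuery`, it returns the matching facet values (at most 100) under `facetHits`, alongside the echoed `facetQuery`. The sketch below shows the request and response shapes implied by the diff; the index name, facet values, and hit counts are illustrative, and the `value`/`count` fields are assumed from milli's `FacetValueHit`.

```rust
use serde_json::json;

fn main() {
    // Hypothetical body for `POST /indexes/movies/facet-search`.
    let request = json!({
        "facetName": "genres",  // required; omitting it triggers the new
                                // `missing_facet_search_facet_name` error
        "facetQuery": "adv",    // normalized (and typo-tolerant) prefix query
        "q": "batman"           // any regular search parameter can be added
    });

    // Response shape, following the `FacetSearchResult` struct below.
    let response = json!({
        "facetHits": [{ "value": "Adventure", "count": 7 }],
        "facetQuery": "adv",
        "processingTimeMs": 2
    });
    println!("{request}\n{response}");
}
```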

---

@@ -2,3 +2,4 @@ target
Dockerfile
.dockerignore
.gitignore
**/.git

---

@@ -35,7 +35,7 @@ jobs:
- name: Build deb package
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
- name: Upload debian pkg to release
-uses: svenstaro/upload-release-action@2.6.1
+uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/debian/meilisearch.deb

---

@@ -54,7 +54,7 @@ jobs:
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
-uses: svenstaro/upload-release-action@2.6.1
+uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/meilisearch
@@ -87,7 +87,7 @@ jobs:
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
-uses: svenstaro/upload-release-action@2.6.1
+uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
@@ -121,7 +121,7 @@ jobs:
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
-uses: svenstaro/upload-release-action@2.6.1
+uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch
@@ -183,7 +183,7 @@ jobs:
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
-uses: svenstaro/upload-release-action@2.6.1
+uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch

---

@@ -58,9 +58,13 @@ jobs:
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
with:
platforms: linux/amd64,linux/arm64
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
with:
platforms: linux/amd64,linux/arm64
- name: Login to Docker Hub
uses: docker/login-action@v2
@@ -88,10 +92,13 @@ jobs:
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
builder: ${{ steps.buildx.outputs.name }}
build-args: |
COMMIT_SHA=${{ github.sha }}
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
GIT_TAG=${{ github.ref_name }}
cache-from: type=gha
cache-to: type=gha,mode=max
# /!\ Don't touch this without checking with Cloud team
- name: Send CI information to Cloud team

---

@@ -3,11 +3,6 @@ name: SDKs tests
on:
workflow_dispatch:
inputs:
docker_image:
description: 'The Meilisearch Docker image used'
required: false
default: nightly
schedule:
- cron: "0 6 * * MON" # Every Monday at 6:00AM
@@ -16,28 +11,13 @@ env:
MEILI_NO_ANALYTICS: 'true'
jobs:
define-docker-image:
runs-on: ubuntu-latest
outputs:
docker-image: ${{ steps.define-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v3
- name: Define the Docker image we need to use
id: define-image
run: |
event=${{ github.event.action }}
echo "docker-image=nightly" >> $GITHUB_OUTPUT
if [[ $event == 'workflow_dispatch' ]]; then
echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT
fi
meilisearch-js-tests:
needs: define-docker-image
name: JS SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
-image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
+image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -67,12 +47,11 @@ jobs:
run: yarn test:env:browser
instant-meilisearch-tests:
needs: define-docker-image
name: instant-meilisearch tests
runs-on: ubuntu-latest
services:
meilisearch:
-image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
+image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -94,12 +73,11 @@ jobs:
run: yarn build
meilisearch-php-tests:
needs: define-docker-image
name: PHP SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
-image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
+image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -125,12 +103,11 @@ jobs:
composer remove --dev guzzlehttp/guzzle http-interop/http-factory-guzzle
meilisearch-python-tests:
needs: define-docker-image
name: Python SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
-image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
+image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -150,12 +127,11 @@ jobs:
run: pipenv run pytest
meilisearch-go-tests:
needs: define-docker-image
name: Go SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
-image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
+image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -163,7 +139,7 @@ jobs:
- '7700:7700'
steps:
- name: Set up Go
-uses: actions/setup-go@v4
+uses: actions/setup-go@v3
with:
go-version: stable
- uses: actions/checkout@v3
@@ -180,12 +156,11 @@ jobs:
run: go test -v ./...
meilisearch-ruby-tests:
needs: define-docker-image
name: Ruby SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
-image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
+image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -205,12 +180,11 @@ jobs:
run: bundle exec rspec
meilisearch-rust-tests:
needs: define-docker-image
name: Rust SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
-image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
+image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}

---

@@ -43,7 +43,7 @@ jobs:
toolchain: nightly
override: true
- name: Cache dependencies
-uses: Swatinem/rust-cache@v2.4.0
+uses: Swatinem/rust-cache@v2.2.1
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -65,7 +65,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
-uses: Swatinem/rust-cache@v2.4.0
+uses: Swatinem/rust-cache@v2.2.1
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -105,29 +105,6 @@ jobs:
command: test
args: --workspace --locked --release --all-features
test-disabled-tokenization:
name: Test disabled tokenization
runs-on: ubuntu-latest
container:
image: ubuntu:18.04
if: github.event_name == 'schedule'
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo tree without default features and check lindera is not present
run: |
cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
- name: Run cargo tree with default features and check lindera is present
run: |
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
@@ -146,7 +123,7 @@ jobs:
toolchain: stable
override: true
- name: Cache dependencies
-uses: Swatinem/rust-cache@v2.4.0
+uses: Swatinem/rust-cache@v2.2.1
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@@ -165,7 +142,7 @@ jobs:
override: true
components: clippy
- name: Cache dependencies
-uses: Swatinem/rust-cache@v2.4.0
+uses: Swatinem/rust-cache@v2.2.1
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@@ -184,7 +161,7 @@ jobs:
override: true
components: rustfmt
- name: Cache dependencies
-uses: Swatinem/rust-cache@v2.4.0
+uses: Swatinem/rust-cache@v2.2.1
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

---

@@ -1,3 +1,4 @@
# syntax=docker/dockerfile:1.4
# Compile
FROM rust:alpine3.16 AS compiler
@@ -11,7 +12,7 @@ ARG GIT_TAG
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
ENV RUSTFLAGS="-C target-feature=-crt-static"
-COPY . .
+COPY --link . .
RUN set -eux; \
apkArch="$(apk --print-arch)"; \
if [ "$apkArch" = "aarch64" ]; then \
@@ -30,7 +31,7 @@ RUN apk update --quiet \
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.
-COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
+COPY --from=compiler --link /meilisearch/target/release/meilisearch /bin/meilisearch
# To stay compatible with the older version of the container (pre v0.27.0) we're
# going to symlink the meilisearch binary in the path to `/meilisearch`
RUN ln -s /bin/meilisearch /meilisearch

---
(File diff suppressed because it is too large.)

---

@@ -1,19 +0,0 @@
global:
scrape_interval: 15s # By default, scrape targets every 15 seconds.
# Attach these labels to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager).
external_labels:
monitor: 'codelab-monitor'
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'meilisearch'
# Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s
static_configs:
- targets: ['localhost:7700']

---

@@ -1,131 +1,131 @@
# This file shows the default configuration of Meilisearch.
# All variables are defined here: https://www.meilisearch.com/docs/learn/configuration/instance_options#environment-variables
db_path = "./data.ms"
# Designates the location where database files will be created and retrieved.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#database-path
db_path = "./data.ms"
env = "development"
# Configures the instance's environment. Value must be either `production` or `development`.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#environment
env = "development"
# The address on which the HTTP server will listen.
http_addr = "localhost:7700"
# The address on which the HTTP server will listen.
# master_key = "YOUR_MASTER_KEY_VALUE"
# Sets the instance's master key, automatically protecting all routes except GET /health.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#master-key
# master_key = "YOUR_MASTER_KEY_VALUE"
# no_analytics = true
# Deactivates Meilisearch's built-in telemetry when provided.
# Meilisearch automatically collects data from all instances that do not opt out using this flag.
# All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted at any time.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#disable-analytics
# no_analytics = true
http_payload_size_limit = "100 MB"
# Sets the maximum size of accepted payloads.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#payload-limit-size
http_payload_size_limit = "100 MB"
log_level = "INFO"
# Defines how much detail should be present in Meilisearch's logs.
# Meilisearch currently supports six log levels, listed in order of increasing verbosity: `OFF`, `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE`
# https://www.meilisearch.com/docs/learn/configuration/instance_options#log-level
log_level = "INFO"
# max_indexing_memory = "2 GiB"
# Sets the maximum amount of RAM Meilisearch can use when indexing.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-memory
# max_indexing_memory = "2 GiB"
# max_indexing_threads = 4
# Sets the maximum number of threads Meilisearch can use during indexing.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-threads
# max_indexing_threads = 4
#############
### DUMPS ###
#############
dump_dir = "dumps/"
# Sets the directory where Meilisearch will create dump files.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#dump-directory
dump_dir = "dumps/"
# import_dump = "./path/to/my/file.dump"
# Imports the dump file located at the specified path. Path must point to a .dump file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-dump
# import_dump = "./path/to/my/file.dump"
ignore_missing_dump = false
# Prevents Meilisearch from throwing an error when `import_dump` does not point to a valid dump file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-dump
ignore_missing_dump = false
ignore_dump_if_db_exists = false
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_dump`.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-dump-if-db-exists
ignore_dump_if_db_exists = false
#################
### SNAPSHOTS ###
#################
schedule_snapshot = false
# Enables scheduled snapshots when true, disables them when false (the default).
# If the value is given as an integer, then enables the scheduled snapshot with the passed value as the interval
# between each snapshot, in seconds.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#schedule-snapshot-creation
schedule_snapshot = false
snapshot_dir = "snapshots/"
# Sets the directory where Meilisearch will store snapshots.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#snapshot-destination
snapshot_dir = "snapshots/"
# import_snapshot = "./path/to/my/snapshot"
# Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-snapshot
# import_snapshot = "./path/to/my/snapshot"
ignore_missing_snapshot = false
# Prevents a Meilisearch instance from throwing an error when `import_snapshot` does not point to a valid snapshot file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-snapshot
ignore_missing_snapshot = false
ignore_snapshot_if_db_exists = false
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_snapshot`.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-snapshot-if-db-exists
ignore_snapshot_if_db_exists = false
###########
### SSL ###
###########
# ssl_auth_path = "./path/to/root"
# Enables client authentication in the specified path.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-authentication-path
# ssl_auth_path = "./path/to/root"
# ssl_cert_path = "./path/to/certfile"
# Sets the server's SSL certificates.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-certificates-path
# ssl_cert_path = "./path/to/certfile"
# ssl_key_path = "./path/to/private-key"
# Sets the server's SSL key files.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-key-path
# ssl_key_path = "./path/to/private-key"
# ssl_ocsp_path = "./path/to/ocsp-file"
# Sets the server's OCSP file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-ocsp-path
# ssl_ocsp_path = "./path/to/ocsp-file"
ssl_require_auth = false
# Makes SSL authentication mandatory.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-require-auth
ssl_require_auth = false
ssl_resumption = false
# Activates SSL session resumption.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-resumption
ssl_resumption = false
ssl_tickets = false
# Activates SSL tickets.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-tickets
ssl_tickets = false
#############################
### Experimental features ###
#############################
experimental_enable_metrics = false
# Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
# Enables the Prometheus metrics on the `GET /metrics` endpoint.
experimental_enable_metrics = false
# Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
experimental_reduce_indexing_memory_usage = false
# Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>

---
(File diff suppressed because it is too large.)

---

@@ -90,17 +90,8 @@ pub enum IndexStatus {
pub struct IndexStats {
/// Number of documents in the index.
pub number_of_documents: u64,
-/// Size taken up by the index' DB, in bytes.
-///
-/// This includes the size taken by both the used and free pages of the DB, and as the free pages
-/// are not returned to the disk after a deletion, this number is typically larger than
-/// `used_database_size` that only includes the size of the used pages.
+/// Size of the index' DB, in bytes.
pub database_size: u64,
/// Size taken by the used pages of the index' DB, in bytes.
///
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
/// this value is typically smaller than `database_size`.
pub used_database_size: u64,
/// Association of every field name with the number of times it occurs in the documents.
pub field_distribution: FieldDistribution,
/// Creation date of the index.
@@ -116,10 +107,10 @@ impl IndexStats {
///
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
pub fn new(index: &Index, rtxn: &RoTxn) -> Result<Self> {
+let database_size = index.on_disk_size()?;
Ok(IndexStats {
number_of_documents: index.number_of_documents(rtxn)?,
-database_size: index.on_disk_size()?,
-used_database_size: index.used_size()?,
+database_size,
field_distribution: index.field_distribution(rtxn)?,
created_at: index.created_at(rtxn)?,
updated_at: index.updated_at(rtxn)?,

---

@@ -31,7 +31,7 @@ mod uuid_codec;
pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;
-use std::collections::{BTreeMap, HashMap};
+use std::collections::HashMap;
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
@@ -573,16 +573,10 @@ impl IndexScheduler {
&self.index_mapper.indexer_config
}
/// Return the real database size (i.e.: The size **with** the free pages)
pub fn size(&self) -> Result<u64> {
Ok(self.env.real_disk_size()?)
}
/// Return the used database size (i.e.: The size **without** the free pages)
pub fn used_size(&self) -> Result<u64> {
Ok(self.env.non_free_pages_size()?)
}
/// Return the index corresponding to the name.
///
/// * If the index wasn't opened before, the index will be opened.
@@ -762,38 +756,6 @@ impl IndexScheduler {
Ok(tasks)
}
/// The returned structure contains:
/// 1. The name of the property being observed can be `statuses`, `types`, or `indexes`.
/// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example.
/// 3. The number of times the properties appeared.
pub fn get_stats(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
let rtxn = self.read_txn()?;
let mut res = BTreeMap::new();
res.insert(
"statuses".to_string(),
enum_iterator::all::<Status>()
.map(|s| Ok((s.to_string(), self.get_status(&rtxn, s)?.len())))
.collect::<Result<BTreeMap<String, u64>>>()?,
);
res.insert(
"types".to_string(),
enum_iterator::all::<Kind>()
.map(|s| Ok((s.to_string(), self.get_kind(&rtxn, s)?.len())))
.collect::<Result<BTreeMap<String, u64>>>()?,
);
res.insert(
"indexes".to_string(),
self.index_tasks
.iter(&rtxn)?
.map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?))
.collect::<Result<BTreeMap<String, u64>>>()?,
);
Ok(res)
}
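
For reference, the `get_stats` method removed above serializes to a nested map along these lines; the counts and the `movies` index name in this sketch are made up.

```rust
use serde_json::json;

fn main() {
    // One map per observed property (statuses, types, indexes), each
    // associating a value with the number of tasks it appears in.
    let stats = json!({
        "statuses": { "enqueued": 0, "processing": 1, "succeeded": 42, "failed": 2 },
        "types": { "documentAdditionOrUpdate": 40, "settingsUpdate": 5 },
        "indexes": { "movies": 45 }
    });
    println!("{stats}");
}
```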
/// Return true iff there is at least one task associated with this index
/// that is processing.
pub fn is_index_processing(&self, index: &str) -> Result<bool> {

---

@@ -45,11 +45,6 @@ impl AuthController {
self.store.size()
}
/// Return the used size of the `AuthController` database in bytes.
pub fn used_size(&self) -> Result<u64> {
self.store.used_size()
}
pub fn create_key(&self, create_key: CreateApiKey) -> Result<Key> {
match self.store.get_api_key(create_key.uid)? {
Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())),

---

@@ -75,11 +75,6 @@ impl HeedAuthStore {
Ok(self.env.real_disk_size()?)
}
/// Return the number of bytes actually used in the database
pub fn used_size(&self) -> Result<u64> {
Ok(self.env.non_free_pages_size()?)
}
pub fn set_drop_on_close(&mut self, v: bool) {
self.should_close_on_drop = v;
}

---

@@ -151,6 +151,10 @@ make_missing_field_convenience_builder!(MissingApiKeyExpiresAt, missing_api_key_
make_missing_field_convenience_builder!(MissingApiKeyIndexes, missing_api_key_indexes);
make_missing_field_convenience_builder!(MissingSwapIndexes, missing_swap_indexes);
make_missing_field_convenience_builder!(MissingDocumentFilter, missing_document_filter);
make_missing_field_convenience_builder!(
MissingFacetSearchFacetName,
missing_facet_search_facet_name
);
// Integrate a sub-error into a [`DeserrError`] by taking its error message but using
// the default error code (C) from `Self`

---

@@ -230,6 +230,7 @@ InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ;
@@ -239,9 +240,9 @@ InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
@@ -279,6 +280,7 @@ MissingApiKeyIndexes , InvalidRequest , BAD_REQUEST ;
MissingAuthorizationHeader , Auth , UNAUTHORIZED ;
MissingContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
MissingDocumentId , InvalidRequest , BAD_REQUEST ;
MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
MissingIndexUid , InvalidRequest , BAD_REQUEST ;
MissingMasterKey , Auth , UNAUTHORIZED ;
MissingPayload , InvalidRequest , BAD_REQUEST ;
@@ -332,6 +334,9 @@ impl ErrorCode for milli::Error {
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidFacetSearchFacetName { .. } => {
Code::InvalidFacetSearchFacetName
}
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::SortError(_) => Code::InvalidSearchSort,
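
Both new codes map to `BAD_REQUEST`, so a facet search that omits `facetName` should answer with a payload along these lines; this sketch follows Meilisearch's usual error envelope, and the exact message wording is not shown in this diff.

```rust
use serde_json::json;

fn main() {
    // Assumed 400 Bad Request body when `facetName` is missing.
    let error = json!({
        "message": "Missing field `facetName`",
        "code": "missing_facet_search_facet_name",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#missing_facet_search_facet_name"
    });
    println!("{error}");
}
```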

---

@@ -38,6 +38,18 @@ impl MultiSearchAggregator {
pub fn succeed(&mut self) {}
}
#[derive(Default)]
pub struct FacetSearchAggregator;
#[allow(dead_code)]
impl FacetSearchAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self::default()
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
impl MockAnalytics {
#[allow(clippy::new_ret_no_self)]
pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
@@ -56,6 +68,7 @@ impl Analytics for MockAnalytics {
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
fn add_documents(
&self,
_documents_query: &UpdateDocumentsQuery,

---

@@ -25,6 +25,8 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics;
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
// if we are in release mode and the feature analytics was enabled
// we use the real analytics
@@ -34,6 +36,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
@@ -88,6 +92,9 @@ pub trait Analytics: Sync + Send {
/// This method should be called to aggregate a post array of searches
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
/// This method should be called to aggregate post facet values searches
fn post_facet_search(&self, aggregate: FacetSearchAggregator);
// this method should be called to aggregate a add documents request
fn add_documents(
&self,

---

@@ -1,5 +1,6 @@
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::fs;
use std::mem::take;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};
@@ -29,11 +30,13 @@ use super::{
use crate::analytics::Analytics;
use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::indexes::facet_search::FacetSearchQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
use crate::Opt;
@@ -71,6 +74,7 @@ pub enum AnalyticsMsg {
AggregateGetSearch(SearchAggregator),
AggregatePostSearch(SearchAggregator),
AggregatePostMultiSearch(MultiSearchAggregator),
AggregatePostFacetSearch(FacetSearchAggregator),
AggregateAddDocuments(DocumentsAggregator),
AggregateDeleteDocuments(DocumentsDeletionAggregator),
AggregateUpdateDocuments(DocumentsAggregator),
@@ -139,6 +143,7 @@ impl SegmentAnalytics {
batcher,
post_search_aggregator: SearchAggregator::default(),
post_multi_search_aggregator: MultiSearchAggregator::default(),
post_facet_search_aggregator: FacetSearchAggregator::default(),
get_search_aggregator: SearchAggregator::default(),
add_documents_aggregator: DocumentsAggregator::default(),
delete_documents_aggregator: DocumentsDeletionAggregator::default(),
@@ -182,6 +187,10 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
}
fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate));
}
fn post_multi_search(&self, aggregate: MultiSearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate));
}
@@ -354,6 +363,7 @@ pub struct Segment {
get_search_aggregator: SearchAggregator,
post_search_aggregator: SearchAggregator,
post_multi_search_aggregator: MultiSearchAggregator,
post_facet_search_aggregator: FacetSearchAggregator,
add_documents_aggregator: DocumentsAggregator,
delete_documents_aggregator: DocumentsDeletionAggregator,
update_documents_aggregator: DocumentsAggregator,
@@ -418,6 +428,7 @@ impl Segment {
Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
@@ -461,55 +472,74 @@ impl Segment {
})
.await;
}
let get_search = std::mem::take(&mut self.get_search_aggregator)
.into_event(&self.user, "Documents Searched GET");
let post_search = std::mem::take(&mut self.post_search_aggregator)
.into_event(&self.user, "Documents Searched POST");
let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator)
.into_event(&self.user, "Documents Searched by Multi-Search POST");
let add_documents = std::mem::take(&mut self.add_documents_aggregator)
.into_event(&self.user, "Documents Added");
let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)
.into_event(&self.user, "Documents Deleted");
let update_documents = std::mem::take(&mut self.update_documents_aggregator)
.into_event(&self.user, "Documents Updated");
let get_fetch_documents = std::mem::take(&mut self.get_fetch_documents_aggregator)
.into_event(&self.user, "Documents Fetched GET");
let post_fetch_documents = std::mem::take(&mut self.post_fetch_documents_aggregator)
.into_event(&self.user, "Documents Fetched POST");
let get_tasks =
std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
let health =
std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen");
if let Some(get_search) = get_search {
let Segment {
inbox: _,
opt: _,
batcher: _,
user,
get_search_aggregator,
post_search_aggregator,
post_multi_search_aggregator,
post_facet_search_aggregator,
add_documents_aggregator,
delete_documents_aggregator,
update_documents_aggregator,
get_fetch_documents_aggregator,
post_fetch_documents_aggregator,
get_tasks_aggregator,
health_aggregator,
} = self;
if let Some(get_search) =
take(get_search_aggregator).into_event(&user, "Documents Searched GET")
{
let _ = self.batcher.push(get_search).await;
}
if let Some(post_search) = post_search {
if let Some(post_search) =
take(post_search_aggregator).into_event(&user, "Documents Searched POST")
{
let _ = self.batcher.push(post_search).await;
}
if let Some(post_multi_search) = post_multi_search {
if let Some(post_multi_search) = take(post_multi_search_aggregator)
.into_event(&user, "Documents Searched by Multi-Search POST")
{
let _ = self.batcher.push(post_multi_search).await;
}
if let Some(add_documents) = add_documents {
if let Some(post_facet_search) = take(post_facet_search_aggregator)
.into_event(&user, "Documents Searched by Facet-Search POST")
{
let _ = self.batcher.push(post_facet_search).await;
}
if let Some(add_documents) =
take(add_documents_aggregator).into_event(&user, "Documents Added")
{
let _ = self.batcher.push(add_documents).await;
}
if let Some(delete_documents) = delete_documents {
if let Some(delete_documents) =
take(delete_documents_aggregator).into_event(&user, "Documents Deleted")
{
let _ = self.batcher.push(delete_documents).await;
}
if let Some(update_documents) = update_documents {
if let Some(update_documents) =
take(update_documents_aggregator).into_event(&user, "Documents Updated")
{
let _ = self.batcher.push(update_documents).await;
}
if let Some(get_fetch_documents) = get_fetch_documents {
if let Some(get_fetch_documents) =
take(get_fetch_documents_aggregator).into_event(&user, "Documents Fetched GET")
{
let _ = self.batcher.push(get_fetch_documents).await;
}
if let Some(post_fetch_documents) = post_fetch_documents {
if let Some(post_fetch_documents) =
take(post_fetch_documents_aggregator).into_event(&user, "Documents Fetched POST")
{
let _ = self.batcher.push(post_fetch_documents).await;
}
if let Some(get_tasks) = get_tasks {
if let Some(get_tasks) = take(get_tasks_aggregator).into_event(&user, "Tasks Seen") {
let _ = self.batcher.push(get_tasks).await;
}
if let Some(health) = health {
if let Some(health) = take(health_aggregator).into_event(&user, "Health Seen") {
let _ = self.batcher.push(health).await;
}
let _ = self.batcher.flush().await;
@@ -886,6 +916,144 @@ impl MultiSearchAggregator {
}
}
#[derive(Default)]
pub struct FacetSearchAggregator {
timestamp: Option<OffsetDateTime>,
// context
user_agents: HashSet<String>,
// requests
total_received: usize,
total_succeeded: usize,
time_spent: BinaryHeap<usize>,
// The set of all facetNames that were used
facet_names: HashSet<String>,
// Has there been any parameter other than the facetName or facetQuery ones?
additional_search_parameters_provided: bool,
}
impl FacetSearchAggregator {
pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self {
let FacetSearchQuery {
facet_query: _,
facet_name,
q,
offset,
limit,
page,
hits_per_page,
attributes_to_retrieve,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_matches_position,
filter,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
} = query;
let mut ret = Self::default();
ret.timestamp = Some(OffsetDateTime::now_utc());
ret.total_received = 1;
ret.user_agents = extract_user_agents(request).into_iter().collect();
ret.facet_names = Some(facet_name.clone()).into_iter().collect();
ret.additional_search_parameters_provided = q.is_some()
|| *offset != DEFAULT_SEARCH_OFFSET()
|| *limit != DEFAULT_SEARCH_LIMIT()
|| page.is_some()
|| hits_per_page.is_some()
|| attributes_to_retrieve.is_some()
|| attributes_to_crop.is_some()
|| *crop_length != DEFAULT_CROP_LENGTH()
|| attributes_to_highlight.is_some()
|| *show_matches_position
|| filter.is_some()
|| sort.is_some()
|| facets.is_some()
|| *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG()
|| *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG()
|| *crop_marker != DEFAULT_CROP_MARKER()
|| *matching_strategy != MatchingStrategy::default();
ret
}
pub fn succeed(&mut self, result: &FacetSearchResult) {
self.total_succeeded = self.total_succeeded.saturating_add(1);
self.time_spent.push(result.processing_time_ms as usize);
}
/// Aggregate one [FacetSearchAggregator] into another.
pub fn aggregate(&mut self, mut other: Self) {
if self.timestamp.is_none() {
self.timestamp = other.timestamp;
}
// context
for user_agent in other.user_agents.into_iter() {
self.user_agents.insert(user_agent);
}
// request
self.total_received = self.total_received.saturating_add(other.total_received);
self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded);
self.time_spent.append(&mut other.time_spent);
// facet_names
for facet_name in other.facet_names.into_iter() {
self.facet_names.insert(facet_name);
}
// additional_search_parameters_provided
self.additional_search_parameters_provided = self.additional_search_parameters_provided
| other.additional_search_parameters_provided;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
if self.total_received == 0 {
None
} else {
// the index of the 99th percentile value
let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.;
// we get all the values in a sorted manner
let time_spent = self.time_spent.into_sorted_vec();
// We are only interested in the slowest value of the 99% fastest results
let time_spent = time_spent.get(percentile_99th as usize);
let properties = json!({
"user-agent": self.user_agents,
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": self.total_succeeded,
"total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics
"total_received": self.total_received,
},
"facets": {
"total_distinct_facet_count": self.facet_names.len(),
},
"additional_search_parameters_provided": self.additional_search_parameters_provided,
});
Some(Track {
timestamp: self.timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,
..Default::default()
})
}
}
}
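
The percentile lookup in `into_event` indexes into the sorted response times with a linear-interpolation-style rank; a worked example of that arithmetic, with made-up timings:

```rust
fn main() {
    // 200 successful requests with sorted response times 1..=200 ms.
    let total_succeeded = 200usize;
    let time_spent: Vec<usize> = (1..=200).collect();

    // Same formula as the aggregator above.
    let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) + 1.; // 198.01
    // Truncation gives index 198, i.e. the 199th sorted value: the slowest
    // of the fastest 99% of responses.
    assert_eq!(time_spent.get(percentile_99th as usize), Some(&199));
}
```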
#[derive(Default)]
pub struct DocumentsAggregator {
timestamp: Option<OffsetDateTime>,

---

@@ -4,32 +4,20 @@ use prometheus::{
register_int_gauge_vec, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
};
-/// Create evenly distributed buckets
-fn create_buckets() -> [f64; 29] {
-(0..10)
-.chain((10..100).step_by(10))
-.chain((100..=1000).step_by(100))
-.map(|i| i as f64 / 1000.)
-.collect::<Vec<_>>()
-.try_into()
-.unwrap()
-}
+const HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: &[f64; 14] = &[
+0.0005, 0.0008, 0.00085, 0.0009, 0.00095, 0.001, 0.00105, 0.0011, 0.00115, 0.0012, 0.0015,
+0.002, 0.003, 1.0,
+];
lazy_static! {
-pub static ref HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: [f64; 29] = create_buckets();
-pub static ref MEILISEARCH_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
-opts!("meilisearch_http_requests_total", "Meilisearch HTTP requests total"),
+pub static ref HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
+opts!("http_requests_total", "HTTP requests total"),
&["method", "path"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
-register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
+register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch Db Size In Bytes"))
.expect("Can't create a metric");
pub static ref MEILISEARCH_USED_DB_SIZE_BYTES: IntGauge = register_int_gauge!(opts!(
"meilisearch_used_db_size_bytes",
"Meilisearch Used DB Size In Bytes"
))
.expect("Can't create a metric");
pub static ref MEILISEARCH_INDEX_COUNT: IntGauge =
register_int_gauge!(opts!("meilisearch_index_count", "Meilisearch Index Count"))
.expect("Can't create a metric");
@@ -38,16 +26,11 @@ lazy_static! {
&["index"]
)
.expect("Can't create a metric");
-pub static ref MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
+pub static ref HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
"http_response_time_seconds",
"HTTP response times",
&["method", "path"],
HTTP_RESPONSE_TIME_CUSTOM_BUCKETS.to_vec()
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_NB_TASKS: IntGaugeVec = register_int_gauge_vec!(
opts!("meilisearch_nb_tasks", "Meilisearch Number of tasks"),
&["kind", "value"]
)
.expect("Can't create a metric");
}
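
The removed `create_buckets` helper produced 29 histogram boundaries: 0 to 9 ms in 1 ms steps, 10 to 90 ms in 10 ms steps, and 100 to 1000 ms in 100 ms steps, all divided by 1000 to express seconds. A standalone check of that arithmetic:

```rust
fn create_buckets() -> [f64; 29] {
    // 10 + 9 + 10 = 29 boundaries, converted from milliseconds to seconds.
    (0..10)
        .chain((10..100).step_by(10))
        .chain((100..=1000).step_by(100))
        .map(|i| i as f64 / 1000.)
        .collect::<Vec<_>>()
        .try_into()
        .unwrap()
}

fn main() {
    let buckets = create_buckets();
    assert_eq!(buckets.len(), 29);
    assert_eq!(buckets[0], 0.0); // 0 ms
    assert_eq!(buckets[10], 0.01); // 10 ms
    assert_eq!(buckets[28], 1.0); // 1 s
}
```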

---

@@ -52,11 +52,11 @@ where
if is_registered_resource {
let request_method = req.method().to_string();
histogram_timer = Some(
-crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
+crate::metrics::HTTP_RESPONSE_TIME_SECONDS
.with_label_values(&[&request_method, request_path])
.start_timer(),
);
-crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
+crate::metrics::HTTP_REQUESTS_TOTAL
.with_label_values(&[&request_method, request_path])
.inc();
}

---

@@ -0,0 +1,133 @@
use std::collections::{BTreeSet, HashSet};
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use serde_json::Value;
use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::search::{
add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(search)));
}
// TODO improve the error messages
#[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FacetSearchQuery {
#[deserr(default, error = DeserrJsonError<InvalidFacetSearchQuery>)]
pub facet_query: Option<String>,
#[deserr(error = DeserrJsonError<MissingFacetSearchFacetName>, missing_field_error = DeserrJsonError::missing_facet_search_facet_name)]
pub facet_name: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub q: Option<String>,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
pub page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
pub hits_per_page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
pub crop_length: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
pub highlight_pre_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
pub highlight_post_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
pub crop_marker: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
pub matching_strategy: MatchingStrategy,
}
pub async fn search(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebJson<FacetSearchQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.into_inner();
debug!("facet search called with params: {:?}", query);
let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
let facet_query = query.facet_query.clone();
let facet_name = query.facet_name.clone();
let mut search_query = SearchQuery::from(query);
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut search_query, search_rules);
}
let index = index_scheduler.index(&index_uid)?;
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(&index, search_query, facet_query, facet_name)
})
.await?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
analytics.post_facet_search(aggregate);
let search_result = search_result?;
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}
impl From<FacetSearchQuery> for SearchQuery {
fn from(value: FacetSearchQuery) -> Self {
SearchQuery {
q: value.q,
offset: value.offset,
limit: value.limit,
page: value.page,
hits_per_page: value.hits_per_page,
attributes_to_retrieve: value.attributes_to_retrieve,
attributes_to_crop: value.attributes_to_crop,
crop_length: value.crop_length,
attributes_to_highlight: value.attributes_to_highlight,
show_matches_position: value.show_matches_position,
filter: value.filter,
sort: value.sort,
facets: value.facets,
highlight_pre_tag: value.highlight_pre_tag,
highlight_post_tag: value.highlight_post_tag,
crop_marker: value.crop_marker,
matching_strategy: value.matching_strategy,
}
}
}

---

@@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
pub mod documents;
pub mod facet_search;
pub mod search;
pub mod settings;
@@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
.service(web::scope("/documents").configure(documents::configure))
.service(web::scope("/search").configure(search::configure))
.service(web::scope("/facet-search").configure(facet_search::configure))
.service(web::scope("/settings").configure(settings::configure)),
);
}

---

@@ -56,10 +56,6 @@ pub struct SearchQueryGet {
sort: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
show_matches_position: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
show_ranking_score: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScoreDetails>)]
show_ranking_score_details: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchFacets>)]
facets: Option<CS<String>>,
#[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPreTag>)]
@@ -95,8 +91,6 @@ impl From<SearchQueryGet> for SearchQuery {
filter,
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
show_matches_position: other.show_matches_position.0,
show_ranking_score: other.show_ranking_score.0,
show_ranking_score_details: other.show_ranking_score_details.0,
facets: other.facets.map(|o| o.into_iter().collect()),
highlight_pre_tag: other.highlight_pre_tag,
highlight_post_tag: other.highlight_post_tag,

---

@@ -17,7 +17,7 @@ pub fn configure(config: &mut web::ServiceConfig) {
pub async fn get_metrics(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
-auth_controller: Data<AuthController>,
+auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<AuthController>>,
) -> Result<HttpResponse, ResponseError> {
let auth_filters = index_scheduler.filters();
if !auth_filters.all_indexes_authorized() {
@@ -28,10 +28,10 @@ pub async fn get_metrics(
return Err(error);
}
-let response = create_all_stats((*index_scheduler).clone(), auth_controller, auth_filters)?;
+let response =
+create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), auth_filters)?;
crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64);
crate::metrics::MEILISEARCH_USED_DB_SIZE_BYTES.set(response.used_database_size as i64);
crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);
for (index, value) in response.indexes.iter() {
@@ -40,14 +40,6 @@ pub async fn get_metrics(
.set(value.number_of_documents as i64);
}
for (kind, value) in index_scheduler.get_stats()? {
for (value, count) in value {
crate::metrics::MEILISEARCH_NB_TASKS
.with_label_values(&[&kind, &value])
.set(count as i64);
}
}
let encoder = TextEncoder::new();
let mut buffer = vec![];
encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics");

---

@@ -231,8 +231,6 @@ pub async fn running() -> HttpResponse {
#[serde(rename_all = "camelCase")]
pub struct Stats {
pub database_size: u64,
#[serde(skip)]
pub used_database_size: u64,
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
pub last_update: Option<OffsetDateTime>,
pub indexes: BTreeMap<String, indexes::IndexStats>,
@@ -261,7 +259,6 @@ pub fn create_all_stats(
let mut last_task: Option<OffsetDateTime> = None;
let mut indexes = BTreeMap::new();
let mut database_size = 0;
let mut used_database_size = 0;
for index_uid in index_scheduler.index_names()? {
// Accumulate the size of all indexes, even unauthorized ones, so
@@ -269,7 +266,6 @@ pub fn create_all_stats(
// See <https://github.com/meilisearch/meilisearch/pull/3541#discussion_r1126747643> for context.
let stats = index_scheduler.index_stats(&index_uid)?;
database_size += stats.inner_stats.database_size;
used_database_size += stats.inner_stats.used_database_size;
if !filters.is_index_authorized(&index_uid) {
continue;
@@ -282,14 +278,10 @@ pub fn create_all_stats(
}
database_size += index_scheduler.size()?;
used_database_size += index_scheduler.used_size()?;
database_size += auth_controller.size()?;
used_database_size += auth_controller.used_size()?;
-let update_file_size = index_scheduler.compute_update_file_size()?;
-database_size += update_file_size;
-used_database_size += update_file_size;
+database_size += index_scheduler.compute_update_file_size()?;
-let stats = Stats { database_size, used_database_size, last_update: last_task, indexes };
+let stats = Stats { database_size, last_update: last_task, indexes };
Ok(stats)
}

---

@@ -8,8 +8,9 @@ use either::Either;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::{FacetValueHit, SearchForFacetValues};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
@@ -55,10 +56,6 @@ pub struct SearchQuery {
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScore>, default)]
pub show_ranking_score: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScoreDetails>, default)]
pub show_ranking_score_details: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
@@ -108,10 +105,6 @@ pub struct SearchQueryWithIndex {
pub crop_length: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScore>, default)]
pub show_ranking_score: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScoreDetails>, default)]
pub show_ranking_score_details: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
@@ -143,8 +136,6 @@ impl SearchQueryWithIndex {
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_ranking_score,
show_ranking_score_details,
show_matches_position,
filter,
sort,
@@ -166,8 +157,6 @@ impl SearchQueryWithIndex {
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_ranking_score,
show_ranking_score_details,
show_matches_position,
filter,
sort,
@@ -183,7 +172,7 @@ impl SearchQueryWithIndex {
}
}
-#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
#[deserr(rename_all = camelCase)]
pub enum MatchingStrategy {
/// Remove query words from last to first
@@ -207,7 +196,7 @@ impl From<MatchingStrategy> for TermsMatchingStrategy {
}
}
-#[derive(Debug, Clone, Serialize, PartialEq)]
+#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct SearchHit {
#[serde(flatten)]
pub document: Document,
@@ -215,10 +204,6 @@ pub struct SearchHit {
pub formatted: Document,
#[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
pub matches_position: Option<MatchesPosition>,
#[serde(rename = "_rankingScore", skip_serializing_if = "Option::is_none")]
pub ranking_score: Option<f64>,
#[serde(rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")]
pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
@@ -258,6 +243,14 @@ pub struct FacetStats {
pub max: f64,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct FacetSearchResult {
pub facet_hits: Vec<FacetValueHit>,
pub facet_query: Option<String>,
pub processing_time_ms: u128,
}
/// Incorporate search rules in search query
pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
@@ -278,14 +271,12 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
}
}
-pub fn perform_search(
-index: &Index,
-query: SearchQuery,
-) -> Result<SearchResult, MeilisearchHttpError> {
-let before_search = Instant::now();
-let rtxn = index.read_txn()?;
-let mut search = index.search(&rtxn);
+fn prepare_search<'t>(
+index: &'t Index,
+rtxn: &'t RoTxn,
+query: &'t SearchQuery,
+) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
+let mut search = index.search(rtxn);
if let Some(ref query) = query.q {
search.query(query);
@@ -295,16 +286,11 @@ pub fn perform_search(
search.terms_matching_strategy(query.matching_strategy.into());
let max_total_hits = index
.pagination_max_total_hits(&rtxn)
.pagination_max_total_hits(rtxn)
.map_err(milli::Error::from)?
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination);
search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details {
ScoringStrategy::Detailed
} else {
ScoringStrategy::Skip
});
// compute the offset on the limit depending on the pagination mode.
let (offset, limit) = if is_finite_pagination {
@@ -342,8 +328,20 @@ pub fn perform_search(
search.sort_criteria(sort);
}
let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
search.execute()?;
Ok((search, is_finite_pagination, max_total_hits, offset))
}
pub fn perform_search(
index: &Index,
query: SearchQuery,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query)?;
let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -415,7 +413,7 @@ pub fn perform_search(
let documents_iter = index.documents(&rtxn, documents_ids)?;
for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
for (_id, obkv) in documents_iter {
// First generate a document with all the displayed fields
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
@@ -439,18 +437,7 @@ pub fn perform_search(
insert_geo_distance(sort, &mut document);
}
let ranking_score =
query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
let ranking_score_details =
query.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
let hit = SearchHit {
document,
formatted,
matches_position,
ranking_score_details,
ranking_score,
};
let hit = SearchHit { document, formatted, matches_position };
documents.push(hit);
}
@@ -507,6 +494,28 @@ pub fn perform_search(
Ok(result)
}
pub fn perform_facet_search(
index: &Index,
search_query: SearchQuery,
facet_query: Option<String>,
facet_name: String,
) -> Result<FacetSearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query)?;
let mut facet_search = SearchForFacetValues::new(facet_name, search);
if let Some(facet_query) = &facet_query {
facet_search.query(facet_query);
}
Ok(FacetSearchResult {
facet_hits: facet_search.execute()?,
facet_query,
processing_time_ms: before_search.elapsed().as_millis(),
})
}
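A minimal call sketch for the function above (the index handle, query, and facet name are hypothetical, not taken from an actual route handler in this diff):

// Hypothetical usage of perform_facet_search, assuming an open `Index`
// and a `SearchQuery` built from the request body:
let result = perform_facet_search(
    &index,
    search_query,
    Some("blu".to_string()), // the partial facet value typed by the user
    "color".to_string(),     // the facet whose values are searched
)?;
for hit in &result.facet_hits {
    println!("{} ({} documents)", hit.value, hit.count);
}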
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
lazy_static::lazy_static! {
static ref GEO_REGEX: Regex =

View File

@@ -1,4 +1,3 @@
use insta::{allow_duplicates, assert_json_snapshot};
use serde_json::json;
use super::*;
@@ -19,43 +18,30 @@ async fn formatted_contain_wildcard() {
|response, code|
{
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"id": "852",
"cattos": "<em>pésti</em>"
},
"_matchesPosition": {
"cattos": [
{
"start": 0,
"length": 5
}
]
}
}
"###);
}
}
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
"cattos": "<em>pésti</em>",
},
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
})
);
}
)
.await;
index
.search(json!({ "q": "pésti", "attributesToRetrieve": ["*"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"cattos": "pésti"
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pésti",
})
);
})
.await;
@@ -64,29 +50,20 @@ async fn formatted_contain_wildcard() {
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"], "showMatchesPosition": true }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti"
},
"_matchesPosition": {
"cattos": [
{
"start": 0,
"length": 5
}
]
}
}
"###)
}
})
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti",
},
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
})
);
}
)
.await;
index
@@ -94,20 +71,17 @@ async fn formatted_contain_wildcard() {
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti"
}
}
"###);
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti",
}
})
);
},
)
.await;
@@ -115,20 +89,17 @@ async fn formatted_contain_wildcard() {
index
.search(json!({ "q": "pésti", "attributesToCrop": ["*"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti"
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti",
}
})
);
})
.await;
}
@@ -145,24 +116,21 @@ async fn format_nested() {
index
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
]
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
"age": 2,
},
{
"name": "buddy",
"age": 4,
},
],
})
);
})
.await;
@@ -171,22 +139,19 @@ async fn format_nested() {
json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
]
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
})
);
},
)
.await;
@@ -196,30 +161,20 @@ async fn format_nested() {
json!({ "q": "bobby", "attributesToRetrieve": ["doggos.name"], "showMatchesPosition": true }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
],
"_matchesPosition": {
"doggos.name": [
{
"start": 0,
"length": 5
}
]
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
"_matchesPosition": {"doggos.name": [{"start": 0, "length": 5}]},
})
);
}
)
.await;
@@ -228,24 +183,21 @@ async fn format_nested() {
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
]
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
},
})
);
})
.await;
@@ -253,24 +205,21 @@ async fn format_nested() {
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
]
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
},
})
);
})
.await;
@@ -278,61 +227,55 @@ async fn format_nested() {
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
],
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2"
},
{
"name": "buddy",
"age": "4"
}
]
}
}
"###)
}
})
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2",
},
{
"name": "buddy",
"age": "4",
},
],
},
})
);
})
.await;
index
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2"
},
{
"name": "buddy",
"age": "4"
}
]
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2",
},
{
"name": "buddy",
"age": "4",
},
],
},
})
);
}
)
.await;
@@ -354,66 +297,54 @@ async fn displayedattr_2_smol() {
.search(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
})
.await;
index
.search(json!({ "attributesToRetrieve": ["id"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
})
.await;
index
.search(json!({ "attributesToHighlight": ["id"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"_formatted": {
"id": "852"
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
})
.await;
index
.search(json!({ "attributesToCrop": ["id"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"_formatted": {
"id": "852"
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
})
.await;
@@ -422,18 +353,15 @@ async fn displayedattr_2_smol() {
json!({ "attributesToHighlight": ["id"], "attributesToCrop": ["id"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"_formatted": {
"id": "852"
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
},
)
.await;
@@ -441,41 +369,31 @@ async fn displayedattr_2_smol() {
index
.search(json!({ "attributesToHighlight": ["cattos"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
})
.await;
index
.search(json!({ "attributesToCrop": ["cattos"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
})
.await;
index
.search(json!({ "attributesToRetrieve": ["cattos"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@"{}")
}
assert_eq!(response["hits"][0], json!({}));
})
.await;
@@ -484,11 +402,7 @@ async fn displayedattr_2_smol() {
json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["cattos"], "attributesToCrop": ["cattos"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@"{}")
}
assert_eq!(response["hits"][0], json!({}));
}
)
@@ -499,17 +413,14 @@ async fn displayedattr_2_smol() {
json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["id"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"id": "852"
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
}
})
);
},
)
.await;
@@ -519,17 +430,14 @@ async fn displayedattr_2_smol() {
json!({ "attributesToRetrieve": ["cattos"], "attributesToCrop": ["id"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"id": "852"
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
}
})
);
},
)
.await;

View File

@@ -65,7 +65,7 @@ async fn simple_search_single_index() {
]}))
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###"
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
[
{
"indexUid": "test",
@@ -170,7 +170,7 @@ async fn simple_search_two_indexes() {
]}))
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###"
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
[
{
"indexUid": "test",

View File

@@ -53,7 +53,6 @@ fn main() -> Result<(), Box<dyn Error>> {
&mut ctx,
&(!query.trim().is_empty()).then(|| query.trim().to_owned()),
TermsMatchingStrategy::Last,
milli::score_details::ScoringStrategy::Skip,
false,
&None,
&None,

View File

@@ -124,6 +124,16 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
}
)]
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
#[error("Attribute `{}` is not filterable. {}",
.field,
match .valid_fields.is_empty() {
true => "This index does not have configured filterable attributes.".to_string(),
false => format!("Available filterable attributes are: `{}`.",
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
),
}
)]
InvalidFacetSearchFacetName { field: String, valid_fields: BTreeSet<String> },
#[error("{}", HeedError::BadOpenOptions)]
InvalidLmdbOpenOptions,
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]

View File

@@ -0,0 +1,23 @@
use std::borrow::Cow;
use fst::Set;
use heed::{BytesDecode, BytesEncode};
/// A codec for values of type `Set<&[u8]>`.
pub struct FstSetCodec;
impl<'a> BytesEncode<'a> for FstSetCodec {
type EItem = Set<Vec<u8>>;
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
Some(Cow::Borrowed(item.as_fst().as_bytes()))
}
}
impl<'a> BytesDecode<'a> for FstSetCodec {
type DItem = Set<&'a [u8]>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
Set::new(bytes).ok()
}
}
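A standalone round-trip sketch for this codec, assuming only the `fst` crate's public API:

use fst::Set;

fn roundtrip() -> Result<(), Box<dyn std::error::Error>> {
    // `from_iter` requires lexicographically sorted input.
    let set: Set<Vec<u8>> = Set::from_iter(vec!["blue", "green", "red"])?;
    // Encoding borrows the raw FST bytes, exactly as `bytes_encode` does.
    let bytes = set.as_fst().as_bytes().to_vec();
    // Decoding rebuilds a borrowed `Set<&[u8]>` over those bytes.
    let decoded = Set::new(bytes.as_slice())?;
    assert!(decoded.contains("green"));
    Ok(())
}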

View File

@@ -2,6 +2,7 @@ mod beu32_str_codec;
mod byte_slice_ref;
pub mod facet;
mod field_id_word_count_codec;
mod fst_set_codec;
mod obkv_codec;
mod roaring_bitmap;
mod roaring_bitmap_length;
@@ -15,6 +16,7 @@ pub use str_ref::StrRefCodec;
pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap_length::{

View File

@@ -49,7 +49,7 @@ impl CboRoaringBitmapCodec {
} else {
// Otherwise, it means we used the classic RoaringBitmapCodec and
// that the header takes threshold integers.
RoaringBitmap::deserialize_unchecked_from(bytes)
RoaringBitmap::deserialize_from(bytes)
}
}
@@ -69,7 +69,7 @@ impl CboRoaringBitmapCodec {
vec.push(integer);
}
} else {
roaring |= RoaringBitmap::deserialize_unchecked_from(bytes.as_ref())?;
roaring |= RoaringBitmap::deserialize_from(bytes.as_ref())?;
}
}
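The substitution above trades the unchecked deserializer for the validating one; a standalone sketch of the checked call (assuming the `roaring` crate's public API):

use roaring::RoaringBitmap;

fn decode(bytes: &[u8]) -> std::io::Result<RoaringBitmap> {
    // `deserialize_from` validates the serialized container metadata,
    // while `deserialize_unchecked_from` skips those checks for speed.
    RoaringBitmap::deserialize_from(bytes)
}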

View File

@@ -8,7 +8,7 @@ impl heed::BytesDecode<'_> for RoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
RoaringBitmap::deserialize_unchecked_from(bytes).ok()
RoaringBitmap::deserialize_from(bytes).ok()
}
}

View File

@@ -19,11 +19,12 @@ use crate::heed_codec::facet::{
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32,
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
Search, U8StrStrCodec, BEU16, BEU32,
};
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -84,6 +85,7 @@ pub mod db_name {
pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
pub const DOCUMENTS: &str = "documents";
@@ -110,6 +112,9 @@ pub struct Index {
/// A word prefix and all the document ids containing this prefix, from attributes for which typos are not allowed.
pub exact_word_prefix_docids: Database<Str, RoaringBitmapCodec>,
/// Maps a word and a document id (u32) to all the positions where the given word appears.
pub docid_word_positions: Database<BEU32StrCodec, BoRoaringBitmapCodec>,
/// Maps the proximity between a pair of words with all the docids where this relation appears.
pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
/// Maps the proximity between a pair of word and prefix with all the docids where this relation appears.
@@ -143,6 +148,8 @@ pub struct Index {
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
/// Maps the facet field id and ranges of strings with the docids that corresponds to them.
pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
/// Maps the facet field id of the string facets with an FST containing all the facet values.
pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,
/// Maps the document id, the facet field id and the numbers.
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
@@ -162,7 +169,7 @@ impl Index {
) -> Result<Index> {
use db_name::*;
options.max_dbs(23);
options.max_dbs(24);
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?;
@@ -173,6 +180,7 @@ impl Index {
let word_prefix_docids = env.create_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?;
let exact_word_prefix_docids =
env.create_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?;
let docid_word_positions = env.create_database(&mut wtxn, Some(DOCID_WORD_POSITIONS))?;
let word_pair_proximity_docids =
env.create_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
let script_language_docids =
@@ -192,13 +200,13 @@ impl Index {
let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
let facet_id_string_docids =
env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
let facet_id_string_fst = env.create_database(&mut wtxn, Some(FACET_ID_STRING_FST))?;
let facet_id_exists_docids =
env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
let facet_id_is_null_docids =
env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?;
let facet_id_is_empty_docids =
env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?;
let field_id_docid_facet_f64s =
env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
let field_id_docid_facet_strings =
@@ -215,6 +223,7 @@ impl Index {
exact_word_docids,
word_prefix_docids,
exact_word_prefix_docids,
docid_word_positions,
word_pair_proximity_docids,
script_language_docids,
word_prefix_pair_proximity_docids,
@@ -226,6 +235,7 @@ impl Index {
field_id_word_count_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_string_fst,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,
@@ -1466,9 +1476,9 @@ pub(crate) mod tests {
db_snap!(index, field_distribution,
@r###"
age 1 |
id 2 |
name 2 |
age 1
id 2
name 2
"###
);
@@ -1486,9 +1496,9 @@ pub(crate) mod tests {
db_snap!(index, field_distribution,
@r###"
age 1 |
id 2 |
name 2 |
age 1
id 2
name 2
"###
);
@@ -1502,9 +1512,9 @@ pub(crate) mod tests {
db_snap!(index, field_distribution,
@r###"
has_dog 1 |
id 2 |
name 2 |
has_dog 1
id 2
name 2
"###
);
}
@@ -2488,12 +2498,8 @@ pub(crate) mod tests {
let rtxn = index.read_txn().unwrap();
let search = Search::new(&rtxn, &index);
let SearchResult {
matching_words: _,
candidates: _,
document_scores: _,
mut documents_ids,
} = search.execute().unwrap();
let SearchResult { matching_words: _, candidates: _, mut documents_ids } =
search.execute().unwrap();
let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
documents_ids.sort_unstable();
let docs = index.documents(&rtxn, documents_ids).unwrap();

View File

@@ -5,6 +5,52 @@
#[global_allocator]
pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
// #[cfg(test)]
// pub mod allocator {
// use std::alloc::{GlobalAlloc, System};
// use std::sync::atomic::{self, AtomicI64};
// #[global_allocator]
// pub static ALLOC: CountingAlloc = CountingAlloc {
// max_resident: AtomicI64::new(0),
// resident: AtomicI64::new(0),
// allocated: AtomicI64::new(0),
// };
// pub struct CountingAlloc {
// pub max_resident: AtomicI64,
// pub resident: AtomicI64,
// pub allocated: AtomicI64,
// }
// unsafe impl GlobalAlloc for CountingAlloc {
// unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 {
// self.allocated.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst);
// let old_resident =
// self.resident.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst);
// let resident = old_resident + layout.size() as i64;
// self.max_resident.fetch_max(resident, atomic::Ordering::SeqCst);
// // if layout.size() > 1_000_000 {
// // eprintln!(
// // "allocating {} with new resident size: {resident}",
// // layout.size() / 1_000_000
// // );
// // // let trace = std::backtrace::Backtrace::capture();
// // // let t = trace.to_string();
// // // eprintln!("{t}");
// // }
// System.alloc(layout)
// }
// unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) {
// self.resident.fetch_sub(layout.size() as i64, atomic::Ordering::Relaxed);
// System.dealloc(ptr, layout)
// }
// }
// }
#[macro_use]
pub mod documents;
@@ -17,7 +63,6 @@ mod fields_ids_map;
pub mod heed_codec;
pub mod index;
pub mod proximity;
pub mod score_details;
mod search;
pub mod update;
@@ -54,8 +99,9 @@ pub use self::heed_codec::{
};
pub use self::index::Index;
pub use self::search::{
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search,
SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder,
MatchingWords, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy,
DEFAULT_VALUES_PER_FACET,
};
pub type Result<T> = std::result::Result<T, error::Error>;

View File

@@ -1,316 +0,0 @@
use serde::Serialize;
use crate::distance_between_two_points;
#[derive(Debug, Clone, PartialEq)]
pub enum ScoreDetails {
Words(Words),
Typo(Typo),
Proximity(Rank),
Fid(Rank),
Position(Rank),
ExactAttribute(ExactAttribute),
Exactness(Rank),
Sort(Sort),
GeoSort(GeoSort),
}
impl ScoreDetails {
pub fn local_score(&self) -> Option<f64> {
self.rank().map(Rank::local_score)
}
pub fn rank(&self) -> Option<Rank> {
match self {
ScoreDetails::Words(details) => Some(details.rank()),
ScoreDetails::Typo(details) => Some(details.rank()),
ScoreDetails::Proximity(details) => Some(*details),
ScoreDetails::Fid(details) => Some(*details),
ScoreDetails::Position(details) => Some(*details),
ScoreDetails::ExactAttribute(details) => Some(details.rank()),
ScoreDetails::Exactness(details) => Some(*details),
ScoreDetails::Sort(_) => None,
ScoreDetails::GeoSort(_) => None,
}
}
pub fn global_score<'a>(details: impl Iterator<Item = &'a Self>) -> f64 {
Rank::global_score(details.filter_map(Self::rank))
}
/// Panics
///
/// - If Position is not preceded by Fid
/// - If Exactness is not preceded by ExactAttribute
pub fn to_json_map<'a>(
details: impl Iterator<Item = &'a Self>,
) -> serde_json::Map<String, serde_json::Value> {
let mut order = 0;
let mut fid_details = None;
let mut details_map = serde_json::Map::default();
for details in details {
match details {
ScoreDetails::Words(words) => {
let words_details = serde_json::json!({
"order": order,
"matchingWords": words.matching_words,
"maxMatchingWords": words.max_matching_words,
"score": words.rank().local_score(),
});
details_map.insert("words".into(), words_details);
order += 1;
}
ScoreDetails::Typo(typo) => {
let typo_details = serde_json::json!({
"order": order,
"typoCount": typo.typo_count,
"maxTypoCount": typo.max_typo_count,
"score": typo.rank().local_score(),
});
details_map.insert("typo".into(), typo_details);
order += 1;
}
ScoreDetails::Proximity(proximity) => {
let proximity_details = serde_json::json!({
"order": order,
"score": proximity.local_score(),
});
details_map.insert("proximity".into(), proximity_details);
order += 1;
}
ScoreDetails::Fid(fid) => {
// copy the rank for future use in Position.
fid_details = Some(*fid);
// For now, fid is a virtual rule always followed by the "position" rule
let fid_details = serde_json::json!({
"order": order,
"attributes_ranking_order": fid.local_score(),
});
details_map.insert("attribute".into(), fid_details);
order += 1;
}
ScoreDetails::Position(position) => {
// For now, position is a virtual rule always preceded by the "fid" rule
let attribute_details = details_map
.get_mut("attribute")
.expect("position not preceded by attribute");
let attribute_details = attribute_details
.as_object_mut()
.expect("attribute details was not an object");
let Some(fid_details) = fid_details
else {
panic!("position not preceded by attribute");
};
attribute_details.insert(
"attributes_query_word_order".into(),
position.local_score().into(),
);
let score = Rank::global_score([fid_details, *position].iter().copied());
attribute_details.insert("score".into(), score.into());
// do not update the order since this was already done by fid
}
ScoreDetails::ExactAttribute(exact_attribute) => {
let exactness_details = serde_json::json!({
"order": order,
"matchType": exact_attribute,
"score": exact_attribute.rank().local_score(),
});
details_map.insert("exactness".into(), exactness_details);
order += 1;
}
ScoreDetails::Exactness(details) => {
// For now, exactness is a virtual rule always preceded by the "ExactAttribute" rule
let exactness_details = details_map
.get_mut("exactness")
.expect("Exactness not preceded by exactAttribute");
let exactness_details = exactness_details
.as_object_mut()
.expect("exactness details was not an object");
if exactness_details.get("matchType").expect("missing 'matchType'")
== &serde_json::json!(ExactAttribute::NoExactMatch)
{
let score = Rank::global_score(
[ExactAttribute::NoExactMatch.rank(), *details].iter().copied(),
);
*exactness_details.get_mut("score").expect("missing score") = score.into();
}
// do not update the order since this was already done by exactAttribute
}
ScoreDetails::Sort(details) => {
let sort = if details.redacted {
format!("<hidden-rule-{order}>")
} else {
format!(
"{}:{}",
details.field_name,
if details.ascending { "asc" } else { "desc" }
)
};
let value =
if details.redacted { "<hidden>".into() } else { details.value.clone() };
let sort_details = serde_json::json!({
"order": order,
"value": value,
});
details_map.insert(sort, sort_details);
order += 1;
}
ScoreDetails::GeoSort(details) => {
let sort = format!(
"_geoPoint({}, {}):{}",
details.target_point[0],
details.target_point[1],
if details.ascending { "asc" } else { "desc" }
);
let point = if let Some(value) = details.value {
serde_json::json!({ "lat": value[0], "lng": value[1]})
} else {
serde_json::Value::Null
};
let sort_details = serde_json::json!({
"order": order,
"value": point,
"distance": details.distance(),
});
details_map.insert(sort, sort_details);
order += 1;
}
}
}
details_map
}
}
/// The strategy to compute scores.
///
/// It makes sense to pass down this strategy to the internals of the search, because
/// some optimizations (today, mainly skipping ranking rules for universes of a single document)
/// are not correct to do when computing the scores.
///
/// This strategy could feasibly be extended to differentiate between the normalized score and the
/// detailed scores, but it is not useful today as the normalized score is *derived from* the
/// detailed scores.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ScoringStrategy {
/// Don't compute scores
#[default]
Skip,
/// Compute detailed scores
Detailed,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Words {
pub matching_words: u32,
pub max_matching_words: u32,
}
impl Words {
pub fn rank(&self) -> Rank {
Rank { rank: self.matching_words, max_rank: self.max_matching_words }
}
pub(crate) fn from_rank(rank: Rank) -> Words {
Words { matching_words: rank.rank, max_matching_words: rank.max_rank }
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Typo {
pub typo_count: u32,
pub max_typo_count: u32,
}
impl Typo {
pub fn rank(&self) -> Rank {
Rank {
rank: self.max_typo_count - self.typo_count + 1,
max_rank: (self.max_typo_count + 1),
}
}
// max_rank = max_typo + 1
// max_typo = max_rank - 1
//
// rank = max_typo - typo + 1
// rank = max_rank - 1 - typo + 1
// rank + typo = max_rank
// typo = max_rank - rank
pub fn from_rank(rank: Rank) -> Typo {
Typo { typo_count: rank.max_rank - rank.rank, max_typo_count: rank.max_rank - 1 }
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Rank {
/// The ordinal rank, such that `max_rank` is the first rank, and 0 is the last rank.
///
/// The higher the better. Documents with a rank of 0 have a score of 0 and are typically never returned
/// (they don't match the query).
pub rank: u32,
/// The maximum possible rank. Documents with this rank have a score of 1.
///
/// The max rank should not be 0.
pub max_rank: u32,
}
impl Rank {
pub fn local_score(self) -> f64 {
self.rank as f64 / self.max_rank as f64
}
pub fn global_score(details: impl Iterator<Item = Self>) -> f64 {
let mut rank = Rank { rank: 1, max_rank: 1 };
for inner_rank in details {
rank.rank -= 1;
rank.rank *= inner_rank.max_rank;
rank.max_rank *= inner_rank.max_rank;
rank.rank += inner_rank.rank;
}
rank.local_score()
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum ExactAttribute {
ExactMatch,
MatchesStart,
NoExactMatch,
}
impl ExactAttribute {
pub fn rank(&self) -> Rank {
let rank = match self {
ExactAttribute::ExactMatch => 3,
ExactAttribute::MatchesStart => 2,
ExactAttribute::NoExactMatch => 1,
};
Rank { rank, max_rank: 3 }
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct Sort {
pub field_name: String,
pub ascending: bool,
pub redacted: bool,
pub value: serde_json::Value,
}
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub struct GeoSort {
pub target_point: [f64; 2],
pub ascending: bool,
pub value: Option<[f64; 2]>,
}
impl GeoSort {
pub fn distance(&self) -> Option<f64> {
self.value.map(|value| distance_between_two_points(&self.target_point, &value))
}
}
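For reference, the removed `Rank::global_score` composes per-rule ranks lexicographically. A standalone re-derivation with illustrative values:

// Folds (rank, max_rank) pairs exactly as the removed loop does.
fn global_score(details: &[(u32, u32)]) -> f64 {
    let (rank, max_rank) = details
        .iter()
        .fold((1u32, 1u32), |(r, m), &(ir, im)| ((r - 1) * im + ir, m * im));
    rank as f64 / max_rank as f64
}

fn main() {
    // Rank 2/3 on the first rule then 1/2 on the second lands in the
    // 3rd best of 6 buckets: (2 - 1) * 2 + 1 = 3, so the score is 3/6.
    assert_eq!(global_score(&[(2, 3), (1, 2)]), 0.5);
}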

View File

@@ -1,15 +1,20 @@
use std::fmt;
use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use log::error;
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
use self::new::PartialSearchResult;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::{
execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
execute_search, normalize_facet, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index,
Result, SearchContext, BEU16,
};
// Building these factories is not free.
@@ -17,6 +22,9 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
/// The maximum number of facet values returned by the facet search route.
const MAX_NUMBER_OF_FACETS: usize = 100;
pub mod facet;
mod fst_utils;
pub mod new;
@@ -30,7 +38,6 @@ pub struct Search<'a> {
sort_criteria: Option<Vec<AscDesc>>,
geo_strategy: new::GeoSortStrategy,
terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy,
words_limit: usize,
exhaustive_number_hits: bool,
rtxn: &'a heed::RoTxn<'a>,
@@ -47,7 +54,6 @@ impl<'a> Search<'a> {
sort_criteria: None,
geo_strategy: new::GeoSortStrategy::default(),
terms_matching_strategy: TermsMatchingStrategy::default(),
scoring_strategy: Default::default(),
exhaustive_number_hits: false,
words_limit: 10,
rtxn,
@@ -80,11 +86,6 @@ impl<'a> Search<'a> {
self
}
pub fn scoring_strategy(&mut self, value: ScoringStrategy) -> &mut Search<'a> {
self.scoring_strategy = value;
self
}
pub fn words_limit(&mut self, value: usize) -> &mut Search<'a> {
self.words_limit = value;
self
@@ -101,7 +102,7 @@ impl<'a> Search<'a> {
self
}
/// Forces the search to exhaustively compute the number of candidates,
/// Force the search to exhaustively compute the number of candidates,
/// this will increase the search time but allows finite pagination.
pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> {
self.exhaustive_number_hits = exhaustive_number_hits;
@@ -110,12 +111,11 @@ impl<'a> Search<'a> {
pub fn execute(&self) -> Result<SearchResult> {
let mut ctx = SearchContext::new(self.index, self.rtxn);
let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } =
let PartialSearchResult { located_query_terms, candidates, documents_ids } =
execute_search(
&mut ctx,
&self.query,
self.terms_matching_strategy,
self.scoring_strategy,
self.exhaustive_number_hits,
&self.filter,
&self.sort_criteria,
@@ -133,7 +133,7 @@ impl<'a> Search<'a> {
None => MatchingWords::default(),
};
Ok(SearchResult { matching_words, candidates, document_scores, documents_ids })
Ok(SearchResult { matching_words, candidates, documents_ids })
}
}
@@ -147,7 +147,6 @@ impl fmt::Debug for Search<'_> {
sort_criteria,
geo_strategy: _,
terms_matching_strategy,
scoring_strategy,
words_limit,
exhaustive_number_hits,
rtxn: _,
@@ -160,7 +159,6 @@ impl fmt::Debug for Search<'_> {
.field("limit", limit)
.field("sort_criteria", sort_criteria)
.field("terms_matching_strategy", terms_matching_strategy)
.field("scoring_strategy", scoring_strategy)
.field("exhaustive_number_hits", exhaustive_number_hits)
.field("words_limit", words_limit)
.finish()
@@ -171,8 +169,8 @@ impl fmt::Debug for Search<'_> {
pub struct SearchResult {
pub matching_words: MatchingWords,
pub candidates: RoaringBitmap,
// TODO: these document ids should be associated with their criteria scores.
pub documents_ids: Vec<DocumentId>,
pub document_scores: Vec<Vec<ScoreDetails>>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -210,6 +208,195 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
}
}
pub struct SearchForFacetValues<'a> {
query: Option<String>,
facet: String,
search_query: Search<'a>,
}
impl<'a> SearchForFacetValues<'a> {
pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValues<'a> {
SearchForFacetValues { query: None, facet, search_query }
}
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
self.query = Some(query.into());
self
}
fn one_original_value_of(
&self,
field_id: FieldId,
facet_str: &str,
any_docid: DocumentId,
) -> Result<Option<String>> {
let index = self.search_query.index;
let rtxn = self.search_query.rtxn;
let key: (FieldId, _, &str) = (field_id, any_docid, facet_str);
Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned()))
}
pub fn execute(&self) -> Result<Vec<FacetValueHit>> {
let index = self.search_query.index;
let rtxn = self.search_query.rtxn;
let filterable_fields = index.filterable_fields(rtxn)?;
if !filterable_fields.contains(&self.facet) {
return Err(UserError::InvalidFacetSearchFacetName {
field: self.facet.clone(),
valid_fields: filterable_fields.into_iter().collect(),
}
.into());
}
let fields_ids_map = index.fields_ids_map(rtxn)?;
let fid = match fields_ids_map.id(&self.facet) {
Some(fid) => fid,
// we return an empty list of results when the attribute has been
// set as filterable but no document contains this field (yet).
None => return Ok(Vec::new()),
};
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? {
Some(fst) => fst,
None => return Ok(vec![]),
};
let search_candidates = self.search_query.execute()?.candidates;
match self.query.as_ref() {
Some(query) => {
let query = normalize_facet(query);
let query = query.as_str();
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
let field_authorizes_typos =
!self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);
if authorize_typos && field_authorizes_typos {
let mut results = vec![];
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: query };
if let Some(FacetGroupValue { bitmap, .. }) =
index.facet_id_string_docids.get(rtxn, &key)?
{
let count = search_candidates.intersection_len(&bitmap);
if count != 0 {
let value = self
.one_original_value_of(fid, query, bitmap.min().unwrap())?
.unwrap_or_else(|| query.to_string());
results.push(FacetValueHit { value, count });
}
}
} else {
let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;
let is_prefix = true;
let automaton = if query.len() < one_typo as usize {
build_dfa(query, 0, is_prefix)
} else if query.len() < two_typos as usize {
build_dfa(query, 1, is_prefix)
} else {
build_dfa(query, 2, is_prefix)
};
let mut stream = fst.search(automaton).into_stream();
let mut length = 0;
while let Some(facet_value) = stream.next() {
let value = std::str::from_utf8(facet_value)?;
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
Some(FacetGroupValue { bitmap, .. }) => bitmap,
None => {
error!(
"the facet value is missing from the facet database: {key:?}"
);
continue;
}
};
let count = search_candidates.intersection_len(&docids);
if count != 0 {
let value = self
.one_original_value_of(fid, value, docids.min().unwrap())?
.unwrap_or_else(|| value.to_string());
results.push(FacetValueHit { value, count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
}
Ok(results)
} else {
let automaton = Str::new(query).starts_with();
let mut stream = fst.search(automaton).into_stream();
let mut results = vec![];
let mut length = 0;
while let Some(facet_value) = stream.next() {
let value = std::str::from_utf8(facet_value)?;
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
Some(FacetGroupValue { bitmap, .. }) => bitmap,
None => {
error!(
"the facet value is missing from the facet database: {key:?}"
);
continue;
}
};
let count = search_candidates.intersection_len(&docids);
if count != 0 {
let value = self
.one_original_value_of(fid, value, docids.min().unwrap())?
.unwrap_or_else(|| value.to_string());
results.push(FacetValueHit { value, count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
Ok(results)
}
}
None => {
let mut results = vec![];
let mut length = 0;
let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
result?;
let count = search_candidates.intersection_len(&bitmap);
if count != 0 {
let value = self
.one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
.unwrap_or_else(|| left_bound.to_string());
results.push(FacetValueHit { value, count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
Ok(results)
}
}
}
}
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
pub struct FacetValueHit {
/// The original facet value
pub value: String,
/// The number of documents associated to this facet
pub count: u64,
}
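The automaton selection in `execute` above derives a typo budget from the query length; a standalone sketch of that rule (the thresholds default to 5 and 9 in Meilisearch's typo-tolerance settings, assumed here for the examples):

fn typo_budget(query_len: usize, one_typo: usize, two_typos: usize) -> u8 {
    // Mirrors the `build_dfa` selection: short queries get no typo,
    // medium-length queries one, and longer queries two.
    if query_len < one_typo {
        0
    } else if query_len < two_typos {
        1
    } else {
        2
    }
}

// With the defaults (5, 9): "blu" -> 0 typos, "blueish" -> 1, "bluish-green" -> 2.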
#[cfg(test)]
mod test {
#[allow(unused_imports)]

View File

@@ -3,18 +3,14 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
use super::SearchContext;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput};
use crate::Result;
pub struct BucketSortOutput {
pub docids: Vec<u32>,
pub scores: Vec<Vec<ScoreDetails>>,
pub all_candidates: RoaringBitmap,
}
// TODO: would probably be good to regroup some of these inside of a struct?
#[allow(clippy::too_many_arguments)]
pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ctx: &mut SearchContext<'ctx>,
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
@@ -22,7 +18,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
universe: &RoaringBitmap,
from: usize,
length: usize,
scoring_strategy: ScoringStrategy,
logger: &mut dyn SearchLogger<Q>,
) -> Result<BucketSortOutput> {
logger.initial_query(query);
@@ -36,11 +31,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
};
if universe.len() < from as u64 {
return Ok(BucketSortOutput {
docids: vec![],
scores: vec![],
all_candidates: universe.clone(),
});
return Ok(BucketSortOutput { docids: vec![], all_candidates: universe.clone() });
}
if ranking_rules.is_empty() {
if let Some(distinct_fid) = distinct_fid {
@@ -58,32 +49,22 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
}
let mut all_candidates = universe - excluded;
all_candidates.extend(results.iter().copied());
return Ok(BucketSortOutput {
scores: vec![Default::default(); results.len()],
docids: results,
all_candidates,
});
return Ok(BucketSortOutput { docids: results, all_candidates });
} else {
let docids: Vec<u32> = universe.iter().skip(from).take(length).collect();
return Ok(BucketSortOutput {
scores: vec![Default::default(); docids.len()],
docids,
all_candidates: universe.clone(),
});
let docids = universe.iter().skip(from).take(length).collect();
return Ok(BucketSortOutput { docids, all_candidates: universe.clone() });
};
}
let ranking_rules_len = ranking_rules.len();
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);
ranking_rules[0].start_iteration(ctx, logger, universe, query)?;
let mut ranking_rule_scores: Vec<ScoreDetails> = vec![];
let mut ranking_rule_universes: Vec<RoaringBitmap> =
vec![RoaringBitmap::default(); ranking_rules_len];
ranking_rule_universes[0] = universe.clone();
let mut cur_ranking_rule_index = 0;
/// Finish iterating over the current ranking rule, yielding
@@ -108,16 +89,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
} else {
cur_ranking_rule_index -= 1;
}
// FIXME: check off by one
if ranking_rule_scores.len() > cur_ranking_rule_index {
ranking_rule_scores.pop();
}
};
}
let mut all_candidates = universe.clone();
let mut valid_docids = vec![];
let mut valid_scores = vec![];
let mut cur_offset = 0usize;
macro_rules! maybe_add_to_results {
@@ -128,26 +104,21 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
length,
logger,
&mut valid_docids,
&mut valid_scores,
&mut all_candidates,
&mut ranking_rule_universes,
&mut ranking_rules,
cur_ranking_rule_index,
&mut cur_offset,
distinct_fid,
&ranking_rule_scores,
$candidates,
)?;
};
}
while valid_docids.len() < length {
// The universe for this bucket is zero, so we don't need to sort
// anything, just go back to the parent ranking rule.
if ranking_rule_universes[cur_ranking_rule_index].is_empty()
|| (scoring_strategy == ScoringStrategy::Skip
&& ranking_rule_universes[cur_ranking_rule_index].len() == 1)
{
// The universe for this bucket contains at most one element, so we don't need to sort
// anything, just extend the results and go back to the parent ranking rule.
if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 {
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
maybe_add_to_results!(bucket);
back!();
@@ -159,8 +130,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
continue;
};
ranking_rule_scores.push(next_bucket.score);
logger.next_bucket_ranking_rule(
cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(),
@@ -174,12 +143,10 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
if cur_ranking_rule_index == ranking_rules_len - 1
|| (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1)
|| next_bucket.candidates.len() <= 1
|| cur_offset + (next_bucket.candidates.len() as usize) < from
{
maybe_add_to_results!(next_bucket.candidates);
// FIXME: use index based logic like all the other rules so that you don't have to maintain the pop/push?
ranking_rule_scores.pop();
continue;
}
@@ -199,7 +166,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
)?;
}
Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates })
Ok(BucketSortOutput { docids: valid_docids, all_candidates })
}
/// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset`
@@ -212,18 +179,14 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
logger: &mut dyn SearchLogger<Q>,
valid_docids: &mut Vec<u32>,
valid_scores: &mut Vec<Vec<ScoreDetails>>,
all_candidates: &mut RoaringBitmap,
ranking_rule_universes: &mut [RoaringBitmap],
ranking_rules: &mut [BoxRankingRule<'ctx, Q>],
cur_ranking_rule_index: usize,
cur_offset: &mut usize,
distinct_fid: Option<u16>,
ranking_rule_scores: &[ScoreDetails],
candidates: RoaringBitmap,
) -> Result<()> {
// First apply the distinct rule on the candidates, reducing the universes if necessary
@@ -268,17 +231,13 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
let candidates =
candidates.iter().take(length - valid_docids.len()).copied().collect::<Vec<_>>();
logger.add_to_results(&candidates);
valid_docids.extend_from_slice(&candidates);
valid_scores
.extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
valid_docids.extend(&candidates);
}
} else {
// if we have passed the offset already, add some of the documents (up to the limit)
let candidates = candidates.iter().take(length - valid_docids.len()).collect::<Vec<u32>>();
logger.add_to_results(&candidates);
valid_docids.extend_from_slice(&candidates);
valid_scores
.extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
valid_docids.extend(&candidates);
}
*cur_offset += candidates.len() as usize;
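A much-simplified sketch of the offset bookkeeping performed here (standalone and hypothetical: it ignores distinct filtering and walks documents one by one rather than slicing whole buckets):

fn page(buckets: &[Vec<u32>], from: usize, length: usize) -> Vec<u32> {
    let mut out = Vec::new();
    let mut cur_offset = 0usize;
    for bucket in buckets {
        for &docid in bucket {
            // Skip documents until `from` is reached, then keep up to `length`.
            if cur_offset >= from && out.len() < length {
                out.push(docid);
            }
            cur_offset += 1;
        }
        if out.len() >= length {
            break;
        }
    }
    out
}

// page(&[vec![7, 3], vec![9, 1, 4]], 1, 2) yields [3, 9].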

View File

@@ -2,7 +2,6 @@ use roaring::{MultiOps, RoaringBitmap};
use super::query_graph::QueryGraph;
use super::ranking_rules::{RankingRule, RankingRuleOutput};
use crate::score_details::{self, ScoreDetails};
use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::ExactTerm;
use crate::{Result, SearchContext, SearchLogger};
@@ -245,13 +244,7 @@ impl State {
candidates &= universe;
(
State::AttributeStarts(query_graph.clone(), candidates_per_attribute),
Some(RankingRuleOutput {
query: query_graph,
candidates,
score: ScoreDetails::ExactAttribute(
score_details::ExactAttribute::ExactMatch,
),
}),
Some(RankingRuleOutput { query: query_graph, candidates }),
)
}
State::AttributeStarts(query_graph, candidates_per_attribute) => {
@@ -264,24 +257,12 @@ impl State {
candidates &= universe;
(
State::Empty(query_graph.clone()),
Some(RankingRuleOutput {
query: query_graph,
candidates,
score: ScoreDetails::ExactAttribute(
score_details::ExactAttribute::MatchesStart,
),
}),
Some(RankingRuleOutput { query: query_graph, candidates }),
)
}
State::Empty(query_graph) => (
State::Empty(query_graph.clone()),
Some(RankingRuleOutput {
query: query_graph,
candidates: universe.clone(),
score: ScoreDetails::ExactAttribute(
score_details::ExactAttribute::NoExactMatch,
),
}),
Some(RankingRuleOutput { query: query_graph, candidates: universe.clone() }),
),
};
(state, output)

View File

@@ -8,7 +8,6 @@ use rstar::RTree;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec};
use crate::score_details::{self, ScoreDetails};
use crate::{
distance_between_two_points, lat_lng_to_xyz, GeoPoint, Index, Result, SearchContext,
SearchLogger,
@@ -81,7 +80,7 @@ pub struct GeoSort<Q: RankingRuleQueryTrait> {
field_ids: Option<[u16; 2]>,
rtree: Option<RTree<GeoPoint>>,
cached_sorted_docids: VecDeque<(u32, [f64; 2])>,
cached_sorted_docids: VecDeque<u32>,
geo_candidates: RoaringBitmap,
}
@@ -131,7 +130,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
let point = lat_lng_to_xyz(&self.point);
for point in rtree.nearest_neighbor_iter(&point) {
if self.geo_candidates.contains(point.data.0) {
self.cached_sorted_docids.push_back(point.data);
self.cached_sorted_docids.push_back(point.data.0);
if self.cached_sorted_docids.len() >= cache_size {
break;
}
@@ -143,7 +142,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
let point = lat_lng_to_xyz(&opposite_of(self.point));
for point in rtree.nearest_neighbor_iter(&point) {
if self.geo_candidates.contains(point.data.0) {
self.cached_sorted_docids.push_front(point.data);
self.cached_sorted_docids.push_front(point.data.0);
if self.cached_sorted_docids.len() >= cache_size {
break;
}
@@ -178,7 +177,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
// computing the distance between two points is expensive thus we cache the result
documents
.sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize);
self.cached_sorted_docids.extend(documents.into_iter());
self.cached_sorted_docids.extend(documents.into_iter().map(|(doc_id, _)| doc_id));
};
Ok(())
@@ -221,19 +220,12 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<Q>>> {
assert!(universe.len() > 1);
let query = self.query.as_ref().unwrap().clone();
self.geo_candidates &= universe;
if self.geo_candidates.is_empty() {
return Ok(Some(RankingRuleOutput {
query,
candidates: universe.clone(),
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: None,
}),
}));
return Ok(Some(RankingRuleOutput { query, candidates: universe.clone() }));
}
let ascending = self.ascending;
@@ -244,16 +236,11 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
cache.pop_back()
}
};
while let Some((id, point)) = next(&mut self.cached_sorted_docids) {
while let Some(id) = next(&mut self.cached_sorted_docids) {
if self.geo_candidates.contains(id) {
return Ok(Some(RankingRuleOutput {
query,
candidates: RoaringBitmap::from_iter([id]),
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: Some(point),
}),
}));
}
}

View File

@@ -50,7 +50,6 @@ use super::ranking_rule_graph::{
};
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::score_details::Rank;
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::PathVisitor;
use crate::{Result, TermsMatchingStrategy};
@@ -119,8 +118,6 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
all_costs: MappedInterner<QueryNode, Vec<u64>>,
/// An index in the first element of `all_distances`, giving the cost of the next bucket
cur_cost: u64,
/// One above the highest possible cost for this rule
next_max_cost: u64,
}
impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
@@ -134,20 +131,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
_universe: &RoaringBitmap,
query_graph: &QueryGraph,
) -> Result<()> {
// the `next_max_cost` is the successor integer to the maximum cost of the paths in the graph.
//
// When there is a matching strategy, it also factors the additional costs of:
// 1. The words that are matched in phrases
// 2. Skipping words (by adding them to the paths with a cost)
let mut next_max_cost = 1;
let removal_cost = if let Some(terms_matching_strategy) = self.terms_matching_strategy {
// add the cost of the phrase to the next_max_cost
next_max_cost += query_graph
.words_in_phrases_count(ctx)
// remove 1 from the words in phrases count, because when there is a phrase we can now have a document
// where only the phrase is matching, and none of the non-phrase words.
// With the `1` that `next_max_cost` is initialized with, this gets counted twice.
.saturating_sub(1) as u64;
match terms_matching_strategy {
TermsMatchingStrategy::Last => {
let removal_order =
@@ -155,12 +139,13 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
let mut forbidden_nodes =
SmallBitmap::for_interned_values_in(&query_graph.nodes);
let mut costs = query_graph.nodes.map(|_| None);
// FIXME: this works because only the words rule uses TermsMatchingStrategy at the moment.
let mut cost = 100;
for ns in removal_order {
for n in ns.iter() {
*costs.get_mut(n) = Some((1, forbidden_nodes.clone()));
*costs.get_mut(n) = Some((cost, forbidden_nodes.clone()));
}
forbidden_nodes.union(&ns);
cost += 100;
}
costs
}
@@ -177,16 +162,12 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
// Then pre-compute the cost of all paths from each node to the end node
let all_costs = graph.find_all_costs_to_end();
next_max_cost +=
all_costs.get(graph.query_graph.root_node).iter().copied().max().unwrap_or(0);
let state = GraphBasedRankingRuleState {
graph,
conditions_cache: condition_docids_cache,
dead_ends_cache,
all_costs,
cur_cost: 0,
next_max_cost,
};
self.state = Some(state);
@@ -200,13 +181,17 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
logger: &mut dyn SearchLogger<QueryGraph>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
// If universe.len() <= 1, the bucket sort algorithm
// should not have called this function.
assert!(universe.len() > 1);
// Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`,
// which should never happen
let mut state = self.state.take().unwrap();
let all_costs = state.all_costs.get(state.graph.query_graph.root_node);
// Retrieve the cost of the paths to compute
let Some(&cost) = all_costs
let Some(&cost) = state
.all_costs
.get(state.graph.query_graph.root_node)
.iter()
.find(|c| **c >= state.cur_cost) else {
self.state = None;
@@ -222,12 +207,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
dead_ends_cache,
all_costs,
cur_cost: _,
next_max_cost,
} = &mut state;
let rank = *next_max_cost - cost;
let score = G::rank_to_score(Rank { rank: rank as u32, max_rank: *next_max_cost as u32 });
let mut universe = universe.clone();
let mut used_conditions = SmallBitmap::for_interned_values_in(&graph.conditions_interner);
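The `rank` arithmetic above inverts a path cost into a score: cheaper paths rank higher. A hedged numeric sketch, assuming a graph whose maximum path cost is 4 (so `next_max_cost` is 5):

// Hypothetical numbers: next_max_cost = 5, i.e. max path cost 4 plus one.
fn rank_example() {
    let next_max_cost: u64 = 5;
    for cost in [0u64, 2, 4] {
        let rank = next_max_cost - cost;
        // cost 0 -> rank 5/5 (best), cost 2 -> 3/5, cost 4 -> 1/5 (worst)
        assert!(rank >= 1 && rank <= next_max_cost);
    }
}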
@@ -314,6 +295,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
// We modify the next query graph so that it only contains the subgraph
// that was used to compute this bucket
// But we only do it in case the bucket length is >1, because otherwise
// we know the child ranking rule won't be called anyway
let paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>> = good_paths
.into_iter()
@@ -342,7 +325,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
self.state = Some(state);
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket, score }))
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket }))
}
fn end_iteration(

View File

@@ -510,7 +510,6 @@ mod tests {
&mut ctx,
&Some(query.to_string()),
crate::TermsMatchingStrategy::default(),
crate::score_details::ScoringStrategy::Skip,
false,
&None,
&None,

View File

@@ -44,7 +44,6 @@ use self::geo_sort::GeoSort;
pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
@@ -351,7 +350,6 @@ pub fn execute_search(
ctx: &mut SearchContext,
query: &Option<String>,
terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy,
exhaustive_number_hits: bool,
filters: &Option<Filter>,
sort_criteria: &Option<Vec<AscDesc>>,
@@ -413,16 +411,7 @@ pub fn execute_search(
universe =
resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?;
bucket_sort(
ctx,
ranking_rules,
&graph,
&universe,
from,
length,
scoring_strategy,
query_graph_logger,
)?
bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)?
} else {
let ranking_rules =
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?;
@@ -433,20 +422,17 @@ pub fn execute_search(
&universe,
from,
length,
scoring_strategy,
placeholder_search_logger,
)?
};
let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output;
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
let BucketSortOutput { docids, mut all_candidates } = bucket_sort_output;
// The candidates are the universe unless the exhaustive number of hits
// is requested and a distinct attribute is set.
if exhaustive_number_hits {
if let Some(f) = ctx.index.distinct_field(ctx.txn)? {
if let Some(distinct_fid) = fields_ids_map.id(f) {
if let Some(distinct_fid) = ctx.index.fields_ids_map(ctx.txn)?.id(f) {
all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
}
}
@@ -454,7 +440,6 @@ pub fn execute_search(
Ok(PartialSearchResult {
candidates: all_candidates,
document_scores: scores,
documents_ids: docids,
located_query_terms,
})
@@ -506,5 +491,4 @@ pub struct PartialSearchResult {
pub located_query_terms: Option<Vec<LocatedQueryTerm>>,
pub candidates: RoaringBitmap,
pub documents_ids: Vec<DocumentId>,
pub document_scores: Vec<Vec<ScoreDetails>>,
}

View File

@@ -342,25 +342,6 @@ impl QueryGraph {
}
res
}
/// Number of words in the phrases in this query graph
pub(crate) fn words_in_phrases_count(&self, ctx: &SearchContext) -> usize {
let mut word_count = 0;
for (_, node) in self.nodes.iter() {
match &node.data {
QueryNodeData::Term(term) => {
let Some(phrase) = term.term_subset.original_phrase(ctx)
else {
continue
};
let phrase = ctx.phrase_interner.get(phrase);
word_count += phrase.words.iter().copied().filter(|a| a.is_some()).count()
}
_ => continue,
}
}
word_count
}
}
fn add_node(nodes_data: &mut Vec<QueryNodeData>, node_data: QueryNodeData) -> u16 {

View File

@@ -79,7 +79,7 @@ pub fn located_query_terms_from_tokens(
TokenKind::Separator(separator_kind) => {
// add penalty for hard separators
if let SeparatorKind::Hard = separator_kind {
position = position.wrapping_add(7);
position = position.wrapping_add(1);
}
phrase = 'phrase: {
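For context on the penalty change above: term positions advance by 1 per word, and a hard separator bumps the position further so words across it are treated as far apart. A hedged sketch of that positioning, taking the scored branch's penalty of 7 as the assumption:

// Illustrative: assign positions to "quick. brown" with a hard-separator penalty.
fn position_example() {
    let mut position: u16 = 0; // "quick"
    position = position.wrapping_add(7); // hard separator "." (penalty assumed to be 7)
    position = position.wrapping_add(1); // next word slot for "brown"
    assert_eq!(position, 8); // "brown" sits 8 positions after "quick"
}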

View File

@@ -49,15 +49,10 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
if let Some((cost_of_ignoring, forbidden_nodes)) =
cost_of_ignoring_node.get(dest_idx)
{
let dest = graph_nodes.get(dest_idx);
let dest_size = match &dest.data {
QueryNodeData::Term(term) => term.term_ids.len(),
_ => panic!(),
};
let new_edge_id = edges_store.insert(Some(Edge {
source_node: source_id,
dest_node: dest_idx,
cost: *cost_of_ignoring * dest_size as u32,
cost: *cost_of_ignoring,
condition: None,
nodes_to_skip: forbidden_nodes.clone(),
}));
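The `dest_size` factor above scales the cost of skipping a node by how many terms it covers, so skipping an ngram is as expensive as skipping its words one by one. A minimal hedged sketch:

// Illustrative: skipping a node that covers an ngram of `n` terms costs `n` times
// the base cost of ignoring a single-term node.
fn skip_cost(cost_of_ignoring: u32, dest_term_count: usize) -> u32 {
    cost_of_ignoring * dest_term_count as u32
}
// e.g. skip_cost(1, 2) == 2 for a 2-gram node when ignoring one word costs 1.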

View File

@@ -1,7 +1,6 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
@@ -85,8 +84,4 @@ impl RankingRuleGraphTrait for ExactnessGraph {
Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)])
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Exactness(rank)
}
}
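For the exactness edges above: each destination term gets a zero-cost edge when matched exactly, and a skip edge whose cost equals its number of term ids. A hedged sketch of the resulting cost pairs, with a placeholder enum standing in for the interned conditions:

// Illustrative: the (cost, condition) pairs built for a destination term.
#[derive(Debug, PartialEq)]
enum Cond { Exact, Skip }

fn exactness_edges(term_ids_len: u32) -> Vec<(u32, Cond)> {
    vec![(0, Cond::Exact), (term_ids_len, Cond::Skip)]
}
// exactness_edges(2) == [(0, Exact), (2, Skip)]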

View File

@@ -2,7 +2,6 @@ use fxhash::FxHashSet;
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
@@ -69,7 +68,7 @@ impl RankingRuleGraphTrait for FidGraph {
}
let mut edges = vec![];
for fid in all_fields.iter().copied() {
for fid in all_fields {
// TODO: We can improve performance and relevancy by storing
// the term subsets associated with each field id fetched.
edges.push((
@@ -81,35 +80,6 @@ impl RankingRuleGraphTrait for FidGraph {
));
}
// always lookup the max_fid if we don't already and add an artificial condition for max scoring
let max_fid: Option<u16> = {
if let Some(max_fid) = ctx
.index
.searchable_fields_ids(ctx.txn)?
.map(|field_ids| field_ids.into_iter().max())
{
max_fid
} else {
ctx.index.fields_ids_map(ctx.txn)?.ids().max()
}
};
if let Some(max_fid) = max_fid {
if !all_fields.contains(&max_fid) {
edges.push((
max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
conditions_interner.insert(FidCondition {
term: term.clone(), // TODO remove this ugly clone
fid: max_fid,
}),
));
}
}
Ok(edges)
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Fid(rank)
}
}
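The removed block above looks up the highest field id so that an artificial worst-cost edge always exists for max scoring. A hedged standalone sketch of that fallback, with plain slices standing in for the index lookups:

// Illustrative: prefer the highest searchable field id; if no explicit searchable
// list is configured, fall back to the highest id known to the fields-id map.
fn max_fid(searchable_fields: Option<&[u16]>, all_field_ids: &[u16]) -> Option<u16> {
    match searchable_fields {
        Some(ids) => ids.iter().copied().max(),
        None => all_field_ids.iter().copied().max(),
    }
}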

View File

@@ -41,7 +41,6 @@ use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner
use super::query_term::LocatedQueryTermSubset;
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext};
use crate::score_details::{Rank, ScoreDetails};
use crate::Result;
pub struct ComputedCondition {
@@ -111,9 +110,6 @@ pub trait RankingRuleGraphTrait: Sized + 'static {
source_node: Option<&LocatedQueryTermSubset>,
dest_node: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>>;
/// Convert the rank of a path to its corresponding score for the ranking rule
fn rank_to_score(rank: Rank) -> ScoreDetails;
}
/// The graph used by graph-based ranking rules.

View File

@@ -2,7 +2,6 @@ use fxhash::{FxHashMap, FxHashSet};
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_position;
@@ -78,8 +77,6 @@ impl RankingRuleGraphTrait for PositionGraph {
let mut positions_for_costs = FxHashMap::<u32, Vec<u16>>::default();
for position in all_positions {
// FIXME: bucketed position???
let distance = position.abs_diff(*term.positions.start());
let cost = {
let mut cost = 0;
for i in 0..term.term_ids.len() {
@@ -87,17 +84,15 @@ impl RankingRuleGraphTrait for PositionGraph {
// Because if two words are in the same bucketed position (e.g. 32) and consecutive,
// then their position cost will be 32+32=64, but an ngram of these two words at the
// same position will have a cost of 32+32+1=65
cost += cost_from_distance(distance as u32 + i as u32);
cost += cost_from_position(position as u32 + i as u32);
}
cost
};
positions_for_costs.entry(cost).or_default().push(position);
}
let max_cost = term.term_ids.len() as u32 * 10;
let max_cost_exists = positions_for_costs.contains_key(&max_cost);
let mut edges = vec![];
for (cost, positions) in positions_for_costs {
// TODO: We can improve performance and relevancy by storing
// the term subsets associated with each position fetched
@@ -110,25 +105,12 @@ impl RankingRuleGraphTrait for PositionGraph {
));
}
if !max_cost_exists {
// artificial empty condition for computing max cost
edges.push((
max_cost,
conditions_interner
.insert(PositionCondition { term: term.clone(), positions: Vec::default() }),
));
}
Ok(edges)
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Position(rank)
}
}
fn cost_from_distance(distance: u32) -> u32 {
match distance {
fn cost_from_position(sum_positions: u32) -> u32 {
match sum_positions {
0 => 0,
1 => 1,
2..=4 => 2,
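The match above is truncated by the diff; the idea is that position (or distance) values are bucketed so that distant positions cost only progressively more. A hedged reconstruction of the bucketing shape — the arms beyond `2..=4` are an assumption, not the crate's verbatim code:

// Assumed bucketing: the exact arms beyond `2..=4` are illustrative.
fn cost_from_position(sum_positions: u32) -> u32 {
    match sum_positions {
        0 => 0,
        1 => 1,
        2..=4 => 2,
        5..=7 => 3,
        8..=11 => 4,
        12..=16 => 5,
        _ => 6, // further buckets elided; the real code continues the progression
    }
}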

View File

@@ -12,11 +12,11 @@ pub fn build_edges(
left_term: Option<&LocatedQueryTermSubset>,
right_term: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<ProximityCondition>)>> {
let right_ngram_max = right_term.term_ids.len().saturating_sub(1);
let right_ngram_length = right_term.term_ids.len();
let Some(left_term) = left_term else {
return Ok(vec![(
right_ngram_max as u32,
(right_ngram_length - 1) as u32,
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
)])
};
@@ -29,25 +29,25 @@ pub fn build_edges(
// The remaining query graph represents `the sun .. are beautiful`
// but `sun` and `are` have no proximity condition between them
return Ok(vec![(
right_ngram_max as u32,
(right_ngram_length - 1) as u32,
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
)]);
}
let mut conditions = vec![];
for cost in right_ngram_max..(7 + right_ngram_max) {
for cost in right_ngram_length..(7 + right_ngram_length) {
conditions.push((
cost as u32,
conditions_interner.insert(ProximityCondition::Uninit {
left_term: left_term.clone(),
right_term: right_term.clone(),
cost: (cost + 1) as u8,
cost: cost as u8,
}),
))
}
conditions.push((
(7 + right_ngram_max) as u32,
(7 + right_ngram_length) as u32,
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
));
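To make the cost range above concrete: for a right term that is an ngram of 2 words, the `right_ngram_length` variant of the loop builds proximity edges at costs 2 through 8, plus a final term-only edge at cost 9. A small hedged sketch of the cost list:

// Illustrative: the edge costs generated for a right term spanning 2 term ids,
// following the `right_ngram_length` variant of the loop above.
fn proximity_edge_costs(right_ngram_length: u32) -> Vec<u32> {
    let mut costs: Vec<u32> = (right_ngram_length..(7 + right_ngram_length)).collect();
    costs.push(7 + right_ngram_length); // final term-only edge
    costs
}
// proximity_edge_costs(2) == [2, 3, 4, 5, 6, 7, 8, 9]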

View File

@@ -4,7 +4,6 @@ pub mod compute_docids;
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::SearchContext;
@@ -37,8 +36,4 @@ impl RankingRuleGraphTrait for ProximityGraph {
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
build::build_edges(ctx, conditions_interner, source_term, dest_term)
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Proximity(rank)
}
}

View File

@@ -1,7 +1,6 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{self, Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
@@ -76,8 +75,4 @@ impl RankingRuleGraphTrait for TypoGraph {
}
Ok(edges)
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Typo(score_details::Typo::from_rank(rank))
}
}

View File

@@ -1,7 +1,6 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{self, Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
@@ -42,10 +41,9 @@ impl RankingRuleGraphTrait for WordsGraph {
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
Ok(vec![(0, conditions_interner.insert(WordsCondition { term: to_term.clone() }))])
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Words(score_details::Words::from_rank(rank))
Ok(vec![(
to_term.term_ids.len() as u32,
conditions_interner.insert(WordsCondition { term: to_term.clone() }),
)])
}
}

View File

@@ -2,7 +2,6 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{QueryGraph, SearchContext};
use crate::score_details::ScoreDetails;
use crate::Result;
/// An internal trait implemented by only [`PlaceholderQuery`] and [`QueryGraph`]
@@ -67,6 +66,4 @@ pub struct RankingRuleOutput<Q> {
pub query: Q,
/// The allowed candidates for the child ranking rule
pub candidates: RoaringBitmap,
/// The score for the candidates of the current bucket
pub score: ScoreDetails,
}

View File

@@ -1,11 +1,9 @@
use heed::BytesDecode;
use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
use crate::score_details::{self, ScoreDetails};
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::{FieldId, Index, Result};
@@ -51,7 +49,6 @@ pub struct Sort<'ctx, Query> {
is_ascending: bool,
original_query: Option<Query>,
iter: Option<RankingRuleOutputIterWrapper<'ctx, Query>>,
must_redact: bool,
}
impl<'ctx, Query> Sort<'ctx, Query> {
pub fn new(
@@ -62,30 +59,15 @@ impl<'ctx, Query> Sort<'ctx, Query> {
) -> Result<Self> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let field_id = fields_ids_map.id(&field_name);
let must_redact = Self::must_redact(index, rtxn, &field_name)?;
Ok(Self {
field_name,
field_id,
is_ascending,
original_query: None,
iter: None,
must_redact,
})
}
fn must_redact(index: &Index, rtxn: &'ctx heed::RoTxn, field_name: &str) -> Result<bool> {
let Some(displayed_fields) = index.displayed_fields(rtxn)?
else { return Ok(false); };
Ok(!displayed_fields.iter().any(|&field| field == field_name))
Ok(Self { field_name, field_id, is_ascending, original_query: None, iter: None })
}
}
impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, Query> {
fn id(&self) -> String {
let Self { field_name, is_ascending, .. } = self;
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" })
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc " })
}
fn start_iteration(
&mut self,
@@ -136,45 +118,12 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
};
let number_iter = number_iter.map(|r| -> Result<_> {
let (docids, bytes) = r?;
Ok((
docids,
serde_json::Value::Number(
serde_json::Number::from_f64(
OrderedF64Codec::bytes_decode(bytes).expect("some number"),
)
.expect("too big float"),
),
))
});
let string_iter = string_iter.map(|r| -> Result<_> {
let (docids, bytes) = r?;
Ok((
docids,
serde_json::Value::String(
StrRefCodec::bytes_decode(bytes).expect("some string").to_owned(),
),
))
});
let query_graph = parent_query.clone();
let ascending = self.is_ascending;
let field_name = self.field_name.clone();
let must_redact = self.must_redact;
RankingRuleOutputIterWrapper::new(Box::new(number_iter.chain(string_iter).map(
move |r| {
let (docids, value) = r?;
Ok(RankingRuleOutput {
query: query_graph.clone(),
candidates: docids,
score: ScoreDetails::Sort(score_details::Sort {
field_name: field_name.clone(),
ascending,
redacted: must_redact,
value,
}),
})
let (docids, _) = r?;
Ok(RankingRuleOutput { query: query_graph.clone(), candidates: docids })
},
)))
}
@@ -201,16 +150,7 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
Ok(Some(bucket))
} else {
let query = self.original_query.as_ref().unwrap().clone();
Ok(Some(RankingRuleOutput {
query,
candidates: universe.clone(),
score: ScoreDetails::Sort(score_details::Sort {
field_name: self.field_name.clone(),
ascending: self.is_ascending,
redacted: self.must_redact,
value: serde_json::Value::Null,
}),
}))
Ok(Some(RankingRuleOutput { query, candidates: universe.clone() }))
}
}
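One detail from the scoring code removed in this file: a sort field's value is redacted from the score details when the field is not displayed. A hedged sketch of that check, with plain slices instead of the index's displayed-fields lookup:

// Illustrative: redact the sort value when the field is not among displayed fields.
// `None` means no restriction, i.e. every field is displayed.
fn must_redact(displayed_fields: Option<&[&str]>, field_name: &str) -> bool {
    match displayed_fields {
        None => false,
        Some(fields) => !fields.iter().any(|&f| f == field_name),
    }
}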

View File

@@ -122,11 +122,8 @@ fn test_attribute_fid_simple() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 6, 5, 4, 3, 9, 7, 8, 11, 10, 12, 13, 14, 0]");
}
#[test]
@@ -138,11 +135,6 @@ fn test_attribute_fid_ngrams() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 6, 5, 4, 3, 9, 7, 8, 11, 10, 12, 13, 14, 0]");
}

View File

@@ -40,68 +40,68 @@ fn create_index() -> TempIndex {
},
{
"id": 5,
"text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
"text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
the quick brown fox",
},
{
"id": 6,
"text": "quick a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
"text": "quick a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
brown",
},
{
"id": 7,
"text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
"text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
quickbrown",
},
{
"id": 8,
"text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
"text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
quick brown",
},
{
"id": 9,
"text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
"text": "a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a
quickbrown",
},
{
@@ -137,13 +137,8 @@ fn test_attribute_position_simple() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]");
}
#[test]
fn test_attribute_position_repeated() {
@@ -154,13 +149,8 @@ fn test_attribute_position_repeated() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("a a a a a");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 7, 8, 9, 6]");
}
#[test]
@@ -172,13 +162,8 @@ fn test_attribute_position_different_fields() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]");
}
#[test]
@@ -190,11 +175,6 @@ fn test_attribute_position_ngrams() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]");
}

View File

@@ -474,14 +474,8 @@ fn test_exactness_simple_ordered() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 8, 7, 6, 5, 4, 3, 2, 1]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
@@ -507,14 +501,8 @@ fn test_exactness_simple_reversed() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 8, 3, 4, 5, 6, 7]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
@@ -531,14 +519,8 @@ fn test_exactness_simple_reversed() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 8, 3, 4, 5, 6, 7]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
@@ -562,14 +544,8 @@ fn test_exactness_simple_random() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[8, 7, 4, 6, 3, 5]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
@@ -592,14 +568,8 @@ fn test_exactness_attribute_starts_with_simple() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("this balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
@@ -619,14 +589,8 @@ fn test_exactness_attribute_starts_with_phrase() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("\"overlooking the sea\" is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 4, 1]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
@@ -640,14 +604,8 @@ fn test_exactness_attribute_starts_with_phrase() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("overlooking the sea is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 4, 3, 1, 7]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
@@ -670,14 +628,8 @@ fn test_exactness_all_candidates_with_typo() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("overlocking the sea is a beautiful balcony");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 6, 1, 7]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
// "overlooking" is returned here because the term matching strategy allows it
// but it has the worst exactness score (0 exact words)
@@ -707,14 +659,8 @@ fn test_exactness_after_words() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 9, 18, 8, 17, 16, 6, 7, 15, 5, 14, 4, 13, 3, 12, 2, 1, 11]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
@@ -756,13 +702,7 @@ fn test_words_after_exactness() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 9, 18, 8, 17, 16, 6, 7, 15, 5, 14, 4, 13, 3, 12, 2, 1, 11]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@@ -805,14 +745,7 @@ fn test_proximity_after_exactness() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 5, 8, 7, 3, 6]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@@ -843,13 +776,7 @@ fn test_proximity_after_exactness() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("the quick brown fox jumps over the lazy dog");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@@ -877,13 +804,7 @@ fn test_exactness_followed_by_typo_prefer_no_typo_prefix() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("quick brown fox extra");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 3]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@@ -913,13 +834,7 @@ fn test_typo_followed_by_exactness() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::Last);
s.query("extraordinarily quick brown fox");
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let document_ids_scores: Vec<_> =
documents_ids.iter().zip(document_scores.into_iter()).collect();
insta::assert_snapshot!(format!("{document_ids_scores:#?}"));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 4, 3]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);

View File

@@ -7,7 +7,6 @@ use heed::RoTxn;
use maplit::hashset;
use crate::index::tests::TempIndex;
use crate::score_details::ScoreDetails;
use crate::search::new::tests::collect_field_values;
use crate::{AscDesc, Criterion, GeoSortStrategy, Member, Search, SearchResult};
@@ -29,37 +28,30 @@ fn execute_iterative_and_rtree_returns_the_same<'a>(
rtxn: &RoTxn<'a>,
index: &TempIndex,
search: &mut Search<'a>,
) -> (Vec<usize>, Vec<Vec<ScoreDetails>>) {
) -> Vec<usize> {
search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(2));
let SearchResult { documents_ids, document_scores: iterative_scores_bucketed, .. } =
search.execute().unwrap();
let SearchResult { documents_ids, .. } = search.execute().unwrap();
let iterative_ids_bucketed = collect_field_values(index, rtxn, "id", &documents_ids);
search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(1000));
let SearchResult { documents_ids, document_scores: iterative_scores, .. } =
search.execute().unwrap();
let SearchResult { documents_ids, .. } = search.execute().unwrap();
let iterative_ids = collect_field_values(index, rtxn, "id", &documents_ids);
assert_eq!(iterative_ids_bucketed, iterative_ids, "iterative bucket");
assert_eq!(iterative_scores_bucketed, iterative_scores, "iterative bucket score");
search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(2));
let SearchResult { documents_ids, document_scores: rtree_scores_bucketed, .. } =
search.execute().unwrap();
let SearchResult { documents_ids, .. } = search.execute().unwrap();
let rtree_ids_bucketed = collect_field_values(index, rtxn, "id", &documents_ids);
search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(1000));
let SearchResult { documents_ids, document_scores: rtree_scores, .. } =
search.execute().unwrap();
let SearchResult { documents_ids, .. } = search.execute().unwrap();
let rtree_ids = collect_field_values(index, rtxn, "id", &documents_ids);
assert_eq!(rtree_ids_bucketed, rtree_ids, "rtree bucket");
assert_eq!(rtree_scores_bucketed, rtree_scores, "rtree bucket score");
assert_eq!(iterative_ids, rtree_ids, "iterative vs rtree");
assert_eq!(iterative_scores, rtree_scores, "iterative vs rtree scores");
(iterative_ids.into_iter().map(|id| id.parse().unwrap()).collect(), iterative_scores)
iterative_ids.into_iter().map(|id| id.parse().unwrap()).collect()
}
#[test]
@@ -81,17 +73,14 @@ fn test_geo_sort() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[0, 1, 2, 3, 4, 5, 6, 8, 7, 10, 9]");
insta::assert_snapshot!(format!("{scores:#?}"));
s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 0.]))]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[5, 4, 3, 2, 1, 0, 6, 8, 7, 10, 9]");
insta::assert_snapshot!(format!("{scores:#?}"));
}
#[test]
@@ -112,63 +101,52 @@ fn test_geo_sort_around_the_edge_of_the_flat_earth() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
// --- asc
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[0, 1, 2, 3, 4]");
insta::assert_snapshot!(format!("{scores:#?}"));
// ensuring the lat doesn't wrap around
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([85., 0.]))]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[1, 0, 3, 4, 2]");
insta::assert_snapshot!(format!("{scores:#?}"));
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([-85., 0.]))]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[2, 0, 3, 4, 1]");
insta::assert_snapshot!(format!("{scores:#?}"));
// ensuring the lng does wrap around
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 175.]))]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[3, 4, 2, 1, 0]");
insta::assert_snapshot!(format!("{scores:#?}"));
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., -175.]))]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[4, 3, 2, 1, 0]");
insta::assert_snapshot!(format!("{scores:#?}"));
// --- desc
s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 0.]))]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[4, 3, 2, 1, 0]");
insta::assert_snapshot!(format!("{scores:#?}"));
// ensuring the lat doesn't wrap around
s.sort_criteria(vec![AscDesc::Desc(Member::Geo([85., 0.]))]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[2, 4, 3, 0, 1]");
insta::assert_snapshot!(format!("{scores:#?}"));
s.sort_criteria(vec![AscDesc::Desc(Member::Geo([-85., 0.]))]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[1, 4, 3, 0, 2]");
insta::assert_snapshot!(format!("{scores:#?}"));
// ensuring the lng does wrap around
s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 175.]))]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[0, 1, 2, 4, 3]");
insta::assert_snapshot!(format!("{scores:#?}"));
s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., -175.]))]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[0, 1, 2, 3, 4]");
insta::assert_snapshot!(format!("{scores:#?}"));
}
#[test]
@@ -188,98 +166,19 @@ fn geo_sort_mixed_with_words() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);
s.query("jean");
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[0, 2, 3]");
insta::assert_snapshot!(format!("{scores:#?}"));
s.query("bob");
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[2, 4]");
insta::assert_snapshot!(format!("{scores:#?}"), @r###"
[
[
Words(
Words {
matching_words: 1,
max_matching_words: 1,
},
),
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
-89.0,
0.0,
],
),
},
),
],
[
Words(
Words {
matching_words: 1,
max_matching_words: 1,
},
),
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
0.0,
-179.0,
],
),
},
),
],
]
"###);
s.query("intel");
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[1]");
insta::assert_snapshot!(format!("{scores:#?}"), @r###"
[
[
Words(
Words {
matching_words: 1,
max_matching_words: 1,
},
),
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
88.0,
0.0,
],
),
},
),
],
]
"###);
}
#[test]
@@ -299,11 +198,9 @@ fn geo_sort_without_any_geo_faceted_documents() {
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.]))]);
s.query("jean");
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
let ids = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[0, 2, 3]");
insta::assert_snapshot!(format!("{scores:#?}"));
}

View File

@@ -80,13 +80,10 @@ fn test_2gram_simple() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("sun flower");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let SearchResult { documents_ids, .. } = s.execute().unwrap();
// will also match documents with "sunflower" + prefix tolerance
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 5]");
// scores are empty because the only rule is Words with All matching strategy
insta::assert_snapshot!(format!("{document_scores:?}"), @"[[], [], [], [], []]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[

View File

@@ -122,11 +122,11 @@ fn create_edge_cases_index() -> TempIndex {
sta stb stc ste stf stg sth sti stj stk stl stm stn sto stp stq str stst stt stu stv stw stx sty stz
"
},
// The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`.
// If the search query is "sunflower", the split word "Sun Flower" will match some documents.
// If the query is `sunflower wilting`, then we should make sure that
// the sprximity condition `flower wilting: sprx N` also comes with the condition
// `sun wilting: sprx N+1`. TODO: this is not the exact condition we use for now.
// We only check that the phrase `sun flower` exists and `flower wilting: sprx N`, which
// is better than nothing but not the best.
{
@@ -139,7 +139,7 @@ fn create_edge_cases_index() -> TempIndex {
},
{
"id": 3,
// This document matches the query `sunflower wilting`, but the sprximity condition
// between `sunflower` and `wilting` cannot be through the split-word `Sun Flower`
// which would reduce to only `flower` and `wilting` being in sprximity.
"text": "A flower wilting under the sun, unlike a sunflower"
@@ -270,13 +270,13 @@ fn test_proximity_simple() {
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 4, 7, 6, 5, 2, 3, 0, 1]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 9, 10, 7, 6, 5, 2, 3, 0, 1]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quickbrown fox jumps over the lazy dog\"",
"\"the quack brown fox jumps over the lazy dog\"",
"\"the quick brown fox jumps over the lazy dog\"",
"\"the quickbrown fox jumps over the lazy dog\"",
"\"the really quick brown fox jumps over the lazy dog\"",
"\"the really quick brown fox jumps over the very lazy dog\"",
"\"brown quick fox jumps over the lazy dog\"",
@@ -295,12 +295,9 @@ fn test_proximity_split_word() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("sunflower wilting");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 5, 1, 3]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
// TODO: "2" and "4" should be swapped ideally
insta::assert_debug_snapshot!(texts, @r###"
@@ -315,11 +312,9 @@ fn test_proximity_split_word() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("\"sun flower\" wilting");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
// TODO: "2" and "4" should be swapped ideally
insta::assert_debug_snapshot!(texts, @r###"
@@ -342,11 +337,9 @@ fn test_proximity_split_word() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("xyz wilting");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
// TODO: "2" and "4" should be swapped ideally
insta::assert_debug_snapshot!(texts, @r###"
@@ -365,11 +358,9 @@ fn test_proximity_prefix_db() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best s");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11, 15]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
// This test illustrates the loss of precision from using the prefix DB
@@ -390,11 +381,9 @@ fn test_proximity_prefix_db() {
// Difference when using the `su` prefix, which is not in the prefix DB
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best su");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 11, 7, 6, 15]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
@@ -417,11 +406,9 @@ fn test_proximity_prefix_db() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
-s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best win");
-let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
+let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[15, 16, 17, 18, 19, 20, 21, 22]");
-insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
@@ -441,11 +428,9 @@ fn test_proximity_prefix_db() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
-s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wint");
-let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
+let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 20, 16, 15]");
-insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
@@ -465,11 +450,9 @@ fn test_proximity_prefix_db() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
-s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wi");
-let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
+let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 15, 16, 20]");
-insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"

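Note: every hunk above follows the same pattern, which makes the diff easier to read once you see it — the tests stop opting into detailed score computation, so the `document_scores` snapshot assertion disappears with it. A minimal before/after sketch, using only the `Search`/`SearchResult`/insta calls visible in the hunks (index and transaction setup elided, so this is illustrative rather than compilable on its own):

```rust
// Before: detailed scoring is requested and the per-rule score details
// are snapshotted alongside the document ids.
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("sunflower wilting");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 5, 1, 3]");
insta::assert_snapshot!(format!("{document_scores:#?}"));

// After: only the ordering of document ids is asserted.
s.query("sunflower wilting");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 5, 1, 3]");
```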
View File

@@ -61,41 +61,8 @@ fn test_trap_basic() {
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("summer holiday");
-s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
-let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
+let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1]");
-insta::assert_snapshot!(format!("{document_scores:#?}"), @r###"
-[
-    [
-        Proximity(
-            Rank {
-                rank: 8,
-                max_rank: 8,
-            },
-        ),
-        Typo(
-            Typo {
-                typo_count: 0,
-                max_typo_count: 2,
-            },
-        ),
-    ],
-    [
-        Proximity(
-            Rank {
-                rank: 8,
-                max_rank: 8,
-            },
-        ),
-        Typo(
-            Typo {
-                typo_count: 0,
-                max_typo_count: 2,
-            },
-        ),
-    ],
-]
-"###);
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
// TODO: this is incorrect, 1 should come before 0
insta::assert_debug_snapshot!(texts, @r###"

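The removed assertion above was an inline insta snapshot: the expected value is stored in the source itself after the `@` sigil. The YAML-fronted files deleted below are the file-based variant, where insta writes the rendered `expression:` into a `.snap` file next to the test. Both forms, with illustrative values:

```rust
// Inline snapshot: the expected output lives in the source after `@`.
insta::assert_snapshot!(format!("{:?}", vec![0, 1]), @"[0, 1]");

// File snapshot: the expected output lives in a generated .snap file
// (the kind of file being deleted throughout the rest of this diff).
insta::assert_snapshot!(format!("{:#?}", vec![0, 1]));
```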
View File

@@ -1,244 +0,0 @@
---
source: milli/src/search/new/tests/attribute_fid.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
2,
[
Fid(
Rank {
rank: 19,
max_rank: 19,
},
),
Position(
Rank {
rank: 91,
max_rank: 91,
},
),
],
),
(
6,
[
Fid(
Rank {
rank: 15,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
5,
[
Fid(
Rank {
rank: 14,
max_rank: 19,
},
),
Position(
Rank {
rank: 79,
max_rank: 91,
},
),
],
),
(
4,
[
Fid(
Rank {
rank: 13,
max_rank: 19,
},
),
Position(
Rank {
rank: 77,
max_rank: 91,
},
),
],
),
(
3,
[
Fid(
Rank {
rank: 12,
max_rank: 19,
},
),
Position(
Rank {
rank: 83,
max_rank: 91,
},
),
],
),
(
9,
[
Fid(
Rank {
rank: 11,
max_rank: 19,
},
),
Position(
Rank {
rank: 75,
max_rank: 91,
},
),
],
),
(
8,
[
Fid(
Rank {
rank: 10,
max_rank: 19,
},
),
Position(
Rank {
rank: 79,
max_rank: 91,
},
),
],
),
(
7,
[
Fid(
Rank {
rank: 10,
max_rank: 19,
},
),
Position(
Rank {
rank: 73,
max_rank: 91,
},
),
],
),
(
11,
[
Fid(
Rank {
rank: 7,
max_rank: 19,
},
),
Position(
Rank {
rank: 77,
max_rank: 91,
},
),
],
),
(
10,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
13,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
12,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 78,
max_rank: 91,
},
),
],
),
(
14,
[
Fid(
Rank {
rank: 5,
max_rank: 19,
},
),
Position(
Rank {
rank: 75,
max_rank: 91,
},
),
],
),
(
0,
[
Fid(
Rank {
rank: 1,
max_rank: 19,
},
),
Position(
Rank {
rank: 91,
max_rank: 91,
},
),
],
),
]

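Each entry in the deleted snapshot pairs a document id with its per-rule score details, and the list is ordered lexicographically by those details: `Fid` first, then `Position` as a tie-break (documents 8 and 7 tie at `Fid` rank 10 and are separated by `Position` 79 vs 73). Each `Rank` plausibly normalizes to `rank / max_rank`; a self-contained sketch of that reading, inferred from the data rather than taken from milli:

```rust
// Reading the snapshot above: (doc_id, [Fid, Position]) details, ordered
// lexicographically, each Rank normalizing to rank / max_rank in (0, 1].
#[derive(Clone, Copy)]
struct Rank {
    rank: u32,
    max_rank: u32,
}

fn normalized(r: Rank) -> f64 {
    r.rank as f64 / r.max_rank as f64
}

fn main() {
    // First two entries: document 2, then document 6.
    let doc2 = [Rank { rank: 19, max_rank: 19 }, Rank { rank: 91, max_rank: 91 }];
    let doc6 = [Rank { rank: 15, max_rank: 19 }, Rank { rank: 81, max_rank: 91 }];
    // Document 2 already wins on the Fid rule, so Position is never consulted.
    assert!(normalized(doc2[0]) > normalized(doc6[0]));
}
```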
View File

@@ -1,244 +0,0 @@
---
source: milli/src/search/new/tests/attribute_fid.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
2,
[
Fid(
Rank {
rank: 19,
max_rank: 19,
},
),
Position(
Rank {
rank: 91,
max_rank: 91,
},
),
],
),
(
6,
[
Fid(
Rank {
rank: 15,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
5,
[
Fid(
Rank {
rank: 14,
max_rank: 19,
},
),
Position(
Rank {
rank: 79,
max_rank: 91,
},
),
],
),
(
4,
[
Fid(
Rank {
rank: 13,
max_rank: 19,
},
),
Position(
Rank {
rank: 77,
max_rank: 91,
},
),
],
),
(
3,
[
Fid(
Rank {
rank: 12,
max_rank: 19,
},
),
Position(
Rank {
rank: 83,
max_rank: 91,
},
),
],
),
(
9,
[
Fid(
Rank {
rank: 11,
max_rank: 19,
},
),
Position(
Rank {
rank: 75,
max_rank: 91,
},
),
],
),
(
8,
[
Fid(
Rank {
rank: 10,
max_rank: 19,
},
),
Position(
Rank {
rank: 79,
max_rank: 91,
},
),
],
),
(
7,
[
Fid(
Rank {
rank: 10,
max_rank: 19,
},
),
Position(
Rank {
rank: 73,
max_rank: 91,
},
),
],
),
(
11,
[
Fid(
Rank {
rank: 7,
max_rank: 19,
},
),
Position(
Rank {
rank: 77,
max_rank: 91,
},
),
],
),
(
10,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
13,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
12,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 78,
max_rank: 91,
},
),
],
),
(
14,
[
Fid(
Rank {
rank: 5,
max_rank: 19,
},
),
Position(
Rank {
rank: 75,
max_rank: 91,
},
),
],
),
(
0,
[
Fid(
Rank {
rank: 1,
max_rank: 19,
},
),
Position(
Rank {
rank: 91,
max_rank: 91,
},
),
],
),
]

View File

@@ -1,244 +0,0 @@
---
source: milli/src/search/new/tests/attribute_position.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
10,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 21,
max_rank: 21,
},
),
],
),
(
12,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 21,
max_rank: 21,
},
),
],
),
(
11,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 20,
max_rank: 21,
},
),
],
),
(
13,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 20,
max_rank: 21,
},
),
],
),
(
3,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 19,
max_rank: 21,
},
),
],
),
(
4,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 19,
max_rank: 21,
},
),
],
),
(
2,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 18,
max_rank: 21,
},
),
],
),
(
0,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 15,
max_rank: 21,
},
),
],
),
(
1,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 15,
max_rank: 21,
},
),
],
),
(
6,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 13,
max_rank: 21,
},
),
],
),
(
8,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 5,
max_rank: 21,
},
),
],
),
(
7,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 4,
max_rank: 21,
},
),
],
),
(
9,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 4,
max_rank: 21,
},
),
],
),
(
5,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 1,
max_rank: 21,
},
),
],
),
]

View File

@@ -1,244 +0,0 @@
---
source: milli/src/search/new/tests/attribute_position.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
10,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 21,
max_rank: 21,
},
),
],
),
(
12,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 21,
max_rank: 21,
},
),
],
),
(
11,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 20,
max_rank: 21,
},
),
],
),
(
13,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 20,
max_rank: 21,
},
),
],
),
(
3,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 19,
max_rank: 21,
},
),
],
),
(
4,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 19,
max_rank: 21,
},
),
],
),
(
2,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 18,
max_rank: 21,
},
),
],
),
(
0,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 15,
max_rank: 21,
},
),
],
),
(
1,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 15,
max_rank: 21,
},
),
],
),
(
6,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 13,
max_rank: 21,
},
),
],
),
(
8,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 5,
max_rank: 21,
},
),
],
),
(
7,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 4,
max_rank: 21,
},
),
],
),
(
9,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 4,
max_rank: 21,
},
),
],
),
(
5,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 1,
max_rank: 21,
},
),
],
),
]

View File

@@ -1,91 +0,0 @@
---
source: milli/src/search/new/tests/attribute_position.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
5,
[
Fid(
Rank {
rank: 11,
max_rank: 11,
},
),
Position(
Rank {
rank: 51,
max_rank: 51,
},
),
],
),
(
7,
[
Fid(
Rank {
rank: 11,
max_rank: 11,
},
),
Position(
Rank {
rank: 51,
max_rank: 51,
},
),
],
),
(
8,
[
Fid(
Rank {
rank: 11,
max_rank: 11,
},
),
Position(
Rank {
rank: 51,
max_rank: 51,
},
),
],
),
(
9,
[
Fid(
Rank {
rank: 11,
max_rank: 11,
},
),
Position(
Rank {
rank: 51,
max_rank: 51,
},
),
],
),
(
6,
[
Fid(
Rank {
rank: 11,
max_rank: 11,
},
),
Position(
Rank {
rank: 50,
max_rank: 51,
},
),
],
),
]

View File

@@ -1,244 +0,0 @@
---
source: milli/src/search/new/tests/attribute_position.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
10,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 21,
max_rank: 21,
},
),
],
),
(
12,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 21,
max_rank: 21,
},
),
],
),
(
11,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 20,
max_rank: 21,
},
),
],
),
(
13,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 20,
max_rank: 21,
},
),
],
),
(
3,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 19,
max_rank: 21,
},
),
],
),
(
4,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 19,
max_rank: 21,
},
),
],
),
(
2,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 18,
max_rank: 21,
},
),
],
),
(
0,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 15,
max_rank: 21,
},
),
],
),
(
1,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 15,
max_rank: 21,
},
),
],
),
(
6,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 13,
max_rank: 21,
},
),
],
),
(
8,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 5,
max_rank: 21,
},
),
],
),
(
7,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 4,
max_rank: 21,
},
),
],
),
(
9,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 4,
max_rank: 21,
},
),
],
),
(
5,
[
Fid(
Rank {
rank: 5,
max_rank: 5,
},
),
Position(
Rank {
rank: 1,
max_rank: 21,
},
),
],
),
]

View File

@@ -1,366 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
19,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
],
),
(
9,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 7,
max_rank: 10,
},
),
],
),
(
18,
[
Words(
Words {
matching_words: 8,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 9,
max_rank: 9,
},
),
],
),
(
8,
[
Words(
Words {
matching_words: 8,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 6,
max_rank: 9,
},
),
],
),
(
17,
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 8,
max_rank: 8,
},
),
],
),
(
16,
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 8,
max_rank: 8,
},
),
],
),
(
6,
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 5,
max_rank: 8,
},
),
],
),
(
7,
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 5,
max_rank: 8,
},
),
],
),
(
15,
[
Words(
Words {
matching_words: 5,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 6,
max_rank: 6,
},
),
],
),
(
5,
[
Words(
Words {
matching_words: 5,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 6,
},
),
],
),
(
14,
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 5,
max_rank: 5,
},
),
],
),
(
4,
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 5,
},
),
],
),
(
13,
[
Words(
Words {
matching_words: 3,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 4,
max_rank: 4,
},
),
],
),
(
3,
[
Words(
Words {
matching_words: 3,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 4,
},
),
],
),
(
12,
[
Words(
Words {
matching_words: 2,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 3,
},
),
],
),
(
2,
[
Words(
Words {
matching_words: 2,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 3,
},
),
],
),
(
1,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
(
11,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
]

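The ordering in this exactness snapshot is again lexicographic: `Words` (more matching words first), then `ExactAttribute` (`ExactMatch` over `MatchesStart` over `NoExactMatch`), then the `Exactness` rank. A runnable sketch of that comparator, inferred from the data rather than taken from milli:

```rust
// Derived Ord is lexicographic by declaration order, matching how the
// snapshot entries are sorted (best document first, hence `>` below).
#[derive(PartialEq, Eq, PartialOrd, Ord)]
enum ExactAttribute {
    NoExactMatch,
    MatchesStart,
    ExactMatch,
}

#[derive(PartialEq, Eq, PartialOrd, Ord)]
struct Score {
    matching_words: u32,
    exact_attribute: ExactAttribute,
    exactness_rank: u32,
}

fn main() {
    // Documents 19 and 9 from the top of the snapshot: same word count,
    // tie broken by the exact-attribute rule.
    let doc19 = Score { matching_words: 9, exact_attribute: ExactAttribute::ExactMatch, exactness_rank: 10 };
    let doc9 = Score { matching_words: 9, exact_attribute: ExactAttribute::NoExactMatch, exactness_rank: 7 };
    assert!(doc19 > doc9);
}
```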
View File

@@ -1,106 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
4,
[
Words(
Words {
matching_words: 7,
max_matching_words: 7,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 7,
max_rank: 8,
},
),
],
),
(
5,
[
Words(
Words {
matching_words: 7,
max_matching_words: 7,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 7,
max_rank: 8,
},
),
],
),
(
6,
[
Words(
Words {
matching_words: 7,
max_matching_words: 7,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 7,
max_rank: 8,
},
),
],
),
(
1,
[
Words(
Words {
matching_words: 4,
max_matching_words: 7,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 4,
max_rank: 5,
},
),
],
),
(
7,
[
Words(
Words {
matching_words: 1,
max_matching_words: 7,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 1,
max_rank: 2,
},
),
],
),
]

View File

@@ -1,126 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
6,
[
Words(
Words {
matching_words: 7,
max_matching_words: 7,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 8,
max_rank: 8,
},
),
],
),
(
5,
[
Words(
Words {
matching_words: 7,
max_matching_words: 7,
},
),
ExactAttribute(
MatchesStart,
),
Exactness(
Rank {
rank: 8,
max_rank: 8,
},
),
],
),
(
4,
[
Words(
Words {
matching_words: 7,
max_matching_words: 7,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 8,
max_rank: 8,
},
),
],
),
(
3,
[
Words(
Words {
matching_words: 7,
max_matching_words: 7,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 7,
max_rank: 8,
},
),
],
),
(
1,
[
Words(
Words {
matching_words: 4,
max_matching_words: 7,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 5,
max_rank: 5,
},
),
],
),
(
7,
[
Words(
Words {
matching_words: 1,
max_matching_words: 7,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
]

View File

@@ -1,86 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
6,
[
Words(
Words {
matching_words: 7,
max_matching_words: 7,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 6,
max_rank: 6,
},
),
],
),
(
5,
[
Words(
Words {
matching_words: 7,
max_matching_words: 7,
},
),
ExactAttribute(
MatchesStart,
),
Exactness(
Rank {
rank: 6,
max_rank: 6,
},
),
],
),
(
4,
[
Words(
Words {
matching_words: 7,
max_matching_words: 7,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 6,
max_rank: 6,
},
),
],
),
(
1,
[
Words(
Words {
matching_words: 4,
max_matching_words: 7,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 3,
},
),
],
),
]

View File

@@ -1,66 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
2,
[
Words(
Words {
matching_words: 2,
max_matching_words: 2,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 3,
},
),
],
),
(
1,
[
Words(
Words {
matching_words: 2,
max_matching_words: 2,
},
),
ExactAttribute(
MatchesStart,
),
Exactness(
Rank {
rank: 3,
max_rank: 3,
},
),
],
),
(
0,
[
Words(
Words {
matching_words: 2,
max_matching_words: 2,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 3,
},
),
],
),
]

View File

@@ -1,136 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
2,
[
Words(
Words {
matching_words: 4,
max_matching_words: 4,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 5,
max_rank: 5,
},
),
Typo(
Typo {
typo_count: 0,
max_typo_count: 1,
},
),
],
),
(
1,
[
Words(
Words {
matching_words: 4,
max_matching_words: 4,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 4,
max_rank: 5,
},
),
Typo(
Typo {
typo_count: 0,
max_typo_count: 2,
},
),
],
),
(
0,
[
Words(
Words {
matching_words: 4,
max_matching_words: 4,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 4,
max_rank: 5,
},
),
Typo(
Typo {
typo_count: 1,
max_typo_count: 2,
},
),
],
),
(
4,
[
Words(
Words {
matching_words: 4,
max_matching_words: 4,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 4,
max_rank: 5,
},
),
Typo(
Typo {
typo_count: 1,
max_typo_count: 2,
},
),
],
),
(
3,
[
Words(
Words {
matching_words: 4,
max_matching_words: 4,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 5,
},
),
Typo(
Typo {
typo_count: 2,
max_typo_count: 3,
},
),
],
),
]

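This snapshot appends a `Typo` detail after `Exactness`: once the earlier rules tie, fewer typos ranks higher (documents 1 and 0 both sit at `Exactness` rank 4 of 5 and are separated by `typo_count` 0 vs 1). A tiny runnable sketch of that final tie-break:

```rust
fn main() {
    // (doc_id, typo_count) for the documents tied at Exactness 4/5 above.
    let mut docs = vec![(0u32, 1u32), (4, 1), (1, 0)];
    docs.sort_by_key(|&(_id, typo_count)| typo_count); // fewer typos first
    assert_eq!(docs[0], (1, 0));
}
```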
View File

@@ -1,186 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
9,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
],
),
(
8,
[
Words(
Words {
matching_words: 8,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 9,
max_rank: 9,
},
),
],
),
(
7,
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 8,
max_rank: 8,
},
),
],
),
(
6,
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 8,
max_rank: 8,
},
),
],
),
(
5,
[
Words(
Words {
matching_words: 5,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 6,
max_rank: 6,
},
),
],
),
(
4,
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 5,
max_rank: 5,
},
),
],
),
(
3,
[
Words(
Words {
matching_words: 3,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 4,
max_rank: 4,
},
),
],
),
(
2,
[
Words(
Words {
matching_words: 2,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 3,
},
),
],
),
(
1,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
]

View File

@@ -1,126 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
8,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
],
),
(
7,
[
Words(
Words {
matching_words: 3,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 4,
max_rank: 4,
},
),
],
),
(
4,
[
Words(
Words {
matching_words: 2,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 3,
},
),
],
),
(
6,
[
Words(
Words {
matching_words: 2,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 3,
},
),
],
),
(
3,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
(
5,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
]

View File

@@ -1,146 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
9,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
],
),
(
8,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
],
),
(
3,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
MatchesStart,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
(
4,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
(
5,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
(
6,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
(
7,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
]

View File

@@ -1,146 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
9,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
],
),
(
8,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
],
),
(
3,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
MatchesStart,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
(
4,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
(
5,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
(
6,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
(
7,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
]

View File

@@ -1,84 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
0,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
Proximity(
Rank {
rank: 35,
max_rank: 57,
},
),
],
),
(
1,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
Proximity(
Rank {
rank: 35,
max_rank: 57,
},
),
],
),
(
2,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
Proximity(
Rank {
rank: 35,
max_rank: 57,
},
),
],
),
]

View File

@@ -1,240 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
2,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
Proximity(
Rank {
rank: 57,
max_rank: 57,
},
),
],
),
(
1,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
Proximity(
Rank {
rank: 56,
max_rank: 57,
},
),
],
),
(
0,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
Proximity(
Rank {
rank: 35,
max_rank: 57,
},
),
],
),
(
4,
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
ExactAttribute(
MatchesStart,
),
Exactness(
Rank {
rank: 5,
max_rank: 5,
},
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
},
),
],
),
(
5,
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
ExactAttribute(
MatchesStart,
),
Exactness(
Rank {
rank: 5,
max_rank: 5,
},
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
},
),
],
),
(
8,
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
ExactAttribute(
MatchesStart,
),
Exactness(
Rank {
rank: 5,
max_rank: 5,
},
),
Proximity(
Rank {
rank: 22,
max_rank: 22,
},
),
],
),
(
7,
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 5,
max_rank: 5,
},
),
Proximity(
Rank {
rank: 21,
max_rank: 22,
},
),
],
),
(
3,
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 5,
max_rank: 5,
},
),
Proximity(
Rank {
rank: 17,
max_rank: 22,
},
),
],
),
(
6,
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 5,
max_rank: 5,
},
),
Proximity(
Rank {
rank: 17,
max_rank: 22,
},
),
],
),
]

View File

@@ -1,110 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
1,
[
Words(
Words {
matching_words: 4,
max_matching_words: 4,
},
),
Typo(
Typo {
typo_count: 0,
max_typo_count: 5,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 5,
max_rank: 5,
},
),
],
),
(
0,
[
Words(
Words {
matching_words: 4,
max_matching_words: 4,
},
),
Typo(
Typo {
typo_count: 1,
max_typo_count: 5,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 4,
max_rank: 5,
},
),
],
),
(
4,
[
Words(
Words {
matching_words: 4,
max_matching_words: 4,
},
),
Typo(
Typo {
typo_count: 2,
max_typo_count: 5,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 4,
max_rank: 5,
},
),
],
),
(
3,
[
Words(
Words {
matching_words: 4,
max_matching_words: 4,
},
),
Typo(
Typo {
typo_count: 2,
max_typo_count: 5,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 5,
},
),
],
),
]

View File

@@ -1,366 +0,0 @@
---
source: milli/src/search/new/tests/exactness.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
19,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 10,
max_rank: 10,
},
),
],
),
(
9,
[
Words(
Words {
matching_words: 9,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 7,
max_rank: 10,
},
),
],
),
(
18,
[
Words(
Words {
matching_words: 8,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 9,
max_rank: 9,
},
),
],
),
(
8,
[
Words(
Words {
matching_words: 8,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 6,
max_rank: 9,
},
),
],
),
(
17,
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 8,
max_rank: 8,
},
),
],
),
(
16,
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 8,
max_rank: 8,
},
),
],
),
(
6,
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 5,
max_rank: 8,
},
),
],
),
(
7,
[
Words(
Words {
matching_words: 7,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 5,
max_rank: 8,
},
),
],
),
(
15,
[
Words(
Words {
matching_words: 5,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 6,
max_rank: 6,
},
),
],
),
(
5,
[
Words(
Words {
matching_words: 5,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 6,
},
),
],
),
(
14,
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 5,
max_rank: 5,
},
),
],
),
(
4,
[
Words(
Words {
matching_words: 4,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 5,
},
),
],
),
(
13,
[
Words(
Words {
matching_words: 3,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 4,
max_rank: 4,
},
),
],
),
(
3,
[
Words(
Words {
matching_words: 3,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 4,
},
),
],
),
(
12,
[
Words(
Words {
matching_words: 2,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 3,
max_rank: 3,
},
),
],
),
(
2,
[
Words(
Words {
matching_words: 2,
max_matching_words: 9,
},
),
ExactAttribute(
NoExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 3,
},
),
],
),
(
1,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
(
11,
[
Words(
Words {
matching_words: 1,
max_matching_words: 9,
},
),
ExactAttribute(
ExactMatch,
),
Exactness(
Rank {
rank: 2,
max_rank: 2,
},
),
],
),
]

View File

@@ -1,168 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
0.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
1.0,
1.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
2.0,
-1.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
-2.0,
-2.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
3.0,
5.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
6.0,
-5.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
],
]

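The geo_sort snapshots that follow all read the same way: ascending sorts put the document whose point is closest (by great-circle distance) to `target_point` first, descending sorts reverse that, and documents with no point come last either way. A self-contained sketch that reproduces the first snapshot's order, assuming `[lat, lng]` points and a haversine distance (milli's exact distance function may differ):

```rust
// Order candidate points by great-circle distance to a target, missing
// points last -- the shape of the ascending geo_sort snapshots above.
fn haversine_km(a: [f64; 2], b: [f64; 2]) -> f64 {
    let (lat1, lng1) = (a[0].to_radians(), a[1].to_radians());
    let (lat2, lng2) = (b[0].to_radians(), b[1].to_radians());
    let h = ((lat2 - lat1) / 2.0).sin().powi(2)
        + lat1.cos() * lat2.cos() * ((lng2 - lng1) / 2.0).sin().powi(2);
    2.0 * 6371.0 * h.sqrt().asin()
}

fn main() {
    let target = [0.0, 0.0];
    let mut points: Vec<Option<[f64; 2]>> = vec![
        Some([3.0, 5.0]), None, Some([0.0, 0.0]), Some([6.0, -5.0]),
        Some([1.0, 1.0]), Some([-2.0, -2.0]), Some([2.0, -1.0]),
    ];
    points.sort_by(|x, y| match (x, y) {
        (Some(x), Some(y)) => haversine_km(target, *x).total_cmp(&haversine_km(target, *y)),
        (Some(_), None) => std::cmp::Ordering::Less,
        (None, Some(_)) => std::cmp::Ordering::Greater,
        (None, None) => std::cmp::Ordering::Equal,
    });
    // [0,0], [1,1], [2,-1], [-2,-2], [3,5], [6,-5], then the None.
}
```

Haversine also wraps across the antimeridian, which is why the later `target_point: [0, -175]` snapshot ranks `[0, -179]` (4 degrees away) ahead of `[0, 178]` (7 degrees away).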
View File

@@ -1,168 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
6.0,
-5.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
3.0,
5.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
-2.0,
-2.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
2.0,
-1.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
1.0,
1.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
0.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
],
]

View File

@@ -1,91 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
0.0,
-175.0,
],
ascending: true,
value: Some(
[
0.0,
-179.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
-175.0,
],
ascending: true,
value: Some(
[
0.0,
178.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
-175.0,
],
ascending: true,
value: Some(
[
-89.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
-175.0,
],
ascending: true,
value: Some(
[
88.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
-175.0,
],
ascending: true,
value: Some(
[
0.0,
0.0,
],
),
},
),
],
]

View File

@@ -1,91 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
0.0,
-179.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
0.0,
178.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
-89.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
88.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
0.0,
0.0,
],
),
},
),
],
]

View File

@@ -1,91 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
85.0,
0.0,
],
ascending: false,
value: Some(
[
-89.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
85.0,
0.0,
],
ascending: false,
value: Some(
[
0.0,
-179.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
85.0,
0.0,
],
ascending: false,
value: Some(
[
0.0,
178.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
85.0,
0.0,
],
ascending: false,
value: Some(
[
0.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
85.0,
0.0,
],
ascending: false,
value: Some(
[
88.0,
0.0,
],
),
},
),
],
]

View File

@@ -1,91 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
-85.0,
0.0,
],
ascending: false,
value: Some(
[
88.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
-85.0,
0.0,
],
ascending: false,
value: Some(
[
0.0,
-179.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
-85.0,
0.0,
],
ascending: false,
value: Some(
[
0.0,
178.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
-85.0,
0.0,
],
ascending: false,
value: Some(
[
0.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
-85.0,
0.0,
],
ascending: false,
value: Some(
[
-89.0,
0.0,
],
),
},
),
],
]

View File

@@ -1,91 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
0.0,
175.0,
],
ascending: false,
value: Some(
[
0.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
175.0,
],
ascending: false,
value: Some(
[
88.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
175.0,
],
ascending: false,
value: Some(
[
-89.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
175.0,
],
ascending: false,
value: Some(
[
0.0,
-179.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
175.0,
],
ascending: false,
value: Some(
[
0.0,
178.0,
],
),
},
),
],
]

View File

@@ -1,91 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
0.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
88.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
-89.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
0.0,
178.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
0.0,
-179.0,
],
),
},
),
],
]

View File

@@ -1,91 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
0.0,
-175.0,
],
ascending: false,
value: Some(
[
0.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
-175.0,
],
ascending: false,
value: Some(
[
88.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
-175.0,
],
ascending: false,
value: Some(
[
-89.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
-175.0,
],
ascending: false,
value: Some(
[
0.0,
178.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
-175.0,
],
ascending: false,
value: Some(
[
0.0,
-179.0,
],
),
},
),
],
]

View File

@@ -1,91 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
85.0,
0.0,
],
ascending: true,
value: Some(
[
88.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
85.0,
0.0,
],
ascending: true,
value: Some(
[
0.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
85.0,
0.0,
],
ascending: true,
value: Some(
[
0.0,
178.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
85.0,
0.0,
],
ascending: true,
value: Some(
[
0.0,
-179.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
85.0,
0.0,
],
ascending: true,
value: Some(
[
-89.0,
0.0,
],
),
},
),
],
]

View File

@@ -1,91 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
-85.0,
0.0,
],
ascending: true,
value: Some(
[
-89.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
-85.0,
0.0,
],
ascending: true,
value: Some(
[
0.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
-85.0,
0.0,
],
ascending: true,
value: Some(
[
0.0,
178.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
-85.0,
0.0,
],
ascending: true,
value: Some(
[
0.0,
-179.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
-85.0,
0.0,
],
ascending: true,
value: Some(
[
88.0,
0.0,
],
),
},
),
],
]

View File

@@ -1,91 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
0.0,
175.0,
],
ascending: true,
value: Some(
[
0.0,
178.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
175.0,
],
ascending: true,
value: Some(
[
0.0,
-179.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
175.0,
],
ascending: true,
value: Some(
[
-89.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
175.0,
],
ascending: true,
value: Some(
[
88.0,
0.0,
],
),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
175.0,
],
ascending: true,
value: Some(
[
0.0,
0.0,
],
),
},
),
],
]

View File

@@ -1,75 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
Words(
Words {
matching_words: 1,
max_matching_words: 1,
},
),
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
0.0,
0.0,
],
),
},
),
],
[
Words(
Words {
matching_words: 1,
max_matching_words: 1,
},
),
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
-89.0,
0.0,
],
),
},
),
],
[
Words(
Words {
matching_words: 1,
max_matching_words: 1,
},
),
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
0.0,
178.0,
],
),
},
),
],
]

View File

@@ -1,60 +0,0 @@
---
source: milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
Words(
Words {
matching_words: 1,
max_matching_words: 1,
},
),
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
],
[
Words(
Words {
matching_words: 1,
max_matching_words: 1,
},
),
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
],
[
Words(
Words {
matching_words: 1,
max_matching_words: 1,
},
),
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
],
]

Some files were not shown because too many files have changed in this diff.