Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-11-27 08:12:36 +00:00)

Compare commits: expose-sta... to check-wind... (1 commit)

Commit: 40215bfec3

.github/workflows/test-suite.yml (vendored, 18 changed lines)
@@ -56,6 +56,12 @@ jobs:
matrix:
os: [macos-12, windows-2022]
steps:
- name: Check free disk space on C
run: |
fsutil volume diskfree c:
- name: Check free disk space on D
run: |
fsutil volume diskfree d:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
@@ -63,11 +69,23 @@ jobs:
with:
toolchain: stable
override: true
- name: Check free disk space on C
run: |
fsutil volume diskfree c:
- name: Check free disk space on D
run: |
fsutil volume diskfree d:
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
- name: Check free disk space on C
run: |
fsutil volume diskfree c:
- name: Check free disk space on D
run: |
fsutil volume diskfree d:
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
Cargo.lock (generated, 34 changed lines)
@@ -500,7 +500,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"

[[package]]
name = "benchmarks"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"anyhow",
"bytes",
@@ -645,7 +645,7 @@ dependencies = [

[[package]]
name = "build-info"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"anyhow",
"time",
@@ -1545,7 +1545,7 @@ dependencies = [

[[package]]
name = "dump"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"anyhow",
"big_s",
@@ -1793,7 +1793,7 @@ dependencies = [

[[package]]
name = "file-store"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"faux",
"tempfile",
@@ -1816,7 +1816,7 @@ dependencies = [

[[package]]
name = "filter-parser"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"insta",
"nom",
@@ -1836,7 +1836,7 @@ dependencies = [

[[package]]
name = "flatten-serde-json"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"criterion",
"serde_json",
@@ -1954,7 +1954,7 @@ dependencies = [

[[package]]
name = "fuzzers"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"arbitrary",
"clap",
@@ -2447,7 +2447,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"

[[package]]
name = "index-scheduler"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"anyhow",
"big_s",
@@ -2642,7 +2642,7 @@ dependencies = [

[[package]]
name = "json-depth-checker"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"criterion",
"serde_json",
@@ -3272,7 +3272,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

[[package]]
name = "meili-snap"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"insta",
"md5",
@@ -3281,7 +3281,7 @@ dependencies = [

[[package]]
name = "meilisearch"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"actix-cors",
"actix-http",
@@ -3373,7 +3373,7 @@ dependencies = [

[[package]]
name = "meilisearch-auth"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"base64 0.21.7",
"enum-iterator",
@@ -3392,7 +3392,7 @@ dependencies = [

[[package]]
name = "meilisearch-types"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"actix-web",
"anyhow",
@@ -3422,7 +3422,7 @@ dependencies = [

[[package]]
name = "meilitool"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"anyhow",
"clap",
@@ -3461,7 +3461,7 @@ dependencies = [

[[package]]
name = "milli"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"arroy",
"big_s",
@@ -3901,7 +3901,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"

[[package]]
name = "permissive-json-pointer"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"big_s",
"serde_json",
@@ -6052,7 +6052,7 @@ dependencies = [

[[package]]
name = "xtask"
version = "1.9.0"
version = "1.8.0"
dependencies = [
"anyhow",
"build-info",
@@ -22,7 +22,7 @@ members = [
]

[workspace.package]
version = "1.9.0"
version = "1.8.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
README.md (16 changed lines)
@@ -25,7 +25,7 @@

<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>

[Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.

<p align="center" name="demo">
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
@@ -39,8 +39,8 @@
🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥

## ✨ Features
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search) & full-text search to get the most relevant results
- **Search-as-you-type:** find & display results in less than 50 milliseconds to provide an intuitive experience

- **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
@@ -55,15 +55,15 @@

## 📖 Documentation

You can consult Meilisearch's documentation at [meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).

## 🚀 Getting started

For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.

## 🌍 Supercharge your Meilisearch experience
## ⚡ Supercharge your Meilisearch experience

Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Additional features include analytics & monitoring in many regions around the world. No credit card is required.
Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required.

## 🧰 SDKs & integration tools

@@ -85,13 +85,13 @@ Finally, for more in-depth information, refer to our articles explaining fundame

Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.

To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.

If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation.

## 📫 Get in touch!

Meilisearch is a search engine created by [Meili]([https://www.welcometothejungle.com/en/companies/meilisearch](https://www.meilisearch.com/careers)), a software development company headquartered in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)

🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
@@ -46,10 +46,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.route(web::delete().to(SeqHandler(delete_index))),
)
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
.service(
web::resource("/advanced-stats")
.route(web::get().to(SeqHandler(get_advanced_index_stats))),
)
.service(web::scope("/documents").configure(documents::configure))
.service(web::scope("/search").configure(search::configure))
.service(web::scope("/facet-search").configure(facet_search::configure))
@@ -282,16 +278,3 @@ pub async fn get_index_stats(
debug!(returns = ?stats, "Get index stats");
Ok(HttpResponse::Ok().json(stats))
}

pub async fn get_advanced_index_stats(
index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let advanced_stats = index.advanced_stats(&rtxn)?;

debug!(returns = ?advanced_stats, "Get advanced index stats");
Ok(HttpResponse::Ok().json(advanced_stats))
}
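As a reading aid (not part of the diff): the new resource is registered under the index routes, so it would presumably be reachable as GET /indexes/{index_uid}/advanced-stats, guarded by the same STATS_GET action as the existing stats handler. The sketch below shows the rough JSON shape such a response would take once AdvancedStats (defined further down in this diff) is serialized; the numbers and the single "word_docids" entry are invented for illustration.

use serde_json::json;

fn main() {
    // Sketch only: field names follow the `AdvancedStats` and
    // `AdvancedDatabaseStats` struct definitions shown later in this diff;
    // every value here is made up.
    let example = json!({
        "map_size": 2147483648u64,
        "non_free_pages_size": 52428800u64,
        "on_disk_size": 52428800u64,
        "databases": {
            "word_docids": {
                "entries_count": 1024,
                "average_bitmap_len": 12.5,
                "median_bitmap_len": null,
                "average_value_size": 38.2,
                "median_value_size": null,
                "average_key_size": 9.7,
                "median_key_size": null
            }
        }
    });
    println!("{example:#}");
}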
@@ -477,8 +477,6 @@ pub enum MatchingStrategy {
Last,
/// All query words are mandatory
All,
/// Remove query words from the most frequent to the least
Frequency,
}

impl Default for MatchingStrategy {
@@ -492,7 +490,6 @@ impl From<MatchingStrategy> for TermsMatchingStrategy {
match other {
MatchingStrategy::Last => Self::Last,
MatchingStrategy::All => Self::All,
MatchingStrategy::Frequency => Self::Frequency,
}
}
}
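For context (an illustration added for this write-up, not part of the diff): MatchingStrategy is deserialized from the matchingStrategy field of a search request, using the lowercase variant names listed by the error-message snapshots below (`last`, `all`, and, on the side of the diff that has the variant, `frequency`). A request exercising it could look like this sketch:

use serde_json::json;

fn main() {
    // Illustrative request body only, mirroring the payloads used by the
    // matching-strategy tests further down in this diff.
    let request = json!({
        "q": "Captain Marvel",
        "matchingStrategy": "frequency",
        "attributesToRetrieve": ["id"]
    });
    println!("{request}");
}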
@@ -505,7 +505,7 @@ async fn search_bad_matching_strategy() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`, `frequency`",
"message": "Unknown value `doggo` at `.matchingStrategy`: expected one of `last`, `all`",
"code": "invalid_search_matching_strategy",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"
@@ -527,7 +527,7 @@ async fn search_bad_matching_strategy() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`, `frequency`",
"message": "Unknown value `doggo` for parameter `matchingStrategy`: expected one of `last`, `all`",
"code": "invalid_search_matching_strategy",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_matching_strategy"
@@ -117,69 +117,3 @@ async fn geo_bounding_box_with_string_and_number() {
)
.await;
}

#[actix_rt::test]
async fn bug_4640() {
// https://github.com/meilisearch/meilisearch/issues/4640
let server = Server::new().await;
let index = server.index("test");

let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.update_settings_filterable_attributes(json!(["_geo"])).await;
let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await;
index.wait_task(ret.uid()).await;

// Sort the document with the second one first
index
.search(
json!({
"sort": ["_geoPoint(45.4777599, 9.1967508):asc"],
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
{
"hits": [
{
"id": 2,
"name": "La Bella Italia",
"address": "456 Elm Street, Townsville",
"type": "Italian",
"rating": 9,
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
}
},
{
"id": 1,
"name": "Taco Truck",
"address": "444 Salsa Street, Burritoville",
"type": "Mexican",
"rating": 9,
"_geo": {
"lat": 34.0522,
"lng": -118.2437
},
"_geoDistance": 9714063
},
{
"id": 3,
"name": "Crêpe Truck",
"address": "2 Billig Avenue, Rouenville",
"type": "French",
"rating": 10
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"###);
},
)
.await;
}
@@ -1,128 +0,0 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;

use crate::common::index::Index;
use crate::common::{Server, Value};
use crate::json;

async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
let index = server.index("test");

index.add_documents(documents.clone(), None).await;
index.wait_task(0).await;
index
}

static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"id": "1",
},
{
"title": "Captain Planet",
"id": "2",
},
{
"title": "Captain Marvel",
"id": "3",
},
{
"title": "a Captain Marvel ersatz",
"id": "4"
},
{
"title": "He's not part of the Marvel Cinematic Universe",
"id": "5"
},
{
"title": "a Shazam ersatz, but better than Captain Planet",
"id": "6"
},
{
"title": "Capitain CAAAAAVEEERNE!!!!",
"id": "7"
}
])
});

#[actix_rt::test]
async fn simple_search() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

index
.search(json!({"q": "Captain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"2"},{"id":"6"},{"id":"7"}]"###);
})
.await;

index
.search(json!({"q": "Captain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
})
.await;

index
.search(json!({"q": "Captain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
})
.await;
}

#[actix_rt::test]
async fn search_with_typo() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

index
.search(json!({"q": "Capitain Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"7"},{"id":"2"},{"id":"6"}]"###);
})
.await;

index
.search(json!({"q": "Capitain Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"}]"###);
})
.await;

index
.search(json!({"q": "Capitain Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
})
.await;
}

#[actix_rt::test]
async fn search_with_unknown_word() {
let server = Server::new().await;
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;

index
.search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "last", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"2"},{"id":"3"},{"id":"4"},{"id":"6"},{"id":"7"}]"###);
})
.await;

index
.search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "all", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @"[]");
})
.await;

index
.search(json!({"q": "Captain Supercopter Marvel", "matchingStrategy": "frequency", "attributesToRetrieve": ["id"]}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"id":"3"},{"id":"4"},{"id":"5"}]"###);
})
.await;
}
@@ -7,7 +7,6 @@ mod facet_search;
mod formatted;
mod geo;
mod hybrid;
mod matching_strategy;
mod multi;
mod pagination;
mod restrict_searchable;
@@ -31,7 +31,6 @@ macro_rules! verify_snapshot {
}

#[actix_rt::test]
#[cfg_attr(target_os = "windows", ignore)]
async fn perform_snapshot() {
let temp = tempfile::tempdir().unwrap();
let snapshot_dir = tempfile::tempdir().unwrap();
@@ -9,7 +9,6 @@ use heed::types::*;
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
use roaring::RoaringBitmap;
use rstar::RTree;
use serde::Serialize;
use time::OffsetDateTime;

use crate::documents::PrimaryKey;
@@ -325,87 +324,6 @@ impl Index {
self.env.info().map_size
}

pub fn advanced_stats(&self, rtxn: &heed::RoTxn) -> Result<AdvancedStats> {
use db_name::*;

let mut dbs = BTreeMap::new();
dbs.insert(WORD_DOCIDS, advanced_database_stats(rtxn, self.word_docids)?);
dbs.insert(
WORD_PAIR_PROXIMITY_DOCIDS,
advanced_database_stats(rtxn, self.word_pair_proximity_docids)?,
);
dbs.insert(WORD_PREFIX_DOCIDS, advanced_database_stats(rtxn, self.word_prefix_docids)?);
dbs.insert(WORD_FIELD_ID_DOCIDS, advanced_database_stats(rtxn, self.word_fid_docids)?);
dbs.insert(WORD_POSITION_DOCIDS, advanced_database_stats(rtxn, self.word_position_docids)?);
dbs.insert(DOCUMENTS, advanced_database_stats_no_bitmap(rtxn, self.documents)?);

fn advanced_database_stats<KC>(
rtxn: &heed::RoTxn,
db: Database<KC, CboRoaringBitmapCodec>,
) -> Result<AdvancedDatabaseStats> {
let db = db.remap_key_type::<Bytes>().lazily_decode_data();

let mut entries_count = 0;
let mut total_bitmap_size = 0;
let mut total_bitmap_len = 0;
let mut total_key_size = 0;

for result in db.iter(rtxn)? {
let (bytes_key, lazy_value) = result?;
entries_count += 1;
total_bitmap_size += lazy_value.remap::<Bytes>().decode().unwrap().len();
let bitmap = lazy_value.decode().map_err(heed::Error::Decoding)?;
total_bitmap_len += bitmap.len();
total_key_size += bytes_key.len();
}

Ok(AdvancedDatabaseStats {
entries_count,
average_bitmap_len: Some(total_bitmap_len as f64 / entries_count as f64),
median_bitmap_len: None,
average_value_size: Some(total_bitmap_size as f64 / entries_count as f64),
median_value_size: None,
average_key_size: Some(total_key_size as f64 / entries_count as f64),
median_key_size: None,
})
}

fn advanced_database_stats_no_bitmap<KC, DC>(
rtxn: &heed::RoTxn,
db: Database<KC, DC>,
) -> Result<AdvancedDatabaseStats> {
let db = db.remap_types::<Bytes, Bytes>();

let mut entries_count = 0;
let mut total_value_size = 0;
let mut total_key_size = 0;

for result in db.iter(rtxn)? {
let (bytes_key, bytes_value) = result?;
entries_count += 1;
total_value_size += bytes_value.len();
total_key_size += bytes_key.len();
}

Ok(AdvancedDatabaseStats {
entries_count,
average_bitmap_len: None,
median_bitmap_len: None,
average_value_size: Some(total_value_size as f64 / entries_count as f64),
median_value_size: None,
average_key_size: Some(total_key_size as f64 / entries_count as f64),
median_key_size: None,
})
}

Ok(AdvancedStats {
map_size: self.map_size(),
non_free_pages_size: self.on_disk_size()?,
on_disk_size: self.on_disk_size()?,
databases: dbs,
})
}

pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
self.env.copy_to_file(path, option).map_err(Into::into)
}
@@ -1744,36 +1662,6 @@ impl Index {
}
}

#[derive(Clone, Debug, Serialize)]
pub struct AdvancedStats {
/// Size of the data memory map.
map_size: usize,
/// Returns the size used by all the databases in the environment without the free pages.
non_free_pages_size: u64,
/// The size of the data file on disk.
on_disk_size: u64,
/// Databases advanced stats.
databases: BTreeMap<&'static str, AdvancedDatabaseStats>,
}

#[derive(Clone, Debug, Serialize)]
pub struct AdvancedDatabaseStats {
/// The number of entries in this database.
entries_count: usize,
/// The average number of entries in the bitmaps of this database.
average_bitmap_len: Option<f64>,
/// The median number of entries in the bitmaps of this database.
median_bitmap_len: Option<f64>,
/// The average size of values of this database.
average_value_size: Option<f64>,
/// The median size of values of this database.
median_value_size: Option<f64>,
/// The average size of keys of this database.
average_key_size: Option<f64>,
/// The median size of keys of this database.
median_key_size: Option<f64>,
}

#[cfg(test)]
pub(crate) mod tests {
use std::collections::HashSet;
@@ -277,8 +277,6 @@ pub enum TermsMatchingStrategy {
Last,
// all words are mandatory
All,
// remove more frequent word first
Frequency,
}

impl Default for TermsMatchingStrategy {
@@ -164,21 +164,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
}
costs
}
TermsMatchingStrategy::Frequency => {
let removal_order =
query_graph.removal_order_for_terms_matching_strategy_frequency(ctx)?;
let mut forbidden_nodes =
SmallBitmap::for_interned_values_in(&query_graph.nodes);
let mut costs = query_graph.nodes.map(|_| None);
// FIXME: this works because only words uses termsmatchingstrategy at the moment.
for ns in removal_order {
for n in ns.iter() {
*costs.get_mut(n) = Some((1, forbidden_nodes.clone()));
}
forbidden_nodes.union(&ns);
}
costs
}
TermsMatchingStrategy::All => query_graph.nodes.map(|_| None),
}
} else {
@@ -197,11 +197,6 @@ fn resolve_maximally_reduced_query_graph(
.iter()
.flat_map(|x| x.iter())
.collect(),
TermsMatchingStrategy::Frequency => query_graph
.removal_order_for_terms_matching_strategy_frequency(ctx)?
.iter()
.flat_map(|x| x.iter())
.collect(),
TermsMatchingStrategy::All => vec![],
};
graph.remove_nodes_keep_edges(&nodes_to_remove);
@@ -1,9 +1,8 @@
use std::cmp::{Ordering, Reverse};
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::hash::{Hash, Hasher};

use fxhash::{FxHashMap, FxHasher};
use roaring::RoaringBitmap;

use super::interner::{FixedSizeInterner, Interned};
use super::query_term::{
@@ -12,7 +11,6 @@ use super::query_term::{
use super::small_bitmap::SmallBitmap;
use super::SearchContext;
use crate::search::new::interner::Interner;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
use crate::Result;

/// A node of the [`QueryGraph`].
@@ -292,49 +290,6 @@ impl QueryGraph {
}
}

pub fn removal_order_for_terms_matching_strategy_frequency(
&self,
ctx: &mut SearchContext,
) -> Result<Vec<SmallBitmap<QueryNode>>> {
// lookup frequency for each term
let mut term_with_frequency: Vec<(u8, u64)> = {
let mut term_docids: BTreeMap<u8, RoaringBitmap> = Default::default();
for (_, node) in self.nodes.iter() {
match &node.data {
QueryNodeData::Term(t) => {
let docids = compute_query_term_subset_docids(ctx, &t.term_subset)?;
for id in t.term_ids.clone() {
term_docids
.entry(id)
.and_modify(|curr| *curr |= &docids)
.or_insert_with(|| docids.clone());
}
}
QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => continue,
}
}
term_docids
.into_iter()
.map(|(idx, docids)| match docids.len() {
0 => (idx, u64::max_value()),
frequency => (idx, frequency),
})
.collect()
};
term_with_frequency.sort_by_key(|(_, frequency)| Reverse(*frequency));
let mut term_weight = BTreeMap::new();
let mut weight: u16 = 1;
let mut peekable = term_with_frequency.into_iter().peekable();
while let Some((idx, frequency)) = peekable.next() {
term_weight.insert(idx, weight);
if peekable.peek().map_or(false, |(_, f)| frequency != *f) {
weight += 1;
}
}
let cost_of_term_idx = move |term_idx: u8| *term_weight.get(&term_idx).unwrap();
Ok(self.removal_order_for_terms_matching_strategy(ctx, cost_of_term_idx))
}

pub fn removal_order_for_terms_matching_strategy_last(
&self,
ctx: &SearchContext,
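To make the weighting step above easier to follow, here is a small self-contained sketch added for this write-up (plain Rust, no Meilisearch types): terms are sorted most frequent first, and terms tied on frequency share a weight, so the most common words are the cheapest to drop. In the real function, a term matching no document is given u64::MAX so that it sorts first of all.

use std::cmp::Reverse;
use std::collections::BTreeMap;

fn main() {
    // (term index, document frequency) pairs, as built by the function above.
    let mut term_with_frequency: Vec<(u8, u64)> = vec![(0, 120), (1, 7), (2, 120), (3, 1)];
    term_with_frequency.sort_by_key(|(_, frequency)| Reverse(*frequency));

    let mut term_weight = BTreeMap::new();
    let mut weight: u16 = 1;
    let mut peekable = term_with_frequency.into_iter().peekable();
    while let Some((idx, frequency)) = peekable.next() {
        term_weight.insert(idx, weight);
        // only bump the weight when the next frequency differs,
        // so equally frequent terms land in the same removal bucket
        if peekable.peek().map_or(false, |(_, f)| frequency != *f) {
            weight += 1;
        }
    }

    // terms 0 and 2 (tied, most frequent) share weight 1; term 1 gets 2; term 3 gets 3
    assert_eq!(term_weight.get(&0), Some(&1));
    assert_eq!(term_weight.get(&2), Some(&1));
    assert_eq!(term_weight.get(&1), Some(&2));
    assert_eq!(term_weight.get(&3), Some(&3));
    println!("{term_weight:?}");
}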
@@ -360,19 +315,10 @@ impl QueryGraph {
if first_term_idx >= last_term_idx {
return vec![];
}

let cost_of_term_idx = |term_idx: u8| {
let rank = 1 + last_term_idx - term_idx;
rank as u16
};
self.removal_order_for_terms_matching_strategy(ctx, cost_of_term_idx)
}

pub fn removal_order_for_terms_matching_strategy(
&self,
ctx: &SearchContext,
order: impl Fn(u8) -> u16,
) -> Vec<SmallBitmap<QueryNode>> {
let mut nodes_to_remove = BTreeMap::<u16, SmallBitmap<QueryNode>>::new();
let mut at_least_one_mandatory_term = false;
for (node_id, node) in self.nodes.iter() {
@@ -383,7 +329,7 @@ impl QueryGraph {
}
let mut cost = 0;
for id in t.term_ids.clone() {
cost = std::cmp::max(cost, order(id));
cost = std::cmp::max(cost, cost_of_term_idx(id));
}
nodes_to_remove
.entry(cost)
@@ -45,6 +45,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
geo_fields_ids: Option<(FieldId, FieldId)>,
) -> Result<ExtractedFacetValues> {
let max_memory = indexer.max_memory_by_thread();

@@ -124,18 +125,12 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
add_exists.insert(document);
}

let del_geo_support = settings_diff
.old
.geo_fields_ids
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let add_geo_support = settings_diff
.new
.geo_fields_ids
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let geo_support =
geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let del_filterable_values =
del_value.map(|value| extract_facet_values(&value, del_geo_support));
del_value.map(|value| extract_facet_values(&value, geo_support));
let add_filterable_values =
add_value.map(|value| extract_facet_values(&value, add_geo_support));
add_value.map(|value| extract_facet_values(&value, geo_support));

// Those closures are just here to simplify things a bit.
let mut insert_numbers_diff = |del_numbers, add_numbers| {
@@ -8,7 +8,6 @@ use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::error::GeoError;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::extract_finite_float_from_value;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::{FieldId, InternalError, Result};

/// Extracts the geographical coordinates contained in each document under the `_geo` field.
@@ -19,7 +18,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
primary_key_id: FieldId,
settings_diff: &InnerIndexSettingsDiff,
(lat_fid, lng_fid): (FieldId, FieldId),
) -> Result<grenad::Reader<BufReader<File>>> {
let mut writer = create_writer(
indexer.chunk_compression_type,
@@ -39,27 +38,47 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
serde_json::from_slice(document_id).unwrap()
};

// extract old version
let del_lat_lng =
extract_lat_lng(&obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
// extract new version
let add_lat_lng =
extract_lat_lng(&obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
// first we get the two fields
match (obkv.get(lat_fid), obkv.get(lng_fid)) {
(Some(lat), Some(lng)) => {
let deladd_lat_obkv = KvReaderDelAdd::new(lat);
let deladd_lng_obkv = KvReaderDelAdd::new(lng);

if del_lat_lng != add_lat_lng {
let mut obkv = KvWriterDelAdd::memory();
if let Some([lat, lng]) = del_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Deletion, bytes)?;
// then we extract the values
let del_lat_lng = deladd_lat_obkv
.get(DelAdd::Deletion)
.zip(deladd_lng_obkv.get(DelAdd::Deletion))
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
.transpose()?;
let add_lat_lng = deladd_lat_obkv
.get(DelAdd::Addition)
.zip(deladd_lng_obkv.get(DelAdd::Addition))
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
.transpose()?;

if del_lat_lng != add_lat_lng {
let mut obkv = KvWriterDelAdd::memory();
if let Some([lat, lng]) = del_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Deletion, bytes)?;
}
if let Some([lat, lng]) = add_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Addition, bytes)?;
}
let bytes = obkv.into_inner()?;
writer.insert(docid_bytes, bytes)?;
}
}
if let Some([lat, lng]) = add_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Addition, bytes)?;
(None, Some(_)) => {
return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
}
let bytes = obkv.into_inner()?;
writer.insert(docid_bytes, bytes)?;
(Some(_), None) => {
return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
}
(None, None) => (),
}
}
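The 16-byte values written above are simply the latitude followed by the longitude, each as a native-endian f64. A minimal sketch of that layout, added here for clarity and written without the concat_arrays! macro or the obkv writer:

// Same byte layout as `concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()]`.
fn pack_lat_lng(lat: f64, lng: f64) -> [u8; 16] {
    let mut bytes = [0u8; 16];
    bytes[..8].copy_from_slice(&lat.to_ne_bytes());
    bytes[8..].copy_from_slice(&lng.to_ne_bytes());
    bytes
}

fn main() {
    let packed = pack_lat_lng(45.4777599, 9.1967508);
    // both halves round-trip back to the original coordinates
    assert_eq!(f64::from_ne_bytes(packed[..8].try_into().unwrap()), 45.4777599);
    assert_eq!(f64::from_ne_bytes(packed[8..].try_into().unwrap()), 9.1967508);
}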
@@ -67,37 +86,16 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
}

/// Extract the finite floats lat and lng from two bytes slices.
fn extract_lat_lng(
document: &obkv::KvReader<FieldId>,
settings: &InnerIndexSettings,
deladd: DelAdd,
document_id: impl Fn() -> Value,
) -> Result<Option<[f64; 2]>> {
match settings.geo_fields_ids {
Some((lat_fid, lng_fid)) => {
let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
let (lat, lng) = match (lat, lng) {
(Some(lat), Some(lng)) => (lat, lng),
(Some(_), None) => {
return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
}
(None, Some(_)) => {
return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
}
(None, None) => return Ok(None),
};
let lat = extract_finite_float_from_value(
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
)
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> {
let lat = extract_finite_float_from_value(
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
)
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;

let lng = extract_finite_float_from_value(
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
)
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
Ok(Some([lat, lng]))
}
None => Ok(None),
}
let lng = extract_finite_float_from_value(
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
)
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;

Ok([lat, lng])
}
@@ -43,6 +43,7 @@ pub(crate) fn data_from_obkv_documents(
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
primary_key_id: FieldId,
geo_fields_ids: Option<(FieldId, FieldId)>,
settings_diff: Arc<InnerIndexSettingsDiff>,
max_positions_per_attributes: Option<u32>,
) -> Result<()> {
@@ -69,6 +70,7 @@ pub(crate) fn data_from_obkv_documents(
indexer,
lmdb_writer_sx.clone(),
primary_key_id,
geo_fields_ids,
settings_diff.clone(),
max_positions_per_attributes,
)
@@ -291,6 +293,7 @@ fn send_and_extract_flattened_documents_data(
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
primary_key_id: FieldId,
geo_fields_ids: Option<(FieldId, FieldId)>,
settings_diff: Arc<InnerIndexSettingsDiff>,
max_positions_per_attributes: Option<u32>,
) -> Result<(
@@ -300,13 +303,12 @@ fn send_and_extract_flattened_documents_data(
let flattened_documents_chunk =
flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;

if settings_diff.run_geo_indexing() {
if let Some(geo_fields_ids) = geo_fields_ids {
let documents_chunk_cloned = flattened_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
let settings_diff = settings_diff.clone();
rayon::spawn(move || {
let result =
extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, &settings_diff);
extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_fields_ids);
let _ = match result {
Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
@@ -345,6 +347,7 @@ fn send_and_extract_flattened_documents_data(
flattened_documents_chunk.clone(),
indexer,
&settings_diff,
geo_fields_ids,
)?;

// send fid_docid_facet_numbers_chunk to DB writer
@@ -315,6 +315,28 @@ where
// get the primary key field id
let primary_key_id = settings_diff.new.fields_ids_map.id(&primary_key).unwrap();

// get the fid of the `_geo.lat` and `_geo.lng` fields.
let mut field_id_map = self.index.fields_ids_map(self.wtxn)?;

// self.index.fields_ids_map($a)? ==>> field_id_map
let geo_fields_ids = match field_id_map.id("_geo") {
Some(gfid) => {
let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
// if `_geo` is faceted then we get the `lat` and `lng`
if is_sortable || is_filterable {
let field_ids = field_id_map
.insert("_geo.lat")
.zip(field_id_map.insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
Some(field_ids)
} else {
None
}
}
None => None,
};

let pool_params = GrenadParameters {
chunk_compression_type: self.indexer_config.chunk_compression_type,
chunk_compression_level: self.indexer_config.chunk_compression_level,
@@ -398,6 +420,7 @@ where
pool_params,
lmdb_writer_sx.clone(),
primary_key_id,
geo_fields_ids,
settings_diff.clone(),
max_positions_per_attributes,
)
@@ -1142,11 +1142,6 @@ impl InnerIndexSettingsDiff {
self.settings_update_only
}

pub fn run_geo_indexing(&self) -> bool {
self.old.geo_fields_ids != self.new.geo_fields_ids
|| (!self.settings_update_only && self.new.geo_fields_ids.is_some())
}

pub fn modified_faceted_fields(&self) -> HashSet<String> {
&self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields
}
@@ -1166,7 +1161,6 @@ pub(crate) struct InnerIndexSettings {
pub proximity_precision: ProximityPrecision,
pub embedding_configs: EmbeddingConfigs,
pub existing_fields: HashSet<String>,
pub geo_fields_ids: Option<(FieldId, FieldId)>,
}

impl InnerIndexSettings {
@@ -1175,7 +1169,7 @@ impl InnerIndexSettings {
let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap());
let allowed_separators = index.allowed_separators(rtxn)?;
let dictionary = index.dictionary(rtxn)?;
let mut fields_ids_map = index.fields_ids_map(rtxn)?;
let fields_ids_map = index.fields_ids_map(rtxn)?;
let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
let user_defined_searchable_fields =
user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
@@ -1190,24 +1184,6 @@ impl InnerIndexSettings {
.into_iter()
.filter_map(|(field, count)| (count != 0).then_some(field))
.collect();
// index.fields_ids_map($a)? ==>> fields_ids_map
let geo_fields_ids = match fields_ids_map.id("_geo") {
Some(gfid) => {
let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
// if `_geo` is faceted then we get the `lat` and `lng`
if is_sortable || is_filterable {
let field_ids = fields_ids_map
.insert("_geo.lat")
.zip(fields_ids_map.insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
Some(field_ids)
} else {
None
}
}
None => None,
};

Ok(Self {
stop_words,
@@ -1222,7 +1198,6 @@ impl InnerIndexSettings {
proximity_precision,
embedding_configs,
existing_fields,
geo_fields_ids,
})
}
@@ -159,7 +159,6 @@ pub fn expected_order(

match optional_words {
TermsMatchingStrategy::Last => groups.into_iter().flatten().collect(),
TermsMatchingStrategy::Frequency => groups.into_iter().flatten().collect(),
TermsMatchingStrategy::All => {
groups.into_iter().flatten().filter(|d| d.word_rank == 0).collect()
}