mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-18 04:11:07 +00:00
Compare commits
17 Commits
tmp-use-we
...
v1.8.3
Author | SHA1 | Date | |
---|---|---|---|
7d69953267 | |||
7bd1b7ac43 | |||
1ff860e0a8 | |||
5d2b172e79 | |||
e64d0a206e | |||
6254c7cee1 | |||
6c6c4732a1 | |||
3976fe660e | |||
50f8218a5d | |||
19585f1a4f | |||
ba75d23bfe | |||
7fbb3bf8e8 | |||
9066a446a3 | |||
f762307838 | |||
3e94a90722 | |||
fc7e817221 | |||
0f78703b85 |
34
Cargo.lock
generated
34
Cargo.lock
generated
@ -494,7 +494,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
@ -639,7 +639,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "build-info"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"time",
|
||||
@ -1539,7 +1539,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "dump"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@ -1787,7 +1787,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "file-store"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"faux",
|
||||
"tempfile",
|
||||
@ -1810,7 +1810,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "filter-parser"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"nom",
|
||||
@ -1830,7 +1830,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "flatten-serde-json"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@ -1948,7 +1948,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fuzzers"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"clap",
|
||||
@ -2442,7 +2442,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
|
||||
|
||||
[[package]]
|
||||
name = "index-scheduler"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@ -2638,7 +2638,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "json-depth-checker"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@ -3275,7 +3275,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
||||
|
||||
[[package]]
|
||||
name = "meili-snap"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"md5",
|
||||
@ -3284,7 +3284,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"actix-cors",
|
||||
"actix-http",
|
||||
@ -3377,7 +3377,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-auth"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"base64 0.21.7",
|
||||
"enum-iterator",
|
||||
@ -3396,7 +3396,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-types"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"anyhow",
|
||||
@ -3426,7 +3426,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilitool"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
@ -3465,7 +3465,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "milli"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"arroy",
|
||||
"big_s",
|
||||
@ -3906,7 +3906,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
|
||||
|
||||
[[package]]
|
||||
name = "permissive-json-pointer"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"serde_json",
|
||||
@ -6074,7 +6074,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "xtask"
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"build-info",
|
||||
|
@ -22,7 +22,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.8.0"
|
||||
version = "1.8.3"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
|
@ -152,6 +152,7 @@ impl Settings<Unchecked> {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[allow(dead_code)] // otherwise rustc complains that the fields go unused
|
||||
#[cfg_attr(test, derive(serde::Serialize))]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
|
@ -182,6 +182,7 @@ impl Settings<Unchecked> {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // otherwise rustc complains that the fields go unused
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[cfg_attr(test, derive(serde::Serialize))]
|
||||
#[serde(deny_unknown_fields)]
|
||||
|
@ -200,6 +200,7 @@ impl std::ops::Deref for IndexUid {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)] // otherwise rustc complains that the fields go unused
|
||||
#[derive(Debug)]
|
||||
#[cfg_attr(test, derive(serde::Serialize))]
|
||||
#[cfg_attr(test, serde(rename_all = "camelCase"))]
|
||||
|
@ -40,8 +40,9 @@ pub struct Permit {
|
||||
|
||||
impl Drop for Permit {
|
||||
fn drop(&mut self) {
|
||||
let sender = self.sender.clone();
|
||||
// if the channel is closed then the whole instance is down
|
||||
let _ = futures::executor::block_on(self.sender.send(()));
|
||||
std::mem::drop(tokio::spawn(async move { sender.send(()).await }));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -117,3 +117,69 @@ async fn geo_bounding_box_with_string_and_number() {
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn bug_4640() {
|
||||
// https://github.com/meilisearch/meilisearch/issues/4640
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.update_settings_filterable_attributes(json!(["_geo"])).await;
|
||||
let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await;
|
||||
index.wait_task(ret.uid()).await;
|
||||
|
||||
// Sort the document with the second one first
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"sort": ["_geoPoint(45.4777599, 9.1967508):asc"],
|
||||
}),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": 2,
|
||||
"name": "La Bella Italia",
|
||||
"address": "456 Elm Street, Townsville",
|
||||
"type": "Italian",
|
||||
"rating": 9,
|
||||
"_geo": {
|
||||
"lat": "45.4777599",
|
||||
"lng": "9.1967508"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"name": "Taco Truck",
|
||||
"address": "444 Salsa Street, Burritoville",
|
||||
"type": "Mexican",
|
||||
"rating": 9,
|
||||
"_geo": {
|
||||
"lat": 34.0522,
|
||||
"lng": -118.2437
|
||||
},
|
||||
"_geoDistance": 9714063
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"name": "CrĂŞpe Truck",
|
||||
"address": "2 Billig Avenue, Rouenville",
|
||||
"type": "French",
|
||||
"rating": 10
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"processingTimeMs": "[time]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 3
|
||||
}
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
@ -74,10 +74,10 @@ csv = "1.3.0"
|
||||
candle-core = { version = "0.4.1" }
|
||||
candle-transformers = { version = "0.4.1" }
|
||||
candle-nn = { version = "0.4.1" }
|
||||
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default_features = false, features = [
|
||||
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
|
||||
"onig",
|
||||
] }
|
||||
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
|
||||
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
|
||||
"online",
|
||||
] }
|
||||
tiktoken-rs = "0.5.8"
|
||||
|
@ -22,7 +22,7 @@ pub enum SearchEvents {
|
||||
RankingRuleStartIteration { ranking_rule_idx: usize, universe_len: u64 },
|
||||
RankingRuleNextBucket { ranking_rule_idx: usize, universe_len: u64, bucket_len: u64 },
|
||||
RankingRuleSkipBucket { ranking_rule_idx: usize, bucket_len: u64 },
|
||||
RankingRuleEndIteration { ranking_rule_idx: usize, universe_len: u64 },
|
||||
RankingRuleEndIteration { ranking_rule_idx: usize },
|
||||
ExtendResults { new: Vec<u32> },
|
||||
ProximityGraph { graph: RankingRuleGraph<ProximityGraph> },
|
||||
ProximityPaths { paths: Vec<Vec<Interned<ProximityCondition>>> },
|
||||
@ -123,12 +123,9 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
|
||||
&mut self,
|
||||
ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<QueryGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
_universe: &RoaringBitmap,
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleEndIteration {
|
||||
ranking_rule_idx,
|
||||
universe_len: universe.len(),
|
||||
});
|
||||
self.events.push(SearchEvents::RankingRuleEndIteration { ranking_rule_idx });
|
||||
self.location.pop();
|
||||
}
|
||||
fn add_to_results(&mut self, docids: &[u32]) {
|
||||
@ -326,7 +323,7 @@ impl<'ctx> DetailedLoggerFinish<'ctx> {
|
||||
assert!(ranking_rule_idx == self.rr_action_counter.len() - 1);
|
||||
self.write_skip_bucket(bucket_len)?;
|
||||
}
|
||||
SearchEvents::RankingRuleEndIteration { ranking_rule_idx, universe_len: _ } => {
|
||||
SearchEvents::RankingRuleEndIteration { ranking_rule_idx } => {
|
||||
assert!(ranking_rule_idx == self.rr_action_counter.len() - 1);
|
||||
self.write_end_iteration()?;
|
||||
}
|
||||
|
@ -45,7 +45,6 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
settings_diff: &InnerIndexSettingsDiff,
|
||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
) -> Result<ExtractedFacetValues> {
|
||||
puffin::profile_function!();
|
||||
|
||||
@ -127,12 +126,18 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
add_exists.insert(document);
|
||||
}
|
||||
|
||||
let geo_support =
|
||||
geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
|
||||
let del_geo_support = settings_diff
|
||||
.old
|
||||
.geo_fields_ids
|
||||
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
|
||||
let add_geo_support = settings_diff
|
||||
.new
|
||||
.geo_fields_ids
|
||||
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
|
||||
let del_filterable_values =
|
||||
del_value.map(|value| extract_facet_values(&value, geo_support));
|
||||
del_value.map(|value| extract_facet_values(&value, del_geo_support));
|
||||
let add_filterable_values =
|
||||
add_value.map(|value| extract_facet_values(&value, geo_support));
|
||||
add_value.map(|value| extract_facet_values(&value, add_geo_support));
|
||||
|
||||
// Those closures are just here to simplify things a bit.
|
||||
let mut insert_numbers_diff = |del_numbers, add_numbers| {
|
||||
|
@ -8,6 +8,7 @@ use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
||||
use crate::error::GeoError;
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||
use crate::update::index_documents::extract_finite_float_from_value;
|
||||
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
|
||||
use crate::{FieldId, InternalError, Result};
|
||||
|
||||
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
|
||||
@ -18,7 +19,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
|
||||
obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
primary_key_id: FieldId,
|
||||
(lat_fid, lng_fid): (FieldId, FieldId),
|
||||
settings_diff: &InnerIndexSettingsDiff,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
@ -40,47 +41,27 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
|
||||
serde_json::from_slice(document_id).unwrap()
|
||||
};
|
||||
|
||||
// first we get the two fields
|
||||
match (obkv.get(lat_fid), obkv.get(lng_fid)) {
|
||||
(Some(lat), Some(lng)) => {
|
||||
let deladd_lat_obkv = KvReaderDelAdd::new(lat);
|
||||
let deladd_lng_obkv = KvReaderDelAdd::new(lng);
|
||||
// extract old version
|
||||
let del_lat_lng =
|
||||
extract_lat_lng(&obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
|
||||
// extract new version
|
||||
let add_lat_lng =
|
||||
extract_lat_lng(&obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
|
||||
|
||||
// then we extract the values
|
||||
let del_lat_lng = deladd_lat_obkv
|
||||
.get(DelAdd::Deletion)
|
||||
.zip(deladd_lng_obkv.get(DelAdd::Deletion))
|
||||
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
|
||||
.transpose()?;
|
||||
let add_lat_lng = deladd_lat_obkv
|
||||
.get(DelAdd::Addition)
|
||||
.zip(deladd_lng_obkv.get(DelAdd::Addition))
|
||||
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
|
||||
.transpose()?;
|
||||
|
||||
if del_lat_lng != add_lat_lng {
|
||||
let mut obkv = KvWriterDelAdd::memory();
|
||||
if let Some([lat, lng]) = del_lat_lng {
|
||||
#[allow(clippy::drop_non_drop)]
|
||||
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
|
||||
obkv.insert(DelAdd::Deletion, bytes)?;
|
||||
}
|
||||
if let Some([lat, lng]) = add_lat_lng {
|
||||
#[allow(clippy::drop_non_drop)]
|
||||
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
|
||||
obkv.insert(DelAdd::Addition, bytes)?;
|
||||
}
|
||||
let bytes = obkv.into_inner()?;
|
||||
writer.insert(docid_bytes, bytes)?;
|
||||
}
|
||||
if del_lat_lng != add_lat_lng {
|
||||
let mut obkv = KvWriterDelAdd::memory();
|
||||
if let Some([lat, lng]) = del_lat_lng {
|
||||
#[allow(clippy::drop_non_drop)]
|
||||
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
|
||||
obkv.insert(DelAdd::Deletion, bytes)?;
|
||||
}
|
||||
(None, Some(_)) => {
|
||||
return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
|
||||
if let Some([lat, lng]) = add_lat_lng {
|
||||
#[allow(clippy::drop_non_drop)]
|
||||
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
|
||||
obkv.insert(DelAdd::Addition, bytes)?;
|
||||
}
|
||||
(Some(_), None) => {
|
||||
return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
|
||||
}
|
||||
(None, None) => (),
|
||||
let bytes = obkv.into_inner()?;
|
||||
writer.insert(docid_bytes, bytes)?;
|
||||
}
|
||||
}
|
||||
|
||||
@ -88,16 +69,37 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
|
||||
}
|
||||
|
||||
/// Extract the finite floats lat and lng from two bytes slices.
|
||||
fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> {
|
||||
let lat = extract_finite_float_from_value(
|
||||
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
|
||||
)
|
||||
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
|
||||
fn extract_lat_lng(
|
||||
document: &obkv::KvReader<FieldId>,
|
||||
settings: &InnerIndexSettings,
|
||||
deladd: DelAdd,
|
||||
document_id: impl Fn() -> Value,
|
||||
) -> Result<Option<[f64; 2]>> {
|
||||
match settings.geo_fields_ids {
|
||||
Some((lat_fid, lng_fid)) => {
|
||||
let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
|
||||
let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
|
||||
let (lat, lng) = match (lat, lng) {
|
||||
(Some(lat), Some(lng)) => (lat, lng),
|
||||
(Some(_), None) => {
|
||||
return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
|
||||
}
|
||||
(None, Some(_)) => {
|
||||
return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
|
||||
}
|
||||
(None, None) => return Ok(None),
|
||||
};
|
||||
let lat = extract_finite_float_from_value(
|
||||
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
|
||||
)
|
||||
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
|
||||
|
||||
let lng = extract_finite_float_from_value(
|
||||
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
|
||||
)
|
||||
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
|
||||
|
||||
Ok([lat, lng])
|
||||
let lng = extract_finite_float_from_value(
|
||||
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
|
||||
)
|
||||
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
|
||||
Ok(Some([lat, lng]))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
@ -11,7 +11,7 @@ mod extract_word_position_docids;
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::sync::Arc;
|
||||
use std::sync::{Arc, OnceLock};
|
||||
|
||||
use crossbeam_channel::Sender;
|
||||
use rayon::prelude::*;
|
||||
@ -31,7 +31,7 @@ use self::extract_word_position_docids::extract_word_position_docids;
|
||||
use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
|
||||
use super::{helpers, TypedChunk};
|
||||
use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::{FieldId, Result, ThreadPoolNoAbortBuilder};
|
||||
use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
|
||||
|
||||
/// Extract data for each databases from obkv documents in parallel.
|
||||
/// Send data in grenad file over provided Sender.
|
||||
@ -43,7 +43,6 @@ pub(crate) fn data_from_obkv_documents(
|
||||
indexer: GrenadParameters,
|
||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||
primary_key_id: FieldId,
|
||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
settings_diff: Arc<InnerIndexSettingsDiff>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<()> {
|
||||
@ -72,7 +71,6 @@ pub(crate) fn data_from_obkv_documents(
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
primary_key_id,
|
||||
geo_fields_ids,
|
||||
settings_diff.clone(),
|
||||
max_positions_per_attributes,
|
||||
)
|
||||
@ -215,6 +213,18 @@ fn run_extraction_task<FE, FS, M>(
|
||||
})
|
||||
}
|
||||
|
||||
fn request_threads() -> &'static ThreadPoolNoAbort {
|
||||
static REQUEST_THREADS: OnceLock<ThreadPoolNoAbort> = OnceLock::new();
|
||||
|
||||
REQUEST_THREADS.get_or_init(|| {
|
||||
ThreadPoolNoAbortBuilder::new()
|
||||
.num_threads(crate::vector::REQUEST_PARALLELISM)
|
||||
.thread_name(|index| format!("embedding-request-{index}"))
|
||||
.build()
|
||||
.unwrap()
|
||||
})
|
||||
}
|
||||
|
||||
/// Extract chunked data and send it into lmdb_writer_sx sender:
|
||||
/// - documents
|
||||
fn send_original_documents_data(
|
||||
@ -229,11 +239,6 @@ fn send_original_documents_data(
|
||||
let documents_chunk_cloned = original_documents_chunk.clone();
|
||||
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
|
||||
|
||||
let request_threads = ThreadPoolNoAbortBuilder::new()
|
||||
.num_threads(crate::vector::REQUEST_PARALLELISM)
|
||||
.thread_name(|index| format!("embedding-request-{index}"))
|
||||
.build()?;
|
||||
|
||||
if settings_diff.reindex_vectors() || !settings_diff.settings_update_only() {
|
||||
let settings_diff = settings_diff.clone();
|
||||
rayon::spawn(move || {
|
||||
@ -251,7 +256,7 @@ fn send_original_documents_data(
|
||||
prompts,
|
||||
indexer,
|
||||
embedder.clone(),
|
||||
&request_threads,
|
||||
request_threads(),
|
||||
) {
|
||||
Ok(results) => Some(results),
|
||||
Err(error) => {
|
||||
@ -300,7 +305,6 @@ fn send_and_extract_flattened_documents_data(
|
||||
indexer: GrenadParameters,
|
||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||
primary_key_id: FieldId,
|
||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
settings_diff: Arc<InnerIndexSettingsDiff>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(
|
||||
@ -310,12 +314,13 @@ fn send_and_extract_flattened_documents_data(
|
||||
let flattened_documents_chunk =
|
||||
flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
|
||||
|
||||
if let Some(geo_fields_ids) = geo_fields_ids {
|
||||
if settings_diff.run_geo_indexing() {
|
||||
let documents_chunk_cloned = flattened_documents_chunk.clone();
|
||||
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
|
||||
let settings_diff = settings_diff.clone();
|
||||
rayon::spawn(move || {
|
||||
let result =
|
||||
extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_fields_ids);
|
||||
extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, &settings_diff);
|
||||
let _ = match result {
|
||||
Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
|
||||
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
|
||||
@ -354,7 +359,6 @@ fn send_and_extract_flattened_documents_data(
|
||||
flattened_documents_chunk.clone(),
|
||||
indexer,
|
||||
&settings_diff,
|
||||
geo_fields_ids,
|
||||
)?;
|
||||
|
||||
// send fid_docid_facet_numbers_chunk to DB writer
|
||||
|
@ -324,28 +324,6 @@ where
|
||||
// get the primary key field id
|
||||
let primary_key_id = settings_diff.new.fields_ids_map.id(&primary_key).unwrap();
|
||||
|
||||
// get the fid of the `_geo.lat` and `_geo.lng` fields.
|
||||
let mut field_id_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
|
||||
// self.index.fields_ids_map($a)? ==>> field_id_map
|
||||
let geo_fields_ids = match field_id_map.id("_geo") {
|
||||
Some(gfid) => {
|
||||
let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
|
||||
let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
|
||||
// if `_geo` is faceted then we get the `lat` and `lng`
|
||||
if is_sortable || is_filterable {
|
||||
let field_ids = field_id_map
|
||||
.insert("_geo.lat")
|
||||
.zip(field_id_map.insert("_geo.lng"))
|
||||
.ok_or(UserError::AttributeLimitReached)?;
|
||||
Some(field_ids)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
let pool_params = GrenadParameters {
|
||||
chunk_compression_type: self.indexer_config.chunk_compression_type,
|
||||
chunk_compression_level: self.indexer_config.chunk_compression_level,
|
||||
@ -412,7 +390,6 @@ where
|
||||
pool_params,
|
||||
lmdb_writer_sx.clone(),
|
||||
primary_key_id,
|
||||
geo_fields_ids,
|
||||
settings_diff.clone(),
|
||||
max_positions_per_attributes,
|
||||
)
|
||||
|
@ -48,7 +48,6 @@ pub struct Transform<'a, 'i> {
|
||||
fields_ids_map: FieldsIdsMap,
|
||||
|
||||
indexer_settings: &'a IndexerConfig,
|
||||
pub autogenerate_docids: bool,
|
||||
pub index_documents_method: IndexDocumentsMethod,
|
||||
available_documents_ids: AvailableDocumentsIds,
|
||||
|
||||
@ -102,7 +101,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
index: &'i Index,
|
||||
indexer_settings: &'a IndexerConfig,
|
||||
index_documents_method: IndexDocumentsMethod,
|
||||
autogenerate_docids: bool,
|
||||
_autogenerate_docids: bool,
|
||||
) -> Result<Self> {
|
||||
// We must choose the appropriate merge function for when two or more documents
|
||||
// with the same user id must be merged or fully replaced in the same batch.
|
||||
@ -136,7 +135,6 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
index,
|
||||
fields_ids_map: index.fields_ids_map(wtxn)?,
|
||||
indexer_settings,
|
||||
autogenerate_docids,
|
||||
available_documents_ids: AvailableDocumentsIds::from_documents_ids(&documents_ids),
|
||||
original_sorter,
|
||||
flattened_sorter,
|
||||
|
@ -1161,6 +1161,11 @@ impl InnerIndexSettingsDiff {
|
||||
pub fn settings_update_only(&self) -> bool {
|
||||
self.settings_update_only
|
||||
}
|
||||
|
||||
pub fn run_geo_indexing(&self) -> bool {
|
||||
self.old.geo_fields_ids != self.new.geo_fields_ids
|
||||
|| (!self.settings_update_only && self.new.geo_fields_ids.is_some())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
@ -1177,6 +1182,7 @@ pub(crate) struct InnerIndexSettings {
|
||||
pub proximity_precision: ProximityPrecision,
|
||||
pub embedding_configs: EmbeddingConfigs,
|
||||
pub existing_fields: HashSet<String>,
|
||||
pub geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
}
|
||||
|
||||
impl InnerIndexSettings {
|
||||
@ -1185,7 +1191,7 @@ impl InnerIndexSettings {
|
||||
let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap());
|
||||
let allowed_separators = index.allowed_separators(rtxn)?;
|
||||
let dictionary = index.dictionary(rtxn)?;
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let mut fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
|
||||
let user_defined_searchable_fields =
|
||||
user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
|
||||
@ -1200,6 +1206,24 @@ impl InnerIndexSettings {
|
||||
.into_iter()
|
||||
.filter_map(|(field, count)| (count != 0).then_some(field))
|
||||
.collect();
|
||||
// index.fields_ids_map($a)? ==>> fields_ids_map
|
||||
let geo_fields_ids = match fields_ids_map.id("_geo") {
|
||||
Some(gfid) => {
|
||||
let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
|
||||
let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
|
||||
// if `_geo` is faceted then we get the `lat` and `lng`
|
||||
if is_sortable || is_filterable {
|
||||
let field_ids = fields_ids_map
|
||||
.insert("_geo.lat")
|
||||
.zip(fields_ids_map.insert("_geo.lng"))
|
||||
.ok_or(UserError::AttributeLimitReached)?;
|
||||
Some(field_ids)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
stop_words,
|
||||
@ -1214,6 +1238,7 @@ impl InnerIndexSettings {
|
||||
proximity_precision,
|
||||
embedding_configs,
|
||||
existing_fields,
|
||||
geo_fields_ids,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,7 @@ reqwest = { version = "0.11.23", features = [
|
||||
"stream",
|
||||
"json",
|
||||
"rustls-tls",
|
||||
], default_features = false }
|
||||
], default-features = false }
|
||||
serde = { version = "1.0.195", features = ["derive"] }
|
||||
serde_json = "1.0.111"
|
||||
sha2 = "0.10.8"
|
||||
|
Reference in New Issue
Block a user