Compare commits


57 Commits

Author SHA1 Message Date
Louis Dureuil  9bd3482230  WIP  2025-10-14 09:09:33 +02:00
Louis Dureuil  d6e4e414d7  Refactor export route subroutine, add extra-header support  2025-10-13 11:53:31 +02:00
Louis Dureuil  c0617efe76  Extract part of the implementation of the network route  2025-10-09 17:01:02 +02:00
Louis Dureuil  8316c36648  Take &str where possible instead of String  2025-10-09 16:49:33 +02:00
Louis Dureuil  572bae9da1  Pass tokio handle to index-scheduler  2025-10-09 13:58:44 +02:00
Louis Dureuil  2a330dce83  Update easy snapshot  2025-09-23 17:43:03 +02:00
Louis Dureuil  d62a6b6f0d  Make network route async  2025-09-23 16:38:22 +02:00
Louis Dureuil  58b8630862  Add existing errors as UserError as they will now be triggered inside of the task  2025-09-23 16:37:59 +02:00
Louis Dureuil  0703767fc6  Add a process network task type  2025-09-23 16:35:48 +02:00
Louis Dureuil  e0c97325d6  Allow to register a NetworkTopologyChange task  2025-09-23 16:34:28 +02:00
Louis Dureuil  0f3ef8de73  move Network route types to meilisearch_types and prefix existing ones by Db  2025-09-23 16:32:34 +02:00
Louis Dureuil  7313cefd74  Add itertools  2025-09-23 16:25:41 +02:00
Louis Dureuil  7fb4404928  Merge pull request #5758 from meilisearch/cellulite (Cellulite integration)  2025-09-23 12:48:13 +00:00
Tamo  8405f0bf9c  fmt  2025-09-23 13:55:36 +02:00
Tamo  3a7f9b56fe  update cellulite  2025-09-23 13:55:36 +02:00
Louis Dureuil  61034e2e2e  write geojson in obkv  2025-09-23 13:55:36 +02:00
Tamo  108d6d3344  remove a bunch of useless logs  2025-09-23 13:55:36 +02:00
Tamo  35bd00f6a1  continue previous commit  2025-09-23 13:55:36 +02:00
Tamo  69059d67ef  stop returning the geojson field when iterating on the fields  2025-09-23 13:55:36 +02:00
Tamo  e13783103f  use the CELLULITE constant  2025-09-23 13:55:36 +02:00
Tamo  f719665c4e  update the filter-parser after updating its error messages  2025-09-23 13:55:36 +02:00
Tamo  638f284614  densify the shapes before storing them  2025-09-23 13:55:36 +02:00
Tamo  32ac98ed95  style improvement  2025-09-23 13:55:36 +02:00
Tamo  46aee695ca  review the filters errors  2025-09-23 13:55:36 +02:00
Tamo  716c67f858  review and fix all error codes  2025-09-23 13:55:36 +02:00
Tamo  fec10bb2d6  update cellulite to the latest version  2025-09-23 13:55:36 +02:00
Mubelotix  3dac2cf73e  Update tests  2025-09-23 13:55:36 +02:00
Mubelotix  03eca800e6  Support _geoRadius  2025-09-23 13:55:36 +02:00
Mubelotix  28fa2e960e  Tolerate trailing comma  2025-09-23 13:55:36 +02:00
Mubelotix  a3b9220f84  Improve error message  2025-09-23 13:55:36 +02:00
Mubelotix  c09d48edf2  Fix coordinates order in filters  2025-09-23 13:55:36 +02:00
Mubelotix  ae4ab0ebbb  Improve filter parser errors  2025-09-23 13:55:36 +02:00
Mubelotix  900a9a6d59  Reduce identations  2025-09-23 13:55:36 +02:00
Mubelotix  fc560e6730  Improve geo polygon errors  2025-09-23 13:55:36 +02:00
Mubelotix  e2a06470b7  Update tests  2025-09-23 13:55:36 +02:00
Mubelotix  ada27323f2  Rename file  2025-09-23 13:55:36 +02:00
Mubelotix  607a1c2395  Add geo bounding box filter  2025-09-23 13:55:36 +02:00
Mubelotix  b56956ea0c  Optimize geojson channels  2025-09-23 13:55:36 +02:00
Mubelotix  3d21290f7f  Add cellulite database sizes  2025-09-23 13:55:36 +02:00
Mubelotix  4edd4c06bc  Fix trivial clippy warnings  2025-09-23 13:55:36 +02:00
Mubelotix  566baddc6b  Optimize points removed serialization  2025-09-23 13:55:36 +02:00
Tamo  febe3186ce  improve deletion  2025-09-23 13:55:36 +02:00
Tamo  5dd42c1871  remove useless log  2025-09-23 13:55:36 +02:00
Tamo  8670793e6e  fix the cellulite spilling bug  2025-09-23 13:55:36 +02:00
Tamo  41a04aa3ab  fix the cellulite integration  2025-09-23 13:55:36 +02:00
Tamo  88f841bc05  plug in the document deletion in cellulite  2025-09-23 13:55:36 +02:00
Tamo  d19892d2ea  update to the latest version of cellulite and steppe  2025-09-23 13:55:36 +02:00
Tamo  c0905d6650  add the deletion in the new indexer  2025-09-23 13:55:36 +02:00
Tamo  576d7d94b1  fix the old indexer  2025-09-23 13:55:36 +02:00
Tamo  f4f1334b62  add a new _geoPolygon filter to query the cellulite database  2025-09-23 13:55:36 +02:00
Tamo  aaff6c3685  fmt  2025-09-23 13:55:36 +02:00
Tamo  42d2af4c84  finish plugin cellulite to the new indexer  2025-09-23 13:55:36 +02:00
Tamo  6be91c824c  Cellulite is almost in the new indexer. We must add the documentID to the geojson pipeline  2025-09-23 13:55:36 +02:00
Tamo  6ee0537db8  add an extractor for cellulite in the new pipeline  2025-09-23 13:55:36 +02:00
Tamo  3fbeff4308  add cellulite to the old pipeline, it probably doesn't works  2025-09-23 13:55:36 +02:00
Tamo  375546b61a  add a few helpers  2025-09-23 13:55:36 +02:00
Tamo  25a1d50763  add cellulite to the index  2025-09-23 13:55:36 +02:00
96 changed files with 4297 additions and 1353 deletions

Cargo.lock (generated, 1077): file diff suppressed because it is too large.

View File

@@ -158,6 +158,10 @@ pub enum KindDump {
UpgradeDatabase {
from: (u32, u32, u32),
},
NetworkTopologyChange {
network: Option<meilisearch_types::enterprise_edition::network::Network>,
origin: Option<meilisearch_types::tasks::Origin>,
},
}
impl From<Task> for TaskDump {
@@ -240,6 +244,9 @@ impl From<KindWithContent> for KindDump {
KindWithContent::UpgradeDatabase { from: version } => {
KindDump::UpgradeDatabase { from: version }
}
KindWithContent::NetworkTopologyChange { network, origin } => {
KindDump::NetworkTopologyChange { network, origin }
}
}
}
}
@@ -253,7 +260,7 @@ pub(crate) mod test {
use big_s::S;
use maplit::{btreemap, btreeset};
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::enterprise_edition::network::{Network, Remote};
use meilisearch_types::enterprise_edition::network::{DbNetwork, DbRemote};
use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
@@ -544,10 +551,10 @@ pub(crate) mod test {
RuntimeTogglableFeatures::default()
}
fn create_test_network() -> Network {
Network {
fn create_test_network() -> DbNetwork {
DbNetwork {
local: Some("myself".to_string()),
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()), write_api_key: Some("docApiKey".to_string()) }},
remotes: maplit::btreemap! {"other".to_string() => DbRemote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()), write_api_key: Some("docApiKey".to_string()) }},
sharding: false,
}
}

View File

@@ -24,7 +24,7 @@ pub type Batch = meilisearch_types::batches::Batch;
pub type Key = meilisearch_types::keys::Key;
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
pub type Network = meilisearch_types::enterprise_edition::network::Network;
pub type Network = meilisearch_types::enterprise_edition::network::DbNetwork;
pub type Webhooks = meilisearch_types::webhooks::WebhooksDumpView;
// ===== Other types to clarify the code of the compat module

View File

@@ -5,7 +5,7 @@ use std::path::PathBuf;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::enterprise_edition::network::Network;
use meilisearch_types::enterprise_edition::network::DbNetwork;
use meilisearch_types::features::{ChatCompletionSettings, RuntimeTogglableFeatures};
use meilisearch_types::keys::Key;
use meilisearch_types::settings::{Checked, Settings};
@@ -72,7 +72,7 @@ impl DumpWriter {
)?)
}
pub fn create_network(&self, network: Network) -> Result<()> {
pub fn create_network(&self, network: DbNetwork) -> Result<()> {
Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?)
}

View File

@@ -7,23 +7,14 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::char;
use nom::character::complete::multispace0;
use nom::character::complete::multispace1;
use nom::combinator::cut;
use nom::combinator::map;
use nom::combinator::value;
use nom::sequence::preceded;
use nom::sequence::{terminated, tuple};
use nom::character::complete::{char, multispace0, multispace1};
use nom::combinator::{cut, map, value};
use nom::sequence::{preceded, terminated, tuple};
use Condition::*;
use crate::error::IResultExt;
use crate::value::parse_vector_value;
use crate::value::parse_vector_value_cut;
use crate::Error;
use crate::ErrorKind;
use crate::VectorFilter;
use crate::{parse_value, FilterCondition, IResult, Span, Token};
use crate::value::{parse_vector_value, parse_vector_value_cut};
use crate::{parse_value, Error, ErrorKind, FilterCondition, IResult, Span, Token, VectorFilter};
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Condition<'a> {

View File

@@ -75,7 +75,11 @@ pub enum ExpectedValueKind {
pub enum ErrorKind<'a> {
ReservedGeo(&'a str),
GeoRadius,
GeoRadiusArgumentCount(usize),
GeoBoundingBox,
GeoPolygon,
GeoPolygonNotEnoughPoints(usize),
GeoCoordinatesNotPair(usize),
MisusedGeoRadius,
MisusedGeoBoundingBox,
VectorFilterLeftover,
@@ -189,7 +193,7 @@ impl Display for Error<'_> {
}
ErrorKind::InvalidPrimary => {
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` {text}")?
}
ErrorKind::InvalidEscapedNumber => {
writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
@@ -198,11 +202,23 @@ impl Display for Error<'_> {
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
}
ErrorKind::GeoRadius => {
writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")?
writeln!(f, "The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.")?
}
ErrorKind::GeoRadiusArgumentCount(count) => {
writeln!(f, "Was expecting 3 or 4 arguments for `_geoRadius`, but instead found {count}.")?
}
ErrorKind::GeoBoundingBox => {
writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")?
}
ErrorKind::GeoPolygon => {
writeln!(f, "The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.")?
}
ErrorKind::GeoPolygonNotEnoughPoints(n) => {
writeln!(f, "The `_geoPolygon` filter expects at least 3 points but only {n} were specified")?;
}
ErrorKind::GeoCoordinatesNotPair(number) => {
writeln!(f, "Was expecting 2 coordinates but instead found {number}.")?
}
ErrorKind::ReservedGeo(name) => {
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
}

View File

@@ -19,6 +19,7 @@
//! word = (alphanumeric | _ | - | .)+
//! geoRadius = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")"
//! geoBoundingBox = "_geoBoundingBox([" WS * float WS* "," WS* float WS* "], [" WS* float WS* "," WS* float WS* "]")
//! geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
//! ```
//!
//! Other BNF grammar used to handle some specific errors:
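
A minimal sketch, not part of this changeset, of filter strings accepted by the extended grammar above. It assumes the crate is consumed as `filter_parser` and that `FilterCondition::parse` is its public entry point; the inputs mirror the new rules (`_geoPolygon` with at least three `[lat, lng]` pairs and a tolerated trailing comma, `_geoRadius` with an optional fourth resolution argument).

use filter_parser::FilterCondition;

fn main() {
    // Smallest polygon the grammar allows: three coordinate pairs (trailing comma tolerated).
    let polygon = "_geoPolygon([48.85, 2.35], [45.76, 4.83], [43.30, 5.37],)";
    // `_geoRadius` now accepts an optional fourth "resolution" argument.
    let radius = "_geoRadius(48.85, 2.35, 1000, 50)";
    // Only two points: should surface the new GeoPolygonNotEnoughPoints error.
    let invalid = "_geoPolygon([48.85, 2.35], [45.76, 4.83])";

    for input in [polygon, radius, invalid] {
        match FilterCondition::parse(input) {
            Ok(Some(condition)) => println!("parsed: {condition}"),
            Ok(None) => println!("empty filter"),
            Err(error) => println!("error: {error}"),
        }
    }
}
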
@@ -156,8 +157,9 @@ pub enum FilterCondition<'a> {
Or(Vec<Self>),
And(Vec<Self>),
VectorExists { fid: Token<'a>, embedder: Option<Token<'a>>, filter: VectorFilter<'a> },
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a>, resolution: Option<Token<'a>> },
GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
GeoPolygon { points: Vec<[Token<'a>; 2]> },
}
pub enum TraversedElement<'a> {
@@ -189,6 +191,7 @@ impl<'a> FilterCondition<'a> {
FilterCondition::VectorExists { .. }
| FilterCondition::GeoLowerThan { .. }
| FilterCondition::GeoBoundingBox { .. }
| FilterCondition::GeoPolygon { .. }
| FilterCondition::In { .. } => None,
}
}
@@ -202,6 +205,7 @@ impl<'a> FilterCondition<'a> {
}
FilterCondition::GeoLowerThan { .. }
| FilterCondition::GeoBoundingBox { .. }
| FilterCondition::GeoPolygon { .. }
| FilterCondition::In { .. } => None,
FilterCondition::VectorExists { fid, .. } => Some(fid),
}
@@ -396,23 +400,27 @@ fn parse_not(input: Span, depth: usize) -> IResult<FilterCondition> {
/// If we parse `_geoRadius` we MUST parse the rest of the expression.
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
// we want to allow space BEFORE the _geoRadius but not after
let parsed = preceded(
tuple((multispace0, word_exact("_geoRadius"))),
// if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
)(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoRadius)));
let (input, _) = tuple((multispace0, word_exact("_geoRadius")))(input)?;
// if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
let parsed =
delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))(input)
.map_cut(ErrorKind::GeoRadius);
let (input, args) = parsed?;
if args.len() != 3 {
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoRadius)));
if !(3..=4).contains(&args.len()) {
return Err(Error::failure_from_kind(input, ErrorKind::GeoRadiusArgumentCount(args.len())));
}
let res = FilterCondition::GeoLowerThan {
point: [args[0].into(), args[1].into()],
radius: args[2].into(),
resolution: args.get(3).cloned().map(Token::from),
};
Ok((input, res))
}
@@ -420,26 +428,33 @@ fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
/// If we parse `_geoBoundingBox` we MUST parse the rest of the expression.
fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
// we want to allow space BEFORE the _geoBoundingBox but not after
let parsed = preceded(
tuple((multispace0, word_exact("_geoBoundingBox"))),
// if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure
cut(delimited(
char('('),
separated_list1(
tag(","),
ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
),
char(')'),
)),
let (input, _) = tuple((multispace0, word_exact("_geoBoundingBox")))(input)?;
// if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure
let (input, args) = delimited(
char('('),
separated_list1(
tag(","),
ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
),
char(')'),
)(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoBoundingBox)));
.map_cut(ErrorKind::GeoBoundingBox)?;
let (input, args) = parsed?;
if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 {
if args.len() != 2 {
return Err(Error::failure_from_kind(input, ErrorKind::GeoBoundingBox));
}
if let Some(offending) = args.iter().find(|a| a.len() != 2) {
let context = offending.first().unwrap_or(&input);
return Err(Error::failure_from_kind(
*context,
ErrorKind::GeoCoordinatesNotPair(offending.len()),
));
}
let res = FilterCondition::GeoBoundingBox {
top_right_point: [args[0][0].into(), args[0][1].into()],
bottom_left_point: [args[1][0].into(), args[1][1].into()],
@@ -447,6 +462,47 @@ fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
Ok((input, res))
}
/// geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
/// If we parse `_geoPolygon` we MUST parse the rest of the expression.
fn parse_geo_polygon(input: Span) -> IResult<FilterCondition> {
// we want to allow space BEFORE the _geoPolygon but not after
let (input, _) = tuple((multispace0, word_exact("_geoPolygon")))(input)?;
// if we were able to parse `_geoPolygon` and can't parse the rest of the input we return a failure
let (input, args): (_, Vec<Vec<LocatedSpan<_, _>>>) = delimited(
char('('),
separated_list1(
tag(","),
ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
),
preceded(opt(ws(char(','))), char(')')), // Tolerate trailing comma
)(input)
.map_cut(ErrorKind::GeoPolygon)?;
if args.len() < 3 {
let context = args.last().and_then(|a| a.last()).unwrap_or(&input);
return Err(Error::failure_from_kind(
*context,
ErrorKind::GeoPolygonNotEnoughPoints(args.len()),
));
}
if let Some(offending) = args.iter().find(|a| a.len() != 2) {
let context = offending.first().unwrap_or(&input);
return Err(Error::failure_from_kind(
*context,
ErrorKind::GeoCoordinatesNotPair(offending.len()),
));
}
let res = FilterCondition::GeoPolygon {
points: args.into_iter().map(|a| [a[0].into(), a[1].into()]).collect(),
};
Ok((input, res))
}
/// geoPoint = WS* "_geoPoint(float WS* "," WS* float WS* "," WS* float)
fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geoPoint but not after
@@ -516,8 +572,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
}),
),
parse_geo_radius,
parse_geo_bounding_box,
// Made a random block of functions because we reached the maximum number of elements per alt
alt((parse_geo_radius, parse_geo_bounding_box, parse_geo_polygon)),
parse_in,
parse_not_in,
parse_condition,
@@ -597,9 +653,12 @@ impl std::fmt::Display for FilterCondition<'_> {
}
write!(f, " EXISTS")
}
FilterCondition::GeoLowerThan { point, radius } => {
FilterCondition::GeoLowerThan { point, radius, resolution: None } => {
write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
}
FilterCondition::GeoLowerThan { point, radius, resolution: Some(resolution) } => {
write!(f, "_geoRadius({}, {}, {}, {})", point[0], point[1], radius, resolution)
}
FilterCondition::GeoBoundingBox {
top_right_point: top_left_point,
bottom_left_point: bottom_right_point,
@@ -613,6 +672,13 @@ impl std::fmt::Display for FilterCondition<'_> {
bottom_right_point[1]
)
}
FilterCondition::GeoPolygon { points } => {
write!(f, "_geoPolygon([")?;
for point in points {
write!(f, "[{}, {}], ", point[0], point[1])?;
}
write!(f, "])")
}
}
}
}
@@ -776,12 +842,17 @@ pub mod tests {
insta::assert_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
insta::assert_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
insta::assert_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
insta::assert_snapshot!(p("_geoRadius(12,13,14,1000)"), @"_geoRadius({12}, {13}, {14}, {1000})");
// Test geo bounding box
insta::assert_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
insta::assert_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
insta::assert_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
// Test geo polygon
insta::assert_snapshot!(p("_geoPolygon([12, 13], [14, 15], [16, 17])"), @"_geoPolygon([[{12}, {13}], [{14}, {15}], [{16}, {17}], ])");
insta::assert_snapshot!(p("_geoPolygon([12, 13], [14, 15], [-1.2,2939.2], [1,1])"), @"_geoPolygon([[{12}, {13}], [{14}, {15}], [{-1.2}, {2939.2}], [{1}, {1}], ])");
// Test OR + AND
insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
insta::assert_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
@@ -838,50 +909,80 @@ pub mod tests {
11:12 channel = 🐻 AND followers < 100
"###);
insta::assert_snapshot!(p("'OR'"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
insta::assert_snapshot!(p("'OR'"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `\'OR\'`.
1:5 'OR'
"###);
");
insta::assert_snapshot!(p("OR"), @r###"
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
1:3 OR
"###);
insta::assert_snapshot!(p("channel Ponce"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
insta::assert_snapshot!(p("channel Ponce"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `channel Ponce`.
1:14 channel Ponce
"###);
");
insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
insta::assert_snapshot!(p("channel = Ponce OR"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` but instead got nothing.
19:19 channel = Ponce OR
"###);
");
insta::assert_snapshot!(p("_geoRadius"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:11 _geoRadius
"###);
insta::assert_snapshot!(p("_geoRadius"), @r"
The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.
11:11 _geoRadius
");
insta::assert_snapshot!(p("_geoRadius = 12"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:16 _geoRadius = 12
"###);
insta::assert_snapshot!(p("_geoRadius = 12"), @r"
The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.
11:16 _geoRadius = 12
");
insta::assert_snapshot!(p("_geoBoundingBox"), @r###"
insta::assert_snapshot!(p("_geoBoundingBox"), @r"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:16 _geoBoundingBox
"###);
16:16 _geoBoundingBox
");
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r###"
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:21 _geoBoundingBox = 12
"###);
16:21 _geoBoundingBox = 12
");
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:26 _geoBoundingBox(1.0, 1.0)
"###);
17:26 _geoBoundingBox(1.0, 1.0)
");
insta::assert_snapshot!(p("_geoPolygon([1,2,3])"), @r"
The `_geoPolygon` filter expects at least 3 points but only 1 were specified
18:19 _geoPolygon([1,2,3])
");
insta::assert_snapshot!(p("_geoPolygon(1,2,3)"), @r"
The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
13:19 _geoPolygon(1,2,3)
");
insta::assert_snapshot!(p("_geoPolygon([1,2],[1,2],[1,2,3])"), @r"
Was expecting 2 coordinates but instead found 3.
26:27 _geoPolygon([1,2],[1,2],[1,2,3])
");
insta::assert_snapshot!(p("_geoPolygon([1,2],[1,2,3])"), @r"
The `_geoPolygon` filter expects at least 3 points but only 2 were specified
24:25 _geoPolygon([1,2],[1,2,3])
");
insta::assert_snapshot!(p("_geoPolygon(1)"), @r"
The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
13:15 _geoPolygon(1)
");
insta::assert_snapshot!(p("_geoPolygon([1,2)"), @r"
The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
17:18 _geoPolygon([1,2)
");
insta::assert_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
@@ -938,15 +1039,15 @@ pub mod tests {
34:35 channel = mv OR followers >= 1000)
"###);
insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
insta::assert_snapshot!(p("colour NOT EXIST"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `colour NOT EXIST`.
1:17 colour NOT EXIST
"###);
");
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `subscribers 100 TO1000`.
1:23 subscribers 100 TO1000
"###);
");
insta::assert_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule.
@@ -1071,38 +1172,38 @@ pub mod tests {
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
"###);
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
insta::assert_snapshot!(p(r#"value NULL"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NULL`.
1:11 value NULL
"###);
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
");
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NOT NULL`.
1:15 value NOT NULL
"###);
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
");
insta::assert_snapshot!(p(r#"value EMPTY"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value EMPTY`.
1:12 value EMPTY
"###);
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
");
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NOT EMPTY`.
1:16 value NOT EMPTY
"###);
insta::assert_snapshot!(p(r#"value IS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
");
insta::assert_snapshot!(p(r#"value IS"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS`.
1:9 value IS
"###);
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
");
insta::assert_snapshot!(p(r#"value IS NOT"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS NOT`.
1:13 value IS NOT
"###);
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
");
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS EXISTS`.
1:16 value IS EXISTS
"###);
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
");
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS NOT EXISTS`.
1:20 value IS NOT EXISTS
"###);
");
}
#[test]

View File

@@ -23,6 +23,7 @@ dump = { path = "../dump" }
enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.1.2"
hashbrown = "0.15.4"
indexmap = "2.9.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
@@ -45,6 +46,8 @@ tracing = "0.1.41"
ureq = "2.12.1"
uuid = { version = "1.17.0", features = ["serde", "v4"] }
backoff = "0.4.0"
itertools = "0.14.0"
tokio = { version = "1.47.1", features = ["full"] }
[dev-dependencies]
big_s = "1.0.2"
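
A minimal sketch, not part of this changeset, of the `merge_join_by` pattern that motivates the new `itertools` dependency: the network-change code walks two sorted remote maps in one pass and classifies each entry as a deletion, an addition, or an update. The names and URLs below are illustrative only.

use std::collections::BTreeMap;

use itertools::{EitherOrBoth, Itertools};

fn main() {
    let old: BTreeMap<&str, &str> = BTreeMap::from([("a", "http://a"), ("b", "http://b")]);
    let new: BTreeMap<&str, &str> = BTreeMap::from([("b", "http://b2"), ("c", "http://c")]);

    let mut deletions = Vec::new();
    let mut additions_or_updates = Vec::new();

    // Both maps iterate in key order, so one merge pass sees every remote exactly once.
    for eob in old.iter().merge_join_by(new.iter(), |(l, _), (r, _)| l.cmp(r)) {
        match eob {
            EitherOrBoth::Left((name, _)) => deletions.push(*name),
            EitherOrBoth::Right((name, url)) | EitherOrBoth::Both(_, (name, url)) => {
                additions_or_updates.push((*name, *url))
            }
        }
    }

    assert_eq!(deletions, ["a"]);
    assert_eq!(additions_or_updates, [("b", "http://b2"), ("c", "http://c")]);
}
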

View File

@@ -234,6 +234,9 @@ impl<'a> Dump<'a> {
}
}
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
KindDump::NetworkTopologyChange { network: new_network, origin } => {
KindWithContent::NetworkTopologyChange { network: new_network, origin }
}
},
};

View File

@@ -45,6 +45,7 @@ impl From<DateField> for Code {
}
}
#[allow(clippy::large_enum_variant)]
#[derive(Error, Debug)]
pub enum Error {
#[error("{1}")]

View File

@@ -1,6 +1,6 @@
use std::sync::{Arc, RwLock};
use meilisearch_types::enterprise_edition::network::Network;
use meilisearch_types::enterprise_edition::network::DbNetwork;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
@@ -24,7 +24,7 @@ mod db_keys {
pub(crate) struct FeatureData {
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
network: Arc<RwLock<Network>>,
network: Arc<RwLock<DbNetwork>>,
}
#[derive(Debug, Clone, Copy)]
@@ -197,8 +197,8 @@ impl FeatureData {
}));
// Once this is stabilized, network should be stored along with webhooks in index-scheduler's persisted database
let network_db = runtime_features_db.remap_data_type::<SerdeJson<Network>>();
let network: Network = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default();
let network_db = runtime_features_db.remap_data_type::<SerdeJson<DbNetwork>>();
let network: DbNetwork = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default();
Ok(Self {
persisted: runtime_features_db,
@@ -234,8 +234,8 @@ impl FeatureData {
RoFeatures::new(self)
}
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
pub fn put_network(&self, mut wtxn: RwTxn, new_network: DbNetwork) -> Result<()> {
self.persisted.remap_data_type::<SerdeJson<DbNetwork>>().put(
&mut wtxn,
db_keys::NETWORK,
&new_network,
@@ -247,7 +247,7 @@ impl FeatureData {
Ok(())
}
pub fn network(&self) -> Network {
Network::clone(&*self.network.read().unwrap())
pub fn network(&self) -> DbNetwork {
DbNetwork::clone(&*self.network.read().unwrap())
}
}

View File

@@ -36,6 +36,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
run_loop_iteration: _,
embedders: _,
chat_settings: _,
runtime: _,
} = scheduler;
let rtxn = env.read_txn().unwrap();
@@ -317,6 +318,9 @@ fn snapshot_details(d: &Details) -> String {
Details::UpgradeDatabase { from, to } => {
format!("{{ from: {from:?}, to: {to:?} }}")
}
Details::NetworkTopologyChange { network: new_network } => {
format!("{{ new_network: {new_network:?} }}")
}
}
}

View File

@@ -54,7 +54,7 @@ pub use features::RoFeatures;
use flate2::bufread::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::enterprise_edition::network::Network;
use meilisearch_types::enterprise_edition::network::DbNetwork;
use meilisearch_types::features::{
ChatCompletionSettings, InstanceTogglableFeatures, RuntimeTogglableFeatures,
};
@@ -216,6 +216,8 @@ pub struct IndexScheduler {
/// A counter that is incremented before every call to [`tick`](IndexScheduler::tick)
#[cfg(test)]
run_loop_iteration: Arc<RwLock<usize>>,
runtime: Option<tokio::runtime::Handle>,
}
impl IndexScheduler {
@@ -242,6 +244,7 @@ impl IndexScheduler {
run_loop_iteration: self.run_loop_iteration.clone(),
features: self.features.clone(),
chat_settings: self.chat_settings,
runtime: self.runtime.clone(),
}
}
@@ -260,6 +263,7 @@ impl IndexScheduler {
options: IndexSchedulerOptions,
auth_env: Env<WithoutTls>,
from_db_version: (u32, u32, u32),
runtime: Option<tokio::runtime::Handle>,
#[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
#[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>,
) -> Result<Self> {
@@ -341,6 +345,7 @@ impl IndexScheduler {
run_loop_iteration: Arc::new(RwLock::new(0)),
features,
chat_settings,
runtime,
};
this.run();
@@ -892,13 +897,13 @@ impl IndexScheduler {
Ok(())
}
pub fn put_network(&self, network: Network) -> Result<()> {
pub fn put_network(&self, network: DbNetwork) -> Result<()> {
let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
self.features.put_network(wtxn, network)?;
Ok(())
}
pub fn network(&self) -> Network {
pub fn network(&self) -> DbNetwork {
self.features.network()
}
@@ -927,9 +932,10 @@ impl IndexScheduler {
pub fn embedders(
&self,
index_uid: String,
index_uid: &str,
embedding_configs: Vec<IndexEmbeddingConfig>,
) -> Result<RuntimeEmbedders> {
let err = |err| Error::from_milli(err, Some(index_uid.to_owned()));
let res: Result<_> = embedding_configs
.into_iter()
.map(
@@ -942,7 +948,7 @@ impl IndexScheduler {
let document_template = prompt
.try_into()
.map_err(meilisearch_types::milli::Error::from)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
.map_err(err)?;
let fragments = fragments
.into_inner()
@@ -972,9 +978,8 @@ impl IndexScheduler {
let embedder = Arc::new(
Embedder::new(embedder_options.clone(), self.scheduler.embedding_cache_cap)
.map_err(meilisearch_types::milli::vector::Error::from)
.map_err(|err| {
Error::from_milli(err.into(), Some(index_uid.clone()))
})?,
.map_err(milli::Error::from)
.map_err(err)?,
);
{
let mut embedders = self.embedders.write().unwrap();
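
A minimal sketch, not part of this changeset, of why the scheduler now carries an `Option<tokio::runtime::Handle>`: the synchronous tick loop can block on async work (such as proxying a network change to another node) using the runtime the HTTP layer already owns. Everything below is illustrative and independent of the scheduler types; it assumes tokio's "full" feature set, as in the index-scheduler Cargo.toml shown earlier.

use tokio::runtime::Handle;

#[tokio::main]
async fn main() {
    // Capture a handle to the runtime the async layer is already running on.
    let handle: Handle = Handle::current();

    // Hand the handle to synchronous code (a plain thread here, standing in for the
    // scheduler's processing loop); `block_on` must not be called from inside the
    // runtime itself, which is why the work happens on a separate thread.
    let worker = std::thread::spawn(move || {
        handle.block_on(async {
            // e.g. await proxied HTTP calls to other nodes here
            21 * 2
        })
    });

    assert_eq!(worker.join().unwrap(), 42);
}
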

View File

@@ -73,6 +73,7 @@ impl From<KindWithContent> for AutobatchKind {
| KindWithContent::DumpCreation { .. }
| KindWithContent::Export { .. }
| KindWithContent::UpgradeDatabase { .. }
| KindWithContent::NetworkTopologyChange { .. }
| KindWithContent::SnapshotCreation => {
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
}
@@ -287,7 +288,7 @@ impl BatchKind {
};
match (self, autobatch_kind) {
// We don't batch any of these operations
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break((this, BatchStopReason::TaskCannotBeBatched { kind, id })),
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {

View File

@@ -55,6 +55,9 @@ pub(crate) enum Batch {
UpgradeDatabase {
tasks: Vec<Task>,
},
NetworkTopologyChanges {
tasks: Vec<Task>,
},
}
#[derive(Debug)]
@@ -116,7 +119,8 @@ impl Batch {
Batch::SnapshotCreation(tasks)
| Batch::TaskDeletions(tasks)
| Batch::UpgradeDatabase { tasks }
| Batch::IndexDeletion { tasks, .. } => {
| Batch::IndexDeletion { tasks, .. }
| Batch::NetworkTopologyChanges { tasks } => {
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
}
Batch::IndexOperation { op, .. } => match op {
@@ -151,6 +155,7 @@ impl Batch {
| Dump(_)
| Export { .. }
| UpgradeDatabase { .. }
| NetworkTopologyChanges { .. }
| IndexSwap { .. } => None,
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
@@ -176,6 +181,7 @@ impl fmt::Display for Batch {
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
Batch::Export { .. } => f.write_str("Export")?,
Batch::NetworkTopologyChanges { .. } => f.write_str("NetworkTopologyChange")?,
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
};
match index_uid {
@@ -545,7 +551,18 @@ impl IndexScheduler {
return Ok(Some((Batch::Dump(task), current_batch)));
}
// 6. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
// 6. We batch the network changes.
let to_network = self.queue.tasks.get_kind(rtxn, Kind::NetworkTopologyChange)? & enqueued;
if !to_network.is_empty() {
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_network)?;
current_batch.processing(&mut tasks);
current_batch.reason(BatchStopReason::TaskKindCannotBeBatched {
kind: Kind::NetworkTopologyChange,
});
return Ok(Some((Batch::NetworkTopologyChanges { tasks }, current_batch)));
}
// 7. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
let mut task =
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;

View File

@@ -0,0 +1,6 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
mod process_network;

View File

@@ -0,0 +1,362 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
use std::collections::BTreeMap;
use std::time::Duration;
use bumpalo::Bump;
use itertools::{EitherOrBoth, Itertools};
use meilisearch_types::enterprise_edition::network::{DbNetwork, DbRemote, Network, Remote};
use meilisearch_types::milli::documents::PrimaryKey;
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
use meilisearch_types::milli::update::new::indexer;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self};
use meilisearch_types::tasks::{KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use crate::scheduler::process_export::{ExportContext, ExportOptions, TargetInstance};
use crate::{Error, IndexScheduler};
impl IndexScheduler {
pub(crate) fn process_network_changes(
&self,
progress: Progress,
mut tasks: Vec<Task>,
) -> crate::Result<Vec<Task>> {
let old_network = self.network();
let mut current_network = Some(old_network.clone());
for task in &tasks {
let KindWithContent::NetworkTopologyChange { network, origin } = &task.kind else {
continue;
};
current_network = match (current_network, network) {
(None, None) => None,
(None, Some(network)) => Some(accumulate(DbNetwork::default(), network.clone())?),
(Some(current_network), None) => Some(current_network),
(Some(current_network), Some(new_network)) => {
Some(accumulate(current_network, new_network.clone())?)
}
};
}
'network: {
let mut new_network = current_network.unwrap_or_default();
if old_network == new_network {
// no change, exit
break 'network;
}
// TODO: only do this if the task originates with an end-user
let must_replicate = old_network.sharding || new_network.sharding;
if !must_replicate {
self.put_network(new_network)?;
break 'network;
}
let must_stop_processing = &self.scheduler.must_stop_processing;
// FIXME: make it mandatory for `self` to be part of the network
let old_this = old_network.local.as_deref();
// FIXME: error here
let new_this = new_network.local.unwrap();
// in network replication, we need to tell old nodes that they are no longer part of the network.
// This is made difficult by "node aliasing": Meilisearch has no way of knowing if two nodes with different names
// or even different URLs actually refer to the same machine in two different versions of the network.
//
// This implementation ignores aliasing: a node is the same when it has the same name.
//
// To defeat aliasing, we iterate a first time to collect all deletions and additions, then we make sure to process the deletions
// first, rather than processing the tasks in the lexicographic order of remotes.
let mut node_deletions = Vec::new();
let mut node_additions = Vec::new();
for eob in old_network
.remotes
.iter()
.merge_join_by(new_network.remotes.iter(), |(left, _), (right, _)| left.cmp(right))
{
match eob {
EitherOrBoth::Both((to_update_name, _), (_, new_node)) => {
if to_update_name.as_str() == new_this {
continue; // skip `self`
}
node_additions.push((to_update_name, new_node));
}
EitherOrBoth::Left((to_delete_name, to_delete_node)) => {
if Some(to_delete_name.as_str()) == old_this {
continue; // skip `self`
}
node_deletions.push((to_delete_name, to_delete_node));
}
EitherOrBoth::Right((to_add_name, to_add_node)) => {
if to_add_name.as_str() == new_this {
continue; // skip `self`
}
node_additions.push((to_add_name, to_add_node));
}
}
}
let runtime = self.runtime.clone().unwrap();
let mut in_flight = Vec::new();
// process deletions
for (to_delete_name, to_delete) in node_deletions {
// set `self` to None so that this node is forgotten about
new_network.local = None;
in_flight.push(proxy_network(&runtime, to_delete.url.as_str(), &new_network)?);
}
runtime.block_on(async {
for task in in_flight.drain(..) {
// TODO: log and ignore errors during deletion
let res = task.await;
}
});
// process additions
for (to_add_name, to_add) in node_additions {
new_network.local = Some(to_add_name.clone());
in_flight.push(proxy_network(&runtime, to_add.url.as_str(), &new_network)?);
}
runtime.block_on(async {
for task in in_flight.drain(..) {
// TODO: handle errors during addition
let res = task.await;
}
});
// balance documents
new_network.local = Some(new_this);
self.balance_documents(&new_network, &progress, &must_stop_processing)?;
self.put_network(new_network)?;
}
for task in &mut tasks {
task.status = Status::Succeeded;
}
Ok(tasks)
}
fn balance_documents(
&self,
new_network: &DbNetwork,
progress: &Progress,
must_stop_processing: &crate::scheduler::MustStopProcessing,
) -> crate::Result<()> {
// FIXME: unwrap
let new_shards = new_network.shards().unwrap();
// TECHDEBT: this spawns a `ureq` agent additionally to `reqwest`. We probably want to harmonize all of this.
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
let mut indexer_alloc = Bump::new();
// process by batches of 20MiB. Allow for compression? Don't forget about embeddings
let _: Vec<()> = self.try_for_each_index(|index_uid, index| -> crate::Result<()> {
indexer_alloc.reset();
let err = |err| Error::from_milli(err, Some(index_uid.to_string()));
let index_rtxn = index.read_txn()?;
let all_docids = index.external_documents_ids();
let mut documents_to_move_to: hashbrown::HashMap<String, RoaringBitmap> =
hashbrown::HashMap::new();
let mut documents_to_delete = RoaringBitmap::new();
for res in all_docids.iter(&index_rtxn)? {
let (external_docid, docid) = res?;
match new_shards.processing_shard(external_docid) {
Some(shard) if shard.is_own => continue,
Some(shard) => {
documents_to_move_to
.entry_ref(shard.name.as_str())
.or_default()
.insert(docid);
}
None => {
documents_to_delete.insert(docid);
}
}
}
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
for (remote, documents_to_move) in documents_to_move_to {
// TODO: justify the unwrap
let remote = new_network.remotes.get(&remote).unwrap();
let target = TargetInstance {
base_url: &remote.url,
api_key: remote.write_api_key.as_deref(),
};
let options = ExportOptions {
index_uid,
payload_size: None,
override_settings: false,
extra_headers: &Default::default(),
};
let ctx = ExportContext {
index,
index_rtxn: &index_rtxn,
universe: &documents_to_move,
progress,
agent: &agent,
must_stop_processing,
};
self.export_one_index(target, options, ctx)?;
documents_to_delete |= documents_to_move;
}
if documents_to_delete.is_empty() {
return Ok(());
}
let mut new_fields_ids_map = fields_ids_map.clone();
// candidates not empty => index not empty => a primary key is set
let primary_key = index.primary_key(&index_rtxn)?.unwrap();
let primary_key = PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
.map_err(milli::Error::from)
.map_err(err)?;
let mut index_wtxn = index.write_txn()?;
let mut indexer = indexer::DocumentDeletion::new();
indexer.delete_documents_by_docids(documents_to_delete);
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
let embedders = index
.embedding_configs()
.embedding_configs(&index_wtxn)
.map_err(milli::Error::from)
.map_err(err)?;
let embedders = self.embedders(index_uid, embedders)?;
let indexer_config = self.index_mapper.indexer_config();
let pool = &indexer_config.thread_pool;
indexer::index(
&mut index_wtxn,
index,
pool,
indexer_config.grenad_parameters(),
&fields_ids_map,
new_fields_ids_map,
None, // document deletion never changes primary key
&document_changes,
embedders,
&|| must_stop_processing.get(),
&progress,
&EmbedderStats::default(),
)
.map_err(err)?;
index_wtxn.commit()?;
Ok(())
})?;
Ok(())
}
}
fn proxy_network(
runtime: &tokio::runtime::Handle,
url: &str,
network: &DbNetwork,
) -> crate::Result<tokio::task::JoinHandle<()>> {
todo!()
}
fn accumulate(old_network: DbNetwork, new_network: Network) -> crate::Result<DbNetwork> {
let err = |err| Err(Error::from_milli(milli::Error::UserError(err), None));
let merged_local = match new_network.local {
Setting::Set(new_self) => Some(new_self),
Setting::Reset => None,
Setting::NotSet => old_network.local,
};
let merged_sharding = match new_network.sharding {
Setting::Set(new_sharding) => new_sharding,
Setting::Reset => false,
Setting::NotSet => old_network.sharding,
};
if merged_sharding && merged_local.is_none() {
return err(milli::UserError::NetworkShardingWithoutSelf);
}
let merged_remotes = match new_network.remotes {
Setting::Set(new_remotes) => {
let mut merged_remotes = BTreeMap::new();
for either_or_both in old_network
.remotes
.into_iter()
.merge_join_by(new_remotes.into_iter(), |left, right| left.0.cmp(&right.0))
{
match either_or_both {
EitherOrBoth::Both((name, old), (_, Some(new))) => {
let DbRemote {
url: old_url,
search_api_key: old_search_api_key,
write_api_key: old_write_api_key,
} = old;
let Remote {
url: new_url,
search_api_key: new_search_api_key,
write_api_key: new_write_api_key,
} = new;
let merged = DbRemote {
url: match new_url {
Setting::Set(new_url) => new_url,
Setting::Reset => {
return err(milli::UserError::NetworkMissingUrl(name))
}
Setting::NotSet => old_url,
},
search_api_key: match new_search_api_key {
Setting::Set(new_search_api_key) => Some(new_search_api_key),
Setting::Reset => None,
Setting::NotSet => old_search_api_key,
},
write_api_key: match new_write_api_key {
Setting::Set(new_write_api_key) => Some(new_write_api_key),
Setting::Reset => None,
Setting::NotSet => old_write_api_key,
},
};
merged_remotes.insert(name, merged);
}
EitherOrBoth::Both((_, _), (_, None)) | EitherOrBoth::Right((_, None)) => {}
EitherOrBoth::Left((name, node)) => {
merged_remotes.insert(name, node);
}
EitherOrBoth::Right((name, Some(node))) => {
let Some(url) = node.url.set() else {
return err(milli::UserError::NetworkMissingUrl(name));
};
let node = DbRemote {
url,
search_api_key: node.search_api_key.set(),
write_api_key: node.write_api_key.set(),
};
merged_remotes.insert(name, node);
}
}
}
merged_remotes
}
Setting::Reset => BTreeMap::new(),
Setting::NotSet => old_network.remotes,
};
Ok(DbNetwork { local: merged_local, remotes: merged_remotes, sharding: merged_sharding })
}
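
A minimal sketch, not part of this changeset, of the per-field merge rule that `accumulate` applies: an incoming `Setting::Set` overrides the stored value, `Reset` clears it, and `NotSet` keeps it. The local `Setting` enum below only mirrors `meilisearch_types::milli::update::Setting` for illustration.

#[derive(Clone, Debug, PartialEq)]
enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

// Merge one field of the stored network with the corresponding patch field.
fn merge<T>(old: Option<T>, new: Setting<T>) -> Option<T> {
    match new {
        Setting::Set(value) => Some(value), // explicit new value wins
        Setting::Reset => None,             // explicit reset clears the stored value
        Setting::NotSet => old,             // untouched fields keep their stored value
    }
}

fn main() {
    assert_eq!(merge(Some("myself"), Setting::Set("other")), Some("other"));
    assert_eq!(merge(Some("myself"), Setting::<&str>::Reset), None);
    assert_eq!(merge(Some("myself"), Setting::NotSet), Some("myself"));
}
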

View File

@@ -2,6 +2,7 @@ mod autobatcher;
#[cfg(test)]
mod autobatcher_test;
mod create_batch;
mod enterprise_edition;
mod process_batch;
mod process_dump_creation;
mod process_export;

View File

@@ -135,6 +135,9 @@ impl IndexScheduler {
Batch::Dump(task) => self
.process_dump_creation(progress, task)
.map(|tasks| (tasks, ProcessBatchInfo::default())),
Batch::NetworkTopologyChanges { tasks } => self
.process_network_changes(progress, tasks)
.map(|tasks| (tasks, ProcessBatchInfo::default())),
Batch::IndexOperation { op, must_create_index } => {
let index_uid = op.index_uid().to_string();
let index = if must_create_index {

View File

@@ -16,6 +16,7 @@ use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOr
use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError};
use meilisearch_types::settings::{self, SecretPolicy};
use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings};
use roaring::RoaringBitmap;
use serde::Deserialize;
use ureq::{json, Response};
@@ -50,6 +51,7 @@ impl IndexScheduler {
let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
let must_stop_processing = self.scheduler.must_stop_processing.clone();
for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() {
let err = |err| Error::from_milli(err, Some(uid.to_string()));
if must_stop_processing.get() {
return Err(Error::AbortedTask);
}
@@ -61,104 +63,31 @@ impl IndexScheduler {
));
let ExportIndexSettings { filter, override_settings } = export_settings;
let index = self.index(uid)?;
let index_rtxn = index.read_txn()?;
let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));
// First, check if the index already exists
let url = format!("{base_url}/indexes/{uid}");
let response = retry(&must_stop_processing, || {
let mut request = agent.get(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(Default::default()).map_err(into_backoff_error)
});
let index_exists = match response {
Ok(response) => response.status() == 200,
Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => {
false
}
Err(e) => return Err(e),
};
let primary_key = index
.primary_key(&index_rtxn)
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
// Create the index
if !index_exists {
let url = format!("{base_url}/indexes");
retry(&must_stop_processing, || {
let mut request = agent.post(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "uid": uid, "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
})?;
}
// Patch the index primary key
if index_exists && *override_settings {
let url = format!("{base_url}/indexes/{uid}");
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
})?;
}
// Send the index settings
if !index_exists || *override_settings {
let mut settings =
settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// Remove the experimental chat setting if not enabled
if self.features().check_chat_completions("exporting chat settings").is_err() {
settings.chat = Setting::NotSet;
}
// Retry logic for sending settings
let url = format!("{base_url}/indexes/{uid}/settings");
retry(&must_stop_processing, || {
let mut request = agent.patch(&url);
if let Some(bearer) = bearer.as_ref() {
request = request.set("Authorization", bearer);
}
request.send_json(settings.clone()).map_err(into_backoff_error)
})?;
}
let filter = filter
.as_ref()
.map(Filter::from_json)
.transpose()
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
.flatten();
let filter_universe = filter
.map(|f| f.evaluate(&index_rtxn, &index))
.transpose()
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let whole_universe = index
.documents_ids(&index_rtxn)
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
let filter = filter.as_ref().map(Filter::from_json).transpose().map_err(err)?.flatten();
let filter_universe =
filter.map(|f| f.evaluate(&index_rtxn, &index)).transpose().map_err(err)?;
let whole_universe =
index.documents_ids(&index_rtxn).map_err(milli::Error::from).map_err(err)?;
let universe = filter_universe.unwrap_or(whole_universe);
let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// We don't need to keep this one alive as we will
// spawn many threads to process the documents
drop(index_rtxn);
let total_documents = universe.len() as u32;
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
progress.update_progress(progress_step);
let target = TargetInstance { base_url, api_key };
let ctx = ExportContext {
index: &index,
index_rtxn: &index_rtxn,
universe: &universe,
progress: &progress,
agent: &agent,
must_stop_processing: &must_stop_processing,
};
let options = ExportOptions {
index_uid: uid,
payload_size,
override_settings: *override_settings,
extra_headers: &Default::default(),
};
let total_documents = self.export_one_index(target, options, ctx)?;
output.insert(
IndexUidPattern::new_unchecked(uid.clone()),
@@ -167,155 +96,217 @@ impl IndexScheduler {
matched_documents: Some(total_documents as u64),
},
);
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB
let documents_url = format!("{base_url}/indexes/{uid}/documents");
let results = request_threads()
.broadcast(|ctx| {
let index_rtxn = index
.read_txn()
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
let mut buffer = Vec::new();
let mut tmp_buffer = Vec::new();
let mut compressed_buffer = Vec::new();
for (i, docid) in universe.iter().enumerate() {
if i % ctx.num_threads() != ctx.index() {
continue;
}
let document = index
.document(&index_rtxn, docid)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let mut document = obkv_to_json(&all_fields, &fields_ids_map, document)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// TODO definitely factorize this code
'inject_vectors: {
let embeddings = index
.embeddings(&index_rtxn, docid)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
if embeddings.is_empty() {
break 'inject_vectors;
}
let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME)
.or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else {
return Err(Error::from_milli(
milli::Error::UserError(
milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = index
.external_id_of(
&index_rtxn,
std::iter::once(docid),
)
.map(|it| it.into_iter().next())
{
index
} else {
format!("internal docid={docid}")
}
},
value: vectors.clone(),
},
),
Some(uid.to_string()),
));
};
for (
embedder_name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
) in embeddings
{
let embeddings = ExplicitVectors {
embeddings: Some(
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
),
regenerate: regenerate &&
// Meilisearch does not handle dumps with fragments well: because the fragments
// are marked as user-provided,
// all embeddings would be regenerated on any settings change or document update.
// To prevent this, we mark embeddings as non-regenerate in this case.
!has_fragments,
};
vectors.insert(
embedder_name,
serde_json::to_value(embeddings).unwrap(),
);
}
}
tmp_buffer.clear();
serde_json::to_writer(&mut tmp_buffer, &document)
.map_err(milli::InternalError::from)
.map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
// Make sure we put at least one document in the buffer even
// though we might go above the buffer limit before sending
if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
// We compress the documents before sending them
let mut encoder =
GzEncoder::new(&mut compressed_buffer, Compression::default());
encoder
.write_all(&buffer)
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
encoder
.finish()
.map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
retry(&must_stop_processing, || {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
request = request.set("Content-Encoding", "gzip");
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
})?;
buffer.clear();
compressed_buffer.clear();
}
buffer.extend_from_slice(&tmp_buffer);
if i > 0 && i % 100 == 0 {
step.fetch_add(100, atomic::Ordering::Relaxed);
}
}
retry(&must_stop_processing, || {
let mut request = agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&buffer).map_err(into_backoff_error)
})?;
Ok(())
})
.map_err(|e| {
Error::from_milli(
milli::Error::InternalError(InternalError::PanicInThreadPool(e)),
Some(uid.to_string()),
)
})?;
for result in results {
result?;
}
step.store(total_documents, atomic::Ordering::Relaxed);
}
Ok(output)
}
pub(super) fn export_one_index(
&self,
target: TargetInstance<'_>,
options: ExportOptions<'_>,
ctx: ExportContext<'_>,
) -> Result<u64, Error> {
let err = |err| Error::from_milli(err, Some(options.index_uid.to_string()));
let bearer = target.api_key.map(|api_key| format!("Bearer {api_key}"));
let url = format!(
"{base_url}/indexes/{index_uid}",
base_url = target.base_url,
index_uid = options.index_uid
);
let response = retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.get(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(Default::default()).map_err(into_backoff_error)
});
let index_exists = match response {
Ok(response) => response.status() == 200,
Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => false,
Err(e) => return Err(e),
};
let primary_key =
ctx.index.primary_key(&ctx.index_rtxn).map_err(milli::Error::from).map_err(err)?;
if !index_exists {
let url = format!("{base_url}/indexes", base_url = target.base_url);
retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.post(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "uid": options.index_uid, "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
})?;
}
if index_exists && options.override_settings {
retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.patch(&url);
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
let index_param = json!({ "primaryKey": primary_key });
request.send_json(&index_param).map_err(into_backoff_error)
})?;
}
if !index_exists || options.override_settings {
let mut settings =
settings::settings(&ctx.index, &ctx.index_rtxn, SecretPolicy::RevealSecrets)
.map_err(err)?;
// Remove the experimental chat setting if not enabled
if self.features().check_chat_completions("exporting chat settings").is_err() {
settings.chat = Setting::NotSet;
}
// Retry logic for sending settings
let url = format!(
"{base_url}/indexes/{index_uid}/settings",
base_url = target.base_url,
index_uid = options.index_uid
);
retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.patch(&url);
if let Some(bearer) = bearer.as_ref() {
request = request.set("Authorization", bearer);
}
request.send_json(settings.clone()).map_err(into_backoff_error)
})?;
}
let fields_ids_map = ctx.index.fields_ids_map(&ctx.index_rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let total_documents = ctx.universe.len() as u32;
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
ctx.progress.update_progress(progress_step);
let limit = options.payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024);
let documents_url = format!(
"{base_url}/indexes/{index_uid}/documents",
base_url = target.base_url,
index_uid = options.index_uid
);
let results = request_threads()
.broadcast(|broadcast| {
let index_rtxn = ctx.index.read_txn().map_err(milli::Error::from).map_err(err)?;
let mut buffer = Vec::new();
let mut tmp_buffer = Vec::new();
let mut compressed_buffer = Vec::new();
for (i, docid) in ctx.universe.iter().enumerate() {
if i % broadcast.num_threads() != broadcast.index() {
continue;
}
let document = ctx.index.document(&index_rtxn, docid).map_err(err)?;
let mut document =
obkv_to_json(&all_fields, &fields_ids_map, document).map_err(err)?;
// TODO definitely factorize this code
'inject_vectors: {
let embeddings = ctx.index.embeddings(&index_rtxn, docid).map_err(err)?;
if embeddings.is_empty() {
break 'inject_vectors;
}
let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME)
.or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else {
return Err(err(milli::Error::UserError(
milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = ctx
.index
.external_id_of(&index_rtxn, std::iter::once(docid))
.map(|it| it.into_iter().next())
{
index
} else {
format!("internal docid={docid}")
}
},
value: vectors.clone(),
},
)));
};
for (
embedder_name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
) in embeddings
{
let embeddings = ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
)),
regenerate: regenerate &&
// Meilisearch does not handle dumps with fragments well: because the fragments
// are marked as user-provided,
// all embeddings would be regenerated on any settings change or document update.
// To prevent this, we mark embeddings as non-regenerate in this case.
!has_fragments,
};
vectors
.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
}
}
tmp_buffer.clear();
serde_json::to_writer(&mut tmp_buffer, &document)
.map_err(milli::InternalError::from)
.map_err(milli::Error::from)
.map_err(err)?;
// Make sure we put at least one document in the buffer even
// though we might go above the buffer limit before sending
if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
// We compress the documents before sending them
let mut encoder =
GzEncoder::new(&mut compressed_buffer, Compression::default());
encoder.write_all(&buffer).map_err(milli::Error::from).map_err(err)?;
encoder.finish().map_err(milli::Error::from).map_err(err)?;
retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
request = request.set("Content-Encoding", "gzip");
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
})?;
buffer.clear();
compressed_buffer.clear();
}
buffer.extend_from_slice(&tmp_buffer);
if i > 0 && i % 100 == 0 {
step.fetch_add(100, atomic::Ordering::Relaxed);
}
}
retry(ctx.must_stop_processing, || {
let mut request = ctx.agent.post(&documents_url);
request = request.set("Content-Type", "application/x-ndjson");
if let Some(bearer) = &bearer {
request = request.set("Authorization", bearer);
}
request.send_bytes(&buffer).map_err(into_backoff_error)
})?;
Ok(())
})
.map_err(|e| err(milli::Error::InternalError(InternalError::PanicInThreadPool(e))))?;
for result in results {
result?;
}
step.store(total_documents, atomic::Ordering::Relaxed);
Ok(total_documents as u64)
}
}
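As a quick worked example of the batching rule in the upload loop above: `payload_size` unset means a 20 MiB limit, but take a toy limit of 10 bytes with documents that serialize to 6 bytes each. The first document always enters `buffer` because the buffer is empty; the second would bring it to 12 bytes, above the limit, so the 6-byte buffer is gzip-compressed and POSTed first and the second document starts a fresh buffer; whatever is left in `buffer` after the loop is sent by the final `retry` call, uncompressed.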
fn retry<F>(must_stop_processing: &MustStopProcessing, send_request: F) -> Result<ureq::Response>
@@ -374,4 +365,27 @@ fn ureq_error_into_error(error: ureq::Error) -> Error {
}
}
// export_one_index arguments
pub(super) struct TargetInstance<'a> {
pub(super) base_url: &'a str,
pub(super) api_key: Option<&'a str>,
}
pub(super) struct ExportOptions<'a> {
pub(super) index_uid: &'a str,
pub(super) payload_size: Option<&'a Byte>,
pub(super) override_settings: bool,
pub(super) extra_headers: &'a hashbrown::HashMap<String, String>,
}
pub(super) struct ExportContext<'a> {
pub(super) index: &'a meilisearch_types::milli::Index,
pub(super) index_rtxn: &'a milli::heed::RoTxn<'a>,
pub(super) universe: &'a RoaringBitmap,
pub(super) progress: &'a Progress,
pub(super) agent: &'a ureq::Agent,
pub(super) must_stop_processing: &'a MustStopProcessing,
}
// progress related
enum ExportIndex {}
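The three structs above bundle what used to be a long positional argument list for the per-index export; `extra_headers` is the new piece coming from the extra-header support commit. A minimal sketch of building them, assuming a caller that wants one custom header (the header name and values are hypothetical, and how the route actually fills the map is not shown in this compare view):

let mut extra_headers = hashbrown::HashMap::new();
extra_headers.insert("X-Custom-Header".to_string(), "value".to_string());

let target = TargetInstance { base_url: "http://localhost:7700", api_key: Some("master-key") };
let options = ExportOptions {
    index_uid: "movies",
    payload_size: None, // falls back to the 20 MiB default in export_one_index
    override_settings: false,
    extra_headers: &extra_headers,
};
// ExportContext is built from the index, its read transaction, the universe of
// document ids, the progress handle, the ureq agent and the stop flag, exactly
// as in the loop above, then everything is handed to export_one_index.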

View File

@@ -97,7 +97,7 @@ impl IndexScheduler {
.embedding_configs()
.embedding_configs(index_wtxn)
.map_err(|e| Error::from_milli(e.into(), Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
let embedders = self.embedders(&index_uid, embedders)?;
for operation in operations {
match operation {
DocumentOperation::Replace(_content_uuid) => {
@@ -284,7 +284,7 @@ impl IndexScheduler {
.embedding_configs()
.embedding_configs(index_wtxn)
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
let embedders = self.embedders(&index_uid, embedders)?;
progress.update_progress(DocumentEditionProgress::Indexing);
congestion = Some(
@@ -434,7 +434,7 @@ impl IndexScheduler {
.embedding_configs()
.embedding_configs(index_wtxn)
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
let embedders = self.embedders(&index_uid, embedders)?;
progress.update_progress(DocumentDeletionProgress::Indexing);
congestion = Some(

View File

@@ -722,7 +722,7 @@ fn basic_get_stats() {
let kind = index_creation_task("whalo", "fish");
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
{
"indexes": {
"catto": 1,
@@ -746,6 +746,7 @@ fn basic_get_stats() {
"indexDeletion": 0,
"indexSwap": 0,
"indexUpdate": 0,
"networkTopologyChange": 0,
"settingsUpdate": 0,
"snapshotCreation": 0,
"taskCancelation": 0,
@@ -753,7 +754,7 @@ fn basic_get_stats() {
"upgradeDatabase": 0
}
}
"#);
"###);
handle.advance_till([Start, BatchCreated]);
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r#"

View File

@@ -121,7 +121,7 @@ fn import_vectors() {
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
let simple_hf_name = name.clone();
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
let configs = index_scheduler.embedders("doggos", configs).unwrap();
let hf_runtime = configs.get(&simple_hf_name).unwrap();
let hf_embedder = &hf_runtime.embedder;
let beagle_embed = hf_embedder

View File

@@ -126,7 +126,7 @@ impl IndexScheduler {
std::fs::create_dir_all(&options.auth_path).unwrap();
let auth_env = open_auth_store_env(&options.auth_path).unwrap();
let index_scheduler =
Self::new(options, auth_env, version, sender, planned_failures).unwrap();
Self::new(options, auth_env, version, None, sender, planned_failures).unwrap();
// To be 100% consistent between all test we're going to start the scheduler right now
// and ensure it's in the expected starting state.

View File

@@ -285,6 +285,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
| K::DumpCreation { .. }
| K::Export { .. }
| K::UpgradeDatabase { .. }
| K::NetworkTopologyChange { .. }
| K::SnapshotCreation => (),
};
if let Some(Details::IndexSwap { swaps }) = &mut task.details {
@@ -618,6 +619,9 @@ impl crate::IndexScheduler {
Details::UpgradeDatabase { from: _, to: _ } => {
assert_eq!(kind.as_kind(), Kind::UpgradeDatabase);
}
Details::NetworkTopologyChange { .. } => {
assert_eq!(kind.as_kind(), Kind::NetworkTopologyChange);
}
}
}

View File

@@ -271,9 +271,10 @@ macro_rules! json_string {
#[cfg(test)]
mod tests {
use uuid::Uuid;
use crate as meili_snap;
use crate::UUID_IN_MESSAGE_RE;
use uuid::Uuid;
#[test]
fn snap() {

View File

@@ -5,31 +5,85 @@
use std::collections::BTreeMap;
use milli::update::new::indexer::enterprise_edition::sharding::Shards;
use deserr::Deserr;
use milli::update::new::indexer::enterprise_edition::sharding::{Shard, Shards};
use milli::update::Setting;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use crate::deserr::DeserrJsonError;
use crate::error::deserr_codes::{
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkSharding,
InvalidNetworkUrl, InvalidNetworkWriteApiKey,
};
#[derive(Clone, Debug, Deserr, ToSchema, Serialize, Deserialize, PartialEq, Eq)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct Network {
#[schema(value_type = Option<BTreeMap<String, Remote>>, example = json!("http://localhost:7700"))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkRemotes>)]
#[serde(default)]
pub remotes: Setting<BTreeMap<String, Option<Remote>>>,
#[schema(value_type = Option<String>, example = json!("ms-00"), rename = "self")]
#[serde(default, rename = "self")]
#[deserr(default, rename = "self", error = DeserrJsonError<InvalidNetworkSelf>)]
pub local: Setting<String>,
#[schema(value_type = Option<bool>, example = json!(true))]
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidNetworkSharding>)]
pub sharding: Setting<bool>,
}
#[derive(Clone, Debug, Deserr, ToSchema, Serialize, Deserialize, PartialEq, Eq)]
#[deserr(error = DeserrJsonError<InvalidNetworkRemotes>, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct Remote {
#[schema(value_type = Option<String>, example = json!({
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
}))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkUrl>)]
#[serde(default)]
pub url: Setting<String>,
#[schema(value_type = Option<String>, example = json!("XWnBI8QHUc-4IlqbKPLUDuhftNq19mQtjc6JvmivzJU"))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkSearchApiKey>)]
#[serde(default)]
pub search_api_key: Setting<String>,
#[schema(value_type = Option<String>, example = json!("XWnBI8QHUc-4IlqbKPLUDuhftNq19mQtjc6JvmivzJU"))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkWriteApiKey>)]
#[serde(default)]
pub write_api_key: Setting<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
pub struct DbNetwork {
#[serde(default, rename = "self")]
pub local: Option<String>,
#[serde(default)]
pub remotes: BTreeMap<String, Remote>,
pub remotes: BTreeMap<String, DbRemote>,
#[serde(default)]
pub sharding: bool,
}
impl Network {
impl DbNetwork {
pub fn shards(&self) -> Option<Shards> {
if self.sharding {
let this = self.local.as_deref().expect("Inconsistent `sharding` and `self`");
let others = self
.remotes
.keys()
.filter(|name| name.as_str() != this)
.map(|name| name.to_owned())
.collect();
Some(Shards { own: vec![this.to_owned()], others })
let this = self.local.as_deref();
Some(Shards(
self.remotes
.keys()
.map(|name| Shard {
is_own: Some(name.as_str()) == this,
name: name.to_owned(),
})
.collect(),
))
} else {
None
}
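For reference, a hedged illustration of what the reworked `shards()` now yields for a network whose `self` is `"ms-0"` with remotes `ms-0`, `ms-1` and `ms-2` (this assumes `Shards` is a plain wrapper around a `Vec<Shard>` and that `Shard` has exactly the two fields shown above):

Shards(vec![
    Shard { is_own: true, name: "ms-0".to_string() },
    Shard { is_own: false, name: "ms-1".to_string() },
    Shard { is_own: false, name: "ms-2".to_string() },
])
// versus the old Shards { own, others } shape, which kept the local name in
// `own` and every other remote name in `others`.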
@@ -38,7 +92,7 @@ impl Network {
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
pub struct DbRemote {
pub url: String,
#[serde(default)]
pub search_api_key: Option<String>,

View File

@@ -5,6 +5,7 @@ use actix_web::{self as aweb, HttpResponseBuilder};
use aweb::http::header;
use aweb::rt::task::JoinError;
use convert_case::Casing;
use milli::cellulite;
use milli::heed::{Error as HeedError, MdbError};
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
@@ -239,6 +240,7 @@ InconsistentDocumentChangeHeaders , InvalidRequest , BAD_REQU
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeojsonField , InvalidRequest , BAD_REQUEST ;
InvalidHeaderValue , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
@@ -501,7 +503,9 @@ impl ErrorCode for milli::Error {
Code::InvalidFacetSearchFacetName
}
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidGeoField { .. } | UserError::GeoJsonError(_) => {
Code::InvalidDocumentGeoField
}
UserError::InvalidVectorDimensions { .. }
| UserError::InvalidIndexingVectorDimensions { .. } => {
Code::InvalidVectorDimensions
@@ -525,6 +529,19 @@ impl ErrorCode for milli::Error {
| UserError::DocumentEditionCompilationError(_) => {
Code::EditDocumentsByFunctionError
}
UserError::NetworkShardingWithoutSelf => Code::InvalidNetworkSharding,
UserError::NetworkMissingUrl(_) => Code::MissingNetworkUrl,
UserError::CelluliteError(err) => match err {
cellulite::Error::BuildCanceled
| cellulite::Error::VersionMismatchOnBuild(_)
| cellulite::Error::DatabaseDoesntExists
| cellulite::Error::Heed(_)
| cellulite::Error::InvalidGeometry(_)
| cellulite::Error::InternalDocIdMissing(_, _)
| cellulite::Error::CannotConvertLineToCell(_, _, _) => Code::Internal,
cellulite::Error::InvalidGeoJson(_) => Code::InvalidDocumentGeojsonField,
},
UserError::MalformedGeojson(_) => Code::InvalidDocumentGeojsonField,
}
}
}

View File

@@ -1,3 +1,5 @@
#![allow(clippy::result_large_err)]
pub mod batch_view;
pub mod batches;
pub mod compression;

View File

@@ -7,6 +7,7 @@ use time::{Duration, OffsetDateTime};
use utoipa::ToSchema;
use crate::batches::BatchId;
use crate::enterprise_edition::network::Network;
use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::{
@@ -142,6 +143,9 @@ pub struct DetailsView {
pub old_index_uid: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub new_index_uid: Option<String>,
// network
#[serde(skip_serializing_if = "Option::is_none")]
pub network: Option<Network>,
}
impl DetailsView {
@@ -314,6 +318,10 @@ impl DetailsView {
// We should never be able to batch multiple renames at the same time.
(Some(left), Some(_right)) => Some(left),
},
network: match (&self.network, &other.network) {
(None, None) => None,
(_, Some(network)) | (Some(network), None) => Some(network.clone()),
},
}
}
}
@@ -415,6 +423,9 @@ impl From<Details> for DetailsView {
upgrade_to: Some(format!("v{}.{}.{}", to.0, to.1, to.2)),
..Default::default()
},
Details::NetworkTopologyChange { network: new_network } => {
DetailsView { network: new_network, ..Default::default() }
}
}
}
}

View File

@@ -15,6 +15,7 @@ use utoipa::{schema, ToSchema};
use uuid::Uuid;
use crate::batches::BatchId;
use crate::enterprise_edition::network::Network;
use crate::error::ResponseError;
use crate::index_uid_pattern::IndexUidPattern;
use crate::keys::Key;
@@ -58,6 +59,7 @@ impl Task {
| TaskDeletion { .. }
| Export { .. }
| UpgradeDatabase { .. }
| NetworkTopologyChange { .. }
| IndexSwap { .. } => None,
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
@@ -94,7 +96,8 @@ impl Task {
| KindWithContent::DumpCreation { .. }
| KindWithContent::SnapshotCreation
| KindWithContent::Export { .. }
| KindWithContent::UpgradeDatabase { .. } => None,
| KindWithContent::UpgradeDatabase { .. }
| KindWithContent::NetworkTopologyChange { .. } => None,
}
}
}
@@ -170,6 +173,10 @@ pub enum KindWithContent {
UpgradeDatabase {
from: (u32, u32, u32),
},
NetworkTopologyChange {
network: Option<Network>,
origin: Option<Origin>,
},
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
@@ -206,6 +213,7 @@ impl KindWithContent {
KindWithContent::SnapshotCreation => Kind::SnapshotCreation,
KindWithContent::Export { .. } => Kind::Export,
KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
KindWithContent::NetworkTopologyChange { .. } => Kind::NetworkTopologyChange,
}
}
@@ -218,6 +226,7 @@ impl KindWithContent {
| TaskCancelation { .. }
| TaskDeletion { .. }
| Export { .. }
| NetworkTopologyChange { .. }
| UpgradeDatabase { .. } => vec![],
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
@@ -325,6 +334,9 @@ impl KindWithContent {
versioning::VERSION_PATCH,
),
}),
KindWithContent::NetworkTopologyChange { network: new_network, origin: _ } => {
Some(Details::NetworkTopologyChange { network: new_network.clone() })
}
}
}
@@ -407,6 +419,9 @@ impl KindWithContent {
versioning::VERSION_PATCH,
),
}),
KindWithContent::NetworkTopologyChange { network: new_network, origin: _s } => {
Some(Details::NetworkTopologyChange { network: new_network.clone() })
}
}
}
}
@@ -469,6 +484,9 @@ impl From<&KindWithContent> for Option<Details> {
versioning::VERSION_PATCH,
),
}),
KindWithContent::NetworkTopologyChange { network: new_network, origin: _ } => {
Some(Details::NetworkTopologyChange { network: new_network.clone() })
}
}
}
}
@@ -579,6 +597,7 @@ pub enum Kind {
SnapshotCreation,
Export,
UpgradeDatabase,
NetworkTopologyChange,
}
impl Kind {
@@ -597,7 +616,8 @@ impl Kind {
| Kind::DumpCreation
| Kind::Export
| Kind::UpgradeDatabase
| Kind::SnapshotCreation => false,
| Kind::SnapshotCreation
| Kind::NetworkTopologyChange => false,
}
}
}
@@ -618,6 +638,7 @@ impl Display for Kind {
Kind::SnapshotCreation => write!(f, "snapshotCreation"),
Kind::Export => write!(f, "export"),
Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
Kind::NetworkTopologyChange => write!(f, "networkTopologyChange"),
}
}
}
@@ -653,6 +674,8 @@ impl FromStr for Kind {
Ok(Kind::Export)
} else if kind.eq_ignore_ascii_case("upgradeDatabase") {
Ok(Kind::UpgradeDatabase)
} else if kind.eq_ignore_ascii_case("networkTopologyChange") {
Ok(Kind::NetworkTopologyChange)
} else {
Err(ParseTaskKindError(kind.to_owned()))
}
@@ -738,6 +761,9 @@ pub enum Details {
from: (u32, u32, u32),
to: (u32, u32, u32),
},
NetworkTopologyChange {
network: Option<Network>,
},
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
@@ -805,6 +831,7 @@ impl Details {
| Self::Dump { .. }
| Self::Export { .. }
| Self::UpgradeDatabase { .. }
| Self::NetworkTopologyChange { .. }
| Self::IndexSwap { .. } => (),
}

View File

@@ -216,7 +216,10 @@ enum OnFailure {
KeepDb,
}
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
pub fn setup_meilisearch(
opt: &Opt,
handle: tokio::runtime::Handle,
) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
let index_scheduler_opt = IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
@@ -256,6 +259,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
index_scheduler_opt,
OnFailure::RemoveDb,
binary_version, // the db is empty
handle,
)?,
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
@@ -273,7 +277,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
bail!("snapshot doesn't exist at {}", snapshot_path.display())
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
} else {
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
}
} else if let Some(ref path) = opt.import_dump {
let src_path_exists = path.exists();
@@ -284,6 +288,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
index_scheduler_opt,
OnFailure::RemoveDb,
binary_version, // the db is empty
handle,
)?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
@@ -304,10 +309,10 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
} else {
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
}
} else {
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version)?
open_or_create_database(opt, index_scheduler_opt, empty_db, binary_version, handle)?
};
// We create a loop in a thread that registers snapshotCreation tasks
@@ -338,6 +343,7 @@ fn open_or_create_database_unchecked(
index_scheduler_opt: IndexSchedulerOptions,
on_failure: OnFailure,
version: (u32, u32, u32),
handle: tokio::runtime::Handle,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later.
@@ -345,7 +351,7 @@ fn open_or_create_database_unchecked(
let auth_env = open_auth_store_env(&index_scheduler_opt.auth_path).unwrap();
let auth_controller = AuthController::new(auth_env.clone(), &opt.master_key);
let index_scheduler_builder = || -> anyhow::Result<_> {
Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version)?)
Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version, Some(handle))?)
};
match (
@@ -452,6 +458,7 @@ fn open_or_create_database(
index_scheduler_opt: IndexSchedulerOptions,
empty_db: bool,
binary_version: (u32, u32, u32),
handle: tokio::runtime::Handle,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
let version = if !empty_db {
check_version(opt, &index_scheduler_opt, binary_version)?
@@ -459,7 +466,7 @@ fn open_or_create_database(
binary_version
};
open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version)
open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version, handle)
}
fn import_dump(
@@ -584,7 +591,7 @@ fn import_dump(
let reader = DocumentsBatchReader::from_reader(reader)?;
let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
let embedders = index_scheduler.embedders(&uid, embedder_configs)?;
let builder = milli::update::IndexDocuments::new(
&mut wtxn,
@@ -612,7 +619,7 @@ fn import_dump(
let mut indexer = indexer::DocumentOperation::new();
let embedders = index.embedding_configs().embedding_configs(&rtxn)?;
let embedders = index_scheduler.embedders(uid.clone(), embedders)?;
let embedders = index_scheduler.embedders(&uid, embedders)?;
let mmap = unsafe { memmap2::Mmap::map(index_reader.documents_file())? };

View File

@@ -76,7 +76,10 @@ fn on_panic(info: &std::panic::PanicHookInfo) {
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
try_main().await.inspect_err(|error| {
// won't panic inside of tokio::main
let runtime = tokio::runtime::Handle::current();
try_main(runtime).await.inspect_err(|error| {
tracing::error!(%error);
let mut current = error.source();
let mut depth = 0;
@@ -88,7 +91,7 @@ async fn main() -> anyhow::Result<()> {
})
}
async fn try_main() -> anyhow::Result<()> {
async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
std::panic::set_hook(Box::new(on_panic));
@@ -122,7 +125,7 @@ async fn try_main() -> anyhow::Result<()> {
_ => (),
}
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
let (index_scheduler, auth_controller) = setup_meilisearch(&opt, runtime)?;
let analytics =
analytics::Analytics::new(&opt, index_scheduler.clone(), auth_controller.clone()).await;
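The runtime handle captured in `main` is threaded through `setup_meilisearch` down to `IndexScheduler::new`, letting the otherwise synchronous scheduler drive async work when it needs to. The diff does not show how the scheduler uses the handle; as a rough sketch of the general pattern only (names and the work inside the future are made up):

use tokio::runtime::Handle;

// A dedicated OS thread (like the scheduler's tick loop) can block on futures
// through a handle to the main runtime without owning a runtime itself.
fn spawn_worker(handle: Handle) -> std::thread::JoinHandle<()> {
    std::thread::spawn(move || {
        // Safe to block here: this thread is not a runtime worker thread.
        let value = handle.block_on(async { 40 + 2 });
        assert_eq!(value, 42);
    })
}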

View File

@@ -282,8 +282,7 @@ async fn process_search_request(
if let Some(search_rules) = auth_filter.get_index_search_rules(&index_uid) {
add_search_rules(&mut query.filter, search_rules);
}
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index_uid, &index)?;
let permit = search_queue.try_get_search_permit().await?;
let features = index_scheduler.features();
@@ -300,7 +299,7 @@ async fn process_search_request(
let (search, _is_finite_pagination, _max_total_hits, _offset) =
prepare_search(&index_cloned, &rtxn, &query, &search_kind, time_budget, features)?;
match search_from_kind(index_uid, search_kind, search) {
match search_from_kind(&index_uid, search_kind, search) {
Ok((search_results, _)) => Ok((rtxn, Ok(search_results))),
Err(MeilisearchHttpError::Milli {
error: meilisearch_types::milli::Error::UserError(user_error),

View File

@@ -45,7 +45,7 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::enterprise_edition::proxy::{proxy, Body};
use crate::routes::indexes::enterprise_edition::proxy::{check_leader, proxy, Body};
use crate::routes::indexes::search::fix_sort_query_parameters;
use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
@@ -340,6 +340,7 @@ pub async fn delete_document(
let DocumentParam { index_uid, document_id } = path.into_inner();
let index_uid = IndexUid::try_from(index_uid)?;
let network = index_scheduler.network();
let origin = check_leader(&req, &network)?;
analytics.publish(
DocumentsDeletionAggregator {
@@ -363,7 +364,7 @@ pub async fn delete_document(
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
proxy(&index_scheduler, &index_uid, &req, origin, network, Body::none(), &task).await?;
}
let task: SummarizedTaskView = task.into();
@@ -946,6 +947,7 @@ async fn document_addition(
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let mime_type = extract_mime_type(req)?;
let network = index_scheduler.network();
let origin = check_leader(&req, &network)?;
let format = match (
mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
@@ -1081,6 +1083,7 @@ async fn document_addition(
&index_scheduler,
&index_uid,
req,
origin,
network,
Body::with_ndjson_payload(file),
&task,
@@ -1168,6 +1171,7 @@ pub async fn delete_documents_batch(
debug!(parameters = ?body, "Delete documents by batch");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let network = index_scheduler.network();
let origin = check_leader(&req, &network)?;
analytics.publish(
DocumentsDeletionAggregator {
@@ -1194,7 +1198,8 @@ pub async fn delete_documents_batch(
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?;
proxy(&index_scheduler, &index_uid, &req, origin, network, Body::Inline(body), &task)
.await?;
}
let task: SummarizedTaskView = task.into();
@@ -1254,6 +1259,7 @@ pub async fn delete_documents_by_filter(
let index_uid = index_uid.into_inner();
let filter = body.into_inner();
let network = index_scheduler.network();
let origin = check_leader(&req, &network)?;
analytics.publish(
DocumentsDeletionAggregator {
@@ -1286,7 +1292,8 @@ pub async fn delete_documents_by_filter(
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(filter), &task).await?;
proxy(&index_scheduler, &index_uid, &req, origin, network, Body::Inline(filter), &task)
.await?;
}
let task: SummarizedTaskView = task.into();
@@ -1384,6 +1391,7 @@ pub async fn edit_documents_by_function(
.check_edit_documents_by_function("Using the documents edit route")?;
let network = index_scheduler.network();
let origin = check_leader(&req, &network)?;
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
@@ -1436,7 +1444,8 @@ pub async fn edit_documents_by_function(
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(params), &task).await?;
proxy(&index_scheduler, &index_uid, &req, origin, network, Body::Inline(params), &task)
.await?;
}
let task: SummarizedTaskView = task.into();
@@ -1483,6 +1492,7 @@ pub async fn clear_all_documents(
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let network = index_scheduler.network();
let origin = check_leader(&req, &network)?;
analytics.publish(
DocumentsDeletionAggregator {
@@ -1505,7 +1515,7 @@ pub async fn clear_all_documents(
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
proxy(&index_scheduler, &index_uid, &req, origin, network, Body::none(), &task).await?;
}
let task: SummarizedTaskView = task.into();

View File

@@ -38,6 +38,27 @@ impl Body<()> {
}
}
pub fn check_leader(
req: &HttpRequest,
network: &meilisearch_types::enterprise_edition::network::DbNetwork,
) -> Result<Option<Origin>, MeilisearchHttpError> {
match origin_from_req(req)? {
Some(origin) => Ok(Some(origin)),
None => {
let this = network
.local
.as_deref()
.expect("inconsistent `network.sharding` and `network.self`");
let is_leader = this == todo!();
if !is_leader {
return Err(MeilisearchHttpError::NotLeader { leader: todo!() });
}
Ok(None)
}
}
}
/// If necessary, proxies the passed request to the network and updates the task description.
///
/// This function reads the custom headers from the request to determine if it must proxy the request or if the request
@@ -52,11 +73,12 @@ pub async fn proxy<T: serde::Serialize>(
index_scheduler: &IndexScheduler,
index_uid: &str,
req: &HttpRequest,
network: meilisearch_types::enterprise_edition::network::Network,
origin: Option<Origin>,
network: meilisearch_types::enterprise_edition::network::DbNetwork,
body: Body<T>,
task: &meilisearch_types::tasks::Task,
) -> Result<(), MeilisearchHttpError> {
match origin_from_req(req)? {
match origin {
Some(origin) => {
index_scheduler.set_task_network(task.uid, TaskNetwork::Origin { origin })?
}

View File

@@ -260,7 +260,7 @@ pub async fn search(
}
let index = index_scheduler.index(&index_uid)?;
let search_kind = search_kind(&search_query, &index_scheduler, index_uid.to_string(), &index)?;
let search_kind = search_kind(&search_query, &index_scheduler, &index_uid, &index)?;
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(

View File

@@ -339,13 +339,12 @@ pub async fn search_with_url_query(
let index = index_scheduler.index(&index_uid)?;
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index_uid, &index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
index_uid.to_string(),
&index_uid,
&index,
query,
search_kind,
@@ -445,14 +444,13 @@ pub async fn search_with_post(
let index = index_scheduler.index(&index_uid)?;
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let search_kind = search_kind(&query, index_scheduler.get_ref(), &index_uid, &index)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
index_uid.to_string(),
&index_uid,
&index,
query,
search_kind,
@@ -480,7 +478,7 @@ pub async fn search_with_post(
pub fn search_kind(
query: &SearchQuery,
index_scheduler: &IndexScheduler,
index_uid: String,
index_uid: &str,
index: &milli::Index,
) -> Result<SearchKind, ResponseError> {
let is_placeholder_query =

View File

@@ -227,7 +227,7 @@ async fn similar(
let (embedder_name, embedder, quantized) = SearchKind::embedder(
&index_scheduler,
index_uid.to_string(),
index_uid.as_str(),
&index,
&query.embedder,
None,

View File

@@ -39,7 +39,6 @@ use crate::routes::features::RuntimeTogglableFeatures;
use crate::routes::indexes::documents::{DocumentDeletionByFilter, DocumentEditionByFunction};
use crate::routes::indexes::IndexView;
use crate::routes::multi_search::SearchResults;
use crate::routes::network::{Network, Remote};
use crate::routes::swap_indexes::SwapIndexesPayload;
use crate::routes::webhooks::{WebhookResults, WebhookSettings, WebhookWithMetadata};
use crate::search::{
@@ -102,7 +101,7 @@ mod webhooks;
url = "/",
description = "Local server",
)),
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures, Export, WebhookSettings, WebhookResults, WebhookWithMetadata, meilisearch_types::milli::vector::VectorStoreBackend))
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, meilisearch_types::enterprise_edition::network::Network, meilisearch_types::enterprise_edition::network::Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures, Export, WebhookSettings, WebhookResults, WebhookWithMetadata, meilisearch_types::milli::vector::VectorStoreBackend))
)]
pub struct MeilisearchApi;

View File

@@ -239,26 +239,23 @@ pub async fn multi_search_with_post(
})
.with_index(query_index)?;
let index_uid_str = index_uid.to_string();
let search_kind = search_kind(
&query,
index_scheduler.get_ref(),
index_uid_str.clone(),
&index,
)
.with_index(query_index)?;
let search_kind =
search_kind(&query, index_scheduler.get_ref(), &index_uid, &index)
.with_index(query_index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
index_uid_str.clone(),
&index,
query,
search_kind,
retrieve_vector,
features,
)
let search_result = tokio::task::spawn_blocking({
let index_uid = index_uid.clone();
move || {
perform_search(
&index_uid,
&index,
query,
search_kind,
retrieve_vector,
features,
)
}
})
.await
.with_index(query_index)?;

View File

@@ -1,28 +1,21 @@
use std::collections::BTreeMap;
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use index_scheduler::IndexScheduler;
use itertools::{EitherOrBoth, Itertools};
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::enterprise_edition::network::{Network as DbNetwork, Remote as DbRemote};
use meilisearch_types::error::deserr_codes::{
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkSharding,
InvalidNetworkUrl, InvalidNetworkWriteApiKey,
};
use meilisearch_types::enterprise_edition::network::{Network, Remote};
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use meilisearch_types::milli::update::Setting;
use serde::Serialize;
use tracing::debug;
use utoipa::{OpenApi, ToSchema};
use utoipa::OpenApi;
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::SummarizedTaskView;
#[derive(OpenApi)]
#[openapi(
@@ -31,7 +24,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
name = "Network",
description = "The `/network` route allows you to describe the topology of a network of Meilisearch instances.
This route is **synchronous**. This means that no task object will be returned, and any change to the network will be made available immediately.",
This route is **asynchronous**. A task uid will be returned, and any change to the network will be effective after the corresponding task has been processed.",
external_docs(url = "https://www.meilisearch.com/docs/reference/api/network"),
)),
)]
@@ -83,73 +76,6 @@ async fn get_network(
Ok(HttpResponse::Ok().json(network))
}
#[derive(Debug, Deserr, ToSchema, Serialize)]
#[deserr(error = DeserrJsonError<InvalidNetworkRemotes>, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct Remote {
#[schema(value_type = Option<String>, example = json!({
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
}))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkUrl>)]
#[serde(default)]
pub url: Setting<String>,
#[schema(value_type = Option<String>, example = json!("XWnBI8QHUc-4IlqbKPLUDuhftNq19mQtjc6JvmivzJU"))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkSearchApiKey>)]
#[serde(default)]
pub search_api_key: Setting<String>,
#[schema(value_type = Option<String>, example = json!("XWnBI8QHUc-4IlqbKPLUDuhftNq19mQtjc6JvmivzJU"))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkWriteApiKey>)]
#[serde(default)]
pub write_api_key: Setting<String>,
}
#[derive(Debug, Deserr, ToSchema, Serialize)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct Network {
#[schema(value_type = Option<BTreeMap<String, Remote>>, example = json!("http://localhost:7700"))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkRemotes>)]
#[serde(default)]
pub remotes: Setting<BTreeMap<String, Option<Remote>>>,
#[schema(value_type = Option<String>, example = json!("ms-00"), rename = "self")]
#[serde(default, rename = "self")]
#[deserr(default, rename = "self", error = DeserrJsonError<InvalidNetworkSelf>)]
pub local: Setting<String>,
#[schema(value_type = Option<bool>, example = json!(true))]
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidNetworkSharding>)]
pub sharding: Setting<bool>,
}
impl Remote {
pub fn try_into_db_node(self, name: &str) -> Result<DbRemote, ResponseError> {
Ok(DbRemote {
url: self
.url
.set()
.ok_or(ResponseError::from_msg(
format!("Missing field `.remotes.{name}.url`"),
meilisearch_types::error::Code::MissingNetworkUrl,
))
.and_then(|url| {
if let Err(error) = url::Url::parse(&url) {
return Err(ResponseError::from_msg(
format!("Invalid `.remotes.{name}.url` (`{url}`): {error}"),
meilisearch_types::error::Code::InvalidNetworkUrl,
));
}
Ok(url)
})?,
search_api_key: self.search_api_key.set(),
write_api_key: self.write_api_key.set(),
})
}
}
#[derive(Serialize)]
pub struct PatchNetworkAnalytics {
network_size: usize,
@@ -208,111 +134,58 @@ async fn patch_network(
index_scheduler.features().check_network("Using the /network route")?;
let new_network = new_network.0;
let old_network = index_scheduler.network();
debug!(parameters = ?new_network, "Patch network");
let merged_self = match new_network.local {
Setting::Set(new_self) => Some(new_self),
Setting::Reset => None,
Setting::NotSet => old_network.local,
};
let merged_sharding = match new_network.sharding {
Setting::Set(new_sharding) => new_sharding,
Setting::Reset => false,
Setting::NotSet => old_network.sharding,
};
if merged_sharding && merged_self.is_none() {
return Err(ResponseError::from_msg(
"`.sharding`: enabling the sharding requires `.self` to be set\n - Hint: Disable `sharding` or set `self` to a value.".into(),
meilisearch_types::error::Code::InvalidNetworkSharding,
));
}
let merged_remotes = match new_network.remotes {
Setting::Set(new_remotes) => {
let mut merged_remotes = BTreeMap::new();
for either_or_both in old_network
.remotes
.into_iter()
.merge_join_by(new_remotes.into_iter(), |left, right| left.0.cmp(&right.0))
{
match either_or_both {
EitherOrBoth::Both((key, old), (_, Some(new))) => {
let DbRemote {
url: old_url,
search_api_key: old_search_api_key,
write_api_key: old_write_api_key,
} = old;
let Remote {
url: new_url,
search_api_key: new_search_api_key,
write_api_key: new_write_api_key,
} = new;
let merged = DbRemote {
url: match new_url {
Setting::Set(new_url) => {
if let Err(error) = url::Url::parse(&new_url) {
return Err(ResponseError::from_msg(
format!("Invalid `.remotes.{key}.url` (`{new_url}`): {error}"),
meilisearch_types::error::Code::InvalidNetworkUrl,
));
}
new_url
}
Setting::Reset => {
return Err(ResponseError::from_msg(
format!(
"Field `.remotes.{key}.url` cannot be set to `null`"
),
meilisearch_types::error::Code::InvalidNetworkUrl,
))
}
Setting::NotSet => old_url,
},
search_api_key: match new_search_api_key {
Setting::Set(new_search_api_key) => Some(new_search_api_key),
Setting::Reset => None,
Setting::NotSet => old_search_api_key,
},
write_api_key: match new_write_api_key {
Setting::Set(new_write_api_key) => Some(new_write_api_key),
Setting::Reset => None,
Setting::NotSet => old_write_api_key,
},
};
merged_remotes.insert(key, merged);
}
EitherOrBoth::Both((_, _), (_, None)) | EitherOrBoth::Right((_, None)) => {}
EitherOrBoth::Left((key, node)) => {
merged_remotes.insert(key, node);
}
EitherOrBoth::Right((key, Some(node))) => {
let node = node.try_into_db_node(&key)?;
merged_remotes.insert(key, node);
// check the URLs of all remotes
if let Setting::Set(remotes) = &new_network.remotes {
for (remote_name, remote) in remotes.iter() {
let Some(remote) = remote else {
continue;
};
match &remote.url {
Setting::Set(new_url) => {
if let Err(error) = url::Url::parse(&new_url) {
return Err(ResponseError::from_msg(
format!("Invalid `.remotes.{remote_name}.url` (`{new_url}`): {error}"),
meilisearch_types::error::Code::InvalidNetworkUrl,
));
}
}
Setting::Reset => {
return Err(ResponseError::from_msg(
format!("Field `.remotes.{remote_name}.url` cannot be set to `null`"),
meilisearch_types::error::Code::InvalidNetworkUrl,
))
}
Setting::NotSet => (),
}
merged_remotes
}
Setting::Reset => BTreeMap::new(),
Setting::NotSet => old_network.remotes,
};
}
analytics.publish(
PatchNetworkAnalytics {
network_size: merged_remotes.len(),
network_has_self: merged_self.is_some(),
network_size: new_network
.remotes
.as_ref()
.set()
.map(|remotes| remotes.len())
.unwrap_or_default(),
network_has_self: new_network.local.as_ref().set().is_some(),
},
&req,
);
let merged_network =
DbNetwork { local: merged_self, remotes: merged_remotes, sharding: merged_sharding };
index_scheduler.put_network(merged_network.clone())?;
debug!(returns = ?merged_network, "Patch network");
Ok(HttpResponse::Ok().json(merged_network))
let task = index_scheduler.register(
meilisearch_types::tasks::KindWithContent::NetworkTopologyChange {
network: Some(new_network),
origin: None,
},
None,
false,
)?;
debug!(returns = ?task, "Patch network");
let task: SummarizedTaskView = task.into();
return Ok(HttpResponse::Accepted().json(task));
}
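With the route now asynchronous, a successful `PATCH /network` answers `202 Accepted` with the summarized task rather than the merged network object. A hedged example of the payload (values invented; the field set assumed from the usual `SummarizedTaskView` serialization):

let example_response = serde_json::json!({
    "taskUid": 12,
    "indexUid": null,
    "status": "enqueued",
    "type": "networkTopologyChange",
    "enqueuedAt": "2025-10-14T09:00:00Z"
});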

View File

@@ -226,14 +226,14 @@ mod tests {
{
let params = "types=createIndex";
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r#"
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `networkTopologyChange`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
}
"#);
"###);
}
}
#[test]

View File

@@ -1,3 +1,4 @@
use core::convert::Infallible;
use std::collections::BTreeMap;
use std::str::FromStr;
@@ -7,7 +8,6 @@ use actix_http::header::{
};
use actix_web::web::{self, Data, Path};
use actix_web::{HttpRequest, HttpResponse};
use core::convert::Infallible;
use deserr::actix_web::AwebJson;
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::IndexScheduler;
@@ -24,12 +24,12 @@ use tracing::debug;
use url::Url;
use utoipa::{OpenApi, ToSchema};
use uuid::Uuid;
use WebhooksError::*;
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use WebhooksError::*;
#[derive(OpenApi)]
#[openapi(

View File

@@ -9,7 +9,7 @@ use std::vec::{IntoIter, Vec};
use actix_http::StatusCode;
use index_scheduler::{IndexScheduler, RoFeatures};
use itertools::Itertools;
use meilisearch_types::enterprise_edition::network::{Network, Remote};
use meilisearch_types::enterprise_edition::network::{DbNetwork, DbRemote};
use meilisearch_types::error::ResponseError;
use meilisearch_types::milli::order_by_map::OrderByMap;
use meilisearch_types::milli::score_details::{ScoreDetails, WeightedScoreValue};
@@ -456,7 +456,7 @@ fn merge_metadata(
}
type LocalQueriesByIndex = BTreeMap<String, Vec<QueryByIndex>>;
type RemoteQueriesByHost = BTreeMap<String, (Remote, Vec<SearchQueryWithIndex>)>;
type RemoteQueriesByHost = BTreeMap<String, (DbRemote, Vec<SearchQueryWithIndex>)>;
struct PartitionedQueries {
local_queries_by_index: LocalQueriesByIndex,
@@ -477,7 +477,7 @@ impl PartitionedQueries {
&mut self,
federated_query: SearchQueryWithIndex,
query_index: usize,
network: &Network,
network: &DbNetwork,
features: RoFeatures,
) -> Result<(), ResponseError> {
if let Some(pagination_field) = federated_query.has_pagination() {
@@ -672,7 +672,7 @@ struct SearchByIndexParams<'a> {
features: RoFeatures,
is_proxy: bool,
has_remote: bool,
network: &'a Network,
network: &'a DbNetwork,
}
struct SearchByIndex {
@@ -755,8 +755,7 @@ impl SearchByIndex {
// use an immediately invoked lambda to capture the result without returning from the function
let res: Result<(), ResponseError> = (|| {
let search_kind =
search_kind(&query, params.index_scheduler, index_uid.to_string(), &index)?;
let search_kind = search_kind(&query, params.index_scheduler, &index_uid, &index)?;
let canonicalization_kind = match (&search_kind, &query.q) {
(SearchKind::SemanticOnly { .. }, _) => {
@@ -806,11 +805,11 @@ impl SearchByIndex {
{
Some((previous_ranking_rules, previous_query_index, previous_index_uid))
} else {
Some((ranking_rules, query_index, index_uid.clone()))
Some((ranking_rules, query_index, index_uid.to_string()))
};
} else {
self.previous_query_data =
Some((ranking_rules, query_index, index_uid.clone()));
Some((ranking_rules, query_index, index_uid.to_string()));
}
match search_kind {
@@ -839,7 +838,7 @@ impl SearchByIndex {
search.limit(params.required_hit_count);
let (result, _semantic_hit_count) =
super::super::search_from_kind(index_uid.to_string(), search_kind, search)?;
super::super::search_from_kind(&index_uid, search_kind, search)?;
let format = AttributesFormat {
attributes_to_retrieve: query.attributes_to_retrieve,
retrieve_vectors,

View File

@@ -1,6 +1,6 @@
pub use error::ProxySearchError;
use error::ReqwestErrorWithoutUrl;
use meilisearch_types::enterprise_edition::network::Remote;
use meilisearch_types::enterprise_edition::network::DbRemote;
use rand::Rng as _;
use reqwest::{Client, Response, StatusCode};
use serde::de::DeserializeOwned;
@@ -94,7 +94,7 @@ pub struct ProxySearchParams {
/// Performs a federated search on a remote host and returns the results
pub async fn proxy_search(
node: &Remote,
node: &DbRemote,
queries: Vec<SearchQueryWithIndex>,
federation: Federation,
params: &ProxySearchParams,

View File

@@ -362,7 +362,7 @@ pub enum SearchKind {
impl SearchKind {
pub(crate) fn semantic(
index_scheduler: &index_scheduler::IndexScheduler,
index_uid: String,
index_uid: &str,
index: &Index,
embedder_name: &str,
vector_len: Option<usize>,
@@ -380,7 +380,7 @@ impl SearchKind {
pub(crate) fn hybrid(
index_scheduler: &index_scheduler::IndexScheduler,
index_uid: String,
index_uid: &str,
index: &Index,
embedder_name: &str,
semantic_ratio: f32,
@@ -399,7 +399,7 @@ impl SearchKind {
pub(crate) fn embedder(
index_scheduler: &index_scheduler::IndexScheduler,
index_uid: String,
index_uid: &str,
index: &Index,
embedder_name: &str,
vector_len: Option<usize>,
@@ -1114,7 +1114,7 @@ pub fn prepare_search<'t>(
}
pub fn perform_search(
index_uid: String,
index_uid: &str,
index: &Index,
query: SearchQuery,
search_kind: SearchKind,
@@ -1299,27 +1299,24 @@ fn compute_facet_distribution_stats<S: AsRef<str>>(
}
pub fn search_from_kind(
index_uid: String,
index_uid: &str,
search_kind: SearchKind,
search: milli::Search<'_>,
) -> Result<(milli::SearchResult, Option<u32>), MeilisearchHttpError> {
let err = |e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string()));
let (milli_result, semantic_hit_count) = match &search_kind {
SearchKind::KeywordOnly => {
let results = search
.execute()
.map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())))?;
let results = search.execute().map_err(err)?;
(results, None)
}
SearchKind::SemanticOnly { .. } => {
let results = search
.execute()
.map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())))?;
let results = search.execute().map_err(err)?;
let semantic_hit_count = results.document_scores.len() as u32;
(results, Some(semantic_hit_count))
}
SearchKind::Hybrid { semantic_ratio, .. } => search
.execute_hybrid(*semantic_ratio)
.map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid)))?,
SearchKind::Hybrid { semantic_ratio, .. } => {
search.execute_hybrid(*semantic_ratio).map_err(err)?
}
};
Ok((milli_result, semantic_hit_count))
}
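The hunks above make two small refactors: functions now borrow `&str` instead of owning a `String`, and the repeated `map_err` closure in `search_from_kind` is hoisted into a single `err` binding. A minimal sketch of both patterns, using illustrative types that are not part of Meilisearch:

#[derive(Debug)]
struct HttpError {
    index: String,
    cause: String,
}

// Borrow the index uid; only allocate when an error actually needs to own it.
fn sum_ids(index_uid: &str, raw_ids: &[&str]) -> Result<u64, HttpError> {
    // One closure reused by every fallible call instead of repeating the mapping inline.
    let err = |e: std::num::ParseIntError| HttpError {
        index: index_uid.to_string(),
        cause: e.to_string(),
    };
    let mut total = 0;
    for raw in raw_ids {
        total += raw.parse::<u64>().map_err(err)?;
    }
    Ok(total)
}

fn main() {
    println!("{:?}", sum_ids("movies", &["1", "2", "oops"]));
}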

View File

@@ -40,14 +40,14 @@ async fn batch_bad_types() {
let (response, code) = server.batches_filter("types=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r#"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `networkTopologyChange`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
}
"#);
"###);
}
#[actix_rt::test]

View File

@@ -522,6 +522,26 @@ pub async fn shared_index_with_geo_documents() -> &'static Index<'static, Shared
.await
}
pub async fn shared_index_geojson_documents() -> &'static Index<'static, Shared> {
static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new();
INDEX
.get_or_init(|| async {
// Retrieved from https://gitlab-forge.din.developpement-durable.gouv.fr/pub/geomatique/descartes/d-map/-/blob/main/demo/examples/commons/countries.geojson?ref_type=heads
let server = Server::new_shared();
let index = server._index("SHARED_GEOJSON_DOCUMENTS").to_shared();
let countries = include_str!("../documents/geojson/assets/countries.json");
let lille = serde_json::from_str::<serde_json::Value>(countries).unwrap();
let (response, _code) = index._add_documents(Value(lille), Some("name")).await;
server.wait_task(response.uid()).await.succeeded();
let (response, _code) =
index._update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
server.wait_task(response.uid()).await.succeeded();
index
})
.await
}
pub async fn shared_index_for_fragments() -> Index<'static, Shared> {
static INDEX: OnceCell<(Server<Shared>, String)> = OnceCell::const_new();
let (server, uid) = INDEX

View File

@@ -49,8 +49,8 @@ impl Server<Owned> {
}
let options = default_settings(dir.path());
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
let handle = tokio::runtime::Handle::current();
let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
let service = Service { index_scheduler, auth, options, api_key: None };
Server { service, _dir: Some(dir), _marker: PhantomData }
@@ -65,7 +65,9 @@ impl Server<Owned> {
options.master_key = Some("MASTER_KEY".to_string());
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
let handle = tokio::runtime::Handle::current();
let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
let service = Service { index_scheduler, auth, options, api_key: None };
Server { service, _dir: Some(dir), _marker: PhantomData }
@@ -78,7 +80,9 @@ impl Server<Owned> {
}
pub async fn new_with_options(options: Opt) -> Result<Self, anyhow::Error> {
let (index_scheduler, auth) = setup_meilisearch(&options)?;
let handle = tokio::runtime::Handle::current();
let (index_scheduler, auth) = setup_meilisearch(&options, handle)?;
let service = Service { index_scheduler, auth, options, api_key: None };
Ok(Server { service, _dir: None, _marker: PhantomData })
@@ -217,8 +221,9 @@ impl Server<Shared> {
}
let options = default_settings(dir.path());
let handle = tokio::runtime::Handle::current();
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
let (index_scheduler, auth) = setup_meilisearch(&options, handle).unwrap();
let service = Service { index_scheduler, auth, api_key: None, options };
Server { service, _dir: Some(dir), _marker: PhantomData }
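This mirrors the "Pass tokio handle to index-scheduler" commit: each test-server constructor now captures `tokio::runtime::Handle::current()` and passes it to `setup_meilisearch`. A minimal sketch of why a synchronous component might hold such a handle; `do_background_work` is an illustrative placeholder, not Meilisearch code, and the snippet assumes a tokio dependency with the `macros` and `rt-multi-thread` features:

// Capture the current runtime handle so a plain OS thread can still drive async work.
#[tokio::main]
async fn main() {
    let handle = tokio::runtime::Handle::current();
    let worker = std::thread::spawn(move || {
        // Runs outside the runtime, yet can block on a future scheduled through the handle.
        handle.block_on(do_background_work());
    });
    worker.join().expect("worker thread panicked");
}

async fn do_background_work() {
    // Placeholder for whatever the scheduler would await.
}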

View File

@@ -1,6 +1,3 @@
use crate::common::encoder::Encoder;
use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value};
use crate::json;
use actix_web::test;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
@@ -8,6 +5,10 @@ use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use uuid::Uuid;
use crate::common::encoder::Encoder;
use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value};
use crate::json;
/// This is the basic usage of our API and every other tests uses the content-type application/json
#[actix_rt::test]
async fn add_documents_test_json_content_types() {

View File

@@ -134,14 +134,14 @@ async fn get_all_documents_bad_filter() {
let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(json_string!(response), @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `doggo`.\n1:6 doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
"#);
let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
snapshot!(code, @"400 Bad Request");
@@ -523,14 +523,14 @@ async fn delete_document_by_filter() {
// send bad filter
let (response, code) = index.delete_document_by_filter(json!({ "filter": "hello"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `hello`.\n1:6 hello",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
"#);
// send empty filter
let (response, code) = index.delete_document_by_filter(json!({ "filter": ""})).await;
@@ -724,14 +724,14 @@ async fn fetch_document_by_filter() {
let (response, code) = index.fetch_documents(json!({ "filter": "cool doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
"#);
let (response, code) = index.fetch_documents(json!({ "filter": "doggo = bernese" })).await;
snapshot!(code, @"400 Bad Request");

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,547 @@
{
"type": "Polygon",
"coordinates": [
[
[
3.11681,
50.63646
],
[
3.11945,
50.63488
],
[
3.12134,
50.63504
],
[
3.12064,
50.63127
],
[
3.12203,
50.62785
],
[
3.12389,
50.6262
],
[
3.12161,
50.62358
],
[
3.12547,
50.62114
],
[
3.12447,
50.61874
],
[
3.12288,
50.61988
],
[
3.12054,
50.61846
],
[
3.11846,
50.61754
],
[
3.11482,
50.6207
],
[
3.11232,
50.6188
],
[
3.10936,
50.61727
],
[
3.10822,
50.61765
],
[
3.10603,
50.61536
],
[
3.1041,
50.61596
],
[
3.10017,
50.6186
],
[
3.09688,
50.61714
],
[
3.09575,
50.61795
],
[
3.0891,
50.61532
],
[
3.08625,
50.61792
],
[
3.07948,
50.61428
],
[
3.07146,
50.6066
],
[
3.06819,
50.60918
],
[
3.06502,
50.61046
],
[
3.06223,
50.61223
],
[
3.05925,
50.60659
],
[
3.05463,
50.60077
],
[
3.04906,
50.6008
],
[
3.04726,
50.6035
],
[
3.04328,
50.60667
],
[
3.04155,
50.60417
],
[
3.03767,
50.60456
],
[
3.03528,
50.60538
],
[
3.03239,
50.60725
],
[
3.0254,
50.6111
],
[
3.02387,
50.6125
],
[
3.0248,
50.61344
],
[
3.02779,
50.61418
],
[
3.02414,
50.6169
],
[
3.02312,
50.61975
],
[
3.02172,
50.62082
],
[
3.01953,
50.62484
],
[
3.01811,
50.62529
],
[
3.01313,
50.62558
],
[
3.01385,
50.62695
],
[
3.00844,
50.62717
],
[
3.0056,
50.6267
],
[
3.00229,
50.62557
],
[
3.00119,
50.62723
],
[
2.99769,
50.62901
],
[
2.99391,
50.62732
],
[
2.98971,
50.63036
],
[
2.9862,
50.63328
],
[
2.98178,
50.63404
],
[
2.97917,
50.63499
],
[
2.97284,
50.63429
],
[
2.97174,
50.63365
],
[
2.97002,
50.63366
],
[
2.96956,
50.63506
],
[
2.97046,
50.6365
],
[
2.96878,
50.63833
],
[
2.97039,
50.6395
],
[
2.97275,
50.64183
],
[
2.97225,
50.64381
],
[
2.9745,
50.64442
],
[
2.97474,
50.64648
],
[
2.97091,
50.65108
],
[
2.96975,
50.65361
],
[
2.97061,
50.65513
],
[
2.96929,
50.65739
],
[
2.97072,
50.6581
],
[
2.97973,
50.66048
],
[
2.98369,
50.66123
],
[
2.9865,
50.65959
],
[
2.9896,
50.65845
],
[
2.9963,
50.65666
],
[
2.99903,
50.65552
],
[
3.00274,
50.65235
],
[
3.00714,
50.64887
],
[
3.01088,
50.64845
],
[
3.01318,
50.64541
],
[
3.01974,
50.63972
],
[
3.02317,
50.63813
],
[
3.02639,
50.63613
],
[
3.029,
50.63521
],
[
3.03414,
50.6382
],
[
3.03676,
50.63888
],
[
3.03686,
50.64147
],
[
3.03791,
50.64379
],
[
3.0409,
50.64577
],
[
3.04582,
50.64807
],
[
3.05132,
50.64866
],
[
3.05055,
50.64949
],
[
3.05244,
50.65055
],
[
3.05784,
50.64927
],
[
3.0596,
50.65105
],
[
3.06414,
50.65041
],
[
3.06705,
50.64936
],
[
3.07023,
50.64706
],
[
3.07203,
50.64355
],
[
3.07526,
50.64188
],
[
3.0758,
50.64453
],
[
3.07753,
50.64381
],
[
3.07861,
50.64542
],
[
3.08299,
50.64725
],
[
3.08046,
50.64912
],
[
3.08349,
50.65082
],
[
3.08354,
50.65155
],
[
3.08477,
50.65312
],
[
3.08542,
50.65654
],
[
3.08753,
50.65687
],
[
3.09032,
50.65602
],
[
3.09018,
50.65142
],
[
3.09278,
50.65086
],
[
3.09402,
50.64982
],
[
3.09908,
50.65146
],
[
3.10316,
50.65227
],
[
3.09726,
50.64723
],
[
3.09387,
50.64358
],
[
3.09357,
50.64095
],
[
3.09561,
50.64133
],
[
3.09675,
50.64018
],
[
3.09454,
50.63891
],
[
3.09627,
50.63693
],
[
3.09795,
50.63713
],
[
3.09919,
50.63576
],
[
3.10324,
50.6351
],
[
3.10613,
50.63532
],
[
3.10649,
50.63434
],
[
3.1109,
50.63525
],
[
3.11502,
50.63504
],
[
3.11681,
50.63646
]
]
]
}

View File

@@ -0,0 +1,420 @@
use meili_snap::{json_string, snapshot};
use crate::common::{shared_index_geojson_documents, Server};
use crate::json;
const LILLE: &str = include_str!("assets/lille.geojson");
#[actix_rt::test]
async fn basic_add_settings_and_geojson_documents() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _status_code) =
index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
snapshot!(response,
@r#"
{
"hits": [],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 0
}
"#);
let lille: serde_json::Value = serde_json::from_str(LILLE).unwrap();
let documents = json!([
{
"id": "missing",
},
{
"id": "point",
"_geojson": { "type": "Point", "coordinates": [1, 1] },
},
{
"id": "lille",
"_geojson": lille,
},
]);
let (task, _status_code) = index.add_documents(documents, None).await;
let response = server.wait_task(task.uid()).await.succeeded();
snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r#"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "[uuid]",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 3,
"indexedDocuments": 3
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"#);
let (response, code) = index.get_all_documents_raw("?ids=missing,point").await;
snapshot!(code, @"200 OK");
snapshot!(response,
@r#"
{
"results": [
{
"id": "missing"
},
{
"id": "point",
"_geojson": {
"type": "Point",
"coordinates": [
1,
1
]
}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"#);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
snapshot!(response,
@r#"
{
"hits": [
{
"id": "point",
"_geojson": {
"type": "Point",
"coordinates": [
1,
1
]
}
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
}
"#);
}
#[actix_rt::test]
async fn basic_add_geojson_documents_and_settings() {
let server = Server::new_shared();
let index = server.unique_index();
let lille: serde_json::Value = serde_json::from_str(LILLE).unwrap();
let documents = json!([
{
"id": "missing",
},
{
"id": "point",
"_geojson": { "type": "Point", "coordinates": [1, 1] },
},
{
"id": "lille",
"_geojson": lille,
},
]);
let (task, _status_code) = index.add_documents(documents, None).await;
let response = server.wait_task(task.uid()).await.succeeded();
snapshot!(response,
@r#"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "[uuid]",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 3,
"indexedDocuments": 3
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"#);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
snapshot!(response,
@r#"
{
"message": "Index `[uuid]`: Attribute `_geojson` is not filterable. This index does not have configured filterable attributes.\n14:15 _geoPolygon([0,0],[0,2],[2,2],[2,0])",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"#);
let (task, _status_code) =
index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
snapshot!(response,
@r#"
{
"hits": [
{
"id": "point",
"_geojson": {
"type": "Point",
"coordinates": [
1,
1
]
}
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
}
"#);
}
#[actix_rt::test]
async fn add_and_remove_geojson() {
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
let documents = json!([
{
"id": "missing",
},
{
"id": 0,
"_geojson": { "type": "Point", "coordinates": [1, 1] },
}
]);
let (task, _status_code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) =
index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
let (task, _) = index.delete_document(0).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) =
index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
// add it back
let documents = json!([
{
"id": 0,
"_geojson": { "type": "Point", "coordinates": [1, 1] },
}
]);
let (task, _status_code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) =
index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
}
#[actix_rt::test]
async fn partial_update_geojson() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _) = index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
server.wait_task(task.uid()).await.succeeded();
let documents = json!([
{
"id": 0,
"_geojson": { "type": "Point", "coordinates": [1, 1] },
}
]);
let (task, _status_code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) =
index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
let documents = json!([
{
"id": 0,
"_geojson": { "type": "Point", "coordinates": [0.5, 0.5] },
}
]);
let (task, _status_code) = index.update_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) =
index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
let (response, _code) =
index.search_get("?filter=_geoPolygon([0.9,0.9],[0.9,2],[2,2],[2,0.9])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
}
#[actix_rt::test]
async fn geo_bounding_box() {
let index = shared_index_geojson_documents().await;
// The bounding box is a polygon over middle Europe
let (response, code) =
index.search_get("?filter=_geoBoundingBox([50.53987503447863,21.43443989912143],[43.76393151539099,0.54979129195425])&attributesToRetrieve=name").await;
snapshot!(code, @"200 OK");
snapshot!(response, @r#"
{
"hits": [
{
"name": "Austria"
},
{
"name": "Belgium"
},
{
"name": "Bosnia_and_Herzegovina"
},
{
"name": "Switzerland"
},
{
"name": "Czech_Republic"
},
{
"name": "Germany"
},
{
"name": "France"
},
{
"name": "Croatia"
},
{
"name": "Hungary"
},
{
"name": "Italy"
},
{
"name": "Luxembourg"
},
{
"name": "Netherlands"
},
{
"name": "Poland"
},
{
"name": "Romania"
},
{
"name": "Republic_of_Serbia"
},
{
"name": "Slovakia"
},
{
"name": "Slovenia"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 17
}
"#);
// Between Russia and Alaska
let (response, code) = index
.search_get("?filter=_geoBoundingBox([70,-148],[63,152])&attributesToRetrieve=name")
.await;
snapshot!(code, @"200 OK");
snapshot!(response, @r#"
{
"hits": [
{
"name": "Canada"
},
{
"name": "Russia"
},
{
"name": "United_States_of_America"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"#);
}
#[actix_rt::test]
async fn geo_radius() {
let index = shared_index_geojson_documents().await;
// 200km around Luxembourg
let (response, code) = index
.search_get("?filter=_geoRadius(49.4369862,6.5576591,200000)&attributesToRetrieve=name")
.await;
snapshot!(code, @"200 OK");
snapshot!(response, @r#"
{
"hits": [
{
"name": "Belgium"
},
{
"name": "Germany"
},
{
"name": "France"
},
{
"name": "Luxembourg"
},
{
"name": "Netherlands"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 5
}
"#);
}

View File

@@ -1,5 +1,6 @@
mod add_documents;
mod delete_documents;
mod errors;
mod geojson;
mod get_documents;
mod update_documents;

View File

@@ -1,5 +1,4 @@
use meili_snap::snapshot;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;

View File

@@ -642,14 +642,14 @@ async fn filter_invalid_syntax_object() {
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": "title & Glass"}),
|response, code| {
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
"#);
snapshot!(code, @"400 Bad Request");
},
)
@@ -663,14 +663,14 @@ async fn filter_invalid_syntax_array() {
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": ["title & Glass"]}),
|response, code| {
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
"#);
snapshot!(code, @"400 Bad Request");
},
)

View File

@@ -2,8 +2,7 @@ use std::sync::Arc;
use actix_http::StatusCode;
use meili_snap::{json_string, snapshot};
use wiremock::matchers::method;
use wiremock::matchers::{path, AnyMatcher};
use wiremock::matchers::{method, path, AnyMatcher};
use wiremock::{Mock, MockServer, Request, ResponseTemplate};
use crate::common::{Server, Value, SCORE_DOCUMENTS};
@@ -128,7 +127,7 @@ async fn remote_sharding() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -137,7 +136,7 @@ async fn remote_sharding() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -146,7 +145,7 @@ async fn remote_sharding() {
}
"###);
let (response, code) = ms2.set_network(json!({"self": "ms2"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms2",
@@ -193,11 +192,11 @@ async fn remote_sharding() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
let (_response, status_code) = ms1.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
let (_response, status_code) = ms2.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";
@@ -443,7 +442,7 @@ async fn remote_sharding_retrieve_vectors() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -452,7 +451,7 @@ async fn remote_sharding_retrieve_vectors() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -461,7 +460,7 @@ async fn remote_sharding_retrieve_vectors() {
}
"###);
let (response, code) = ms2.set_network(json!({"self": "ms2"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms2",
@@ -543,11 +542,11 @@ async fn remote_sharding_retrieve_vectors() {
}});
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
let (_response, status_code) = ms1.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
let (_response, status_code) = ms2.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// multi vector search: one query per remote
@@ -936,7 +935,7 @@ async fn error_unregistered_remote() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -945,7 +944,7 @@ async fn error_unregistered_remote() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -984,9 +983,9 @@ async fn error_unregistered_remote() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
let (_response, status_code) = ms1.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";
@@ -1056,7 +1055,7 @@ async fn error_no_weighted_score() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -1065,7 +1064,7 @@ async fn error_no_weighted_score() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -1108,7 +1107,7 @@ async fn error_no_weighted_score() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";
@@ -1191,7 +1190,7 @@ async fn error_bad_response() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -1200,7 +1199,7 @@ async fn error_bad_response() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -1246,7 +1245,7 @@ async fn error_bad_response() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";
@@ -1330,7 +1329,7 @@ async fn error_bad_request() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -1339,7 +1338,7 @@ async fn error_bad_request() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -1378,7 +1377,7 @@ async fn error_bad_request() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";
@@ -1462,7 +1461,7 @@ async fn error_bad_request_facets_by_index() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -1471,7 +1470,7 @@ async fn error_bad_request_facets_by_index() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -1511,7 +1510,7 @@ async fn error_bad_request_facets_by_index() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";
@@ -1605,7 +1604,7 @@ async fn error_bad_request_facets_by_index_facet() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -1614,7 +1613,7 @@ async fn error_bad_request_facets_by_index_facet() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -1657,7 +1656,7 @@ async fn error_bad_request_facets_by_index_facet() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";
@@ -1757,7 +1756,7 @@ async fn error_remote_does_not_answer() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -1766,7 +1765,7 @@ async fn error_remote_does_not_answer() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -1808,9 +1807,9 @@ async fn error_remote_does_not_answer() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
let (_response, status_code) = ms1.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";
@@ -1960,7 +1959,7 @@ async fn error_remote_404() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -1969,7 +1968,7 @@ async fn error_remote_404() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -2008,9 +2007,9 @@ async fn error_remote_404() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
let (_response, status_code) = ms1.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";
@@ -2157,7 +2156,7 @@ async fn error_remote_sharding_auth() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -2166,7 +2165,7 @@ async fn error_remote_sharding_auth() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -2212,7 +2211,7 @@ async fn error_remote_sharding_auth() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";
@@ -2319,7 +2318,7 @@ async fn remote_sharding_auth() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -2328,7 +2327,7 @@ async fn remote_sharding_auth() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -2373,7 +2372,7 @@ async fn remote_sharding_auth() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";
@@ -2476,7 +2475,7 @@ async fn error_remote_500() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -2485,7 +2484,7 @@ async fn error_remote_500() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -2528,9 +2527,9 @@ async fn error_remote_500() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
let (_response, status_code) = ms1.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";
@@ -2657,7 +2656,7 @@ async fn error_remote_500_once() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -2666,7 +2665,7 @@ async fn error_remote_500_once() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -2709,9 +2708,9 @@ async fn error_remote_500_once() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
let (_response, status_code) = ms1.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";
@@ -2842,7 +2841,7 @@ async fn error_remote_timeout() {
// set self
let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
@@ -2850,7 +2849,7 @@ async fn error_remote_timeout() {
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
snapshot!(code, @"200 OK");
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
@@ -2892,9 +2891,9 @@ async fn error_remote_timeout() {
println!("{}", serde_json::to_string_pretty(&network).unwrap());
let (_response, status_code) = ms0.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
let (_response, status_code) = ms1.set_network(network.clone()).await;
snapshot!(status_code, @"200 OK");
snapshot!(status_code, @"202 Accepted");
// perform multi-search
let query = "badman returns";

View File

@@ -1,7 +1,8 @@
use meili_snap::{json_string, snapshot};
use super::shared_index_with_documents;
use crate::common::Server;
use crate::json;
use meili_snap::{json_string, snapshot};
#[actix_rt::test]
async fn default_search_should_return_estimated_total_hit() {

View File

@@ -1,6 +1,7 @@
use meili_snap::{json_string, snapshot};
use crate::common::Server;
use crate::json;
use meili_snap::{json_string, snapshot};
#[actix_rt::test]
async fn set_reset_chat_issue_5772() {

View File

@@ -339,14 +339,14 @@ async fn filter_invalid_syntax_object() {
index
.similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| {
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}
"###);
"#);
snapshot!(code, @"400 Bad Request");
})
.await;
@@ -377,14 +377,14 @@ async fn filter_invalid_syntax_array() {
index
.similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| {
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}
"###);
"#);
snapshot!(code, @"400 Bad Request");
})
.await;

View File

@@ -95,14 +95,14 @@ async fn task_bad_types() {
let (response, code) = server.tasks_filter("types=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r#"
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentEdition`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`, `export`, `upgradeDatabase`, `networkTopologyChange`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
}
"#);
"###);
let (response, code) = server.cancel_tasks("types=doggo").await;
snapshot!(code, @"400 Bad Request");

View File

@@ -1,3 +1,5 @@
#![allow(clippy::result_large_err)]
use std::fs::{read_dir, read_to_string, remove_file, File};
use std::io::{BufWriter, Write as _};
use std::path::PathBuf;

View File

@@ -19,6 +19,7 @@ bstr = "1.12.0"
bytemuck = { version = "1.23.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.9.7", default-features = false }
cellulite = "0.3.0"
concat-arrays = "0.1.2"
convert_case = "0.8.0"
crossbeam-channel = "0.5.15"
@@ -27,6 +28,7 @@ either = { version = "1.15.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geojson = "0.24.2"
geoutils = "0.5.1"
grenad = { version = "0.5.0", default-features = false, features = [
"rayon",
@@ -96,7 +98,7 @@ url = "2.5.4"
hashbrown = "0.15.4"
bumpalo = "3.18.1"
bumparaw-collections = "0.1.4"
steppe = { version = "0.4.0", default-features = false }
steppe = { version = "0.4", default-features = false }
thread_local = "1.1.9"
allocator-api2 = "0.3.0"
rustc-hash = "2.1.1"
@@ -116,6 +118,8 @@ twox-hash = { version = "2.1.1", default-features = false, features = [
"xxhash3_64",
"xxhash64",
] }
geo-types = "0.7.16"
zerometry = "0.1.0"
[dev-dependencies]
mimalloc = { version = "0.1.47", default-features = false }

View File

@@ -11,3 +11,4 @@ const fn parse_u32(s: &str) -> u32 {
pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";
pub const RESERVED_GEO_FIELD_NAME: &str = "_geo";
pub const RESERVED_GEOJSON_FIELD_NAME: &str = "_geojson";

View File

@@ -48,6 +48,7 @@ pub enum PrimaryKey<'a> {
Nested { name: &'a str },
}
#[allow(clippy::large_enum_variant)]
pub enum DocumentIdExtractionError {
InvalidDocumentId(UserError),
MissingDocumentId,

View File

@@ -10,17 +10,26 @@ use rhai::EvalAltResult;
use serde_json::Value;
use thiserror::Error;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::documents::{self, DocumentsBatchCursorError};
use crate::thread_pool_no_abort::PanicCatched;
use crate::vector::settings::EmbeddingSettings;
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
pub fn is_reserved_keyword(keyword: &str) -> bool {
[RESERVED_GEO_FIELD_NAME, "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"]
.contains(&keyword)
[
RESERVED_GEO_FIELD_NAME,
RESERVED_GEOJSON_FIELD_NAME,
"_geoDistance",
"_geoPoint",
"_geoRadius",
"_geoBoundingBox",
"_geoPolygon",
]
.contains(&keyword)
}
#[allow(clippy::large_enum_variant)]
#[derive(Error, Debug)]
pub enum Error {
#[error("internal: {0}.")]
@@ -80,6 +89,8 @@ pub enum InternalError {
#[error(transparent)]
HannoyError(#[from] hannoy::Error),
#[error(transparent)]
CelluliteError(#[from] cellulite::Error),
#[error(transparent)]
VectorEmbeddingError(#[from] crate::vector::Error),
}
@@ -99,6 +110,12 @@ pub enum SerializationError {
InvalidNumberSerialization,
}
impl From<cellulite::Error> for Error {
fn from(error: cellulite::Error) -> Self {
Self::UserError(UserError::CelluliteError(error))
}
}
#[derive(Error, Debug)]
pub enum FieldIdMapMissingEntry {
#[error("unknown field id {field_id} coming from the {process} process")]
@@ -107,8 +124,13 @@ pub enum FieldIdMapMissingEntry {
FieldName { field_name: String, process: &'static str },
}
#[allow(clippy::large_enum_variant)]
#[derive(Error, Debug)]
pub enum UserError {
#[error(transparent)]
CelluliteError(#[from] cellulite::Error),
#[error("Malformed geojson: {0}")]
MalformedGeojson(serde_json::Error),
#[error("A document cannot contain more than 65,535 fields.")]
AttributeLimitReached,
#[error(transparent)]
@@ -153,6 +175,8 @@ and can not be more than 511 bytes.", .document_id.to_string()
},
#[error(transparent)]
InvalidGeoField(#[from] Box<GeoError>),
#[error(transparent)]
GeoJsonError(#[from] geojson::Error),
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
InvalidVectorDimensions { expected: usize, found: usize },
#[error("Invalid vector dimensions in document with id `{document_id}` in `._vectors.{embedder_name}`.\n - note: embedding #{embedding_index} has dimensions {found}\n - note: embedder `{embedder_name}` requires {expected}")]
@@ -408,6 +432,10 @@ and can not be more than 511 bytes.", .document_id.to_string()
InvalidChatSettingsDocumentTemplateMaxBytes,
#[error("{0}")]
DocumentEmbeddingError(String),
#[error("enabling the sharding requires `.self` to be set\n - Hint: Disable `sharding` or set `self` to a value.")]
NetworkShardingWithoutSelf,
#[error("Field `.remotes.{0}.url` cannot be set to `null`")]
NetworkMissingUrl(String),
}
impl From<crate::vector::Error> for Error {

View File

@@ -6,7 +6,9 @@ use heed::RoTxn;
use super::FieldsIdsMap;
use crate::attribute_patterns::{match_field_legacy, PatternMatch};
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::constants::{
RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME,
};
use crate::{
is_faceted_by, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Index,
LocalizedAttributesRule, Result, Weight,
@@ -24,6 +26,8 @@ pub struct Metadata {
pub asc_desc: bool,
/// The field is a geo field (`_geo`, `_geo.lat`, `_geo.lng`).
pub geo: bool,
/// The field is a geo json field (`_geojson`).
pub geo_json: bool,
/// The id of the localized attributes rule if the field is localized.
pub localized_attributes_rule_id: Option<NonZeroU16>,
/// The id of the filterable attributes rule if the field is filterable.
@@ -269,6 +273,7 @@ impl MetadataBuilder {
distinct: false,
asc_desc: false,
geo: false,
geo_json: false,
localized_attributes_rule_id: None,
filterable_attributes_rule_id: None,
};
@@ -295,6 +300,20 @@ impl MetadataBuilder {
distinct: false,
asc_desc: false,
geo: true,
geo_json: false,
localized_attributes_rule_id: None,
filterable_attributes_rule_id,
};
}
if match_field_legacy(RESERVED_GEOJSON_FIELD_NAME, field) == PatternMatch::Match {
debug_assert!(!sortable, "geojson fields should not be sortable");
return Metadata {
searchable: None,
sortable,
distinct: false,
asc_desc: false,
geo: false,
geo_json: true,
localized_attributes_rule_id: None,
filterable_attributes_rule_id,
};
@@ -328,6 +347,7 @@ impl MetadataBuilder {
distinct,
asc_desc,
geo: false,
geo_json: false,
localized_attributes_rule_id,
filterable_attributes_rule_id,
}

View File

@@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use crate::attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch};
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::AttributePatterns;
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)]
@@ -34,6 +34,10 @@ impl FilterableAttributesRule {
matches!(self, FilterableAttributesRule::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME)
}
pub fn has_geojson(&self) -> bool {
matches!(self, FilterableAttributesRule::Field(field_name) if field_name == RESERVED_GEOJSON_FIELD_NAME)
}
/// Get the features of the rule.
pub fn features(&self) -> FilterableAttributesFeatures {
match self {

View File

@@ -5,6 +5,7 @@ use std::fmt;
use std::fs::File;
use std::path::Path;
use cellulite::Cellulite;
use deserr::Deserr;
use heed::types::*;
use heed::{CompactionOption, Database, DatabaseStat, RoTxn, RwTxn, Unspecified, WithoutTls};
@@ -115,9 +116,10 @@ pub mod db_name {
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id";
pub const VECTOR_STORE: &str = "vector-arroy";
pub const CELLULITE: &str = "cellulite";
pub const DOCUMENTS: &str = "documents";
}
const NUMBER_OF_DBS: u32 = 25;
const NUMBER_OF_DBS: u32 = 25 + Cellulite::nb_dbs();
#[derive(Clone)]
pub struct Index {
@@ -183,6 +185,9 @@ pub struct Index {
/// Vector store based on hannoy™.
pub vector_store: hannoy::Database<Unspecified>,
/// Geo store based on cellulite™.
pub cellulite: Cellulite,
/// Maps the document id to the document as an obkv store.
pub(crate) documents: Database<BEU32, ObkvCodec>,
}
@@ -239,6 +244,7 @@ impl Index {
let embedder_category_id =
env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?;
let vector_store = env.create_database(&mut wtxn, Some(VECTOR_STORE))?;
let cellulite = cellulite::Cellulite::create_from_env(&env, &mut wtxn, CELLULITE)?;
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
@@ -267,6 +273,7 @@ impl Index {
field_id_docid_facet_strings,
vector_store,
embedder_category_id,
cellulite,
documents,
};
if this.get_version(&wtxn)?.is_none() && creation {
@@ -1052,6 +1059,13 @@ impl Index {
Ok(geo_filter)
}
/// Returns true if the geojson filtering feature is enabled.
pub fn is_geojson_filtering_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
let geojson_filter =
self.filterable_attributes_rules(rtxn)?.iter().any(|field| field.has_geojson());
Ok(geojson_filter)
}
pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
let asc_desc_fields = self
.criteria(rtxn)?
@@ -1882,6 +1896,7 @@ impl Index {
field_id_docid_facet_strings,
vector_store,
embedder_category_id,
cellulite,
documents,
} = self;
@@ -1955,6 +1970,17 @@ impl Index {
sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);
// Cellulite
const _CELLULITE_DB_CHECK: () = {
if Cellulite::nb_dbs() != 4 {
panic!("Cellulite database count has changed, please update the code accordingly.")
}
};
sizes.insert("cellulite_item", cellulite.item_db_stats(rtxn).map(compute_size)?);
sizes.insert("cellulite_cell", cellulite.cell_db_stats(rtxn).map(compute_size)?);
sizes.insert("cellulite_update", cellulite.update_db_stats(rtxn).map(compute_size)?);
sizes.insert("cellulite_metadata", cellulite.metadata_db_stats(rtxn).map(compute_size)?);
Ok(sizes)
}
}
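The size-reporting code above hard-codes four cellulite sub-database entries and guards that assumption with a compile-time check on `Cellulite::nb_dbs()`. A minimal sketch of that const-assertion trick, with purely illustrative constants:

// The build fails if the invariant is broken, so the per-database list cannot silently drift.
const SUB_DB_COUNT: u32 = 4;

const _DB_COUNT_CHECK: () = {
    if SUB_DB_COUNT != 4 {
        panic!("sub-database count changed, update the per-database stats list");
    }
};

fn main() {
    println!("tracking {SUB_DB_COUNT} sub-databases");
}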

View File

@@ -54,7 +54,7 @@ pub use search::new::{
};
use serde_json::Value;
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
pub use {arroy, charabia as tokenizer, hannoy, heed, rhai};
pub use {arroy, cellulite, charabia as tokenizer, hannoy, heed, rhai};
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
pub use self::attribute_patterns::{AttributePatterns, PatternMatch};
@@ -87,7 +87,7 @@ pub use self::search::{
};
pub use self::update::ChannelCongestion;
pub type Result<T> = std::result::Result<T, error::Error>;
pub type Result<T, E = error::Error> = std::result::Result<T, E>;
pub type Attribute = u32;
pub type BEU16 = heed::types::U16<heed::byteorder::BE>;
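The widened `Result` alias above adds a defaulted error parameter: existing `Result<T>` call sites keep compiling, while new code can name a different error type without spelling out `std::result::Result`. A minimal sketch of the same pattern with illustrative error types:

#[derive(Debug)]
struct CrateError;
#[derive(Debug)]
struct OtherError;

// `Result<T>` still means `Result<T, CrateError>`; `Result<T, OtherError>` is now expressible too.
type Result<T, E = CrateError> = std::result::Result<T, E>;

fn default_err() -> Result<u32> {
    Err(CrateError)
}

fn custom_err() -> Result<u32, OtherError> {
    Err(OtherError)
}

fn main() {
    println!("{:?} {:?}", default_err(), custom_err());
}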

View File

@@ -278,30 +278,6 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
}
}
// Integration with steppe
impl steppe::Progress for Progress {
fn update(&self, sub_progress: impl steppe::Step) {
self.update_progress(Compat(sub_progress));
}
}
struct Compat<T: steppe::Step>(T);
impl<T: steppe::Step> Step for Compat<T> {
fn name(&self) -> Cow<'static, str> {
self.0.name()
}
fn current(&self) -> u32 {
self.0.current().try_into().unwrap_or(u32::MAX)
}
fn total(&self) -> u32 {
self.0.total().try_into().unwrap_or(u32::MAX)
}
}
impl Step for arroy::MainStep {
fn name(&self) -> Cow<'static, str> {
match self {
@@ -343,3 +319,27 @@ impl Step for arroy::SubStep {
self.max
}
}
// Integration with steppe
impl steppe::Progress for Progress {
fn update(&self, sub_progress: impl steppe::Step) {
self.update_progress(Compat(sub_progress));
}
}
struct Compat<T: steppe::Step>(T);
impl<T: steppe::Step> Step for Compat<T> {
fn name(&self) -> Cow<'static, str> {
self.0.name()
}
fn current(&self) -> u32 {
self.0.current().try_into().unwrap_or(u32::MAX)
}
fn total(&self) -> u32 {
self.0.total().try_into().unwrap_or(u32::MAX)
}
}

View File

@@ -12,7 +12,9 @@ use roaring::{MultiOps, RoaringBitmap};
use serde_json::Value;
use super::facet_range_search;
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::constants::{
RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME,
};
use crate::error::{Error, UserError};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
@@ -36,6 +38,7 @@ pub struct Filter<'a> {
pub enum BadGeoError {
Lat(f64),
Lng(f64),
InvalidResolution(usize),
BoundingBoxTopIsBelowBottom(f64, f64),
}
@@ -47,16 +50,23 @@ impl Display for BadGeoError {
Self::BoundingBoxTopIsBelowBottom(top, bottom) => {
write!(f, "The top latitude `{top}` is below the bottom latitude `{bottom}`.")
}
Self::InvalidResolution(resolution) => write!(
f,
"Invalid resolution `{resolution}`. Resolution must be between 3 and 1000."
),
Self::Lat(lat) => write!(
f,
"Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ",
"Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees.",
lat
),
Self::Lng(lng) => write!(
f,
"Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. ",
lng
),
Self::Lng(lng) => {
let normalized = (lng + 180.0).rem_euclid(360.0) - 180.0;
write!(
f,
"Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. Hint: try using `{normalized}` instead.",
lng
)
}
}
}
}
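
The new `Lng` message computes its hint with `(lng + 180.0).rem_euclid(360.0) - 180.0`, which wraps any longitude back into the `[-180, 180)` range. A quick standalone check of the values that appear in the snapshot tests further down:

fn normalize_lng(lng: f64) -> f64 {
    (lng + 180.0).rem_euclid(360.0) - 180.0
}

fn main() {
    // 250 is 70 degrees past the antimeridian, so it wraps to -110.
    assert_eq!(normalize_lng(250.0), -110.0);
    // Values just outside the bounds wrap to the other side of the range.
    assert!((normalize_lng(180.000001) - -179.999999).abs() < 1e-9);
    assert!((normalize_lng(-180.000001) - 179.999999).abs() < 1e-9);
}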
@@ -612,50 +622,61 @@ impl<'a> Filter<'a> {
.union(),
FilterCondition::And(subfilters) => {
let mut subfilters_iter = subfilters.iter();
if let Some(first_subfilter) = subfilters_iter.next() {
let mut bitmap = Self::inner_evaluate(
&(first_subfilter.clone()).into(),
let Some(first_subfilter) = subfilters_iter.next() else {
return Ok(RoaringBitmap::new());
};
let mut bitmap = Self::inner_evaluate(
&(first_subfilter.clone()).into(),
rtxn,
index,
field_ids_map,
filterable_attribute_rules,
universe,
)?;
for f in subfilters_iter {
if bitmap.is_empty() {
return Ok(bitmap);
}
// TODO We are doing the intersections two times,
// it could be more efficient
// Can't I just replace this `&=` by an `=`?
bitmap &= Self::inner_evaluate(
&(f.clone()).into(),
rtxn,
index,
field_ids_map,
filterable_attribute_rules,
universe,
Some(&bitmap),
)?;
for f in subfilters_iter {
if bitmap.is_empty() {
return Ok(bitmap);
}
// TODO We are doing the intersections two times,
// it could be more efficient
// Can't I just replace this `&=` by an `=`?
bitmap &= Self::inner_evaluate(
&(f.clone()).into(),
rtxn,
index,
field_ids_map,
filterable_attribute_rules,
Some(&bitmap),
)?;
}
Ok(bitmap)
} else {
Ok(RoaringBitmap::new())
}
Ok(bitmap)
}
FilterCondition::VectorExists { fid: _, embedder, filter } => {
super::filter_vector::evaluate(rtxn, index, universe, embedder.clone(), filter)
}
FilterCondition::GeoLowerThan { point, radius } => {
FilterCondition::GeoLowerThan { point, radius, resolution: res_token } => {
let base_point: [f64; 2] =
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
if !(-90.0..=90.0).contains(&base_point[0]) {
return Err(point[0].as_external_error(BadGeoError::Lat(base_point[0])))?;
}
if !(-180.0..=180.0).contains(&base_point[1]) {
return Err(point[1].as_external_error(BadGeoError::Lng(base_point[1])))?;
}
let radius = radius.parse_finite_float()?;
let mut resolution = 125;
if let Some(res_token) = res_token {
resolution = res_token.parse_finite_float()? as usize;
if !(3..=1000).contains(&resolution) {
return Err(
res_token.as_external_error(BadGeoError::InvalidResolution(resolution))
)?;
}
}
let mut r1 = None;
if index.is_geo_filtering_enabled(rtxn)? {
let base_point: [f64; 2] =
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
if !(-90.0..=90.0).contains(&base_point[0]) {
return Err(point[0].as_external_error(BadGeoError::Lat(base_point[0])))?;
}
if !(-180.0..=180.0).contains(&base_point[1]) {
return Err(point[1].as_external_error(BadGeoError::Lng(base_point[1])))?;
}
let radius = radius.parse_finite_float()?;
let rtree = match index.geo_rtree(rtxn)? {
Some(rtree) => rtree,
None => return Ok(RoaringBitmap::new()),
@@ -671,52 +692,72 @@ impl<'a> Filter<'a> {
})
.map(|point| point.data.0)
.collect();
r1 = Some(result);
}
Ok(result)
} else {
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
attribute: RESERVED_GEO_FIELD_NAME,
filterable_patterns: filtered_matching_patterns(
filterable_attribute_rules,
&|features| features.is_filterable(),
),
}))?
let mut r2 = None;
if index.is_geojson_filtering_enabled(rtxn)? {
let point = geo_types::Point::new(base_point[1], base_point[0]);
let result = index.cellulite.in_circle(rtxn, point, radius, resolution)?;
r2 = Some(RoaringBitmap::from_iter(result)); // TODO: Remove once we update roaring in meilisearch
}
match (r1, r2) {
(Some(r1), Some(r2)) => Ok(r1 | r2),
(Some(r1), None) => Ok(r1),
(None, Some(r2)) => Ok(r2),
(None, None) => {
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
attribute: &format!(
"{RESERVED_GEO_FIELD_NAME}/{RESERVED_GEOJSON_FIELD_NAME}"
),
filterable_patterns: filtered_matching_patterns(
filterable_attribute_rules,
&|features| features.is_filterable(),
),
}))?
}
}
}
FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => {
if index.is_geo_filtering_enabled(rtxn)? {
let top_right: [f64; 2] = [
top_right_point[0].parse_finite_float()?,
top_right_point[1].parse_finite_float()?,
];
let bottom_left: [f64; 2] = [
bottom_left_point[0].parse_finite_float()?,
bottom_left_point[1].parse_finite_float()?,
];
if !(-90.0..=90.0).contains(&top_right[0]) {
return Err(
top_right_point[0].as_external_error(BadGeoError::Lat(top_right[0]))
)?;
}
if !(-180.0..=180.0).contains(&top_right[1]) {
return Err(
top_right_point[1].as_external_error(BadGeoError::Lng(top_right[1]))
)?;
}
if !(-90.0..=90.0).contains(&bottom_left[0]) {
return Err(bottom_left_point[0]
.as_external_error(BadGeoError::Lat(bottom_left[0])))?;
}
if !(-180.0..=180.0).contains(&bottom_left[1]) {
return Err(bottom_left_point[1]
.as_external_error(BadGeoError::Lng(bottom_left[1])))?;
}
if top_right[0] < bottom_left[0] {
return Err(bottom_left_point[1].as_external_error(
BadGeoError::BoundingBoxTopIsBelowBottom(top_right[0], bottom_left[0]),
))?;
}
let top_right: [f64; 2] = [
top_right_point[0].parse_finite_float()?,
top_right_point[1].parse_finite_float()?,
];
let bottom_left: [f64; 2] = [
bottom_left_point[0].parse_finite_float()?,
bottom_left_point[1].parse_finite_float()?,
];
if !(-90.0..=90.0).contains(&top_right[0]) {
return Err(
top_right_point[0].as_external_error(BadGeoError::Lat(top_right[0]))
)?;
}
if !(-180.0..=180.0).contains(&top_right[1]) {
return Err(
top_right_point[1].as_external_error(BadGeoError::Lng(top_right[1]))
)?;
}
if !(-90.0..=90.0).contains(&bottom_left[0]) {
return Err(
bottom_left_point[0].as_external_error(BadGeoError::Lat(bottom_left[0]))
)?;
}
if !(-180.0..=180.0).contains(&bottom_left[1]) {
return Err(
bottom_left_point[1].as_external_error(BadGeoError::Lng(bottom_left[1]))
)?;
}
if top_right[0] < bottom_left[0] {
return Err(bottom_left_point[1].as_external_error(
BadGeoError::BoundingBoxTopIsBelowBottom(top_right[0], bottom_left[0]),
))?;
}
let mut r1 = None;
if index.is_geo_filtering_enabled(rtxn)? {
// Instead of writing a custom `GeoBoundingBox` filter we're simply going to re-use the range
// filter to create the following filter:
// `_geo.lat {top_right[0]} TO {bottom_left[0]} AND _geo.lng {top_right[1]} TO {bottom_left[1]}`
@@ -811,19 +852,76 @@ impl<'a> Filter<'a> {
)?
};
Ok(selected_lat & selected_lng)
} else {
Err(top_right_point[0].as_external_error(
r1 = Some(selected_lat & selected_lng);
}
let mut r2 = None;
if index.is_geojson_filtering_enabled(rtxn)? {
let polygon = geo_types::Polygon::new(
geo_types::LineString(vec![
geo_types::Coord { x: top_right[1], y: top_right[0] },
geo_types::Coord { x: bottom_left[1], y: top_right[0] },
geo_types::Coord { x: bottom_left[1], y: bottom_left[0] },
geo_types::Coord { x: top_right[1], y: bottom_left[0] },
]),
Vec::new(),
);
let result = index.cellulite.in_shape(rtxn, &polygon)?;
r2 = Some(RoaringBitmap::from_iter(result)); // TODO: Remove once we update roaring in meilisearch
}
match (r1, r2) {
(Some(r1), Some(r2)) => Ok(r1 | r2),
(Some(r1), None) => Ok(r1),
(None, Some(r2)) => Ok(r2),
(None, None) => Err(top_right_point[0].as_external_error(
FilterError::AttributeNotFilterable {
attribute: RESERVED_GEO_FIELD_NAME,
attribute: &format!(
"{RESERVED_GEO_FIELD_NAME}/{RESERVED_GEOJSON_FIELD_NAME}"
),
filterable_patterns: filtered_matching_patterns(
filterable_attribute_rules,
&|features| features.is_filterable(),
),
},
))?
))?,
}
}
FilterCondition::GeoPolygon { points } => {
if !index.is_geojson_filtering_enabled(rtxn)? {
return Err(points[0][0].as_external_error(
FilterError::AttributeNotFilterable {
attribute: RESERVED_GEOJSON_FIELD_NAME,
filterable_patterns: filtered_matching_patterns(
filterable_attribute_rules,
&|features| features.is_filterable(),
),
},
))?;
}
let mut coords = Vec::new();
for [lat_token, lng_token] in points {
let lat = lat_token.parse_finite_float()?;
let lng = lng_token.parse_finite_float()?;
if !(-90.0..=90.0).contains(&lat) {
return Err(lat_token.as_external_error(BadGeoError::Lat(lat)))?;
}
if !(-180.0..=180.0).contains(&lng) {
return Err(lng_token.as_external_error(BadGeoError::Lng(lng)))?;
}
coords.push(geo_types::Coord { x: lng, y: lat });
}
let polygon = geo_types::Polygon::new(geo_types::LineString(coords), Vec::new());
let result = index.cellulite.in_shape(rtxn, &polygon)?;
let result = roaring::RoaringBitmap::from_iter(result); // TODO: Remove once we update roaring in meilisearch
Ok(result)
}
}
}
}
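
Worth noting when reading the cellulite branches above: the filter syntax takes `[lat, lng]` pairs, but `geo_types` and cellulite expect `x = longitude, y = latitude`, so every coordinate is flipped when the polygon is built. A small standalone sketch of that conversion, assuming only the `geo-types` crate (the helper name is made up for illustration):

use geo_types::{Coord, LineString, Polygon};

/// Builds a polygon from `[lat, lng]` pairs, flipping them into the
/// `x = longitude, y = latitude` order expected by geo_types/cellulite.
fn polygon_from_lat_lng(points: &[[f64; 2]]) -> Polygon<f64> {
    let coords: Vec<Coord<f64>> =
        points.iter().map(|[lat, lng]| Coord { x: *lng, y: *lat }).collect();
    // Polygon::new closes the exterior ring automatically if needed.
    Polygon::new(LineString(coords), Vec::new())
}

fn main() {
    // A rough triangle around Paris, given as [lat, lng] like in the filter syntax.
    let polygon = polygon_from_lat_lng(&[[48.9, 2.2], [48.8, 2.5], [48.7, 2.3]]);
    println!("exterior has {} coordinates", polygon.exterior().0.len());
}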
@@ -962,17 +1060,17 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
snapshot!(error.to_string(), @r"
Attribute `_geo/_geojson` is not filterable. This index does not have configured filterable attributes.
12:14 _geoRadius(42, 150, 10)
"###);
");
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
snapshot!(error.to_string(), @r"
Attribute `_geo/_geojson` is not filterable. This index does not have configured filterable attributes.
18:20 _geoBoundingBox([42, 150], [30, 10])
"###);
");
let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
@@ -993,19 +1091,19 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
let filter = Filter::from_str("_geoRadius(-90, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. Available filterable attribute patterns are: `title`.
12:16 _geoRadius(-100, 150, 10)
"###);
snapshot!(error.to_string(), @r"
Attribute `_geo/_geojson` is not filterable. Available filterable attribute patterns are: `title`.
12:15 _geoRadius(-90, 150, 10)
");
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. Available filterable attribute patterns are: `title`.
snapshot!(error.to_string(), @r"
Attribute `_geo/_geojson` is not filterable. Available filterable attribute patterns are: `title`.
18:20 _geoBoundingBox([42, 150], [30, 10])
"###);
");
let filter = Filter::from_str("name = 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
@@ -1153,38 +1251,34 @@ mod tests {
// georadius have a bad latitude
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(
error.to_string().starts_with(
"Bad latitude `-100`. Latitude must be contained between -90 and 90 degrees."
),
"{}",
error.to_string()
);
snapshot!(error.to_string(), @r"
Bad latitude `-100`. Latitude must be contained between -90 and 90 degrees.
12:16 _geoRadius(-100, 150, 10)
");
// georadius have a bad latitude
let filter = Filter::from_str("_geoRadius(-90.0000001, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
));
snapshot!(error.to_string(), @r"
Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees.
12:23 _geoRadius(-90.0000001, 150, 10)
");
// georadius have a bad longitude
let filter = Filter::from_str("_geoRadius(-10, 250, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(
error.to_string().contains(
"Bad longitude `250`. Longitude must be contained between -180 and 180 degrees."
),
"{}",
error.to_string(),
);
snapshot!(error.to_string(), @r"
Bad longitude `250`. Longitude must be contained between -180 and 180 degrees. Hint: try using `-110` instead.
17:20 _geoRadius(-10, 250, 10)
");
// georadius have a bad longitude
let filter = Filter::from_str("_geoRadius(-10, 180.000001, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
));
snapshot!(error.to_string(), @r"
Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `-179.999999` instead.
17:27 _geoRadius(-10, 180.000001, 10)
");
}
#[test]
@@ -1207,73 +1301,73 @@ mod tests {
let filter =
Filter::from_str("_geoBoundingBox([-90.0000001, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(
error.to_string().starts_with(
"Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
),
"{}",
error.to_string()
);
snapshot!(error.to_string(), @r"
Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees.
18:29 _geoBoundingBox([-90.0000001, 150], [30, 10])
");
// geoboundingbox top left coord have a bad latitude
let filter =
Filter::from_str("_geoBoundingBox([90.0000001, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(
error.to_string().starts_with(
"Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees."
),
"{}",
error.to_string()
);
snapshot!(error.to_string(), @r"
Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees.
18:28 _geoBoundingBox([90.0000001, 150], [30, 10])
");
// geoboundingbox bottom right coord have a bad latitude
let filter =
Filter::from_str("_geoBoundingBox([30, 10], [-90.0000001, 150])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
));
snapshot!(error.to_string(), @r"
Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees.
28:39 _geoBoundingBox([30, 10], [-90.0000001, 150])
");
// geoboundingbox bottom right coord have a bad latitude
let filter =
Filter::from_str("_geoBoundingBox([30, 10], [90.0000001, 150])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees."
));
snapshot!(error.to_string(), @r"
Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees.
28:38 _geoBoundingBox([30, 10], [90.0000001, 150])
");
// geoboundingbox top left coord have a bad longitude
let filter =
Filter::from_str("_geoBoundingBox([-10, 180.000001], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
));
snapshot!(error.to_string(), @r"
Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `-179.999999` instead.
23:33 _geoBoundingBox([-10, 180.000001], [30, 10])
");
// geoboundingbox top left coord have a bad longitude
let filter =
Filter::from_str("_geoBoundingBox([-10, -180.000001], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees."
));
snapshot!(error.to_string(), @r"
Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `179.999999` instead.
23:34 _geoBoundingBox([-10, -180.000001], [30, 10])
");
// geoboundingbox bottom right coord have a bad longitude
let filter =
Filter::from_str("_geoBoundingBox([30, 10], [-10, -180.000001])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees."
));
snapshot!(error.to_string(), @r"
Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `179.999999` instead.
33:44 _geoBoundingBox([30, 10], [-10, -180.000001])
");
// geoboundingbox bottom right coord have a bad longitude
let filter =
Filter::from_str("_geoBoundingBox([30, 10], [-10, 180.000001])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
));
snapshot!(error.to_string(), @r"
Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `-179.999999` instead.
33:43 _geoBoundingBox([30, 10], [-10, 180.000001])
");
}
#[test]

View File

@@ -48,6 +48,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
field_id_docid_facet_strings,
vector_store,
embedder_category_id: _,
cellulite,
documents,
} = self.index;
@@ -90,6 +91,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
field_id_docid_facet_strings.clear(self.wtxn)?;
// vector
vector_store.clear(self.wtxn)?;
cellulite.clear(self.wtxn)?;
documents.clear(self.wtxn)?;

View File

@@ -2,6 +2,7 @@ use std::fs::File;
use std::io::{self, BufReader};
use concat_arrays::concat_arrays;
use geojson::GeoJson;
use serde_json::Value;
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
@@ -9,7 +10,7 @@ use crate::error::GeoError;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::extract_finite_float_from_value;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::{FieldId, InternalError, Result};
use crate::{DocumentId, FieldId, InternalError, Result, UserError};
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
///
@@ -107,3 +108,72 @@ fn extract_lat_lng(
None => Ok(None),
}
}
/// Extracts the GeoJSON shapes contained in each document under the `_geojson` field.
///
/// Returns the generated grenad reader containing the docid as key associated with a del/add obkv of its raw GeoJSON value
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_geojson<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
primary_key_id: FieldId,
settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> {
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::from_slice(value);
// Since we only need the primary key when we throw an error,
// we create this getter to fetch it lazily when needed.
let document_id = || -> Value {
let reader = KvReaderDelAdd::from_slice(obkv.get(primary_key_id).unwrap());
let document_id =
reader.get(DelAdd::Deletion).or(reader.get(DelAdd::Addition)).unwrap();
serde_json::from_slice(document_id).unwrap()
};
// extract old version
let del_geojson =
extract_geojson_field(obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
// extract new version
let add_geojson =
extract_geojson_field(obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
if del_geojson != add_geojson {
let mut obkv = KvWriterDelAdd::memory();
if del_geojson.is_some() {
// We don't need to store the geojson, we'll just delete it by id
obkv.insert(DelAdd::Deletion, [])?;
}
if let Some(geojson) = add_geojson {
obkv.insert(DelAdd::Addition, geojson.to_string().as_bytes())?;
}
let bytes = obkv.into_inner()?;
writer.insert(&docid_bytes[0..std::mem::size_of::<DocumentId>()], bytes)?;
}
}
writer_into_reader(writer)
}
fn extract_geojson_field(
obkv: &obkv::KvReader<FieldId>,
settings: &InnerIndexSettings,
deladd: DelAdd,
_document_id: impl Fn() -> Value,
) -> Result<Option<GeoJson>> {
match settings.geojson_fid {
Some(fid) if settings.filterable_attributes_rules.iter().any(|rule| rule.has_geojson()) => {
let value = obkv.get(fid).map(KvReaderDelAdd::from_slice).and_then(|r| r.get(deladd));
Ok(value
.map(|v| GeoJson::from_reader(v).map_err(UserError::MalformedGeojson))
.transpose()?)
}
_ => Ok(None),
}
}
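
The extractor above only emits an entry when the `_geojson` value actually changed: a deletion is stored as an empty marker (the docid alone is enough to remove the shape from cellulite later), while an addition carries the GeoJSON serialized back to a string. A minimal standalone sketch of the parsing step it relies on, assuming only the `geojson` crate:

use geojson::GeoJson;

fn main() -> Result<(), geojson::Error> {
    // `_geojson` values are parsed with GeoJson::from_reader; malformed input
    // surfaces as a user error instead of being silently dropped.
    let raw: &[u8] = br#"{ "type": "Point", "coordinates": [2.35, 48.86] }"#;
    let geojson = GeoJson::from_reader(raw)?;
    // The old indexing pipeline stores the value re-serialized as a string.
    let serialized = geojson.to_string();
    assert!(serialized.contains("Point"));
    Ok(())
}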

View File

@@ -31,6 +31,7 @@ use self::extract_word_position_docids::extract_word_position_docids;
use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
use super::{helpers, TypedChunk};
use crate::progress::EmbedderStats;
use crate::update::index_documents::extract::extract_geo_points::extract_geojson;
use crate::update::index_documents::extract::extract_vector_points::extract_embeddings_from_fragments;
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::db::EmbedderInfo;
@@ -62,6 +63,7 @@ pub(crate) fn data_from_obkv_documents(
original_documents_chunk,
indexer,
lmdb_writer_sx.clone(),
primary_key_id,
settings_diff.clone(),
embedder_info.clone(),
possible_embedding_mistakes.clone(),
@@ -228,10 +230,12 @@ pub fn request_threads() -> &'static ThreadPoolNoAbort {
/// Extract chunked data and send it into lmdb_writer_sx sender:
/// - documents
#[allow(clippy::too_many_arguments)]
fn send_original_documents_data(
original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
primary_key_id: FieldId,
settings_diff: Arc<InnerIndexSettingsDiff>,
embedder_info: Arc<Vec<(String, EmbedderInfo)>>,
possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
@@ -240,6 +244,20 @@ fn send_original_documents_data(
let original_documents_chunk =
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
if settings_diff.reindex_geojson() {
let documents_chunk_cloned = original_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
let settings_diff = settings_diff.clone();
rayon::spawn(move || {
let result =
extract_geojson(documents_chunk_cloned, indexer, primary_key_id, &settings_diff);
let _ = match result {
Ok(geojson) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoJson(geojson))),
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
};
});
}
let index_vectors = (settings_diff.reindex_vectors() || !settings_diff.settings_update_only())
// no point in indexing vectors without embedders
&& (!settings_diff.new.runtime_embedders.inner_as_ref().is_empty());

View File

@@ -523,7 +523,7 @@ where
.is_some_and(|conf| conf.is_quantized);
let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
pool.install(|| {
pool.install(|| -> Result<_> {
let mut writer =
VectorStore::new(backend, vector_store, embedder_index, was_quantized);
writer.build_and_quantize(
@@ -541,6 +541,8 @@ where
.map_err(InternalError::from)??;
}
self.index.cellulite.build(self.wtxn, &self.should_abort, &Progress::default())?;
self.execute_prefix_databases(
word_docids.map(MergerBuilder::build),
exact_word_docids.map(MergerBuilder::build),

View File

@@ -820,19 +820,20 @@ impl<'a, 'i> Transform<'a, 'i> {
let documents_count = documents_ids.len() as usize;
// We initialize the sorter with the user indexing settings.
let mut original_sorter = if settings_diff.reindex_vectors() {
Some(create_sorter(
grenad::SortAlgorithm::Stable,
KeepFirst,
self.indexer_settings.chunk_compression_type,
self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks,
self.indexer_settings.max_memory.map(|mem| mem / 2),
true,
))
} else {
None
};
let mut original_sorter =
if settings_diff.reindex_vectors() || settings_diff.reindex_geojson() {
Some(create_sorter(
grenad::SortAlgorithm::Stable,
KeepFirst,
self.indexer_settings.chunk_compression_type,
self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks,
self.indexer_settings.max_memory.map(|mem| mem / 2),
true,
))
} else {
None
};
let backend = self.index.get_vector_store(wtxn)?.unwrap_or_default();
let readers: BTreeMap<&str, (VectorStore, &RoaringBitmap)> = settings_diff

View File

@@ -30,7 +30,7 @@ use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig};
use crate::vector::VectorStore;
use crate::{
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
Result, SerializationError, U8StrStrCodec,
Result, SerializationError, U8StrStrCodec, UserError,
};
/// This struct accumulates and group the TypedChunks
@@ -85,6 +85,7 @@ pub(crate) enum TypedChunk {
FieldIdFacetIsNullDocids(grenad::Reader<BufReader<File>>),
FieldIdFacetIsEmptyDocids(grenad::Reader<BufReader<File>>),
GeoPoints(grenad::Reader<BufReader<File>>),
GeoJson(grenad::Reader<BufReader<File>>),
VectorPoints {
remove_vectors: grenad::Reader<BufReader<File>>,
// docid -> vector
@@ -614,6 +615,36 @@ pub(crate) fn write_typed_chunk_into_index(
index.put_geo_rtree(wtxn, &rtree)?;
index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
}
TypedChunk::GeoJson(_) => {
let span = tracing::trace_span!(target: "indexing::write_db", "geo_json");
let _entered = span.enter();
let mut builder = MergerBuilder::new(KeepFirst);
for typed_chunk in typed_chunks {
let TypedChunk::GeoJson(chunk) = typed_chunk else {
unreachable!();
};
builder.push(chunk.into_cursor()?);
}
let merger = builder.build();
let mut iter = merger.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? {
// convert the key back to a u32 (4 bytes)
let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
let deladd_obkv = KvReaderDelAdd::from_slice(value);
if let Some(_value) = deladd_obkv.get(DelAdd::Deletion) {
index.cellulite.delete(wtxn, docid)?;
}
if let Some(value) = deladd_obkv.get(DelAdd::Addition) {
let geojson =
geojson::GeoJson::from_reader(value).map_err(UserError::SerdeJson)?;
index.cellulite.add(wtxn, docid, &geojson)?;
}
}
}
TypedChunk::VectorPoints { .. } => {
let span = tracing::trace_span!(target: "indexing::write_db", "vector_points");
let _entered = span.enter();

View File

@@ -139,6 +139,7 @@ pub enum ReceiverAction {
LargeEntry(LargeEntry),
LargeVectors(LargeVectors),
LargeVector(LargeVector),
LargeGeoJson(LargeGeoJson),
}
/// An entry that cannot fit in the BBQueue buffers has been
@@ -193,6 +194,14 @@ impl LargeVector {
}
}
#[derive(Debug)]
pub struct LargeGeoJson {
/// The document id associated to the large geojson.
pub docid: DocumentId,
/// The large geojson that must be written.
pub geojson: Mmap,
}
impl<'a> WriterBbqueueReceiver<'a> {
/// Tries to receive an action to do until the timeout occurs
/// and if it does, consider it as a spurious wake up.
@@ -258,10 +267,12 @@ pub enum EntryHeader {
DeleteVector(DeleteVector),
SetVectors(SetVectors),
SetVector(SetVector),
CelluliteItem(DocumentId),
CelluliteRemove(DocumentId),
}
impl EntryHeader {
const fn variant_size() -> usize {
pub const fn variant_size() -> usize {
mem::size_of::<u8>()
}
@@ -271,6 +282,8 @@ impl EntryHeader {
EntryHeader::DeleteVector(_) => 1,
EntryHeader::SetVectors(_) => 2,
EntryHeader::SetVector(_) => 3,
EntryHeader::CelluliteItem(_) => 4,
EntryHeader::CelluliteRemove(_) => 5,
}
}
@@ -289,6 +302,14 @@ impl EntryHeader {
Self::variant_size() + mem::size_of::<DeleteVector>()
}
const fn total_cellulite_item_size(value_length: usize) -> usize {
Self::variant_size() + mem::size_of::<DocumentId>() + value_length
}
const fn total_cellulite_remove_size() -> usize {
Self::variant_size() + mem::size_of::<DocumentId>()
}
/// The `dimensions` corresponds to the number of `f32` in the embedding.
fn total_set_vectors_size(count: usize, dimensions: usize) -> usize {
let embedding_size = dimensions * mem::size_of::<f32>();
@@ -306,6 +327,8 @@ impl EntryHeader {
EntryHeader::DeleteVector(adv) => mem::size_of_val(adv),
EntryHeader::SetVectors(asvs) => mem::size_of_val(asvs),
EntryHeader::SetVector(asv) => mem::size_of_val(asv),
EntryHeader::CelluliteItem(docid) => mem::size_of_val(docid),
EntryHeader::CelluliteRemove(docid) => mem::size_of_val(docid),
};
Self::variant_size() + payload_size
}
@@ -333,6 +356,16 @@ impl EntryHeader {
let header = checked::pod_read_unaligned(header_bytes);
EntryHeader::SetVector(header)
}
4 => {
let header_bytes = &remaining[..mem::size_of::<DocumentId>()];
let header = checked::pod_read_unaligned(header_bytes);
EntryHeader::CelluliteItem(header)
}
5 => {
let header_bytes = &remaining[..mem::size_of::<DocumentId>()];
let header = checked::pod_read_unaligned(header_bytes);
EntryHeader::CelluliteRemove(header)
}
id => panic!("invalid variant id: {id}"),
}
}
@@ -344,6 +377,8 @@ impl EntryHeader {
EntryHeader::DeleteVector(adv) => bytemuck::bytes_of(adv),
EntryHeader::SetVectors(asvs) => bytemuck::bytes_of(asvs),
EntryHeader::SetVector(asv) => bytemuck::bytes_of(asv),
EntryHeader::CelluliteItem(docid) => bytemuck::bytes_of(docid),
EntryHeader::CelluliteRemove(docid) => bytemuck::bytes_of(docid),
};
*first = self.variant_id();
remaining.copy_from_slice(payload_bytes);
@@ -548,6 +583,10 @@ impl<'b> ExtractorBbqueueSender<'b> {
GeoSender(self)
}
pub fn geojson<'a>(&'a self) -> GeoJsonSender<'a, 'b> {
GeoJsonSender(self)
}
fn delete_vector(&self, docid: DocumentId) -> crate::Result<()> {
let max_grant = self.max_grant;
let refcell = self.producers.get().unwrap();
@@ -1139,3 +1178,72 @@ impl GeoSender<'_, '_> {
)
}
}
#[derive(Clone, Copy)]
pub struct GeoJsonSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>);
impl GeoJsonSender<'_, '_> {
pub fn send_geojson(&self, docid: DocumentId, value: Vec<u8>) -> crate::Result<()> {
let max_grant = self.0.max_grant;
let refcell = self.0.producers.get().unwrap();
let mut producer = refcell.0.borrow_mut_or_yield();
let payload_header = EntryHeader::CelluliteItem(docid);
let value_length = value.len();
let total_length = EntryHeader::total_cellulite_item_size(value_length);
if total_length > max_grant {
let mut value_file = tempfile::tempfile().map(BufWriter::new)?;
let mut geojson_bytes: &[u8] = bytemuck::cast_slice(&value);
io::copy(&mut geojson_bytes, &mut value_file)?;
let value_file = value_file.into_inner().map_err(|ie| ie.into_error())?;
let geojson = unsafe { Mmap::map(&value_file)? }; // Safe because the file is never modified
let large_geojson = LargeGeoJson { docid, geojson };
self.0.sender.send(ReceiverAction::LargeGeoJson(large_geojson)).unwrap();
return Ok(());
}
// Spin loop to have a frame the size we requested.
reserve_and_write_grant(
&mut producer,
total_length,
&self.0.sender,
&self.0.sent_messages_attempts,
&self.0.blocking_sent_messages_attempts,
|grant| {
let header_size = payload_header.header_size();
let (header_bytes, remaining) = grant.split_at_mut(header_size);
payload_header.serialize_into(header_bytes);
remaining.copy_from_slice(&value);
Ok(())
},
)?;
Ok(())
}
pub fn delete_geojson(&self, docid: DocumentId) -> crate::Result<()> {
let refcell = self.0.producers.get().unwrap();
let mut producer = refcell.0.borrow_mut_or_yield();
let payload_header = EntryHeader::CelluliteRemove(docid);
let total_length = EntryHeader::total_cellulite_remove_size();
reserve_and_write_grant(
&mut producer,
total_length,
&self.0.sender,
&self.0.sent_messages_attempts,
&self.0.blocking_sent_messages_attempts,
|grant| {
payload_header.serialize_into(grant);
Ok(())
},
)?;
Ok(())
}
}
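
A `CelluliteItem` frame on the bbqueue is laid out as one variant byte (`4`), followed by the `DocumentId` as raw native-endian bytes (which is what `bytemuck::bytes_of` produces for a `u32`), followed by the zerometry payload; `total_cellulite_item_size` is simply the sum of those three parts, and the reader side skips the first two to recover the payload. A simplified standalone sketch of that layout, without the bbqueue grant machinery:

use std::mem;

type DocumentId = u32;

const VARIANT_SIZE: usize = mem::size_of::<u8>();
const CELLULITE_ITEM_VARIANT: u8 = 4;

// Mirrors EntryHeader::total_cellulite_item_size.
const fn total_cellulite_item_size(value_length: usize) -> usize {
    VARIANT_SIZE + mem::size_of::<DocumentId>() + value_length
}

fn encode_frame(docid: DocumentId, payload: &[u8]) -> Vec<u8> {
    let mut frame = Vec::with_capacity(total_cellulite_item_size(payload.len()));
    frame.push(CELLULITE_ITEM_VARIANT);
    frame.extend_from_slice(&docid.to_ne_bytes()); // native layout, like bytemuck::bytes_of
    frame.extend_from_slice(payload);
    frame
}

fn decode_frame(frame: &[u8]) -> (DocumentId, &[u8]) {
    let skip = VARIANT_SIZE + mem::size_of::<DocumentId>();
    let docid = DocumentId::from_ne_bytes(frame[VARIANT_SIZE..skip].try_into().unwrap());
    (docid, &frame[skip..])
}

fn main() {
    let frame = encode_frame(42, b"zerometry bytes");
    let (docid, payload) = decode_frame(&frame);
    assert_eq!(docid, 42);
    assert_eq!(payload, b"zerometry bytes");
}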

View File

@@ -10,7 +10,9 @@ use serde_json::value::RawValue;
use super::vector_document::VectorDocument;
use super::{KvReaderFieldId, KvWriterFieldId};
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::constants::{
RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME,
};
use crate::documents::FieldIdMapper;
use crate::update::del_add::KvReaderDelAdd;
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
@@ -29,10 +31,10 @@ pub trait Document<'doc> {
/// Iterate over all **top-level** fields of the document, returning their name and raw JSON value.
///
/// - The returned values *may* contain nested fields.
/// - The `_vectors` and `_geo` fields are **ignored** by this method, meaning they are **not returned** by this method.
/// - The `_vectors`, `_geo` and `_geojson` fields are **ignored** by this method, meaning they are **not returned** by this method.
fn iter_top_level_fields(&self) -> impl Iterator<Item = Result<(&'doc str, &'doc RawValue)>>;
/// Number of top level fields, **excluding** `_vectors` and `_geo`
/// Number of top level fields, **excluding** `_vectors`, `_geo` and `_geojson`.
fn top_level_fields_count(&self) -> usize;
/// Get the **top-level** with the specified name, if exists.
@@ -50,11 +52,13 @@ pub trait Document<'doc> {
/// Returns the unparsed value of the `_geo` field from the document data.
///
/// This field alone is insufficient to retrieve geo data, as they may be stored in a dedicated location in the database.
/// Use a [`super::geo_document::GeoDocument`] to access the geo data.
///
/// This method is meant as a convenience for implementors of [`super::geo_document::GeoDocument`].
fn geo_field(&self) -> Result<Option<&'doc RawValue>>;
/// Returns the unparsed value of the `_geojson` field from the document data.
///
/// This method is meant as a convenience for implementors of [`super::geo_document::GeoDocument`].
fn geojson_field(&self) -> Result<Option<&'doc RawValue>>;
}
#[derive(Debug)]
@@ -90,7 +94,10 @@ impl<'t, Mapper: FieldIdMapper> Document<'t> for DocumentFromDb<'t, Mapper> {
Err(error) => return Some(Err(error.into())),
};
if name == RESERVED_VECTORS_FIELD_NAME || name == RESERVED_GEO_FIELD_NAME {
if name == RESERVED_VECTORS_FIELD_NAME
|| name == RESERVED_GEO_FIELD_NAME
|| name == RESERVED_GEOJSON_FIELD_NAME
{
continue;
}
@@ -113,19 +120,24 @@ impl<'t, Mapper: FieldIdMapper> Document<'t> for DocumentFromDb<'t, Mapper> {
self.field(RESERVED_GEO_FIELD_NAME)
}
fn geojson_field(&self) -> Result<Option<&'t RawValue>> {
self.field(RESERVED_GEOJSON_FIELD_NAME)
}
fn top_level_fields_count(&self) -> usize {
let has_vectors_field = self.vectors_field().unwrap_or(None).is_some();
let has_geo_field = self.geo_field().unwrap_or(None).is_some();
let has_geojson_field = self.geojson_field().unwrap_or(None).is_some();
let count = self.content.iter().count();
match (has_vectors_field, has_geo_field) {
(true, true) => count - 2,
(true, false) | (false, true) => count - 1,
(false, false) => count,
}
count - has_vectors_field as usize - has_geo_field as usize - has_geojson_field as usize
}
fn top_level_field(&self, k: &str) -> Result<Option<&'t RawValue>> {
if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME {
if k == RESERVED_VECTORS_FIELD_NAME
|| k == RESERVED_GEO_FIELD_NAME
|| k == RESERVED_GEOJSON_FIELD_NAME
{
return Ok(None);
}
self.field(k)
@@ -177,15 +189,16 @@ impl<'doc> Document<'doc> for DocumentFromVersions<'_, 'doc> {
Ok(self.versions.geo_field())
}
fn geojson_field(&self) -> Result<Option<&'doc RawValue>> {
Ok(self.versions.geojson_field())
}
fn top_level_fields_count(&self) -> usize {
let has_vectors_field = self.vectors_field().unwrap_or(None).is_some();
let has_geo_field = self.geo_field().unwrap_or(None).is_some();
let has_geojson_field = self.geojson_field().unwrap_or(None).is_some();
let count = self.versions.len();
match (has_vectors_field, has_geo_field) {
(true, true) => count - 2,
(true, false) | (false, true) => count - 1,
(false, false) => count,
}
count - has_vectors_field as usize - has_geo_field as usize - has_geojson_field as usize
}
fn top_level_field(&self, k: &str) -> Result<Option<&'doc RawValue>> {
@@ -265,6 +278,16 @@ impl<'d, 'doc: 'd, 't: 'd, Mapper: FieldIdMapper> Document<'d>
db.geo_field()
}
fn geojson_field(&self) -> Result<Option<&'d RawValue>> {
if let Some(geojson) = self.new_doc.geojson_field()? {
return Ok(Some(geojson));
}
let Some(db) = self.db else { return Ok(None) };
db.geojson_field()
}
fn top_level_fields_count(&self) -> usize {
self.iter_top_level_fields().count()
}
@@ -296,6 +319,10 @@ where
D::geo_field(self)
}
fn geojson_field(&self) -> Result<Option<&'doc RawValue>> {
D::geojson_field(self)
}
fn top_level_fields_count(&self) -> usize {
D::top_level_fields_count(self)
}
@@ -405,6 +432,13 @@ where
unordered_field_buffer.push((fid, geo_value));
}
if let Some(geojson_value) = document.geojson_field()? {
let fid = fields_ids_map
.id_or_insert(RESERVED_GEOJSON_FIELD_NAME)
.ok_or(UserError::AttributeLimitReached)?;
unordered_field_buffer.push((fid, geojson_value));
}
unordered_field_buffer.sort_by_key(|(fid, _)| *fid);
for (fid, value) in unordered_field_buffer.iter() {
writer.insert(*fid, value.get().as_bytes()).unwrap();
@@ -441,9 +475,11 @@ impl<'doc> Versions<'doc> {
}
pub fn iter_top_level_fields(&self) -> impl Iterator<Item = (&'doc str, &'doc RawValue)> + '_ {
self.data
.iter()
.filter(|(k, _)| *k != RESERVED_VECTORS_FIELD_NAME && *k != RESERVED_GEO_FIELD_NAME)
self.data.iter().filter(|(k, _)| {
*k != RESERVED_VECTORS_FIELD_NAME
&& *k != RESERVED_GEO_FIELD_NAME
&& *k != RESERVED_GEOJSON_FIELD_NAME
})
}
pub fn vectors_field(&self) -> Option<&'doc RawValue> {
@@ -454,6 +490,10 @@ impl<'doc> Versions<'doc> {
self.data.get(RESERVED_GEO_FIELD_NAME)
}
pub fn geojson_field(&self) -> Option<&'doc RawValue> {
self.data.get(RESERVED_GEOJSON_FIELD_NAME)
}
pub fn len(&self) -> usize {
self.data.len()
}
@@ -463,7 +503,10 @@ impl<'doc> Versions<'doc> {
}
pub fn top_level_field(&self, k: &str) -> Option<&'doc RawValue> {
if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME {
if k == RESERVED_VECTORS_FIELD_NAME
|| k == RESERVED_GEO_FIELD_NAME
|| k == RESERVED_GEOJSON_FIELD_NAME
{
return None;
}
self.data.get(k)
@@ -516,7 +559,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
Err(error) => return Some(Err(error.into())),
};
if name == RESERVED_VECTORS_FIELD_NAME || name == RESERVED_GEO_FIELD_NAME {
if name == RESERVED_VECTORS_FIELD_NAME
|| name == RESERVED_GEO_FIELD_NAME
|| name == RESERVED_GEOJSON_FIELD_NAME
{
continue;
}
@@ -549,7 +595,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
Err(_) => return Some(()),
};
if name == RESERVED_VECTORS_FIELD_NAME || name == RESERVED_GEO_FIELD_NAME {
if name == RESERVED_VECTORS_FIELD_NAME
|| name == RESERVED_GEO_FIELD_NAME
|| name == RESERVED_GEOJSON_FIELD_NAME
{
continue;
}
@@ -559,7 +608,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
}
fn top_level_field(&self, k: &str) -> Result<Option<&'a RawValue>> {
if k == RESERVED_VECTORS_FIELD_NAME || k == RESERVED_GEO_FIELD_NAME {
if k == RESERVED_VECTORS_FIELD_NAME
|| k == RESERVED_GEO_FIELD_NAME
|| k == RESERVED_GEOJSON_FIELD_NAME
{
return Ok(None);
}
self.get(k)
@@ -572,6 +624,10 @@ impl<'a, Mapper: FieldIdMapper> Document<'a> for KvDelAddDocument<'a, Mapper> {
fn geo_field(&self) -> Result<Option<&'a RawValue>> {
self.get(RESERVED_GEO_FIELD_NAME)
}
fn geojson_field(&self) -> Result<Option<&'a RawValue>> {
self.get(RESERVED_GEOJSON_FIELD_NAME)
}
}
pub struct DocumentIdentifiers<'doc> {

View File

@@ -5,7 +5,7 @@ use bumpalo::Bump;
use hashbrown::HashMap;
use super::DelAddRoaringBitmap;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::update::new::channel::{DocumentsSender, ExtractorBbqueueSender};
use crate::update::new::document::{write_to_obkv, Document, DocumentContext, DocumentIdentifiers};
use crate::update::new::indexer::document_changes::{Extractor, IndexingContext};
@@ -75,7 +75,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
.geo_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter) {
let geojson_iter = content
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (f, _) = res?;
let entry = document_extractor_data
.field_distribution_delta
@@ -94,7 +98,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
.geo_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter) {
let geojson_iter = content
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (f, _) = res?;
let entry = document_extractor_data
.field_distribution_delta
@@ -108,7 +116,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
.geo_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter) {
let geojson_iter = content
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (f, _) = res?;
let entry = document_extractor_data
.field_distribution_delta
@@ -143,7 +155,11 @@ impl<'extractor> Extractor<'extractor> for DocumentsExtractor<'_, '_> {
.geo_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter) {
let geojson_iter = content
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in content.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (f, _) = res?;
let entry = document_extractor_data
.field_distribution_delta

View File

@@ -0,0 +1,261 @@
use std::cell::RefCell;
use std::fs::File;
use std::io::{BufReader, BufWriter, ErrorKind, Read, Seek as _, Write as _};
use std::mem;
use std::str::FromStr;
use bumpalo::Bump;
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use cellulite::zerometry::ZerometryCodec;
use geo_types::Geometry;
use geojson::GeoJson;
use heed::{BytesEncode, RoTxn};
use zerometry::Zerometry;
use crate::update::new::channel::GeoJsonSender;
use crate::update::new::document::{Document, DocumentContext};
use crate::update::new::indexer::document_changes::Extractor;
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::thread_local::MostlySend;
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{DocumentId, Index, InternalError, Result, UserError};
pub struct GeoJsonExtractor {
grenad_parameters: GrenadParameters,
}
impl GeoJsonExtractor {
pub fn new(
rtxn: &RoTxn,
index: &Index,
grenad_parameters: GrenadParameters,
) -> Result<Option<Self>> {
if index.is_geojson_filtering_enabled(rtxn)? {
Ok(Some(GeoJsonExtractor { grenad_parameters }))
} else {
Ok(None)
}
}
}
pub struct GeoJsonExtractorData<'extractor> {
/// The set of document ids that were removed. If a document sees its geojson
/// being updated, we first put it in the removed list and then in the inserted one.
removed: bumpalo::collections::Vec<'extractor, DocumentId>,
/// The set of (docid, zerometry bytes) pairs that were inserted.
inserted: bumpalo::collections::Vec<'extractor, (DocumentId, &'extractor [u8])>,
/// Contains a packed list of docids whose geojson was removed, if we have
/// spilled to disk.
spilled_removed: Option<BufWriter<File>>,
/// Contains a packed list of (docid, length, zerometry bytes) entries for the
/// inserted geojson, if we have spilled to disk.
spilled_inserted: Option<BufWriter<File>>,
}
impl<'extractor> GeoJsonExtractorData<'extractor> {
pub fn freeze(self) -> Result<FrozenGeoJsonExtractorData<'extractor>> {
let GeoJsonExtractorData { removed, inserted, spilled_removed, spilled_inserted } = self;
Ok(FrozenGeoJsonExtractorData {
removed: removed.into_bump_slice(),
inserted: inserted.into_bump_slice(),
spilled_removed: spilled_removed
.map(|bw| bw.into_inner().map(BufReader::new).map_err(|iie| iie.into_error()))
.transpose()?,
spilled_inserted: spilled_inserted
.map(|bw| bw.into_inner().map(BufReader::new).map_err(|iie| iie.into_error()))
.transpose()?,
})
}
}
unsafe impl MostlySend for GeoJsonExtractorData<'_> {}
pub struct FrozenGeoJsonExtractorData<'extractor> {
pub removed: &'extractor [DocumentId],
pub inserted: &'extractor [(DocumentId, &'extractor [u8])],
pub spilled_removed: Option<BufReader<File>>,
pub spilled_inserted: Option<BufReader<File>>,
}
impl FrozenGeoJsonExtractorData<'_> {
pub fn iter_and_clear_removed(&mut self, channel: GeoJsonSender<'_, '_>) -> Result<()> {
for docid in mem::take(&mut self.removed) {
channel.delete_geojson(*docid).unwrap();
}
if let Some(mut spilled) = self.spilled_removed.take() {
spilled.rewind()?;
loop {
let docid = match spilled.read_u32::<BigEndian>() {
Ok(docid) => docid,
Err(e) if e.kind() == ErrorKind::UnexpectedEof => break,
Err(e) => return Err(InternalError::SerdeJson(serde_json::Error::io(e)).into()),
};
channel.delete_geojson(docid).unwrap();
}
}
Ok(())
}
pub fn iter_and_clear_inserted(&mut self, channel: GeoJsonSender<'_, '_>) -> Result<()> {
for (docid, _buf) in mem::take(&mut self.inserted) {
channel.send_geojson(*docid, _buf.to_vec()).unwrap();
}
if let Some(mut spilled) = self.spilled_inserted.take() {
spilled.rewind()?;
loop {
let docid = match spilled.read_u32::<BigEndian>() {
Ok(docid) => docid,
Err(e) if e.kind() == ErrorKind::UnexpectedEof => break,
Err(e) => return Err(InternalError::SerdeJson(serde_json::Error::io(e)).into()),
};
let size = match spilled.read_u32::<BigEndian>() {
Ok(size) => size,
Err(e) => return Err(InternalError::SerdeJson(serde_json::Error::io(e)).into()),
};
let mut buf = vec![0; size as usize];
spilled
.read_exact(&mut buf)
.map_err(|e| InternalError::SerdeJson(serde_json::Error::io(e)))?;
channel.send_geojson(docid, buf).unwrap();
}
}
Ok(())
}
}
impl<'extractor> Extractor<'extractor> for GeoJsonExtractor {
type Data = RefCell<GeoJsonExtractorData<'extractor>>;
fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
Ok(RefCell::new(GeoJsonExtractorData {
removed: bumpalo::collections::Vec::new_in(extractor_alloc),
inserted: bumpalo::collections::Vec::new_in(extractor_alloc),
spilled_inserted: None,
spilled_removed: None,
}))
}
fn process<'doc>(
&'doc self,
changes: impl Iterator<Item = Result<DocumentChange<'doc>>>,
context: &'doc DocumentContext<'doc, 'extractor, '_, '_, Self::Data>,
) -> Result<()> {
let rtxn = &context.rtxn;
let index = context.index;
let max_memory = self.grenad_parameters.max_memory_by_thread();
let db_fields_ids_map = context.db_fields_ids_map;
let mut data_ref = context.data.borrow_mut_or_yield();
for change in changes {
if data_ref.spilled_removed.is_none()
&& max_memory.is_some_and(|mm| context.extractor_alloc.allocated_bytes() >= mm)
{
// We must spill as we allocated too much memory
data_ref.spilled_removed = tempfile::tempfile().map(BufWriter::new).map(Some)?;
data_ref.spilled_inserted = tempfile::tempfile().map(BufWriter::new).map(Some)?;
}
match change? {
DocumentChange::Deletion(deletion) => {
let docid = deletion.docid();
let current = deletion.current(rtxn, index, db_fields_ids_map)?;
if let Some(_geojson) = current.geojson_field()? {
match &mut data_ref.spilled_removed {
Some(file) => {
file.write_u32::<BigEndian>(docid)?;
}
None => {
data_ref.removed.push(docid);
}
}
}
}
DocumentChange::Update(update) => {
let current = update.current(rtxn, index, db_fields_ids_map)?;
let docid = update.docid();
let current_geo = current.geojson_field()?;
let updated_geo =
update.merged(rtxn, index, db_fields_ids_map)?.geojson_field()?;
if current_geo.map(|c| c.get()) != updated_geo.map(|u| u.get()) {
// If the current and new geojson values are different it means that
// we need to replace the current one by the new one and therefore
// delete the current geojson from cellulite.
if let Some(_geojson) = current_geo {
match &mut data_ref.spilled_removed {
Some(file) => {
file.write_u32::<BigEndian>(docid)?;
}
None => {
data_ref.removed.push(docid);
}
}
}
if let Some(geojson) = updated_geo {
let geojson =
GeoJson::from_str(geojson.get()).map_err(UserError::from)?;
let mut geometry =
Geometry::try_from(geojson).map_err(UserError::from)?;
cellulite::densify_geom(&mut geometry);
let buf = ZerometryCodec::bytes_encode(&geometry).unwrap();
match &mut data_ref.spilled_inserted {
Some(file) => {
file.write_u32::<BigEndian>(docid)?;
file.write_u32::<BigEndian>(buf.len() as u32)?;
file.write_all(&buf)?;
}
None => {
let mut bvec =
bumpalo::collections::Vec::new_in(context.extractor_alloc);
bvec.extend_from_slice(&buf);
data_ref.inserted.push((docid, bvec.into_bump_slice()));
}
}
}
}
}
DocumentChange::Insertion(insertion) => {
let docid = insertion.docid();
let inserted_geo = insertion.inserted().geojson_field()?;
if let Some(geojson) = inserted_geo {
let geojson = GeoJson::from_str(geojson.get()).map_err(UserError::from)?;
let mut geometry = Geometry::try_from(geojson).map_err(UserError::from)?;
cellulite::densify_geom(&mut geometry);
let mut bytes = Vec::new();
Zerometry::write_from_geometry(&mut bytes, &geometry)?;
match &mut data_ref.spilled_inserted {
Some(file) => {
file.write_u32::<BigEndian>(docid)?;
file.write_u32::<BigEndian>(bytes.len() as u32)?;
file.write_all(&bytes)?;
}
None => {
let mut bvec =
bumpalo::collections::Vec::new_in(context.extractor_alloc);
bvec.extend_from_slice(&bytes);
data_ref.inserted.push((docid, bvec.into_bump_slice()));
}
}
}
}
}
}
Ok(())
}
}
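
In the Insertion and Update arms above, the extractor parses the `_geojson` value, converts it to a `geo_types::Geometry`, densifies it, and encodes it into the zerometry byte format before sending it through the channel. A rough standalone sketch of that pipeline, reusing the calls visible in the diff (`cellulite::densify_geom`, `Zerometry::write_from_geometry`); exact signatures and error types come from the diff and may differ across versions:

use std::str::FromStr;

use geo_types::Geometry;
use geojson::GeoJson;
use zerometry::Zerometry;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Same pipeline as the extractor: parse, convert, densify, then encode
    // into the zerometry byte format stored by cellulite.
    let raw = r#"{ "type": "Point", "coordinates": [2.35, 48.86] }"#;
    let geojson = GeoJson::from_str(raw)?;
    let mut geometry = Geometry::try_from(geojson)?;
    cellulite::densify_geom(&mut geometry);
    let mut bytes = Vec::new();
    Zerometry::write_from_geometry(&mut bytes, &geometry).expect("failed to encode zerometry");
    println!("encoded {} bytes of zerometry", bytes.len());
    Ok(())
}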

View File

@@ -18,6 +18,8 @@ use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{lat_lng_to_xyz, DocumentId, GeoPoint, Index, InternalError, Result};
pub mod cellulite;
pub struct GeoExtractor {
grenad_parameters: GrenadParameters,
}

View File

@@ -653,7 +653,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
settings_delta.try_for_each_fragment_diff(
session.embedder_name(),
|fragment_diff| {
|fragment_diff| -> Result<()> {
let extractor = RequestFragmentExtractor::new(fragment_diff.new, doc_alloc)
.ignore_errors();
let old = if full_reindex {

View File

@@ -24,7 +24,7 @@ pub trait Extractor<'extractor>: Sync {
fn process<'doc>(
&'doc self,
changes: impl Iterator<Item = Result<DocumentChange<'doc>>>,
context: &'doc DocumentContext<Self::Data>,
context: &'doc DocumentContext<'doc, 'extractor, '_, '_, Self::Data>,
) -> Result<()>;
}

View File

@@ -5,18 +5,24 @@
use std::hash::{BuildHasher as _, BuildHasherDefault};
pub struct Shards {
pub own: Vec<String>,
pub others: Vec<String>,
pub struct Shards(pub Vec<Shard>);
pub struct Shard {
pub is_own: bool,
pub name: String,
}
impl Shards {
pub fn must_process(&self, docid: &str) -> bool {
self.processing_shard(docid).map(|shard| shard.is_own).unwrap_or_default()
}
pub fn processing_shard<'a>(&'a self, docid: &str) -> Option<&'a Shard> {
let hasher = BuildHasherDefault::<twox_hash::XxHash3_64>::new();
let to_hash = |shard: &String| hasher.hash_one((shard, docid));
let to_hash = |shard: &'a Shard| (shard, hasher.hash_one((&shard.name, docid)));
let max_hash = self.others.iter().map(to_hash).max().unwrap_or_default();
self.own.iter().map(to_hash).any(|hash| hash > max_hash)
self.0.iter().map(to_hash).max_by_key(|(_, hash)| *hash).map(|(shard, _)| shard)
}
}
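
The reworked `Shards::processing_shard` is rendezvous (highest-random-weight) hashing: every shard name is hashed together with the document id and the shard with the highest hash owns the document, so each document maps deterministically to exactly one shard and `must_process` only keeps the ones owned locally. A standalone sketch of the same idea, using std's default hasher instead of `twox_hash` (a simplification, not the exact hash Meilisearch uses):

use std::collections::hash_map::DefaultHasher;
use std::hash::{BuildHasher, BuildHasherDefault};

struct Shard {
    is_own: bool,
    name: String,
}

fn processing_shard<'a>(shards: &'a [Shard], docid: &str) -> Option<&'a Shard> {
    // Rendezvous hashing: score every (shard name, docid) pair, keep the best shard.
    let hasher = BuildHasherDefault::<DefaultHasher>::default();
    shards.iter().max_by_key(|shard| hasher.hash_one((&shard.name, docid)))
}

fn must_process(shards: &[Shard], docid: &str) -> bool {
    processing_shard(shards, docid).map(|shard| shard.is_own).unwrap_or_default()
}

fn main() {
    let shards = vec![
        Shard { is_own: true, name: "ms-0".into() },
        Shard { is_own: false, name: "ms-1".into() },
    ];
    // Each document deterministically belongs to exactly one shard.
    for docid in ["a", "b", "c"] {
        println!("{docid} -> processed locally: {}", must_process(&shards, docid));
    }
}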

View File

@@ -16,9 +16,10 @@ use super::settings_changes::settings_change_extract;
use crate::documents::{FieldIdMapper, PrimaryKey};
use crate::progress::{EmbedderStats, MergingWordCache};
use crate::proximity::ProximityPrecision;
use crate::update::new::extract::cellulite::GeoJsonExtractor;
use crate::update::new::extract::EmbeddingExtractor;
use crate::update::new::indexer::settings_changes::DocumentsIndentifiers;
use crate::update::new::merger::merge_and_send_rtree;
use crate::update::new::merger::{merge_and_send_cellulite, merge_and_send_rtree};
use crate::update::new::{merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases};
use crate::update::settings::SettingsDelta;
use crate::vector::db::{EmbedderInfo, IndexEmbeddingConfig};
@@ -317,6 +318,37 @@ where
&indexing_context.must_stop_processing,
)?;
}
'cellulite: {
let Some(extractor) =
GeoJsonExtractor::new(&rtxn, index, *indexing_context.grenad_parameters)?
else {
break 'cellulite;
};
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "cellulite");
let _entered = span.enter();
extract(
document_changes,
&extractor,
indexing_context,
extractor_allocs,
&datastore,
IndexingStep::WritingGeoJson,
)?;
}
merge_and_send_cellulite(
datastore,
&rtxn,
index,
extractor_sender.geojson(),
&indexing_context.must_stop_processing,
)?;
}
indexing_context.progress.update_progress(IndexingStep::WaitingForDatabaseWrites);
finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);

View File

@@ -187,6 +187,13 @@ where
facet_field_ids_delta,
)?;
indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
index.cellulite.build(
wtxn,
&indexing_context.must_stop_processing,
indexing_context.progress,
)?;
indexing_context.progress.update_progress(IndexingStep::Finalizing);
Ok(congestion) as Result<_>
@@ -317,6 +324,13 @@ where
})
.unwrap()?;
indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
index.cellulite.build(
wtxn,
&indexing_context.must_stop_processing,
indexing_context.progress,
)?;
indexing_context.progress.update_progress(IndexingStep::Finalizing);
Ok(congestion) as Result<_>

View File

@@ -16,7 +16,7 @@ use crate::update::settings::InnerIndexSettings;
use crate::vector::db::IndexEmbeddingConfig;
use crate::vector::settings::EmbedderAction;
use crate::vector::{Embedder, Embeddings, RuntimeEmbedders, VectorStore};
use crate::{Error, Index, InternalError, Result, UserError};
use crate::{DocumentId, Error, Index, InternalError, Result, UserError};
pub fn write_to_db(
mut writer_receiver: WriterBbqueueReceiver<'_>,
@@ -72,6 +72,14 @@ pub fn write_to_db(
let embedding = large_vector.read_embedding(*dimensions);
writer.add_item_in_store(wtxn, docid, extractor_id, embedding)?;
}
ReceiverAction::LargeGeoJson(LargeGeoJson { docid, geojson }) => {
// It cannot be a deletion because it's large. Deletions are always small
let geojson: &[u8] = &geojson;
index
.cellulite
.add_raw_zerometry(wtxn, docid, geojson)
.map_err(InternalError::CelluliteError)?;
}
}
// Every time there is a message in the channel we search
@@ -263,6 +271,19 @@ pub fn write_from_bbqueue(
writer.add_item_in_store(wtxn, docid, extractor_id, embedding)?;
}
}
EntryHeader::CelluliteItem(docid) => {
let frame = frame_with_header.frame();
let skip = EntryHeader::variant_size() + std::mem::size_of::<DocumentId>();
let geojson = &frame[skip..];
index
.cellulite
.add_raw_zerometry(wtxn, docid, geojson)
.map_err(InternalError::CelluliteError)?;
}
EntryHeader::CelluliteRemove(docid) => {
index.cellulite.delete(wtxn, docid).map_err(InternalError::CelluliteError)?;
}
}
}

View File

@@ -13,6 +13,7 @@ use super::extract::{
FacetKind, GeoExtractorData,
};
use crate::update::facet::new_incremental::FacetFieldIdChange;
use crate::update::new::extract::cellulite::GeoJsonExtractorData;
use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
@@ -62,6 +63,30 @@ where
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
pub fn merge_and_send_cellulite<'extractor, MSP>(
datastore: impl IntoIterator<Item = RefCell<GeoJsonExtractorData<'extractor>>>,
_rtxn: &RoTxn,
_index: &Index,
geojson_sender: GeoJsonSender<'_, '_>,
must_stop_processing: &MSP,
) -> Result<()>
where
MSP: Fn() -> bool + Sync,
{
for data in datastore {
if must_stop_processing() {
return Err(InternalError::AbortedIndexation.into());
}
let mut frozen = data.into_inner().freeze()?;
frozen.iter_and_clear_removed(geojson_sender)?;
frozen.iter_and_clear_inserted(geojson_sender)?;
}
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
pub fn merge_and_send_docids<MSP, D>(
mut caches: Vec<BalancedCaches<'_>>,

View File

@@ -1,6 +1,7 @@
use heed::RwTxn;
use super::document::{Document, DocumentFromDb};
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::progress::{self, AtomicSubStep, Progress};
use crate::{FieldDistribution, Index, Result};
@@ -22,8 +23,13 @@ pub fn field_distribution(index: &Index, wtxn: &mut RwTxn<'_>, progress: &Progre
let Some(document) = DocumentFromDb::new(docid, wtxn, index, &field_id_map)? else {
continue;
};
let geo_iter = document.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
for res in document.iter_top_level_fields().chain(geo_iter) {
let geo_iter =
document.geo_field().transpose().map(|res| res.map(|rv| (RESERVED_GEO_FIELD_NAME, rv)));
let geojson_iter = document
.geojson_field()
.transpose()
.map(|res| res.map(|rv| (RESERVED_GEOJSON_FIELD_NAME, rv)));
for res in document.iter_top_level_fields().chain(geo_iter).chain(geojson_iter) {
let (field_name, _) = res?;
if let Some(count) = distribution.get_mut(field_name) {
*count += 1;
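
The field-distribution loop now chains the reserved _geo and _geojson pseudo-fields onto the document's top-level fields before counting. A small sketch of that counting pattern with simplified types (the real code iterates obkv documents and propagates Results):

use std::collections::BTreeMap;

const RESERVED_GEO_FIELD_NAME: &str = "_geo";
const RESERVED_GEOJSON_FIELD_NAME: &str = "_geojson";

fn main() {
    // Simplified document: top-level field names plus optional reserved fields.
    let top_level = vec!["title", "description"];
    let geo_field: Option<&str> = Some(r#"{"lat":0,"lng":0}"#);
    let geojson_field: Option<&str> = Some(r#"{"type":"Point","coordinates":[0,0]}"#);

    let geo_iter = geo_field.map(|v| (RESERVED_GEO_FIELD_NAME, v));
    let geojson_iter = geojson_field.map(|v| (RESERVED_GEOJSON_FIELD_NAME, v));

    let mut distribution: BTreeMap<&str, u64> = BTreeMap::new();
    for (field_name, _value) in top_level
        .into_iter()
        .map(|name| (name, ""))
        .chain(geo_iter)
        .chain(geojson_iter)
    {
        *distribution.entry(field_name).or_insert(0) += 1;
    }

    assert_eq!(distribution[RESERVED_GEOJSON_FIELD_NAME], 1);
}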

View File

@@ -12,11 +12,13 @@ make_enum_progress! {
MergingWordCaches,
MergingWordProximity,
WritingGeoPoints,
WritingGeoJson,
WaitingForDatabaseWrites,
WaitingForExtractors,
WritingEmbeddingsToDatabase,
PostProcessingFacets,
PostProcessingWords,
BuildingGeoJson,
Finalizing,
}
}

View File

@@ -193,7 +193,7 @@ impl WordPrefixIntegerDocids {
// We access this HashMap in parallel to compute the *union* of all
// of them and *serialize* them into files. There is one file per CPU.
let local_entries = ThreadLocal::with_capacity(rayon::current_num_threads());
prefixes.into_par_iter().map(AsRef::as_ref).try_for_each(|prefix| {
prefixes.into_par_iter().map(AsRef::as_ref).try_for_each(|prefix| -> Result<()> {
let refcell = local_entries.get_or(|| {
let file = BufWriter::new(spooled_tempfile(
self.max_memory_by_thread.unwrap_or(usize::MAX),
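
The only change in this hunk is the explicit `-> Result<()>` annotation on the closure passed to try_for_each; without it, type inference can fail to settle on the error type when the closure body relies on `?` conversions. A tiny illustration of the same pattern using the standard library's try_for_each (the surrounding Meilisearch types and rayon parallelism are omitted):

use std::num::ParseIntError;

type Result<T, E = ParseIntError> = std::result::Result<T, E>;

fn main() -> Result<()> {
    let prefixes = vec!["1", "2", "3"];

    // The explicit `-> Result<()>` pins down the closure's error type so the
    // `?` operators inside the body have a concrete type to convert into.
    prefixes.iter().try_for_each(|prefix| -> Result<()> {
        let n: u32 = prefix.parse()?;
        println!("prefix {prefix} parses to {n}");
        Ok(())
    })?;

    Ok(())
}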

View File

@@ -15,7 +15,7 @@ use super::del_add::{DelAdd, DelAddOperation};
use super::index_documents::{IndexDocumentsConfig, Transform};
use super::{ChatSettings, IndexerConfig};
use crate::attribute_patterns::PatternMatch;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::criterion::Criterion;
use crate::disabled_typos_terms::DisabledTyposTerms;
use crate::error::UserError::{self, InvalidChatSettingsDocumentTemplateMaxBytes};
@@ -1862,7 +1862,10 @@ impl InnerIndexSettingsDiff {
}
pub fn any_reindexing_needed(&self) -> bool {
self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors()
self.reindex_searchable()
|| self.reindex_facets()
|| self.reindex_vectors()
|| self.reindex_geojson()
}
pub fn reindex_searchable(&self) -> bool {
@@ -1971,6 +1974,11 @@ impl InnerIndexSettingsDiff {
!self.embedding_config_updates.is_empty()
}
pub fn reindex_geojson(&self) -> bool {
self.old.filterable_attributes_rules.iter().any(|rule| rule.has_geojson())
!= self.new.filterable_attributes_rules.iter().any(|rule| rule.has_geojson())
}
pub fn settings_update_only(&self) -> bool {
self.settings_update_only
}
@@ -1979,6 +1987,11 @@ impl InnerIndexSettingsDiff {
self.old.geo_fields_ids != self.new.geo_fields_ids
|| (!self.settings_update_only && self.new.geo_fields_ids.is_some())
}
pub fn run_geojson_indexing(&self) -> bool {
self.old.geojson_fid != self.new.geojson_fid
|| (!self.settings_update_only && self.new.geojson_fid.is_some())
}
}
#[derive(Clone)]
@@ -1999,6 +2012,7 @@ pub(crate) struct InnerIndexSettings {
pub runtime_embedders: RuntimeEmbedders,
pub embedder_category_id: HashMap<String, u8>,
pub geo_fields_ids: Option<(FieldId, FieldId)>,
pub geojson_fid: Option<FieldId>,
pub prefix_search: PrefixSearch,
pub facet_search: bool,
}
@@ -2038,6 +2052,7 @@ impl InnerIndexSettings {
}
_ => None,
};
let geo_json_fid = fields_ids_map.id(RESERVED_GEOJSON_FIELD_NAME);
let localized_attributes_rules =
index.localized_attributes_rules(rtxn)?.unwrap_or_default();
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
@@ -2066,6 +2081,7 @@ impl InnerIndexSettings {
runtime_embedders,
embedder_category_id,
geo_fields_ids,
geojson_fid: geo_json_fid,
prefix_search,
facet_search,
disabled_typos_terms,
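
Per the hunks above, reindex_geojson fires only when the presence of a geojson-capable filterable rule flips between the old and new settings, while run_geojson_indexing also triggers on plain document updates whenever a _geojson field id exists. A condensed sketch of that decision logic with simplified settings structs (the field names here are illustrative, not the real InnerIndexSettings layout):

type FieldId = u16;

struct Settings {
    has_geojson_filter_rule: bool,
    geojson_fid: Option<FieldId>,
}

struct SettingsDiff {
    old: Settings,
    new: Settings,
    settings_update_only: bool,
}

impl SettingsDiff {
    /// Reindex only when geojson filterability toggled between old and new settings.
    fn reindex_geojson(&self) -> bool {
        self.old.has_geojson_filter_rule != self.new.has_geojson_filter_rule
    }

    /// Run geojson extraction when the field id changed, or on a document update
    /// (not a pure settings update) while a `_geojson` field is present.
    fn run_geojson_indexing(&self) -> bool {
        self.old.geojson_fid != self.new.geojson_fid
            || (!self.settings_update_only && self.new.geojson_fid.is_some())
    }
}

fn main() {
    let diff = SettingsDiff {
        old: Settings { has_geojson_filter_rule: false, geojson_fid: None },
        new: Settings { has_geojson_filter_rule: true, geojson_fid: Some(0) },
        settings_update_only: true,
    };
    assert!(diff.reindex_geojson());
    assert!(diff.run_geojson_indexing());
}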