mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-21 05:41:01 +00:00
Compare commits
9 Commits
prototype-
...
v1.3.3
Author | SHA1 | Date | |
---|---|---|---|
256cf33bca | |||
9945cbf9db | |||
03d0f628bd | |||
ea78060916 | |||
b42d48187a | |||
679c0b0f97 | |||
e02d0064bd | |||
7ef3572f11 | |||
93285041a9 |
38
Cargo.lock
generated
38
Cargo.lock
generated
@ -469,7 +469,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
@ -1199,7 +1199,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "dump"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@ -1413,7 +1413,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "file-store"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"faux",
|
||||
"tempfile",
|
||||
@ -1435,11 +1435,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "filter-parser"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"nom",
|
||||
"nom_locate",
|
||||
"unescaper",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1454,7 +1455,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "flatten-serde-json"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@ -1572,7 +1573,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fuzzers"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"clap",
|
||||
@ -1894,7 +1895,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "index-scheduler"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@ -2081,7 +2082,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "json-depth-checker"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@ -2493,7 +2494,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
||||
|
||||
[[package]]
|
||||
name = "meili-snap"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"md5",
|
||||
@ -2502,7 +2503,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"actix-cors",
|
||||
"actix-http",
|
||||
@ -2591,7 +2592,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-auth"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"base64 0.21.2",
|
||||
"enum-iterator",
|
||||
@ -2610,7 +2611,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-types"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"anyhow",
|
||||
@ -2664,7 +2665,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "milli"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"bimap",
|
||||
@ -2994,7 +2995,7 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
|
||||
|
||||
[[package]]
|
||||
name = "permissive-json-pointer"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"serde_json",
|
||||
@ -4147,6 +4148,15 @@ version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e79c4d996edb816c91e4308506774452e55e95c3c9de07b6729e17e15a5ef81"
|
||||
|
||||
[[package]]
|
||||
name = "unescaper"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a96a44ae11e25afb520af4534fd7b0bd8cd613e35a78def813b8cf41631fa3c8"
|
||||
dependencies = [
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicase"
|
||||
version = "2.6.0"
|
||||
|
@ -18,7 +18,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
|
||||
description = "Meilisearch HTTP server"
|
||||
homepage = "https://meilisearch.com"
|
||||
|
@ -14,6 +14,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
nom = "7.1.3"
|
||||
nom_locate = "4.1.0"
|
||||
unescaper = "0.1.2"
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.29.0"
|
||||
|
@ -62,6 +62,7 @@ pub enum ErrorKind<'a> {
|
||||
MisusedGeoRadius,
|
||||
MisusedGeoBoundingBox,
|
||||
InvalidPrimary,
|
||||
InvalidEscapedNumber,
|
||||
ExpectedEof,
|
||||
ExpectedValue(ExpectedValueKind),
|
||||
MalformedValue,
|
||||
@ -147,6 +148,9 @@ impl<'a> Display for Error<'a> {
|
||||
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
|
||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
|
||||
}
|
||||
ErrorKind::InvalidEscapedNumber => {
|
||||
writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
|
||||
}
|
||||
ErrorKind::ExpectedEof => {
|
||||
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
|
||||
}
|
||||
|
@ -545,6 +545,8 @@ impl<'a> std::fmt::Display for Token<'a> {
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use FilterCondition as Fc;
|
||||
|
||||
use super::*;
|
||||
|
||||
/// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
|
||||
@ -556,14 +558,22 @@ pub mod tests {
|
||||
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
|
||||
}
|
||||
|
||||
fn p(s: &str) -> impl std::fmt::Display + '_ {
|
||||
Fc::parse(s).unwrap().unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_escaped() {
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
|
||||
// but it also works with other sequencies
|
||||
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse() {
|
||||
use FilterCondition as Fc;
|
||||
|
||||
fn p(s: &str) -> impl std::fmt::Display + '_ {
|
||||
Fc::parse(s).unwrap().unwrap()
|
||||
}
|
||||
|
||||
// Test equal
|
||||
insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
|
||||
insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");
|
||||
|
@ -171,7 +171,24 @@ pub fn parse_value(input: Span) -> IResult<Token> {
|
||||
})
|
||||
})?;
|
||||
|
||||
Ok((input, value))
|
||||
match unescaper::unescape(value.value()) {
|
||||
Ok(content) => {
|
||||
if content.len() != value.value().len() {
|
||||
Ok((input, Token::new(value.original_span(), Some(content))))
|
||||
} else {
|
||||
Ok((input, value))
|
||||
}
|
||||
}
|
||||
Err(unescaper::Error::IncompleteStr(_)) => Err(nom::Err::Incomplete(nom::Needed::Unknown)),
|
||||
Err(unescaper::Error::ParseIntError { .. }) => Err(nom::Err::Error(Error::new_from_kind(
|
||||
value.original_span(),
|
||||
ErrorKind::InvalidEscapedNumber,
|
||||
))),
|
||||
Err(unescaper::Error::InvalidChar { .. }) => Err(nom::Err::Error(Error::new_from_kind(
|
||||
value.original_span(),
|
||||
ErrorKind::MalformedValue,
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_value_component(c: char) -> bool {
|
||||
@ -318,17 +335,17 @@ pub mod test {
|
||||
("\"cha'nnel\"", "cha'nnel", false),
|
||||
("I'm tamo", "I", false),
|
||||
// escaped thing but not quote
|
||||
(r#""\\""#, r#"\\"#, false),
|
||||
(r#""\\\\\\""#, r#"\\\\\\"#, false),
|
||||
(r#""aa\\aa""#, r#"aa\\aa"#, false),
|
||||
(r#""\\""#, r#"\"#, true),
|
||||
(r#""\\\\\\""#, r#"\\\"#, true),
|
||||
(r#""aa\\aa""#, r#"aa\aa"#, true),
|
||||
// with double quote
|
||||
(r#""Hello \"world\"""#, r#"Hello "world""#, true),
|
||||
(r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true),
|
||||
(r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
|
||||
(r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
|
||||
(r#""\"\"""#, r#""""#, true),
|
||||
// with simple quote
|
||||
(r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
|
||||
(r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true),
|
||||
(r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
|
||||
(r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
|
||||
(r#"'\'\''"#, r#"''"#, true),
|
||||
];
|
||||
@ -350,7 +367,14 @@ pub mod test {
|
||||
"Filter `{}` was not supposed to be escaped",
|
||||
input
|
||||
);
|
||||
assert_eq!(token.value(), expected, "Filter `{}` failed.", input);
|
||||
assert_eq!(
|
||||
token.value(),
|
||||
expected,
|
||||
"Filter `{}` failed by giving `{}` instead of `{}`.",
|
||||
input,
|
||||
token.value(),
|
||||
expected
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -55,7 +55,13 @@ pub(crate) fn data_from_obkv_documents(
|
||||
original_obkv_chunks
|
||||
.par_bridge()
|
||||
.map(|original_documents_chunk| {
|
||||
send_original_documents_data(original_documents_chunk, lmdb_writer_sx.clone())
|
||||
send_original_documents_data(
|
||||
original_documents_chunk,
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
vectors_field_id,
|
||||
primary_key_id,
|
||||
)
|
||||
})
|
||||
.collect::<Result<()>>()?;
|
||||
|
||||
@ -72,7 +78,6 @@ pub(crate) fn data_from_obkv_documents(
|
||||
&faceted_fields,
|
||||
primary_key_id,
|
||||
geo_fields_ids,
|
||||
vectors_field_id,
|
||||
&stop_words,
|
||||
max_positions_per_attributes,
|
||||
)
|
||||
@ -257,11 +262,33 @@ fn spawn_extraction_task<FE, FS, M>(
|
||||
/// - documents
|
||||
fn send_original_documents_data(
|
||||
original_documents_chunk: Result<grenad::Reader<File>>,
|
||||
indexer: GrenadParameters,
|
||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||
vectors_field_id: Option<FieldId>,
|
||||
primary_key_id: FieldId,
|
||||
) -> Result<()> {
|
||||
let original_documents_chunk =
|
||||
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
|
||||
|
||||
if let Some(vectors_field_id) = vectors_field_id {
|
||||
let documents_chunk_cloned = original_documents_chunk.clone();
|
||||
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
|
||||
rayon::spawn(move || {
|
||||
let result = extract_vector_points(
|
||||
documents_chunk_cloned,
|
||||
indexer,
|
||||
primary_key_id,
|
||||
vectors_field_id,
|
||||
);
|
||||
let _ = match result {
|
||||
Ok(vector_points) => {
|
||||
lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
|
||||
}
|
||||
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
// TODO: create a custom internal error
|
||||
lmdb_writer_sx.send(Ok(TypedChunk::Documents(original_documents_chunk))).unwrap();
|
||||
Ok(())
|
||||
@ -283,7 +310,6 @@ fn send_and_extract_flattened_documents_data(
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
primary_key_id: FieldId,
|
||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
vectors_field_id: Option<FieldId>,
|
||||
stop_words: &Option<fst::Set<&[u8]>>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(
|
||||
@ -312,25 +338,6 @@ fn send_and_extract_flattened_documents_data(
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(vectors_field_id) = vectors_field_id {
|
||||
let documents_chunk_cloned = flattened_documents_chunk.clone();
|
||||
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
|
||||
rayon::spawn(move || {
|
||||
let result = extract_vector_points(
|
||||
documents_chunk_cloned,
|
||||
indexer,
|
||||
primary_key_id,
|
||||
vectors_field_id,
|
||||
);
|
||||
let _ = match result {
|
||||
Ok(vector_points) => {
|
||||
lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
|
||||
}
|
||||
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
|
||||
rayon::join(
|
||||
|| {
|
||||
|
@ -2519,6 +2519,25 @@ mod tests {
|
||||
db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f");
|
||||
}
|
||||
|
||||
/// Index multiple different number of vectors in documents.
|
||||
/// Vectors must be of the same length.
|
||||
#[test]
|
||||
fn test_multiple_vectors() {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index.add_documents(documents!([{"id": 0, "_vectors": [[0, 1, 2], [3, 4, 5]] }])).unwrap();
|
||||
index.add_documents(documents!([{"id": 1, "_vectors": [6, 7, 8] }])).unwrap();
|
||||
index
|
||||
.add_documents(
|
||||
documents!([{"id": 2, "_vectors": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }]),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let res = index.search(&rtxn).vector([0.0, 1.0, 2.0]).execute().unwrap();
|
||||
assert_eq!(res.documents_ids.len(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reproduce_the_bug() {
|
||||
/*
|
||||
|
Reference in New Issue
Block a user