mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-20 05:20:36 +00:00
Compare commits
11 Commits
prototype-
...
v1.5.0-rc.
Author | SHA1 | Date | |
---|---|---|---|
2614e7d9ca | |||
e7244aa485 | |||
9cacc82307 | |||
4c6fddb1cb | |||
ca52021079 | |||
ee6f79d60b | |||
e4c24ca6a3 | |||
2bae9550c8 | |||
32c78ac8b1 | |||
5fe7c4545a | |||
2042229927 |
810
Cargo.lock
generated
810
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -18,7 +18,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.4.1"
|
||||
version = "1.5.0"
|
||||
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
|
||||
description = "Meilisearch HTTP server"
|
||||
homepage = "https://meilisearch.com"
|
||||
|
@ -50,6 +50,7 @@ hebrew = ["milli/hebrew"]
|
||||
japanese = ["milli/japanese"]
|
||||
# thai specialized tokenization
|
||||
thai = ["milli/thai"]
|
||||
|
||||
# allow greek specialized tokenization
|
||||
greek = ["milli/greek"]
|
||||
# allow khmer specialized tokenization
|
||||
khmer = ["milli/khmer"]
|
||||
|
@ -150,6 +150,7 @@ hebrew = ["meilisearch-types/hebrew"]
|
||||
japanese = ["meilisearch-types/japanese"]
|
||||
thai = ["meilisearch-types/thai"]
|
||||
greek = ["meilisearch-types/greek"]
|
||||
khmer = ["meilisearch-types/khmer"]
|
||||
|
||||
[package.metadata.mini-dashboard]
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
|
||||
|
@ -5,9 +5,11 @@ pub mod service;
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
#[allow(unused)]
|
||||
pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
|
||||
use meili_snap::json_string;
|
||||
use serde::{Deserialize, Serialize};
|
||||
#[allow(unused)]
|
||||
pub use server::{default_settings, Server};
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
|
||||
|
@ -6,21 +6,109 @@ use crate::json;
|
||||
|
||||
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{"productId": 1, "shopId": 1},
|
||||
{"productId": 2, "shopId": 1},
|
||||
{"productId": 3, "shopId": 2},
|
||||
{"productId": 4, "shopId": 2},
|
||||
{"productId": 5, "shopId": 3},
|
||||
{"productId": 6, "shopId": 3},
|
||||
{"productId": 7, "shopId": 4},
|
||||
{"productId": 8, "shopId": 4},
|
||||
{"productId": 9, "shopId": 5},
|
||||
{"productId": 10, "shopId": 5}
|
||||
{
|
||||
"id": 1,
|
||||
"description": "Leather Jacket",
|
||||
"brand": "Lee Jeans",
|
||||
"product_id": "123456",
|
||||
"color": "Brown"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"description": "Leather Jacket",
|
||||
"brand": "Lee Jeans",
|
||||
"product_id": "123456",
|
||||
"color": "Black"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"description": "Leather Jacket",
|
||||
"brand": "Lee Jeans",
|
||||
"product_id": "123456",
|
||||
"color": "Blue"
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"description": "T-Shirt",
|
||||
"brand": "Nike",
|
||||
"product_id": "789012",
|
||||
"color": "Red"
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"description": "T-Shirt",
|
||||
"brand": "Nike",
|
||||
"product_id": "789012",
|
||||
"color": "Blue"
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"description": "Running Shoes",
|
||||
"brand": "Adidas",
|
||||
"product_id": "456789",
|
||||
"color": "Black"
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"description": "Running Shoes",
|
||||
"brand": "Adidas",
|
||||
"product_id": "456789",
|
||||
"color": "White"
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"description": "Hoodie",
|
||||
"brand": "Puma",
|
||||
"product_id": "987654",
|
||||
"color": "Gray"
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"description": "Sweater",
|
||||
"brand": "Gap",
|
||||
"product_id": "234567",
|
||||
"color": "Green"
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"description": "Sweater",
|
||||
"brand": "Gap",
|
||||
"product_id": "234567",
|
||||
"color": "Red"
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"description": "Sweater",
|
||||
"brand": "Gap",
|
||||
"product_id": "234567",
|
||||
"color": "Blue"
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
"description": "Jeans",
|
||||
"brand": "Levi's",
|
||||
"product_id": "345678",
|
||||
"color": "Indigo"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"description": "Jeans",
|
||||
"brand": "Levi's",
|
||||
"product_id": "345678",
|
||||
"color": "Black"
|
||||
},
|
||||
{
|
||||
"id": 14,
|
||||
"description": "Jeans",
|
||||
"brand": "Levi's",
|
||||
"product_id": "345678",
|
||||
"color": "Stone Wash"
|
||||
}
|
||||
])
|
||||
});
|
||||
|
||||
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
|
||||
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
|
||||
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
|
||||
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";
|
||||
|
||||
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
#[actix_rt::test]
|
||||
@ -33,31 +121,121 @@ async fn distinct_search_with_offset_no_ranking() {
|
||||
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
fn get_hits(Value(response): Value) -> Vec<i64> {
|
||||
fn get_hits(response: &Value) -> Vec<&str> {
|
||||
let hits_array = response["hits"].as_array().unwrap();
|
||||
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
|
||||
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
|
||||
let hits = get_hits(response);
|
||||
let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @"[1, 2]");
|
||||
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
|
||||
snapshot!(response["estimatedTotalHits"] , @"11");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
|
||||
let hits = get_hits(response);
|
||||
let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @"[3, 4]");
|
||||
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"10");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
|
||||
let hits = get_hits(response);
|
||||
let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"1");
|
||||
snapshot!(format!("{:?}", hits), @"[5]");
|
||||
snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
|
||||
let hits = get_hits(response);
|
||||
let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||
snapshot!(response["estimatedTotalHits"], @"6");
|
||||
}
|
||||
|
||||
/// testing: https://github.com/meilisearch/meilisearch/issues/4130
|
||||
#[actix_rt::test]
|
||||
async fn distinct_search_with_pagination_no_ranking() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
|
||||
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
fn get_hits(response: &Value) -> Vec<&str> {
|
||||
let hits_array = response["hits"].as_array().unwrap();
|
||||
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||
snapshot!(response["page"], @"0");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
|
||||
snapshot!(response["page"], @"1");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
|
||||
snapshot!(response["page"], @"2");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
|
||||
snapshot!(response["page"], @"3");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
snapshot!(format!("{:?}", hits), @r#"[]"#);
|
||||
snapshot!(response["page"], @"4");
|
||||
snapshot!(response["totalPages"], @"3");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
|
||||
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
|
||||
let hits = get_hits(&response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"3");
|
||||
snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
|
||||
snapshot!(response["page"], @"2");
|
||||
snapshot!(response["totalPages"], @"2");
|
||||
snapshot!(response["totalHits"], @"6");
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ bincode = "1.3.3"
|
||||
bstr = "1.4.0"
|
||||
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.4.3"
|
||||
charabia = { version = "0.8.3", default-features = false }
|
||||
charabia = { version = "0.8.5", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
@ -82,7 +82,7 @@ md5 = "0.7.0"
|
||||
rand = { version = "0.8.5", features = ["small_rng"] }
|
||||
|
||||
[features]
|
||||
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]
|
||||
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
|
||||
|
||||
# Use POSIX semaphores instead of SysV semaphores in LMDB
|
||||
# For more information on this feature, see heed's Cargo.toml
|
||||
@ -106,3 +106,6 @@ thai = ["charabia/thai"]
|
||||
|
||||
# allow greek specialized tokenization
|
||||
greek = ["charabia/greek"]
|
||||
|
||||
# allow khmer specialized tokenization
|
||||
khmer = ["charabia/khmer"]
|
||||
|
@ -3,7 +3,7 @@ use std::fmt::{Debug, Display};
|
||||
use std::ops::Bound::{self, Excluded, Included};
|
||||
|
||||
use either::Either;
|
||||
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
|
||||
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde_json::Value;
|
||||
|
||||
|
@ -11,7 +11,7 @@ use once_cell::sync::Lazy;
|
||||
use roaring::bitmap::RoaringBitmap;
|
||||
|
||||
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
|
||||
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
|
||||
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
|
||||
use self::new::PartialSearchResult;
|
||||
use crate::error::UserError;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
|
||||
|
@ -46,9 +46,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
if let Some(distinct_fid) = distinct_fid {
|
||||
let mut excluded = RoaringBitmap::new();
|
||||
let mut results = vec![];
|
||||
let mut skip = 0;
|
||||
for docid in universe.iter() {
|
||||
if results.len() >= length {
|
||||
if results.len() >= from + length {
|
||||
break;
|
||||
}
|
||||
if excluded.contains(docid) {
|
||||
@ -56,16 +55,19 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
}
|
||||
|
||||
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
|
||||
skip += 1;
|
||||
if skip <= from {
|
||||
continue;
|
||||
}
|
||||
|
||||
results.push(docid);
|
||||
}
|
||||
|
||||
let mut all_candidates = universe - excluded;
|
||||
all_candidates.extend(results.iter().copied());
|
||||
// drain the results of the skipped elements
|
||||
// this **must** be done **after** writing the entire results in `all_candidates` to ensure
|
||||
// e.g. estimatedTotalHits is correct.
|
||||
if results.len() >= from {
|
||||
results.drain(..from);
|
||||
} else {
|
||||
results.clear();
|
||||
}
|
||||
|
||||
return Ok(BucketSortOutput {
|
||||
scores: vec![Default::default(); results.len()],
|
||||
|
@ -29,7 +29,7 @@ use std::hash::Hash;
|
||||
pub use cheapest_paths::PathVisitor;
|
||||
pub use condition_docids_cache::ConditionDocIdsCache;
|
||||
pub use dead_ends_cache::DeadEndsCache;
|
||||
pub use exactness::{ExactnessCondition, ExactnessGraph};
|
||||
pub use exactness::ExactnessGraph;
|
||||
pub use fid::{FidCondition, FidGraph};
|
||||
pub use position::{PositionCondition, PositionGraph};
|
||||
pub use proximity::{ProximityCondition, ProximityGraph};
|
||||
|
@ -14,7 +14,7 @@ pub use grenad_helpers::{
|
||||
};
|
||||
pub use merge_functions::{
|
||||
concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string,
|
||||
merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
|
||||
merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps,
|
||||
serialize_roaring_bitmap, MergeFn,
|
||||
};
|
||||
|
||||
|
@ -20,10 +20,7 @@ use slice_group_by::GroupBy;
|
||||
use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
|
||||
|
||||
use self::enrich::enrich_documents_batch;
|
||||
pub use self::enrich::{
|
||||
extract_finite_float_from_value, validate_document_id, validate_document_id_value,
|
||||
validate_geo_from_json, DocumentId,
|
||||
};
|
||||
pub use self::enrich::{extract_finite_float_from_value, DocumentId};
|
||||
pub use self::helpers::{
|
||||
as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
|
||||
fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
|
||||
|
@ -202,7 +202,7 @@ test_distinct!(
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
1,
|
||||
vec![],
|
||||
2
|
||||
3
|
||||
);
|
||||
test_distinct!(
|
||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
@ -212,7 +212,7 @@ test_distinct!(
|
||||
1,
|
||||
2,
|
||||
vec![],
|
||||
1
|
||||
3
|
||||
);
|
||||
test_distinct!(
|
||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
@ -222,7 +222,7 @@ test_distinct!(
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
2,
|
||||
vec![],
|
||||
5
|
||||
7
|
||||
);
|
||||
test_distinct!(
|
||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
@ -232,5 +232,5 @@ test_distinct!(
|
||||
2,
|
||||
4,
|
||||
vec![],
|
||||
3
|
||||
7
|
||||
);
|
||||
|
Reference in New Issue
Block a user