mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-22 04:36:32 +00:00
Compare commits
9 Commits
c19fa9f1e1
...
925bce5fbd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
925bce5fbd | ||
|
|
62065ed30d | ||
|
|
97e6ae1957 | ||
|
|
5ed9be0789 | ||
|
|
7597b1049f | ||
|
|
d99150f21b | ||
|
|
c9726674a0 | ||
|
|
205f40b3b8 | ||
|
|
361580f451 |
6
.github/workflows/publish-release-assets.yml
vendored
6
.github/workflows/publish-release-assets.yml
vendored
@@ -65,9 +65,9 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-13, windows-2022]
|
||||
os: [macos-14, windows-2022]
|
||||
include:
|
||||
- os: macos-13
|
||||
- os: macos-14
|
||||
artifact_name: meilisearch
|
||||
asset_name: meilisearch-macos-amd64
|
||||
- os: windows-2022
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
|
||||
publish-macos-apple-silicon:
|
||||
name: Publish binary for macOS silicon
|
||||
runs-on: macos-13
|
||||
runs-on: macos-14
|
||||
needs: check-version
|
||||
strategy:
|
||||
matrix:
|
||||
|
||||
2
.github/workflows/test-suite.yml
vendored
2
.github/workflows/test-suite.yml
vendored
@@ -47,7 +47,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-13, windows-2022]
|
||||
os: [macos-14, windows-2022]
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Cache dependencies
|
||||
|
||||
@@ -438,12 +438,15 @@ async fn multipart_stream_to_s3(
|
||||
db_name: String,
|
||||
reader: std::io::PipeReader,
|
||||
) -> Result<(), Error> {
|
||||
use std::{collections::VecDeque, os::fd::OwnedFd, path::PathBuf};
|
||||
use std::collections::VecDeque;
|
||||
use std::io;
|
||||
use std::os::fd::OwnedFd;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use reqwest::{Client, Response};
|
||||
use rusty_s3::S3Action as _;
|
||||
use rusty_s3::{actions::CreateMultipartUpload, Bucket, BucketError, Credentials, UrlStyle};
|
||||
use rusty_s3::actions::CreateMultipartUpload;
|
||||
use rusty_s3::{Bucket, BucketError, Credentials, S3Action as _, UrlStyle};
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
let reader = OwnedFd::from(reader);
|
||||
@@ -517,7 +520,6 @@ async fn multipart_stream_to_s3(
|
||||
while buffer.len() < (s3_multipart_part_size as usize / 2) {
|
||||
// Wait for the pipe to be readable
|
||||
|
||||
use std::io;
|
||||
reader.readable().await?;
|
||||
|
||||
match reader.try_read_buf(&mut buffer) {
|
||||
@@ -581,15 +583,17 @@ async fn multipart_stream_to_s3(
|
||||
async move {
|
||||
match client.post(url).body(body).send().await {
|
||||
Ok(resp) if resp.status().is_client_error() => {
|
||||
resp.error_for_status().map_err(backoff::Error::Permanent)
|
||||
Err(backoff::Error::Permanent(Error::S3Error {
|
||||
status: resp.status(),
|
||||
body: resp.text().await.unwrap_or_default(),
|
||||
}))
|
||||
}
|
||||
Ok(resp) => Ok(resp),
|
||||
Err(e) => Err(backoff::Error::transient(e)),
|
||||
Err(e) => Err(backoff::Error::transient(Error::S3HttpError(e))),
|
||||
}
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(Error::S3HttpError)?;
|
||||
.await?;
|
||||
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.map_err(|e| Error::S3Error { status, body: e.to_string() })?;
|
||||
|
||||
@@ -195,7 +195,7 @@ struct Infos {
|
||||
experimental_enable_logs_route: bool,
|
||||
experimental_reduce_indexing_memory_usage: bool,
|
||||
experimental_max_number_of_batched_tasks: usize,
|
||||
experimental_limit_batched_tasks_total_size: u64,
|
||||
experimental_limit_batched_tasks_total_size: Option<u64>,
|
||||
experimental_network: bool,
|
||||
experimental_multimodal: bool,
|
||||
experimental_chat_completions: bool,
|
||||
@@ -359,7 +359,7 @@ impl Infos {
|
||||
http_payload_size_limit,
|
||||
experimental_max_number_of_batched_tasks,
|
||||
experimental_limit_batched_tasks_total_size:
|
||||
experimental_limit_batched_tasks_total_size.into(),
|
||||
experimental_limit_batched_tasks_total_size.map(|size| size.as_u64()),
|
||||
task_queue_webhook: task_webhook_url.is_some(),
|
||||
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
|
||||
log_level: log_level.to_string(),
|
||||
|
||||
@@ -230,7 +230,11 @@ pub fn setup_meilisearch(
|
||||
cleanup_enabled: !opt.experimental_replication_parameters,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
|
||||
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.into(),
|
||||
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.map_or_else(
|
||||
// By default, we use half of the available memory to determine the size of batched tasks
|
||||
|| opt.indexer_options.max_indexing_memory.map_or(u64::MAX, |mem| mem.as_u64() / 2),
|
||||
|size| size.as_u64(),
|
||||
),
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
instance_features: opt.to_instance_features(),
|
||||
|
||||
@@ -473,11 +473,13 @@ pub struct Opt {
|
||||
#[serde(default = "default_limit_batched_tasks")]
|
||||
pub experimental_max_number_of_batched_tasks: usize,
|
||||
|
||||
/// Experimentally reduces the maximum total size, in bytes, of tasks that will be processed at once,
|
||||
/// see: <https://github.com/orgs/meilisearch/discussions/801>
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())]
|
||||
#[serde(default = "default_limit_batched_tasks_total_size")]
|
||||
pub experimental_limit_batched_tasks_total_size: Byte,
|
||||
/// Experimentally controls the maximum total size, in bytes, of tasks that will be processed
|
||||
/// simultaneously. When unspecified, defaults to half of the maximum indexing memory.
|
||||
///
|
||||
/// See: <https://github.com/orgs/meilisearch/discussions/801>
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE)]
|
||||
#[serde(default)]
|
||||
pub experimental_limit_batched_tasks_total_size: Option<Byte>,
|
||||
|
||||
/// Enables experimental caching of search query embeddings. The value represents the maximal number of entries in the cache of each
|
||||
/// distinct embedder.
|
||||
@@ -701,10 +703,12 @@ impl Opt {
|
||||
MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS,
|
||||
experimental_max_number_of_batched_tasks.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
|
||||
experimental_limit_batched_tasks_total_size.to_string(),
|
||||
);
|
||||
if let Some(limit) = experimental_limit_batched_tasks_total_size {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
|
||||
limit.to_string(),
|
||||
);
|
||||
}
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES,
|
||||
experimental_embedding_cache_entries.to_string(),
|
||||
@@ -1273,10 +1277,6 @@ fn default_limit_batched_tasks() -> usize {
|
||||
usize::MAX
|
||||
}
|
||||
|
||||
fn default_limit_batched_tasks_total_size() -> Byte {
|
||||
Byte::from_u64(u64::MAX)
|
||||
}
|
||||
|
||||
fn default_embedding_cache_entries() -> usize {
|
||||
0
|
||||
}
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
use crate::search::{Personalize, SearchResult};
|
||||
use meilisearch_types::{
|
||||
error::{Code, ErrorCode, ResponseError},
|
||||
milli::TimeBudget,
|
||||
};
|
||||
use std::time::Duration;
|
||||
|
||||
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
|
||||
use meilisearch_types::milli::TimeBudget;
|
||||
use rand::Rng;
|
||||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::search::{Personalize, SearchResult};
|
||||
|
||||
const COHERE_API_URL: &str = "https://api.cohere.ai/v1/rerank";
|
||||
const MAX_RETRIES: u32 = 10;
|
||||
|
||||
|
||||
@@ -18,10 +18,9 @@ use serde::{Deserialize, Serialize};
|
||||
use utoipa::ToSchema;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::search::SearchMetadata;
|
||||
|
||||
use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex};
|
||||
use crate::milli::vector::Embedding;
|
||||
use crate::search::SearchMetadata;
|
||||
|
||||
pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;
|
||||
|
||||
|
||||
@@ -1339,3 +1339,117 @@ async fn get_document_with_vectors() {
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_fetch_documents_pagination_with_sorting() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
let (task, _code) = index.create(None).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Set name as sortable attribute
|
||||
let (task, code) = index.update_settings_sortable_attributes(json!(["name"])).await;
|
||||
assert_eq!(code, 202);
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let documents = json!((0..50)
|
||||
.map(|i| json!({"id": i, "name": format!("doc_{:05}", std::cmp::min(i, 5))}))
|
||||
.collect::<Vec<_>>());
|
||||
|
||||
// Add documents as described in the bug report
|
||||
let (task, code) = index.add_documents(documents, None).await;
|
||||
assert_eq!(code, 202);
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Request 1 (first page): offset 0, limit 2
|
||||
let (response, code) = index
|
||||
.fetch_documents(json!({
|
||||
"offset": 0,
|
||||
"limit": 2,
|
||||
"sort": ["name:asc"]
|
||||
}))
|
||||
.await;
|
||||
assert_eq!(code, 200);
|
||||
let results = response["results"].as_array().unwrap();
|
||||
snapshot!(json_string!(results), @r###"
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"name": "doc_00000"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"name": "doc_00001"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
|
||||
// Request 2 (second page): offset 2, limit 2
|
||||
let (response, code) = index
|
||||
.fetch_documents(json!({
|
||||
"offset": 2,
|
||||
"limit": 2,
|
||||
"sort": ["name:asc"]
|
||||
}))
|
||||
.await;
|
||||
assert_eq!(code, 200);
|
||||
let results = response["results"].as_array().unwrap();
|
||||
snapshot!(json_string!(results), @r###"
|
||||
[
|
||||
{
|
||||
"id": 2,
|
||||
"name": "doc_00002"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"name": "doc_00003"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
|
||||
// Request 3 (third page): offset 4, limit 2
|
||||
let (response, code) = index
|
||||
.fetch_documents(json!({
|
||||
"offset": 4,
|
||||
"limit": 2,
|
||||
"sort": ["name:asc"]
|
||||
}))
|
||||
.await;
|
||||
assert_eq!(code, 200);
|
||||
let results = response["results"].as_array().unwrap();
|
||||
snapshot!(json_string!(results), @r###"
|
||||
[
|
||||
{
|
||||
"id": 4,
|
||||
"name": "doc_00004"
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"name": "doc_00005"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
|
||||
// Request 4 (fourth page): offset 6, limit 2
|
||||
let (response, code) = index
|
||||
.fetch_documents(json!({
|
||||
"offset": 6,
|
||||
"limit": 2,
|
||||
"sort": ["name:asc"]
|
||||
}))
|
||||
.await;
|
||||
assert_eq!(code, 200);
|
||||
let results = response["results"].as_array().unwrap();
|
||||
snapshot!(json_string!(results), @r###"
|
||||
[
|
||||
{
|
||||
"id": 6,
|
||||
"name": "doc_00005"
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"name": "doc_00005"
|
||||
}
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
@@ -87,7 +87,7 @@ impl Iterator for SortedDocumentsIterator<'_> {
|
||||
};
|
||||
|
||||
// Otherwise don't directly iterate over children, skip them if we know we will go further
|
||||
let mut to_skip = n - 1;
|
||||
let mut to_skip = n;
|
||||
while to_skip > 0 {
|
||||
if let Err(e) = SortedDocumentsIterator::update_current(
|
||||
current_child,
|
||||
@@ -108,7 +108,7 @@ impl Iterator for SortedDocumentsIterator<'_> {
|
||||
continue;
|
||||
} else {
|
||||
// The current iterator is large enough, so we can forward the call to it.
|
||||
return inner.nth(to_skip + 1);
|
||||
return inner.nth(to_skip);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user