Compare commits

...

11 Commits

Author SHA1 Message Date
ManyTheFish
3bb39bae9a Activate only the necessary features for Japanese 2023-11-30 12:19:49 +01:00
meili-bors[bot]
b11f85a635 Merge #4205
4205: Prevent search hang on the processing index r=Kerollmops a=dureuill

Fixes #4206, an issue originally [reported on Discord](https://discord.com/channels/1006923006964154428/1148983671026618579/1148983671026618579) where having parallel search requests on more indexes than the index cache capacity would cause search requests on the currently updating index to hang until the index is done updating.

## Test setup

- Create 20 empty indexes by sending settings to them
- repeatedly send placeholder search requests to each of the indexes in a loop
- Create another index and send a significant batch of documents to index.
- Attempt to perform a search request on that last index.
  - Before this PR, the search request hangs while the index update task is processing
  - After this PR, the search request responds immediately even while the index update task is processing

## Changes

- When getting the handle to an index for some potentially long running batches of tasks, save it in the index scheduler.
- Drop the handle from the index-scheduler when the task is done so that we don't leak indexes.
- When getting an index from outside the task queue processor, check whether there is such a handle matching the requested index. If so, skip the cache entirely and clone the handle.

Co-authored-by: Louis Dureuil <louis.dureuil@xinra.net>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-11-13 10:36:01 +00:00
Louis Dureuil
a2d6dc8571 Fix typo, remove caching for the change of index 2023-11-13 10:44:36 +01:00
meili-bors[bot]
ee1701157f Merge #4204
4204: Throw error when the vector search is sent with the wrong size r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #4201 


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-11-13 09:43:20 +00:00
Louis Dureuil
8c649d8061 Throw error when the vector search is sent with the wrong size 2023-11-13 09:57:42 +01:00
Louis Dureuil
492fc086f0 cargo fmt 2023-11-12 21:53:11 +01:00
Louis Dureuil
a2d0c73b41 Save the currently updating index so that the search can access it at all times 2023-11-10 10:52:03 +01:00
meili-bors[bot]
54f0ee1ed2 Merge #4167
4167: Introduce the `meilitool` command line interface r=Kerollmops a=Kerollmops

This PR introduces a small tool to help the Cloud team:
 - Clear the tasks queue by removing all the tasks
 - Dump a Meilisearch database without having to enqueue the task
 - Access this `meilitool` binary from the Docker Image

## TODO
 - [x] Modify the Docker File to ship with this new tool (`@curquiza,` could you review that, please?)
 - [x] Clear the tasks queue by removing all the tasks
   - [x] Add more logs to explain what is happening
   - [x] Clear the `update_files` folder
 - [x] Dump a Meilisearch database without having to enqueue the task
   - [x] Add more logs to explain what is happening
   - [x] Introduce a flag to skip dumping enqueued and processing tasks.
   - [x] Dump the instance uid.
   - [x] Dump the keys.
   - [x] Dump the tasks with the update files.
   - [x] Dump the index documents and settings.
   - [ ] ~Dump the experimental features~

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-10-31 14:05:22 +00:00
Clément Renault
ce5647e730 Fix Dockerfile WORKDIR path 2023-10-30 17:27:59 +01:00
Clément Renault
b57b818b67 Don't use the last version of clap 2023-10-30 16:57:31 +01:00
Clément Renault
f7ea94e5f4 Modify the Dockerfile to compile meilisearch and meilitool 2023-10-30 16:32:17 +01:00
7 changed files with 69 additions and 34 deletions

58
Cargo.lock generated
View File

@@ -310,15 +310,16 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anstream"
version = "0.6.4"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44"
checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is-terminal",
"utf8parse",
]
@@ -348,9 +349,9 @@ dependencies = [
[[package]]
name = "anstyle-wincon"
version = "3.0.1"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628"
checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188"
dependencies = [
"anstyle",
"windows-sys 0.48.0",
@@ -776,19 +777,20 @@ dependencies = [
[[package]]
name = "clap"
version = "4.4.7"
version = "4.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac495e00dcec98c83465d5ad66c5c4fabd652fd6686e7c6269b117e729a6f17b"
checksum = "c27cdf28c0f604ba3f512b0c9a409f8de8513e4816705deb0498b627e7c3a3fd"
dependencies = [
"clap_builder",
"clap_derive",
"once_cell",
]
[[package]]
name = "clap_builder"
version = "4.4.7"
version = "4.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c77ed9a32a62e6ca27175d00d29d05ca32e396ea1eb5fb01d8256b669cec7663"
checksum = "08a9f1ab5e9f01a9b81f202e8562eb9a10de70abf9eaeac1be465c28b75aa4aa"
dependencies = [
"anstream",
"anstyle",
@@ -798,9 +800,9 @@ dependencies = [
[[package]]
name = "clap_derive"
version = "4.4.7"
version = "4.3.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442"
checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050"
dependencies = [
"heck",
"proc-macro2",
@@ -810,9 +812,9 @@ dependencies = [
[[package]]
name = "clap_lex"
version = "0.6.0"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b"
[[package]]
name = "cobs"
@@ -2072,9 +2074,9 @@ dependencies = [
[[package]]
name = "icu_compactdecimal_data"
version = "1.3.4"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51cc4515902110b79d180c561c13b87e5b42bad85edf719a1d59ec713cd6ccf7"
checksum = "c2e9b7585f26db531ea5aaedaa68cb66cd2be37fe698b33a289849ff3129545b"
[[package]]
name = "icu_datetime"
@@ -2103,9 +2105,9 @@ dependencies = [
[[package]]
name = "icu_datetime_data"
version = "1.3.4"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced82224d980ffebafebf443a85c062ac6e801a24415324d0f25962b088f55f4"
checksum = "078b2ed516a2f5054ee7f55b1fe970b92e90ae4cace8a0fe1e5f9fc2e94be609"
[[package]]
name = "icu_decimal"
@@ -2125,9 +2127,9 @@ dependencies = [
[[package]]
name = "icu_decimal_data"
version = "1.3.4"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20116c22b56b74384904ecd5e061fa7ece6e3eb26a48c524fc490ec8f46d26a2"
checksum = "3c064b3828953151f8c610bfff6fec776f958641249ebfd1cf36f073f0654e77"
[[package]]
name = "icu_displaynames"
@@ -2146,9 +2148,9 @@ dependencies = [
[[package]]
name = "icu_displaynames_data"
version = "1.3.4"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "220c0ba83e42b255fef61ba9b78f22ba2ce1e27559a4029e3e24092b64f14a06"
checksum = "60f9f56c427f1e80383667e8fb13c07707f6561839283115617cc67307a5d020"
[[package]]
name = "icu_list"
@@ -2272,9 +2274,9 @@ dependencies = [
[[package]]
name = "icu_properties_data"
version = "1.3.4"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98507b488098f45eb95ef495612a2012e4d8ad6095dda86cb2f1728aa2204a60"
checksum = "7c8bb3b67a8347e94d580434369e5c7ee89999b9309d04b7cfc88dfaa0f31b59"
[[package]]
name = "icu_provider"
@@ -3441,9 +3443,9 @@ dependencies = [
[[package]]
name = "obkv"
version = "0.2.1"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c459142426056c639ff88d053ebaaaeca0ee1411c94362892398ef4ccd81080"
checksum = "f69e48cd7c8e5bb52a1da1287fdbfd877c32673176583ce664cd63b201aba385"
[[package]]
name = "once_cell"
@@ -4204,9 +4206,9 @@ checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
[[package]]
name = "serde"
version = "1.0.189"
version = "1.0.190"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e422a44e74ad4001bdc8eede9a4570ab52f71190e9c076d14369f38b9200537"
checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7"
dependencies = [
"serde_derive",
]
@@ -4231,9 +4233,9 @@ dependencies = [
[[package]]
name = "serde_derive"
version = "1.0.189"
version = "1.0.190"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e48d1f918009ce3145511378cf68d613e3b3d9137d67272562080d68a2b32d5"
checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3"
dependencies = [
"proc-macro2",
"quote",

View File

@@ -3,7 +3,7 @@ FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
WORKDIR /meilisearch
WORKDIR /
ARG COMMIT_SHA
ARG COMMIT_DATE
@@ -17,7 +17,7 @@ RUN set -eux; \
if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release
cargo build --release -p meilisearch -p meilitool --no-default-features --features "analytics mini-dashboard japanese"
# Run
FROM alpine:3.16
@@ -28,9 +28,10 @@ ENV MEILI_SERVER_PROVIDER docker
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.
COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
# add meilisearch and meilitool to the `/bin` so you can run them from anywhere
# and they're easy to find.
COPY --from=compiler /target/release/meilisearch /bin/meilisearch
COPY --from=compiler /target/release/meilitool /bin/meilitool
# To stay compatible with the older version of the container (pre v0.27.0) we're
# going to symlink the meilisearch binary in the path to `/meilisearch`
RUN ln -s /bin/meilisearch /meilisearch

View File

@@ -923,6 +923,10 @@ impl IndexScheduler {
self.index_mapper.index(&rtxn, &index_uid)?
};
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
*self.currently_updating_index.write().unwrap() =
Some((index_uid.clone(), index.clone()));
let mut index_wtxn = index.write_txn()?;
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
index_wtxn.commit()?;

View File

@@ -39,6 +39,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
test_breakpoint_sdr: _,
planned_failures: _,
run_loop_iteration: _,
currently_updating_index: _,
} = scheduler;
let rtxn = env.read_txn().unwrap();

View File

@@ -331,6 +331,10 @@ pub struct IndexScheduler {
/// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf,
/// A few types of long running batches of tasks that act on a single index set this field
/// so that a handle to the index is available from other threads (search) in an optimized manner.
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
// ================= test
// The next entry is dedicated to the tests.
/// Provide a way to set a breakpoint in multiple parts of the scheduler.
@@ -374,6 +378,7 @@ impl IndexScheduler {
dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(),
version_file_path: self.version_file_path.clone(),
currently_updating_index: self.currently_updating_index.clone(),
#[cfg(test)]
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
#[cfg(test)]
@@ -470,6 +475,7 @@ impl IndexScheduler {
snapshots_path: options.snapshots_path,
auth_path: options.auth_path,
version_file_path: options.version_file_path,
currently_updating_index: Arc::new(RwLock::new(None)),
#[cfg(test)]
test_breakpoint_sdr,
@@ -652,6 +658,13 @@ impl IndexScheduler {
/// If you need to fetch information from or perform an action on all indexes,
/// see the `try_for_each_index` function.
pub fn index(&self, name: &str) -> Result<Index> {
    // Fast path: if the requested index is the one saved in
    // `currently_updating_index`, hand out a clone of that handle and
    // bypass the index mapper entirely.
    {
        let updating = self.currently_updating_index.read().unwrap();
        match updating.as_ref() {
            Some((current_name, current_index)) if current_name == name => {
                return Ok(current_index.clone());
            }
            _ => {}
        }
        // The read guard is released here, before the index mapper is queried.
    }

    // Regular path: resolve the index through the index mapper.
    let rtxn = self.env.read_txn()?;
    self.index_mapper.index(&rtxn, name)
}
@@ -1133,6 +1146,9 @@ impl IndexScheduler {
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
};
// Reset the currently updating index to relinquish the index handle
*self.currently_updating_index.write().unwrap() = None;
#[cfg(test)]
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;

View File

@@ -10,7 +10,7 @@ license.workspace = true
[dependencies]
anyhow = "1.0.75"
clap = { version = "4.4.7", features = ["derive"] }
clap = { version = "4.2.1", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
meilisearch-auth = { path = "../meilisearch-auth" }

View File

@@ -434,7 +434,18 @@ pub fn execute_search(
let mut search = Search::default();
let docids = match ctx.index.vector_hnsw(ctx.txn)? {
Some(hnsw) => {
if let Some(expected_size) = hnsw.iter().map(|(_, point)| point.len()).next() {
if vector.len() != expected_size {
return Err(UserError::InvalidVectorDimensions {
expected: expected_size,
found: vector.len(),
}
.into());
}
}
let vector = NDotProductPoint::new(vector.clone());
let neighbors = hnsw.search(&vector, &mut search);
let mut docids = Vec::new();