Compare commits


1 commit

Author: ManyTheFish
SHA1: 2373adf9b7
Message: Add ollama url in settings GET
Date: 2024-05-02 15:36:13 +02:00
60 changed files with 565 additions and 1083 deletions


@@ -187,8 +187,8 @@ They are JSON files with the following structure (comments are not actually supp
},
// Core of the workload.
// A list of commands to run sequentially.
-// Optional: A precommand is a request to the Meilisearch instance that is executed before the profiling runs.
-"precommands": [
+// A command is a request to the Meilisearch instance that is executed while the profiling runs.
+"commands": [
{
// Meilisearch route to call. `http://localhost:7700/` will be prepended.
"route": "indexes/movies/settings",
@@ -224,11 +224,8 @@ They are JSON files with the following structure (comments are not actually supp
// - DontWait: run the next command without waiting the response to this one.
// - WaitForResponse: run the next command as soon as the response from the server is received.
// - WaitForTask: run the next command once **all** the Meilisearch tasks created up to now have finished processing.
-"synchronous": "WaitForTask"
-}
-],
-// A command is a request to the Meilisearch instance that is executed while the profiling runs.
-"commands": [
+"synchronous": "DontWait"
+},
{
"route": "indexes/movies/documents",
"method": "POST",

Cargo.lock (generated)
File diff suppressed because it is too large.


@@ -15,4 +15,4 @@ time = { version = "0.3.34", features = ["parsing"] }
[build-dependencies]
anyhow = "1.0.80"
-vergen-gitcl = "1.0.0-beta.2"
+vergen-git2 = "1.0.0-beta.2"


@@ -8,7 +8,7 @@ fn emit_git_variables() -> anyhow::Result<()> {
// Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
// in the corresponding GitHub workflow (publish_docker.yml).
// This is due to the Dockerfile building the binary outside of the git directory.
-let mut builder = vergen_gitcl::GitclBuilder::default();
+let mut builder = vergen_git2::Git2Builder::default();
builder.branch(true);
builder.commit_timestamp(true);
@@ -16,6 +16,7 @@ fn emit_git_variables() -> anyhow::Result<()> {
builder.describe(true, true, None);
builder.sha(false);
-let gitcl = builder.build()?;
-vergen_gitcl::Emitter::default().fail_on_error().add_instructions(&gitcl)?.emit()
+let git2 = builder.build()?;
+vergen_git2::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
}
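Pieced together from the calls visible in these two hunks (the function signature comes from the hunk header), the vergen-git2 side of this build script would look roughly like the sketch below. It is a reconstruction for readability, not the file's verbatim contents, and it assumes the `anyhow` and `vergen-git2` build-dependencies shown earlier in this compare.

```rust
// Reconstruction of the vergen-git2 variant shown in the hunks above (illustrative).
fn emit_git_variables() -> anyhow::Result<()> {
    let mut builder = vergen_git2::Git2Builder::default();
    builder.branch(true);
    builder.commit_timestamp(true);
    builder.describe(true, true, None);
    builder.sha(false);
    let git2 = builder.build()?;
    vergen_git2::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
}

fn main() -> anyhow::Result<()> {
    emit_git_variables()
}
```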


@@ -568,7 +568,7 @@ pub mod tests {
insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
-// but it also works with other sequences
+// but it also works with other sequencies
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
}


@@ -37,7 +37,7 @@ time = { version = "0.3.31", features = [
"macros", "macros",
] } ] }
tracing = "0.1.40" tracing = "0.1.40"
ureq = "2.9.7" ureq = "2.9.1"
uuid = { version = "1.6.1", features = ["serde", "v4"] } uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]


@@ -13,7 +13,7 @@ We can combine the two tasks in a single batch:
1. import documents X and Y
Processing this batch is functionally equivalent to processing the two
-tasks individually, but should be much faster since we are only performing
+tasks individally, but should be much faster since we are only performing
one indexing operation.
*/
@@ -785,12 +785,10 @@ impl IndexScheduler {
let dst = temp_snapshot_dir.path().join("auth");
fs::create_dir_all(&dst)?;
// TODO We can't use the open_auth_store_env function here but we should
-let auth = unsafe {
-milli::heed::EnvOpenOptions::new()
-.map_size(1024 * 1024 * 1024) // 1 GiB
-.max_dbs(2)
-.open(&self.auth_path)
-}?;
+let auth = milli::heed::EnvOpenOptions::new()
+.map_size(1024 * 1024 * 1024) // 1 GiB
+.max_dbs(2)
+.open(&self.auth_path)?;
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 5. Copy and tarball the flat snapshot


@@ -453,12 +453,10 @@ impl IndexScheduler {
)
};
-let env = unsafe {
-heed::EnvOpenOptions::new()
-.max_dbs(11)
-.map_size(budget.task_db_size)
-.open(options.tasks_path)
-}?;
+let env = heed::EnvOpenOptions::new()
+.max_dbs(11)
+.map_size(budget.task_db_size)
+.open(options.tasks_path)?;
let features = features::FeatureData::new(&env, options.instance_features)?;
@@ -587,9 +585,9 @@ impl IndexScheduler {
}
fn is_good_heed(tasks_path: &Path, map_size: usize) -> bool {
-if let Ok(env) = unsafe {
+if let Ok(env) =
heed::EnvOpenOptions::new().map_size(clamp_to_page_size(map_size)).open(tasks_path)
-} {
+{
env.prepare_for_closing().wait();
true
} else {


@@ -272,9 +272,9 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
}
for index_uid in index_uids {
if index_uid == swap.0 {
-swap.1.clone_into(index_uid);
+*index_uid = swap.1.to_owned();
} else if index_uid == swap.1 {
-swap.0.clone_into(index_uid);
+*index_uid = swap.0.to_owned();
}
}
}
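The only difference between the two sides of this hunk is how the existing `String` is overwritten: `clone_into` reuses the target's buffer when it has enough capacity, while `*index_uid = swap.1.to_owned()` always builds a fresh `String` and drops the old one. A small self-contained sketch of the same pattern, with illustrative values:

```rust
fn main() {
    let replacement = "movies";

    // Minus side: `clone_into` writes into `uid`'s existing buffer when it has
    // enough capacity, so no new allocation is needed here.
    let mut uid = String::from("movies_backup");
    replacement.clone_into(&mut uid);
    assert_eq!(uid, "movies");

    // Plus side: always allocates a brand-new `String` and drops the old one.
    let mut uid = String::from("movies_backup");
    uid = replacement.to_owned();
    assert_eq!(uid, "movies");
}
```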


@@ -49,7 +49,7 @@ pub fn open_auth_store_env(path: &Path) -> milli::heed::Result<milli::heed::Env>
let mut options = EnvOpenOptions::new();
options.map_size(AUTH_STORE_SIZE); // 1GB
options.max_dbs(2);
-unsafe { options.open(path) }
+options.open(path)
}
impl HeedAuthStore {
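Several hunks in this compare (here, in index-scheduler, in meilitool, and in milli's `Index::new`) differ only in whether `EnvOpenOptions::open` is wrapped in an `unsafe` block, which lines up with the heed bump between 0.20.0-alpha.9 and 0.20.1 visible in milli's Cargo.toml further down. A minimal sketch of the wrapped form, assuming a heed version whose `open` is marked `unsafe`; the constant value is an assumption taken from the `// 1GB` comment above, not part of this diff:

```rust
use std::path::Path;

use heed::{Env, EnvOpenOptions};

// Assumed value; the constant's definition is not shown in this compare.
const AUTH_STORE_SIZE: usize = 1024 * 1024 * 1024; // 1 GiB

fn open_auth_store_env(path: &Path) -> heed::Result<Env> {
    let mut options = EnvOpenOptions::new();
    options.map_size(AUTH_STORE_SIZE);
    options.max_dbs(2);
    // With an `unsafe` `open`, the caller takes responsibility for not opening
    // the same environment twice; the plus side calls `open` directly instead.
    unsafe { options.open(path) }
}
```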


@@ -26,7 +26,7 @@ pub type DeserrQueryParamError<C = BadRequest> = DeserrError<DeserrQueryParam, C
/// A request deserialization error.
///
-/// The first generic parameter is a marker type describing the format of the request: either json (e.g. [`DeserrJson`] or [`DeserrQueryParam`]).
+/// The first generic paramater is a marker type describing the format of the request: either json (e.g. [`DeserrJson`] or [`DeserrQueryParam`]).
/// The second generic parameter is the default error code for the deserialization error, in case it is not given.
pub struct DeserrError<Format, C: Default + ErrorCode> {
pub msg: String,


@@ -423,6 +423,7 @@ impl ErrorCode for HeedError {
HeedError::Mdb(_)
| HeedError::Encoding(_)
| HeedError::Decoding(_)
+| HeedError::InvalidDatabaseTyping
| HeedError::DatabaseClosing
| HeedError::BadOpenOptions { .. } => Code::Internal,
}


@@ -75,7 +75,7 @@ reqwest = { version = "0.11.23", features = [
"rustls-tls", "rustls-tls",
"json", "json",
], default-features = false } ], default-features = false }
rustls = "0.21.12" rustls = "0.21.6"
rustls-pemfile = "1.0.2" rustls-pemfile = "1.0.2"
segment = { version = "0.2.3", optional = true } segment = { version = "0.2.3", optional = true }
serde = { version = "1.0.195", features = ["derive"] } serde = { version = "1.0.195", features = ["derive"] }
@@ -132,7 +132,7 @@ reqwest = { version = "0.11.23", features = [
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.9.0", optional = true }
-zip = { version = "1.3.1", features = ["deflate"], default-features = false, optional = true }
+zip = { version = "0.6.6", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]


@@ -59,12 +59,10 @@ where
let request_path = req.path();
let is_registered_resource = req.resource_map().has_resource(request_path);
if is_registered_resource {
-let request_pattern = req.match_pattern();
-let metric_path = request_pattern.as_ref().map_or(request_path, String::as_str);
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
-.with_label_values(&[&request_method, metric_path])
+.with_label_values(&[&request_method, request_path])
.start_timer(),
);
}


@@ -367,6 +367,12 @@ async fn get_version(
})
}
+#[derive(Serialize)]
+struct KeysResponse {
+private: Option<String>,
+public: Option<String>,
+}
pub async fn get_health(
index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>,


@@ -730,7 +730,7 @@ pub fn perform_search(
let mut ids = BTreeSet::new();
for attr in attrs {
if attr == "*" {
-ids.clone_from(&displayed_ids);
+ids = displayed_ids.clone();
break;
}


@@ -85,13 +85,8 @@ impl SearchQueue {
},
search_request = receive_new_searches.recv() => {
-let search_request = match search_request {
-Some(search_request) => search_request,
-// This should never happen while actix-web is running, but it's not a reason to crash
-// and it can generate a lot of noise in the tests.
-None => continue,
-};
+// this unwrap is safe because we're sure the `SearchQueue` still lives somewhere in actix-web
+let search_request = search_request.unwrap();
if searches_running < usize::from(parallelism) && queue.is_empty() {
searches_running += 1;
// if the search requests die it's not a hard error on our side


@@ -85,8 +85,8 @@ async fn simple_search() {
)
.await;
snapshot!(code, @"200 OK");
-snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
-snapshot!(response["semanticHitCount"], @"2");
+snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.996969696969697},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.996969696969697},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
+snapshot!(response["semanticHitCount"], @"1");
let (response, code) = index
.search_post(
@@ -331,7 +331,7 @@ async fn query_combination() {
.await;
snapshot!(code, @"200 OK");
-snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9242424242424242}]"###);
+snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.996969696969697},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.996969696969697},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.8848484848484849}]"###);
snapshot!(response["semanticHitCount"], @"null");
// query + vector, no hybrid keyword =>
@@ -374,6 +374,6 @@ async fn query_combination() {
.await;
snapshot!(code, @"200 OK");
-snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9242424242424242}]"###);
+snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848}]"###);
snapshot!(response["semanticHitCount"], @"0");
}


@@ -921,7 +921,7 @@ async fn test_score_details() {
"order": 3, "order": 3,
"attributeRankingOrderScore": 1.0, "attributeRankingOrderScore": 1.0,
"queryWordDistanceScore": 0.8095238095238095, "queryWordDistanceScore": 0.8095238095238095,
"score": 0.8095238095238095 "score": 0.9727891156462584
}, },
"exactness": { "exactness": {
"order": 4, "order": 4,


@@ -285,10 +285,10 @@ async fn attributes_ranking_rule_order() {
@r###" @r###"
[ [
{ {
"id": "1" "id": "2"
}, },
{ {
"id": "2" "id": "1"
} }
] ]
"### "###


@@ -1,5 +1,6 @@
use std::time::Duration;
+use actix_rt::time::sleep;
use meili_snap::{json_string, snapshot};
use meilisearch::option::ScheduleSnapshot;
use meilisearch::Opt;
@@ -52,29 +53,11 @@ async fn perform_snapshot() {
index.load_test_set().await;
-let (task, code) = server.index("test1").create(Some("prim")).await;
-meili_snap::snapshot!(code, @"202 Accepted");
-index.wait_task(task.uid()).await;
-// wait for the _next task_ to process, aka the snapshot that should be enqueued at some point
-println!("waited for the next task to finish");
-let now = std::time::Instant::now();
-let next_task = task.uid() + 1;
-loop {
-let (value, code) = index.get_task(next_task).await;
-dbg!(&value);
-if code != 404 && value["status"].as_str() == Some("succeeded") {
-break;
-}
-if now.elapsed() > Duration::from_secs(30) {
-panic!("The snapshot didn't schedule in 30s even though it was supposed to be scheduled every 2s: {}",
-serde_json::to_string_pretty(&value).unwrap()
-);
-}
-}
+server.index("test1").create(Some("prim")).await;
+index.wait_task(2).await;
+sleep(Duration::from_secs(2)).await;
let temp = tempfile::tempdir().unwrap();


@@ -80,7 +80,9 @@ fn main() -> anyhow::Result<()> {
/// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
let path = db_path.join("tasks");
-let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) }
+let env = EnvOpenOptions::new()
+.max_dbs(100)
+.open(&path)
.with_context(|| format!("While trying to open {:?}", path.display()))?;
eprintln!("Deleting tasks from the database...");
@@ -127,7 +129,7 @@ fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
}
}
-eprintln!("Successfully deleted {count} content files from disk!");
+eprintln!("Sucessfully deleted {count} content files from disk!");
Ok(())
}
@@ -191,7 +193,9 @@ fn export_a_dump(
FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
let index_scheduler_path = db_path.join("tasks");
-let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
+let env = EnvOpenOptions::new()
+.max_dbs(100)
+.open(&index_scheduler_path)
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
eprintln!("Dumping the keys...");


@@ -30,7 +30,7 @@ grenad = { version = "0.4.6", default-features = false, features = [
"rayon", "rayon",
"tempfile", "tempfile",
] } ] }
heed = { version = "0.20.1", default-features = false, features = [ heed = { version = "0.20.0-alpha.9", default-features = false, features = [
"serde-json", "serde-json",
"serde-bincode", "serde-bincode",
"read-txn-no-tls", "read-txn-no-tls",
@@ -82,10 +82,10 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
] }
tiktoken-rs = "0.5.8"
liquid = "0.26.4"
-arroy = "0.3.1"
+arroy = "0.2.0"
rand = "0.8.5"
tracing = "0.1.40"
-ureq = { version = "2.9.7", features = ["json"] }
+ureq = { version = "2.9.6", features = ["json"] }
url = "2.5.0"
[dev-dependencies]


@@ -48,7 +48,7 @@ fn main() -> Result<(), Box<dyn Error>> {
let start = Instant::now();
-let mut ctx = SearchContext::new(&index, &txn)?;
+let mut ctx = SearchContext::new(&index, &txn);
let universe = filtered_universe(&ctx, &None)?;
let docs = execute_search(


@@ -1,3 +0,0 @@
target
corpus
artifacts


@@ -203,7 +203,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) {
"string" => (field_name, AllowedType::String), "string" => (field_name, AllowedType::String),
"boolean" => (field_name, AllowedType::Boolean), "boolean" => (field_name, AllowedType::Boolean),
"number" => (field_name, AllowedType::Number), "number" => (field_name, AllowedType::Number),
// if the pattern isn't recognized, we keep the whole field. // if the pattern isn't reconized, we keep the whole field.
_otherwise => (header, AllowedType::String), _otherwise => (header, AllowedType::String),
}, },
None => (header, AllowedType::String), None => (header, AllowedType::String),


@@ -32,8 +32,6 @@ pub enum InternalError {
DatabaseClosing,
#[error("Missing {} in the {db_name} database.", key.unwrap_or("key"))]
DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
-#[error("Missing {key} in the fieldids weights mapping.")]
-FieldidsWeightsMapMissingEntry { key: FieldId },
#[error(transparent)]
FieldIdMapMissingEntry(#[from] FieldIdMapMissingEntry),
#[error("Missing {key} in the field id mapping.")]
@@ -48,6 +46,8 @@ pub enum InternalError {
GrenadInvalidFormatVersion,
#[error("Invalid merge while processing {process}")]
IndexingMergingKeys { process: &'static str },
+#[error("{}", HeedError::InvalidDatabaseTyping)]
+InvalidDatabaseTyping,
#[error(transparent)]
RayonThreadPool(#[from] ThreadPoolBuildError),
#[error(transparent)]
@@ -427,6 +427,7 @@ impl From<HeedError> for Error {
// TODO use the encoding
HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
+HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
HeedError::DatabaseClosing => InternalError(DatabaseClosing),
HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
}


@@ -1,48 +0,0 @@
//! The fieldids weights map is in charge of storing linking the searchable fields with their weights.
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use crate::{FieldId, FieldsIdsMap, Weight};
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct FieldidsWeightsMap {
map: HashMap<FieldId, Weight>,
}
impl FieldidsWeightsMap {
/// Insert a field id -> weigth into the map.
/// If the map did not have this key present, `None` is returned.
/// If the map did have this key present, the value is updated, and the old value is returned.
pub fn insert(&mut self, fid: FieldId, weight: Weight) -> Option<Weight> {
self.map.insert(fid, weight)
}
/// Create the map from the fields ids maps.
/// Should only be called in the case there are NO searchable attributes.
/// All the fields will be inserted in the order of the fields ids map with a weight of 0.
pub fn from_field_id_map_without_searchable(fid_map: &FieldsIdsMap) -> Self {
FieldidsWeightsMap { map: fid_map.ids().map(|fid| (fid, 0)).collect() }
}
/// Removes a field id from the map, returning the associated weight previously in the map.
pub fn remove(&mut self, fid: FieldId) -> Option<Weight> {
self.map.remove(&fid)
}
/// Returns weight corresponding to the key.
pub fn weight(&self, fid: FieldId) -> Option<Weight> {
self.map.get(&fid).copied()
}
/// Returns highest weight contained in the map if any.
pub fn max_weight(&self) -> Option<Weight> {
self.map.values().copied().max()
}
/// Return an iterator visiting all field ids in arbitrary order.
pub fn ids(&self) -> impl Iterator<Item = FieldId> + '_ {
self.map.keys().copied()
}
}


@@ -1,6 +1,5 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
-use std::convert::TryInto;
use std::fs::File;
use std::path::Path;
@@ -26,9 +25,8 @@ use crate::proximity::ProximityPrecision;
use crate::vector::EmbeddingConfig;
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
-FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
-FieldidsWeightsMap, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
-Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
+FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
+Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64,
};
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -44,7 +42,6 @@ pub mod main_key {
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
-pub const FIELDIDS_WEIGHTS_MAP_KEY: &str = "fieldids-weights-map";
pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
pub const GEO_RTREE_KEY: &str = "geo-rtree";
pub const PRIMARY_KEY_KEY: &str = "primary-key";
@@ -184,7 +181,7 @@ impl Index {
options.max_dbs(25);
-let env = unsafe { options.open(path) }?;
+let env = options.open(path)?;
let mut wtxn = env.write_txn()?;
let main = env.database_options().name(MAIN).create(&mut wtxn)?;
let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
@@ -294,11 +291,6 @@ impl Index {
self.env.read_txn()
}
-/// Create a static read transaction to be able to read the index without keeping a reference to it.
-pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static>> {
-self.env.clone().static_read_txn()
-}
/// Returns the canonicalized path where the heed `Env` of this `Index` lives.
pub fn path(&self) -> &Path {
self.env.path()
@@ -422,65 +414,6 @@ impl Index {
.unwrap_or_default())
}
/* fieldids weights map */
// This maps the fields ids to their weights.
// Their weights is defined by the ordering of the searchable attributes.
/// Writes the fieldids weights map which associates the field ids to their weights
pub(crate) fn put_fieldids_weights_map(
&self,
wtxn: &mut RwTxn,
map: &FieldidsWeightsMap,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<_>>().put(
wtxn,
main_key::FIELDIDS_WEIGHTS_MAP_KEY,
map,
)
}
/// Get the fieldids weights map which associates the field ids to their weights
pub fn fieldids_weights_map(&self, rtxn: &RoTxn) -> heed::Result<FieldidsWeightsMap> {
self.main
.remap_types::<Str, SerdeJson<_>>()
.get(rtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)?
.map(Ok)
.unwrap_or_else(|| {
Ok(FieldidsWeightsMap::from_field_id_map_without_searchable(
&self.fields_ids_map(rtxn)?,
))
})
}
/// Delete the fieldsids weights map
pub fn delete_fieldids_weights_map(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)
}
pub fn searchable_fields_and_weights<'a>(
&self,
rtxn: &'a RoTxn,
) -> Result<Vec<(Cow<'a, str>, FieldId, Weight)>> {
let fid_map = self.fields_ids_map(rtxn)?;
let weight_map = self.fieldids_weights_map(rtxn)?;
let searchable = self.searchable_fields(rtxn)?;
searchable
.into_iter()
.map(|field| -> Result<_> {
let fid = fid_map.id(&field).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
field_name: field.to_string(),
process: "searchable_fields_and_weights",
})?;
let weight = weight_map
.weight(fid)
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
Ok((field, fid, weight))
})
.collect()
}
/* geo rtree */
/// Writes the provided `rtree` which associates coordinates to documents ids.
@@ -645,42 +578,33 @@ impl Index {
wtxn: &mut RwTxn,
user_fields: &[&str],
fields_ids_map: &FieldsIdsMap,
-) -> Result<()> {
+) -> heed::Result<()> {
// We can write the user defined searchable fields as-is.
self.put_user_defined_searchable_fields(wtxn, user_fields)?;
-let mut weights = FieldidsWeightsMap::default();
// Now we generate the real searchable fields:
// 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
// 2. Iterate over the user defined searchable fields.
// 3. If a user defined field is a subset of a field defined in the fields_ids_map
-// (ie doggo.name is a subset of doggo) right after doggo and with the same weight.
-let mut real_fields = Vec::new();
-for (id, field_from_map) in fields_ids_map.iter() {
-for (weight, user_field) in user_fields.iter().enumerate() {
+// (ie doggo.name is a subset of doggo) then we push it at the end of the fields.
+let mut real_fields = user_fields.to_vec();
+for field_from_map in fields_ids_map.names() {
+for user_field in user_fields {
if crate::is_faceted_by(field_from_map, user_field)
-&& !real_fields.contains(&field_from_map)
+&& !user_fields.contains(&field_from_map)
{
real_fields.push(field_from_map);
-let weight: u16 =
-weight.try_into().map_err(|_| UserError::AttributeLimitReached)?;
-weights.insert(id, weight);
}
}
}
-self.put_searchable_fields(wtxn, &real_fields)?;
-self.put_fieldids_weights_map(wtxn, &weights)?;
-Ok(())
+self.put_searchable_fields(wtxn, &real_fields)
}
pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
let did_delete_searchable = self.delete_searchable_fields(wtxn)?;
let did_delete_user_defined = self.delete_user_defined_searchable_fields(wtxn)?;
-self.delete_fieldids_weights_map(wtxn)?;
Ok(did_delete_searchable || did_delete_user_defined)
}
@@ -699,31 +623,28 @@ impl Index {
}
/// Returns the searchable fields, those are the fields that are indexed,
-pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Vec<Cow<'t, str>>> {
+/// if the searchable fields aren't there it means that **all** the fields are indexed.
+pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> {
self.main
.remap_types::<Str, SerdeBincode<Vec<&'t str>>>()
-.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)?
-.map(|fields| Ok(fields.into_iter().map(Cow::Borrowed).collect()))
-.unwrap_or_else(|| {
-Ok(self
-.fields_ids_map(rtxn)?
-.names()
-.map(|field| Cow::Owned(field.to_string()))
-.collect())
-})
+.get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)
}
/// Identical to `searchable_fields`, but returns the ids instead.
-pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Vec<FieldId>> {
-let fields = self.searchable_fields(rtxn)?;
-let fields_ids_map = self.fields_ids_map(rtxn)?;
-let mut fields_ids = Vec::new();
-for name in fields {
-if let Some(field_id) = fields_ids_map.id(&name) {
-fields_ids.push(field_id);
-}
-}
-Ok(fields_ids)
-}
+pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Option<Vec<FieldId>>> {
+match self.searchable_fields(rtxn)? {
+Some(fields) => {
+let fields_ids_map = self.fields_ids_map(rtxn)?;
+let mut fields_ids = Vec::new();
+for name in fields {
+if let Some(field_id) = fields_ids_map.id(name) {
+fields_ids.push(field_id);
+}
+}
+Ok(Some(fields_ids))
+}
+None => Ok(None),
+}
+}
/// Writes the searchable fields, when this list is specified, only these are indexed. /// Writes the searchable fields, when this list is specified, only these are indexed.
@@ -1789,14 +1710,10 @@ pub(crate) mod tests {
]))
.unwrap();
-db_snap!(index, field_distribution, @r###"
-age 1 |
-id 2 |
-name 2 |
-"###);
+db_snap!(index, field_distribution, 1);
db_snap!(index, word_docids,
@r###"
1 [0, ]
2 [1, ]
20 [1, ]
@@ -1805,6 +1722,18 @@ pub(crate) mod tests {
"### "###
); );
db_snap!(index, field_distribution);
db_snap!(index, field_distribution,
@r###"
age 1 |
id 2 |
name 2 |
"###
);
// snapshot_index!(&index, "1", include: "^field_distribution$");
// we add all the documents a second time. we are supposed to get the same // we add all the documents a second time. we are supposed to get the same
// field_distribution in the end // field_distribution in the end
index index
@@ -1891,7 +1820,7 @@ pub(crate) mod tests {
// ensure we get the right real searchable fields + user defined searchable fields
let rtxn = index.read_txn().unwrap();
-let real = index.searchable_fields(&rtxn).unwrap();
+let real = index.searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
@@ -1911,7 +1840,7 @@ pub(crate) mod tests {
// ensure we get the right real searchable fields + user defined searchable fields
let rtxn = index.read_txn().unwrap();
-let real = index.searchable_fields(&rtxn).unwrap();
+let real = index.searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(real, &["doggo", "name"]);
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(user_defined, &["doggo", "name"]);
@@ -1927,7 +1856,7 @@ pub(crate) mod tests {
// ensure we get the right real searchable fields + user defined searchable fields
let rtxn = index.read_txn().unwrap();
-let real = index.searchable_fields(&rtxn).unwrap();
+let real = index.searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]);
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
@@ -2466,14 +2395,6 @@ pub(crate) mod tests {
11 0
4 1
"###);
-db_snap!(index, fields_ids_map, @r###"
-0 primary_key |
-"###);
-db_snap!(index, searchable_fields, @r###"["primary_key"]"###);
-db_snap!(index, fieldids_weights_map, @r###"
-fid weight
-0 0 |
-"###);
index
.add_documents(documents!([
@@ -2489,16 +2410,6 @@ pub(crate) mod tests {
11 0
4 1
"###);
-db_snap!(index, fields_ids_map, @r###"
-0 primary_key |
-1 a |
-"###);
-db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
-db_snap!(index, fieldids_weights_map, @r###"
-fid weight
-0 0 |
-1 0 |
-"###);
index.delete_documents(Default::default());
@@ -2509,16 +2420,6 @@ pub(crate) mod tests {
11 0
4 1
"###);
-db_snap!(index, fields_ids_map, @r###"
-0 primary_key |
-1 a |
-"###);
-db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
-db_snap!(index, fieldids_weights_map, @r###"
-fid weight
-0 0 |
-1 0 |
-"###);
index
.add_documents(documents!([
@@ -2534,16 +2435,6 @@ pub(crate) mod tests {
11 0
4 1
"###);
-db_snap!(index, fields_ids_map, @r###"
-0 primary_key |
-1 a |
-"###);
-db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
-db_snap!(index, fieldids_weights_map, @r###"
-fid weight
-0 0 |
-1 0 |
-"###);
let rtxn = index.read_txn().unwrap();
let search = Search::new(&rtxn, &index);
@@ -2629,104 +2520,4 @@ pub(crate) mod tests {
db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted
}
#[test]
fn swapping_searchable_attributes() {
// See https://github.com/meilisearch/meilisearch/issues/4484
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("name")]);
settings.set_filterable_fields(HashSet::from([S("age")]));
})
.unwrap();
index
.add_documents(documents!({ "id": 1, "name": "Many", "age": 28, "realName": "Maxime" }))
.unwrap();
db_snap!(index, fields_ids_map, @r###"
0 name |
1 id |
2 age |
3 realName |
"###);
db_snap!(index, searchable_fields, @r###"["name"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
"###);
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("name"), S("realName")]);
settings.set_filterable_fields(HashSet::from([S("age")]));
})
.unwrap();
// The order of the field id map shouldn't change
db_snap!(index, fields_ids_map, @r###"
0 name |
1 id |
2 age |
3 realName |
"###);
db_snap!(index, searchable_fields, @r###"["name", "realName"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
3 1 |
"###);
}
#[test]
fn attribute_weights_after_swapping_searchable_attributes() {
// See https://github.com/meilisearch/meilisearch/issues/4484
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("name"), S("beverage")]);
})
.unwrap();
index
.add_documents(documents!([
{ "id": 0, "name": "kefir", "beverage": "water" },
{ "id": 1, "name": "tamo", "beverage": "kefir" }
]))
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
let results = search.query("kefir").execute().unwrap();
// We should find kefir the dog first
insta::assert_debug_snapshot!(results.documents_ids, @r###"
[
0,
1,
]
"###);
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("beverage"), S("name")]);
})
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
let results = search.query("kefir").execute().unwrap();
// We should find tamo first
insta::assert_debug_snapshot!(results.documents_ids, @r###"
[
1,
0,
]
"###);
}
}


@@ -28,7 +28,6 @@ pub mod vector;
#[cfg(test)]
#[macro_use]
pub mod snapshot_tests;
-mod fieldids_weights_map;
use std::collections::{BTreeMap, HashMap};
use std::convert::{TryFrom, TryInto};
@@ -53,7 +52,6 @@ pub use self::error::{
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
};
pub use self::external_documents_ids::ExternalDocumentsIds;
-pub use self::fieldids_weights_map::FieldidsWeightsMap;
pub use self::fields_ids_map::FieldsIdsMap;
pub use self::heed_codec::{
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
@@ -79,7 +77,6 @@ pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
pub type FieldDistribution = BTreeMap<String, u64>;
pub type FieldId = u16;
-pub type Weight = u16;
pub type Object = serde_json::Map<String, serde_json::Value>;
pub type Position = u32;
pub type RelativePosition = u16;


@@ -147,7 +147,7 @@ impl<'a> Search<'a> {
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
if has_vector_search {
-let ctx = SearchContext::new(self.index, self.rtxn)?;
+let ctx = SearchContext::new(self.index, self.rtxn);
filtered_universe(&ctx, &self.filter)
} else {
Ok(self.execute()?.candidates)
@@ -155,10 +155,10 @@ impl<'a> Search<'a> {
}
pub fn execute(&self) -> Result<SearchResult> {
-let mut ctx = SearchContext::new(self.index, self.rtxn)?;
+let mut ctx = SearchContext::new(self.index, self.rtxn);
if let Some(searchable_attributes) = self.searchable_attributes {
-ctx.attributes_to_search_on(searchable_attributes)?;
+ctx.searchable_attributes(searchable_attributes)?;
}
let universe = filtered_universe(&ctx, &self.filter)?;


@@ -101,7 +101,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
let mut ranking_rule_universes: Vec<RoaringBitmap> =
vec![RoaringBitmap::default(); ranking_rules_len];
-ranking_rule_universes[0].clone_from(universe);
+ranking_rule_universes[0] = universe.clone();
let mut cur_ranking_rule_index = 0;
/// Finish iterating over the current ranking rule, yielding
@@ -232,7 +232,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
}
cur_ranking_rule_index += 1;
-ranking_rule_universes[cur_ranking_rule_index].clone_from(&next_bucket.candidates);
+ranking_rule_universes[cur_ranking_rule_index] = next_bucket.candidates.clone();
logger.start_iteration_ranking_rule(
cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(),


@@ -163,7 +163,7 @@ impl<'ctx> SearchContext<'ctx> {
Some(restricted_fids) => {
let interned = self.word_interner.get(word).as_str();
let keys: Vec<_> =
-restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
+restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
@@ -192,7 +192,7 @@ impl<'ctx> SearchContext<'ctx> {
Some(restricted_fids) => {
let interned = self.word_interner.get(word).as_str();
let keys: Vec<_> =
-restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
+restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
@@ -242,7 +242,7 @@ impl<'ctx> SearchContext<'ctx> {
Some(restricted_fids) => {
let interned = self.word_interner.get(prefix).as_str();
let keys: Vec<_> =
-restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
+restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
@@ -271,7 +271,7 @@ impl<'ctx> SearchContext<'ctx> {
Some(restricted_fids) => {
let interned = self.word_interner.get(prefix).as_str();
let keys: Vec<_> =
-restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
+restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
@@ -315,7 +315,11 @@ impl<'ctx> SearchContext<'ctx> {
.map_err(heed::Error::Decoding)?
} else {
// Compute the distance at the attribute level and store it in the cache.
-let fids = self.index.searchable_fields_ids(self.txn)?;
+let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
+fids
+} else {
+self.index.fields_ids_map(self.txn)?.ids().collect()
+};
let mut docids = RoaringBitmap::new();
for fid in fids {
// for each field, intersect left word bitmap and right word bitmap,
@@ -404,7 +408,11 @@ impl<'ctx> SearchContext<'ctx> {
let prefix_docids = match proximity_precision {
ProximityPrecision::ByAttribute => {
// Compute the distance at the attribute level and store it in the cache.
-let fids = self.index.searchable_fields_ids(self.txn)?;
+let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
+fids
+} else {
+self.index.fields_ids_map(self.txn)?.ids().collect()
+};
let mut prefix_docids = RoaringBitmap::new();
// for each field, intersect left word bitmap and right word bitmap,
// then merge the result in a global bitmap before storing it in the cache.


@@ -184,7 +184,13 @@ impl State {
return Ok(State::Empty(query_graph.clone()));
}
-let searchable_fields_ids = ctx.index.searchable_fields_ids(ctx.txn)?;
+let searchable_fields_ids = {
+if let Some(fids) = ctx.index.searchable_fields_ids(ctx.txn)? {
+fids
+} else {
+ctx.index.fields_ids_map(ctx.txn)?.ids().collect()
+}
+};
let mut candidates_per_attribute = Vec::with_capacity(searchable_fields_ids.len());
// then check that there exists at least one attribute that has all of the terms


@@ -42,7 +42,7 @@ fn facet_number_values<'a>(
}
/// Define the strategy used by the geo sort.
-/// The parameter represents the cache size, and, in the case of the Dynamic strategy,
+/// The paramater represents the cache size, and, in the case of the Dynamic strategy,
/// the point where we move from using the iterative strategy to the rtree.
#[derive(Debug, Clone, Copy)]
pub enum Strategy {


@@ -258,7 +258,7 @@ pub(crate) mod tests {
fn matching_words() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
-let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
+let mut ctx = SearchContext::new(&temp_index, &rtxn);
let mut builder = TokenizerBuilder::default();
let tokenizer = builder.build();
let tokens = tokenizer.tokenize("split this world");


@@ -134,7 +134,7 @@ impl<'t> Matcher<'t, '_> {
for (token_position, word_position, word) in words_positions {
partial = match partial.match_token(word) {
// token matches the partial match, but the match is not full,
-// we temporarily save the current token then we try to match the next one.
+// we temporarly save the current token then we try to match the next one.
Some(MatchType::Partial(partial)) => {
potential_matches.push((token_position, word_position, partial.char_len()));
partial
@@ -506,7 +506,7 @@ mod tests {
impl<'a> MatcherBuilder<'a> {
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
-let mut ctx = SearchContext::new(index, rtxn).unwrap();
+let mut ctx = SearchContext::new(index, rtxn);
let universe = filtered_universe(&ctx, &None).unwrap();
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
&mut ctx,
@@ -722,7 +722,7 @@ mod tests {
@"…void void void void void split the world void void" @"…void void void void void split the world void void"
); );
// Text containing matches with different density. // Text containing matches with diferent density.
let text = "split void the void void world void void void void void void void void void void split the world void void"; let text = "split void the void void world void void void void void void void void void void split the world void void";
let mut matcher = builder.build(text); let mut matcher = builder.build(text);
// crop should return 10 last words with a marker at the start. // crop should return 10 last words with a marker at the start.


@@ -49,12 +49,13 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use self::vector_sort::VectorSort;
+use crate::error::FieldIdMapMissingEntry;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::vector::Embedder;
use crate::{
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
-UserError, Weight,
+UserError,
};
/// A structure used throughout the execution of a search query. /// A structure used throughout the execution of a search query.
@@ -70,21 +71,8 @@ pub struct SearchContext<'ctx> {
}
impl<'ctx> SearchContext<'ctx> {
-pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Result<Self> {
-let searchable_fids = index.searchable_fields_and_weights(txn)?;
-let exact_attributes_ids = index.exact_attributes_ids(txn)?;
-let mut exact = Vec::new();
-let mut tolerant = Vec::new();
-for (_name, fid, weight) in searchable_fids {
-if exact_attributes_ids.contains(&fid) {
-exact.push((fid, weight));
-} else {
-tolerant.push((fid, weight));
-}
-}
-Ok(Self {
+pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self {
+Self {
index,
txn,
db_cache: <_>::default(),
@@ -93,39 +81,42 @@ impl<'ctx> SearchContext<'ctx> {
term_interner: <_>::default(),
phrase_docids: <_>::default(),
restricted_fids: None,
-})
+}
}
-pub fn attributes_to_search_on(
-&mut self,
-attributes_to_search_on: &'ctx [String],
-) -> Result<()> {
-let user_defined_searchable = self.index.user_defined_searchable_fields(self.txn)?;
-let searchable_fields_weights = self.index.searchable_fields_and_weights(self.txn)?;
+pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
+let fids_map = self.index.fields_ids_map(self.txn)?;
+let searchable_names = self.index.searchable_fields(self.txn)?;
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
-let mut wildcard = false;
let mut restricted_fids = RestrictedFids::default();
-for field_name in attributes_to_search_on {
+let mut contains_wildcard = false;
+for field_name in searchable_attributes {
if field_name == "*" {
-wildcard = true;
-// we cannot early exit as we want to returns error in case of unknown fields
+contains_wildcard = true;
continue;
}
-let searchable_weight =
-searchable_fields_weights.iter().find(|(name, _, _)| name == field_name);
-let (fid, weight) = match searchable_weight {
+let searchable_contains_name =
+searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name));
+let fid = match (fids_map.id(field_name), searchable_contains_name) {
// The Field id exist and the field is searchable
-Some((_name, fid, weight)) => (*fid, *weight),
-// The field is not searchable but the user didn't define any searchable attributes
-None if user_defined_searchable.is_none() => continue,
+(Some(fid), Some(true)) | (Some(fid), None) => fid,
+// The field is searchable but the Field id doesn't exist => Internal Error
+(None, Some(true)) => {
+return Err(FieldIdMapMissingEntry::FieldName {
+field_name: field_name.to_string(),
+process: "search",
+}
+.into())
+}
+// The field is not searchable, but the searchableAttributes are set to * => ignore field
+(None, None) => continue,
// The field is not searchable => User error
-None => {
-let (valid_fields, hidden_fields) = self.index.remove_hidden_fields(
-self.txn,
-searchable_fields_weights.iter().map(|(name, _, _)| name),
-)?;
+(_fid, Some(false)) => {
+let (valid_fields, hidden_fields) = match searchable_names {
+Some(sn) => self.index.remove_hidden_fields(self.txn, sn)?,
+None => self.index.remove_hidden_fields(self.txn, fids_map.names())?,
+};
let field = field_name.to_string();
return Err(UserError::InvalidSearchableAttribute {
@@ -138,17 +129,13 @@ impl<'ctx> SearchContext<'ctx> {
}; };
if exact_attributes_ids.contains(&fid) { if exact_attributes_ids.contains(&fid) {
restricted_fids.exact.push((fid, weight)); restricted_fids.exact.push(fid);
} else { } else {
restricted_fids.tolerant.push((fid, weight)); restricted_fids.tolerant.push(fid);
}; };
} }
if wildcard { self.restricted_fids = (!contains_wildcard).then_some(restricted_fids);
self.restricted_fids = None;
} else {
self.restricted_fids = Some(restricted_fids);
}
Ok(()) Ok(())
} }
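A minimal sketch of the wildcard rule implemented above, with toy types rather than milli's (the function name and signature are illustrative only): a single "*" among the requested attributes lifts the restriction entirely, but the loop still visits every name so that unknown fields can be reported.

// Hypothetical illustration: "*" disables the field restriction, None = unrestricted.
fn restrict(attributes_to_search_on: &[&str]) -> Option<Vec<String>> {
    let mut restricted = Vec::new();
    let mut wildcard = false;
    for name in attributes_to_search_on {
        if *name == "*" {
            wildcard = true;
            // keep iterating so the real code can still error on unknown fields
            continue;
        }
        restricted.push(name.to_string());
    }
    if wildcard { None } else { Some(restricted) }
}

fn main() {
    assert_eq!(restrict(&["title", "*"]), None);
    assert_eq!(restrict(&["title"]), Some(vec!["title".to_string()]));
}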
@@ -171,13 +158,13 @@ impl Word {
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
pub struct RestrictedFids { pub struct RestrictedFids {
pub tolerant: Vec<(FieldId, Weight)>, pub tolerant: Vec<FieldId>,
pub exact: Vec<(FieldId, Weight)>, pub exact: Vec<FieldId>,
} }
impl RestrictedFids { impl RestrictedFids {
pub fn contains(&self, fid: &FieldId) -> bool { pub fn contains(&self, fid: &FieldId) -> bool {
self.tolerant.iter().any(|(id, _)| id == fid) || self.exact.iter().any(|(id, _)| id == fid) self.tolerant.contains(fid) || self.exact.contains(fid)
} }
} }
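A standalone sketch of the weighted variant above, assuming FieldId and Weight are u16 aliases as elsewhere in milli: membership is decided on the field id alone, and the weight is only carried along for scoring.

// Hypothetical mirror of RestrictedFids::contains, for illustration only.
type FieldId = u16;
type Weight = u16;

struct RestrictedFids {
    tolerant: Vec<(FieldId, Weight)>,
    exact: Vec<(FieldId, Weight)>,
}

impl RestrictedFids {
    fn contains(&self, fid: &FieldId) -> bool {
        self.tolerant.iter().any(|(id, _)| id == fid)
            || self.exact.iter().any(|(id, _)| id == fid)
    }
}

fn main() {
    let fids = RestrictedFids { tolerant: vec![(1, 0)], exact: vec![(2, 1)] };
    assert!(fids.contains(&2));
    assert!(!fids.contains(&3));
}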

View File

@@ -119,7 +119,7 @@ pub fn located_query_terms_from_tokens(
if let Some(located_query_term) = phrase.build(ctx) { if let Some(located_query_term) = phrase.build(ctx) {
// as we are evaluating a negative operator we put the phrase // as we are evaluating a negative operator we put the phrase
// in the negative one *but* we don't reset the negative operator // in the negative one *but* we don't reset the negative operator
// as we are immediately starting a new negative phrase. // as we are immediatly starting a new negative phrase.
if negative_phrase { if negative_phrase {
negative_phrases.push(located_query_term); negative_phrases.push(located_query_term);
} else { } else {
@@ -366,7 +366,7 @@ mod tests {
let tokens = tokenizer.tokenize("."); let tokens = tokenizer.tokenize(".");
let index = temp_index_with_documents(); let index = temp_index_with_documents();
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let mut ctx = SearchContext::new(&index, &rtxn)?; let mut ctx = SearchContext::new(&index, &rtxn);
// panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785> // panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
let ExtractedTokens { query_terms, .. } = let ExtractedTokens { query_terms, .. } =
located_query_terms_from_tokens(&mut ctx, tokens, None)?; located_query_terms_from_tokens(&mut ctx, tokens, None)?;

View File

@@ -7,12 +7,12 @@ use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
use crate::search::new::SearchContext; use crate::search::new::SearchContext;
use crate::{FieldId, InternalError, Result}; use crate::Result;
#[derive(Clone, PartialEq, Eq, Hash)] #[derive(Clone, PartialEq, Eq, Hash)]
pub struct FidCondition { pub struct FidCondition {
term: LocatedQueryTermSubset, term: LocatedQueryTermSubset,
fid: Option<FieldId>, fid: u16,
} }
pub enum FidGraph {} pub enum FidGraph {}
@@ -26,15 +26,13 @@ impl RankingRuleGraphTrait for FidGraph {
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<ComputedCondition> { ) -> Result<ComputedCondition> {
let FidCondition { term, .. } = condition; let FidCondition { term, .. } = condition;
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
let docids = if let Some(fid) = condition.fid { let mut docids = compute_query_term_subset_docids_within_field_id(
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument ctx,
let docids = &term.term_subset,
compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?; condition.fid,
docids & universe )?;
} else { docids &= universe;
RoaringBitmap::new()
};
Ok(ComputedCondition { Ok(ComputedCondition {
docids, docids,
@@ -70,29 +68,34 @@ impl RankingRuleGraphTrait for FidGraph {
all_fields.extend(fields); all_fields.extend(fields);
} }
let weights_map = ctx.index.fieldids_weights_map(ctx.txn)?;
let mut edges = vec![]; let mut edges = vec![];
for fid in all_fields.iter().copied() { for fid in all_fields.iter().copied() {
let weight = weights_map
.weight(fid)
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
edges.push(( edges.push((
weight as u32 * term.term_ids.len() as u32, fid as u32 * term.term_ids.len() as u32,
conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }), conditions_interner.insert(FidCondition { term: term.clone(), fid }),
)); ));
} }
// always lookup the max_fid if we don't already and add an artificial condition for max scoring // always lookup the max_fid if we don't already and add an artificial condition for max scoring
let max_weight: Option<u16> = weights_map.max_weight(); let max_fid: Option<u16> = {
if let Some(max_fid) = ctx
.index
.searchable_fields_ids(ctx.txn)?
.map(|field_ids| field_ids.into_iter().max())
{
max_fid
} else {
ctx.index.fields_ids_map(ctx.txn)?.ids().max()
}
};
if let Some(max_weight) = max_weight { if let Some(max_fid) = max_fid {
if !all_fields.contains(&max_weight) { if !all_fields.contains(&max_fid) {
edges.push(( edges.push((
max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10. max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
conditions_interner.insert(FidCondition { conditions_interner.insert(FidCondition {
term: term.clone(), // TODO remove this ugly clone term: term.clone(), // TODO remove this ugly clone
fid: None, fid: max_fid,
}), }),
)); ));
} }
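As a rough illustration of the cost formula above (the helper below is hypothetical, not part of the diff): an edge for a FidCondition costs the attribute's weight (previously the raw field id) multiplied by the number of query term ids it spans, so the most important attribute, weight 0, always yields a zero-cost edge.

// Hypothetical helper showing the edge-cost computation used for Fid conditions.
fn fid_edge_cost(weight: u16, term_ids_len: usize) -> u32 {
    weight as u32 * term_ids_len as u32
}

fn main() {
    // e.g. an attribute with weight 2 spanning 3 term ids
    assert_eq!(fid_edge_cost(2, 3), 6);
    // the most important attribute (weight 0) costs nothing
    assert_eq!(fid_edge_cost(0, 3), 0);
}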

View File

@@ -1,5 +1,5 @@
use crate::index::tests::TempIndex; use crate::index::tests::TempIndex;
use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy}; use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex { fn create_index() -> TempIndex {
let index = TempIndex::new(); let index = TempIndex::new();
@@ -131,19 +131,6 @@ fn test_attribute_fid_simple() {
#[test] #[test]
fn test_attribute_fid_ngrams() { fn test_attribute_fid_ngrams() {
let index = create_index(); let index = create_index();
db_snap!(index, fields_ids_map, @r###"
0 id |
1 title |
2 description |
3 plot |
"###);
db_snap!(index, searchable_fields, @r###"["title", "description", "plot"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
1 0 |
2 1 |
3 2 |
"###);
let txn = index.read_txn().unwrap(); let txn = index.read_txn().unwrap();

View File

@@ -1,244 +0,0 @@
---
source: milli/src/search/new/tests/attribute_fid.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
(
2,
[
Fid(
Rank {
rank: 19,
max_rank: 19,
},
),
Position(
Rank {
rank: 91,
max_rank: 91,
},
),
],
),
(
6,
[
Fid(
Rank {
rank: 15,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
5,
[
Fid(
Rank {
rank: 14,
max_rank: 19,
},
),
Position(
Rank {
rank: 79,
max_rank: 91,
},
),
],
),
(
4,
[
Fid(
Rank {
rank: 13,
max_rank: 19,
},
),
Position(
Rank {
rank: 77,
max_rank: 91,
},
),
],
),
(
3,
[
Fid(
Rank {
rank: 12,
max_rank: 19,
},
),
Position(
Rank {
rank: 83,
max_rank: 91,
},
),
],
),
(
9,
[
Fid(
Rank {
rank: 11,
max_rank: 19,
},
),
Position(
Rank {
rank: 75,
max_rank: 91,
},
),
],
),
(
8,
[
Fid(
Rank {
rank: 10,
max_rank: 19,
},
),
Position(
Rank {
rank: 79,
max_rank: 91,
},
),
],
),
(
7,
[
Fid(
Rank {
rank: 10,
max_rank: 19,
},
),
Position(
Rank {
rank: 73,
max_rank: 91,
},
),
],
),
(
11,
[
Fid(
Rank {
rank: 7,
max_rank: 19,
},
),
Position(
Rank {
rank: 77,
max_rank: 91,
},
),
],
),
(
10,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
13,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 81,
max_rank: 91,
},
),
],
),
(
12,
[
Fid(
Rank {
rank: 6,
max_rank: 19,
},
),
Position(
Rank {
rank: 78,
max_rank: 91,
},
),
],
),
(
14,
[
Fid(
Rank {
rank: 5,
max_rank: 19,
},
),
Position(
Rank {
rank: 75,
max_rank: 91,
},
),
],
),
(
0,
[
Fid(
Rank {
rank: 1,
max_rank: 19,
},
),
Position(
Rank {
rank: 91,
max_rank: 91,
},
),
],
),
]

View File

@@ -308,25 +308,6 @@ pub fn snap_fields_ids_map(index: &Index) -> String {
} }
snap snap
} }
pub fn snap_fieldids_weights_map(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let weights_map = index.fieldids_weights_map(&rtxn).unwrap();
let mut snap = String::new();
writeln!(&mut snap, "fid weight").unwrap();
let mut field_ids: Vec<_> = weights_map.ids().collect();
field_ids.sort();
for field_id in field_ids {
let weight = weights_map.weight(field_id).unwrap();
writeln!(&mut snap, "{field_id:<3} {weight:<3} |").unwrap();
}
snap
}
pub fn snap_searchable_fields(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
format!("{searchable_fields:?}")
}
pub fn snap_geo_faceted_documents_ids(index: &Index) -> String { pub fn snap_geo_faceted_documents_ids(index: &Index) -> String {
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap(); let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap();
@@ -488,12 +469,6 @@ macro_rules! full_snap_of_db {
($index:ident, fields_ids_map) => {{ ($index:ident, fields_ids_map) => {{
$crate::snapshot_tests::snap_fields_ids_map(&$index) $crate::snapshot_tests::snap_fields_ids_map(&$index)
}}; }};
($index:ident, fieldids_weights_map) => {{
$crate::snapshot_tests::snap_fieldids_weights_map(&$index)
}};
($index:ident, searchable_fields) => {{
$crate::snapshot_tests::snap_searchable_fields(&$index)
}};
($index:ident, geo_faceted_documents_ids) => {{ ($index:ident, geo_faceted_documents_ids) => {{
$crate::snapshot_tests::snap_geo_faceted_documents_ids(&$index) $crate::snapshot_tests::snap_geo_faceted_documents_ids(&$index)
}}; }};

View File

@@ -499,7 +499,7 @@ impl FacetsUpdateIncrementalInner {
ModificationResult::Expand | ModificationResult::Reduce { .. } ModificationResult::Expand | ModificationResult::Reduce { .. }
) )
{ {
// if any modification occurred, insert it in the database. // if any modification occured, insert it in the database.
self.db.put(txn, &insertion_key.as_ref(), &updated_value)?; self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
Ok(insertion_key_modification) Ok(insertion_key_modification)
} else { } else {

View File

@@ -379,7 +379,7 @@ pub(crate) mod test_helpers {
let mut options = heed::EnvOpenOptions::new(); let mut options = heed::EnvOpenOptions::new();
let options = options.map_size(4096 * 4 * 1000 * 100); let options = options.map_size(4096 * 4 * 1000 * 100);
let tempdir = tempfile::TempDir::new().unwrap(); let tempdir = tempfile::TempDir::new().unwrap();
let env = unsafe { options.open(tempdir.path()) }.unwrap(); let env = options.open(tempdir.path()).unwrap();
let mut wtxn = env.write_txn().unwrap(); let mut wtxn = env.write_txn().unwrap();
let content = env.create_database(&mut wtxn, None).unwrap(); let content = env.create_database(&mut wtxn, None).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();

View File

@@ -186,7 +186,7 @@ fn searchable_fields_changed(
) -> bool { ) -> bool {
let searchable_fields = &settings_diff.new.searchable_fields_ids; let searchable_fields = &settings_diff.new.searchable_fields_ids;
for (field_id, field_bytes) in obkv.iter() { for (field_id, field_bytes) in obkv.iter() {
if searchable_fields.contains(&field_id) { if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
let del_add = KvReaderDelAdd::new(field_bytes); let del_add = KvReaderDelAdd::new(field_bytes);
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) { match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
// if both fields are None, check the next field. // if both fields are None, check the next field.
@@ -298,7 +298,7 @@ fn lang_safe_tokens_from_document<'a>(
/// Extract words mapped with their positions of a document. /// Extract words mapped with their positions of a document.
fn tokens_from_document<'a>( fn tokens_from_document<'a>(
obkv: &KvReader<FieldId>, obkv: &KvReader<FieldId>,
searchable_fields: &[FieldId], searchable_fields: &Option<Vec<FieldId>>,
tokenizer: &Tokenizer, tokenizer: &Tokenizer,
max_positions_per_attributes: u32, max_positions_per_attributes: u32,
del_add: DelAdd, del_add: DelAdd,
@@ -309,7 +309,7 @@ fn tokens_from_document<'a>(
let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer); let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
for (field_id, field_bytes) in obkv.iter() { for (field_id, field_bytes) in obkv.iter() {
// if field is searchable. // if field is searchable.
if searchable_fields.as_ref().contains(&field_id) { if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
// extract deletion or addition only. // extract deletion or addition only.
if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) { if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
// parse json. // parse json.
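A minimal sketch of the `map_or(true, ..)` check in the old code above, with a bare `Option<Vec<u16>>` standing in for the real field-id list: `None` means every field is searchable, `Some` restricts matching to the listed ids.

// Hypothetical illustration: None = all fields searchable, Some = restricted set.
fn is_searchable(searchable_fields: &Option<Vec<u16>>, field_id: u16) -> bool {
    searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id))
}

fn main() {
    assert!(is_searchable(&None, 7));              // no restriction at all
    assert!(is_searchable(&Some(vec![1, 2]), 2));  // explicitly listed
    assert!(!is_searchable(&Some(vec![1, 2]), 7)); // filtered out
}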

View File

@@ -37,7 +37,7 @@ pub struct ExtractedFacetValues {
/// Extracts the facet values of each faceted field of each document. /// Extracts the facet values of each faceted field of each document.
/// ///
/// Returns the generated grenad reader containing the docid the fid and the original value as key /// Returns the generated grenad reader containing the docid the fid and the orginal value as key
/// and the normalized value as value extracted from the given chunk of documents. /// and the normalized value as value extracted from the given chunk of documents.
/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially. /// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]

View File

@@ -556,7 +556,7 @@ where
let writer_index = (embedder_index as u16) << 8; let writer_index = (embedder_index as u16) << 8;
for k in 0..=u8::MAX { for k in 0..=u8::MAX {
let writer = let writer =
arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension); arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension)?;
if writer.is_empty(wtxn)? { if writer.is_empty(wtxn)? {
break; break;
} }
@@ -3260,7 +3260,6 @@ mod tests {
} }
#[test] #[test]
#[cfg(feature = "all-tokenizations")]
fn stored_detected_script_and_language_should_not_return_deleted_documents() { fn stored_detected_script_and_language_should_not_return_deleted_documents() {
use charabia::{Language, Script}; use charabia::{Language, Script};
let index = TempIndex::new(); let index = TempIndex::new();

View File

@@ -661,7 +661,7 @@ pub(crate) fn write_typed_chunk_into_index(
)?; )?;
let writer_index = (embedder_index as u16) << 8; let writer_index = (embedder_index as u16) << 8;
// FIXME: allow customizing distance // FIXME: allow customizing distance
let writers: Vec<_> = (0..=u8::MAX) let writers: std::result::Result<Vec<_>, _> = (0..=u8::MAX)
.map(|k| { .map(|k| {
arroy::Writer::new( arroy::Writer::new(
index.vector_arroy, index.vector_arroy,
@@ -670,6 +670,7 @@ pub(crate) fn write_typed_chunk_into_index(
) )
}) })
.collect(); .collect();
let writers = writers?;
// remove vectors for docids we want them removed // remove vectors for docids we want them removed
let merger = remove_vectors_builder.build(); let merger = remove_vectors_builder.build();
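A self-contained sketch of the collect-into-Result pattern the old code above relies on (the names below are illustrative): collecting an iterator of `Result<T, E>` into `Result<Vec<T>, E>` stops at the first error, so a single `?` after the collect covers all 256 writer constructions.

// Hypothetical illustration of collecting fallible constructors into one Result.
fn make_writer(k: u8) -> Result<u16, String> {
    if k < 3 { Ok(k as u16) } else { Err(format!("failed at {k}")) }
}

fn main() {
    let ok: Result<Vec<_>, _> = (0u8..3).map(make_writer).collect();
    assert_eq!(ok, Ok(vec![0, 1, 2]));

    let err: Result<Vec<_>, _> = (0u8..=u8::MAX).map(make_writer).collect();
    assert_eq!(err, Err("failed at 3".to_string()));
}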

View File

@@ -461,39 +461,50 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
Ok(true) Ok(true)
} }
/// Updates the index's searchable attributes. /// Updates the index's searchable attributes. This causes the field map to be recomputed to
/// reflect the order of the searchable attributes.
fn update_searchable(&mut self) -> Result<bool> { fn update_searchable(&mut self) -> Result<bool> {
match self.searchable_fields { match self.searchable_fields {
Setting::Set(ref fields) => { Setting::Set(ref fields) => {
// Check to see if the searchable fields changed before doing anything else // Check to see if the searchable fields changed before doing anything else
let old_fields = self.index.searchable_fields(self.wtxn)?; let old_fields = self.index.searchable_fields(self.wtxn)?;
let did_change = { let did_change = match old_fields {
let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>(); // If old_fields is Some, let's check to see if the fields actually changed
new_fields != old_fields Some(old_fields) => {
let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>();
new_fields != old_fields
}
// If old_fields is None, the fields have changed (because they are being set)
None => true,
}; };
if !did_change { if !did_change {
return Ok(false); return Ok(false);
} }
// Since we're updating the settings we can only add new fields at the end of the field id map // every time the searchable attributes are updated, we need to update the
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; // ids for any settings that uses the facets. (distinct_fields, filterable_fields).
let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let mut new_fields_ids_map = FieldsIdsMap::new();
// fields are deduplicated, only the first occurrence is taken into account // fields are deduplicated, only the first occurrence is taken into account
let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>(); let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>();
// Add all the searchable attributes to the field map, and then add the // Add all the searchable attributes to the field map, and then add the
// remaining fields from the old field map to the new one // remaining fields from the old field map to the new one
for name in names.iter() { for name in names.iter() {
// The fields ids map won't change the field id of already present elements thus only the new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
// new fields will be inserted. }
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
for (_, name) in old_fields_ids_map.iter() {
new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
} }
self.index.put_all_searchable_fields_from_fields_ids_map( self.index.put_all_searchable_fields_from_fields_ids_map(
self.wtxn, self.wtxn,
&names, &names,
&fields_ids_map, &new_fields_ids_map,
)?; )?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?;
Ok(true) Ok(true)
} }
Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?), Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?),
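A toy sketch of the append-only property the new code above depends on (this is not milli's FieldsIdsMap, just an illustration): inserting a name that already has an id returns that id unchanged, so re-declaring searchable attributes can only append new fields, never renumber existing ones.

use std::collections::HashMap;

// Hypothetical stand-in for a fields-ids map that never re-assigns ids.
#[derive(Default)]
struct ToyFieldsIdsMap {
    ids: HashMap<String, u16>,
    next: u16,
}

impl ToyFieldsIdsMap {
    fn insert(&mut self, name: &str) -> u16 {
        if let Some(&id) = self.ids.get(name) {
            return id;
        }
        let id = self.next;
        self.next += 1;
        self.ids.insert(name.to_string(), id);
        id
    }
}

fn main() {
    let mut map = ToyFieldsIdsMap::default();
    assert_eq!(map.insert("id"), 0);
    assert_eq!(map.insert("name"), 1);
    // Setting searchableAttributes to ["name"] later keeps "name" at id 1.
    assert_eq!(map.insert("name"), 1);
    assert_eq!(map.insert("age"), 2);
}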
@@ -1161,7 +1172,7 @@ pub(crate) struct InnerIndexSettings {
pub user_defined_faceted_fields: HashSet<String>, pub user_defined_faceted_fields: HashSet<String>,
pub user_defined_searchable_fields: Option<Vec<String>>, pub user_defined_searchable_fields: Option<Vec<String>>,
pub faceted_fields_ids: HashSet<FieldId>, pub faceted_fields_ids: HashSet<FieldId>,
pub searchable_fields_ids: Vec<FieldId>, pub searchable_fields_ids: Option<Vec<FieldId>>,
pub exact_attributes: HashSet<FieldId>, pub exact_attributes: HashSet<FieldId>,
pub proximity_precision: ProximityPrecision, pub proximity_precision: ProximityPrecision,
pub embedding_configs: EmbeddingConfigs, pub embedding_configs: EmbeddingConfigs,
@@ -1222,21 +1233,18 @@ impl InnerIndexSettings {
// find and insert the new field ids // find and insert the new field ids
pub fn recompute_searchables(&mut self, wtxn: &mut heed::RwTxn, index: &Index) -> Result<()> { pub fn recompute_searchables(&mut self, wtxn: &mut heed::RwTxn, index: &Index) -> Result<()> {
let searchable_fields = self
.user_defined_searchable_fields
.as_ref()
.map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>());
// in case new fields were introduced we're going to recreate the searchable fields. // in case new fields were introduced we're going to recreate the searchable fields.
if let Some(searchable_fields) = searchable_fields { if let Some(searchable_fields) = self.user_defined_searchable_fields.as_ref() {
let searchable_fields =
searchable_fields.iter().map(String::as_ref).collect::<Vec<_>>();
index.put_all_searchable_fields_from_fields_ids_map( index.put_all_searchable_fields_from_fields_ids_map(
wtxn, wtxn,
&searchable_fields, &searchable_fields,
&self.fields_ids_map, &self.fields_ids_map,
)?; )?;
let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
self.searchable_fields_ids = searchable_fields_ids;
} }
let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
self.searchable_fields_ids = searchable_fields_ids;
Ok(()) Ok(())
} }
@@ -1509,13 +1517,12 @@ mod tests {
use big_s::S; use big_s::S;
use heed::types::Bytes; use heed::types::Bytes;
use maplit::{btreemap, btreeset, hashset}; use maplit::{btreemap, btreeset, hashset};
use meili_snap::snapshot;
use super::*; use super::*;
use crate::error::Error; use crate::error::Error;
use crate::index::tests::TempIndex; use crate::index::tests::TempIndex;
use crate::update::ClearDocuments; use crate::update::ClearDocuments;
use crate::{db_snap, Criterion, Filter, SearchResult}; use crate::{Criterion, Filter, SearchResult};
#[test] #[test]
fn set_and_reset_searchable_fields() { fn set_and_reset_searchable_fields() {
@@ -1544,17 +1551,6 @@ mod tests {
wtxn.commit().unwrap(); wtxn.commit().unwrap();
db_snap!(index, fields_ids_map, @r###"
0 id |
1 name |
2 age |
"###);
db_snap!(index, searchable_fields, @r###"["name"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
1 0 |
"###);
// Check that the searchable field is correctly set to "name" only. // Check that the searchable field is correctly set to "name" only.
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
// When we search for something that is not in // When we search for something that is not in
@@ -1566,9 +1562,8 @@ mod tests {
// we must find the appropriate document. // we must find the appropriate document.
let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap(); let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap();
let documents = index.documents(&rtxn, result.documents_ids).unwrap(); let documents = index.documents(&rtxn, result.documents_ids).unwrap();
let fid_map = index.fields_ids_map(&rtxn).unwrap();
assert_eq!(documents.len(), 1); assert_eq!(documents.len(), 1);
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
drop(rtxn); drop(rtxn);
// We change the searchable fields to be the "name" field only. // We change the searchable fields to be the "name" field only.
@@ -1578,31 +1573,14 @@ mod tests {
}) })
.unwrap(); .unwrap();
db_snap!(index, fields_ids_map, @r###"
0 id |
1 name |
2 age |
"###);
db_snap!(index, searchable_fields, @r###"["id", "name", "age"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
1 0 |
2 0 |
"###);
// Check that the searchable field have been reset and documents are found now. // Check that the searchable field have been reset and documents are found now.
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let fid_map = index.fields_ids_map(&rtxn).unwrap();
let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap();
snapshot!(format!("{user_defined_searchable_fields:?}"), @"None");
// the searchable fields should contain all the fields
let searchable_fields = index.searchable_fields(&rtxn).unwrap(); let searchable_fields = index.searchable_fields(&rtxn).unwrap();
snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###); assert_eq!(searchable_fields, None);
let result = index.search(&rtxn).query("23").execute().unwrap(); let result = index.search(&rtxn).query("23").execute().unwrap();
assert_eq!(result.documents_ids.len(), 1); assert_eq!(result.documents_ids.len(), 1);
let documents = index.documents(&rtxn, result.documents_ids).unwrap(); let documents = index.documents(&rtxn, result.documents_ids).unwrap();
assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..]));
} }
#[test] #[test]

View File

@@ -301,14 +301,10 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
fn from(value: EmbeddingConfig) -> Self { fn from(value: EmbeddingConfig) -> Self {
let EmbeddingConfig { embedder_options, prompt } = value; let EmbeddingConfig { embedder_options, prompt } = value;
match embedder_options { match embedder_options {
super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions { super::EmbedderOptions::HuggingFace(options) => Self {
model,
revision,
distribution,
}) => Self {
source: Setting::Set(EmbedderSource::HuggingFace), source: Setting::Set(EmbedderSource::HuggingFace),
model: Setting::Set(model), model: Setting::Set(options.model),
revision: revision.map(Setting::Set).unwrap_or_default(), revision: options.revision.map(Setting::Set).unwrap_or_default(),
api_key: Setting::NotSet, api_key: Setting::NotSet,
dimensions: Setting::NotSet, dimensions: Setting::NotSet,
document_template: Setting::Set(prompt.template), document_template: Setting::Set(prompt.template),
@@ -318,19 +314,14 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
path_to_embeddings: Setting::NotSet, path_to_embeddings: Setting::NotSet,
embedding_object: Setting::NotSet, embedding_object: Setting::NotSet,
input_type: Setting::NotSet, input_type: Setting::NotSet,
distribution: distribution.map(Setting::Set).unwrap_or_default(), distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
}, },
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions { super::EmbedderOptions::OpenAi(options) => Self {
api_key,
embedding_model,
dimensions,
distribution,
}) => Self {
source: Setting::Set(EmbedderSource::OpenAi), source: Setting::Set(EmbedderSource::OpenAi),
model: Setting::Set(embedding_model.name().to_owned()), model: Setting::Set(options.embedding_model.name().to_owned()),
revision: Setting::NotSet, revision: Setting::NotSet,
api_key: api_key.map(Setting::Set).unwrap_or_default(), api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
dimensions: dimensions.map(Setting::Set).unwrap_or_default(), dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(),
document_template: Setting::Set(prompt.template), document_template: Setting::Set(prompt.template),
url: Setting::NotSet, url: Setting::NotSet,
query: Setting::NotSet, query: Setting::NotSet,
@@ -338,37 +329,29 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
path_to_embeddings: Setting::NotSet, path_to_embeddings: Setting::NotSet,
embedding_object: Setting::NotSet, embedding_object: Setting::NotSet,
input_type: Setting::NotSet, input_type: Setting::NotSet,
distribution: distribution.map(Setting::Set).unwrap_or_default(), distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
}, },
super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions { super::EmbedderOptions::Ollama(options) => Self {
embedding_model,
url,
api_key,
distribution,
}) => Self {
source: Setting::Set(EmbedderSource::Ollama), source: Setting::Set(EmbedderSource::Ollama),
model: Setting::Set(embedding_model), model: Setting::Set(options.embedding_model.to_owned()),
revision: Setting::NotSet, revision: Setting::NotSet,
api_key: api_key.map(Setting::Set).unwrap_or_default(), api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
dimensions: Setting::NotSet, dimensions: Setting::NotSet,
document_template: Setting::Set(prompt.template), document_template: Setting::Set(prompt.template),
url: url.map(Setting::Set).unwrap_or_default(), url: options.url.map(Setting::Set).unwrap_or_default(),
query: Setting::NotSet, query: Setting::NotSet,
input_field: Setting::NotSet, input_field: Setting::NotSet,
path_to_embeddings: Setting::NotSet, path_to_embeddings: Setting::NotSet,
embedding_object: Setting::NotSet, embedding_object: Setting::NotSet,
input_type: Setting::NotSet, input_type: Setting::NotSet,
distribution: distribution.map(Setting::Set).unwrap_or_default(), distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
}, },
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions { super::EmbedderOptions::UserProvided(options) => Self {
dimensions,
distribution,
}) => Self {
source: Setting::Set(EmbedderSource::UserProvided), source: Setting::Set(EmbedderSource::UserProvided),
model: Setting::NotSet, model: Setting::NotSet,
revision: Setting::NotSet, revision: Setting::NotSet,
api_key: Setting::NotSet, api_key: Setting::NotSet,
dimensions: Setting::Set(dimensions), dimensions: Setting::Set(options.dimensions),
document_template: Setting::NotSet, document_template: Setting::NotSet,
url: Setting::NotSet, url: Setting::NotSet,
query: Setting::NotSet, query: Setting::NotSet,
@@ -376,7 +359,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
path_to_embeddings: Setting::NotSet, path_to_embeddings: Setting::NotSet,
embedding_object: Setting::NotSet, embedding_object: Setting::NotSet,
input_type: Setting::NotSet, input_type: Setting::NotSet,
distribution: distribution.map(Setting::Set).unwrap_or_default(), distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
}, },
super::EmbedderOptions::Rest(super::rest::EmbedderOptions { super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
api_key, api_key,

View File

@@ -54,7 +54,7 @@
"sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe" "sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe"
} }
}, },
"precommands": [ "commands": [
{ {
"route": "indexes/movies/settings", "route": "indexes/movies/settings",
"method": "PATCH", "method": "PATCH",
@@ -78,10 +78,8 @@
] ]
} }
}, },
"synchronous": "WaitForTask" "synchronous": "DontWait"
} },
],
"commands": [
{ {
"route": "indexes/movies/documents", "route": "indexes/movies/documents",
"method": "POST", "method": "POST",

View File

@@ -11,7 +11,7 @@
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1" "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
} }
}, },
"precommands": [ "commands": [
{ {
"route": "indexes/movies/settings", "route": "indexes/movies/settings",
"method": "PATCH", "method": "PATCH",
@@ -30,10 +30,8 @@
] ]
} }
}, },
"synchronous": "WaitForTask" "synchronous": "DontWait"
} },
],
"commands": [
{ {
"route": "indexes/movies/documents", "route": "indexes/movies/documents",
"method": "POST", "method": "POST",

View File

@@ -11,7 +11,7 @@
"sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6" "sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6"
} }
}, },
"precommands": [ "commands": [
{ {
"route": "experimental-features", "route": "experimental-features",
"method": "PATCH", "method": "PATCH",
@@ -55,9 +55,7 @@
} }
}, },
"synchronous": "WaitForTask" "synchronous": "WaitForTask"
} },
],
"commands": [
{ {
"route": "indexes/movies/documents", "route": "indexes/movies/documents",
"method": "POST", "method": "POST",

View File

@@ -11,7 +11,7 @@
"sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6" "sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6"
} }
}, },
"precommands": [ "commands": [
{ {
"route": "experimental-features", "route": "experimental-features",
"method": "PATCH", "method": "PATCH",
@@ -49,9 +49,7 @@
"asset": "movies-100.json" "asset": "movies-100.json"
}, },
"synchronous": "WaitForTask" "synchronous": "WaitForTask"
} },
],
"commands": [
{ {
"route": "indexes/movies/settings", "route": "indexes/movies/settings",
"method": "PATCH", "method": "PATCH",

View File

@@ -11,7 +11,7 @@
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b" "sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
} }
}, },
"precommands": [ "commands": [
{ {
"route": "indexes/peoples/settings", "route": "indexes/peoples/settings",
"method": "PATCH", "method": "PATCH",
@@ -59,9 +59,7 @@
"asset": "150k-people.json" "asset": "150k-people.json"
}, },
"synchronous": "WaitForTask" "synchronous": "WaitForTask"
} },
],
"commands": [
{ {
"route": "indexes/peoples/settings", "route": "indexes/peoples/settings",
"method": "PATCH", "method": "PATCH",

View File

@@ -11,7 +11,7 @@
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b" "sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
} }
}, },
"precommands": [ "commands": [
{ {
"route": "indexes/peoples/settings", "route": "indexes/peoples/settings",
"method": "PATCH", "method": "PATCH",
@@ -61,9 +61,7 @@
"asset": "150k-people.json" "asset": "150k-people.json"
}, },
"synchronous": "WaitForTask" "synchronous": "WaitForTask"
} },
],
"commands": [
{ {
"route": "indexes/peoples/settings", "route": "indexes/peoples/settings",
"method": "PATCH", "method": "PATCH",

View File

@@ -11,7 +11,7 @@
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b" "sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
} }
}, },
"precommands": [ "commands": [
{ {
"route": "indexes/peoples/settings", "route": "indexes/peoples/settings",
"method": "PATCH", "method": "PATCH",
@@ -61,9 +61,7 @@
"asset": "150k-people.json" "asset": "150k-people.json"
}, },
"synchronous": "WaitForTask" "synchronous": "WaitForTask"
} },
],
"commands": [
{ {
"route": "indexes/peoples/settings", "route": "indexes/peoples/settings",
"method": "PATCH", "method": "PATCH",

View File

@@ -11,7 +11,7 @@
"sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b" "sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b"
} }
}, },
"precommands": [ "commands": [
{ {
"route": "indexes/peoples/settings", "route": "indexes/peoples/settings",
"method": "PATCH", "method": "PATCH",
@@ -62,18 +62,14 @@
"asset": "150k-people.json" "asset": "150k-people.json"
}, },
"synchronous": "WaitForTask" "synchronous": "WaitForTask"
} },
],
"commands": [
{ {
"route": "indexes/peoples/settings", "route": "indexes/peoples/settings",
"method": "PATCH", "method": "PATCH",
"body": { "body": {
"inline": { "inline": {
"typoTolerance": { "typoTolerance": {
"disableOnAttributes": [ "disableOnAttributes": ["featured_job_organization_name"]
"featured_job_organization_name"
]
} }
} }
}, },
@@ -97,22 +93,7 @@
"body": { "body": {
"inline": { "inline": {
"typoTolerance": { "typoTolerance": {
"disableOnWords": [ "disableOnWords": ["Ben","Elowitz","Kevin","Flaherty", "Ron", "Dustin", "Owen", "Chris", "Mark", "Matt", "Peter", "Van", "Head", "of"]
"Ben",
"Elowitz",
"Kevin",
"Flaherty",
"Ron",
"Dustin",
"Owen",
"Chris",
"Mark",
"Matt",
"Peter",
"Van",
"Head",
"of"
]
} }
} }
}, },

View File

@@ -22,8 +22,6 @@ pub struct Workload {
pub run_count: u16, pub run_count: u16,
pub extra_cli_args: Vec<String>, pub extra_cli_args: Vec<String>,
pub assets: BTreeMap<String, Asset>, pub assets: BTreeMap<String, Asset>,
#[serde(default)]
pub precommands: Vec<super::command::Command>,
pub commands: Vec<super::command::Command>, pub commands: Vec<super::command::Command>,
} }
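A simplified sketch of what the `#[serde(default)]` on `precommands` above buys (toy field types, not the real `Command`): a workload file that omits `precommands` still deserializes with an empty list, while one that declares them keeps setup requests out of the profiled `commands`.

use serde::Deserialize;

// Hypothetical, simplified mirror of the Workload struct for illustration only.
#[derive(Deserialize)]
struct Workload {
    name: String,
    #[serde(default)]
    precommands: Vec<String>, // the real type is Vec<Command>
    commands: Vec<String>,
}

fn main() {
    // `precommands` omitted: defaults to an empty Vec.
    let w: Workload =
        serde_json::from_str(r#"{ "name": "movies", "commands": ["POST /documents"] }"#).unwrap();
    assert_eq!(w.name, "movies");
    assert!(w.precommands.is_empty() && w.commands.len() == 1);

    // `precommands` present: setup requests are kept apart from the profiled commands.
    let w: Workload = serde_json::from_str(
        r#"{ "name": "movies", "precommands": ["PATCH /settings"], "commands": ["POST /documents"] }"#,
    )
    .unwrap();
    assert_eq!(w.precommands.len(), 1);
}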
@@ -39,15 +37,6 @@ async fn run_commands(
let report_folder = &args.report_folder; let report_folder = &args.report_folder;
let workload_name = &workload.name; let workload_name = &workload.name;
for batch in workload
.precommands
.as_slice()
.split_inclusive(|command| !matches!(command.synchronous, SyncMode::DontWait))
{
super::command::run_batch(meili_client, batch, &workload.assets, &args.asset_folder)
.await?;
}
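A minimal sketch of the `split_inclusive` batching used above (the enum is a simplified stand-in for the real `SyncMode`): every batch runs together and ends at the first command whose mode is not DontWait, which is the command the runner has to wait on before starting the next batch.

// Hypothetical illustration of batching commands by their synchronization mode.
#[derive(Debug)]
enum SyncMode {
    DontWait,
    WaitForResponse,
    WaitForTask,
}

fn main() {
    let modes = [
        SyncMode::DontWait,
        SyncMode::DontWait,
        SyncMode::WaitForTask,
        SyncMode::DontWait,
        SyncMode::WaitForResponse,
    ];

    // Each batch ends with the first command that is not DontWait.
    let batches: Vec<&[SyncMode]> = modes
        .split_inclusive(|m| !matches!(m, SyncMode::DontWait))
        .collect();

    assert_eq!(batches.len(), 2);
    assert_eq!(batches[0].len(), 3); // DontWait, DontWait, WaitForTask
    assert_eq!(batches[1].len(), 2); // DontWait, WaitForResponse
}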
std::fs::create_dir_all(report_folder) std::fs::create_dir_all(report_folder)
.with_context(|| format!("could not create report directory at {report_folder}"))?; .with_context(|| format!("could not create report directory at {report_folder}"))?;