Remove the unused distance

Introduce a new error message for invalid vector dimensions
Make clippy happy
2025-12-02 02:35:36 +00:00 · 2023-06-14 16:37:14 +02:00 · 2023-06-14 16:36:58 +02:00 · 2023-06-14 15:59:10 +02:00 · 2023-06-14 15:57:31 +02:00 · 2023-06-14 15:53:39 +02:00
34 changed files with 436 additions and 634 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1207,6 +1207,12 @@ dependencies = [
 "winapi",
 ]

+[[package]]
+name = "doc-comment"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
+
 [[package]]
 name = "dump"
 version = "1.2.0"
@@ -1763,6 +1769,15 @@ dependencies = [
 "byteorder",
 ]

+[[package]]
+name = "hashbrown"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
+dependencies = [
+ "ahash 0.7.6",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.12.3"
@@ -1864,6 +1879,22 @@ dependencies = [
 "digest",
 ]

+[[package]]
+name = "hnsw"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b9740ebf8769ec4ad6762cc951ba18f39bba6dfbc2fbbe46285f7539af79752"
+dependencies = [
+ "ahash 0.7.6",
+ "hashbrown 0.11.2",
+ "libm",
+ "num-traits",
+ "rand_core",
+ "serde",
+ "smallvec",
+ "space",
+]
+
 [[package]]
 name = "http"
 version = "0.2.9"
@@ -1994,7 +2025,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
 dependencies = [
 "autocfg",
- "hashbrown",
+ "hashbrown 0.12.3",
 "serde",
 ]

@@ -2086,7 +2117,7 @@ checksum = "37228e06c75842d1097432d94d02f37fe3ebfca9791c2e8fef6e9db17ed128c1"
 dependencies = [
 "cedarwood",
 "fxhash",
- "hashbrown",
+ "hashbrown 0.12.3",
 "lazy_static",
 "phf",
 "phf_codegen",
@@ -2715,6 +2746,7 @@ dependencies = [
 "bimap",
 "bincode",
 "bstr",
+ "bytemuck",
 "byteorder",
 "charabia",
 "concat-arrays",
@@ -2730,6 +2762,7 @@ dependencies = [
 "geoutils",
 "grenad",
 "heed",
+ "hnsw",
 "insta",
 "itertools",
 "json-depth-checker",
@@ -2744,6 +2777,7 @@ dependencies = [
 "once_cell",
 "ordered-float",
 "rand",
+ "rand_pcg",
 "rayon",
 "roaring",
 "rstar",
@@ -2753,6 +2787,7 @@ dependencies = [
 "smallstr",
 "smallvec",
 "smartstring",
+ "space",
 "tempfile",
 "thiserror",
 "time",
@@ -3327,6 +3362,16 @@ dependencies = [
 "getrandom",
 ]

+[[package]]
+name = "rand_pcg"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59cad018caf63deb318e5a4586d99a24424a364f40f1e5778c29aca23f4fc73e"
+dependencies = [
+ "rand_core",
+ "serde",
+]
+
 [[package]]
 name = "rayon"
 version = "1.7.0"
@@ -3764,6 +3809,9 @@ name = "smallvec"
 version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
+dependencies = [
+ "serde",
+]

 [[package]]
 name = "smartstring"
@@ -3786,6 +3834,16 @@ dependencies = [
 "winapi",
 ]

+[[package]]
+name = "space"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5ab9701ae895386d13db622abf411989deff7109b13b46b6173bb4ce5c1d123"
+dependencies = [
+ "doc-comment",
+ "num-traits",
+]
+
 [[package]]
 name = "spin"
 version = "0.5.2"
@@ -4433,7 +4491,7 @@ version = "0.16.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c531a2dc4c462b833788be2c07eef4e621d0e9edbd55bf280cc164c1c1aa043"
 dependencies = [
- "hashbrown",
+ "hashbrown 0.12.3",
 "once_cell",
 ]

--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@@ -217,6 +217,7 @@ InvalidDocumentFields                 , InvalidRequest       , BAD_REQUEST ;
 MissingDocumentFilter                 , InvalidRequest       , BAD_REQUEST ;
 InvalidDocumentFilter                 , InvalidRequest       , BAD_REQUEST ;
 InvalidDocumentGeoField               , InvalidRequest       , BAD_REQUEST ;
+InvalidVectorDimensions               , InvalidRequest       , BAD_REQUEST ;
 InvalidDocumentId                     , InvalidRequest       , BAD_REQUEST ;
 InvalidDocumentLimit                  , InvalidRequest       , BAD_REQUEST ;
 InvalidDocumentOffset                 , InvalidRequest       , BAD_REQUEST ;
@@ -224,7 +225,6 @@ InvalidIndexLimit                     , InvalidRequest       , BAD_REQUEST ;
 InvalidIndexOffset                    , InvalidRequest       , BAD_REQUEST ;
 InvalidIndexPrimaryKey                , InvalidRequest       , BAD_REQUEST ;
 InvalidIndexUid                       , InvalidRequest       , BAD_REQUEST ;
-InvalidAttributesToSearchOn   , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchAttributesToCrop         , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchAttributesToHighlight    , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchAttributesToRetrieve     , InvalidRequest       , BAD_REQUEST ;
@@ -331,11 +331,9 @@ impl ErrorCode for milli::Error {
                    UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
                    UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
                    UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
-                    UserError::InvalidSearchableAttribute { .. } => {
-                        Code::InvalidAttributesToSearchOn
-                    }
                    UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
                    UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
+                    UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
                    UserError::SortError(_) => Code::InvalidSearchSort,
                    UserError::InvalidMinTypoWordLenSetting(_, _) => {
                        Code::InvalidSettingsTypoTolerance
--- a/meilisearch/src/routes/indexes/search.rs
+++ b/meilisearch/src/routes/indexes/search.rs
@@ -34,6 +34,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 pub struct SearchQueryGet {
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchQ>)]
    q: Option<String>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidSearchQ>)]
+    vector: Option<Vec<f32>>,
    #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSearchOffset>)]
    offset: Param<usize>,
    #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSearchLimit>)]
@@ -66,8 +68,6 @@ pub struct SearchQueryGet {
    crop_marker: String,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchMatchingStrategy>)]
    matching_strategy: MatchingStrategy,
-    #[deserr(default, error = DeserrQueryParamError<InvalidAttributesToSearchOn>)]
-    pub attributes_to_search_on: Option<CS<String>>,
 }

 impl From<SearchQueryGet> for SearchQuery {
@@ -82,6 +82,7 @@ impl From<SearchQueryGet> for SearchQuery {

        Self {
            q: other.q,
+            vector: other.vector,
            offset: other.offset.0,
            limit: other.limit.0,
            page: other.page.as_deref().copied(),
@@ -98,7 +99,6 @@ impl From<SearchQueryGet> for SearchQuery {
            highlight_post_tag: other.highlight_post_tag,
            crop_marker: other.crop_marker,
            matching_strategy: other.matching_strategy,
-            attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
        }
    }
 }
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -31,11 +31,13 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
 pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
 pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();

-#[derive(Debug, Clone, Default, PartialEq, Eq, Deserr)]
+#[derive(Debug, Clone, Default, PartialEq, Deserr)]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 pub struct SearchQuery {
    #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
    pub q: Option<String>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
+    pub vector: Option<Vec<f32>>,
    #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
    pub offset: usize,
    #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
@@ -68,8 +70,6 @@ pub struct SearchQuery {
    pub crop_marker: String,
    #[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
    pub matching_strategy: MatchingStrategy,
-    #[deserr(default, error = DeserrJsonError<InvalidAttributesToSearchOn>, default)]
-    pub attributes_to_search_on: Option<Vec<String>>,
 }

 impl SearchQuery {
@@ -82,13 +82,15 @@ impl SearchQuery {
 // This struct contains the fields of `SearchQuery` inline.
 // This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields.
 // The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
-#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
+#[derive(Debug, Clone, PartialEq, Deserr)]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 pub struct SearchQueryWithIndex {
    #[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
    pub index_uid: IndexUid,
    #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
    pub q: Option<String>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
+    pub vector: Option<Vec<f32>>,
    #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
    pub offset: usize,
    #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
@@ -121,8 +123,6 @@ pub struct SearchQueryWithIndex {
    pub crop_marker: String,
    #[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
    pub matching_strategy: MatchingStrategy,
-    #[deserr(default, error = DeserrJsonError<InvalidAttributesToSearchOn>, default)]
-    pub attributes_to_search_on: Option<Vec<String>>,
 }

 impl SearchQueryWithIndex {
@@ -130,6 +130,7 @@ impl SearchQueryWithIndex {
        let SearchQueryWithIndex {
            index_uid,
            q,
+            vector,
            offset,
            limit,
            page,
@@ -146,12 +147,12 @@ impl SearchQueryWithIndex {
            highlight_post_tag,
            crop_marker,
            matching_strategy,
-            attributes_to_search_on,
        } = self;
        (
            index_uid,
            SearchQuery {
                q,
+                vector,
                offset,
                limit,
                page,
@@ -168,7 +169,6 @@ impl SearchQueryWithIndex {
                highlight_post_tag,
                crop_marker,
                matching_strategy,
-                attributes_to_search_on,
                // do not use ..Default::default() here,
                // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
            },
@@ -276,12 +276,12 @@ pub fn perform_search(

    let mut search = index.search(&rtxn);

-    if let Some(ref query) = query.q {
-        search.query(query);
+    if let Some(ref vector) = query.vector {
+        search.vector(vector.clone());
    }

-    if let Some(ref searchable) = query.attributes_to_search_on {
-        search.searchable_attributes(searchable);
+    if let Some(ref query) = query.q {
+        search.query(query);
    }

    let is_finite_pagination = query.is_finite_pagination();
--- a/meilisearch/tests/search/errors.rs
+++ b/meilisearch/tests/search/errors.rs
@@ -963,27 +963,3 @@ async fn sort_unset_ranking_rule() {
        )
        .await;
 }
-
-#[actix_rt::test]
-async fn search_on_unknown_field() {
-    let server = Server::new().await;
-    let index = server.index("test");
-    let documents = DOCUMENTS.clone();
-    index.add_documents(documents, None).await;
-    index.wait_task(0).await;
-
-    index
-        .search(
-            json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}),
-            |response, code| {
-                assert_eq!(400, code, "{}", response);
-                assert_eq!(response, json!({
-                    "message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
-                    "code": "invalid_attributes_to_search_on",
-                    "type": "invalid_request",
-                    "link": "https://docs.meilisearch.com/errors#invalid_attributes_to_search_on"
-                }));
-            },
-        )
-        .await;
-}
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -5,7 +5,6 @@ mod errors;
 mod formatted;
 mod multi;
 mod pagination;
-mod restrict_searchable;

 use once_cell::sync::Lazy;
 use serde_json::{json, Value};
--- a/meilisearch/tests/search/restrict_searchable.rs
+++ b/meilisearch/tests/search/restrict_searchable.rs
@@ -1,241 +0,0 @@
-use once_cell::sync::Lazy;
-use serde_json::{json, Value};
-
-use crate::common::index::Index;
-use crate::common::Server;
-
-async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
-    let index = server.index("test");
-
-    index.add_documents(documents.clone(), None).await;
-    index.wait_task(0).await;
-    index
-}
-
-static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
-    json!([
-    {
-        "title": "Shazam!",
-        "desc": "a Captain Marvel ersatz",
-        "id": "1",
-    },
-    {
-        "title": "Captain Planet",
-        "desc": "He's not part of the Marvel Cinematic Universe",
-        "id": "2",
-    },
-    {
-        "title": "Captain Marvel",
-        "desc": "a Shazam ersatz",
-        "id": "3",
-    }])
-});
-
-#[actix_rt::test]
-async fn simple_search_on_title() {
-    let server = Server::new().await;
-    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
-
-    // simple search should return 2 documents (ids: 2 and 3).
-    index
-        .search(
-            json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"]}),
-            |response, code| {
-                assert_eq!(200, code, "{}", response);
-                assert_eq!(response["hits"].as_array().unwrap().len(), 2);
-            },
-        )
-        .await;
-}
-
-#[actix_rt::test]
-async fn simple_prefix_search_on_title() {
-    let server = Server::new().await;
-    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
-
-    // simple search should return 2 documents (ids: 2 and 3).
-    index
-        .search(json!({"q": "Captain Mar", "attributesToSearchOn": ["title"]}), |response, code| {
-            assert_eq!(200, code, "{}", response);
-            assert_eq!(response["hits"].as_array().unwrap().len(), 2);
-        })
-        .await;
-}
-
-#[actix_rt::test]
-async fn simple_search_on_title_matching_strategy_all() {
-    let server = Server::new().await;
-    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
-    // simple search matching strategy all should only return 1 document (ids: 2).
-    index
-        .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "matchingStrategy": "all"}), |response, code| {
-            assert_eq!(200, code, "{}", response);
-            assert_eq!(response["hits"].as_array().unwrap().len(), 1);
-        })
-        .await;
-}
-
-#[actix_rt::test]
-async fn simple_search_on_no_field() {
-    let server = Server::new().await;
-    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
-    // simple search on no field shouldn't return any document.
-    index
-        .search(json!({"q": "Captain Marvel", "attributesToSearchOn": []}), |response, code| {
-            assert_eq!(200, code, "{}", response);
-            assert_eq!(response["hits"].as_array().unwrap().len(), 0);
-        })
-        .await;
-}
-
-#[actix_rt::test]
-async fn word_ranking_rule_order() {
-    let server = Server::new().await;
-    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
-
-    // Document 3 should appear before document 2.
-    index
-        .search(
-            json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}),
-            |response, code| {
-                assert_eq!(200, code, "{}", response);
-                assert_eq!(
-                    response["hits"],
-                    json!([
-                        {"id": "3"},
-                        {"id": "2"},
-                    ])
-                );
-            },
-        )
-        .await;
-}
-
-#[actix_rt::test]
-async fn word_ranking_rule_order_exact_words() {
-    let server = Server::new().await;
-    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
-    index.update_settings_typo_tolerance(json!({"disableOnWords": ["Captain", "Marvel"]})).await;
-    index.wait_task(1).await;
-
-    // simple search should return 2 documents (ids: 2 and 3).
-    index
-        .search(
-            json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}),
-            |response, code| {
-                assert_eq!(200, code, "{}", response);
-                assert_eq!(
-                    response["hits"],
-                    json!([
-                        {"id": "3"},
-                        {"id": "2"},
-                    ])
-                );
-            },
-        )
-        .await;
-}
-
-#[actix_rt::test]
-async fn typo_ranking_rule_order() {
-    let server = Server::new().await;
-    let index = index_with_documents(
-        &server,
-        &json!([
-        {
-            "title": "Capitain Marivel",
-            "desc": "Captain Marvel",
-            "id": "1",
-        },
-        {
-            "title": "Captain Marivel",
-            "desc": "a Shazam ersatz",
-            "id": "2",
-        }]),
-    )
-    .await;
-
-    // Document 2 should appear before document 1.
-    index
-        .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["id"]}), |response, code| {
-            assert_eq!(200, code, "{}", response);
-            assert_eq!(
-                response["hits"],
-                json!([
-                    {"id": "2"},
-                    {"id": "1"},
-                ])
-            );
-        })
-        .await;
-}
-
-#[actix_rt::test]
-async fn attributes_ranking_rule_order() {
-    let server = Server::new().await;
-    let index = index_with_documents(
-        &server,
-        &json!([
-        {
-            "title": "Captain Marvel",
-            "desc": "a Shazam ersatz",
-            "footer": "The story of Captain Marvel",
-            "id": "1",
-        },
-        {
-            "title": "The Avengers",
-            "desc": "Captain Marvel is far from the earth",
-            "footer": "A super hero team",
-            "id": "2",
-        }]),
-    )
-    .await;
-
-    // Document 2 should appear before document 1.
-    index
-        .search(json!({"q": "Captain Marvel", "attributesToSearchOn": ["desc", "footer"], "attributesToRetrieve": ["id"]}), |response, code| {
-            assert_eq!(200, code, "{}", response);
-            assert_eq!(
-                response["hits"],
-                json!([
-                    {"id": "2"},
-                    {"id": "1"},
-                ])
-            );
-        })
-        .await;
-}
-
-#[actix_rt::test]
-async fn exactness_ranking_rule_order() {
-    let server = Server::new().await;
-    let index = index_with_documents(
-        &server,
-        &json!([
-        {
-            "title": "Captain Marvel",
-            "desc": "Captain Marivel",
-            "id": "1",
-        },
-        {
-            "title": "Captain Marvel",
-            "desc": "CaptainMarvel",
-            "id": "2",
-        }]),
-    )
-    .await;
-
-    // Document 2 should appear before document 1.
-    index
-        .search(json!({"q": "Captain Marvel", "attributesToRetrieve": ["id"], "attributesToSearchOn": ["desc"]}), |response, code| {
-            assert_eq!(200, code, "{}", response);
-            assert_eq!(
-                response["hits"],
-                json!([
-                    {"id": "2"},
-                    {"id": "1"},
-                ])
-            );
-        })
-        .await;
-}
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -15,6 +15,7 @@ license.workspace = true
 bimap = { version = "0.6.3", features = ["serde"] }
 bincode = "1.3.3"
 bstr = "1.4.0"
+bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
 byteorder = "1.4.3"
 charabia = { version = "0.7.2", default-features = false }
 concat-arrays = "0.1.2"
@@ -32,18 +33,21 @@ heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.6", default-f
    "lmdb",
    "sync-read-txn",
 ] }
+hnsw = { version = "0.11.0", features = ["serde1"] }
 json-depth-checker = { path = "../json-depth-checker" }
 levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
 memmap2 = "0.5.10"
 obkv = "0.2.0"
 once_cell = "1.17.1"
 ordered-float = "3.6.0"
+rand_pcg = { version = "0.3.1", features = ["serde1"] }
 rayon = "1.7.0"
 roaring = "0.10.1"
 rstar = { version = "0.10.0", features = ["serde"] }
 serde = { version = "1.0.160", features = ["derive"] }
 serde_json = { version = "1.0.95", features = ["preserve_order"] }
 slice-group-by = "0.3.0"
+space = "0.17.0"
 smallstr = { version = "0.3.0", features = ["serde"] }
 smallvec = "1.10.0"
 smartstring = "1.0.1"
--- a/milli/examples/search.rs
+++ b/milli/examples/search.rs
@@ -52,6 +52,7 @@ fn main() -> Result<(), Box<dyn Error>> {
            let docs = execute_search(
                &mut ctx,
                &(!query.trim().is_empty()).then(|| query.trim().to_owned()),
+                &None,
                TermsMatchingStrategy::Last,
                false,
                &None,
--- a/milli/src/distance.rs
+++ b/milli/src/distance.rs
@@ -0,0 +1,34 @@
+use serde::{Deserialize, Serialize};
+use space::Metric;
+
+#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
+pub struct DotProduct;
+
+impl Metric<Vec<f32>> for DotProduct {
+    type Unit = u32;
+
+    // TODO: document this function. Non-negative IEEE 754 floats sort like their bit patterns,
+    // so `to_bits` is ordered only if `dist >= 0` (NOTE(review): confirm dot products stay <= 1).
+    // The approach from <https://stackoverflow.com/a/43305015/1941280> was tried and wasn't OK.
+    // Following <https://docs.rs/space/0.17.0/space/trait.Metric.html>.
+    fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
+        let dist: f32 = a.iter().zip(b).map(|(a, b)| a * b).sum();
+        let dist = 1.0 - dist;
+        debug_assert!(!dist.is_nan());
+        dist.to_bits()
+    }
+}
+
+#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
+pub struct Euclidean;
+
+impl Metric<Vec<f32>> for Euclidean {
+    type Unit = u32;
+
+    fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
+        let squared: f32 = a.iter().zip(b).map(|(a, b)| (a - b).powi(2)).sum();
+        let dist = squared.sqrt();
+        debug_assert!(!dist.is_nan());
+        dist.to_bits()
+    }
+}
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@@ -110,9 +110,11 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    },
    #[error(transparent)]
    InvalidGeoField(#[from] GeoError),
+    #[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
+    InvalidVectorDimensions { expected: usize, found: usize },
    #[error("{0}")]
    InvalidFilter(String),
-    #[error("Invalid type for filter subexpression: `expected {}, found: {1}`.", .0.join(", "))]
+    #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
    InvalidFilterExpression(&'static [&'static str], Value),
    #[error("Attribute `{}` is not sortable. {}",
        .field,
@@ -124,16 +126,6 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
        }
    )]
    InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
-    #[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.",
-        .field,
-        .valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
-        .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
-    )]
-    InvalidSearchableAttribute {
-        field: String,
-        valid_fields: BTreeSet<String>,
-        hidden_fields: bool,
-    },
    #[error("{}", HeedError::BadOpenOptions)]
    InvalidLmdbOpenOptions,
    #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
--- a/milli/src/heed_codec/mod.rs
+++ b/milli/src/heed_codec/mod.rs
@@ -23,9 +23,3 @@ pub use self::roaring_bitmap_length::{
 pub use self::script_language_codec::ScriptLanguageCodec;
 pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
 pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
-
-pub trait BytesDecodeOwned {
-    type DItem;
-
-    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem>;
-}
--- a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs
@@ -2,11 +2,8 @@ use std::borrow::Cow;
 use std::convert::TryInto;
 use std::mem::size_of;

-use heed::BytesDecode;
 use roaring::RoaringBitmap;

-use crate::heed_codec::BytesDecodeOwned;
-
 pub struct BoRoaringBitmapCodec;

 impl BoRoaringBitmapCodec {
@@ -16,7 +13,7 @@ impl BoRoaringBitmapCodec {
    }
 }

-impl BytesDecode<'_> for BoRoaringBitmapCodec {
+impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
@@ -31,14 +28,6 @@ impl BytesDecode<'_> for BoRoaringBitmapCodec {
    }
 }

-impl BytesDecodeOwned for BoRoaringBitmapCodec {
-    type DItem = RoaringBitmap;
-
-    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
-        Self::bytes_decode(bytes)
-    }
-}
-
 impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
    type EItem = RoaringBitmap;

--- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
@@ -5,8 +5,6 @@ use std::mem::size_of;
 use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
 use roaring::RoaringBitmap;

-use crate::heed_codec::BytesDecodeOwned;
-
 /// This is the limit where using a byteorder became less size efficient
 /// than using a direct roaring encoding, it is also the point where we are able
 /// to determine the encoding used only by using the array of bytes length.
@@ -105,14 +103,6 @@ impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
    }
 }

-impl BytesDecodeOwned for CboRoaringBitmapCodec {
-    type DItem = RoaringBitmap;
-
-    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
-        Self::deserialize_from(bytes).ok()
-    }
-}
-
 impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
    type EItem = RoaringBitmap;

--- a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
@@ -2,8 +2,6 @@ use std::borrow::Cow;

 use roaring::RoaringBitmap;

-use crate::heed_codec::BytesDecodeOwned;
-
 pub struct RoaringBitmapCodec;

 impl heed::BytesDecode<'_> for RoaringBitmapCodec {
@@ -14,14 +12,6 @@ impl heed::BytesDecode<'_> for RoaringBitmapCodec {
    }
 }

-impl BytesDecodeOwned for RoaringBitmapCodec {
-    type DItem = RoaringBitmap;
-
-    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
-        RoaringBitmap::deserialize_from(bytes).ok()
-    }
-}
-
 impl heed::BytesEncode<'_> for RoaringBitmapCodec {
    type EItem = RoaringBitmap;

--- a/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs
@@ -1,23 +1,11 @@
 use std::mem;

-use heed::BytesDecode;
-
-use crate::heed_codec::BytesDecodeOwned;
-
 pub struct BoRoaringBitmapLenCodec;

-impl BytesDecode<'_> for BoRoaringBitmapLenCodec {
+impl heed::BytesDecode<'_> for BoRoaringBitmapLenCodec {
    type DItem = u64;

    fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
        Some((bytes.len() / mem::size_of::<u32>()) as u64)
    }
 }
-
-impl BytesDecodeOwned for BoRoaringBitmapLenCodec {
-    type DItem = u64;
-
-    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
-        Self::bytes_decode(bytes)
-    }
-}
--- a/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs
@@ -1,14 +1,11 @@
 use std::mem;

-use heed::BytesDecode;
-
 use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
 use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
-use crate::heed_codec::BytesDecodeOwned;

 pub struct CboRoaringBitmapLenCodec;

-impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
+impl heed::BytesDecode<'_> for CboRoaringBitmapLenCodec {
    type DItem = u64;

    fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
@@ -23,11 +20,3 @@ impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
        }
    }
 }
-
-impl BytesDecodeOwned for CboRoaringBitmapLenCodec {
-    type DItem = u64;
-
-    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
-        Self::bytes_decode(bytes)
-    }
-}
--- a/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs
@@ -3,8 +3,6 @@ use std::mem;

 use byteorder::{LittleEndian, ReadBytesExt};

-use crate::heed_codec::BytesDecodeOwned;
-
 const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
 const SERIAL_COOKIE: u16 = 12347;

@@ -61,14 +59,6 @@ impl heed::BytesDecode<'_> for RoaringBitmapLenCodec {
    }
 }

-impl BytesDecodeOwned for RoaringBitmapLenCodec {
-    type DItem = u64;
-
-    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
-        RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok()
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use heed::BytesEncode;
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -8,10 +8,12 @@ use charabia::{Language, Script};
 use heed::flags::Flags;
 use heed::types::*;
 use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn};
+use rand_pcg::Pcg32;
 use roaring::RoaringBitmap;
 use rstar::RTree;
 use time::OffsetDateTime;

+use crate::distance::DotProduct;
 use crate::error::{InternalError, UserError};
 use crate::facet::FacetType;
 use crate::fields_ids_map::FieldsIdsMap;
@@ -26,6 +28,9 @@ use crate::{
    Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32,
 };

+/// The HNSW data-structure that we serialize, fill and search in.
+pub type Hnsw = hnsw::Hnsw<DotProduct, Vec<f32>, Pcg32, 12, 24>;
+
 pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
 pub const DEFAULT_MIN_WORD_LEN_TWO_TYPOS: u8 = 9;

@@ -42,6 +47,7 @@ pub mod main_key {
    pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
    pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids";
    pub const GEO_RTREE_KEY: &str = "geo-rtree";
+    pub const VECTOR_HNSW_KEY: &str = "vector-hnsw";
    pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
    pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
    pub const PRIMARY_KEY_KEY: &str = "primary-key";
@@ -86,6 +92,7 @@ pub mod db_name {
    pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
    pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
    pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
+    pub const VECTOR_ID_DOCID: &str = "vector-id-docids";
    pub const DOCUMENTS: &str = "documents";
    pub const SCRIPT_LANGUAGE_DOCIDS: &str = "script_language_docids";
 }
@@ -149,6 +156,9 @@ pub struct Index {
    /// Maps the document id, the facet field id and the strings.
    pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,

+    /// Maps a vector id to the id of the document that has it.
+    pub vector_id_docid: Database<OwnedType<BEU32>, OwnedType<BEU32>>,
+
    /// Maps the document id to the document as an obkv store.
    pub(crate) documents: Database<OwnedType<BEU32>, ObkvCodec>,
 }
@@ -162,7 +172,7 @@ impl Index {
    ) -> Result<Index> {
        use db_name::*;

-        options.max_dbs(23);
+        options.max_dbs(24);
        unsafe { options.flag(Flags::MdbAlwaysFreePages) };

        let env = options.open(path)?;
@@ -198,11 +208,11 @@ impl Index {
            env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?;
        let facet_id_is_empty_docids =
            env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?;
-
        let field_id_docid_facet_f64s =
            env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
        let field_id_docid_facet_strings =
            env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?;
+        let vector_id_docid = env.create_database(&mut wtxn, Some(VECTOR_ID_DOCID))?;
        let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
        wtxn.commit()?;

@@ -231,6 +241,7 @@ impl Index {
            facet_id_is_empty_docids,
            field_id_docid_facet_f64s,
            field_id_docid_facet_strings,
+            vector_id_docid,
            documents,
        })
    }
@@ -502,6 +513,26 @@ impl Index {
        }
    }

+    /* vector HNSW */
+
+    /// Writes the provided `hnsw`.
+    pub(crate) fn put_vector_hnsw(&self, wtxn: &mut RwTxn, hnsw: &Hnsw) -> heed::Result<()> {
+        self.main.put::<_, Str, SerdeBincode<Hnsw>>(wtxn, main_key::VECTOR_HNSW_KEY, hnsw)
+    }
+
+    /// Deletes the `hnsw`.
+    pub(crate) fn delete_vector_hnsw(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
+        self.main.delete::<_, Str>(wtxn, main_key::VECTOR_HNSW_KEY)
+    }
+
+    /// Returns the `hnsw`.
+    pub fn vector_hnsw(&self, rtxn: &RoTxn) -> Result<Option<Hnsw>> {
+        match self.main.get::<_, Str, SerdeBincode<Hnsw>>(rtxn, main_key::VECTOR_HNSW_KEY)? {
+            Some(hnsw) => Ok(Some(hnsw)),
+            None => Ok(None),
+        }
+    }
+
    /* field distribution */

    /// Writes the field distribution which associates every field name with
@@ -1466,9 +1497,9 @@ pub(crate) mod tests {

        db_snap!(index, field_distribution,
            @r###"
-        age              1     
-        id               2     
-        name             2     
+        age              1      |
+        id               2      |
+        name             2      |
        "###
        );

@@ -1486,9 +1517,9 @@ pub(crate) mod tests {

        db_snap!(index, field_distribution,
            @r###"
-        age              1     
-        id               2     
-        name             2     
+        age              1      |
+        id               2      |
+        name             2      |
        "###
        );

@@ -1502,9 +1533,9 @@ pub(crate) mod tests {

        db_snap!(index, field_distribution,
            @r###"
-        has_dog          1     
-        id               2     
-        name             2     
+        has_dog          1      |
+        id               2      |
+        name             2      |
        "###
        );
    }
--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@@ -10,6 +10,7 @@ pub mod documents;

 mod asc_desc;
 mod criterion;
+pub mod distance;
 mod error;
 mod external_documents_ids;
 pub mod facet;
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -22,12 +22,12 @@ pub mod new;

 pub struct Search<'a> {
    query: Option<String>,
+    vector: Option<Vec<f32>>,
    // this should be linked to the String in the query
    filter: Option<Filter<'a>>,
    offset: usize,
    limit: usize,
    sort_criteria: Option<Vec<AscDesc>>,
-    searchable_attributes: Option<&'a [String]>,
    geo_strategy: new::GeoSortStrategy,
    terms_matching_strategy: TermsMatchingStrategy,
    words_limit: usize,
@@ -40,11 +40,11 @@ impl<'a> Search<'a> {
    pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> {
        Search {
            query: None,
+            vector: None,
            filter: None,
            offset: 0,
            limit: 20,
            sort_criteria: None,
-            searchable_attributes: None,
            geo_strategy: new::GeoSortStrategy::default(),
            terms_matching_strategy: TermsMatchingStrategy::default(),
            exhaustive_number_hits: false,
@@ -59,6 +59,11 @@ impl<'a> Search<'a> {
        self
    }

+    pub fn vector(&mut self, vector: impl Into<Vec<f32>>) -> &mut Search<'a> {
+        self.vector = Some(vector.into());
+        self
+    }
+
    pub fn offset(&mut self, offset: usize) -> &mut Search<'a> {
        self.offset = offset;
        self
@@ -74,11 +79,6 @@ impl<'a> Search<'a> {
        self
    }

-    pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> {
-        self.searchable_attributes = Some(searchable);
-        self
-    }
-
    pub fn terms_matching_strategy(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> {
        self.terms_matching_strategy = value;
        self
@@ -109,15 +109,11 @@ impl<'a> Search<'a> {

    pub fn execute(&self) -> Result<SearchResult> {
        let mut ctx = SearchContext::new(self.index, self.rtxn);
-
-        if let Some(searchable_attributes) = self.searchable_attributes {
-            ctx.searchable_attributes(searchable_attributes)?;
-        }
-
        let PartialSearchResult { located_query_terms, candidates, documents_ids } =
            execute_search(
                &mut ctx,
                &self.query,
+                &self.vector,
                self.terms_matching_strategy,
                self.exhaustive_number_hits,
                &self.filter,
@@ -144,11 +140,11 @@ impl fmt::Debug for Search<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Search {
            query,
+            vector: _,
            filter,
            offset,
            limit,
            sort_criteria,
-            searchable_attributes,
            geo_strategy: _,
            terms_matching_strategy,
            words_limit,
@@ -158,11 +154,11 @@ impl fmt::Debug for Search<'_> {
        } = self;
        f.debug_struct("Search")
            .field("query", query)
+            .field("vector", &"[...]")
            .field("filter", filter)
            .field("offset", offset)
            .field("limit", limit)
            .field("sort_criteria", sort_criteria)
-            .field("searchable_attributes", searchable_attributes)
            .field("terms_matching_strategy", terms_matching_strategy)
            .field("exhaustive_number_hits", exhaustive_number_hits)
            .field("words_limit", words_limit)
--- a/milli/src/search/new/db_cache.rs
+++ b/milli/src/search/new/db_cache.rs
@@ -4,13 +4,12 @@ use std::hash::Hash;

 use fxhash::FxHashMap;
 use heed::types::ByteSlice;
-use heed::{BytesEncode, Database, RoTxn};
+use heed::{BytesDecode, BytesEncode, Database, RoTxn};
 use roaring::RoaringBitmap;

 use super::interner::Interned;
 use super::Word;
-use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
-use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
+use crate::heed_codec::StrBEU16Codec;
 use crate::{
    CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
 };
@@ -23,110 +22,50 @@ use crate::{
 #[derive(Default)]
 pub struct DatabaseCache<'ctx> {
    pub word_pair_proximity_docids:
-        FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
+        FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
    pub word_prefix_pair_proximity_docids:
-        FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
+        FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
    pub prefix_word_pair_proximity_docids:
-        FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
-    pub word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
-    pub exact_word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
-    pub word_prefix_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
-    pub exact_word_prefix_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
+        FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'ctx [u8]>>,
+    pub word_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
+    pub exact_word_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
+    pub word_prefix_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,
+    pub exact_word_prefix_docids: FxHashMap<Interned<String>, Option<&'ctx [u8]>>,

    pub words_fst: Option<fst::Set<Cow<'ctx, [u8]>>>,
-    pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
-    pub word_prefix_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
+    pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
+    pub word_prefix_position_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
    pub word_positions: FxHashMap<Interned<String>, Vec<u16>>,
    pub word_prefix_positions: FxHashMap<Interned<String>, Vec<u16>>,

-    pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
-    pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
+    pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
+    pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
    pub word_fids: FxHashMap<Interned<String>, Vec<u16>>,
    pub word_prefix_fids: FxHashMap<Interned<String>, Vec<u16>>,
 }
 impl<'ctx> DatabaseCache<'ctx> {
-    fn get_value<'v, K1, KC, DC>(
+    fn get_value<'v, K1, KC>(
        txn: &'ctx RoTxn,
        cache_key: K1,
        db_key: &'v KC::EItem,
-        cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
+        cache: &mut FxHashMap<K1, Option<&'ctx [u8]>>,
        db: Database<KC, ByteSlice>,
-    ) -> Result<Option<DC::DItem>>
+    ) -> Result<Option<&'ctx [u8]>>
    where
        K1: Copy + Eq + Hash,
        KC: BytesEncode<'v>,
-        DC: BytesDecodeOwned,
    {
-        match cache.entry(cache_key) {
-            Entry::Occupied(_) => {}
+        let bitmap_ptr = match cache.entry(cache_key) {
+            Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(),
            Entry::Vacant(entry) => {
-                let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed);
-                entry.insert(bitmap_ptr);
-            }
-        }
-
-        match cache.get(&cache_key).unwrap() {
-            Some(Cow::Borrowed(bytes)) => {
-                DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
-            }
-            Some(Cow::Owned(bytes)) => {
-                DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
-            }
-            None => Ok(None),
-        }
-    }
-
-    fn get_value_from_keys<'v, K1, KC, DC>(
-        txn: &'ctx RoTxn,
-        cache_key: K1,
-        db_keys: &'v [KC::EItem],
-        cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
-        db: Database<KC, ByteSlice>,
-        merger: MergeFn,
-    ) -> Result<Option<DC::DItem>>
-    where
-        K1: Copy + Eq + Hash,
-        KC: BytesEncode<'v>,
-        DC: BytesDecodeOwned,
-        KC::EItem: Sized,
-    {
-        match cache.entry(cache_key) {
-            Entry::Occupied(_) => {}
-            Entry::Vacant(entry) => {
-                let bitmap_ptr: Option<Cow<'ctx, [u8]>> = match db_keys {
-                    [] => None,
-                    [key] => db.get(txn, key)?.map(Cow::Borrowed),
-                    keys => {
-                        let bitmaps = keys
-                            .iter()
-                            .filter_map(|key| db.get(txn, key).transpose())
-                            .map(|v| v.map(Cow::Borrowed))
-                            .collect::<std::result::Result<Vec<Cow<[u8]>>, _>>()?;
-
-                        if bitmaps.is_empty() {
-                            None
-                        } else {
-                            Some(merger(&[], &bitmaps[..])?)
-                        }
-                    }
-                };
-
+                let bitmap_ptr = db.get(txn, db_key)?;
                entry.insert(bitmap_ptr);
+                bitmap_ptr
            }
        };
-
-        match cache.get(&cache_key).unwrap() {
-            Some(Cow::Borrowed(bytes)) => {
-                DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
-            }
-            Some(Cow::Owned(bytes)) => {
-                DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some)
-            }
-            None => Ok(None),
-        }
+        Ok(bitmap_ptr)
    }
 }
-
 impl<'ctx> SearchContext<'ctx> {
    pub fn get_words_fst(&mut self) -> Result<fst::Set<Cow<'ctx, [u8]>>> {
        if let Some(fst) = self.db_cache.words_fst.clone() {
@@ -160,41 +99,30 @@ impl<'ctx> SearchContext<'ctx> {

    /// Retrieve or insert the given value in the `word_docids` database.
    fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> {
-        match &self.restricted_fids {
-            Some(restricted_fids) => {
-                let interned = self.word_interner.get(word).as_str();
-                let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();
-
-                DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
-                    self.txn,
-                    word,
-                    &keys[..],
-                    &mut self.db_cache.word_docids,
-                    self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
-                    merge_cbo_roaring_bitmaps,
-                )
-            }
-            None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
-                self.txn,
-                word,
-                self.word_interner.get(word).as_str(),
-                &mut self.db_cache.word_docids,
-                self.index.word_docids.remap_data_type::<ByteSlice>(),
-            ),
-        }
+        DatabaseCache::get_value(
+            self.txn,
+            word,
+            self.word_interner.get(word).as_str(),
+            &mut self.db_cache.word_docids,
+            self.index.word_docids.remap_data_type::<ByteSlice>(),
+        )?
+        .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
+        .transpose()
    }

    fn get_db_exact_word_docids(
        &mut self,
        word: Interned<String>,
    ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
+        DatabaseCache::get_value(
            self.txn,
            word,
            self.word_interner.get(word).as_str(),
            &mut self.db_cache.exact_word_docids,
            self.index.exact_word_docids.remap_data_type::<ByteSlice>(),
-        )
+        )?
+        .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
+        .transpose()
    }

    pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> {
@@ -222,41 +150,30 @@ impl<'ctx> SearchContext<'ctx> {
        &mut self,
        prefix: Interned<String>,
    ) -> Result<Option<RoaringBitmap>> {
-        match &self.restricted_fids {
-            Some(restricted_fids) => {
-                let interned = self.word_interner.get(prefix).as_str();
-                let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect();
-
-                DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
-                    self.txn,
-                    prefix,
-                    &keys[..],
-                    &mut self.db_cache.word_prefix_docids,
-                    self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
-                    merge_cbo_roaring_bitmaps,
-                )
-            }
-            None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
-                self.txn,
-                prefix,
-                self.word_interner.get(prefix).as_str(),
-                &mut self.db_cache.word_prefix_docids,
-                self.index.word_prefix_docids.remap_data_type::<ByteSlice>(),
-            ),
-        }
+        DatabaseCache::get_value(
+            self.txn,
+            prefix,
+            self.word_interner.get(prefix).as_str(),
+            &mut self.db_cache.word_prefix_docids,
+            self.index.word_prefix_docids.remap_data_type::<ByteSlice>(),
+        )?
+        .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
+        .transpose()
    }

    fn get_db_exact_word_prefix_docids(
        &mut self,
        prefix: Interned<String>,
    ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
+        DatabaseCache::get_value(
            self.txn,
            prefix,
            self.word_interner.get(prefix).as_str(),
            &mut self.db_cache.exact_word_prefix_docids,
            self.index.exact_word_prefix_docids.remap_data_type::<ByteSlice>(),
-        )
+        )?
+        .map(|bytes| RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
+        .transpose()
    }

    pub fn get_db_word_pair_proximity_docids(
@@ -265,7 +182,7 @@ impl<'ctx> SearchContext<'ctx> {
        word2: Interned<String>,
        proximity: u8,
    ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
+        DatabaseCache::get_value(
            self.txn,
            (proximity, word1, word2),
            &(
@@ -275,7 +192,9 @@ impl<'ctx> SearchContext<'ctx> {
            ),
            &mut self.db_cache.word_pair_proximity_docids,
            self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
-        )
+        )?
+        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
+        .transpose()
    }

    pub fn get_db_word_pair_proximity_docids_len(
@@ -284,7 +203,7 @@ impl<'ctx> SearchContext<'ctx> {
        word2: Interned<String>,
        proximity: u8,
    ) -> Result<Option<u64>> {
-        DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
+        DatabaseCache::get_value(
            self.txn,
            (proximity, word1, word2),
            &(
@@ -294,7 +213,11 @@ impl<'ctx> SearchContext<'ctx> {
            ),
            &mut self.db_cache.word_pair_proximity_docids,
            self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
-        )
+        )?
+        .map(|bytes| {
+            CboRoaringBitmapLenCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into())
+        })
+        .transpose()
    }

    pub fn get_db_word_prefix_pair_proximity_docids(
@@ -303,7 +226,7 @@ impl<'ctx> SearchContext<'ctx> {
        prefix2: Interned<String>,
        proximity: u8,
    ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
+        DatabaseCache::get_value(
            self.txn,
            (proximity, word1, prefix2),
            &(
@@ -313,7 +236,9 @@ impl<'ctx> SearchContext<'ctx> {
            ),
            &mut self.db_cache.word_prefix_pair_proximity_docids,
            self.index.word_prefix_pair_proximity_docids.remap_data_type::<ByteSlice>(),
-        )
+        )?
+        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
+        .transpose()
    }
    pub fn get_db_prefix_word_pair_proximity_docids(
        &mut self,
@@ -321,7 +246,7 @@ impl<'ctx> SearchContext<'ctx> {
        right: Interned<String>,
        proximity: u8,
    ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
+        DatabaseCache::get_value(
            self.txn,
            (proximity, left_prefix, right),
            &(
@@ -331,7 +256,9 @@ impl<'ctx> SearchContext<'ctx> {
            ),
            &mut self.db_cache.prefix_word_pair_proximity_docids,
            self.index.prefix_word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
-        )
+        )?
+        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
+        .transpose()
    }

    pub fn get_db_word_fid_docids(
@@ -339,18 +266,15 @@ impl<'ctx> SearchContext<'ctx> {
        word: Interned<String>,
        fid: u16,
    ) -> Result<Option<RoaringBitmap>> {
-        // if the requested fid isn't in the restricted list, return None.
-        if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
-            return Ok(None);
-        }
-
-        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
+        DatabaseCache::get_value(
            self.txn,
            (word, fid),
            &(self.word_interner.get(word).as_str(), fid),
            &mut self.db_cache.word_fid_docids,
            self.index.word_fid_docids.remap_data_type::<ByteSlice>(),
-        )
+        )?
+        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
+        .transpose()
    }

    pub fn get_db_word_prefix_fid_docids(
@@ -358,18 +282,15 @@ impl<'ctx> SearchContext<'ctx> {
        word_prefix: Interned<String>,
        fid: u16,
    ) -> Result<Option<RoaringBitmap>> {
-        // if the requested fid isn't in the restricted list, return None.
-        if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
-            return Ok(None);
-        }
-
-        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
+        DatabaseCache::get_value(
            self.txn,
            (word_prefix, fid),
            &(self.word_interner.get(word_prefix).as_str(), fid),
            &mut self.db_cache.word_prefix_fid_docids,
            self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
-        )
+        )?
+        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
+        .transpose()
    }

    pub fn get_db_word_fids(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
@@ -388,7 +309,7 @@ impl<'ctx> SearchContext<'ctx> {
                for result in remap_key_type {
                    let ((_, fid), value) = result?;
                    // filling other caches to avoid searching for them again
-                    self.db_cache.word_fid_docids.insert((word, fid), Some(Cow::Borrowed(value)));
+                    self.db_cache.word_fid_docids.insert((word, fid), Some(value));
                    fids.push(fid);
                }
                entry.insert(fids.clone());
@@ -414,9 +335,7 @@ impl<'ctx> SearchContext<'ctx> {
                for result in remap_key_type {
                    let ((_, fid), value) = result?;
                    // filling other caches to avoid searching for them again
-                    self.db_cache
-                        .word_prefix_fid_docids
-                        .insert((word_prefix, fid), Some(Cow::Borrowed(value)));
+                    self.db_cache.word_prefix_fid_docids.insert((word_prefix, fid), Some(value));
                    fids.push(fid);
                }
                entry.insert(fids.clone());
@@ -431,13 +350,15 @@ impl<'ctx> SearchContext<'ctx> {
        word: Interned<String>,
        position: u16,
    ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
+        DatabaseCache::get_value(
            self.txn,
            (word, position),
            &(self.word_interner.get(word).as_str(), position),
            &mut self.db_cache.word_position_docids,
            self.index.word_position_docids.remap_data_type::<ByteSlice>(),
-        )
+        )?
+        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
+        .transpose()
    }

    pub fn get_db_word_prefix_position_docids(
@@ -445,13 +366,15 @@ impl<'ctx> SearchContext<'ctx> {
        word_prefix: Interned<String>,
        position: u16,
    ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
+        DatabaseCache::get_value(
            self.txn,
            (word_prefix, position),
            &(self.word_interner.get(word_prefix).as_str(), position),
            &mut self.db_cache.word_prefix_position_docids,
            self.index.word_prefix_position_docids.remap_data_type::<ByteSlice>(),
-        )
+        )?
+        .map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
+        .transpose()
    }

    pub fn get_db_word_positions(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
@@ -470,9 +393,7 @@ impl<'ctx> SearchContext<'ctx> {
                for result in remap_key_type {
                    let ((_, position), value) = result?;
                    // filling other caches to avoid searching for them again
-                    self.db_cache
-                        .word_position_docids
-                        .insert((word, position), Some(Cow::Borrowed(value)));
+                    self.db_cache.word_position_docids.insert((word, position), Some(value));
                    positions.push(position);
                }
                entry.insert(positions.clone());
@@ -503,7 +424,7 @@ impl<'ctx> SearchContext<'ctx> {
                    // filling other caches to avoid searching for them again
                    self.db_cache
                        .word_prefix_position_docids
-                        .insert((word_prefix, position), Some(Cow::Borrowed(value)));
+                        .insert((word_prefix, position), Some(value));
                    positions.push(position);
                }
                entry.insert(positions.clone());
--- a/milli/src/search/new/matches/mod.rs
+++ b/milli/src/search/new/matches/mod.rs
@@ -509,6 +509,7 @@ mod tests {
            let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
                &mut ctx,
                &Some(query.to_string()),
+                &None,
                crate::TermsMatchingStrategy::default(),
                false,
                &None,
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -20,7 +20,7 @@ mod sort;
 #[cfg(test)]
 mod tests;

-use std::collections::{BTreeSet, HashSet};
+use std::collections::HashSet;

 use bucket_sort::{bucket_sort, BucketSortOutput};
 use charabia::TokenizerBuilder;
@@ -28,6 +28,7 @@ use db_cache::DatabaseCache;
 use exact_attribute::ExactAttribute;
 use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
 use heed::RoTxn;
+use hnsw::Searcher;
 use interner::{DedupInterner, Interner};
 pub use logger::visual::VisualSearchLogger;
 pub use logger::{DefaultSearchLogger, SearchLogger};
@@ -39,14 +40,16 @@ use ranking_rules::{
 use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
 use roaring::RoaringBitmap;
 use sort::Sort;
+use space::Neighbor;

 use self::geo_sort::GeoSort;
 pub use self::geo_sort::Strategy as GeoSortStrategy;
 use self::graph_based_ranking_rule::Words;
 use self::interner::Interned;
-use crate::error::FieldIdMapMissingEntry;
 use crate::search::new::distinct::apply_distinct_rule;
-use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
+use crate::{
+    AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32,
+};

 /// A structure used throughout the execution of a search query.
 pub struct SearchContext<'ctx> {
@@ -57,7 +60,6 @@ pub struct SearchContext<'ctx> {
    pub phrase_interner: DedupInterner<Phrase>,
    pub term_interner: Interner<QueryTerm>,
    pub phrase_docids: PhraseDocIdsCache,
-    pub restricted_fids: Option<Vec<u16>>,
 }

 impl<'ctx> SearchContext<'ctx> {
@@ -70,66 +72,8 @@ impl<'ctx> SearchContext<'ctx> {
            phrase_interner: <_>::default(),
            term_interner: <_>::default(),
            phrase_docids: <_>::default(),
-            restricted_fids: None,
        }
    }
-
-    pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
-        let fids_map = self.index.fields_ids_map(self.txn)?;
-        let searchable_names = self.index.searchable_fields(self.txn)?;
-
-        let mut restricted_fids = Vec::new();
-        for field_name in searchable_attributes {
-            let searchable_contains_name =
-                searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name));
-            let fid = match (fids_map.id(field_name), searchable_contains_name) {
-                // The Field id exist and the field is searchable
-                (Some(fid), Some(true)) | (Some(fid), None) => fid,
-                // The field is searchable but the Field id doesn't exist => Internal Error
-                (None, Some(true)) => {
-                    return Err(FieldIdMapMissingEntry::FieldName {
-                        field_name: field_name.to_string(),
-                        process: "search",
-                    }
-                    .into())
-                }
-                // The field is not searchable => User error
-                _otherwise => {
-                    let mut valid_fields: BTreeSet<_> =
-                        fids_map.names().map(String::from).collect();
-
-                    // Filter by the searchable names
-                    if let Some(sn) = searchable_names {
-                        let searchable_names = sn.iter().map(|s| s.to_string()).collect();
-                        valid_fields = &valid_fields & &searchable_names;
-                    }
-
-                    let searchable_count = valid_fields.len();
-
-                    // Remove hidden fields
-                    if let Some(dn) = self.index.displayed_fields(self.txn)? {
-                        let displayable_names = dn.iter().map(|s| s.to_string()).collect();
-                        valid_fields = &valid_fields & &displayable_names;
-                    }
-
-                    let hidden_fields = searchable_count > valid_fields.len();
-                    let field = field_name.to_string();
-                    return Err(UserError::InvalidSearchableAttribute {
-                        field,
-                        valid_fields,
-                        hidden_fields,
-                    }
-                    .into());
-                }
-            };
-
-            restricted_fids.push(fid);
-        }
-
-        self.restricted_fids = Some(restricted_fids);
-
-        Ok(())
-    }
 }

 #[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
@@ -409,6 +353,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
 pub fn execute_search(
    ctx: &mut SearchContext,
    query: &Option<String>,
+    vector: &Option<Vec<f32>>,
    terms_matching_strategy: TermsMatchingStrategy,
    exhaustive_number_hits: bool,
    filters: &Option<Filter>,
@@ -488,6 +433,33 @@ pub fn execute_search(

    let BucketSortOutput { docids, mut all_candidates } = bucket_sort_output;

+    let docids = match vector {
+        Some(vector) => {
+            // return the nearest documents that are also part of the candidates.
+            let mut searcher = Searcher::new();
+            let hnsw = ctx.index.vector_hnsw(ctx.txn)?.unwrap_or_default();
+            let ef = hnsw.len().min(100);
+            let mut dest = vec![Neighbor { index: 0, distance: 0 }; ef];
+            let neighbors = hnsw.nearest(vector, ef, &mut searcher, &mut dest[..]);
+
+            let mut docids = Vec::new();
+            for Neighbor { index, distance: _ } in neighbors.iter() {
+                let index = BEU32::new(*index as u32);
+                let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get();
+                if universe.contains(docid) {
+                    docids.push(docid);
+                    if docids.len() == (from + length) {
+                        break;
+                    }
+                }
+            }
+
+            docids.into_iter().skip(from).take(length).collect()
+        }
+        // return the search docids if the vector field is not specified
+        None => docids,
+    };
+
    // The candidates is the universe unless the exhaustive number of hits
    // is requested and a distinct attribute is set.
    if exhaustive_number_hits {
--- a/milli/src/snapshot_tests.rs
+++ b/milli/src/snapshot_tests.rs
@@ -318,7 +318,7 @@ pub fn snap_field_distributions(index: &Index) -> String {
    let rtxn = index.read_txn().unwrap();
    let mut snap = String::new();
    for (field, count) in index.field_distribution(&rtxn).unwrap() {
-        writeln!(&mut snap, "{field:<16} {count:<6}").unwrap();
+        writeln!(&mut snap, "{field:<16} {count:<6} |").unwrap();
    }
    snap
 }
@@ -328,7 +328,7 @@ pub fn snap_fields_ids_map(index: &Index) -> String {
    let mut snap = String::new();
    for field_id in fields_ids_map.ids() {
        let name = fields_ids_map.name(field_id).unwrap();
-        writeln!(&mut snap, "{field_id:<3} {name:<16}").unwrap();
+        writeln!(&mut snap, "{field_id:<3} {name:<16} |").unwrap();
    }
    snap
 }
--- a/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap
+++ b/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap
@@ -1,7 +1,7 @@
 ---
 source: milli/src/index.rs
 ---
-age              1     
-id               2     
-name             2     
+age              1      |
+id               2      |
+name             2      |

--- a/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap
+++ b/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap
@@ -1,7 +1,7 @@
 ---
 source: milli/src/index.rs
 ---
-age              1     
-id               2     
-name             2     
+age              1      |
+id               2      |
+name             2      |

--- a/milli/src/update/clear_documents.rs
+++ b/milli/src/update/clear_documents.rs
@@ -39,6 +39,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
            facet_id_is_empty_docids,
            field_id_docid_facet_f64s,
            field_id_docid_facet_strings,
+            vector_id_docid,
            documents,
        } = self.index;

@@ -57,6 +58,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
        self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
        self.index.delete_geo_rtree(self.wtxn)?;
        self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
+        self.index.delete_vector_hnsw(self.wtxn)?;

        // We clean all the faceted documents ids.
        for field_id in faceted_fields {
@@ -95,6 +97,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
        facet_id_string_docids.clear(self.wtxn)?;
        field_id_docid_facet_f64s.clear(self.wtxn)?;
        field_id_docid_facet_strings.clear(self.wtxn)?;
+        vector_id_docid.clear(self.wtxn)?;
        documents.clear(self.wtxn)?;

        Ok(number_of_documents)
--- a/milli/src/update/delete_documents.rs
+++ b/milli/src/update/delete_documents.rs
@@ -4,8 +4,10 @@ use std::collections::{BTreeSet, HashMap, HashSet};
 use fst::IntoStreamer;
 use heed::types::{ByteSlice, DecodeIgnore, Str, UnalignedSlice};
 use heed::{BytesDecode, BytesEncode, Database, RwIter};
+use hnsw::Searcher;
 use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};
+use space::KnnPoints;
 use time::OffsetDateTime;

 use super::facet::delete::FacetsDelete;
@@ -14,6 +16,7 @@ use crate::error::InternalError;
 use crate::facet::FacetType;
 use crate::heed_codec::facet::FieldDocIdFacetCodec;
 use crate::heed_codec::CboRoaringBitmapCodec;
+use crate::index::Hnsw;
 use crate::{
    ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, RoaringBitmapCodec, BEU32,
 };
@@ -247,6 +250,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
            facet_id_exists_docids,
            facet_id_is_null_docids,
            facet_id_is_empty_docids,
+            vector_id_docid,
            documents,
        } = self.index;
        // Remove from the documents database
@@ -436,6 +440,30 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
            &self.to_delete_docids,
        )?;

+        // An ugly and slow way to remove the vectors from the HNSW
+        // It basically reconstructs the HNSW from scratch without editing the current one.
+        let current_hnsw = self.index.vector_hnsw(self.wtxn)?.unwrap_or_default();
+        if !current_hnsw.is_empty() {
+            let mut new_hnsw = Hnsw::default();
+            let mut searcher = Searcher::new();
+            let mut new_vector_id_docids = Vec::new();
+
+            for result in vector_id_docid.iter(self.wtxn)? {
+                let (vector_id, docid) = result?;
+                if !self.to_delete_docids.contains(docid.get()) {
+                    let vector = current_hnsw.get_point(vector_id.get() as usize).clone();
+                    let vector_id = new_hnsw.insert(vector, &mut searcher);
+                    new_vector_id_docids.push((vector_id as u32, docid));
+                }
+            }
+
+            vector_id_docid.clear(self.wtxn)?;
+            for (vector_id, docid) in new_vector_id_docids {
+                vector_id_docid.put(self.wtxn, &BEU32::new(vector_id), &docid)?;
+            }
+            self.index.put_vector_hnsw(self.wtxn, &new_hnsw)?;
+        }
+
        self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;

        Ok(DetailedDocumentDeletionResult {
--- a/milli/src/update/index_documents/extract/extract_vector_points.rs
+++ b/milli/src/update/index_documents/extract/extract_vector_points.rs
@@ -0,0 +1,40 @@
+use std::fs::File;
+use std::io;
+
+use bytemuck::cast_slice;
+use serde_json::from_slice;
+
+use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
+use crate::{FieldId, InternalError, Result};
+
+/// Extracts the embedding vector stored under the `_vector` field of each document.
+///
+/// Returns a grenad reader mapping each docid (key) to the raw bytes of its `Vec<f32>`.
+/// Documents without the field are skipped; a value that cannot be deserialized as a
+/// `Vec<f32>` is returned as an `InternalError::SerdeJson` error instead of panicking.
+#[logging_timer::time]
+pub fn extract_vector_points<R: io::Read + io::Seek>(
+    obkv_documents: grenad::Reader<R>,
+    indexer: GrenadParameters,
+    vector_fid: FieldId,
+) -> Result<grenad::Reader<File>> {
+    let mut writer = create_writer(
+        indexer.chunk_compression_type,
+        indexer.chunk_compression_level,
+        tempfile::tempfile()?,
+    );
+
+    let mut cursor = obkv_documents.into_cursor()?;
+    while let Some((docid_bytes, value)) = cursor.move_on_next()? {
+        // Documents that do not contain the `_vector` field produce no entry.
+        if let Some(vector) = obkv::KvReader::new(value).get(vector_fid) {
+            // The value comes from the document itself: propagate a parse failure
+            // to the caller with `?` rather than panicking on malformed input.
+            let vector: Vec<f32> = from_slice(vector).map_err(InternalError::SerdeJson)?;
+            let bytes: &[u8] = cast_slice(&vector);
+            writer.insert(docid_bytes, bytes)?;
+        }
+    }
+
+    writer_into_reader(writer)
+}
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -4,6 +4,7 @@ mod extract_facet_string_docids;
 mod extract_fid_docid_facet_values;
 mod extract_fid_word_count_docids;
 mod extract_geo_points;
+mod extract_vector_points;
 mod extract_word_docids;
 mod extract_word_fid_docids;
 mod extract_word_pair_proximity_docids;
@@ -22,6 +23,7 @@ use self::extract_facet_string_docids::extract_facet_string_docids;
 use self::extract_fid_docid_facet_values::{extract_fid_docid_facet_values, ExtractedFacetValues};
 use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
 use self::extract_geo_points::extract_geo_points;
+use self::extract_vector_points::extract_vector_points;
 use self::extract_word_docids::extract_word_docids;
 use self::extract_word_fid_docids::extract_word_fid_docids;
 use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
@@ -45,6 +47,7 @@ pub(crate) fn data_from_obkv_documents(
    faceted_fields: HashSet<FieldId>,
    primary_key_id: FieldId,
    geo_fields_ids: Option<(FieldId, FieldId)>,
+    vector_field_id: Option<FieldId>,
    stop_words: Option<fst::Set<&[u8]>>,
    max_positions_per_attributes: Option<u32>,
    exact_attributes: HashSet<FieldId>,
@@ -69,6 +72,7 @@ pub(crate) fn data_from_obkv_documents(
                    &faceted_fields,
                    primary_key_id,
                    geo_fields_ids,
+                    vector_field_id,
                    &stop_words,
                    max_positions_per_attributes,
                )
@@ -279,6 +283,7 @@ fn send_and_extract_flattened_documents_data(
    faceted_fields: &HashSet<FieldId>,
    primary_key_id: FieldId,
    geo_fields_ids: Option<(FieldId, FieldId)>,
+    vector_field_id: Option<FieldId>,
    stop_words: &Option<fst::Set<&[u8]>>,
    max_positions_per_attributes: Option<u32>,
 ) -> Result<(
@@ -307,6 +312,20 @@ fn send_and_extract_flattened_documents_data(
        });
    }

+    if let Some(vector_field_id) = vector_field_id {
+        let documents_chunk_cloned = flattened_documents_chunk.clone();
+        let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
+        rayon::spawn(move || {
+            let result = extract_vector_points(documents_chunk_cloned, indexer, vector_field_id);
+            let _ = match result {
+                Ok(vector_points) => {
+                    lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
+                }
+                Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
+            };
+        });
+    }
+
    let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
        rayon::join(
            || {
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -304,6 +304,8 @@ where
            }
            None => None,
        };
+        // get the fid of the `_vector` field.
+        let vector_field_id = self.index.fields_ids_map(self.wtxn)?.id("_vector");

        let stop_words = self.index.stop_words(self.wtxn)?;
        let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
@@ -340,6 +342,7 @@ where
                    faceted_fields,
                    primary_key_id,
                    geo_fields_ids,
+                    vector_field_id,
                    stop_words,
                    max_positions_per_attributes,
                    exact_attributes,
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -4,20 +4,24 @@ use std::convert::TryInto;
 use std::fs::File;
 use std::io;

+use bytemuck::allocation::pod_collect_to_vec;
 use charabia::{Language, Script};
 use grenad::MergerBuilder;
 use heed::types::ByteSlice;
 use heed::RwTxn;
+use hnsw::Searcher;
 use roaring::RoaringBitmap;
+use space::KnnPoints;

 use super::helpers::{
    self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap,
 };
 use super::{ClonableMmap, MergeFn};
+use crate::error::UserError;
 use crate::facet::FacetType;
 use crate::update::facet::FacetsUpdate;
 use crate::update::index_documents::helpers::as_cloneable_grenad;
-use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result};
+use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result, BEU32};

 pub(crate) enum TypedChunk {
    FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
@@ -38,6 +42,7 @@ pub(crate) enum TypedChunk {
    FieldIdFacetIsNullDocids(grenad::Reader<File>),
    FieldIdFacetIsEmptyDocids(grenad::Reader<File>),
    GeoPoints(grenad::Reader<File>),
+    VectorPoints(grenad::Reader<File>),
    ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
 }

@@ -221,6 +226,38 @@ pub(crate) fn write_typed_chunk_into_index(
            index.put_geo_rtree(wtxn, &rtree)?;
            index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
        }
+        TypedChunk::VectorPoints(vector_points) => {
+            let mut hnsw = index.vector_hnsw(wtxn)?.unwrap_or_default();
+            let mut searcher = Searcher::new();
+
+            let mut expected_dimensions = match index.vector_id_docid.iter(wtxn)?.next() {
+                Some(result) => {
+                    let (vector_id, _) = result?;
+                    Some(hnsw.get_point(vector_id.get() as usize).len())
+                }
+                None => None,
+            };
+
+            let mut cursor = vector_points.into_cursor()?;
+            while let Some((key, value)) = cursor.move_on_next()? {
+                // convert the key back to a u32 (4 bytes)
+                let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();
+                // convert the vector back to a Vec<f32>
+                let vector: Vec<f32> = pod_collect_to_vec(value);
+
+                // TODO Move this error in the vector extractor
+                let found = vector.len();
+                let expected = *expected_dimensions.get_or_insert(found);
+                if expected != found {
+                    return Err(UserError::InvalidVectorDimensions { expected, found })?;
+                }
+
+                let vector_id = hnsw.insert(vector, &mut searcher) as u32;
+                index.vector_id_docid.put(wtxn, &BEU32::new(vector_id), &BEU32::new(docid))?;
+            }
+            log::debug!("There are {} entries in the HNSW so far", hnsw.len());
+            index.put_vector_hnsw(wtxn, &hnsw)?;
+        }
        TypedChunk::ScriptLanguageDocids(hash_pair) => {
            let mut buffer = Vec::new();
            for (key, value) in hash_pair {
--- a/milli/src/update/mod.rs
+++ b/milli/src/update/mod.rs
@@ -4,8 +4,7 @@ pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDele
 pub use self::facet::bulk::FacetsUpdateBulk;
 pub use self::facet::incremental::FacetsUpdateIncrementalInner;
 pub use self::index_documents::{
-    merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, DocumentAdditionResult, DocumentId,
-    IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, MergeFn,
+    DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
 };
 pub use self::indexer_config::IndexerConfig;
 pub use self::prefix_word_pairs::{
Author	SHA1	Message	Date
Kerollmops	dd01613a63	Remove the unused distance	2023-06-14 16:37:14 +02:00
Kerollmops	70d975b399	Introduce a new error message for invalid vector dimensions	2023-06-14 16:36:58 +02:00
Kerollmops	a8e6d946a7	Make clippy happy	2023-06-14 15:59:10 +02:00
Kerollmops	7c1f72ae33	Fix the tests	2023-06-14 15:57:31 +02:00
Kerollmops	442a8f44c6	Support more pages but in an ugly way	2023-06-14 15:53:39 +02:00
Kerollmops	185a238c77	Change the name of the distance module	2023-06-14 15:53:39 +02:00
Kerollmops	a82bf776f3	Implement an ugly deletion of values in the HNSW	2023-06-14 15:53:39 +02:00
Kerollmops	b2f86df127	Replace the euclidean with a dot product	2023-06-14 15:53:39 +02:00
Kerollmops	c3a5f51705	Use a basic euclidean distance function	2023-06-14 15:53:39 +02:00
Kerollmops	686d1f4c12	Move back to the hnsw crate This reverts commit 7a4b6c065482f988b01298642f4c18775503f92f.	2023-06-14 15:53:39 +02:00
Kerollmops	ba75606731	Log more to make sure we insert vectors in the hgg data-structure	2023-06-14 15:53:38 +02:00
Kerollmops	baf3b036d9	Introduce an optimized version of the euclidean distance function	2023-06-14 15:53:38 +02:00
Kerollmops	0d499f0055	Move to the hgg crate	2023-06-14 15:53:38 +02:00
Clément Renault	7999c397c5	Expose a new vector field on the search route	2023-06-14 15:53:38 +02:00
Clément Renault	c44db8b4bc	Add a vector field to the search routes	2023-06-14 15:53:38 +02:00
Clément Renault	9466949e34	Store the vectors in an HNSW in LMDB	2023-06-14 15:53:38 +02:00
Clément Renault	f051bbfd84	Extract the vectors from the documents	2023-06-14 15:52:43 +02:00
Clément Renault	72b1c3df08	Create a new _vector extractor	2023-06-14 15:52:43 +02:00
meili-bors[bot]	01d2ee5cc1	Merge #3836 3836: Remove trailing whitespace in snapshots r=dureuill a=dureuill # Pull Request ## Related issue No issue, maintenance ## What does this PR do? - Remove trailing whitespace in snapshots by adding a trailing `\|` at the end of lines that would previously end with fixed-width integers - This allows contributors whose editor is configured to remove trailing whitespace not to modify the tests when changing an unrelated part of the file containing the tests Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2023-06-14 13:00:52 +00:00
Louis Dureuil	e0c4682758	Fix tests	2023-06-14 13:30:52 +02:00
Louis Dureuil	d9b4b39922	Add trailing pipe to the snapshots so it doesn't end with trailing whitespace	2023-06-14 13:30:52 +02:00