mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 21:46:27 +00:00 
			
		
		
		
	Merge #4754
4754: bring back v1.9.0 changes to main r=irevoire a=ManyTheFish Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com> Co-authored-by: Clément Renault <clement@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
		
							
								
								
									
										2
									
								
								.github/workflows/test-suite.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/test-suite.yml
									
									
									
									
										vendored
									
									
								
							| @@ -104,7 +104,7 @@ jobs: | |||||||
|       - uses: helix-editor/rust-toolchain@v1 |       - uses: helix-editor/rust-toolchain@v1 | ||||||
|       - name: Run cargo tree without default features and check lindera is not present |       - name: Run cargo tree without default features and check lindera is not present | ||||||
|         run: | |         run: | | ||||||
|           if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -vqz lindera; then |           if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then | ||||||
|             echo "lindera has been found in the sources and it shouldn't" |             echo "lindera has been found in the sources and it shouldn't" | ||||||
|             exit 1 |             exit 1 | ||||||
|           fi |           fi | ||||||
|   | |||||||
							
								
								
									
										322
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										322
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -36,9 +36,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "actix-http" | name = "actix-http" | ||||||
| version = "3.6.0" | version = "3.7.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743" | checksum = "4eb9843d84c775696c37d9a418bbb01b932629d01870722c0f13eb3f95e2536d" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "actix-codec", |  "actix-codec", | ||||||
|  "actix-rt", |  "actix-rt", | ||||||
| @@ -46,7 +46,7 @@ dependencies = [ | |||||||
|  "actix-tls", |  "actix-tls", | ||||||
|  "actix-utils", |  "actix-utils", | ||||||
|  "ahash", |  "ahash", | ||||||
|  "base64 0.21.7", |  "base64 0.22.1", | ||||||
|  "bitflags 2.5.0", |  "bitflags 2.5.0", | ||||||
|  "brotli", |  "brotli", | ||||||
|  "bytes", |  "bytes", | ||||||
| @@ -85,13 +85,15 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "actix-router" | name = "actix-router" | ||||||
| version = "0.5.1" | version = "0.5.3" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "d66ff4d247d2b160861fa2866457e85706833527840e4133f8f49aa423a38799" | checksum = "13d324164c51f63867b57e73ba5936ea151b8a41a1d23d1031eeb9f70d0236f8" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bytestring", |  "bytestring", | ||||||
|  |  "cfg-if", | ||||||
|  "http 0.2.11", |  "http 0.2.11", | ||||||
|  "regex", |  "regex", | ||||||
|  |  "regex-lite", | ||||||
|  "serde", |  "serde", | ||||||
|  "tracing", |  "tracing", | ||||||
| ] | ] | ||||||
| @@ -138,9 +140,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "actix-tls" | name = "actix-tls" | ||||||
| version = "3.3.0" | version = "3.4.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "d4cce60a2f2b477bc72e5cde0af1812a6e82d8fd85b5570a5dcf2a5bf2c5be5f" | checksum = "ac453898d866cdbecdbc2334fe1738c747b4eba14a677261f2b768ba05329389" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "actix-rt", |  "actix-rt", | ||||||
|  "actix-service", |  "actix-service", | ||||||
| @@ -167,9 +169,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "actix-web" | name = "actix-web" | ||||||
| version = "4.5.1" | version = "4.6.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984" | checksum = "b1cf67dadb19d7c95e5a299e2dda24193b89d5d4f33a3b9800888ede9e19aa32" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "actix-codec", |  "actix-codec", | ||||||
|  "actix-http", |  "actix-http", | ||||||
| @@ -196,7 +198,7 @@ dependencies = [ | |||||||
|  "mime", |  "mime", | ||||||
|  "once_cell", |  "once_cell", | ||||||
|  "pin-project-lite", |  "pin-project-lite", | ||||||
|  "regex", |  "regex-lite", | ||||||
|  "serde", |  "serde", | ||||||
|  "serde_json", |  "serde_json", | ||||||
|  "serde_urlencoded", |  "serde_urlencoded", | ||||||
| @@ -220,8 +222,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "actix-web-static-files" | name = "actix-web-static-files" | ||||||
| version = "3.0.5" | version = "4.0.1" | ||||||
| source = "git+https://github.com/kilork/actix-web-static-files.git?rev=2d3b6160#2d3b6160f0de4ba061c5d76b5704f34fb677f6df" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "adf6d1ef6d7a60e084f9e0595e2a5234abda14e76c105ecf8e2d0e8800c41a1f" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "actix-web", |  "actix-web", | ||||||
|  "derive_more", |  "derive_more", | ||||||
| @@ -378,9 +381,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "arroy" | name = "arroy" | ||||||
| version = "0.3.1" | version = "0.4.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9" | checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bytemuck", |  "bytemuck", | ||||||
|  "byteorder", |  "byteorder", | ||||||
| @@ -613,9 +616,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "brotli" | name = "brotli" | ||||||
| version = "3.4.0" | version = "6.0.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" | checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "alloc-no-stdlib", |  "alloc-no-stdlib", | ||||||
|  "alloc-stdlib", |  "alloc-stdlib", | ||||||
| @@ -624,9 +627,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "brotli-decompressor" | name = "brotli-decompressor" | ||||||
| version = "2.5.1" | version = "4.0.1" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" | checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "alloc-no-stdlib", |  "alloc-no-stdlib", | ||||||
|  "alloc-stdlib", |  "alloc-stdlib", | ||||||
| @@ -676,9 +679,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "bytemuck" | name = "bytemuck" | ||||||
| version = "1.15.0" | version = "1.16.1" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" | checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bytemuck_derive", |  "bytemuck_derive", | ||||||
| ] | ] | ||||||
| @@ -895,9 +898,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "charabia" | name = "charabia" | ||||||
| version = "0.8.10" | version = "0.8.11" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "933f20f2269b24d32fd5503e7b3c268af902190daf8d9d2b73ed2e75d77c00b4" | checksum = "11a09ae38cfcc153f01576c3f579dfd916e0320f1b474f298c8d680b2dd92eb6" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "aho-corasick", |  "aho-corasick", | ||||||
|  "cow-utils", |  "cow-utils", | ||||||
| @@ -986,7 +989,7 @@ dependencies = [ | |||||||
|  "anstream", |  "anstream", | ||||||
|  "anstyle", |  "anstyle", | ||||||
|  "clap_lex", |  "clap_lex", | ||||||
|  "strsim", |  "strsim 0.10.0", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| @@ -1277,12 +1280,12 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "darling" | name = "darling" | ||||||
| version = "0.20.3" | version = "0.20.9" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e" | checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "darling_core 0.20.3", |  "darling_core 0.20.9", | ||||||
|  "darling_macro 0.20.3", |  "darling_macro 0.20.9", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| @@ -1295,21 +1298,21 @@ dependencies = [ | |||||||
|  "ident_case", |  "ident_case", | ||||||
|  "proc-macro2", |  "proc-macro2", | ||||||
|  "quote", |  "quote", | ||||||
|  "strsim", |  "strsim 0.10.0", | ||||||
|  "syn 1.0.109", |  "syn 1.0.109", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "darling_core" | name = "darling_core" | ||||||
| version = "0.20.3" | version = "0.20.9" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621" | checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "fnv", |  "fnv", | ||||||
|  "ident_case", |  "ident_case", | ||||||
|  "proc-macro2", |  "proc-macro2", | ||||||
|  "quote", |  "quote", | ||||||
|  "strsim", |  "strsim 0.11.1", | ||||||
|  "syn 2.0.60", |  "syn 2.0.60", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| @@ -1326,11 +1329,11 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "darling_macro" | name = "darling_macro" | ||||||
| version = "0.20.3" | version = "0.20.9" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" | checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "darling_core 0.20.3", |  "darling_core 0.20.9", | ||||||
|  "quote", |  "quote", | ||||||
|  "syn 2.0.60", |  "syn 2.0.60", | ||||||
| ] | ] | ||||||
| @@ -1383,6 +1386,15 @@ dependencies = [ | |||||||
|  "derive_builder_macro 0.13.1", |  "derive_builder_macro 0.13.1", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "derive_builder" | ||||||
|  | version = "0.20.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7" | ||||||
|  | dependencies = [ | ||||||
|  |  "derive_builder_macro 0.20.0", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "derive_builder_core" | name = "derive_builder_core" | ||||||
| version = "0.12.0" | version = "0.12.0" | ||||||
| @@ -1407,6 +1419,18 @@ dependencies = [ | |||||||
|  "syn 1.0.109", |  "syn 1.0.109", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "derive_builder_core" | ||||||
|  | version = "0.20.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d" | ||||||
|  | dependencies = [ | ||||||
|  |  "darling 0.20.9", | ||||||
|  |  "proc-macro2", | ||||||
|  |  "quote", | ||||||
|  |  "syn 2.0.60", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "derive_builder_macro" | name = "derive_builder_macro" | ||||||
| version = "0.12.0" | version = "0.12.0" | ||||||
| @@ -1427,6 +1451,16 @@ dependencies = [ | |||||||
|  "syn 1.0.109", |  "syn 1.0.109", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "derive_builder_macro" | ||||||
|  | version = "0.20.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" | ||||||
|  | dependencies = [ | ||||||
|  |  "derive_builder_core 0.20.0", | ||||||
|  |  "syn 2.0.60", | ||||||
|  | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "derive_more" | name = "derive_more" | ||||||
| version = "0.99.17" | version = "0.99.17" | ||||||
| @@ -1454,7 +1488,7 @@ dependencies = [ | |||||||
|  "serde-cs", |  "serde-cs", | ||||||
|  "serde_json", |  "serde_json", | ||||||
|  "serde_urlencoded", |  "serde_urlencoded", | ||||||
|  "strsim", |  "strsim 0.10.0", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| @@ -1707,29 +1741,6 @@ dependencies = [ | |||||||
|  "syn 2.0.60", |  "syn 2.0.60", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] |  | ||||||
| name = "env_filter" |  | ||||||
| version = "0.1.0" |  | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" |  | ||||||
| checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" |  | ||||||
| dependencies = [ |  | ||||||
|  "log", |  | ||||||
|  "regex", |  | ||||||
| ] |  | ||||||
|  |  | ||||||
| [[package]] |  | ||||||
| name = "env_logger" |  | ||||||
| version = "0.11.3" |  | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" |  | ||||||
| checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9" |  | ||||||
| dependencies = [ |  | ||||||
|  "anstream", |  | ||||||
|  "anstyle", |  | ||||||
|  "env_filter", |  | ||||||
|  "humantime", |  | ||||||
|  "log", |  | ||||||
| ] |  | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "equivalent" | name = "equivalent" | ||||||
| version = "1.0.1" | version = "1.0.1" | ||||||
| @@ -1784,7 +1795,7 @@ version = "0.1.10" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "d15473d7f83b54a44826907af16ae5727eaacaf6e53b51474016d3efd9aa35d5" | checksum = "d15473d7f83b54a44826907af16ae5727eaacaf6e53b51474016d3efd9aa35d5" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "darling 0.20.3", |  "darling 0.20.9", | ||||||
|  "proc-macro2", |  "proc-macro2", | ||||||
|  "quote", |  "quote", | ||||||
|  "syn 2.0.60", |  "syn 2.0.60", | ||||||
| @@ -2262,9 +2273,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "heed" | name = "heed" | ||||||
| version = "0.20.1" | version = "0.20.2" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "6f7acb9683d7c7068aa46d47557bfa4e35a277964b350d9504a87b03610163fd" | checksum = "f60d7cff16094be9627830b399c087a25017e93fb3768b87cd656a68ccb1ebe8" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bitflags 2.5.0", |  "bitflags 2.5.0", | ||||||
|  "byteorder", |  "byteorder", | ||||||
| @@ -2379,12 +2390,6 @@ version = "1.0.2" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" | checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" | ||||||
|  |  | ||||||
| [[package]] |  | ||||||
| name = "humantime" |  | ||||||
| version = "2.1.0" |  | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" |  | ||||||
| checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" |  | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "hyper" | name = "hyper" | ||||||
| version = "0.14.27" | version = "0.14.27" | ||||||
| @@ -2450,6 +2455,7 @@ name = "index-scheduler" | |||||||
| version = "1.9.0" | version = "1.9.0" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "anyhow", |  "anyhow", | ||||||
|  |  "arroy", | ||||||
|  "big_s", |  "big_s", | ||||||
|  "bincode", |  "bincode", | ||||||
|  "crossbeam", |  "crossbeam", | ||||||
| @@ -2460,6 +2466,7 @@ dependencies = [ | |||||||
|  "file-store", |  "file-store", | ||||||
|  "flate2", |  "flate2", | ||||||
|  "insta", |  "insta", | ||||||
|  |  "maplit", | ||||||
|  "meili-snap", |  "meili-snap", | ||||||
|  "meilisearch-auth", |  "meilisearch-auth", | ||||||
|  "meilisearch-types", |  "meilisearch-types", | ||||||
| @@ -2778,9 +2785,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera" | name = "lindera" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "a1bbf252ea3490053dc397539ece0b510924f2f72605fa28d3e858d86f43ec88" | checksum = "dcd4fa369654517f72c10b24adf03ad4ce69d19facb79c3cb3cf9b4580ac352f" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "lindera-analyzer", |  "lindera-analyzer", | ||||||
|  "lindera-core", |  "lindera-core", | ||||||
| @@ -2791,9 +2798,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-analyzer" | name = "lindera-analyzer" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "87febfec0e2859ce2154fb90dd6f66b774ddb0b6e264b44f8e3d1303c9dcedd7" | checksum = "c2cba7fe275cb8ec4c594cfee9cc39e48b71e02a089457d52f3e70dc146a8133" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "anyhow", |  "anyhow", | ||||||
|  "bincode", |  "bincode", | ||||||
| @@ -2821,9 +2828,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-cc-cedict" | name = "lindera-cc-cedict" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "fcb91bb8a93ab0f95dbc3c43b5105354bb059134ef731154f75a64b5d919e71d" | checksum = "240adf9faba3f09ad16557aefcd316dd00ebb940ac94334a629660d772f118c1" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bincode", |  "bincode", | ||||||
|  "byteorder", |  "byteorder", | ||||||
| @@ -2835,29 +2842,21 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-cc-cedict-builder" | name = "lindera-cc-cedict-builder" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "f6022a8309a287dbef425fd09a61585351670c83001d74f6c089979e2330b683" | checksum = "f12241f9e74babe708a0b9441d9f3fa67cb29fd01257918f30ffd480ca568820" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "anyhow", |  "anyhow", | ||||||
|  "bincode", |  | ||||||
|  "byteorder", |  | ||||||
|  "csv", |  | ||||||
|  "encoding", |  | ||||||
|  "env_logger", |  | ||||||
|  "glob", |  | ||||||
|  "lindera-compress", |  | ||||||
|  "lindera-core", |  "lindera-core", | ||||||
|  "lindera-decompress", |  "lindera-decompress", | ||||||
|  "log", |  "lindera-dictionary-builder", | ||||||
|  "yada", |  | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-compress" | name = "lindera-compress" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "32363cbcf433f915e7d77c2a0c410db2d6b23442e80715cf2cf6b9864078a500" | checksum = "50f9f7a858d70ff9e4383cbd507ca9e98c8faf0319e08c10df4c30cb58c9ca6c" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "anyhow", |  "anyhow", | ||||||
|  "flate2", |  "flate2", | ||||||
| @@ -2866,9 +2865,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-core" | name = "lindera-core" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "d9a0e858753a02b1a3524fae4fbb11ca4b3a947128fd7854b797386562678be8" | checksum = "7f09810ab98ce2a084d788ac38fbb7b31697f34bc47c61de0d880320a674bd15" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "anyhow", |  "anyhow", | ||||||
|  "bincode", |  "bincode", | ||||||
| @@ -2883,9 +2882,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-decompress" | name = "lindera-decompress" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "0e406345f6f8b665b9a129c67079c18ca9d97e9d171d102b4106a64a592c285e" | checksum = "d53400c9b2dd6b45f82d9fa5b5efe079f3acaf6ce609dba8d42c8a76baaa2b12" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "anyhow", |  "anyhow", | ||||||
|  "flate2", |  "flate2", | ||||||
| @@ -2894,9 +2893,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-dictionary" | name = "lindera-dictionary" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "3e2a3ec0e5fd6768a27c6ec1040e8470d3a5926418f7afe065859e98aabb3bfe" | checksum = "2053d064a515839250438b8dfa6cf445e2b97633232ded34a54f267e945d196e" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "anyhow", |  "anyhow", | ||||||
|  "bincode", |  "bincode", | ||||||
| @@ -2918,10 +2917,32 @@ dependencies = [ | |||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-filter" | name = "lindera-dictionary-builder" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "1badaf51bad051185ea4917ba91bbbf2d6f8167e155647e21e0eaaef0982a95d" | checksum = "14f486924055f8bedcc5877572e4dc91fbc10370862430ac2e5f7f0d671a18c8" | ||||||
|  | dependencies = [ | ||||||
|  |  "anyhow", | ||||||
|  |  "bincode", | ||||||
|  |  "byteorder", | ||||||
|  |  "csv", | ||||||
|  |  "derive_builder 0.20.0", | ||||||
|  |  "encoding", | ||||||
|  |  "encoding_rs", | ||||||
|  |  "encoding_rs_io", | ||||||
|  |  "glob", | ||||||
|  |  "lindera-compress", | ||||||
|  |  "lindera-core", | ||||||
|  |  "lindera-decompress", | ||||||
|  |  "log", | ||||||
|  |  "yada", | ||||||
|  | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "lindera-filter" | ||||||
|  | version = "0.31.0" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "bb3904fc279f0297f6fd6210435adab1f8c82ba84eba8635407c791af51c0d8a" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "anyhow", |  "anyhow", | ||||||
|  "csv", |  "csv", | ||||||
| @@ -2944,9 +2965,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-ipadic" | name = "lindera-ipadic" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "129ec16366354998f9791467ad38731539197747f649e573ead845358271ce25" | checksum = "4aa3ef2f1f6838b0fa2e2fca2896242bb83bc877c1760cdb6fa23449ab95d664" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bincode", |  "bincode", | ||||||
|  "byteorder", |  "byteorder", | ||||||
| @@ -2958,31 +2979,21 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-ipadic-builder" | name = "lindera-ipadic-builder" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "7f0979a56bc57e9c9be2996dff232c47aa146a2e7baebf5dd567e388eba3dd90" | checksum = "a41287db18eadb58d73a04d49778d41c161549fbbbe155d4338976b7b8541c7d" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "anyhow", |  "anyhow", | ||||||
|  "bincode", |  | ||||||
|  "byteorder", |  | ||||||
|  "csv", |  | ||||||
|  "encoding_rs", |  | ||||||
|  "encoding_rs_io", |  | ||||||
|  "env_logger", |  | ||||||
|  "glob", |  | ||||||
|  "lindera-compress", |  | ||||||
|  "lindera-core", |  "lindera-core", | ||||||
|  "lindera-decompress", |  "lindera-decompress", | ||||||
|  "log", |  "lindera-dictionary-builder", | ||||||
|  "serde", |  | ||||||
|  "yada", |  | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-ipadic-neologd" | name = "lindera-ipadic-neologd" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "20076660c4e79ef0316735b44e18ec7644e54786acdee8946c972d5f97086d0f" | checksum = "49382256f245078400bf7e72663f9eb30afcd9ed54cd46f29d7db1be529678e1" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bincode", |  "bincode", | ||||||
|  "byteorder", |  "byteorder", | ||||||
| @@ -2994,31 +3005,21 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-ipadic-neologd-builder" | name = "lindera-ipadic-neologd-builder" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "eccd18ed5f65d1d64ac0cbfa1d6827bfbbaf6530520ae6847e6a91ee38f47e20" | checksum = "5ae9cfd2fda68ef526ef0c7b50c5d4d5582a4daa6ecd0cea9e2b0b62564a2a5d" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "anyhow", |  "anyhow", | ||||||
|  "bincode", |  | ||||||
|  "byteorder", |  | ||||||
|  "csv", |  | ||||||
|  "encoding_rs", |  | ||||||
|  "encoding_rs_io", |  | ||||||
|  "env_logger", |  | ||||||
|  "glob", |  | ||||||
|  "lindera-compress", |  | ||||||
|  "lindera-core", |  "lindera-core", | ||||||
|  "lindera-decompress", |  "lindera-decompress", | ||||||
|  "log", |  "lindera-dictionary-builder", | ||||||
|  "serde", |  | ||||||
|  "yada", |  | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-ko-dic" | name = "lindera-ko-dic" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "59073171566c3e498ca048e84c2d0a7e117a42f36c8eb7d7163e65ac38bd6d48" | checksum = "7f86d03a863f3ae1d269e7b7d4dd2cce9385a53463479bafc5d7aa48719f36db" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bincode", |  "bincode", | ||||||
|  "byteorder", |  "byteorder", | ||||||
| @@ -3034,29 +3035,21 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-ko-dic-builder" | name = "lindera-ko-dic-builder" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "ae176afa8535ca2a5ee9471873f85d531db0a6c32a3c42b41084506aac22b577" | checksum = "bd0f44f2e56358c5879dfb5e7f76cc6ba7853ec31082c4e3f8fb65fb2d849c51" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "anyhow", |  "anyhow", | ||||||
|  "bincode", |  | ||||||
|  "byteorder", |  | ||||||
|  "csv", |  | ||||||
|  "encoding", |  | ||||||
|  "env_logger", |  | ||||||
|  "glob", |  | ||||||
|  "lindera-compress", |  | ||||||
|  "lindera-core", |  "lindera-core", | ||||||
|  "lindera-decompress", |  "lindera-decompress", | ||||||
|  "log", |  "lindera-dictionary-builder", | ||||||
|  "yada", |  | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-tokenizer" | name = "lindera-tokenizer" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "457285bdde84571aa510c9e05371904305a55e8a541fa1473d4393062f06932d" | checksum = "7c5182735cdc2832ac757b31e8a5b150a3514357a30efe3dec212f8dcb06ba14" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bincode", |  "bincode", | ||||||
|  "lindera-core", |  "lindera-core", | ||||||
| @@ -3068,9 +3061,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-unidic" | name = "lindera-unidic" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "5839980be552dfa639b70964c61914a9ad014148663679b0e148aa72e5e30f23" | checksum = "6c63da104728dd1cf14bfa564753cbfa996f6078ed2e23e31475bd1d639fc597" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bincode", |  "bincode", | ||||||
|  "byteorder", |  "byteorder", | ||||||
| @@ -3086,22 +3079,14 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lindera-unidic-builder" | name = "lindera-unidic-builder" | ||||||
| version = "0.30.0" | version = "0.31.0" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "dcaab8f061d5b944b1e424f49c7efbf8f276e8a72e4f4ff956d01e46d481f008" | checksum = "04acecbc068dac21766a1b7ed1f2608b6f250d10b4f8bff67abc2a00437a0974" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "anyhow", |  "anyhow", | ||||||
|  "bincode", |  | ||||||
|  "byteorder", |  | ||||||
|  "csv", |  | ||||||
|  "encoding", |  | ||||||
|  "env_logger", |  | ||||||
|  "glob", |  | ||||||
|  "lindera-compress", |  | ||||||
|  "lindera-core", |  "lindera-core", | ||||||
|  "lindera-decompress", |  "lindera-decompress", | ||||||
|  "log", |  "lindera-dictionary-builder", | ||||||
|  "yada", |  | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| @@ -3187,9 +3172,9 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da" | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "lmdb-master-sys" | name = "lmdb-master-sys" | ||||||
| version = "0.2.0" | version = "0.2.1" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "dc9048db3a58c0732d7236abc4909058f9d2708cfb6d7d047eb895fddec6419a" | checksum = "a5142795c220effa4c8f4813537bd4c88113a07e45e93100ccb2adc5cec6c7f3" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "cc", |  "cc", | ||||||
|  "doxygen-rs", |  "doxygen-rs", | ||||||
| @@ -4340,6 +4325,12 @@ dependencies = [ | |||||||
|  "regex-syntax", |  "regex-syntax", | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "regex-lite" | ||||||
|  | version = "0.1.5" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "regex-syntax" | name = "regex-syntax" | ||||||
| version = "0.8.2" | version = "0.8.2" | ||||||
| @@ -4388,12 +4379,6 @@ dependencies = [ | |||||||
|  "winreg", |  "winreg", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] |  | ||||||
| name = "retain_mut" |  | ||||||
| version = "0.1.7" |  | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" |  | ||||||
| checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086" |  | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "ring" | name = "ring" | ||||||
| version = "0.17.8" | version = "0.17.8" | ||||||
| @@ -4411,13 +4396,12 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "roaring" | name = "roaring" | ||||||
| version = "0.10.2" | version = "0.10.5" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873" | checksum = "7699249cc2c7d71939f30868f47e9d7add0bdc030d90ee10bfd16887ff8bb1c8" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bytemuck", |  "bytemuck", | ||||||
|  "byteorder", |  "byteorder", | ||||||
|  "retain_mut", |  | ||||||
|  "serde", |  "serde", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| @@ -4900,6 +4884,12 @@ version = "0.10.0" | |||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" | ||||||
|  |  | ||||||
|  | [[package]] | ||||||
|  | name = "strsim" | ||||||
|  | version = "0.11.1" | ||||||
|  | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
|  | checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "strum" | name = "strum" | ||||||
| version = "0.26.2" | version = "0.26.2" | ||||||
| @@ -5313,9 +5303,9 @@ dependencies = [ | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "tracing-actix-web" | name = "tracing-actix-web" | ||||||
| version = "0.7.9" | version = "0.7.11" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "1fe0d5feac3f4ca21ba33496bcb1ccab58cca6412b1405ae80f0581541e0ca78" | checksum = "4ee9e39a66d9b615644893ffc1704d2a89b5b315b7fd0228ad3182ca9a306b19" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "actix-web", |  "actix-web", | ||||||
|  "mutually_exclusive_features", |  "mutually_exclusive_features", | ||||||
|   | |||||||
| @@ -780,7 +780,7 @@ expression: document | |||||||
|           1.3484878540039063 |           1.3484878540039063 | ||||||
|         ] |         ] | ||||||
|       ], |       ], | ||||||
|       "userProvided": false |       "regenerate": true | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -779,7 +779,7 @@ expression: document | |||||||
|           1.04031240940094 |           1.04031240940094 | ||||||
|         ] |         ] | ||||||
|       ], |       ], | ||||||
|       "userProvided": false |       "regenerate": true | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| } | } | ||||||
|   | |||||||
										
											Binary file not shown.
										
									
								
							| @@ -40,7 +40,9 @@ ureq = "2.9.7" | |||||||
| uuid = { version = "1.6.1", features = ["serde", "v4"] } | uuid = { version = "1.6.1", features = ["serde", "v4"] } | ||||||
|  |  | ||||||
| [dev-dependencies] | [dev-dependencies] | ||||||
|  | arroy = "0.4.0" | ||||||
| big_s = "1.0.2" | big_s = "1.0.2" | ||||||
| crossbeam = "0.8.4" | crossbeam = "0.8.4" | ||||||
| insta = { version = "1.34.0", features = ["json", "redactions"] } | insta = { version = "1.34.0", features = ["json", "redactions"] } | ||||||
|  | maplit = "1.0.2" | ||||||
| meili-snap = { path = "../meili-snap" } | meili-snap = { path = "../meili-snap" } | ||||||
|   | |||||||
| @@ -909,6 +909,7 @@ impl IndexScheduler { | |||||||
|  |  | ||||||
|                     let fields_ids_map = index.fields_ids_map(&rtxn)?; |                     let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||||
|                     let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); |                     let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); | ||||||
|  |                     let embedding_configs = index.embedding_configs(&rtxn)?; | ||||||
|  |  | ||||||
|                     // 3.1. Dump the documents |                     // 3.1. Dump the documents | ||||||
|                     for ret in index.all_documents(&rtxn)? { |                     for ret in index.all_documents(&rtxn)? { | ||||||
| @@ -951,16 +952,21 @@ impl IndexScheduler { | |||||||
|                             }; |                             }; | ||||||
|  |  | ||||||
|                             for (embedder_name, embeddings) in embeddings { |                             for (embedder_name, embeddings) in embeddings { | ||||||
|                                 // don't change the entry if it already exists, because it was user-provided |                                 let user_provided = embedding_configs | ||||||
|                                 vectors.entry(embedder_name).or_insert_with(|| { |                                     .iter() | ||||||
|  |                                     .find(|conf| conf.name == embedder_name) | ||||||
|  |                                     .is_some_and(|conf| conf.user_provided.contains(id)); | ||||||
|  |  | ||||||
|                                 let embeddings = ExplicitVectors { |                                 let embeddings = ExplicitVectors { | ||||||
|                                         embeddings: VectorOrArrayOfVectors::from_array_of_vectors( |                                     embeddings: Some( | ||||||
|                                             embeddings, |                                         VectorOrArrayOfVectors::from_array_of_vectors(embeddings), | ||||||
|                                     ), |                                     ), | ||||||
|                                         user_provided: false, |                                     regenerate: !user_provided, | ||||||
|                                 }; |                                 }; | ||||||
|                                     serde_json::to_value(embeddings).unwrap() |                                 vectors.insert( | ||||||
|                                 }); |                                     embedder_name, | ||||||
|  |                                     serde_json::to_value(embeddings).unwrap(), | ||||||
|  |                                 ); | ||||||
|                             } |                             } | ||||||
|                         } |                         } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -53,6 +53,7 @@ use meilisearch_types::heed::byteorder::BE; | |||||||
| use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128}; | use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128}; | ||||||
| use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn}; | use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn}; | ||||||
| use meilisearch_types::milli::documents::DocumentsBatchBuilder; | use meilisearch_types::milli::documents::DocumentsBatchBuilder; | ||||||
|  | use meilisearch_types::milli::index::IndexEmbeddingConfig; | ||||||
| use meilisearch_types::milli::update::IndexerConfig; | use meilisearch_types::milli::update::IndexerConfig; | ||||||
| use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs}; | use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs}; | ||||||
| use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; | use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; | ||||||
| @@ -1459,11 +1460,16 @@ impl IndexScheduler { | |||||||
|     // TODO: consider using a type alias or a struct embedder/template |     // TODO: consider using a type alias or a struct embedder/template | ||||||
|     pub fn embedders( |     pub fn embedders( | ||||||
|         &self, |         &self, | ||||||
|         embedding_configs: Vec<(String, milli::vector::EmbeddingConfig)>, |         embedding_configs: Vec<IndexEmbeddingConfig>, | ||||||
|     ) -> Result<EmbeddingConfigs> { |     ) -> Result<EmbeddingConfigs> { | ||||||
|         let res: Result<_> = embedding_configs |         let res: Result<_> = embedding_configs | ||||||
|             .into_iter() |             .into_iter() | ||||||
|             .map(|(name, milli::vector::EmbeddingConfig { embedder_options, prompt })| { |             .map( | ||||||
|  |                 |IndexEmbeddingConfig { | ||||||
|  |                      name, | ||||||
|  |                      config: milli::vector::EmbeddingConfig { embedder_options, prompt }, | ||||||
|  |                      .. | ||||||
|  |                  }| { | ||||||
|                     let prompt = |                     let prompt = | ||||||
|                         Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?); |                         Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?); | ||||||
|                     // optimistically return existing embedder |                     // optimistically return existing embedder | ||||||
| @@ -1485,7 +1491,8 @@ impl IndexScheduler { | |||||||
|                         embedders.insert(embedder_options, embedder.clone()); |                         embedders.insert(embedder_options, embedder.clone()); | ||||||
|                     } |                     } | ||||||
|                     Ok((name, (embedder, prompt))) |                     Ok((name, (embedder, prompt))) | ||||||
|             }) |                 }, | ||||||
|  |             ) | ||||||
|             .collect(); |             .collect(); | ||||||
|         res.map(EmbeddingConfigs::new) |         res.map(EmbeddingConfigs::new) | ||||||
|     } |     } | ||||||
| @@ -1748,6 +1755,9 @@ mod tests { | |||||||
|     use meilisearch_types::milli::update::IndexDocumentsMethod::{ |     use meilisearch_types::milli::update::IndexDocumentsMethod::{ | ||||||
|         ReplaceDocuments, UpdateDocuments, |         ReplaceDocuments, UpdateDocuments, | ||||||
|     }; |     }; | ||||||
|  |     use meilisearch_types::milli::update::Setting; | ||||||
|  |     use meilisearch_types::milli::vector::settings::EmbeddingSettings; | ||||||
|  |     use meilisearch_types::settings::Unchecked; | ||||||
|     use meilisearch_types::tasks::IndexSwap; |     use meilisearch_types::tasks::IndexSwap; | ||||||
|     use meilisearch_types::VERSION_FILE_NAME; |     use meilisearch_types::VERSION_FILE_NAME; | ||||||
|     use tempfile::{NamedTempFile, TempDir}; |     use tempfile::{NamedTempFile, TempDir}; | ||||||
| @@ -1826,6 +1836,7 @@ mod tests { | |||||||
|             assert_eq!(breakpoint, (Init, false)); |             assert_eq!(breakpoint, (Init, false)); | ||||||
|             let index_scheduler_handle = IndexSchedulerHandle { |             let index_scheduler_handle = IndexSchedulerHandle { | ||||||
|                 _tempdir: tempdir, |                 _tempdir: tempdir, | ||||||
|  |                 index_scheduler: index_scheduler.private_clone(), | ||||||
|                 test_breakpoint_rcv: receiver, |                 test_breakpoint_rcv: receiver, | ||||||
|                 last_breakpoint: breakpoint.0, |                 last_breakpoint: breakpoint.0, | ||||||
|             }; |             }; | ||||||
| @@ -1914,6 +1925,7 @@ mod tests { | |||||||
|  |  | ||||||
|     pub struct IndexSchedulerHandle { |     pub struct IndexSchedulerHandle { | ||||||
|         _tempdir: TempDir, |         _tempdir: TempDir, | ||||||
|  |         index_scheduler: IndexScheduler, | ||||||
|         test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>, |         test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>, | ||||||
|         last_breakpoint: Breakpoint, |         last_breakpoint: Breakpoint, | ||||||
|     } |     } | ||||||
| @@ -1931,9 +1943,13 @@ mod tests { | |||||||
|             { |             { | ||||||
|                 Ok(b) => b, |                 Ok(b) => b, | ||||||
|                 Err(RecvTimeoutError::Timeout) => { |                 Err(RecvTimeoutError::Timeout) => { | ||||||
|                     panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.") |                     let state = snapshot_index_scheduler(&self.index_scheduler); | ||||||
|  |                     panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}") | ||||||
|  |                 } | ||||||
|  |                 Err(RecvTimeoutError::Disconnected) => { | ||||||
|  |                     let state = snapshot_index_scheduler(&self.index_scheduler); | ||||||
|  |                     panic!("The scheduler crashed.\n{state}") | ||||||
|                 } |                 } | ||||||
|                 Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."), |  | ||||||
|             }; |             }; | ||||||
|             // if we've already encountered a breakpoint we're supposed to be stuck on the false |             // if we've already encountered a breakpoint we're supposed to be stuck on the false | ||||||
|             // and we expect the same variant with the true to come now. |             // and we expect the same variant with the true to come now. | ||||||
| @@ -1952,9 +1968,13 @@ mod tests { | |||||||
|             { |             { | ||||||
|                 Ok(b) => b, |                 Ok(b) => b, | ||||||
|                 Err(RecvTimeoutError::Timeout) => { |                 Err(RecvTimeoutError::Timeout) => { | ||||||
|                     panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.") |                     let state = snapshot_index_scheduler(&self.index_scheduler); | ||||||
|  |                     panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.\n{state}") | ||||||
|  |                 } | ||||||
|  |                 Err(RecvTimeoutError::Disconnected) => { | ||||||
|  |                     let state = snapshot_index_scheduler(&self.index_scheduler); | ||||||
|  |                     panic!("The scheduler crashed.\n{state}") | ||||||
|                 } |                 } | ||||||
|                 Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."), |  | ||||||
|             }; |             }; | ||||||
|             assert!(!b, "Found the breakpoint handle in a bad state. Check your test suite"); |             assert!(!b, "Found the breakpoint handle in a bad state. Check your test suite"); | ||||||
|  |  | ||||||
| @@ -1968,9 +1988,10 @@ mod tests { | |||||||
|         fn advance_till(&mut self, breakpoints: impl IntoIterator<Item = Breakpoint>) { |         fn advance_till(&mut self, breakpoints: impl IntoIterator<Item = Breakpoint>) { | ||||||
|             for breakpoint in breakpoints { |             for breakpoint in breakpoints { | ||||||
|                 let b = self.advance(); |                 let b = self.advance(); | ||||||
|  |                 let state = snapshot_index_scheduler(&self.index_scheduler); | ||||||
|                 assert_eq!( |                 assert_eq!( | ||||||
|                     b, breakpoint, |                     b, breakpoint, | ||||||
|                     "Was expecting the breakpoint `{:?}` but instead got `{:?}`.", |                     "Was expecting the breakpoint `{:?}` but instead got `{:?}`.\n{state}", | ||||||
|                     breakpoint, b |                     breakpoint, b | ||||||
|                 ); |                 ); | ||||||
|             } |             } | ||||||
| @@ -1995,6 +2016,7 @@ mod tests { | |||||||
|         // Wait for one successful batch. |         // Wait for one successful batch. | ||||||
|         #[track_caller] |         #[track_caller] | ||||||
|         fn advance_one_successful_batch(&mut self) { |         fn advance_one_successful_batch(&mut self) { | ||||||
|  |             self.index_scheduler.assert_internally_consistent(); | ||||||
|             self.advance_till([Start, BatchCreated]); |             self.advance_till([Start, BatchCreated]); | ||||||
|             loop { |             loop { | ||||||
|                 match self.advance() { |                 match self.advance() { | ||||||
| @@ -2003,13 +2025,17 @@ mod tests { | |||||||
|                     InsideProcessBatch => (), |                     InsideProcessBatch => (), | ||||||
|                     // the batch went successfully, we can stop the loop and go on with the next states. |                     // the batch went successfully, we can stop the loop and go on with the next states. | ||||||
|                     ProcessBatchSucceeded => break, |                     ProcessBatchSucceeded => break, | ||||||
|                     AbortedIndexation => panic!("The batch was aborted."), |                     AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)), | ||||||
|                     ProcessBatchFailed => panic!("The batch failed."), |                     ProcessBatchFailed => { | ||||||
|  |                         while self.advance() != Start {} | ||||||
|  |                         panic!("The batch failed.\n{}", snapshot_index_scheduler(&self.index_scheduler)) | ||||||
|  |                     }, | ||||||
|                     breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), |                     breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             self.advance_till([AfterProcessing]); |             self.advance_till([AfterProcessing]); | ||||||
|  |             self.index_scheduler.assert_internally_consistent(); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Wait for one failed batch. |         // Wait for one failed batch. | ||||||
| @@ -2023,8 +2049,8 @@ mod tests { | |||||||
|                     InsideProcessBatch => (), |                     InsideProcessBatch => (), | ||||||
|                     // the batch went failed, we can stop the loop and go on with the next states. |                     // the batch went failed, we can stop the loop and go on with the next states. | ||||||
|                     ProcessBatchFailed => break, |                     ProcessBatchFailed => break, | ||||||
|                     ProcessBatchSucceeded => panic!("The batch succeeded. (and it wasn't supposed to sorry)"), |                     ProcessBatchSucceeded => panic!("The batch succeeded. (and it wasn't supposed to sorry)\n{}", snapshot_index_scheduler(&self.index_scheduler)), | ||||||
|                     AbortedIndexation => panic!("The batch was aborted."), |                     AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)), | ||||||
|                     breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), |                     breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
| @@ -3052,8 +3078,10 @@ mod tests { | |||||||
|         let rtxn = index.read_txn().unwrap(); |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |  | ||||||
|         let configs = index.embedding_configs(&rtxn).unwrap(); |         let configs = index.embedding_configs(&rtxn).unwrap(); | ||||||
|         let (_, embedding_config) = configs.first().unwrap(); |         let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap(); | ||||||
|         insta::assert_json_snapshot!(embedding_config.embedder_options); |         insta::assert_snapshot!(name, @"default"); | ||||||
|  |         insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); | ||||||
|  |         insta::assert_json_snapshot!(config.embedder_options); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
| @@ -4989,7 +5017,6 @@ mod tests { | |||||||
|                 false, |                 false, | ||||||
|             ) |             ) | ||||||
|             .unwrap(); |             .unwrap(); | ||||||
|         index_scheduler.assert_internally_consistent(); |  | ||||||
|  |  | ||||||
|         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors"); |         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors"); | ||||||
|  |  | ||||||
| @@ -5000,7 +5027,7 @@ mod tests { | |||||||
|             insta::assert_json_snapshot!(task.details); |             insta::assert_json_snapshot!(task.details); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         handle.advance_n_successful_batches(1); |         handle.advance_one_successful_batch(); | ||||||
|         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors"); |         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors"); | ||||||
|  |  | ||||||
|         { |         { | ||||||
| @@ -5017,13 +5044,17 @@ mod tests { | |||||||
|             let configs = index.embedding_configs(&rtxn).unwrap(); |             let configs = index.embedding_configs(&rtxn).unwrap(); | ||||||
|             // for consistency with the below |             // for consistency with the below | ||||||
|             #[allow(clippy::get_first)] |             #[allow(clippy::get_first)] | ||||||
|             let (name, fakerest_config) = configs.get(0).unwrap(); |             let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } = | ||||||
|             insta::assert_json_snapshot!(name, @r###""A_fakerest""###); |                 configs.get(0).unwrap(); | ||||||
|  |             insta::assert_snapshot!(name, @"A_fakerest"); | ||||||
|  |             insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); | ||||||
|             insta::assert_json_snapshot!(fakerest_config.embedder_options); |             insta::assert_json_snapshot!(fakerest_config.embedder_options); | ||||||
|             let fakerest_name = name.clone(); |             let fakerest_name = name.clone(); | ||||||
|  |  | ||||||
|             let (name, simple_hf_config) = configs.get(1).unwrap(); |             let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } = | ||||||
|             insta::assert_json_snapshot!(name, @r###""B_small_hf""###); |                 configs.get(1).unwrap(); | ||||||
|  |             insta::assert_snapshot!(name, @"B_small_hf"); | ||||||
|  |             insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); | ||||||
|             insta::assert_json_snapshot!(simple_hf_config.embedder_options); |             insta::assert_json_snapshot!(simple_hf_config.embedder_options); | ||||||
|             let simple_hf_name = name.clone(); |             let simple_hf_name = name.clone(); | ||||||
|  |  | ||||||
| @@ -5046,12 +5077,12 @@ mod tests { | |||||||
|                     &fakerest_name: { |                     &fakerest_name: { | ||||||
|                         // this will never trigger regeneration, which is good because we can't actually generate with |                         // this will never trigger regeneration, which is good because we can't actually generate with | ||||||
|                         // this embedder |                         // this embedder | ||||||
|                                 "userProvided": true, |                         "regenerate": false, | ||||||
|                         "embeddings": beagle_embed, |                         "embeddings": beagle_embed, | ||||||
|                     }, |                     }, | ||||||
|                     &simple_hf_name: { |                     &simple_hf_name: { | ||||||
|                         // this will be regenerated on updates |                         // this will be regenerated on updates | ||||||
|                                 "userProvided": false, |                         "regenerate": true, | ||||||
|                         "embeddings": lab_embed, |                         "embeddings": lab_embed, | ||||||
|                     }, |                     }, | ||||||
|                     "noise": [0.1, 0.2, 0.3] |                     "noise": [0.1, 0.2, 0.3] | ||||||
| @@ -5078,7 +5109,6 @@ mod tests { | |||||||
|                 false, |                 false, | ||||||
|             ) |             ) | ||||||
|             .unwrap(); |             .unwrap(); | ||||||
|         index_scheduler.assert_internally_consistent(); |  | ||||||
|  |  | ||||||
|         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel"); |         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel"); | ||||||
|  |  | ||||||
| @@ -5091,6 +5121,19 @@ mod tests { | |||||||
|             let index = index_scheduler.index("doggos").unwrap(); |             let index = index_scheduler.index("doggos").unwrap(); | ||||||
|             let rtxn = index.read_txn().unwrap(); |             let rtxn = index.read_txn().unwrap(); | ||||||
|  |  | ||||||
|  |             // Ensure the document have been inserted into the relevant bitamp | ||||||
|  |             let configs = index.embedding_configs(&rtxn).unwrap(); | ||||||
|  |             // for consistency with the below | ||||||
|  |             #[allow(clippy::get_first)] | ||||||
|  |             let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } = | ||||||
|  |                 configs.get(0).unwrap(); | ||||||
|  |             insta::assert_snapshot!(name, @"A_fakerest"); | ||||||
|  |             insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); | ||||||
|  |  | ||||||
|  |             let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap(); | ||||||
|  |             insta::assert_snapshot!(name, @"B_small_hf"); | ||||||
|  |             insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); | ||||||
|  |  | ||||||
|             let embeddings = index.embeddings(&rtxn, 0).unwrap(); |             let embeddings = index.embeddings(&rtxn, 0).unwrap(); | ||||||
|  |  | ||||||
|             assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true"); |             assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true"); | ||||||
| @@ -5140,7 +5183,6 @@ mod tests { | |||||||
|                 false, |                 false, | ||||||
|             ) |             ) | ||||||
|             .unwrap(); |             .unwrap(); | ||||||
|         index_scheduler.assert_internally_consistent(); |  | ||||||
|  |  | ||||||
|         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir"); |         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir"); | ||||||
|  |  | ||||||
| @@ -5153,11 +5195,25 @@ mod tests { | |||||||
|                 let index = index_scheduler.index("doggos").unwrap(); |                 let index = index_scheduler.index("doggos").unwrap(); | ||||||
|                 let rtxn = index.read_txn().unwrap(); |                 let rtxn = index.read_txn().unwrap(); | ||||||
|  |  | ||||||
|  |                 // Ensure the document have been inserted into the relevant bitamp | ||||||
|  |                 let configs = index.embedding_configs(&rtxn).unwrap(); | ||||||
|  |                 // for consistency with the below | ||||||
|  |                 #[allow(clippy::get_first)] | ||||||
|  |                 let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } = | ||||||
|  |                     configs.get(0).unwrap(); | ||||||
|  |                 insta::assert_snapshot!(name, @"A_fakerest"); | ||||||
|  |                 insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); | ||||||
|  |  | ||||||
|  |                 let IndexEmbeddingConfig { name, config: _, user_provided } = | ||||||
|  |                     configs.get(1).unwrap(); | ||||||
|  |                 insta::assert_snapshot!(name, @"B_small_hf"); | ||||||
|  |                 insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>"); | ||||||
|  |  | ||||||
|                 let embeddings = index.embeddings(&rtxn, 0).unwrap(); |                 let embeddings = index.embeddings(&rtxn, 0).unwrap(); | ||||||
|  |  | ||||||
|                 // automatically changed to patou |                 // automatically changed to patou because set to regenerate | ||||||
|                 assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true"); |                 assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true"); | ||||||
|                 // remained beagle because set to userProvided |                 // remained beagle | ||||||
|                 assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); |                 assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); | ||||||
|  |  | ||||||
|                 let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; |                 let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; | ||||||
| @@ -5176,4 +5232,578 @@ mod tests { | |||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn import_vectors_first_and_embedder_later() { | ||||||
|  |         let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); | ||||||
|  |  | ||||||
|  |         let content = serde_json::json!( | ||||||
|  |             [ | ||||||
|  |                 { | ||||||
|  |                     "id": 0, | ||||||
|  |                     "doggo": "kefir", | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 1, | ||||||
|  |                     "doggo": "intel", | ||||||
|  |                     "_vectors": { | ||||||
|  |                         "my_doggo_embedder": vec![1; 384], | ||||||
|  |                         "unknown embedder": vec![1, 2, 3], | ||||||
|  |                     } | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 2, | ||||||
|  |                     "doggo": "max", | ||||||
|  |                     "_vectors": { | ||||||
|  |                         "my_doggo_embedder": { | ||||||
|  |                             "regenerate": false, | ||||||
|  |                             "embeddings": vec![2; 384], | ||||||
|  |                         }, | ||||||
|  |                         "unknown embedder": vec![4, 5], | ||||||
|  |                     }, | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 3, | ||||||
|  |                     "doggo": "marcel", | ||||||
|  |                     "_vectors": { | ||||||
|  |                         "my_doggo_embedder": { | ||||||
|  |                             "regenerate": true, | ||||||
|  |                             "embeddings": vec![3; 384], | ||||||
|  |                         }, | ||||||
|  |                     }, | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 4, | ||||||
|  |                     "doggo": "sora", | ||||||
|  |                     "_vectors": { | ||||||
|  |                         "my_doggo_embedder": { | ||||||
|  |                             "regenerate": true, | ||||||
|  |                         }, | ||||||
|  |                     }, | ||||||
|  |                 }, | ||||||
|  |             ] | ||||||
|  |         ); | ||||||
|  |  | ||||||
|  |         let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap(); | ||||||
|  |         let documents_count = | ||||||
|  |             read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file) | ||||||
|  |                 .unwrap(); | ||||||
|  |         snapshot!(documents_count, @"5"); | ||||||
|  |         file.persist().unwrap(); | ||||||
|  |  | ||||||
|  |         index_scheduler | ||||||
|  |             .register( | ||||||
|  |                 KindWithContent::DocumentAdditionOrUpdate { | ||||||
|  |                     index_uid: S("doggos"), | ||||||
|  |                     primary_key: None, | ||||||
|  |                     method: ReplaceDocuments, | ||||||
|  |                     content_file: uuid, | ||||||
|  |                     documents_count, | ||||||
|  |                     allow_index_creation: true, | ||||||
|  |                 }, | ||||||
|  |                 None, | ||||||
|  |                 false, | ||||||
|  |             ) | ||||||
|  |             .unwrap(); | ||||||
|  |         handle.advance_one_successful_batch(); | ||||||
|  |  | ||||||
|  |         let index = index_scheduler.index("doggos").unwrap(); | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||||
|  |         let field_ids = field_ids_map.ids().collect::<Vec<_>>(); | ||||||
|  |         let documents = index | ||||||
|  |             .all_documents(&rtxn) | ||||||
|  |             .unwrap() | ||||||
|  |             .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) | ||||||
|  |             .collect::<Vec<_>>(); | ||||||
|  |         snapshot!(serde_json::to_string(&documents).unwrap(), name: "documents after initial push"); | ||||||
|  |  | ||||||
|  |         let setting = meilisearch_types::settings::Settings::<Unchecked> { | ||||||
|  |             embedders: Setting::Set(maplit::btreemap! { | ||||||
|  |                 S("my_doggo_embedder") => Setting::Set(EmbeddingSettings { | ||||||
|  |                     source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), | ||||||
|  |                     model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), | ||||||
|  |                     revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), | ||||||
|  |                     document_template: Setting::Set(S("{{doc.doggo}}")), | ||||||
|  |                     ..Default::default() | ||||||
|  |                 }) | ||||||
|  |             }), | ||||||
|  |             ..Default::default() | ||||||
|  |         }; | ||||||
|  |         index_scheduler | ||||||
|  |             .register( | ||||||
|  |                 KindWithContent::SettingsUpdate { | ||||||
|  |                     index_uid: S("doggos"), | ||||||
|  |                     new_settings: Box::new(setting), | ||||||
|  |                     is_deletion: false, | ||||||
|  |                     allow_index_creation: false, | ||||||
|  |                 }, | ||||||
|  |                 None, | ||||||
|  |                 false, | ||||||
|  |             ) | ||||||
|  |             .unwrap(); | ||||||
|  |         index_scheduler.assert_internally_consistent(); | ||||||
|  |         handle.advance_one_successful_batch(); | ||||||
|  |         index_scheduler.assert_internally_consistent(); | ||||||
|  |  | ||||||
|  |         let index = index_scheduler.index("doggos").unwrap(); | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||||
|  |         let field_ids = field_ids_map.ids().collect::<Vec<_>>(); | ||||||
|  |         let documents = index | ||||||
|  |             .all_documents(&rtxn) | ||||||
|  |             .unwrap() | ||||||
|  |             .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) | ||||||
|  |             .collect::<Vec<_>>(); | ||||||
|  |         // all the vectors linked to the newly specified embedder have been removed | ||||||
|  |         // Only the unknown embedders stay in the document DB | ||||||
|  |         snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###); | ||||||
|  |         let conf = index.embedding_configs(&rtxn).unwrap(); | ||||||
|  |         // even though we specified the vector for the ID 3, it shouldn't be marked | ||||||
|  |         // as user provided since we explicitly marked it as NOT user provided. | ||||||
|  |         snapshot!(format!("{conf:#?}"), @r###" | ||||||
|  |         [ | ||||||
|  |             IndexEmbeddingConfig { | ||||||
|  |                 name: "my_doggo_embedder", | ||||||
|  |                 config: EmbeddingConfig { | ||||||
|  |                     embedder_options: HuggingFace( | ||||||
|  |                         EmbedderOptions { | ||||||
|  |                             model: "sentence-transformers/all-MiniLM-L6-v2", | ||||||
|  |                             revision: Some( | ||||||
|  |                                 "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", | ||||||
|  |                             ), | ||||||
|  |                             distribution: None, | ||||||
|  |                         }, | ||||||
|  |                     ), | ||||||
|  |                     prompt: PromptData { | ||||||
|  |                         template: "{{doc.doggo}}", | ||||||
|  |                     }, | ||||||
|  |                 }, | ||||||
|  |                 user_provided: RoaringBitmap<[1, 2]>, | ||||||
|  |             }, | ||||||
|  |         ] | ||||||
|  |         "###); | ||||||
|  |         let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); | ||||||
|  |         let embeddings = index.embeddings(&rtxn, docid).unwrap(); | ||||||
|  |         let embedding = &embeddings["my_doggo_embedder"]; | ||||||
|  |         assert!(!embedding.is_empty(), "{embedding:?}"); | ||||||
|  |  | ||||||
|  |         // the document with the id 3 should keep its original embedding | ||||||
|  |         let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); | ||||||
|  |         let mut embeddings = Vec::new(); | ||||||
|  |  | ||||||
|  |         'vectors: for i in 0..=u8::MAX { | ||||||
|  |             let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy) | ||||||
|  |                 .map(Some) | ||||||
|  |                 .or_else(|e| match e { | ||||||
|  |                     arroy::Error::MissingMetadata(_) => Ok(None), | ||||||
|  |                     e => Err(e), | ||||||
|  |                 }) | ||||||
|  |                 .transpose(); | ||||||
|  |  | ||||||
|  |             let Some(reader) = reader else { | ||||||
|  |                 break 'vectors; | ||||||
|  |             }; | ||||||
|  |  | ||||||
|  |             let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap(); | ||||||
|  |             if let Some(embedding) = embedding { | ||||||
|  |                 embeddings.push(embedding) | ||||||
|  |             } else { | ||||||
|  |                 break 'vectors; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         snapshot!(embeddings.len(), @"1"); | ||||||
|  |         assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]); | ||||||
|  |  | ||||||
|  |         // If we update marcel it should regenerate its embedding automatically | ||||||
|  |  | ||||||
|  |         let content = serde_json::json!( | ||||||
|  |             [ | ||||||
|  |                 { | ||||||
|  |                     "id": 3, | ||||||
|  |                     "doggo": "marvel", | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 4, | ||||||
|  |                     "doggo": "sorry", | ||||||
|  |                 }, | ||||||
|  |             ] | ||||||
|  |         ); | ||||||
|  |  | ||||||
|  |         let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(1_u128).unwrap(); | ||||||
|  |         let documents_count = | ||||||
|  |             read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file) | ||||||
|  |                 .unwrap(); | ||||||
|  |         snapshot!(documents_count, @"2"); | ||||||
|  |         file.persist().unwrap(); | ||||||
|  |  | ||||||
|  |         index_scheduler | ||||||
|  |             .register( | ||||||
|  |                 KindWithContent::DocumentAdditionOrUpdate { | ||||||
|  |                     index_uid: S("doggos"), | ||||||
|  |                     primary_key: None, | ||||||
|  |                     method: UpdateDocuments, | ||||||
|  |                     content_file: uuid, | ||||||
|  |                     documents_count, | ||||||
|  |                     allow_index_creation: true, | ||||||
|  |                 }, | ||||||
|  |                 None, | ||||||
|  |                 false, | ||||||
|  |             ) | ||||||
|  |             .unwrap(); | ||||||
|  |         handle.advance_one_successful_batch(); | ||||||
|  |  | ||||||
|  |         // the document with the id 3 should have its original embedding updated | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); | ||||||
|  |         let doc = index.documents(&rtxn, Some(docid)).unwrap()[0]; | ||||||
|  |         let doc = obkv_to_json(&field_ids, &field_ids_map, doc.1).unwrap(); | ||||||
|  |         snapshot!(json_string!(doc), @r###" | ||||||
|  |         { | ||||||
|  |           "id": 3, | ||||||
|  |           "doggo": "marvel" | ||||||
|  |         } | ||||||
|  |         "###); | ||||||
|  |  | ||||||
|  |         let embeddings = index.embeddings(&rtxn, docid).unwrap(); | ||||||
|  |         let embedding = &embeddings["my_doggo_embedder"]; | ||||||
|  |  | ||||||
|  |         assert!(!embedding.is_empty()); | ||||||
|  |         assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]); | ||||||
|  |  | ||||||
|  |         // the document with the id 4 should generate an embedding | ||||||
|  |         let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap(); | ||||||
|  |         let embeddings = index.embeddings(&rtxn, docid).unwrap(); | ||||||
|  |         let embedding = &embeddings["my_doggo_embedder"]; | ||||||
|  |  | ||||||
|  |         assert!(!embedding.is_empty()); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn delete_document_containing_vector() { | ||||||
|  |         // 1. Add an embedder | ||||||
|  |         // 2. Push two documents containing a simple vector | ||||||
|  |         // 3. Delete one of the documents (the one with the id 1) | ||||||
|  |         // 4. The user defined roaring bitmap shouldn't contain the id of the deleted document anymore | ||||||
|  |         // 5. Clear the index | ||||||
|  |         // 6. The user defined roaring bitmap shouldn't contain the id of the remaining document either | ||||||
|  |         let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); | ||||||
|  |  | ||||||
|  |         let setting = meilisearch_types::settings::Settings::<Unchecked> { | ||||||
|  |             embedders: Setting::Set(maplit::btreemap! { | ||||||
|  |                 S("manual") => Setting::Set(EmbeddingSettings { | ||||||
|  |                     source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided), | ||||||
|  |                     dimensions: Setting::Set(3), | ||||||
|  |                     ..Default::default() | ||||||
|  |                 }) | ||||||
|  |             }), | ||||||
|  |             ..Default::default() | ||||||
|  |         }; | ||||||
|  |         index_scheduler | ||||||
|  |             .register( | ||||||
|  |                 KindWithContent::SettingsUpdate { | ||||||
|  |                     index_uid: S("doggos"), | ||||||
|  |                     new_settings: Box::new(setting), | ||||||
|  |                     is_deletion: false, | ||||||
|  |                     allow_index_creation: true, | ||||||
|  |                 }, | ||||||
|  |                 None, | ||||||
|  |                 false, | ||||||
|  |             ) | ||||||
|  |             .unwrap(); | ||||||
|  |         handle.advance_one_successful_batch(); | ||||||
|  |  | ||||||
|  |         let content = serde_json::json!( | ||||||
|  |             [ | ||||||
|  |                 { | ||||||
|  |                     "id": 0, | ||||||
|  |                     "doggo": "kefir", | ||||||
|  |                     "_vectors": { | ||||||
|  |                         "manual": vec![0, 0, 0], | ||||||
|  |                     } | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 1, | ||||||
|  |                     "doggo": "intel", | ||||||
|  |                     "_vectors": { | ||||||
|  |                         "manual": vec![1, 1, 1], | ||||||
|  |                     } | ||||||
|  |                 }, | ||||||
|  |             ] | ||||||
|  |         ); | ||||||
|  |  | ||||||
|  |         let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap(); | ||||||
|  |         let documents_count = | ||||||
|  |             read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file) | ||||||
|  |                 .unwrap(); | ||||||
|  |         snapshot!(documents_count, @"2"); | ||||||
|  |         file.persist().unwrap(); | ||||||
|  |  | ||||||
|  |         index_scheduler | ||||||
|  |             .register( | ||||||
|  |                 KindWithContent::DocumentAdditionOrUpdate { | ||||||
|  |                     index_uid: S("doggos"), | ||||||
|  |                     primary_key: None, | ||||||
|  |                     method: ReplaceDocuments, | ||||||
|  |                     content_file: uuid, | ||||||
|  |                     documents_count, | ||||||
|  |                     allow_index_creation: false, | ||||||
|  |                 }, | ||||||
|  |                 None, | ||||||
|  |                 false, | ||||||
|  |             ) | ||||||
|  |             .unwrap(); | ||||||
|  |         handle.advance_one_successful_batch(); | ||||||
|  |  | ||||||
|  |         index_scheduler | ||||||
|  |             .register( | ||||||
|  |                 KindWithContent::DocumentDeletion { | ||||||
|  |                     index_uid: S("doggos"), | ||||||
|  |                     documents_ids: vec![S("1")], | ||||||
|  |                 }, | ||||||
|  |                 None, | ||||||
|  |                 false, | ||||||
|  |             ) | ||||||
|  |             .unwrap(); | ||||||
|  |         handle.advance_one_successful_batch(); | ||||||
|  |  | ||||||
|  |         let index = index_scheduler.index("doggos").unwrap(); | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||||
|  |         let field_ids = field_ids_map.ids().collect::<Vec<_>>(); | ||||||
|  |         let documents = index | ||||||
|  |             .all_documents(&rtxn) | ||||||
|  |             .unwrap() | ||||||
|  |             .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) | ||||||
|  |             .collect::<Vec<_>>(); | ||||||
|  |         snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###); | ||||||
|  |         let conf = index.embedding_configs(&rtxn).unwrap(); | ||||||
|  |         snapshot!(format!("{conf:#?}"), @r###" | ||||||
|  |         [ | ||||||
|  |             IndexEmbeddingConfig { | ||||||
|  |                 name: "manual", | ||||||
|  |                 config: EmbeddingConfig { | ||||||
|  |                     embedder_options: UserProvided( | ||||||
|  |                         EmbedderOptions { | ||||||
|  |                             dimensions: 3, | ||||||
|  |                             distribution: None, | ||||||
|  |                         }, | ||||||
|  |                     ), | ||||||
|  |                     prompt: PromptData { | ||||||
|  |                         template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}", | ||||||
|  |                     }, | ||||||
|  |                 }, | ||||||
|  |                 user_provided: RoaringBitmap<[0]>, | ||||||
|  |             }, | ||||||
|  |         ] | ||||||
|  |         "###); | ||||||
|  |         let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); | ||||||
|  |         let embeddings = index.embeddings(&rtxn, docid).unwrap(); | ||||||
|  |         let embedding = &embeddings["manual"]; | ||||||
|  |         assert!(!embedding.is_empty(), "{embedding:?}"); | ||||||
|  |  | ||||||
|  |         index_scheduler | ||||||
|  |             .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) | ||||||
|  |             .unwrap(); | ||||||
|  |         handle.advance_one_successful_batch(); | ||||||
|  |  | ||||||
|  |         let index = index_scheduler.index("doggos").unwrap(); | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||||
|  |         let field_ids = field_ids_map.ids().collect::<Vec<_>>(); | ||||||
|  |         let documents = index | ||||||
|  |             .all_documents(&rtxn) | ||||||
|  |             .unwrap() | ||||||
|  |             .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) | ||||||
|  |             .collect::<Vec<_>>(); | ||||||
|  |         snapshot!(serde_json::to_string(&documents).unwrap(), @"[]"); | ||||||
|  |         let conf = index.embedding_configs(&rtxn).unwrap(); | ||||||
|  |         snapshot!(format!("{conf:#?}"), @r###" | ||||||
|  |         [ | ||||||
|  |             IndexEmbeddingConfig { | ||||||
|  |                 name: "manual", | ||||||
|  |                 config: EmbeddingConfig { | ||||||
|  |                     embedder_options: UserProvided( | ||||||
|  |                         EmbedderOptions { | ||||||
|  |                             dimensions: 3, | ||||||
|  |                             distribution: None, | ||||||
|  |                         }, | ||||||
|  |                     ), | ||||||
|  |                     prompt: PromptData { | ||||||
|  |                         template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}", | ||||||
|  |                     }, | ||||||
|  |                 }, | ||||||
|  |                 user_provided: RoaringBitmap<[]>, | ||||||
|  |             }, | ||||||
|  |         ] | ||||||
|  |         "###); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn delete_embedder_with_user_provided_vectors() { | ||||||
|  |         // 1. Add two embedders | ||||||
|  |         // 2. Push two documents containing a simple vector | ||||||
|  |         // 3. The documents must not contain the vectors after the update as they are in the vectors db | ||||||
|  |         // 4. Delete the embedders | ||||||
|  |         // 5. The documents contain the vectors again | ||||||
|  |         let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); | ||||||
|  |  | ||||||
|  |         let setting = meilisearch_types::settings::Settings::<Unchecked> { | ||||||
|  |             embedders: Setting::Set(maplit::btreemap! { | ||||||
|  |                 S("manual") => Setting::Set(EmbeddingSettings { | ||||||
|  |                     source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided), | ||||||
|  |                     dimensions: Setting::Set(3), | ||||||
|  |                     ..Default::default() | ||||||
|  |                 }), | ||||||
|  |                 S("my_doggo_embedder") => Setting::Set(EmbeddingSettings { | ||||||
|  |                     source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), | ||||||
|  |                     model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), | ||||||
|  |                     revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), | ||||||
|  |                     document_template: Setting::Set(S("{{doc.doggo}}")), | ||||||
|  |                     ..Default::default() | ||||||
|  |                 }), | ||||||
|  |             }), | ||||||
|  |             ..Default::default() | ||||||
|  |         }; | ||||||
|  |         index_scheduler | ||||||
|  |             .register( | ||||||
|  |                 KindWithContent::SettingsUpdate { | ||||||
|  |                     index_uid: S("doggos"), | ||||||
|  |                     new_settings: Box::new(setting), | ||||||
|  |                     is_deletion: false, | ||||||
|  |                     allow_index_creation: true, | ||||||
|  |                 }, | ||||||
|  |                 None, | ||||||
|  |                 false, | ||||||
|  |             ) | ||||||
|  |             .unwrap(); | ||||||
|  |         handle.advance_one_successful_batch(); | ||||||
|  |  | ||||||
|  |         let content = serde_json::json!( | ||||||
|  |             [ | ||||||
|  |                 { | ||||||
|  |                     "id": 0, | ||||||
|  |                     "doggo": "kefir", | ||||||
|  |                     "_vectors": { | ||||||
|  |                         "manual": vec![0, 0, 0], | ||||||
|  |                         "my_doggo_embedder": vec![1; 384], | ||||||
|  |                     } | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 1, | ||||||
|  |                     "doggo": "intel", | ||||||
|  |                     "_vectors": { | ||||||
|  |                         "manual": vec![1, 1, 1], | ||||||
|  |                     } | ||||||
|  |                 }, | ||||||
|  |             ] | ||||||
|  |         ); | ||||||
|  |  | ||||||
|  |         let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap(); | ||||||
|  |         let documents_count = | ||||||
|  |             read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file) | ||||||
|  |                 .unwrap(); | ||||||
|  |         snapshot!(documents_count, @"2"); | ||||||
|  |         file.persist().unwrap(); | ||||||
|  |  | ||||||
|  |         index_scheduler | ||||||
|  |             .register( | ||||||
|  |                 KindWithContent::DocumentAdditionOrUpdate { | ||||||
|  |                     index_uid: S("doggos"), | ||||||
|  |                     primary_key: None, | ||||||
|  |                     method: ReplaceDocuments, | ||||||
|  |                     content_file: uuid, | ||||||
|  |                     documents_count, | ||||||
|  |                     allow_index_creation: false, | ||||||
|  |                 }, | ||||||
|  |                 None, | ||||||
|  |                 false, | ||||||
|  |             ) | ||||||
|  |             .unwrap(); | ||||||
|  |         handle.advance_one_successful_batch(); | ||||||
|  |  | ||||||
|  |         { | ||||||
|  |             let index = index_scheduler.index("doggos").unwrap(); | ||||||
|  |             let rtxn = index.read_txn().unwrap(); | ||||||
|  |             let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||||
|  |             let field_ids = field_ids_map.ids().collect::<Vec<_>>(); | ||||||
|  |             let documents = index | ||||||
|  |                 .all_documents(&rtxn) | ||||||
|  |                 .unwrap() | ||||||
|  |                 .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) | ||||||
|  |                 .collect::<Vec<_>>(); | ||||||
|  |             snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel"}]"###); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         { | ||||||
|  |             let setting = meilisearch_types::settings::Settings::<Unchecked> { | ||||||
|  |                 embedders: Setting::Set(maplit::btreemap! { | ||||||
|  |                     S("manual") => Setting::Reset, | ||||||
|  |                 }), | ||||||
|  |                 ..Default::default() | ||||||
|  |             }; | ||||||
|  |             index_scheduler | ||||||
|  |                 .register( | ||||||
|  |                     KindWithContent::SettingsUpdate { | ||||||
|  |                         index_uid: S("doggos"), | ||||||
|  |                         new_settings: Box::new(setting), | ||||||
|  |                         is_deletion: false, | ||||||
|  |                         allow_index_creation: true, | ||||||
|  |                     }, | ||||||
|  |                     None, | ||||||
|  |                     false, | ||||||
|  |                 ) | ||||||
|  |                 .unwrap(); | ||||||
|  |             handle.advance_one_successful_batch(); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         { | ||||||
|  |             let index = index_scheduler.index("doggos").unwrap(); | ||||||
|  |             let rtxn = index.read_txn().unwrap(); | ||||||
|  |             let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||||
|  |             let field_ids = field_ids_map.ids().collect::<Vec<_>>(); | ||||||
|  |             let documents = index | ||||||
|  |                 .all_documents(&rtxn) | ||||||
|  |                 .unwrap() | ||||||
|  |                 .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) | ||||||
|  |                 .collect::<Vec<_>>(); | ||||||
|  |             snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         { | ||||||
|  |             let setting = meilisearch_types::settings::Settings::<Unchecked> { | ||||||
|  |                 embedders: Setting::Reset, | ||||||
|  |                 ..Default::default() | ||||||
|  |             }; | ||||||
|  |             index_scheduler | ||||||
|  |                 .register( | ||||||
|  |                     KindWithContent::SettingsUpdate { | ||||||
|  |                         index_uid: S("doggos"), | ||||||
|  |                         new_settings: Box::new(setting), | ||||||
|  |                         is_deletion: false, | ||||||
|  |                         allow_index_creation: true, | ||||||
|  |                     }, | ||||||
|  |                     None, | ||||||
|  |                     false, | ||||||
|  |                 ) | ||||||
|  |                 .unwrap(); | ||||||
|  |             handle.advance_one_successful_batch(); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         { | ||||||
|  |             let index = index_scheduler.index("doggos").unwrap(); | ||||||
|  |             let rtxn = index.read_txn().unwrap(); | ||||||
|  |             let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||||
|  |             let field_ids = field_ids_map.ids().collect::<Vec<_>>(); | ||||||
|  |             let documents = index | ||||||
|  |                 .all_documents(&rtxn) | ||||||
|  |                 .unwrap() | ||||||
|  |                 .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) | ||||||
|  |                 .collect::<Vec<_>>(); | ||||||
|  |  | ||||||
|  |             // FIXME: redaction: the redaction below does not take effect, the snapshot still contains the full vector | ||||||
|  |             snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }),  @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###); | ||||||
|  |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -6,10 +6,6 @@ expression: doc | |||||||
|   "doggo": "Intel", |   "doggo": "Intel", | ||||||
|   "breed": "beagle", |   "breed": "beagle", | ||||||
|   "_vectors": { |   "_vectors": { | ||||||
|     "A_fakerest": { |  | ||||||
|       "embeddings": "[vector]", |  | ||||||
|       "userProvided": true |  | ||||||
|     }, |  | ||||||
|     "noise": [ |     "noise": [ | ||||||
|       0.1, |       0.1, | ||||||
|       0.2, |       0.2, | ||||||
| @@ -6,10 +6,6 @@ expression: doc | |||||||
|   "doggo": "kefir", |   "doggo": "kefir", | ||||||
|   "breed": "patou", |   "breed": "patou", | ||||||
|   "_vectors": { |   "_vectors": { | ||||||
|     "A_fakerest": { |  | ||||||
|       "embeddings": "[vector]", |  | ||||||
|       "userProvided": true |  | ||||||
|     }, |  | ||||||
|     "noise": [ |     "noise": [ | ||||||
|       0.1, |       0.1, | ||||||
|       0.2, |       0.2, | ||||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -11,7 +11,7 @@ edition.workspace = true | |||||||
| license.workspace = true | license.workspace = true | ||||||
|  |  | ||||||
| [dependencies] | [dependencies] | ||||||
| actix-web = { version = "4.5.1", default-features = false } | actix-web = { version = "4.6.0", default-features = false } | ||||||
| anyhow = "1.0.79" | anyhow = "1.0.79" | ||||||
| convert_case = "0.6.0" | convert_case = "0.6.0" | ||||||
| csv = "1.3.0" | csv = "1.3.0" | ||||||
| @@ -30,7 +30,12 @@ serde_json = "1.0.111" | |||||||
| tar = "0.4.40" | tar = "0.4.40" | ||||||
| tempfile = "3.9.0" | tempfile = "3.9.0" | ||||||
| thiserror = "1.0.56" | thiserror = "1.0.56" | ||||||
| time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } | time = { version = "0.3.31", features = [ | ||||||
|  |     "serde-well-known", | ||||||
|  |     "formatting", | ||||||
|  |     "parsing", | ||||||
|  |     "macros", | ||||||
|  | ] } | ||||||
| tokio = "1.35" | tokio = "1.35" | ||||||
| uuid = { version = "1.6.1", features = ["serde", "v4"] } | uuid = { version = "1.6.1", features = ["serde", "v4"] } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -189,4 +189,6 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError); | |||||||
| merge_with_error_impl_take_error_message!(ParseTaskStatusError); | merge_with_error_impl_take_error_message!(ParseTaskStatusError); | ||||||
| merge_with_error_impl_take_error_message!(IndexUidFormatError); | merge_with_error_impl_take_error_message!(IndexUidFormatError); | ||||||
| merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio); | merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio); | ||||||
|  | merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold); | ||||||
|  | merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold); | ||||||
| merge_with_error_impl_take_error_message!(InvalidSimilarId); | merge_with_error_impl_take_error_message!(InvalidSimilarId); | ||||||
|   | |||||||
| @@ -222,6 +222,7 @@ InvalidApiKeyUid                      , InvalidRequest       , BAD_REQUEST ; | |||||||
| InvalidContentType                    , InvalidRequest       , UNSUPPORTED_MEDIA_TYPE ; | InvalidContentType                    , InvalidRequest       , UNSUPPORTED_MEDIA_TYPE ; | ||||||
| InvalidDocumentCsvDelimiter           , InvalidRequest       , BAD_REQUEST ; | InvalidDocumentCsvDelimiter           , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidDocumentFields                 , InvalidRequest       , BAD_REQUEST ; | InvalidDocumentFields                 , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidDocumentRetrieveVectors        , InvalidRequest       , BAD_REQUEST ; | ||||||
| MissingDocumentFilter                 , InvalidRequest       , BAD_REQUEST ; | MissingDocumentFilter                 , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidDocumentFilter                 , InvalidRequest       , BAD_REQUEST ; | InvalidDocumentFilter                 , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidDocumentGeoField               , InvalidRequest       , BAD_REQUEST ; | InvalidDocumentGeoField               , InvalidRequest       , BAD_REQUEST ; | ||||||
| @@ -240,7 +241,11 @@ InvalidSearchAttributesToSearchOn     , InvalidRequest       , BAD_REQUEST ; | |||||||
| InvalidSearchAttributesToCrop         , InvalidRequest       , BAD_REQUEST ; | InvalidSearchAttributesToCrop         , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSearchAttributesToHighlight    , InvalidRequest       , BAD_REQUEST ; | InvalidSearchAttributesToHighlight    , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSimilarAttributesToRetrieve    , InvalidRequest       , BAD_REQUEST ; | InvalidSimilarAttributesToRetrieve    , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidSimilarRetrieveVectors         , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSearchAttributesToRetrieve     , InvalidRequest       , BAD_REQUEST ; | InvalidSearchAttributesToRetrieve     , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidSearchRankingScoreThreshold    , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidSimilarRankingScoreThreshold   , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidSearchRetrieveVectors          , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSearchCropLength               , InvalidRequest       , BAD_REQUEST ; | InvalidSearchCropLength               , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSearchCropMarker               , InvalidRequest       , BAD_REQUEST ; | InvalidSearchCropMarker               , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSearchFacets                   , InvalidRequest       , BAD_REQUEST ; | InvalidSearchFacets                   , InvalidRequest       , BAD_REQUEST ; | ||||||
| @@ -268,6 +273,7 @@ InvalidSimilarShowRankingScore        , InvalidRequest       , BAD_REQUEST ; | |||||||
| InvalidSearchShowRankingScoreDetails  , InvalidRequest       , BAD_REQUEST ; | InvalidSearchShowRankingScoreDetails  , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSimilarShowRankingScoreDetails , InvalidRequest       , BAD_REQUEST ; | InvalidSimilarShowRankingScoreDetails , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSearchSort                     , InvalidRequest       , BAD_REQUEST ; | InvalidSearchSort                     , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidSearchDistinct                 , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSettingsDisplayedAttributes    , InvalidRequest       , BAD_REQUEST ; | InvalidSettingsDisplayedAttributes    , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSettingsDistinctAttribute      , InvalidRequest       , BAD_REQUEST ; | InvalidSettingsDistinctAttribute      , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSettingsProximityPrecision     , InvalidRequest       , BAD_REQUEST ; | InvalidSettingsProximityPrecision     , InvalidRequest       , BAD_REQUEST ; | ||||||
| @@ -379,6 +385,7 @@ impl ErrorCode for milli::Error { | |||||||
|                         Code::IndexPrimaryKeyMultipleCandidatesFound |                         Code::IndexPrimaryKeyMultipleCandidatesFound | ||||||
|                     } |                     } | ||||||
|                     UserError::PrimaryKeyCannotBeChanged(_) => Code::IndexPrimaryKeyAlreadyExists, |                     UserError::PrimaryKeyCannotBeChanged(_) => Code::IndexPrimaryKeyAlreadyExists, | ||||||
|  |                     UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct, | ||||||
|                     UserError::SortRankingRuleMissing => Code::InvalidSearchSort, |                     UserError::SortRankingRuleMissing => Code::InvalidSearchSort, | ||||||
|                     UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets, |                     UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets, | ||||||
|                     UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort, |                     UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort, | ||||||
| @@ -391,7 +398,8 @@ impl ErrorCode for milli::Error { | |||||||
|                     UserError::CriterionError(_) => Code::InvalidSettingsRankingRules, |                     UserError::CriterionError(_) => Code::InvalidSettingsRankingRules, | ||||||
|                     UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField, |                     UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField, | ||||||
|                     UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions, |                     UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions, | ||||||
|                     UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType, |                     UserError::InvalidVectorsMapType { .. } | ||||||
|  |                     | UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType, | ||||||
|                     UserError::TooManyVectors(_, _) => Code::TooManyVectors, |                     UserError::TooManyVectors(_, _) => Code::TooManyVectors, | ||||||
|                     UserError::SortError(_) => Code::InvalidSearchSort, |                     UserError::SortError(_) => Code::InvalidSearchSort, | ||||||
|                     UserError::InvalidMinTypoWordLenSetting(_, _) => { |                     UserError::InvalidMinTypoWordLenSetting(_, _) => { | ||||||
| @@ -505,6 +513,21 @@ impl fmt::Display for deserr_codes::InvalidSimilarId { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl fmt::Display for deserr_codes::InvalidSearchRankingScoreThreshold { | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |         write!( | ||||||
|  |             f, | ||||||
|  |             "the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`." | ||||||
|  |         ) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold { | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |         deserr_codes::InvalidSearchRankingScoreThreshold.fmt(f) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| #[macro_export] | #[macro_export] | ||||||
| macro_rules! internal_error { | macro_rules! internal_error { | ||||||
|     ($target:ty : $($other:path), *) => { |     ($target:ty : $($other:path), *) => { | ||||||
|   | |||||||
| @@ -8,6 +8,7 @@ use std::str::FromStr; | |||||||
|  |  | ||||||
| use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; | use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; | ||||||
| use fst::IntoStreamer; | use fst::IntoStreamer; | ||||||
|  | use milli::index::IndexEmbeddingConfig; | ||||||
| use milli::proximity::ProximityPrecision; | use milli::proximity::ProximityPrecision; | ||||||
| use milli::update::Setting; | use milli::update::Setting; | ||||||
| use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET}; | use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET}; | ||||||
| @@ -672,7 +673,7 @@ pub fn settings( | |||||||
|     let embedders: BTreeMap<_, _> = index |     let embedders: BTreeMap<_, _> = index | ||||||
|         .embedding_configs(rtxn)? |         .embedding_configs(rtxn)? | ||||||
|         .into_iter() |         .into_iter() | ||||||
|         .map(|(name, config)| (name, Setting::Set(config.into()))) |         .map(|IndexEmbeddingConfig { name, config, .. }| (name, Setting::Set(config.into()))) | ||||||
|         .collect(); |         .collect(); | ||||||
|     let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) }; |     let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -14,20 +14,20 @@ default-run = "meilisearch" | |||||||
|  |  | ||||||
| [dependencies] | [dependencies] | ||||||
| actix-cors = "0.7.0" | actix-cors = "0.7.0" | ||||||
| actix-http = { version = "3.6.0", default-features = false, features = [ | actix-http = { version = "3.7.0", default-features = false, features = [ | ||||||
|     "compress-brotli", |     "compress-brotli", | ||||||
|     "compress-gzip", |     "compress-gzip", | ||||||
|     "rustls-0_21", |     "rustls-0_21", | ||||||
| ] } | ] } | ||||||
| actix-utils = "3.0.1" | actix-utils = "3.0.1" | ||||||
| actix-web = { version = "4.5.1", default-features = false, features = [ | actix-web = { version = "4.6.0", default-features = false, features = [ | ||||||
|     "macros", |     "macros", | ||||||
|     "compress-brotli", |     "compress-brotli", | ||||||
|     "compress-gzip", |     "compress-gzip", | ||||||
|     "cookies", |     "cookies", | ||||||
|     "rustls-0_21", |     "rustls-0_21", | ||||||
| ] } | ] } | ||||||
| actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true } | actix-web-static-files = { version = "4.0.1", optional = true } | ||||||
| anyhow = { version = "1.0.79", features = ["backtrace"] } | anyhow = { version = "1.0.79", features = ["backtrace"] } | ||||||
| async-stream = "0.3.5" | async-stream = "0.3.5" | ||||||
| async-trait = "0.1.77" | async-trait = "0.1.77" | ||||||
| @@ -104,13 +104,13 @@ url = { version = "2.5.0", features = ["serde"] } | |||||||
| tracing = "0.1.40" | tracing = "0.1.40" | ||||||
| tracing-subscriber = { version = "0.3.18", features = ["json"] } | tracing-subscriber = { version = "0.3.18", features = ["json"] } | ||||||
| tracing-trace = { version = "0.1.0", path = "../tracing-trace" } | tracing-trace = { version = "0.1.0", path = "../tracing-trace" } | ||||||
| tracing-actix-web = "0.7.9" | tracing-actix-web = "0.7.10" | ||||||
| build-info = { version = "1.7.0", path = "../build-info" } | build-info = { version = "1.7.0", path = "../build-info" } | ||||||
|  |  | ||||||
| [dev-dependencies] | [dev-dependencies] | ||||||
| actix-rt = "2.9.0" | actix-rt = "2.9.0" | ||||||
| assert-json-diff = "2.0.2" | assert-json-diff = "2.0.2" | ||||||
| brotli = "3.4.0" | brotli = "6.0.0" | ||||||
| insta = "1.34.0" | insta = "1.34.0" | ||||||
| manifest-dir-macros = "0.1.18" | manifest-dir-macros = "0.1.18" | ||||||
| maplit = "1.0.2" | maplit = "1.0.2" | ||||||
| @@ -158,5 +158,5 @@ vietnamese = ["meilisearch-types/vietnamese"] | |||||||
| swedish-recomposition = ["meilisearch-types/swedish-recomposition"] | swedish-recomposition = ["meilisearch-types/swedish-recomposition"] | ||||||
|  |  | ||||||
| [package.metadata.mini-dashboard] | [package.metadata.mini-dashboard] | ||||||
| assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip" | assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip" | ||||||
| sha1 = "e20cc9b390003c6c844f4b8bcc5c5013191a77ff" | sha1 = "592d1b5a3459d621d0aae1dded8fe3154f5c38fe" | ||||||
|   | |||||||
| @@ -74,8 +74,8 @@ pub enum DocumentDeletionKind { | |||||||
|  |  | ||||||
| #[derive(Copy, Clone, Debug, PartialEq, Eq)] | #[derive(Copy, Clone, Debug, PartialEq, Eq)] | ||||||
| pub enum DocumentFetchKind { | pub enum DocumentFetchKind { | ||||||
|     PerDocumentId, |     PerDocumentId { retrieve_vectors: bool }, | ||||||
|     Normal { with_filter: bool, limit: usize, offset: usize }, |     Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, | ||||||
| } | } | ||||||
|  |  | ||||||
| pub trait Analytics: Sync + Send { | pub trait Analytics: Sync + Send { | ||||||
|   | |||||||
| @@ -597,6 +597,9 @@ pub struct SearchAggregator { | |||||||
|     // every time a request has a filter, this field must be incremented by one |     // every time a request has a filter, this field must be incremented by one | ||||||
|     sort_total_number_of_criteria: usize, |     sort_total_number_of_criteria: usize, | ||||||
|  |  | ||||||
|  |     // distinct | ||||||
|  |     distinct: bool, | ||||||
|  |  | ||||||
|     // filter |     // filter | ||||||
|     filter_with_geo_radius: bool, |     filter_with_geo_radius: bool, | ||||||
|     filter_with_geo_bounding_box: bool, |     filter_with_geo_bounding_box: bool, | ||||||
| @@ -622,6 +625,7 @@ pub struct SearchAggregator { | |||||||
|     // Whether a non-default embedder was specified |     // Whether a non-default embedder was specified | ||||||
|     embedder: bool, |     embedder: bool, | ||||||
|     hybrid: bool, |     hybrid: bool, | ||||||
|  |     retrieve_vectors: bool, | ||||||
|  |  | ||||||
|     // every time a search is done, we increment the counter linked to the used settings |     // every time a search is done, we increment the counter linked to the used settings | ||||||
|     matching_strategy: HashMap<String, usize>, |     matching_strategy: HashMap<String, usize>, | ||||||
| @@ -648,6 +652,7 @@ pub struct SearchAggregator { | |||||||
|     // scoring |     // scoring | ||||||
|     show_ranking_score: bool, |     show_ranking_score: bool, | ||||||
|     show_ranking_score_details: bool, |     show_ranking_score_details: bool, | ||||||
|  |     ranking_score_threshold: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl SearchAggregator { | impl SearchAggregator { | ||||||
| @@ -661,6 +666,7 @@ impl SearchAggregator { | |||||||
|             page, |             page, | ||||||
|             hits_per_page, |             hits_per_page, | ||||||
|             attributes_to_retrieve: _, |             attributes_to_retrieve: _, | ||||||
|  |             retrieve_vectors, | ||||||
|             attributes_to_crop: _, |             attributes_to_crop: _, | ||||||
|             crop_length, |             crop_length, | ||||||
|             attributes_to_highlight: _, |             attributes_to_highlight: _, | ||||||
| @@ -669,6 +675,7 @@ impl SearchAggregator { | |||||||
|             show_ranking_score_details, |             show_ranking_score_details, | ||||||
|             filter, |             filter, | ||||||
|             sort, |             sort, | ||||||
|  |             distinct, | ||||||
|             facets: _, |             facets: _, | ||||||
|             highlight_pre_tag, |             highlight_pre_tag, | ||||||
|             highlight_post_tag, |             highlight_post_tag, | ||||||
| @@ -676,6 +683,7 @@ impl SearchAggregator { | |||||||
|             matching_strategy, |             matching_strategy, | ||||||
|             attributes_to_search_on, |             attributes_to_search_on, | ||||||
|             hybrid, |             hybrid, | ||||||
|  |             ranking_score_threshold, | ||||||
|         } = query; |         } = query; | ||||||
|  |  | ||||||
|         let mut ret = Self::default(); |         let mut ret = Self::default(); | ||||||
| @@ -690,6 +698,8 @@ impl SearchAggregator { | |||||||
|             ret.sort_sum_of_criteria_terms = sort.len(); |             ret.sort_sum_of_criteria_terms = sort.len(); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         ret.distinct = distinct.is_some(); | ||||||
|  |  | ||||||
|         if let Some(ref filter) = filter { |         if let Some(ref filter) = filter { | ||||||
|             static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap()); |             static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap()); | ||||||
|             ret.filter_total_number_of_criteria = 1; |             ret.filter_total_number_of_criteria = 1; | ||||||
| @@ -726,6 +736,7 @@ impl SearchAggregator { | |||||||
|         if let Some(ref vector) = vector { |         if let Some(ref vector) = vector { | ||||||
|             ret.max_vector_size = vector.len(); |             ret.max_vector_size = vector.len(); | ||||||
|         } |         } | ||||||
|  |         ret.retrieve_vectors |= retrieve_vectors; | ||||||
|  |  | ||||||
|         if query.is_finite_pagination() { |         if query.is_finite_pagination() { | ||||||
|             let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); |             let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); | ||||||
| @@ -748,6 +759,7 @@ impl SearchAggregator { | |||||||
|  |  | ||||||
|         ret.show_ranking_score = *show_ranking_score; |         ret.show_ranking_score = *show_ranking_score; | ||||||
|         ret.show_ranking_score_details = *show_ranking_score_details; |         ret.show_ranking_score_details = *show_ranking_score_details; | ||||||
|  |         ret.ranking_score_threshold = ranking_score_threshold.is_some(); | ||||||
|  |  | ||||||
|         if let Some(hybrid) = hybrid { |         if let Some(hybrid) = hybrid { | ||||||
|             ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO(); |             ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO(); | ||||||
| @@ -792,6 +804,7 @@ impl SearchAggregator { | |||||||
|             sort_with_geo_point, |             sort_with_geo_point, | ||||||
|             sort_sum_of_criteria_terms, |             sort_sum_of_criteria_terms, | ||||||
|             sort_total_number_of_criteria, |             sort_total_number_of_criteria, | ||||||
|  |             distinct, | ||||||
|             filter_with_geo_radius, |             filter_with_geo_radius, | ||||||
|             filter_with_geo_bounding_box, |             filter_with_geo_bounding_box, | ||||||
|             filter_sum_of_criteria_terms, |             filter_sum_of_criteria_terms, | ||||||
| @@ -800,6 +813,7 @@ impl SearchAggregator { | |||||||
|             attributes_to_search_on_total_number_of_uses, |             attributes_to_search_on_total_number_of_uses, | ||||||
|             max_terms_number, |             max_terms_number, | ||||||
|             max_vector_size, |             max_vector_size, | ||||||
|  |             retrieve_vectors, | ||||||
|             matching_strategy, |             matching_strategy, | ||||||
|             max_limit, |             max_limit, | ||||||
|             max_offset, |             max_offset, | ||||||
| @@ -821,6 +835,7 @@ impl SearchAggregator { | |||||||
|             hybrid, |             hybrid, | ||||||
|             total_degraded, |             total_degraded, | ||||||
|             total_used_negative_operator, |             total_used_negative_operator, | ||||||
|  |             ranking_score_threshold, | ||||||
|         } = other; |         } = other; | ||||||
|  |  | ||||||
|         if self.timestamp.is_none() { |         if self.timestamp.is_none() { | ||||||
| @@ -847,6 +862,9 @@ impl SearchAggregator { | |||||||
|         self.sort_total_number_of_criteria = |         self.sort_total_number_of_criteria = | ||||||
|             self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); |             self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); | ||||||
|  |  | ||||||
|  |         // distinct | ||||||
|  |         self.distinct |= distinct; | ||||||
|  |  | ||||||
|         // filter |         // filter | ||||||
|         self.filter_with_geo_radius |= filter_with_geo_radius; |         self.filter_with_geo_radius |= filter_with_geo_radius; | ||||||
|         self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; |         self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; | ||||||
| @@ -869,6 +887,7 @@ impl SearchAggregator { | |||||||
|  |  | ||||||
|         // vector |         // vector | ||||||
|         self.max_vector_size = self.max_vector_size.max(max_vector_size); |         self.max_vector_size = self.max_vector_size.max(max_vector_size); | ||||||
|  |         self.retrieve_vectors |= retrieve_vectors; | ||||||
|         self.semantic_ratio |= semantic_ratio; |         self.semantic_ratio |= semantic_ratio; | ||||||
|         self.hybrid |= hybrid; |         self.hybrid |= hybrid; | ||||||
|         self.embedder |= embedder; |         self.embedder |= embedder; | ||||||
| @@ -904,6 +923,7 @@ impl SearchAggregator { | |||||||
|         // scoring |         // scoring | ||||||
|         self.show_ranking_score |= show_ranking_score; |         self.show_ranking_score |= show_ranking_score; | ||||||
|         self.show_ranking_score_details |= show_ranking_score_details; |         self.show_ranking_score_details |= show_ranking_score_details; | ||||||
|  |         self.ranking_score_threshold |= ranking_score_threshold; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { |     pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { | ||||||
| @@ -916,6 +936,7 @@ impl SearchAggregator { | |||||||
|             sort_with_geo_point, |             sort_with_geo_point, | ||||||
|             sort_sum_of_criteria_terms, |             sort_sum_of_criteria_terms, | ||||||
|             sort_total_number_of_criteria, |             sort_total_number_of_criteria, | ||||||
|  |             distinct, | ||||||
|             filter_with_geo_radius, |             filter_with_geo_radius, | ||||||
|             filter_with_geo_bounding_box, |             filter_with_geo_bounding_box, | ||||||
|             filter_sum_of_criteria_terms, |             filter_sum_of_criteria_terms, | ||||||
| @@ -924,6 +945,7 @@ impl SearchAggregator { | |||||||
|             attributes_to_search_on_total_number_of_uses, |             attributes_to_search_on_total_number_of_uses, | ||||||
|             max_terms_number, |             max_terms_number, | ||||||
|             max_vector_size, |             max_vector_size, | ||||||
|  |             retrieve_vectors, | ||||||
|             matching_strategy, |             matching_strategy, | ||||||
|             max_limit, |             max_limit, | ||||||
|             max_offset, |             max_offset, | ||||||
| @@ -945,6 +967,7 @@ impl SearchAggregator { | |||||||
|             hybrid, |             hybrid, | ||||||
|             total_degraded, |             total_degraded, | ||||||
|             total_used_negative_operator, |             total_used_negative_operator, | ||||||
|  |             ranking_score_threshold, | ||||||
|         } = self; |         } = self; | ||||||
|  |  | ||||||
|         if total_received == 0 { |         if total_received == 0 { | ||||||
| @@ -971,6 +994,7 @@ impl SearchAggregator { | |||||||
|                     "with_geoPoint": sort_with_geo_point, |                     "with_geoPoint": sort_with_geo_point, | ||||||
|                     "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), |                     "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), | ||||||
|                 }, |                 }, | ||||||
|  |                 "distinct": distinct, | ||||||
|                 "filter": { |                 "filter": { | ||||||
|                    "with_geoRadius": filter_with_geo_radius, |                    "with_geoRadius": filter_with_geo_radius, | ||||||
|                    "with_geoBoundingBox": filter_with_geo_bounding_box, |                    "with_geoBoundingBox": filter_with_geo_bounding_box, | ||||||
| @@ -985,6 +1009,7 @@ impl SearchAggregator { | |||||||
|                 }, |                 }, | ||||||
|                 "vector": { |                 "vector": { | ||||||
|                     "max_vector_size": max_vector_size, |                     "max_vector_size": max_vector_size, | ||||||
|  |                     "retrieve_vectors": retrieve_vectors, | ||||||
|                 }, |                 }, | ||||||
|                 "hybrid": { |                 "hybrid": { | ||||||
|                     "enabled": hybrid, |                     "enabled": hybrid, | ||||||
| @@ -1015,6 +1040,7 @@ impl SearchAggregator { | |||||||
|                 "scoring": { |                 "scoring": { | ||||||
|                     "show_ranking_score": show_ranking_score, |                     "show_ranking_score": show_ranking_score, | ||||||
|                     "show_ranking_score_details": show_ranking_score_details, |                     "show_ranking_score_details": show_ranking_score_details, | ||||||
|  |                     "ranking_score_threshold": ranking_score_threshold, | ||||||
|                 }, |                 }, | ||||||
|             }); |             }); | ||||||
|  |  | ||||||
| @@ -1072,6 +1098,7 @@ impl MultiSearchAggregator { | |||||||
|                     page: _, |                     page: _, | ||||||
|                     hits_per_page: _, |                     hits_per_page: _, | ||||||
|                     attributes_to_retrieve: _, |                     attributes_to_retrieve: _, | ||||||
|  |                     retrieve_vectors: _, | ||||||
|                     attributes_to_crop: _, |                     attributes_to_crop: _, | ||||||
|                     crop_length: _, |                     crop_length: _, | ||||||
|                     attributes_to_highlight: _, |                     attributes_to_highlight: _, | ||||||
| @@ -1080,6 +1107,7 @@ impl MultiSearchAggregator { | |||||||
|                     show_matches_position: _, |                     show_matches_position: _, | ||||||
|                     filter: _, |                     filter: _, | ||||||
|                     sort: _, |                     sort: _, | ||||||
|  |                     distinct: _, | ||||||
|                     facets: _, |                     facets: _, | ||||||
|                     highlight_pre_tag: _, |                     highlight_pre_tag: _, | ||||||
|                     highlight_post_tag: _, |                     highlight_post_tag: _, | ||||||
| @@ -1087,6 +1115,7 @@ impl MultiSearchAggregator { | |||||||
|                     matching_strategy: _, |                     matching_strategy: _, | ||||||
|                     attributes_to_search_on: _, |                     attributes_to_search_on: _, | ||||||
|                     hybrid: _, |                     hybrid: _, | ||||||
|  |                     ranking_score_threshold: _, | ||||||
|                 } = query; |                 } = query; | ||||||
|  |  | ||||||
|                 index_uid.as_str() |                 index_uid.as_str() | ||||||
| @@ -1234,6 +1263,7 @@ impl FacetSearchAggregator { | |||||||
|             matching_strategy, |             matching_strategy, | ||||||
|             attributes_to_search_on, |             attributes_to_search_on, | ||||||
|             hybrid, |             hybrid, | ||||||
|  |             ranking_score_threshold, | ||||||
|         } = query; |         } = query; | ||||||
|  |  | ||||||
|         let mut ret = Self::default(); |         let mut ret = Self::default(); | ||||||
| @@ -1248,7 +1278,8 @@ impl FacetSearchAggregator { | |||||||
|             || filter.is_some() |             || filter.is_some() | ||||||
|             || *matching_strategy != MatchingStrategy::default() |             || *matching_strategy != MatchingStrategy::default() | ||||||
|             || attributes_to_search_on.is_some() |             || attributes_to_search_on.is_some() | ||||||
|             || hybrid.is_some(); |             || hybrid.is_some() | ||||||
|  |             || ranking_score_threshold.is_some(); | ||||||
|  |  | ||||||
|         ret |         ret | ||||||
|     } |     } | ||||||
| @@ -1524,6 +1555,9 @@ pub struct DocumentsFetchAggregator { | |||||||
|     // if a filter was used |     // if a filter was used | ||||||
|     per_filter: bool, |     per_filter: bool, | ||||||
|  |  | ||||||
|  |     #[serde(rename = "vector.retrieve_vectors")] | ||||||
|  |     retrieve_vectors: bool, | ||||||
|  |  | ||||||
|     // pagination |     // pagination | ||||||
|     #[serde(rename = "pagination.max_limit")] |     #[serde(rename = "pagination.max_limit")] | ||||||
|     max_limit: usize, |     max_limit: usize, | ||||||
| @@ -1533,18 +1567,21 @@ pub struct DocumentsFetchAggregator { | |||||||
|  |  | ||||||
| impl DocumentsFetchAggregator { | impl DocumentsFetchAggregator { | ||||||
|     pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self { |     pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self { | ||||||
|         let (limit, offset) = match query { |         let (limit, offset, retrieve_vectors) = match query { | ||||||
|             DocumentFetchKind::PerDocumentId => (1, 0), |             DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors), | ||||||
|             DocumentFetchKind::Normal { limit, offset, .. } => (*limit, *offset), |             DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => { | ||||||
|  |                 (*limit, *offset, *retrieve_vectors) | ||||||
|  |             } | ||||||
|         }; |         }; | ||||||
|         Self { |         Self { | ||||||
|             timestamp: Some(OffsetDateTime::now_utc()), |             timestamp: Some(OffsetDateTime::now_utc()), | ||||||
|             user_agents: extract_user_agents(request).into_iter().collect(), |             user_agents: extract_user_agents(request).into_iter().collect(), | ||||||
|             total_received: 1, |             total_received: 1, | ||||||
|             per_document_id: matches!(query, DocumentFetchKind::PerDocumentId), |             per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }), | ||||||
|             per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter), |             per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter), | ||||||
|             max_limit: limit, |             max_limit: limit, | ||||||
|             max_offset: offset, |             max_offset: offset, | ||||||
|  |             retrieve_vectors, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -1558,6 +1595,7 @@ impl DocumentsFetchAggregator { | |||||||
|             per_filter, |             per_filter, | ||||||
|             max_limit, |             max_limit, | ||||||
|             max_offset, |             max_offset, | ||||||
|  |             retrieve_vectors, | ||||||
|         } = other; |         } = other; | ||||||
|  |  | ||||||
|         if self.timestamp.is_none() { |         if self.timestamp.is_none() { | ||||||
| @@ -1573,6 +1611,8 @@ impl DocumentsFetchAggregator { | |||||||
|  |  | ||||||
|         self.max_limit = self.max_limit.max(max_limit); |         self.max_limit = self.max_limit.max(max_limit); | ||||||
|         self.max_offset = self.max_offset.max(max_offset); |         self.max_offset = self.max_offset.max(max_offset); | ||||||
|  |  | ||||||
|  |         self.retrieve_vectors |= retrieve_vectors; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { |     pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { | ||||||
| @@ -1613,6 +1653,7 @@ pub struct SimilarAggregator { | |||||||
|  |  | ||||||
|     // Whether a non-default embedder was specified |     // Whether a non-default embedder was specified | ||||||
|     embedder: bool, |     embedder: bool, | ||||||
|  |     retrieve_vectors: bool, | ||||||
|  |  | ||||||
|     // pagination |     // pagination | ||||||
|     max_limit: usize, |     max_limit: usize, | ||||||
| @@ -1624,6 +1665,7 @@ pub struct SimilarAggregator { | |||||||
|     // scoring |     // scoring | ||||||
|     show_ranking_score: bool, |     show_ranking_score: bool, | ||||||
|     show_ranking_score_details: bool, |     show_ranking_score_details: bool, | ||||||
|  |     ranking_score_threshold: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl SimilarAggregator { | impl SimilarAggregator { | ||||||
| @@ -1635,9 +1677,11 @@ impl SimilarAggregator { | |||||||
|             offset, |             offset, | ||||||
|             limit, |             limit, | ||||||
|             attributes_to_retrieve: _, |             attributes_to_retrieve: _, | ||||||
|  |             retrieve_vectors, | ||||||
|             show_ranking_score, |             show_ranking_score, | ||||||
|             show_ranking_score_details, |             show_ranking_score_details, | ||||||
|             filter, |             filter, | ||||||
|  |             ranking_score_threshold, | ||||||
|         } = query; |         } = query; | ||||||
|  |  | ||||||
|         let mut ret = Self::default(); |         let mut ret = Self::default(); | ||||||
| @@ -1675,8 +1719,10 @@ impl SimilarAggregator { | |||||||
|  |  | ||||||
|         ret.show_ranking_score = *show_ranking_score; |         ret.show_ranking_score = *show_ranking_score; | ||||||
|         ret.show_ranking_score_details = *show_ranking_score_details; |         ret.show_ranking_score_details = *show_ranking_score_details; | ||||||
|  |         ret.ranking_score_threshold = ranking_score_threshold.is_some(); | ||||||
|  |  | ||||||
|         ret.embedder = embedder.is_some(); |         ret.embedder = embedder.is_some(); | ||||||
|  |         ret.retrieve_vectors = *retrieve_vectors; | ||||||
|  |  | ||||||
|         ret |         ret | ||||||
|     } |     } | ||||||
| @@ -1708,6 +1754,8 @@ impl SimilarAggregator { | |||||||
|             show_ranking_score, |             show_ranking_score, | ||||||
|             show_ranking_score_details, |             show_ranking_score_details, | ||||||
|             embedder, |             embedder, | ||||||
|  |             ranking_score_threshold, | ||||||
|  |             retrieve_vectors, | ||||||
|         } = other; |         } = other; | ||||||
|  |  | ||||||
|         if self.timestamp.is_none() { |         if self.timestamp.is_none() { | ||||||
| @@ -1737,6 +1785,7 @@ impl SimilarAggregator { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         self.embedder |= embedder; |         self.embedder |= embedder; | ||||||
|  |         self.retrieve_vectors |= retrieve_vectors; | ||||||
|  |  | ||||||
|         // pagination |         // pagination | ||||||
|         self.max_limit = self.max_limit.max(max_limit); |         self.max_limit = self.max_limit.max(max_limit); | ||||||
| @@ -1749,6 +1798,7 @@ impl SimilarAggregator { | |||||||
|         // scoring |         // scoring | ||||||
|         self.show_ranking_score |= show_ranking_score; |         self.show_ranking_score |= show_ranking_score; | ||||||
|         self.show_ranking_score_details |= show_ranking_score_details; |         self.show_ranking_score_details |= show_ranking_score_details; | ||||||
|  |         self.ranking_score_threshold |= ranking_score_threshold; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { |     pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { | ||||||
| @@ -1769,6 +1819,8 @@ impl SimilarAggregator { | |||||||
|             show_ranking_score, |             show_ranking_score, | ||||||
|             show_ranking_score_details, |             show_ranking_score_details, | ||||||
|             embedder, |             embedder, | ||||||
|  |             ranking_score_threshold, | ||||||
|  |             retrieve_vectors, | ||||||
|         } = self; |         } = self; | ||||||
|  |  | ||||||
|         if total_received == 0 { |         if total_received == 0 { | ||||||
| @@ -1795,6 +1847,9 @@ impl SimilarAggregator { | |||||||
|                    "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), |                    "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), | ||||||
|                    "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), |                    "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), | ||||||
|                 }, |                 }, | ||||||
|  |                 "vector": { | ||||||
|  |                     "retrieve_vectors": retrieve_vectors, | ||||||
|  |                 }, | ||||||
|                 "hybrid": { |                 "hybrid": { | ||||||
|                     "embedder": embedder, |                     "embedder": embedder, | ||||||
|                 }, |                 }, | ||||||
| @@ -1808,6 +1863,7 @@ impl SimilarAggregator { | |||||||
|                 "scoring": { |                 "scoring": { | ||||||
|                     "show_ranking_score": show_ranking_score, |                     "show_ranking_score": show_ranking_score, | ||||||
|                     "show_ranking_score_details": show_ranking_score_details, |                     "show_ranking_score_details": show_ranking_score_details, | ||||||
|  |                     "ranking_score_threshold": ranking_score_threshold, | ||||||
|                 }, |                 }, | ||||||
|             }); |             }); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -16,6 +16,7 @@ use meilisearch_types::error::{Code, ResponseError}; | |||||||
| use meilisearch_types::heed::RoTxn; | use meilisearch_types::heed::RoTxn; | ||||||
| use meilisearch_types::index_uid::IndexUid; | use meilisearch_types::index_uid::IndexUid; | ||||||
| use meilisearch_types::milli::update::IndexDocumentsMethod; | use meilisearch_types::milli::update::IndexDocumentsMethod; | ||||||
|  | use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors; | ||||||
| use meilisearch_types::milli::DocumentId; | use meilisearch_types::milli::DocumentId; | ||||||
| use meilisearch_types::star_or::OptionStarOrList; | use meilisearch_types::star_or::OptionStarOrList; | ||||||
| use meilisearch_types::tasks::KindWithContent; | use meilisearch_types::tasks::KindWithContent; | ||||||
| @@ -39,7 +40,7 @@ use crate::extractors::sequential_extractor::SeqHandler; | |||||||
| use crate::routes::{ | use crate::routes::{ | ||||||
|     get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, |     get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, | ||||||
| }; | }; | ||||||
| use crate::search::parse_filter; | use crate::search::{parse_filter, RetrieveVectors}; | ||||||
| use crate::Opt; | use crate::Opt; | ||||||
|  |  | ||||||
| static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| { | static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| { | ||||||
| @@ -94,6 +95,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) { | |||||||
| pub struct GetDocument { | pub struct GetDocument { | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)] | ||||||
|     fields: OptionStarOrList<String>, |     fields: OptionStarOrList<String>, | ||||||
|  |     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)] | ||||||
|  |     retrieve_vectors: Param<bool>, | ||||||
| } | } | ||||||
|  |  | ||||||
| pub async fn get_document( | pub async fn get_document( | ||||||
| @@ -107,13 +110,20 @@ pub async fn get_document( | |||||||
|     debug!(parameters = ?params, "Get document"); |     debug!(parameters = ?params, "Get document"); | ||||||
|     let index_uid = IndexUid::try_from(index_uid)?; |     let index_uid = IndexUid::try_from(index_uid)?; | ||||||
|  |  | ||||||
|     analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req); |     let GetDocument { fields, retrieve_vectors: param_retrieve_vectors } = params.into_inner(); | ||||||
|  |  | ||||||
|     let GetDocument { fields } = params.into_inner(); |  | ||||||
|     let attributes_to_retrieve = fields.merge_star_and_none(); |     let attributes_to_retrieve = fields.merge_star_and_none(); | ||||||
|  |  | ||||||
|  |     let features = index_scheduler.features(); | ||||||
|  |     let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?; | ||||||
|  |  | ||||||
|  |     analytics.get_fetch_documents( | ||||||
|  |         &DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 }, | ||||||
|  |         &req, | ||||||
|  |     ); | ||||||
|  |  | ||||||
|     let index = index_scheduler.index(&index_uid)?; |     let index = index_scheduler.index(&index_uid)?; | ||||||
|     let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?; |     let document = | ||||||
|  |         retrieve_document(&index, &document_id, attributes_to_retrieve, retrieve_vectors)?; | ||||||
|     debug!(returns = ?document, "Get document"); |     debug!(returns = ?document, "Get document"); | ||||||
|     Ok(HttpResponse::Ok().json(document)) |     Ok(HttpResponse::Ok().json(document)) | ||||||
| } | } | ||||||
| @@ -153,6 +163,8 @@ pub struct BrowseQueryGet { | |||||||
|     limit: Param<usize>, |     limit: Param<usize>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)] | ||||||
|     fields: OptionStarOrList<String>, |     fields: OptionStarOrList<String>, | ||||||
|  |     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)] | ||||||
|  |     retrieve_vectors: Param<bool>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)] | ||||||
|     filter: Option<String>, |     filter: Option<String>, | ||||||
| } | } | ||||||
| @@ -166,6 +178,8 @@ pub struct BrowseQuery { | |||||||
|     limit: usize, |     limit: usize, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)] |     #[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)] | ||||||
|     fields: Option<Vec<String>>, |     fields: Option<Vec<String>>, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidDocumentRetrieveVectors>)] | ||||||
|  |     retrieve_vectors: bool, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)] |     #[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)] | ||||||
|     filter: Option<Value>, |     filter: Option<Value>, | ||||||
| } | } | ||||||
| @@ -185,6 +199,7 @@ pub async fn documents_by_query_post( | |||||||
|             with_filter: body.filter.is_some(), |             with_filter: body.filter.is_some(), | ||||||
|             limit: body.limit, |             limit: body.limit, | ||||||
|             offset: body.offset, |             offset: body.offset, | ||||||
|  |             retrieve_vectors: body.retrieve_vectors, | ||||||
|         }, |         }, | ||||||
|         &req, |         &req, | ||||||
|     ); |     ); | ||||||
| @@ -201,7 +216,7 @@ pub async fn get_documents( | |||||||
| ) -> Result<HttpResponse, ResponseError> { | ) -> Result<HttpResponse, ResponseError> { | ||||||
|     debug!(parameters = ?params, "Get documents GET"); |     debug!(parameters = ?params, "Get documents GET"); | ||||||
|  |  | ||||||
|     let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner(); |     let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter } = params.into_inner(); | ||||||
|  |  | ||||||
|     let filter = match filter { |     let filter = match filter { | ||||||
|         Some(f) => match serde_json::from_str(&f) { |         Some(f) => match serde_json::from_str(&f) { | ||||||
| @@ -215,6 +230,7 @@ pub async fn get_documents( | |||||||
|         offset: offset.0, |         offset: offset.0, | ||||||
|         limit: limit.0, |         limit: limit.0, | ||||||
|         fields: fields.merge_star_and_none(), |         fields: fields.merge_star_and_none(), | ||||||
|  |         retrieve_vectors: retrieve_vectors.0, | ||||||
|         filter, |         filter, | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
| @@ -223,6 +239,7 @@ pub async fn get_documents( | |||||||
|             with_filter: query.filter.is_some(), |             with_filter: query.filter.is_some(), | ||||||
|             limit: query.limit, |             limit: query.limit, | ||||||
|             offset: query.offset, |             offset: query.offset, | ||||||
|  |             retrieve_vectors: query.retrieve_vectors, | ||||||
|         }, |         }, | ||||||
|         &req, |         &req, | ||||||
|     ); |     ); | ||||||
| @@ -236,10 +253,14 @@ fn documents_by_query( | |||||||
|     query: BrowseQuery, |     query: BrowseQuery, | ||||||
| ) -> Result<HttpResponse, ResponseError> { | ) -> Result<HttpResponse, ResponseError> { | ||||||
|     let index_uid = IndexUid::try_from(index_uid.into_inner())?; |     let index_uid = IndexUid::try_from(index_uid.into_inner())?; | ||||||
|     let BrowseQuery { offset, limit, fields, filter } = query; |     let BrowseQuery { offset, limit, fields, retrieve_vectors, filter } = query; | ||||||
|  |  | ||||||
|  |     let features = index_scheduler.features(); | ||||||
|  |     let retrieve_vectors = RetrieveVectors::new(retrieve_vectors, features)?; | ||||||
|  |  | ||||||
|     let index = index_scheduler.index(&index_uid)?; |     let index = index_scheduler.index(&index_uid)?; | ||||||
|     let (total, documents) = retrieve_documents(&index, offset, limit, filter, fields)?; |     let (total, documents) = | ||||||
|  |         retrieve_documents(&index, offset, limit, filter, fields, retrieve_vectors)?; | ||||||
|  |  | ||||||
|     let ret = PaginationView::new(offset, limit, total as usize, documents); |     let ret = PaginationView::new(offset, limit, total as usize, documents); | ||||||
|  |  | ||||||
| @@ -579,13 +600,44 @@ fn some_documents<'a, 't: 'a>( | |||||||
|     index: &'a Index, |     index: &'a Index, | ||||||
|     rtxn: &'t RoTxn, |     rtxn: &'t RoTxn, | ||||||
|     doc_ids: impl IntoIterator<Item = DocumentId> + 'a, |     doc_ids: impl IntoIterator<Item = DocumentId> + 'a, | ||||||
|  |     retrieve_vectors: RetrieveVectors, | ||||||
| ) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> { | ) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> { | ||||||
|     let fields_ids_map = index.fields_ids_map(rtxn)?; |     let fields_ids_map = index.fields_ids_map(rtxn)?; | ||||||
|     let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); |     let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); | ||||||
|  |     let embedding_configs = index.embedding_configs(rtxn)?; | ||||||
|  |  | ||||||
|     Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| { |     Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| { | ||||||
|         ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> { |         ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> { | ||||||
|             Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?) |             let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?; | ||||||
|  |             match retrieve_vectors { | ||||||
|  |                 RetrieveVectors::Ignore => {} | ||||||
|  |                 RetrieveVectors::Hide => { | ||||||
|  |                     document.remove("_vectors"); | ||||||
|  |                 } | ||||||
|  |                 RetrieveVectors::Retrieve => { | ||||||
|  |                     let mut vectors = match document.remove("_vectors") { | ||||||
|  |                         Some(Value::Object(map)) => map, | ||||||
|  |                         _ => Default::default(), | ||||||
|  |                     }; | ||||||
|  |                     for (name, vector) in index.embeddings(rtxn, key)? { | ||||||
|  |                         let user_provided = embedding_configs | ||||||
|  |                             .iter() | ||||||
|  |                             .find(|conf| conf.name == name) | ||||||
|  |                             .is_some_and(|conf| conf.user_provided.contains(key)); | ||||||
|  |                         let embeddings = ExplicitVectors { | ||||||
|  |                             embeddings: Some(vector.into()), | ||||||
|  |                             regenerate: !user_provided, | ||||||
|  |                         }; | ||||||
|  |                         vectors.insert( | ||||||
|  |                             name, | ||||||
|  |                             serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?, | ||||||
|  |                         ); | ||||||
|  |                     } | ||||||
|  |                     document.insert("_vectors".into(), vectors.into()); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             Ok(document) | ||||||
|         }) |         }) | ||||||
|     })) |     })) | ||||||
| } | } | ||||||
| @@ -596,6 +648,7 @@ fn retrieve_documents<S: AsRef<str>>( | |||||||
|     limit: usize, |     limit: usize, | ||||||
|     filter: Option<Value>, |     filter: Option<Value>, | ||||||
|     attributes_to_retrieve: Option<Vec<S>>, |     attributes_to_retrieve: Option<Vec<S>>, | ||||||
|  |     retrieve_vectors: RetrieveVectors, | ||||||
| ) -> Result<(u64, Vec<Document>), ResponseError> { | ) -> Result<(u64, Vec<Document>), ResponseError> { | ||||||
|     let rtxn = index.read_txn()?; |     let rtxn = index.read_txn()?; | ||||||
|     let filter = &filter; |     let filter = &filter; | ||||||
| @@ -620,53 +673,57 @@ fn retrieve_documents<S: AsRef<str>>( | |||||||
|     let (it, number_of_documents) = { |     let (it, number_of_documents) = { | ||||||
|         let number_of_documents = candidates.len(); |         let number_of_documents = candidates.len(); | ||||||
|         ( |         ( | ||||||
|             some_documents(index, &rtxn, candidates.into_iter().skip(offset).take(limit))?, |             some_documents( | ||||||
|  |                 index, | ||||||
|  |                 &rtxn, | ||||||
|  |                 candidates.into_iter().skip(offset).take(limit), | ||||||
|  |                 retrieve_vectors, | ||||||
|  |             )?, | ||||||
|             number_of_documents, |             number_of_documents, | ||||||
|         ) |         ) | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     let documents: Result<Vec<_>, ResponseError> = it |     let documents: Vec<_> = it | ||||||
|         .map(|document| { |         .map(|document| { | ||||||
|             Ok(match &attributes_to_retrieve { |             Ok(match &attributes_to_retrieve { | ||||||
|                 Some(attributes_to_retrieve) => permissive_json_pointer::select_values( |                 Some(attributes_to_retrieve) => permissive_json_pointer::select_values( | ||||||
|                     &document?, |                     &document?, | ||||||
|                     attributes_to_retrieve.iter().map(|s| s.as_ref()), |                     attributes_to_retrieve.iter().map(|s| s.as_ref()).chain( | ||||||
|  |                         (retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors"), | ||||||
|  |                     ), | ||||||
|                 ), |                 ), | ||||||
|                 None => document?, |                 None => document?, | ||||||
|             }) |             }) | ||||||
|         }) |         }) | ||||||
|         .collect(); |         .collect::<Result<_, ResponseError>>()?; | ||||||
|  |  | ||||||
|     Ok((number_of_documents, documents?)) |     Ok((number_of_documents, documents)) | ||||||
| } | } | ||||||
|  |  | ||||||
| fn retrieve_document<S: AsRef<str>>( | fn retrieve_document<S: AsRef<str>>( | ||||||
|     index: &Index, |     index: &Index, | ||||||
|     doc_id: &str, |     doc_id: &str, | ||||||
|     attributes_to_retrieve: Option<Vec<S>>, |     attributes_to_retrieve: Option<Vec<S>>, | ||||||
|  |     retrieve_vectors: RetrieveVectors, | ||||||
| ) -> Result<Document, ResponseError> { | ) -> Result<Document, ResponseError> { | ||||||
|     let txn = index.read_txn()?; |     let txn = index.read_txn()?; | ||||||
|  |  | ||||||
|     let fields_ids_map = index.fields_ids_map(&txn)?; |  | ||||||
|     let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); |  | ||||||
|  |  | ||||||
|     let internal_id = index |     let internal_id = index | ||||||
|         .external_documents_ids() |         .external_documents_ids() | ||||||
|         .get(&txn, doc_id)? |         .get(&txn, doc_id)? | ||||||
|         .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?; |         .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?; | ||||||
|  |  | ||||||
|     let document = index |     let document = some_documents(index, &txn, Some(internal_id), retrieve_vectors)? | ||||||
|         .documents(&txn, std::iter::once(internal_id))? |  | ||||||
|         .into_iter() |  | ||||||
|         .next() |         .next() | ||||||
|         .map(|(_, d)| d) |         .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))??; | ||||||
|         .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?; |  | ||||||
|  |  | ||||||
|     let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?; |  | ||||||
|     let document = match &attributes_to_retrieve { |     let document = match &attributes_to_retrieve { | ||||||
|         Some(attributes_to_retrieve) => permissive_json_pointer::select_values( |         Some(attributes_to_retrieve) => permissive_json_pointer::select_values( | ||||||
|             &document, |             &document, | ||||||
|             attributes_to_retrieve.iter().map(|s| s.as_ref()), |             attributes_to_retrieve | ||||||
|  |                 .iter() | ||||||
|  |                 .map(|s| s.as_ref()) | ||||||
|  |                 .chain((retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors")), | ||||||
|         ), |         ), | ||||||
|         None => document, |         None => document, | ||||||
|     }; |     }; | ||||||
|   | |||||||
| @@ -14,8 +14,8 @@ use crate::extractors::authentication::policies::*; | |||||||
| use crate::extractors::authentication::GuardedData; | use crate::extractors::authentication::GuardedData; | ||||||
| use crate::routes::indexes::search::search_kind; | use crate::routes::indexes::search::search_kind; | ||||||
| use crate::search::{ | use crate::search::{ | ||||||
|     add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery, |     add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, | ||||||
|     DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, |     SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, | ||||||
|     DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, |     DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, | ||||||
| }; | }; | ||||||
| use crate::search_queue::SearchQueue; | use crate::search_queue::SearchQueue; | ||||||
| @@ -46,6 +46,8 @@ pub struct FacetSearchQuery { | |||||||
|     pub matching_strategy: MatchingStrategy, |     pub matching_strategy: MatchingStrategy, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)] | ||||||
|     pub attributes_to_search_on: Option<Vec<String>>, |     pub attributes_to_search_on: Option<Vec<String>>, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)] | ||||||
|  |     pub ranking_score_threshold: Option<RankingScoreThreshold>, | ||||||
| } | } | ||||||
|  |  | ||||||
| pub async fn search( | pub async fn search( | ||||||
| @@ -103,6 +105,7 @@ impl From<FacetSearchQuery> for SearchQuery { | |||||||
|             matching_strategy, |             matching_strategy, | ||||||
|             attributes_to_search_on, |             attributes_to_search_on, | ||||||
|             hybrid, |             hybrid, | ||||||
|  |             ranking_score_threshold, | ||||||
|         } = value; |         } = value; | ||||||
|  |  | ||||||
|         SearchQuery { |         SearchQuery { | ||||||
| @@ -112,6 +115,7 @@ impl From<FacetSearchQuery> for SearchQuery { | |||||||
|             page: None, |             page: None, | ||||||
|             hits_per_page: None, |             hits_per_page: None, | ||||||
|             attributes_to_retrieve: None, |             attributes_to_retrieve: None, | ||||||
|  |             retrieve_vectors: false, | ||||||
|             attributes_to_crop: None, |             attributes_to_crop: None, | ||||||
|             crop_length: DEFAULT_CROP_LENGTH(), |             crop_length: DEFAULT_CROP_LENGTH(), | ||||||
|             attributes_to_highlight: None, |             attributes_to_highlight: None, | ||||||
| @@ -120,6 +124,7 @@ impl From<FacetSearchQuery> for SearchQuery { | |||||||
|             show_ranking_score_details: false, |             show_ranking_score_details: false, | ||||||
|             filter, |             filter, | ||||||
|             sort: None, |             sort: None, | ||||||
|  |             distinct: None, | ||||||
|             facets: None, |             facets: None, | ||||||
|             highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(), |             highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(), | ||||||
|             highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(), |             highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(), | ||||||
| @@ -128,6 +133,7 @@ impl From<FacetSearchQuery> for SearchQuery { | |||||||
|             vector, |             vector, | ||||||
|             attributes_to_search_on, |             attributes_to_search_on, | ||||||
|             hybrid, |             hybrid, | ||||||
|  |             ranking_score_threshold, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -19,9 +19,10 @@ use crate::extractors::authentication::GuardedData; | |||||||
| use crate::extractors::sequential_extractor::SeqHandler; | use crate::extractors::sequential_extractor::SeqHandler; | ||||||
| use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; | use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; | ||||||
| use crate::search::{ | use crate::search::{ | ||||||
|     add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchKind, SearchQuery, |     add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, | ||||||
|     SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, |     RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, | ||||||
|     DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, |     DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, | ||||||
|  |     DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, | ||||||
| }; | }; | ||||||
| use crate::search_queue::SearchQueue; | use crate::search_queue::SearchQueue; | ||||||
|  |  | ||||||
| @@ -50,6 +51,8 @@ pub struct SearchQueryGet { | |||||||
|     hits_per_page: Option<Param<usize>>, |     hits_per_page: Option<Param<usize>>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)] | ||||||
|     attributes_to_retrieve: Option<CS<String>>, |     attributes_to_retrieve: Option<CS<String>>, | ||||||
|  |     #[deserr(default, error = DeserrQueryParamError<InvalidSearchRetrieveVectors>)] | ||||||
|  |     retrieve_vectors: Param<bool>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)] | ||||||
|     attributes_to_crop: Option<CS<String>>, |     attributes_to_crop: Option<CS<String>>, | ||||||
|     #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)] |     #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)] | ||||||
| @@ -60,6 +63,8 @@ pub struct SearchQueryGet { | |||||||
|     filter: Option<String>, |     filter: Option<String>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)] | ||||||
|     sort: Option<String>, |     sort: Option<String>, | ||||||
|  |     #[deserr(default, error = DeserrQueryParamError<InvalidSearchDistinct>)] | ||||||
|  |     distinct: Option<String>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)] | ||||||
|     show_matches_position: Param<bool>, |     show_matches_position: Param<bool>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)] | ||||||
| @@ -82,6 +87,21 @@ pub struct SearchQueryGet { | |||||||
|     pub hybrid_embedder: Option<String>, |     pub hybrid_embedder: Option<String>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)] | ||||||
|     pub hybrid_semantic_ratio: Option<SemanticRatioGet>, |     pub hybrid_semantic_ratio: Option<SemanticRatioGet>, | ||||||
|  |     #[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)] | ||||||
|  |     pub ranking_score_threshold: Option<RankingScoreThresholdGet>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] | ||||||
|  | #[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)] | ||||||
|  | pub struct RankingScoreThresholdGet(RankingScoreThreshold); | ||||||
|  |  | ||||||
|  | impl std::convert::TryFrom<String> for RankingScoreThresholdGet { | ||||||
|  |     type Error = InvalidSearchRankingScoreThreshold; | ||||||
|  |  | ||||||
|  |     fn try_from(s: String) -> Result<Self, Self::Error> { | ||||||
|  |         let f: f64 = s.parse().map_err(|_| InvalidSearchRankingScoreThreshold)?; | ||||||
|  |         Ok(RankingScoreThresholdGet(RankingScoreThreshold::try_from(f)?)) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)] | #[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)] | ||||||
| @@ -137,11 +157,13 @@ impl From<SearchQueryGet> for SearchQuery { | |||||||
|             page: other.page.as_deref().copied(), |             page: other.page.as_deref().copied(), | ||||||
|             hits_per_page: other.hits_per_page.as_deref().copied(), |             hits_per_page: other.hits_per_page.as_deref().copied(), | ||||||
|             attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()), |             attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()), | ||||||
|  |             retrieve_vectors: other.retrieve_vectors.0, | ||||||
|             attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()), |             attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()), | ||||||
|             crop_length: other.crop_length.0, |             crop_length: other.crop_length.0, | ||||||
|             attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()), |             attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()), | ||||||
|             filter, |             filter, | ||||||
|             sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)), |             sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)), | ||||||
|  |             distinct: other.distinct, | ||||||
|             show_matches_position: other.show_matches_position.0, |             show_matches_position: other.show_matches_position.0, | ||||||
|             show_ranking_score: other.show_ranking_score.0, |             show_ranking_score: other.show_ranking_score.0, | ||||||
|             show_ranking_score_details: other.show_ranking_score_details.0, |             show_ranking_score_details: other.show_ranking_score_details.0, | ||||||
| @@ -152,6 +174,7 @@ impl From<SearchQueryGet> for SearchQuery { | |||||||
|             matching_strategy: other.matching_strategy, |             matching_strategy: other.matching_strategy, | ||||||
|             attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()), |             attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()), | ||||||
|             hybrid, |             hybrid, | ||||||
|  |             ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -205,10 +228,12 @@ pub async fn search_with_url_query( | |||||||
|     let features = index_scheduler.features(); |     let features = index_scheduler.features(); | ||||||
|  |  | ||||||
|     let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; |     let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; | ||||||
|  |     let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?; | ||||||
|     let _permit = search_queue.try_get_search_permit().await?; |     let _permit = search_queue.try_get_search_permit().await?; | ||||||
|     let search_result = |     let search_result = tokio::task::spawn_blocking(move || { | ||||||
|         tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind)).await?; |         perform_search(&index, query, search_kind, retrieve_vector) | ||||||
|  |     }) | ||||||
|  |     .await?; | ||||||
|     if let Ok(ref search_result) = search_result { |     if let Ok(ref search_result) = search_result { | ||||||
|         aggregate.succeed(search_result); |         aggregate.succeed(search_result); | ||||||
|     } |     } | ||||||
| @@ -245,10 +270,13 @@ pub async fn search_with_post( | |||||||
|     let features = index_scheduler.features(); |     let features = index_scheduler.features(); | ||||||
|  |  | ||||||
|     let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; |     let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; | ||||||
|  |     let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?; | ||||||
|  |  | ||||||
|     let _permit = search_queue.try_get_search_permit().await?; |     let _permit = search_queue.try_get_search_permit().await?; | ||||||
|     let search_result = |     let search_result = tokio::task::spawn_blocking(move || { | ||||||
|         tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind)).await?; |         perform_search(&index, query, search_kind, retrieve_vectors) | ||||||
|  |     }) | ||||||
|  |     .await?; | ||||||
|     if let Ok(ref search_result) = search_result { |     if let Ok(ref search_result) = search_result { | ||||||
|         aggregate.succeed(search_result); |         aggregate.succeed(search_result); | ||||||
|         if search_result.degraded { |         if search_result.degraded { | ||||||
| @@ -270,11 +298,10 @@ pub fn search_kind( | |||||||
|     features: RoFeatures, |     features: RoFeatures, | ||||||
| ) -> Result<SearchKind, ResponseError> { | ) -> Result<SearchKind, ResponseError> { | ||||||
|     if query.vector.is_some() { |     if query.vector.is_some() { | ||||||
|         features.check_vector("Passing `vector` as a query parameter")?; |         features.check_vector("Passing `vector` as a parameter")?; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if query.hybrid.is_some() { |     if query.hybrid.is_some() { | ||||||
|         features.check_vector("Passing `hybrid` as a query parameter")?; |         features.check_vector("Passing `hybrid` as a parameter")?; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing |     // regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing | ||||||
|   | |||||||
| @@ -4,11 +4,7 @@ use deserr::actix_web::{AwebJson, AwebQueryParameter}; | |||||||
| use index_scheduler::IndexScheduler; | use index_scheduler::IndexScheduler; | ||||||
| use meilisearch_types::deserr::query_params::Param; | use meilisearch_types::deserr::query_params::Param; | ||||||
| use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; | use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; | ||||||
| use meilisearch_types::error::deserr_codes::{ | use meilisearch_types::error::deserr_codes::*; | ||||||
|     InvalidEmbedder, InvalidSimilarAttributesToRetrieve, InvalidSimilarFilter, InvalidSimilarId, |  | ||||||
|     InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarShowRankingScore, |  | ||||||
|     InvalidSimilarShowRankingScoreDetails, |  | ||||||
| }; |  | ||||||
| use meilisearch_types::error::{ErrorCode as _, ResponseError}; | use meilisearch_types::error::{ErrorCode as _, ResponseError}; | ||||||
| use meilisearch_types::index_uid::IndexUid; | use meilisearch_types::index_uid::IndexUid; | ||||||
| use meilisearch_types::keys::actions; | use meilisearch_types::keys::actions; | ||||||
| @@ -21,8 +17,8 @@ use crate::analytics::{Analytics, SimilarAggregator}; | |||||||
| use crate::extractors::authentication::GuardedData; | use crate::extractors::authentication::GuardedData; | ||||||
| use crate::extractors::sequential_extractor::SeqHandler; | use crate::extractors::sequential_extractor::SeqHandler; | ||||||
| use crate::search::{ | use crate::search::{ | ||||||
|     add_search_rules, perform_similar, SearchKind, SimilarQuery, SimilarResult, |     add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind, | ||||||
|     DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, |     SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| pub fn configure(cfg: &mut web::ServiceConfig) { | pub fn configure(cfg: &mut web::ServiceConfig) { | ||||||
| @@ -42,9 +38,7 @@ pub async fn similar_get( | |||||||
| ) -> Result<HttpResponse, ResponseError> { | ) -> Result<HttpResponse, ResponseError> { | ||||||
|     let index_uid = IndexUid::try_from(index_uid.into_inner())?; |     let index_uid = IndexUid::try_from(index_uid.into_inner())?; | ||||||
|  |  | ||||||
|     let query = params.0.try_into().map_err(|code: InvalidSimilarId| { |     let query = params.0.try_into()?; | ||||||
|         ResponseError::from_msg(code.to_string(), code.error_code()) |  | ||||||
|     })?; |  | ||||||
|  |  | ||||||
|     let mut aggregate = SimilarAggregator::from_query(&query, &req); |     let mut aggregate = SimilarAggregator::from_query(&query, &req); | ||||||
|  |  | ||||||
| @@ -99,6 +93,8 @@ async fn similar( | |||||||
|  |  | ||||||
|     features.check_vector("Using the similar API")?; |     features.check_vector("Using the similar API")?; | ||||||
|  |  | ||||||
|  |     let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?; | ||||||
|  |  | ||||||
|     // Tenant token search_rules. |     // Tenant token search_rules. | ||||||
|     if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { |     if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { | ||||||
|         add_search_rules(&mut query.filter, search_rules); |         add_search_rules(&mut query.filter, search_rules); | ||||||
| @@ -109,7 +105,9 @@ async fn similar( | |||||||
|     let (embedder_name, embedder) = |     let (embedder_name, embedder) = | ||||||
|         SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; |         SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; | ||||||
|  |  | ||||||
|     tokio::task::spawn_blocking(move || perform_similar(&index, query, embedder_name, embedder)) |     tokio::task::spawn_blocking(move || { | ||||||
|  |         perform_similar(&index, query, embedder_name, embedder, retrieve_vectors) | ||||||
|  |     }) | ||||||
|     .await? |     .await? | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -124,18 +122,35 @@ pub struct SimilarQueryGet { | |||||||
|     limit: Param<usize>, |     limit: Param<usize>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarAttributesToRetrieve>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarAttributesToRetrieve>)] | ||||||
|     attributes_to_retrieve: Option<CS<String>>, |     attributes_to_retrieve: Option<CS<String>>, | ||||||
|  |     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarRetrieveVectors>)] | ||||||
|  |     retrieve_vectors: Param<bool>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarFilter>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarFilter>)] | ||||||
|     filter: Option<String>, |     filter: Option<String>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScore>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScore>)] | ||||||
|     show_ranking_score: Param<bool>, |     show_ranking_score: Param<bool>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)] | ||||||
|     show_ranking_score_details: Param<bool>, |     show_ranking_score_details: Param<bool>, | ||||||
|  |     #[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)] | ||||||
|  |     pub ranking_score_threshold: Option<RankingScoreThresholdGet>, | ||||||
|     #[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)] |     #[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)] | ||||||
|     pub embedder: Option<String>, |     pub embedder: Option<String>, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] | ||||||
|  | #[deserr(try_from(String) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)] | ||||||
|  | pub struct RankingScoreThresholdGet(RankingScoreThresholdSimilar); | ||||||
|  |  | ||||||
|  | impl std::convert::TryFrom<String> for RankingScoreThresholdGet { | ||||||
|  |     type Error = InvalidSimilarRankingScoreThreshold; | ||||||
|  |  | ||||||
|  |     fn try_from(s: String) -> Result<Self, Self::Error> { | ||||||
|  |         let f: f64 = s.parse().map_err(|_| InvalidSimilarRankingScoreThreshold)?; | ||||||
|  |         Ok(RankingScoreThresholdGet(RankingScoreThresholdSimilar::try_from(f)?)) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| impl TryFrom<SimilarQueryGet> for SimilarQuery { | impl TryFrom<SimilarQueryGet> for SimilarQuery { | ||||||
|     type Error = InvalidSimilarId; |     type Error = ResponseError; | ||||||
|  |  | ||||||
|     fn try_from( |     fn try_from( | ||||||
|         SimilarQueryGet { |         SimilarQueryGet { | ||||||
| @@ -143,10 +158,12 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery { | |||||||
|             offset, |             offset, | ||||||
|             limit, |             limit, | ||||||
|             attributes_to_retrieve, |             attributes_to_retrieve, | ||||||
|  |             retrieve_vectors, | ||||||
|             filter, |             filter, | ||||||
|             show_ranking_score, |             show_ranking_score, | ||||||
|             show_ranking_score_details, |             show_ranking_score_details, | ||||||
|             embedder, |             embedder, | ||||||
|  |             ranking_score_threshold, | ||||||
|         }: SimilarQueryGet, |         }: SimilarQueryGet, | ||||||
|     ) -> Result<Self, Self::Error> { |     ) -> Result<Self, Self::Error> { | ||||||
|         let filter = match filter { |         let filter = match filter { | ||||||
| @@ -158,14 +175,18 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery { | |||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         Ok(SimilarQuery { |         Ok(SimilarQuery { | ||||||
|             id: id.0.try_into()?, |             id: id.0.try_into().map_err(|code: InvalidSimilarId| { | ||||||
|  |                 ResponseError::from_msg(code.to_string(), code.error_code()) | ||||||
|  |             })?, | ||||||
|             offset: offset.0, |             offset: offset.0, | ||||||
|             limit: limit.0, |             limit: limit.0, | ||||||
|             filter, |             filter, | ||||||
|             embedder, |             embedder, | ||||||
|             attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()), |             attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()), | ||||||
|  |             retrieve_vectors: retrieve_vectors.0, | ||||||
|             show_ranking_score: show_ranking_score.0, |             show_ranking_score: show_ranking_score.0, | ||||||
|             show_ranking_score_details: show_ranking_score_details.0, |             show_ranking_score_details: show_ranking_score_details.0, | ||||||
|  |             ranking_score_threshold: ranking_score_threshold.map(|x| x.0), | ||||||
|         }) |         }) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -15,7 +15,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData}; | |||||||
| use crate::extractors::sequential_extractor::SeqHandler; | use crate::extractors::sequential_extractor::SeqHandler; | ||||||
| use crate::routes::indexes::search::search_kind; | use crate::routes::indexes::search::search_kind; | ||||||
| use crate::search::{ | use crate::search::{ | ||||||
|     add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex, |     add_search_rules, perform_search, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, | ||||||
| }; | }; | ||||||
| use crate::search_queue::SearchQueue; | use crate::search_queue::SearchQueue; | ||||||
|  |  | ||||||
| @@ -83,9 +83,12 @@ pub async fn multi_search_with_post( | |||||||
|  |  | ||||||
|             let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features) |             let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features) | ||||||
|                 .with_index(query_index)?; |                 .with_index(query_index)?; | ||||||
|  |             let retrieve_vector = | ||||||
|  |                 RetrieveVectors::new(query.retrieve_vectors, features).with_index(query_index)?; | ||||||
|  |  | ||||||
|             let search_result = |             let search_result = tokio::task::spawn_blocking(move || { | ||||||
|                 tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind)) |                 perform_search(&index, query, search_kind, retrieve_vector) | ||||||
|  |             }) | ||||||
|             .await |             .await | ||||||
|             .with_index(query_index)?; |             .with_index(query_index)?; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -15,6 +15,7 @@ use meilisearch_types::error::{Code, ResponseError}; | |||||||
| use meilisearch_types::heed::RoTxn; | use meilisearch_types::heed::RoTxn; | ||||||
| use meilisearch_types::index_uid::IndexUid; | use meilisearch_types::index_uid::IndexUid; | ||||||
| use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; | use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; | ||||||
|  | use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors; | ||||||
| use meilisearch_types::milli::vector::Embedder; | use meilisearch_types::milli::vector::Embedder; | ||||||
| use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget}; | use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget}; | ||||||
| use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; | use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; | ||||||
| @@ -59,6 +60,8 @@ pub struct SearchQuery { | |||||||
|     pub hits_per_page: Option<usize>, |     pub hits_per_page: Option<usize>, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)] | ||||||
|     pub attributes_to_retrieve: Option<BTreeSet<String>>, |     pub attributes_to_retrieve: Option<BTreeSet<String>>, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidSearchRetrieveVectors>)] | ||||||
|  |     pub retrieve_vectors: bool, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)] | ||||||
|     pub attributes_to_crop: Option<Vec<String>>, |     pub attributes_to_crop: Option<Vec<String>>, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())] |     #[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())] | ||||||
| @@ -75,6 +78,8 @@ pub struct SearchQuery { | |||||||
|     pub filter: Option<Value>, |     pub filter: Option<Value>, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)] | ||||||
|     pub sort: Option<Vec<String>>, |     pub sort: Option<Vec<String>>, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidSearchDistinct>)] | ||||||
|  |     pub distinct: Option<String>, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)] | ||||||
|     pub facets: Option<Vec<String>>, |     pub facets: Option<Vec<String>>, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())] |     #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())] | ||||||
| @@ -87,6 +92,44 @@ pub struct SearchQuery { | |||||||
|     pub matching_strategy: MatchingStrategy, |     pub matching_strategy: MatchingStrategy, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)] | ||||||
|     pub attributes_to_search_on: Option<Vec<String>>, |     pub attributes_to_search_on: Option<Vec<String>>, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)] | ||||||
|  |     pub ranking_score_threshold: Option<RankingScoreThreshold>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, Clone, Copy, PartialEq, Deserr)] | ||||||
|  | #[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)] | ||||||
|  | pub struct RankingScoreThreshold(f64); | ||||||
|  |  | ||||||
|  | impl std::convert::TryFrom<f64> for RankingScoreThreshold { | ||||||
|  |     type Error = InvalidSearchRankingScoreThreshold; | ||||||
|  |  | ||||||
|  |     fn try_from(f: f64) -> Result<Self, Self::Error> { | ||||||
|  |         // the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable | ||||||
|  |         #[allow(clippy::manual_range_contains)] | ||||||
|  |         if f > 1.0 || f < 0.0 { | ||||||
|  |             Err(InvalidSearchRankingScoreThreshold) | ||||||
|  |         } else { | ||||||
|  |             Ok(RankingScoreThreshold(f)) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, Clone, Copy, PartialEq, Deserr)] | ||||||
|  | #[deserr(try_from(f64) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)] | ||||||
|  | pub struct RankingScoreThresholdSimilar(f64); | ||||||
|  |  | ||||||
|  | impl std::convert::TryFrom<f64> for RankingScoreThresholdSimilar { | ||||||
|  |     type Error = InvalidSimilarRankingScoreThreshold; | ||||||
|  |  | ||||||
|  |     fn try_from(f: f64) -> Result<Self, Self::Error> { | ||||||
|  |         // the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable | ||||||
|  |         #[allow(clippy::manual_range_contains)] | ||||||
|  |         if f > 1.0 || f < 0.0 { | ||||||
|  |             Err(InvalidSimilarRankingScoreThreshold) | ||||||
|  |         } else { | ||||||
|  |             Ok(Self(f)) | ||||||
|  |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| // Since this structure is logged A LOT we're going to reduce the number of things it logs to the bare minimum. | // Since this structure is logged A LOT we're going to reduce the number of things it logs to the bare minimum. | ||||||
| @@ -103,6 +146,7 @@ impl fmt::Debug for SearchQuery { | |||||||
|             page, |             page, | ||||||
|             hits_per_page, |             hits_per_page, | ||||||
|             attributes_to_retrieve, |             attributes_to_retrieve, | ||||||
|  |             retrieve_vectors, | ||||||
|             attributes_to_crop, |             attributes_to_crop, | ||||||
|             crop_length, |             crop_length, | ||||||
|             attributes_to_highlight, |             attributes_to_highlight, | ||||||
| @@ -111,12 +155,14 @@ impl fmt::Debug for SearchQuery { | |||||||
|             show_ranking_score_details, |             show_ranking_score_details, | ||||||
|             filter, |             filter, | ||||||
|             sort, |             sort, | ||||||
|  |             distinct, | ||||||
|             facets, |             facets, | ||||||
|             highlight_pre_tag, |             highlight_pre_tag, | ||||||
|             highlight_post_tag, |             highlight_post_tag, | ||||||
|             crop_marker, |             crop_marker, | ||||||
|             matching_strategy, |             matching_strategy, | ||||||
|             attributes_to_search_on, |             attributes_to_search_on, | ||||||
|  |             ranking_score_threshold, | ||||||
|         } = self; |         } = self; | ||||||
|  |  | ||||||
|         let mut debug = f.debug_struct("SearchQuery"); |         let mut debug = f.debug_struct("SearchQuery"); | ||||||
| @@ -134,6 +180,9 @@ impl fmt::Debug for SearchQuery { | |||||||
|         if let Some(q) = q { |         if let Some(q) = q { | ||||||
|             debug.field("q", &q); |             debug.field("q", &q); | ||||||
|         } |         } | ||||||
|  |         if *retrieve_vectors { | ||||||
|  |             debug.field("retrieve_vectors", &retrieve_vectors); | ||||||
|  |         } | ||||||
|         if let Some(v) = vector { |         if let Some(v) = vector { | ||||||
|             if v.len() < 10 { |             if v.len() < 10 { | ||||||
|                 debug.field("vector", &v); |                 debug.field("vector", &v); | ||||||
| @@ -156,6 +205,9 @@ impl fmt::Debug for SearchQuery { | |||||||
|         if let Some(sort) = sort { |         if let Some(sort) = sort { | ||||||
|             debug.field("sort", &sort); |             debug.field("sort", &sort); | ||||||
|         } |         } | ||||||
|  |         if let Some(distinct) = distinct { | ||||||
|  |             debug.field("distinct", &distinct); | ||||||
|  |         } | ||||||
|         if let Some(facets) = facets { |         if let Some(facets) = facets { | ||||||
|             debug.field("facets", &facets); |             debug.field("facets", &facets); | ||||||
|         } |         } | ||||||
| @@ -188,6 +240,9 @@ impl fmt::Debug for SearchQuery { | |||||||
|         debug.field("highlight_pre_tag", &highlight_pre_tag); |         debug.field("highlight_pre_tag", &highlight_pre_tag); | ||||||
|         debug.field("highlight_post_tag", &highlight_post_tag); |         debug.field("highlight_post_tag", &highlight_post_tag); | ||||||
|         debug.field("crop_marker", &crop_marker); |         debug.field("crop_marker", &crop_marker); | ||||||
|  |         if let Some(ranking_score_threshold) = ranking_score_threshold { | ||||||
|  |             debug.field("ranking_score_threshold", &ranking_score_threshold); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         debug.finish() |         debug.finish() | ||||||
|     } |     } | ||||||
| @@ -328,6 +383,8 @@ pub struct SearchQueryWithIndex { | |||||||
|     pub hits_per_page: Option<usize>, |     pub hits_per_page: Option<usize>, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)] | ||||||
|     pub attributes_to_retrieve: Option<BTreeSet<String>>, |     pub attributes_to_retrieve: Option<BTreeSet<String>>, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidSearchRetrieveVectors>)] | ||||||
|  |     pub retrieve_vectors: bool, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)] | ||||||
|     pub attributes_to_crop: Option<Vec<String>>, |     pub attributes_to_crop: Option<Vec<String>>, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())] |     #[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())] | ||||||
| @@ -344,6 +401,8 @@ pub struct SearchQueryWithIndex { | |||||||
|     pub filter: Option<Value>, |     pub filter: Option<Value>, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)] | ||||||
|     pub sort: Option<Vec<String>>, |     pub sort: Option<Vec<String>>, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidSearchDistinct>)] | ||||||
|  |     pub distinct: Option<String>, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)] | ||||||
|     pub facets: Option<Vec<String>>, |     pub facets: Option<Vec<String>>, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())] |     #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())] | ||||||
| @@ -356,6 +415,8 @@ pub struct SearchQueryWithIndex { | |||||||
|     pub matching_strategy: MatchingStrategy, |     pub matching_strategy: MatchingStrategy, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)] |     #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)] | ||||||
|     pub attributes_to_search_on: Option<Vec<String>>, |     pub attributes_to_search_on: Option<Vec<String>>, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)] | ||||||
|  |     pub ranking_score_threshold: Option<RankingScoreThreshold>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl SearchQueryWithIndex { | impl SearchQueryWithIndex { | ||||||
| @@ -369,6 +430,7 @@ impl SearchQueryWithIndex { | |||||||
|             page, |             page, | ||||||
|             hits_per_page, |             hits_per_page, | ||||||
|             attributes_to_retrieve, |             attributes_to_retrieve, | ||||||
|  |             retrieve_vectors, | ||||||
|             attributes_to_crop, |             attributes_to_crop, | ||||||
|             crop_length, |             crop_length, | ||||||
|             attributes_to_highlight, |             attributes_to_highlight, | ||||||
| @@ -377,6 +439,7 @@ impl SearchQueryWithIndex { | |||||||
|             show_matches_position, |             show_matches_position, | ||||||
|             filter, |             filter, | ||||||
|             sort, |             sort, | ||||||
|  |             distinct, | ||||||
|             facets, |             facets, | ||||||
|             highlight_pre_tag, |             highlight_pre_tag, | ||||||
|             highlight_post_tag, |             highlight_post_tag, | ||||||
| @@ -384,6 +447,7 @@ impl SearchQueryWithIndex { | |||||||
|             matching_strategy, |             matching_strategy, | ||||||
|             attributes_to_search_on, |             attributes_to_search_on, | ||||||
|             hybrid, |             hybrid, | ||||||
|  |             ranking_score_threshold, | ||||||
|         } = self; |         } = self; | ||||||
|         ( |         ( | ||||||
|             index_uid, |             index_uid, | ||||||
| @@ -395,6 +459,7 @@ impl SearchQueryWithIndex { | |||||||
|                 page, |                 page, | ||||||
|                 hits_per_page, |                 hits_per_page, | ||||||
|                 attributes_to_retrieve, |                 attributes_to_retrieve, | ||||||
|  |                 retrieve_vectors, | ||||||
|                 attributes_to_crop, |                 attributes_to_crop, | ||||||
|                 crop_length, |                 crop_length, | ||||||
|                 attributes_to_highlight, |                 attributes_to_highlight, | ||||||
| @@ -403,6 +468,7 @@ impl SearchQueryWithIndex { | |||||||
|                 show_matches_position, |                 show_matches_position, | ||||||
|                 filter, |                 filter, | ||||||
|                 sort, |                 sort, | ||||||
|  |                 distinct, | ||||||
|                 facets, |                 facets, | ||||||
|                 highlight_pre_tag, |                 highlight_pre_tag, | ||||||
|                 highlight_post_tag, |                 highlight_post_tag, | ||||||
| @@ -410,6 +476,7 @@ impl SearchQueryWithIndex { | |||||||
|                 matching_strategy, |                 matching_strategy, | ||||||
|                 attributes_to_search_on, |                 attributes_to_search_on, | ||||||
|                 hybrid, |                 hybrid, | ||||||
|  |                 ranking_score_threshold, | ||||||
|                 // do not use ..Default::default() here, |                 // do not use ..Default::default() here, | ||||||
|                 // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex` |                 // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex` | ||||||
|             }, |             }, | ||||||
| @@ -432,10 +499,14 @@ pub struct SimilarQuery { | |||||||
|     pub embedder: Option<String>, |     pub embedder: Option<String>, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)] |     #[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)] | ||||||
|     pub attributes_to_retrieve: Option<BTreeSet<String>>, |     pub attributes_to_retrieve: Option<BTreeSet<String>>, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidSimilarRetrieveVectors>)] | ||||||
|  |     pub retrieve_vectors: bool, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScore>, default)] |     #[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScore>, default)] | ||||||
|     pub show_ranking_score: bool, |     pub show_ranking_score: bool, | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)] |     #[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)] | ||||||
|     pub show_ranking_score_details: bool, |     pub show_ranking_score_details: bool, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidSimilarRankingScoreThreshold>, default)] | ||||||
|  |     pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, PartialEq, Deserr)] | #[derive(Debug, Clone, PartialEq, Deserr)] | ||||||
| @@ -664,6 +735,13 @@ fn prepare_search<'t>( | |||||||
| ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { | ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { | ||||||
|     let mut search = index.search(rtxn); |     let mut search = index.search(rtxn); | ||||||
|     search.time_budget(time_budget); |     search.time_budget(time_budget); | ||||||
|  |     if let Some(ranking_score_threshold) = query.ranking_score_threshold { | ||||||
|  |         search.ranking_score_threshold(ranking_score_threshold.0); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if let Some(distinct) = &query.distinct { | ||||||
|  |         search.distinct(distinct.clone()); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     match search_kind { |     match search_kind { | ||||||
|         SearchKind::KeywordOnly => { |         SearchKind::KeywordOnly => { | ||||||
| @@ -705,11 +783,16 @@ fn prepare_search<'t>( | |||||||
|         .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); |         .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); | ||||||
|  |  | ||||||
|     search.exhaustive_number_hits(is_finite_pagination); |     search.exhaustive_number_hits(is_finite_pagination); | ||||||
|     search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details { |     search.scoring_strategy( | ||||||
|  |         if query.show_ranking_score | ||||||
|  |             || query.show_ranking_score_details | ||||||
|  |             || query.ranking_score_threshold.is_some() | ||||||
|  |         { | ||||||
|             ScoringStrategy::Detailed |             ScoringStrategy::Detailed | ||||||
|         } else { |         } else { | ||||||
|             ScoringStrategy::Skip |             ScoringStrategy::Skip | ||||||
|     }); |         }, | ||||||
|  |     ); | ||||||
|  |  | ||||||
|     // compute the offset on the limit depending on the pagination mode. |     // compute the offset on the limit depending on the pagination mode. | ||||||
|     let (offset, limit) = if is_finite_pagination { |     let (offset, limit) = if is_finite_pagination { | ||||||
| @@ -754,6 +837,7 @@ pub fn perform_search( | |||||||
|     index: &Index, |     index: &Index, | ||||||
|     query: SearchQuery, |     query: SearchQuery, | ||||||
|     search_kind: SearchKind, |     search_kind: SearchKind, | ||||||
|  |     retrieve_vectors: RetrieveVectors, | ||||||
| ) -> Result<SearchResult, MeilisearchHttpError> { | ) -> Result<SearchResult, MeilisearchHttpError> { | ||||||
|     let before_search = Instant::now(); |     let before_search = Instant::now(); | ||||||
|     let rtxn = index.read_txn()?; |     let rtxn = index.read_txn()?; | ||||||
| @@ -787,32 +871,37 @@ pub fn perform_search( | |||||||
|  |  | ||||||
|     let SearchQuery { |     let SearchQuery { | ||||||
|         q, |         q, | ||||||
|         vector: _, |  | ||||||
|         hybrid: _, |  | ||||||
|         // already computed from prepare_search |  | ||||||
|         offset: _, |  | ||||||
|         limit, |         limit, | ||||||
|         page, |         page, | ||||||
|         hits_per_page, |         hits_per_page, | ||||||
|         attributes_to_retrieve, |         attributes_to_retrieve, | ||||||
|  |         // use the enum passed as parameter | ||||||
|  |         retrieve_vectors: _, | ||||||
|         attributes_to_crop, |         attributes_to_crop, | ||||||
|         crop_length, |         crop_length, | ||||||
|         attributes_to_highlight, |         attributes_to_highlight, | ||||||
|         show_matches_position, |         show_matches_position, | ||||||
|         show_ranking_score, |         show_ranking_score, | ||||||
|         show_ranking_score_details, |         show_ranking_score_details, | ||||||
|         filter: _, |  | ||||||
|         sort, |         sort, | ||||||
|         facets, |         facets, | ||||||
|         highlight_pre_tag, |         highlight_pre_tag, | ||||||
|         highlight_post_tag, |         highlight_post_tag, | ||||||
|         crop_marker, |         crop_marker, | ||||||
|  |         // already used in prepare_search | ||||||
|  |         vector: _, | ||||||
|  |         hybrid: _, | ||||||
|  |         offset: _, | ||||||
|  |         ranking_score_threshold: _, | ||||||
|         matching_strategy: _, |         matching_strategy: _, | ||||||
|         attributes_to_search_on: _, |         attributes_to_search_on: _, | ||||||
|  |         filter: _, | ||||||
|  |         distinct: _, | ||||||
|     } = query; |     } = query; | ||||||
|  |  | ||||||
|     let format = AttributesFormat { |     let format = AttributesFormat { | ||||||
|         attributes_to_retrieve, |         attributes_to_retrieve, | ||||||
|  |         retrieve_vectors, | ||||||
|         attributes_to_highlight, |         attributes_to_highlight, | ||||||
|         attributes_to_crop, |         attributes_to_crop, | ||||||
|         crop_length, |         crop_length, | ||||||
| @@ -896,6 +985,7 @@ pub fn perform_search( | |||||||
|  |  | ||||||
| struct AttributesFormat { | struct AttributesFormat { | ||||||
|     attributes_to_retrieve: Option<BTreeSet<String>>, |     attributes_to_retrieve: Option<BTreeSet<String>>, | ||||||
|  |     retrieve_vectors: RetrieveVectors, | ||||||
|     attributes_to_highlight: Option<HashSet<String>>, |     attributes_to_highlight: Option<HashSet<String>>, | ||||||
|     attributes_to_crop: Option<Vec<String>>, |     attributes_to_crop: Option<Vec<String>>, | ||||||
|     crop_length: usize, |     crop_length: usize, | ||||||
| @@ -908,6 +998,36 @@ struct AttributesFormat { | |||||||
|     show_ranking_score_details: bool, |     show_ranking_score_details: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// How the `_vectors` field of a returned document is handled when building hits. | ||||||
|  | /// | ||||||
|  | /// Built by `RetrieveVectors::new` from the `retrieveVectors` request flag and the | ||||||
|  | /// result of the vector feature check. | ||||||
|  | #[derive(Debug, Clone, Copy, PartialEq, Eq)] | ||||||
|  | pub enum RetrieveVectors { | ||||||
|  |     /// Do not touch the `_vectors` field | ||||||
|  |     /// | ||||||
|  |     /// this is the behavior when the vectorStore feature is disabled | ||||||
|  |     Ignore, | ||||||
|  |     /// Remove the `_vectors` field | ||||||
|  |     /// | ||||||
|  |     /// this is the behavior when the vectorStore feature is enabled, and `retrieveVectors` is `false` | ||||||
|  |     Hide, | ||||||
|  |     /// Retrieve vectors from the DB and merge them into the `_vectors` field | ||||||
|  |     /// | ||||||
|  |     /// this is the behavior when the vectorStore feature is enabled, and `retrieveVectors` is `true` | ||||||
|  |     Retrieve, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl RetrieveVectors { | ||||||
|  |     /// Selects the variant from the request flag and the result of `features.check_vector(..)`. | ||||||
|  |     /// | ||||||
|  |     /// - flag `true`, check passes: `Retrieve` | ||||||
|  |     /// - flag `false`, check passes: `Hide` | ||||||
|  |     /// - flag `false`, check fails: `Ignore` (the check error is discarded) | ||||||
|  |     /// | ||||||
|  |     /// # Errors | ||||||
|  |     /// | ||||||
|  |     /// Returns the error produced by `check_vector` only when `retrieve_vector` is `true`, | ||||||
|  |     /// i.e. when the caller explicitly asked for vectors and the check did not pass. | ||||||
|  |     pub fn new( | ||||||
|  |         retrieve_vector: bool, | ||||||
|  |         features: index_scheduler::RoFeatures, | ||||||
|  |     ) -> Result<Self, index_scheduler::Error> { | ||||||
|  |         match (retrieve_vector, features.check_vector("Passing `retrieveVectors` as a parameter")) { | ||||||
|  |             (true, Ok(())) => Ok(Self::Retrieve), | ||||||
|  |             (true, Err(error)) => Err(error), | ||||||
|  |             (false, Ok(())) => Ok(Self::Hide), | ||||||
|  |             // the feature check failed but vectors were not requested: not an error | ||||||
|  |             (false, Err(_)) => Ok(Self::Ignore), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| fn make_hits( | fn make_hits( | ||||||
|     index: &Index, |     index: &Index, | ||||||
|     rtxn: &RoTxn<'_>, |     rtxn: &RoTxn<'_>, | ||||||
| @@ -917,10 +1037,32 @@ fn make_hits( | |||||||
|     document_scores: Vec<Vec<ScoreDetails>>, |     document_scores: Vec<Vec<ScoreDetails>>, | ||||||
| ) -> Result<Vec<SearchHit>, MeilisearchHttpError> { | ) -> Result<Vec<SearchHit>, MeilisearchHttpError> { | ||||||
|     let fields_ids_map = index.fields_ids_map(rtxn).unwrap(); |     let fields_ids_map = index.fields_ids_map(rtxn).unwrap(); | ||||||
|     let displayed_ids = index |     let displayed_ids = | ||||||
|         .displayed_fields_ids(rtxn)? |         index.displayed_fields_ids(rtxn)?.map(|fields| fields.into_iter().collect::<BTreeSet<_>>()); | ||||||
|         .map(|fields| fields.into_iter().collect::<BTreeSet<_>>()) |  | ||||||
|         .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); |     let vectors_fid = fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME); | ||||||
|  |  | ||||||
|  |     let vectors_is_hidden = match (&displayed_ids, vectors_fid) { | ||||||
|  |         // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid | ||||||
|  |         (None, _) => false, | ||||||
|  |         // displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field | ||||||
|  |         (Some(_), None) => true, | ||||||
|  |         // displayed_ids is a finite list, so hide `_vectors` only if it is NOT part of it | ||||||
|  |         // (the condition must be negated: `contains` is true when the field is displayed) | ||||||
|  |         (Some(map), Some(vectors_fid)) => !map.contains(&vectors_fid), | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors { | ||||||
|  |         if vectors_is_hidden { | ||||||
|  |             RetrieveVectors::Hide | ||||||
|  |         } else { | ||||||
|  |             RetrieveVectors::Retrieve | ||||||
|  |         } | ||||||
|  |     } else { | ||||||
|  |         format.retrieve_vectors | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     let displayed_ids = | ||||||
|  |         displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); | ||||||
|     let fids = |attrs: &BTreeSet<String>| { |     let fids = |attrs: &BTreeSet<String>| { | ||||||
|         let mut ids = BTreeSet::new(); |         let mut ids = BTreeSet::new(); | ||||||
|         for attr in attrs { |         for attr in attrs { | ||||||
| @@ -943,6 +1085,7 @@ fn make_hits( | |||||||
|         .intersection(&displayed_ids) |         .intersection(&displayed_ids) | ||||||
|         .cloned() |         .cloned() | ||||||
|         .collect(); |         .collect(); | ||||||
|  |  | ||||||
|     let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default(); |     let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default(); | ||||||
|     let attr_to_crop = format.attributes_to_crop.unwrap_or_default(); |     let attr_to_crop = format.attributes_to_crop.unwrap_or_default(); | ||||||
|     let formatted_options = compute_formatted_options( |     let formatted_options = compute_formatted_options( | ||||||
| @@ -976,18 +1119,48 @@ fn make_hits( | |||||||
|     formatter_builder.highlight_prefix(format.highlight_pre_tag); |     formatter_builder.highlight_prefix(format.highlight_pre_tag); | ||||||
|     formatter_builder.highlight_suffix(format.highlight_post_tag); |     formatter_builder.highlight_suffix(format.highlight_post_tag); | ||||||
|     let mut documents = Vec::new(); |     let mut documents = Vec::new(); | ||||||
|  |     let embedding_configs = index.embedding_configs(rtxn)?; | ||||||
|     let documents_iter = index.documents(rtxn, documents_ids)?; |     let documents_iter = index.documents(rtxn, documents_ids)?; | ||||||
|     for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) { |     for ((id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) { | ||||||
|         // First generate a document with all the displayed fields |         // First generate a document with all the displayed fields | ||||||
|         let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?; |         let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?; | ||||||
|  |  | ||||||
|  |         let add_vectors_fid = | ||||||
|  |             vectors_fid.filter(|_fid| retrieve_vectors == RetrieveVectors::Retrieve); | ||||||
|  |  | ||||||
|         // select the attributes to retrieve |         // select the attributes to retrieve | ||||||
|         let attributes_to_retrieve = to_retrieve_ids |         let attributes_to_retrieve = to_retrieve_ids | ||||||
|             .iter() |             .iter() | ||||||
|  |             // skip the vectors_fid if RetrieveVectors::Hide | ||||||
|  |             .filter(|fid| match vectors_fid { | ||||||
|  |                 Some(vectors_fid) => { | ||||||
|  |                     !(retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid) | ||||||
|  |                 } | ||||||
|  |                 None => true, | ||||||
|  |             }) | ||||||
|  |             // need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve` | ||||||
|  |             .chain(add_vectors_fid.iter()) | ||||||
|             .map(|&fid| fields_ids_map.name(fid).expect("Missing field name")); |             .map(|&fid| fields_ids_map.name(fid).expect("Missing field name")); | ||||||
|         let mut document = |         let mut document = | ||||||
|             permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve); |             permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve); | ||||||
|  |  | ||||||
|  |         if retrieve_vectors == RetrieveVectors::Retrieve { | ||||||
|  |             let mut vectors = match document.remove("_vectors") { | ||||||
|  |                 Some(Value::Object(map)) => map, | ||||||
|  |                 _ => Default::default(), | ||||||
|  |             }; | ||||||
|  |             for (name, vector) in index.embeddings(rtxn, id)? { | ||||||
|  |                 let user_provided = embedding_configs | ||||||
|  |                     .iter() | ||||||
|  |                     .find(|conf| conf.name == name) | ||||||
|  |                     .is_some_and(|conf| conf.user_provided.contains(id)); | ||||||
|  |                 let embeddings = | ||||||
|  |                     ExplicitVectors { embeddings: Some(vector.into()), regenerate: !user_provided }; | ||||||
|  |                 vectors.insert(name, serde_json::to_value(embeddings)?); | ||||||
|  |             } | ||||||
|  |             document.insert("_vectors".into(), vectors.into()); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         let (matches_position, formatted) = format_fields( |         let (matches_position, formatted) = format_fields( | ||||||
|             &displayed_document, |             &displayed_document, | ||||||
|             &fields_ids_map, |             &fields_ids_map, | ||||||
| @@ -1057,6 +1230,7 @@ pub fn perform_similar( | |||||||
|     query: SimilarQuery, |     query: SimilarQuery, | ||||||
|     embedder_name: String, |     embedder_name: String, | ||||||
|     embedder: Arc<Embedder>, |     embedder: Arc<Embedder>, | ||||||
|  |     retrieve_vectors: RetrieveVectors, | ||||||
| ) -> Result<SimilarResult, ResponseError> { | ) -> Result<SimilarResult, ResponseError> { | ||||||
|     let before_search = Instant::now(); |     let before_search = Instant::now(); | ||||||
|     let rtxn = index.read_txn()?; |     let rtxn = index.read_txn()?; | ||||||
| @@ -1068,8 +1242,10 @@ pub fn perform_similar( | |||||||
|         filter: _, |         filter: _, | ||||||
|         embedder: _, |         embedder: _, | ||||||
|         attributes_to_retrieve, |         attributes_to_retrieve, | ||||||
|  |         retrieve_vectors: _, | ||||||
|         show_ranking_score, |         show_ranking_score, | ||||||
|         show_ranking_score_details, |         show_ranking_score_details, | ||||||
|  |         ranking_score_threshold, | ||||||
|     } = query; |     } = query; | ||||||
|  |  | ||||||
|     // using let-else rather than `?` so that the borrow checker identifies we're always returning here, |     // using let-else rather than `?` so that the borrow checker identifies we're always returning here, | ||||||
| @@ -1093,6 +1269,10 @@ pub fn perform_similar( | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     if let Some(ranking_score_threshold) = ranking_score_threshold { | ||||||
|  |         similar.ranking_score_threshold(ranking_score_threshold.0); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     let milli::SearchResult { |     let milli::SearchResult { | ||||||
|         documents_ids, |         documents_ids, | ||||||
|         matching_words: _, |         matching_words: _, | ||||||
| @@ -1109,6 +1289,7 @@ pub fn perform_similar( | |||||||
|  |  | ||||||
|     let format = AttributesFormat { |     let format = AttributesFormat { | ||||||
|         attributes_to_retrieve, |         attributes_to_retrieve, | ||||||
|  |         retrieve_vectors, | ||||||
|         attributes_to_highlight: None, |         attributes_to_highlight: None, | ||||||
|         attributes_to_crop: None, |         attributes_to_crop: None, | ||||||
|         crop_length: DEFAULT_CROP_LENGTH(), |         crop_length: DEFAULT_CROP_LENGTH(), | ||||||
|   | |||||||
| @@ -40,8 +40,9 @@ pub struct Permit { | |||||||
|  |  | ||||||
| impl Drop for Permit { | impl Drop for Permit { | ||||||
|     fn drop(&mut self) { |     fn drop(&mut self) { | ||||||
|  |         let sender = self.sender.clone(); | ||||||
|         // if the channel is closed then the whole instance is down |         // if the channel is closed then the whole instance is down | ||||||
|         let _ = futures::executor::block_on(self.sender.send(())); |         std::mem::drop(tokio::spawn(async move { sender.send(()).await })); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -182,14 +182,10 @@ impl Index<'_> { | |||||||
|         self.service.get(url).await |         self.service.get(url).await | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub async fn get_document( |     pub async fn get_document(&self, id: u64, options: Option<Value>) -> (Value, StatusCode) { | ||||||
|         &self, |  | ||||||
|         id: u64, |  | ||||||
|         options: Option<GetDocumentOptions>, |  | ||||||
|     ) -> (Value, StatusCode) { |  | ||||||
|         let mut url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id); |         let mut url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id); | ||||||
|         if let Some(fields) = options.and_then(|o| o.fields) { |         if let Some(options) = options { | ||||||
|             let _ = write!(url, "?fields={}", fields.join(",")); |             write!(url, "{}", yaup::to_string(&options).unwrap()).unwrap(); | ||||||
|         } |         } | ||||||
|         self.service.get(url).await |         self.service.get(url).await | ||||||
|     } |     } | ||||||
| @@ -205,18 +201,11 @@ impl Index<'_> { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub async fn get_all_documents(&self, options: GetAllDocumentsOptions) -> (Value, StatusCode) { |     pub async fn get_all_documents(&self, options: GetAllDocumentsOptions) -> (Value, StatusCode) { | ||||||
|         let mut url = format!("/indexes/{}/documents?", urlencode(self.uid.as_ref())); |         let url = format!( | ||||||
|         if let Some(limit) = options.limit { |             "/indexes/{}/documents{}", | ||||||
|             let _ = write!(url, "limit={}&", limit); |             urlencode(self.uid.as_ref()), | ||||||
|         } |             yaup::to_string(&options).unwrap() | ||||||
|  |         ); | ||||||
|         if let Some(offset) = options.offset { |  | ||||||
|             let _ = write!(url, "offset={}&", offset); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if let Some(attributes_to_retrieve) = options.attributes_to_retrieve { |  | ||||||
|             let _ = write!(url, "fields={}&", attributes_to_retrieve.join(",")); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         self.service.get(url).await |         self.service.get(url).await | ||||||
|     } |     } | ||||||
| @@ -435,13 +424,14 @@ impl Index<'_> { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub struct GetDocumentOptions { | #[derive(Debug, Default, serde::Serialize)] | ||||||
|     pub fields: Option<Vec<&'static str>>, | #[serde(rename_all = "camelCase")] | ||||||
| } |  | ||||||
|  |  | ||||||
| #[derive(Debug, Default)] |  | ||||||
| pub struct GetAllDocumentsOptions { | pub struct GetAllDocumentsOptions { | ||||||
|  |     #[serde(skip_serializing_if = "Option::is_none")] | ||||||
|     pub limit: Option<usize>, |     pub limit: Option<usize>, | ||||||
|  |     #[serde(skip_serializing_if = "Option::is_none")] | ||||||
|     pub offset: Option<usize>, |     pub offset: Option<usize>, | ||||||
|     pub attributes_to_retrieve: Option<Vec<&'static str>>, |     #[serde(skip_serializing_if = "Option::is_none")] | ||||||
|  |     pub fields: Option<Vec<&'static str>>, | ||||||
|  |     pub retrieve_vectors: bool, | ||||||
| } | } | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ pub mod service; | |||||||
| use std::fmt::{self, Display}; | use std::fmt::{self, Display}; | ||||||
|  |  | ||||||
| #[allow(unused)] | #[allow(unused)] | ||||||
| pub use index::{GetAllDocumentsOptions, GetDocumentOptions}; | pub use index::GetAllDocumentsOptions; | ||||||
| use meili_snap::json_string; | use meili_snap::json_string; | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
| #[allow(unused)] | #[allow(unused)] | ||||||
| @@ -71,7 +71,7 @@ impl Display for Value { | |||||||
|         write!( |         write!( | ||||||
|             f, |             f, | ||||||
|             "{}", |             "{}", | ||||||
|             json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }) |             json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]", ".processingTimeMs" => "[duration]" }) | ||||||
|         ) |         ) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -719,7 +719,7 @@ async fn fetch_document_by_filter() { | |||||||
|  |  | ||||||
|     let (response, code) = index.get_document_by_filter(json!(null)).await; |     let (response, code) = index.get_document_by_filter(json!(null)).await; | ||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
|     snapshot!(json_string!(response), @r###" |     snapshot!(response, @r###" | ||||||
|     { |     { | ||||||
|       "message": "Invalid value type: expected an object, but found null", |       "message": "Invalid value type: expected an object, but found null", | ||||||
|       "code": "bad_request", |       "code": "bad_request", | ||||||
| @@ -730,7 +730,7 @@ async fn fetch_document_by_filter() { | |||||||
|  |  | ||||||
|     let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await; |     let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await; | ||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
|     snapshot!(json_string!(response), @r###" |     snapshot!(response, @r###" | ||||||
|     { |     { | ||||||
|       "message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`", |       "message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`", | ||||||
|       "code": "invalid_document_offset", |       "code": "invalid_document_offset", | ||||||
| @@ -741,7 +741,7 @@ async fn fetch_document_by_filter() { | |||||||
|  |  | ||||||
|     let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await; |     let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await; | ||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
|     snapshot!(json_string!(response), @r###" |     snapshot!(response, @r###" | ||||||
|     { |     { | ||||||
|       "message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`", |       "message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`", | ||||||
|       "code": "invalid_document_limit", |       "code": "invalid_document_limit", | ||||||
| @@ -752,7 +752,7 @@ async fn fetch_document_by_filter() { | |||||||
|  |  | ||||||
|     let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await; |     let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await; | ||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
|     snapshot!(json_string!(response), @r###" |     snapshot!(response, @r###" | ||||||
|     { |     { | ||||||
|       "message": "Invalid value type at `.fields`: expected an array, but found a string: `\"doggo\"`", |       "message": "Invalid value type at `.fields`: expected an array, but found a string: `\"doggo\"`", | ||||||
|       "code": "invalid_document_fields", |       "code": "invalid_document_fields", | ||||||
| @@ -763,7 +763,7 @@ async fn fetch_document_by_filter() { | |||||||
|  |  | ||||||
|     let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await; |     let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await; | ||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
|     snapshot!(json_string!(response), @r###" |     snapshot!(response, @r###" | ||||||
|     { |     { | ||||||
|       "message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.", |       "message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.", | ||||||
|       "code": "invalid_document_filter", |       "code": "invalid_document_filter", | ||||||
| @@ -774,7 +774,7 @@ async fn fetch_document_by_filter() { | |||||||
|  |  | ||||||
|     let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await; |     let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await; | ||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
|     snapshot!(json_string!(response), @r###" |     snapshot!(response, @r###" | ||||||
|     { |     { | ||||||
|       "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", |       "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", | ||||||
|       "code": "invalid_document_filter", |       "code": "invalid_document_filter", | ||||||
| @@ -786,7 +786,7 @@ async fn fetch_document_by_filter() { | |||||||
|     let (response, code) = |     let (response, code) = | ||||||
|         index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await; |         index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await; | ||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
|     snapshot!(json_string!(response), @r###" |     snapshot!(response, @r###" | ||||||
|     { |     { | ||||||
|       "message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese", |       "message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese", | ||||||
|       "code": "invalid_document_filter", |       "code": "invalid_document_filter", | ||||||
| @@ -795,3 +795,70 @@ async fn fetch_document_by_filter() { | |||||||
|     } |     } | ||||||
|     "###); |     "###); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn retrieve_vectors() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("doggo"); | ||||||
|  |  | ||||||
|  |     // GET ALL DOCUMENTS BY QUERY | ||||||
|  |     let (response, _code) = index.get_all_documents_raw("?retrieveVectors=tamo").await; | ||||||
|  |     snapshot!(response, @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`", | ||||||
|  |       "code": "invalid_document_retrieve_vectors", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_document_retrieve_vectors" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |     let (response, _code) = index.get_all_documents_raw("?retrieveVectors=true").await; | ||||||
|  |     snapshot!(response, @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677", | ||||||
|  |       "code": "feature_not_enabled", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#feature_not_enabled" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // FETCH ALL DOCUMENTS BY POST | ||||||
|  |     let (response, _code) = | ||||||
|  |         index.get_document_by_filter(json!({ "retrieveVectors": "tamo" })).await; | ||||||
|  |     snapshot!(response, @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`", | ||||||
|  |       "code": "invalid_document_retrieve_vectors", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_document_retrieve_vectors" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |     let (response, _code) = index.get_document_by_filter(json!({ "retrieveVectors": true })).await; | ||||||
|  |     snapshot!(response, @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677", | ||||||
|  |       "code": "feature_not_enabled", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#feature_not_enabled" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // GET A SINGLE DOCUMENT | ||||||
|  |     let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": "tamo"}))).await; | ||||||
|  |     snapshot!(response, @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`", | ||||||
|  |       "code": "invalid_document_retrieve_vectors", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_document_retrieve_vectors" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |     let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": true}))).await; | ||||||
|  |     snapshot!(response, @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677", | ||||||
|  |       "code": "feature_not_enabled", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#feature_not_enabled" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -4,7 +4,7 @@ use meili_snap::*; | |||||||
| use urlencoding::encode as urlencode; | use urlencoding::encode as urlencode; | ||||||
|  |  | ||||||
| use crate::common::encoder::Encoder; | use crate::common::encoder::Encoder; | ||||||
| use crate::common::{GetAllDocumentsOptions, GetDocumentOptions, Server, Value}; | use crate::common::{GetAllDocumentsOptions, Server, Value}; | ||||||
| use crate::json; | use crate::json; | ||||||
|  |  | ||||||
| // TODO: partial test since we are testing error, amd error is not yet fully implemented in | // TODO: partial test since we are testing error, amd error is not yet fully implemented in | ||||||
| @@ -59,8 +59,7 @@ async fn get_document() { | |||||||
|         }) |         }) | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (response, code) = |     let (response, code) = index.get_document(0, Some(json!({ "fields": ["id"] }))).await; | ||||||
|         index.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["id"]) })).await; |  | ||||||
|     assert_eq!(code, 200); |     assert_eq!(code, 200); | ||||||
|     assert_eq!( |     assert_eq!( | ||||||
|         response, |         response, | ||||||
| @@ -69,9 +68,8 @@ async fn get_document() { | |||||||
|         }) |         }) | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = | ||||||
|         .get_document(0, Some(GetDocumentOptions { fields: Some(vec!["nested.content"]) })) |         index.get_document(0, Some(json!({ "fields": ["nested.content"] }))).await; | ||||||
|         .await; |  | ||||||
|     assert_eq!(code, 200); |     assert_eq!(code, 200); | ||||||
|     assert_eq!( |     assert_eq!( | ||||||
|         response, |         response, | ||||||
| @@ -211,7 +209,7 @@ async fn test_get_all_documents_attributes_to_retrieve() { | |||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .get_all_documents(GetAllDocumentsOptions { |         .get_all_documents(GetAllDocumentsOptions { | ||||||
|             attributes_to_retrieve: Some(vec!["name"]), |             fields: Some(vec!["name"]), | ||||||
|             ..Default::default() |             ..Default::default() | ||||||
|         }) |         }) | ||||||
|         .await; |         .await; | ||||||
| @@ -225,9 +223,19 @@ async fn test_get_all_documents_attributes_to_retrieve() { | |||||||
|     assert_eq!(response["limit"], json!(20)); |     assert_eq!(response["limit"], json!(20)); | ||||||
|     assert_eq!(response["total"], json!(77)); |     assert_eq!(response["total"], json!(77)); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.get_all_documents_raw("?fields=").await; | ||||||
|  |     assert_eq!(code, 200); | ||||||
|  |     assert_eq!(response["results"].as_array().unwrap().len(), 20); | ||||||
|  |     for results in response["results"].as_array().unwrap() { | ||||||
|  |         assert_eq!(results.as_object().unwrap().keys().count(), 0); | ||||||
|  |     } | ||||||
|  |     assert_eq!(response["offset"], json!(0)); | ||||||
|  |     assert_eq!(response["limit"], json!(20)); | ||||||
|  |     assert_eq!(response["total"], json!(77)); | ||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .get_all_documents(GetAllDocumentsOptions { |         .get_all_documents(GetAllDocumentsOptions { | ||||||
|             attributes_to_retrieve: Some(vec![]), |             fields: Some(vec!["wrong"]), | ||||||
|             ..Default::default() |             ..Default::default() | ||||||
|         }) |         }) | ||||||
|         .await; |         .await; | ||||||
| @@ -242,22 +250,7 @@ async fn test_get_all_documents_attributes_to_retrieve() { | |||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .get_all_documents(GetAllDocumentsOptions { |         .get_all_documents(GetAllDocumentsOptions { | ||||||
|             attributes_to_retrieve: Some(vec!["wrong"]), |             fields: Some(vec!["name", "tags"]), | ||||||
|             ..Default::default() |  | ||||||
|         }) |  | ||||||
|         .await; |  | ||||||
|     assert_eq!(code, 200); |  | ||||||
|     assert_eq!(response["results"].as_array().unwrap().len(), 20); |  | ||||||
|     for results in response["results"].as_array().unwrap() { |  | ||||||
|         assert_eq!(results.as_object().unwrap().keys().count(), 0); |  | ||||||
|     } |  | ||||||
|     assert_eq!(response["offset"], json!(0)); |  | ||||||
|     assert_eq!(response["limit"], json!(20)); |  | ||||||
|     assert_eq!(response["total"], json!(77)); |  | ||||||
|  |  | ||||||
|     let (response, code) = index |  | ||||||
|         .get_all_documents(GetAllDocumentsOptions { |  | ||||||
|             attributes_to_retrieve: Some(vec!["name", "tags"]), |  | ||||||
|             ..Default::default() |             ..Default::default() | ||||||
|         }) |         }) | ||||||
|         .await; |         .await; | ||||||
| @@ -270,10 +263,7 @@ async fn test_get_all_documents_attributes_to_retrieve() { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .get_all_documents(GetAllDocumentsOptions { |         .get_all_documents(GetAllDocumentsOptions { fields: Some(vec!["*"]), ..Default::default() }) | ||||||
|             attributes_to_retrieve: Some(vec!["*"]), |  | ||||||
|             ..Default::default() |  | ||||||
|         }) |  | ||||||
|         .await; |         .await; | ||||||
|     assert_eq!(code, 200); |     assert_eq!(code, 200); | ||||||
|     assert_eq!(response["results"].as_array().unwrap().len(), 20); |     assert_eq!(response["results"].as_array().unwrap().len(), 20); | ||||||
| @@ -283,7 +273,7 @@ async fn test_get_all_documents_attributes_to_retrieve() { | |||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .get_all_documents(GetAllDocumentsOptions { |         .get_all_documents(GetAllDocumentsOptions { | ||||||
|             attributes_to_retrieve: Some(vec!["*", "wrong"]), |             fields: Some(vec!["*", "wrong"]), | ||||||
|             ..Default::default() |             ..Default::default() | ||||||
|         }) |         }) | ||||||
|         .await; |         .await; | ||||||
| @@ -316,12 +306,10 @@ async fn get_document_s_nested_attributes_to_retrieve() { | |||||||
|     assert_eq!(code, 202); |     assert_eq!(code, 202); | ||||||
|     index.wait_task(1).await; |     index.wait_task(1).await; | ||||||
|  |  | ||||||
|     let (response, code) = |     let (response, code) = index.get_document(0, Some(json!({ "fields": ["content"] }))).await; | ||||||
|         index.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["content"]) })).await; |  | ||||||
|     assert_eq!(code, 200); |     assert_eq!(code, 200); | ||||||
|     assert_eq!(response, json!({})); |     assert_eq!(response, json!({})); | ||||||
|     let (response, code) = |     let (response, code) = index.get_document(1, Some(json!({ "fields": ["content"] }))).await; | ||||||
|         index.get_document(1, Some(GetDocumentOptions { fields: Some(vec!["content"]) })).await; |  | ||||||
|     assert_eq!(code, 200); |     assert_eq!(code, 200); | ||||||
|     assert_eq!( |     assert_eq!( | ||||||
|         response, |         response, | ||||||
| @@ -333,9 +321,7 @@ async fn get_document_s_nested_attributes_to_retrieve() { | |||||||
|         }) |         }) | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index.get_document(0, Some(json!({ "fields": ["content.truc"] }))).await; | ||||||
|         .get_document(0, Some(GetDocumentOptions { fields: Some(vec!["content.truc"]) })) |  | ||||||
|         .await; |  | ||||||
|     assert_eq!(code, 200); |     assert_eq!(code, 200); | ||||||
|     assert_eq!( |     assert_eq!( | ||||||
|         response, |         response, | ||||||
| @@ -343,9 +329,7 @@ async fn get_document_s_nested_attributes_to_retrieve() { | |||||||
|             "content.truc": "foobar", |             "content.truc": "foobar", | ||||||
|         }) |         }) | ||||||
|     ); |     ); | ||||||
|     let (response, code) = index |     let (response, code) = index.get_document(1, Some(json!({ "fields": ["content.truc"] }))).await; | ||||||
|         .get_document(1, Some(GetDocumentOptions { fields: Some(vec!["content.truc"]) })) |  | ||||||
|         .await; |  | ||||||
|     assert_eq!(code, 200); |     assert_eq!(code, 200); | ||||||
|     assert_eq!( |     assert_eq!( | ||||||
|         response, |         response, | ||||||
| @@ -540,3 +524,207 @@ async fn get_document_by_filter() { | |||||||
|     } |     } | ||||||
|     "###); |     "###); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn get_document_with_vectors() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("doggo"); | ||||||
|  |     let (value, code) = server.set_features(json!({"vectorStore": true})).await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     snapshot!(value, @r###" | ||||||
|  |     { | ||||||
|  |       "vectorStore": true, | ||||||
|  |       "metrics": false, | ||||||
|  |       "logsRoute": false | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |           "embedders": { | ||||||
|  |               "manual": { | ||||||
|  |                   "source": "userProvided", | ||||||
|  |                   "dimensions": 3, | ||||||
|  |               } | ||||||
|  |           }, | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     server.wait_task(response.uid()).await; | ||||||
|  |  | ||||||
|  |     let documents = json!([ | ||||||
|  |       {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }}, | ||||||
|  |       {"id": 1, "name": "echo", "_vectors": { "manual": null }}, | ||||||
|  |     ]); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(value.uid()).await; | ||||||
|  |  | ||||||
|  |     // by default you shouldn't see the `_vectors` object | ||||||
|  |     let (documents, _code) = index.get_all_documents(Default::default()).await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "id": 0, | ||||||
|  |           "name": "kefir" | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": 1, | ||||||
|  |           "name": "echo" | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 2 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |     let (documents, _code) = index.get_document(0, None).await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "id": 0, | ||||||
|  |       "name": "kefir" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // if we try to retrieve the vectors with the `fields` parameter they | ||||||
|  |     // still shouldn't be displayed | ||||||
|  |     let (documents, _code) = index | ||||||
|  |         .get_all_documents(GetAllDocumentsOptions { | ||||||
|  |             fields: Some(vec!["name", "_vectors"]), | ||||||
|  |             ..Default::default() | ||||||
|  |         }) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "name": "kefir" | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "name": "echo" | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 2 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |     let (documents, _code) = | ||||||
|  |         index.get_document(0, Some(json!({"fields": ["name", "_vectors"]}))).await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "name": "kefir" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // If we specify the retrieve vectors boolean and nothing else we should get the vectors | ||||||
|  |     let (documents, _code) = index | ||||||
|  |         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "id": 0, | ||||||
|  |           "name": "kefir", | ||||||
|  |           "_vectors": { | ||||||
|  |             "manual": { | ||||||
|  |               "embeddings": [ | ||||||
|  |                 [ | ||||||
|  |                   0.0, | ||||||
|  |                   0.0, | ||||||
|  |                   0.0 | ||||||
|  |                 ] | ||||||
|  |               ], | ||||||
|  |               "regenerate": false | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": 1, | ||||||
|  |           "name": "echo", | ||||||
|  |           "_vectors": {} | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 2 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |     let (documents, _code) = index.get_document(0, Some(json!({"retrieveVectors": true}))).await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "id": 0, | ||||||
|  |       "name": "kefir", | ||||||
|  |       "_vectors": { | ||||||
|  |         "manual": { | ||||||
|  |           "embeddings": [ | ||||||
|  |             [ | ||||||
|  |               0.0, | ||||||
|  |               0.0, | ||||||
|  |               0.0 | ||||||
|  |             ] | ||||||
|  |           ], | ||||||
|  |           "regenerate": false | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // If we specify the retrieve vectors boolean and exclude vectors form the `fields` we should still get the vectors | ||||||
|  |     let (documents, _code) = index | ||||||
|  |         .get_all_documents(GetAllDocumentsOptions { | ||||||
|  |             retrieve_vectors: true, | ||||||
|  |             fields: Some(vec!["name"]), | ||||||
|  |             ..Default::default() | ||||||
|  |         }) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "name": "kefir", | ||||||
|  |           "_vectors": { | ||||||
|  |             "manual": { | ||||||
|  |               "embeddings": [ | ||||||
|  |                 [ | ||||||
|  |                   0.0, | ||||||
|  |                   0.0, | ||||||
|  |                   0.0 | ||||||
|  |                 ] | ||||||
|  |               ], | ||||||
|  |               "regenerate": false | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "name": "echo", | ||||||
|  |           "_vectors": {} | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 2 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |     let (documents, _code) = | ||||||
|  |         index.get_document(0, Some(json!({"retrieveVectors": true, "fields": ["name"]}))).await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "name": "kefir", | ||||||
|  |       "_vectors": { | ||||||
|  |         "manual": { | ||||||
|  |           "embeddings": [ | ||||||
|  |             [ | ||||||
|  |               0.0, | ||||||
|  |               0.0, | ||||||
|  |               0.0 | ||||||
|  |             ] | ||||||
|  |           ], | ||||||
|  |           "regenerate": false | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -1938,3 +1938,210 @@ async fn import_dump_v6_containing_experimental_features() { | |||||||
|         }) |         }) | ||||||
|         .await; |         .await; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // In this test we must generate the dump ourselves to ensure the | ||||||
|  | // `user provided` vectors are well set | ||||||
|  | #[actix_rt::test] | ||||||
|  | #[cfg_attr(target_os = "windows", ignore)] | ||||||
|  | async fn generate_and_import_dump_containing_vectors() { | ||||||
|  |     let temp = tempfile::tempdir().unwrap(); | ||||||
|  |     let mut opt = default_settings(temp.path()); | ||||||
|  |     let server = Server::new_with_options(opt.clone()).await.unwrap(); | ||||||
|  |     let (code, _) = server.set_features(json!({"vectorStore": true})).await; | ||||||
|  |     snapshot!(code, @r###" | ||||||
|  |     { | ||||||
|  |       "vectorStore": true, | ||||||
|  |       "metrics": false, | ||||||
|  |       "logsRoute": false | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |     let index = server.index("pets"); | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!( | ||||||
|  |         { | ||||||
|  |             "embedders": { | ||||||
|  |                 "doggo_embedder": { | ||||||
|  |                     "source": "huggingFace", | ||||||
|  |                     "model": "sentence-transformers/all-MiniLM-L6-v2", | ||||||
|  |                     "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", | ||||||
|  |                     "documentTemplate": "{{doc.doggo}}", | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         )) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let response = index.wait_task(response.uid()).await; | ||||||
|  |     snapshot!(response); | ||||||
|  |     let (response, code) = index | ||||||
|  |         .add_documents( | ||||||
|  |             json!([ | ||||||
|  |                 {"id": 0, "doggo": "kefir", "_vectors": { "doggo_embedder": vec![0; 384] }}, | ||||||
|  |                 {"id": 1, "doggo": "echo", "_vectors": { "doggo_embedder": { "regenerate": false, "embeddings": vec![1; 384] }}}, | ||||||
|  |                 {"id": 2, "doggo": "intel", "_vectors": { "doggo_embedder": { "regenerate": true, "embeddings": vec![2; 384] }}}, | ||||||
|  |                 {"id": 3, "doggo": "bill", "_vectors": { "doggo_embedder": { "regenerate": true }}}, | ||||||
|  |                 {"id": 4, "doggo": "max" }, | ||||||
|  |             ]), | ||||||
|  |             None, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let response = index.wait_task(response.uid()).await; | ||||||
|  |     snapshot!(response); | ||||||
|  |  | ||||||
|  |     let (response, code) = server.create_dump().await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let response = index.wait_task(response.uid()).await; | ||||||
|  |     snapshot!(response["status"], @r###""succeeded""###); | ||||||
|  |  | ||||||
|  |     // ========= We made a dump, now we should clear the DB and try to import our dump | ||||||
|  |     drop(server); | ||||||
|  |     tokio::fs::remove_dir_all(&opt.db_path).await.unwrap(); | ||||||
|  |     let dump_name = format!("{}.dump", response["details"]["dumpUid"].as_str().unwrap()); | ||||||
|  |     let dump_path = opt.dump_dir.join(dump_name); | ||||||
|  |     assert!(dump_path.exists(), "path: `{}`", dump_path.display()); | ||||||
|  |  | ||||||
|  |     opt.import_dump = Some(dump_path); | ||||||
|  |     // NOTE: We shouldn't have to change the database path but I lost one hour | ||||||
|  |     // because of a « bad path » error and that fixed it. | ||||||
|  |     opt.db_path = temp.path().join("data.ms"); | ||||||
|  |  | ||||||
|  |     let mut server = Server::new_auth_with_options(opt, temp).await; | ||||||
|  |     server.use_api_key("MASTER_KEY"); | ||||||
|  |  | ||||||
|  |     let (indexes, code) = server.list_indexes(None, None).await; | ||||||
|  |     assert_eq!(code, 200, "{indexes}"); | ||||||
|  |  | ||||||
|  |     snapshot!(indexes["results"].as_array().unwrap().len(), @"1"); | ||||||
|  |     snapshot!(indexes["results"][0]["uid"], @r###""pets""###); | ||||||
|  |     snapshot!(indexes["results"][0]["primaryKey"], @r###""id""###); | ||||||
|  |  | ||||||
|  |     let (response, code) = server.get_features().await; | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "vectorStore": true, | ||||||
|  |       "metrics": false, | ||||||
|  |       "logsRoute": false | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let index = server.index("pets"); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.settings().await; | ||||||
|  |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |     meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "displayedAttributes": [ | ||||||
|  |         "*" | ||||||
|  |       ], | ||||||
|  |       "searchableAttributes": [ | ||||||
|  |         "*" | ||||||
|  |       ], | ||||||
|  |       "filterableAttributes": [], | ||||||
|  |       "sortableAttributes": [], | ||||||
|  |       "rankingRules": [ | ||||||
|  |         "words", | ||||||
|  |         "typo", | ||||||
|  |         "proximity", | ||||||
|  |         "attribute", | ||||||
|  |         "sort", | ||||||
|  |         "exactness" | ||||||
|  |       ], | ||||||
|  |       "stopWords": [], | ||||||
|  |       "nonSeparatorTokens": [], | ||||||
|  |       "separatorTokens": [], | ||||||
|  |       "dictionary": [], | ||||||
|  |       "synonyms": {}, | ||||||
|  |       "distinctAttribute": null, | ||||||
|  |       "proximityPrecision": "byWord", | ||||||
|  |       "typoTolerance": { | ||||||
|  |         "enabled": true, | ||||||
|  |         "minWordSizeForTypos": { | ||||||
|  |           "oneTypo": 5, | ||||||
|  |           "twoTypos": 9 | ||||||
|  |         }, | ||||||
|  |         "disableOnWords": [], | ||||||
|  |         "disableOnAttributes": [] | ||||||
|  |       }, | ||||||
|  |       "faceting": { | ||||||
|  |         "maxValuesPerFacet": 100, | ||||||
|  |         "sortFacetValuesBy": { | ||||||
|  |           "*": "alpha" | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       "pagination": { | ||||||
|  |         "maxTotalHits": 1000 | ||||||
|  |       }, | ||||||
|  |       "embedders": { | ||||||
|  |         "doggo_embedder": { | ||||||
|  |           "source": "huggingFace", | ||||||
|  |           "model": "sentence-transformers/all-MiniLM-L6-v2", | ||||||
|  |           "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", | ||||||
|  |           "documentTemplate": "{{doc.doggo}}" | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .search(json!({"retrieveVectors": true}), |response, code| { | ||||||
|  |             snapshot!(code, @"200 OK"); | ||||||
|  |             snapshot!(json_string!(response["hits"], { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###" | ||||||
|  |             [ | ||||||
|  |               { | ||||||
|  |                 "id": 0, | ||||||
|  |                 "doggo": "kefir", | ||||||
|  |                 "_vectors": { | ||||||
|  |                   "doggo_embedder": { | ||||||
|  |                     "embeddings": "[vector]", | ||||||
|  |                     "regenerate": false | ||||||
|  |                   } | ||||||
|  |                 } | ||||||
|  |               }, | ||||||
|  |               { | ||||||
|  |                 "id": 1, | ||||||
|  |                 "doggo": "echo", | ||||||
|  |                 "_vectors": { | ||||||
|  |                   "doggo_embedder": { | ||||||
|  |                     "embeddings": "[vector]", | ||||||
|  |                     "regenerate": false | ||||||
|  |                   } | ||||||
|  |                 } | ||||||
|  |               }, | ||||||
|  |               { | ||||||
|  |                 "id": 2, | ||||||
|  |                 "doggo": "intel", | ||||||
|  |                 "_vectors": { | ||||||
|  |                   "doggo_embedder": { | ||||||
|  |                     "embeddings": "[vector]", | ||||||
|  |                     "regenerate": true | ||||||
|  |                   } | ||||||
|  |                 } | ||||||
|  |               }, | ||||||
|  |               { | ||||||
|  |                 "id": 3, | ||||||
|  |                 "doggo": "bill", | ||||||
|  |                 "_vectors": { | ||||||
|  |                   "doggo_embedder": { | ||||||
|  |                     "embeddings": "[vector]", | ||||||
|  |                     "regenerate": true | ||||||
|  |                   } | ||||||
|  |                 } | ||||||
|  |               }, | ||||||
|  |               { | ||||||
|  |                 "id": 4, | ||||||
|  |                 "doggo": "max", | ||||||
|  |                 "_vectors": { | ||||||
|  |                   "doggo_embedder": { | ||||||
|  |                     "embeddings": "[vector]", | ||||||
|  |                     "regenerate": true | ||||||
|  |                   } | ||||||
|  |                 } | ||||||
|  |               } | ||||||
|  |             ] | ||||||
|  |             "###); | ||||||
|  |         }) | ||||||
|  |         .await; | ||||||
|  | } | ||||||
|   | |||||||
| @@ -0,0 +1,25 @@ | |||||||
|  | --- | ||||||
|  | source: meilisearch/tests/dumps/mod.rs | ||||||
|  | --- | ||||||
|  | { | ||||||
|  |   "uid": 0, | ||||||
|  |   "indexUid": "pets", | ||||||
|  |   "status": "succeeded", | ||||||
|  |   "type": "settingsUpdate", | ||||||
|  |   "canceledBy": null, | ||||||
|  |   "details": { | ||||||
|  |     "embedders": { | ||||||
|  |       "doggo_embedder": { | ||||||
|  |         "source": "huggingFace", | ||||||
|  |         "model": "sentence-transformers/all-MiniLM-L6-v2", | ||||||
|  |         "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", | ||||||
|  |         "documentTemplate": "{{doc.doggo}}" | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   }, | ||||||
|  |   "error": null, | ||||||
|  |   "duration": "[duration]", | ||||||
|  |   "enqueuedAt": "[date]", | ||||||
|  |   "startedAt": "[date]", | ||||||
|  |   "finishedAt": "[date]" | ||||||
|  | } | ||||||
| @@ -0,0 +1,19 @@ | |||||||
|  | --- | ||||||
|  | source: meilisearch/tests/dumps/mod.rs | ||||||
|  | --- | ||||||
|  | { | ||||||
|  |   "uid": 1, | ||||||
|  |   "indexUid": "pets", | ||||||
|  |   "status": "succeeded", | ||||||
|  |   "type": "documentAdditionOrUpdate", | ||||||
|  |   "canceledBy": null, | ||||||
|  |   "details": { | ||||||
|  |     "receivedDocuments": 5, | ||||||
|  |     "indexedDocuments": 5 | ||||||
|  |   }, | ||||||
|  |   "error": null, | ||||||
|  |   "duration": "[duration]", | ||||||
|  |   "enqueuedAt": "[date]", | ||||||
|  |   "startedAt": "[date]", | ||||||
|  |   "finishedAt": "[date]" | ||||||
|  | } | ||||||
| @@ -13,6 +13,7 @@ mod snapshot; | |||||||
| mod stats; | mod stats; | ||||||
| mod swap_indexes; | mod swap_indexes; | ||||||
| mod tasks; | mod tasks; | ||||||
|  | mod vector; | ||||||
|  |  | ||||||
| // Tests are isolated by features in different modules to allow better readability, test | // Tests are isolated by features in different modules to allow better readability, test | ||||||
| // targetability, and improved incremental compilation times. | // targetability, and improved incremental compilation times. | ||||||
|   | |||||||
| @@ -107,6 +107,39 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| { | |||||||
|     ]) |     ]) | ||||||
| }); | }); | ||||||
|  |  | ||||||
|  | static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| { | ||||||
|  |     json!([ | ||||||
|  |       { | ||||||
|  |         "id": 1, | ||||||
|  |         "description": "Leather Jacket", | ||||||
|  |         "brand": "Lee Jeans", | ||||||
|  |         "product_id": "123456", | ||||||
|  |         "color": { "main": "Brown", "pattern": "stripped" }, | ||||||
|  |       }, | ||||||
|  |       { | ||||||
|  |         "id": 2, | ||||||
|  |         "description": "Leather Jacket", | ||||||
|  |         "brand": "Lee Jeans", | ||||||
|  |         "product_id": "123456", | ||||||
|  |         "color": { "main": "Black", "pattern": "stripped" }, | ||||||
|  |       }, | ||||||
|  |       { | ||||||
|  |         "id": 3, | ||||||
|  |         "description": "Leather Jacket", | ||||||
|  |         "brand": "Lee Jeans", | ||||||
|  |         "product_id": "123456", | ||||||
|  |         "color": { "main": "Blue", "pattern": "used" }, | ||||||
|  |       }, | ||||||
|  |       { | ||||||
|  |         "id": 4, | ||||||
|  |         "description": "T-Shirt", | ||||||
|  |         "brand": "Nike", | ||||||
|  |         "product_id": "789012", | ||||||
|  |         "color": { "main": "Blue", "pattern": "stripped" }, | ||||||
|  |       } | ||||||
|  |     ]) | ||||||
|  | }); | ||||||
|  |  | ||||||
| static DOCUMENT_PRIMARY_KEY: &str = "id"; | static DOCUMENT_PRIMARY_KEY: &str = "id"; | ||||||
| static DOCUMENT_DISTINCT_KEY: &str = "product_id"; | static DOCUMENT_DISTINCT_KEY: &str = "product_id"; | ||||||
|  |  | ||||||
| @@ -239,3 +272,35 @@ async fn distinct_search_with_pagination_no_ranking() { | |||||||
|     snapshot!(response["totalPages"], @"2"); |     snapshot!(response["totalPages"], @"2"); | ||||||
|     snapshot!(response["totalHits"], @"6"); |     snapshot!(response["totalHits"], @"6"); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn distinct_at_search_time() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("tamo"); | ||||||
|  |  | ||||||
|  |     let documents = NESTED_DOCUMENTS.clone(); | ||||||
|  |     index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await; | ||||||
|  |     let (task, _) = index.update_settings_filterable_attributes(json!(["color.main"])).await; | ||||||
|  |     let task = index.wait_task(task.uid()).await; | ||||||
|  |     snapshot!(task, name: "succeed"); | ||||||
|  |  | ||||||
|  |     fn get_hits(response: &Value) -> Vec<String> { | ||||||
|  |         let hits_array = response["hits"] | ||||||
|  |             .as_array() | ||||||
|  |             .unwrap_or_else(|| panic!("{}", &serde_json::to_string_pretty(&response).unwrap())); | ||||||
|  |         hits_array | ||||||
|  |             .iter() | ||||||
|  |             .map(|h| h[DOCUMENT_PRIMARY_KEY].as_number().unwrap().to_string()) | ||||||
|  |             .collect::<Vec<_>>() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     let (response, code) = | ||||||
|  |         index.search_post(json!({"page": 1, "hitsPerPage": 3, "distinct": "color.main"})).await; | ||||||
|  |     let hits = get_hits(&response); | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     snapshot!(hits.len(), @"3"); | ||||||
|  |     snapshot!(format!("{:?}", hits), @r###"["1", "2", "3"]"###); | ||||||
|  |     snapshot!(response["page"], @"1"); | ||||||
|  |     snapshot!(response["totalPages"], @"1"); | ||||||
|  |     snapshot!(response["totalHits"], @"3"); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -167,6 +167,74 @@ async fn search_bad_hits_per_page() { | |||||||
|     "###); |     "###); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn search_bad_attributes_to_retrieve() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.search_post(json!({"attributesToRetrieve": "doggo"})).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.attributesToRetrieve`: expected an array, but found a string: `\"doggo\"`", | ||||||
|  |       "code": "invalid_search_attributes_to_retrieve", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_retrieve" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |     // Can't make the `attributes_to_retrieve` fail with a get search since it'll accept anything as an array of strings. | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn search_bad_retrieve_vectors() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.search_post(json!({"retrieveVectors": "doggo"})).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"doggo\"`", | ||||||
|  |       "code": "invalid_search_retrieve_vectors", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.search_post(json!({"retrieveVectors": [true]})).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found an array: `[true]`", | ||||||
|  |       "code": "invalid_search_retrieve_vectors", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.search_get("?retrieveVectors=").await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value in parameter `retrieveVectors`: could not parse `` as a boolean, expected either `true` or `false`", | ||||||
|  |       "code": "invalid_search_retrieve_vectors", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.search_get("?retrieveVectors=doggo").await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value in parameter `retrieveVectors`: could not parse `doggo` as a boolean, expected either `true` or `false`", | ||||||
|  |       "code": "invalid_search_retrieve_vectors", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_search_retrieve_vectors" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| async fn search_bad_attributes_to_crop() { | async fn search_bad_attributes_to_crop() { | ||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
| @@ -321,6 +389,40 @@ async fn search_bad_facets() { | |||||||
|     // Can't make the `attributes_to_highlight` fail with a get search since it'll accept anything as an array of strings. |     // Can't make the `attributes_to_highlight` fail with a get search since it'll accept anything as an array of strings. | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn search_bad_threshold() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.search_post(json!({"rankingScoreThreshold": "doggo"})).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found a string: `\"doggo\"`", | ||||||
|  |       "code": "invalid_search_ranking_score_threshold", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn search_invalid_threshold() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.search_post(json!({"rankingScoreThreshold": 42})).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.", | ||||||
|  |       "code": "invalid_search_ranking_score_threshold", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_search_ranking_score_threshold" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| async fn search_non_filterable_facets() { | async fn search_non_filterable_facets() { | ||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
| @@ -1038,3 +1140,66 @@ async fn search_on_unknown_field_plus_joker() { | |||||||
|         ) |         ) | ||||||
|         .await; |         .await; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn distinct_at_search_time() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("tamo"); | ||||||
|  |     let (task, _) = index.create(None).await; | ||||||
|  |     let task = index.wait_task(task.uid()).await; | ||||||
|  |     snapshot!(task, name: "task-succeed"); | ||||||
|  |  | ||||||
|  |     let (response, code) = | ||||||
|  |         index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(response, @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.", | ||||||
|  |       "code": "invalid_search_distinct", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await; | ||||||
|  |     index.wait_task(task.uid()).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = | ||||||
|  |         index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(response, @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.", | ||||||
|  |       "code": "invalid_search_distinct", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await; | ||||||
|  |     index.wait_task(task.uid()).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = | ||||||
|  |         index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(response, @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.", | ||||||
|  |       "code": "invalid_search_distinct", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = | ||||||
|  |         index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(response, @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.distinct`: expected a string, but found a boolean: `true`", | ||||||
|  |       "code": "invalid_search_distinct", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -124,32 +124,61 @@ async fn simple_search() { | |||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post( |         .search_post( | ||||||
|             json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}}), |             json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), | ||||||
|         ) |         ) | ||||||
|         .await; |         .await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]}}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}}}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"0"); |     snapshot!(response["semanticHitCount"], @"0"); | ||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post( |         .search_post( | ||||||
|             json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true}), |             json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}), | ||||||
|         ) |         ) | ||||||
|         .await; |         .await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"2"); |     snapshot!(response["semanticHitCount"], @"2"); | ||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post( |         .search_post( | ||||||
|             json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true}), |             json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, "retrieveVectors": true}), | ||||||
|         ) |         ) | ||||||
|         .await; |         .await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"3"); |     snapshot!(response["semanticHitCount"], @"3"); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn limit_offset() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .search_post( | ||||||
|  |             json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}), | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}}]"###); | ||||||
|  |     snapshot!(response["semanticHitCount"], @"0"); | ||||||
|  |     assert_eq!(response["hits"].as_array().unwrap().len(), 1); | ||||||
|  |  | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .search_post( | ||||||
|  |             json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}), | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}}]"###); | ||||||
|  |     snapshot!(response["semanticHitCount"], @"1"); | ||||||
|  |     assert_eq!(response["hits"].as_array().unwrap().len(), 1); | ||||||
|  | } | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| async fn simple_search_hf() { | async fn simple_search_hf() { | ||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
| @@ -204,10 +233,10 @@ async fn distribution_shift() { | |||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
|     let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; |     let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; | ||||||
|  |  | ||||||
|     let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}}); |     let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true}); | ||||||
|     let (response, code) = index.search_post(search.clone()).await; |     let (response, code) = index.search_post(search.clone()).await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###); | ||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .update_settings(json!({ |         .update_settings(json!({ | ||||||
| @@ -228,7 +257,7 @@ async fn distribution_shift() { | |||||||
|  |  | ||||||
|     let (response, code) = index.search_post(search).await; |     let (response, code) = index.search_post(search).await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.19161224365234375},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.1920928955078125e-7},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.1920928955078125e-7}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.19161224365234375},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.1920928955078125e-7},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.1920928955078125e-7}]"###); | ||||||
| } | } | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| @@ -239,20 +268,23 @@ async fn highlighter() { | |||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], |         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], | ||||||
|             "hybrid": {"semanticRatio": 0.2}, |             "hybrid": {"semanticRatio": 0.2}, | ||||||
|  |            "retrieveVectors": true, | ||||||
|            "attributesToHighlight": [ |            "attributesToHighlight": [ | ||||||
|                      "desc" |                      "desc", | ||||||
|  |                      "_vectors", | ||||||
|                    ], |                    ], | ||||||
|            "highlightPreTag": "**BEGIN**", |            "highlightPreTag": "**BEGIN**", | ||||||
|                    "highlightPostTag": "**END**" |            "highlightPostTag": "**END**", | ||||||
|         })) |         })) | ||||||
|         .await; |         .await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"}}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"0"); |     snapshot!(response["semanticHitCount"], @"0"); | ||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], |         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], | ||||||
|             "hybrid": {"semanticRatio": 0.8}, |             "hybrid": {"semanticRatio": 0.8}, | ||||||
|  |             "retrieveVectors": true, | ||||||
|             "showRankingScore": true, |             "showRankingScore": true, | ||||||
|             "attributesToHighlight": [ |             "attributesToHighlight": [ | ||||||
|                      "desc" |                      "desc" | ||||||
| @@ -262,13 +294,14 @@ async fn highlighter() { | |||||||
|         })) |         })) | ||||||
|         .await; |         .await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"3"); |     snapshot!(response["semanticHitCount"], @"3"); | ||||||
|  |  | ||||||
|     // no highlighting on full semantic |     // no highlighting on full semantic | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], |         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], | ||||||
|             "hybrid": {"semanticRatio": 1.0}, |             "hybrid": {"semanticRatio": 1.0}, | ||||||
|  |             "retrieveVectors": true, | ||||||
|             "showRankingScore": true, |             "showRankingScore": true, | ||||||
|             "attributesToHighlight": [ |             "attributesToHighlight": [ | ||||||
|                      "desc" |                      "desc" | ||||||
| @@ -278,7 +311,7 @@ async fn highlighter() { | |||||||
|         })) |         })) | ||||||
|         .await; |         .await; | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"3"); |     snapshot!(response["semanticHitCount"], @"3"); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -361,12 +394,12 @@ async fn single_document() { | |||||||
|  |  | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|     .search_post( |     .search_post( | ||||||
|         json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}), |         json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), | ||||||
|     ) |     ) | ||||||
|     .await; |     .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0}"###); |     snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0}"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"1"); |     snapshot!(response["semanticHitCount"], @"1"); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -377,25 +410,25 @@ async fn query_combination() { | |||||||
|  |  | ||||||
|     // search without query and vector, but with hybrid => still placeholder |     // search without query and vector, but with hybrid => still placeholder | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true})) |         .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":1.0}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"null"); |     snapshot!(response["semanticHitCount"], @"null"); | ||||||
|  |  | ||||||
|     // same with a different semantic ratio |     // same with a different semantic ratio | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true})) |         .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true})) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":1.0}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"null"); |     snapshot!(response["semanticHitCount"], @"null"); | ||||||
|  |  | ||||||
|     // wrong vector dimensions |     // wrong vector dimensions | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|     .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true})) |     .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||||
|     .await; |     .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
| @@ -410,34 +443,34 @@ async fn query_combination() { | |||||||
|  |  | ||||||
|     // full vector |     // full vector | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|     .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true})) |     .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||||
|     .await; |     .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.7773500680923462},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.7236068248748779},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.6581138968467712}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.7773500680923462},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.7236068248748779},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.6581138968467712}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"3"); |     snapshot!(response["semanticHitCount"], @"3"); | ||||||
|  |  | ||||||
|     // full keyword, without a query |     // full keyword, without a query | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|     .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true})) |     .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||||
|     .await; |     .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":1.0}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"null"); |     snapshot!(response["semanticHitCount"], @"null"); | ||||||
|  |  | ||||||
|     // query + vector, full keyword => keyword |     // query + vector, full keyword => keyword | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|     .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true})) |     .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) | ||||||
|     .await; |     .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9242424242424242}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9242424242424242}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"null"); |     snapshot!(response["semanticHitCount"], @"null"); | ||||||
|  |  | ||||||
|     // query + vector, no hybrid keyword => |     // query + vector, no hybrid keyword => | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true})) |         .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true, "retrieveVectors": true})) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"400 Bad Request"); |     snapshot!(code, @"400 Bad Request"); | ||||||
| @@ -453,7 +486,7 @@ async fn query_combination() { | |||||||
|     // full vector, without a vector => error |     // full vector, without a vector => error | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post( |         .search_post( | ||||||
|             json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}), |             json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), | ||||||
|         ) |         ) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
| @@ -470,11 +503,93 @@ async fn query_combination() { | |||||||
|     // hybrid without a vector => full keyword |     // hybrid without a vector => full keyword | ||||||
|     let (response, code) = index |     let (response, code) = index | ||||||
|         .search_post( |         .search_post( | ||||||
|             json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true}), |             json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}), | ||||||
|         ) |         ) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|     snapshot!(code, @"200 OK"); |     snapshot!(code, @"200 OK"); | ||||||
|     snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9242424242424242}]"###); |     snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9242424242424242}]"###); | ||||||
|     snapshot!(response["semanticHitCount"], @"0"); |     snapshot!(response["semanticHitCount"], @"0"); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn retrieve_vectors() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .search_post( | ||||||
|  |             json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"},  @r###" | ||||||
|  |     [ | ||||||
|  |       { | ||||||
|  |         "title": "Captain Planet", | ||||||
|  |         "desc": "He's not part of the Marvel Cinematic Universe", | ||||||
|  |         "id": "2", | ||||||
|  |         "_vectors": { | ||||||
|  |           "default": { | ||||||
|  |             "embeddings": "[vectors]", | ||||||
|  |             "regenerate": true | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       { | ||||||
|  |         "title": "Captain Marvel", | ||||||
|  |         "desc": "a Shazam ersatz", | ||||||
|  |         "id": "3", | ||||||
|  |         "_vectors": { | ||||||
|  |           "default": { | ||||||
|  |             "embeddings": "[vectors]", | ||||||
|  |             "regenerate": true | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       { | ||||||
|  |         "title": "Shazam!", | ||||||
|  |         "desc": "a Captain Marvel ersatz", | ||||||
|  |         "id": "1", | ||||||
|  |         "_vectors": { | ||||||
|  |           "default": { | ||||||
|  |             "embeddings": "[vectors]", | ||||||
|  |             "regenerate": true | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // remove `_vectors` from displayed attributes | ||||||
|  |     let (response, code) = | ||||||
|  |         index.update_settings(json!({ "displayedAttributes": ["id", "title", "desc"]} )).await; | ||||||
|  |     assert_eq!(202, code, "{:?}", response); | ||||||
|  |     index.wait_task(response.uid()).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .search_post( | ||||||
|  |             json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"},  @r###" | ||||||
|  |     [ | ||||||
|  |       { | ||||||
|  |         "title": "Captain Planet", | ||||||
|  |         "desc": "He's not part of the Marvel Cinematic Universe", | ||||||
|  |         "id": "2" | ||||||
|  |       }, | ||||||
|  |       { | ||||||
|  |         "title": "Captain Marvel", | ||||||
|  |         "desc": "a Shazam ersatz", | ||||||
|  |         "id": "3" | ||||||
|  |       }, | ||||||
|  |       { | ||||||
|  |         "title": "Shazam!", | ||||||
|  |         "desc": "a Captain Marvel ersatz", | ||||||
|  |         "id": "1" | ||||||
|  |       } | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -48,6 +48,31 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| { | |||||||
|     ]) |     ]) | ||||||
| }); | }); | ||||||
|  |  | ||||||
|  | static SCORE_DOCUMENTS: Lazy<Value> = Lazy::new(|| { | ||||||
|  |     json!([ | ||||||
|  |         { | ||||||
|  |             "title": "Batman the dark knight returns: Part 1", | ||||||
|  |             "id": "A", | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             "title": "Batman the dark knight returns: Part 2", | ||||||
|  |             "id": "B", | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             "title": "Batman Returns", | ||||||
|  |             "id": "C", | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             "title": "Batman", | ||||||
|  |             "id": "D", | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             "title": "Badman", | ||||||
|  |             "id": "E", | ||||||
|  |         } | ||||||
|  |     ]) | ||||||
|  | }); | ||||||
|  |  | ||||||
| static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| { | static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| { | ||||||
|     json!([ |     json!([ | ||||||
|         { |         { | ||||||
| @@ -276,7 +301,7 @@ async fn negative_special_cases_search() { | |||||||
|     index.add_documents(documents, None).await; |     index.add_documents(documents, None).await; | ||||||
|     index.wait_task(0).await; |     index.wait_task(0).await; | ||||||
|  |  | ||||||
|     index.update_settings(json!({"synonyms": { "escape": ["glass"] }})).await; |     index.update_settings(json!({"synonyms": { "escape": ["gläss"] }})).await; | ||||||
|     index.wait_task(1).await; |     index.wait_task(1).await; | ||||||
|  |  | ||||||
|     // There is a synonym for escape -> glass but we don't want "escape", only the derivates: glass |     // There is a synonym for escape -> glass but we don't want "escape", only the derivates: glass | ||||||
| @@ -960,6 +985,213 @@ async fn test_score_details() { | |||||||
|         .await; |         .await; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn test_score() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |  | ||||||
|  |     let documents = SCORE_DOCUMENTS.clone(); | ||||||
|  |  | ||||||
|  |     let res = index.add_documents(json!(documents), None).await; | ||||||
|  |     index.wait_task(res.0.uid()).await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .search( | ||||||
|  |             json!({ | ||||||
|  |                 "q": "Badman the dark knight returns 1", | ||||||
|  |                 "showRankingScore": true, | ||||||
|  |             }), | ||||||
|  |             |response, code| { | ||||||
|  |                 meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" | ||||||
|  |                 [ | ||||||
|  |                   { | ||||||
|  |                     "title": "Batman the dark knight returns: Part 1", | ||||||
|  |                     "id": "A", | ||||||
|  |                     "_rankingScore": 0.9746605609456898 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Batman the dark knight returns: Part 2", | ||||||
|  |                     "id": "B", | ||||||
|  |                     "_rankingScore": 0.8055252965383685 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Badman", | ||||||
|  |                     "id": "E", | ||||||
|  |                     "_rankingScore": 0.16666666666666666 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Batman Returns", | ||||||
|  |                     "id": "C", | ||||||
|  |                     "_rankingScore": 0.07702020202020202 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Batman", | ||||||
|  |                     "id": "D", | ||||||
|  |                     "_rankingScore": 0.07702020202020202 | ||||||
|  |                   } | ||||||
|  |                 ] | ||||||
|  |                 "###); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn test_score_threshold() { | ||||||
|  |     let query = "Badman dark returns 1"; | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |  | ||||||
|  |     let documents = SCORE_DOCUMENTS.clone(); | ||||||
|  |  | ||||||
|  |     let res = index.add_documents(json!(documents), None).await; | ||||||
|  |     index.wait_task(res.0.uid()).await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .search( | ||||||
|  |             json!({ | ||||||
|  |                 "q": query, | ||||||
|  |                 "showRankingScore": true, | ||||||
|  |                 "rankingScoreThreshold": 0.0 | ||||||
|  |             }), | ||||||
|  |             |response, code| { | ||||||
|  |                 meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"5"); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" | ||||||
|  |                 [ | ||||||
|  |                   { | ||||||
|  |                     "title": "Batman the dark knight returns: Part 1", | ||||||
|  |                     "id": "A", | ||||||
|  |                     "_rankingScore": 0.93430081300813 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Batman the dark knight returns: Part 2", | ||||||
|  |                     "id": "B", | ||||||
|  |                     "_rankingScore": 0.6685627880184332 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Badman", | ||||||
|  |                     "id": "E", | ||||||
|  |                     "_rankingScore": 0.25 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Batman Returns", | ||||||
|  |                     "id": "C", | ||||||
|  |                     "_rankingScore": 0.11553030303030302 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Batman", | ||||||
|  |                     "id": "D", | ||||||
|  |                     "_rankingScore": 0.11553030303030302 | ||||||
|  |                   } | ||||||
|  |                 ] | ||||||
|  |                 "###); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .search( | ||||||
|  |             json!({ | ||||||
|  |                 "q": query, | ||||||
|  |                 "showRankingScore": true, | ||||||
|  |                 "rankingScoreThreshold": 0.2 | ||||||
|  |             }), | ||||||
|  |             |response, code| { | ||||||
|  |                 meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"3"###); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" | ||||||
|  |                 [ | ||||||
|  |                   { | ||||||
|  |                     "title": "Batman the dark knight returns: Part 1", | ||||||
|  |                     "id": "A", | ||||||
|  |                     "_rankingScore": 0.93430081300813 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Batman the dark knight returns: Part 2", | ||||||
|  |                     "id": "B", | ||||||
|  |                     "_rankingScore": 0.6685627880184332 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Badman", | ||||||
|  |                     "id": "E", | ||||||
|  |                     "_rankingScore": 0.25 | ||||||
|  |                   } | ||||||
|  |                 ] | ||||||
|  |                 "###); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .search( | ||||||
|  |             json!({ | ||||||
|  |                 "q": query, | ||||||
|  |                 "showRankingScore": true, | ||||||
|  |                 "rankingScoreThreshold": 0.5 | ||||||
|  |             }), | ||||||
|  |             |response, code| { | ||||||
|  |                 meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"2"###); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" | ||||||
|  |                 [ | ||||||
|  |                   { | ||||||
|  |                     "title": "Batman the dark knight returns: Part 1", | ||||||
|  |                     "id": "A", | ||||||
|  |                     "_rankingScore": 0.93430081300813 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Batman the dark knight returns: Part 2", | ||||||
|  |                     "id": "B", | ||||||
|  |                     "_rankingScore": 0.6685627880184332 | ||||||
|  |                   } | ||||||
|  |                 ] | ||||||
|  |                 "###); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .search( | ||||||
|  |             json!({ | ||||||
|  |                 "q": query, | ||||||
|  |                 "showRankingScore": true, | ||||||
|  |                 "rankingScoreThreshold": 0.8 | ||||||
|  |             }), | ||||||
|  |             |response, code| { | ||||||
|  |                 meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"1"###); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" | ||||||
|  |                 [ | ||||||
|  |                   { | ||||||
|  |                     "title": "Batman the dark knight returns: Part 1", | ||||||
|  |                     "id": "A", | ||||||
|  |                     "_rankingScore": 0.93430081300813 | ||||||
|  |                   } | ||||||
|  |                 ] | ||||||
|  |                 "###); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .search( | ||||||
|  |             json!({ | ||||||
|  |                 "q": query, | ||||||
|  |                 "showRankingScore": true, | ||||||
|  |                 "rankingScoreThreshold": 1.0 | ||||||
|  |             }), | ||||||
|  |             |response, code| { | ||||||
|  |                 meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @r###"0"###); | ||||||
|  |                 // nobody is perfect | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @"[]"); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  | } | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| async fn test_degraded_score_details() { | async fn test_degraded_score_details() { | ||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
| @@ -1058,21 +1290,38 @@ async fn experimental_feature_vector_store() { | |||||||
|     index.add_documents(json!(documents), None).await; |     index.add_documents(json!(documents), None).await; | ||||||
|     index.wait_task(0).await; |     index.wait_task(0).await; | ||||||
|  |  | ||||||
|     let (response, code) = index |     index | ||||||
|         .search_post(json!({ |         .search(json!({ | ||||||
|             "vector": [1.0, 2.0, 3.0], |             "vector": [1.0, 2.0, 3.0], | ||||||
|             "showRankingScore": true |             "showRankingScore": true | ||||||
|         })) |         }), |response, code|{ | ||||||
|         .await; |  | ||||||
|             meili_snap::snapshot!(code, @"400 Bad Request"); |             meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|             meili_snap::snapshot!(meili_snap::json_string!(response), @r###" |             meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|             { |             { | ||||||
|       "message": "Passing `vector` as a query parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677", |               "message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677", | ||||||
|               "code": "feature_not_enabled", |               "code": "feature_not_enabled", | ||||||
|               "type": "invalid_request", |               "type": "invalid_request", | ||||||
|               "link": "https://docs.meilisearch.com/errors#feature_not_enabled" |               "link": "https://docs.meilisearch.com/errors#feature_not_enabled" | ||||||
|             } |             } | ||||||
|             "###); |             "###); | ||||||
|  |         }) | ||||||
|  |         .await; | ||||||
|  |     index | ||||||
|  |         .search(json!({ | ||||||
|  |             "retrieveVectors": true, | ||||||
|  |             "showRankingScore": true | ||||||
|  |         }), |response, code|{ | ||||||
|  |             meili_snap::snapshot!(code, @"400 Bad Request"); | ||||||
|  |             meili_snap::snapshot!(meili_snap::json_string!(response), @r###" | ||||||
|  |             { | ||||||
|  |               "message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677", | ||||||
|  |               "code": "feature_not_enabled", | ||||||
|  |               "type": "invalid_request", | ||||||
|  |               "link": "https://docs.meilisearch.com/errors#feature_not_enabled" | ||||||
|  |             } | ||||||
|  |             "###); | ||||||
|  |         }) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|     let (response, code) = server.set_features(json!({"vectorStore": true})).await; |     let (response, code) = server.set_features(json!({"vectorStore": true})).await; | ||||||
|     meili_snap::snapshot!(code, @"200 OK"); |     meili_snap::snapshot!(code, @"200 OK"); | ||||||
| @@ -1105,6 +1354,7 @@ async fn experimental_feature_vector_store() { | |||||||
|         .search_post(json!({ |         .search_post(json!({ | ||||||
|             "vector": [1.0, 2.0, 3.0], |             "vector": [1.0, 2.0, 3.0], | ||||||
|             "showRankingScore": true, |             "showRankingScore": true, | ||||||
|  |             "retrieveVectors": true, | ||||||
|         })) |         })) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
| @@ -1116,11 +1366,16 @@ async fn experimental_feature_vector_store() { | |||||||
|         "title": "Shazam!", |         "title": "Shazam!", | ||||||
|         "id": "287947", |         "id": "287947", | ||||||
|         "_vectors": { |         "_vectors": { | ||||||
|           "manual": [ |           "manual": { | ||||||
|  |             "embeddings": [ | ||||||
|  |               [ | ||||||
|                 1.0, |                 1.0, | ||||||
|                 2.0, |                 2.0, | ||||||
|                 3.0 |                 3.0 | ||||||
|               ] |               ] | ||||||
|  |             ], | ||||||
|  |             "regenerate": false | ||||||
|  |           } | ||||||
|         }, |         }, | ||||||
|         "_rankingScore": 1.0 |         "_rankingScore": 1.0 | ||||||
|       }, |       }, | ||||||
| @@ -1128,11 +1383,16 @@ async fn experimental_feature_vector_store() { | |||||||
|         "title": "Captain Marvel", |         "title": "Captain Marvel", | ||||||
|         "id": "299537", |         "id": "299537", | ||||||
|         "_vectors": { |         "_vectors": { | ||||||
|           "manual": [ |           "manual": { | ||||||
|  |             "embeddings": [ | ||||||
|  |               [ | ||||||
|                 1.0, |                 1.0, | ||||||
|                 2.0, |                 2.0, | ||||||
|                 54.0 |                 54.0 | ||||||
|               ] |               ] | ||||||
|  |             ], | ||||||
|  |             "regenerate": false | ||||||
|  |           } | ||||||
|         }, |         }, | ||||||
|         "_rankingScore": 0.9129111766815186 |         "_rankingScore": 0.9129111766815186 | ||||||
|       }, |       }, | ||||||
| @@ -1140,11 +1400,16 @@ async fn experimental_feature_vector_store() { | |||||||
|         "title": "Gläss", |         "title": "Gläss", | ||||||
|         "id": "450465", |         "id": "450465", | ||||||
|         "_vectors": { |         "_vectors": { | ||||||
|           "manual": [ |           "manual": { | ||||||
|  |             "embeddings": [ | ||||||
|  |               [ | ||||||
|                 -100.0, |                 -100.0, | ||||||
|                 340.0, |                 340.0, | ||||||
|                 90.0 |                 90.0 | ||||||
|               ] |               ] | ||||||
|  |             ], | ||||||
|  |             "regenerate": false | ||||||
|  |           } | ||||||
|         }, |         }, | ||||||
|         "_rankingScore": 0.8106412887573242 |         "_rankingScore": 0.8106412887573242 | ||||||
|       }, |       }, | ||||||
| @@ -1152,11 +1417,16 @@ async fn experimental_feature_vector_store() { | |||||||
|         "title": "How to Train Your Dragon: The Hidden World", |         "title": "How to Train Your Dragon: The Hidden World", | ||||||
|         "id": "166428", |         "id": "166428", | ||||||
|         "_vectors": { |         "_vectors": { | ||||||
|           "manual": [ |           "manual": { | ||||||
|  |             "embeddings": [ | ||||||
|  |               [ | ||||||
|                 -100.0, |                 -100.0, | ||||||
|                 231.0, |                 231.0, | ||||||
|                 32.0 |                 32.0 | ||||||
|               ] |               ] | ||||||
|  |             ], | ||||||
|  |             "regenerate": false | ||||||
|  |           } | ||||||
|         }, |         }, | ||||||
|         "_rankingScore": 0.7412010431289673 |         "_rankingScore": 0.7412010431289673 | ||||||
|       }, |       }, | ||||||
| @@ -1164,11 +1434,16 @@ async fn experimental_feature_vector_store() { | |||||||
|         "title": "Escape Room", |         "title": "Escape Room", | ||||||
|         "id": "522681", |         "id": "522681", | ||||||
|         "_vectors": { |         "_vectors": { | ||||||
|           "manual": [ |           "manual": { | ||||||
|  |             "embeddings": [ | ||||||
|  |               [ | ||||||
|                 10.0, |                 10.0, | ||||||
|                 -23.0, |                 -23.0, | ||||||
|                 32.0 |                 32.0 | ||||||
|               ] |               ] | ||||||
|  |             ], | ||||||
|  |             "regenerate": false | ||||||
|  |           } | ||||||
|         }, |         }, | ||||||
|         "_rankingScore": 0.6972063183784485 |         "_rankingScore": 0.6972063183784485 | ||||||
|       } |       } | ||||||
|   | |||||||
| @@ -0,0 +1,20 @@ | |||||||
|  | --- | ||||||
|  | source: meilisearch/tests/search/distinct.rs | ||||||
|  | --- | ||||||
|  | { | ||||||
|  |   "uid": 1, | ||||||
|  |   "indexUid": "tamo", | ||||||
|  |   "status": "succeeded", | ||||||
|  |   "type": "settingsUpdate", | ||||||
|  |   "canceledBy": null, | ||||||
|  |   "details": { | ||||||
|  |     "filterableAttributes": [ | ||||||
|  |       "color.main" | ||||||
|  |     ] | ||||||
|  |   }, | ||||||
|  |   "error": null, | ||||||
|  |   "duration": "[duration]", | ||||||
|  |   "enqueuedAt": "[date]", | ||||||
|  |   "startedAt": "[date]", | ||||||
|  |   "finishedAt": "[date]" | ||||||
|  | } | ||||||
| @@ -0,0 +1,18 @@ | |||||||
|  | --- | ||||||
|  | source: meilisearch/tests/search/errors.rs | ||||||
|  | --- | ||||||
|  | { | ||||||
|  |   "uid": 0, | ||||||
|  |   "indexUid": "tamo", | ||||||
|  |   "status": "succeeded", | ||||||
|  |   "type": "indexCreation", | ||||||
|  |   "canceledBy": null, | ||||||
|  |   "details": { | ||||||
|  |     "primaryKey": null | ||||||
|  |   }, | ||||||
|  |   "error": null, | ||||||
|  |   "duration": "[duration]", | ||||||
|  |   "enqueuedAt": "[date]", | ||||||
|  |   "startedAt": "[date]", | ||||||
|  |   "finishedAt": "[date]" | ||||||
|  | } | ||||||
| @@ -87,6 +87,68 @@ async fn similar_bad_id() { | |||||||
|     "###); |     "###); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn similar_bad_ranking_score_threshold() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |     server.set_features(json!({"vectorStore": true})).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |         "embedders": { | ||||||
|  |             "manual": { | ||||||
|  |                 "source": "userProvided", | ||||||
|  |                 "dimensions": 3, | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|  |         "filterableAttributes": ["title"]})) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     server.wait_task(response.uid()).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = index.similar_post(json!({"rankingScoreThreshold": ["doggo"]})).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.rankingScoreThreshold`: expected a number, but found an array: `[\"doggo\"]`", | ||||||
|  |       "code": "invalid_similar_ranking_score_threshold", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn similar_invalid_ranking_score_threshold() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |     server.set_features(json!({"vectorStore": true})).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |         "embedders": { | ||||||
|  |             "manual": { | ||||||
|  |                 "source": "userProvided", | ||||||
|  |                 "dimensions": 3, | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|  |         "filterableAttributes": ["title"]})) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     server.wait_task(response.uid()).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = index.similar_post(json!({"rankingScoreThreshold": 42})).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value at `.rankingScoreThreshold`: the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.", | ||||||
|  |       "code": "invalid_similar_ranking_score_threshold", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_similar_ranking_score_threshold" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| async fn similar_invalid_id() { | async fn similar_invalid_id() { | ||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
| @@ -694,3 +756,54 @@ async fn filter_reserved_geo_point_string() { | |||||||
|         }) |         }) | ||||||
|         .await; |         .await; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn similar_bad_retrieve_vectors() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     server.set_features(json!({"vectorStore": true})).await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"doggo\"`", | ||||||
|  |       "code": "invalid_similar_retrieve_vectors", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found an array: `[true]`", | ||||||
|  |       "code": "invalid_similar_retrieve_vectors", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.similar_get("?retrieveVectors=").await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value in parameter `retrieveVectors`: could not parse `` as a boolean, expected either `true` or `false`", | ||||||
|  |       "code": "invalid_similar_retrieve_vectors", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.similar_get("?retrieveVectors=doggo").await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value in parameter `retrieveVectors`: could not parse `doggo` as a boolean, expected either `true` or `false`", | ||||||
|  |       "code": "invalid_similar_retrieve_vectors", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_similar_retrieve_vectors" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -78,7 +78,7 @@ async fn basic() { | |||||||
|     index.wait_task(value.uid()).await; |     index.wait_task(value.uid()).await; | ||||||
|  |  | ||||||
|     index |     index | ||||||
|         .similar(json!({"id": 143}), |response, code| { |         .similar(json!({"id": 143, "retrieveVectors": true}), |response, code| { | ||||||
|             snapshot!(code, @"200 OK"); |             snapshot!(code, @"200 OK"); | ||||||
|             snapshot!(json_string!(response["hits"]), @r###" |             snapshot!(json_string!(response["hits"]), @r###" | ||||||
|             [ |             [ | ||||||
| @@ -87,11 +87,16 @@ async fn basic() { | |||||||
|                 "release_year": 2019, |                 "release_year": 2019, | ||||||
|                 "id": "522681", |                 "id": "522681", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": { | ||||||
|                     0.1, |                     "embeddings": [ | ||||||
|                     0.6, |                       [ | ||||||
|                     0.8 |                         0.10000000149011612, | ||||||
|  |                         0.6000000238418579, | ||||||
|  |                         0.800000011920929 | ||||||
|                       ] |                       ] | ||||||
|  |                     ], | ||||||
|  |                     "regenerate": false | ||||||
|  |                   } | ||||||
|                 } |                 } | ||||||
|               }, |               }, | ||||||
|               { |               { | ||||||
| @@ -99,11 +104,16 @@ async fn basic() { | |||||||
|                 "release_year": 2019, |                 "release_year": 2019, | ||||||
|                 "id": "299537", |                 "id": "299537", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": { | ||||||
|                     0.6, |                     "embeddings": [ | ||||||
|                     0.8, |                       [ | ||||||
|                     -0.2 |                         0.6000000238418579, | ||||||
|  |                         0.800000011920929, | ||||||
|  |                         -0.20000000298023224 | ||||||
|                       ] |                       ] | ||||||
|  |                     ], | ||||||
|  |                     "regenerate": false | ||||||
|  |                   } | ||||||
|                 } |                 } | ||||||
|               }, |               }, | ||||||
|               { |               { | ||||||
| @@ -111,11 +121,16 @@ async fn basic() { | |||||||
|                 "release_year": 2019, |                 "release_year": 2019, | ||||||
|                 "id": "166428", |                 "id": "166428", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": { | ||||||
|                     0.7, |                     "embeddings": [ | ||||||
|                     0.7, |                       [ | ||||||
|                     -0.4 |                         0.699999988079071, | ||||||
|  |                         0.699999988079071, | ||||||
|  |                         -0.4000000059604645 | ||||||
|                       ] |                       ] | ||||||
|  |                     ], | ||||||
|  |                     "regenerate": false | ||||||
|  |                   } | ||||||
|                 } |                 } | ||||||
|               }, |               }, | ||||||
|               { |               { | ||||||
| @@ -123,11 +138,16 @@ async fn basic() { | |||||||
|                 "release_year": 2019, |                 "release_year": 2019, | ||||||
|                 "id": "287947", |                 "id": "287947", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": { | ||||||
|                     0.8, |                     "embeddings": [ | ||||||
|                     0.4, |                       [ | ||||||
|  |                         0.800000011920929, | ||||||
|  |                         0.4000000059604645, | ||||||
|                         -0.5 |                         -0.5 | ||||||
|                       ] |                       ] | ||||||
|  |                     ], | ||||||
|  |                     "regenerate": false | ||||||
|  |                   } | ||||||
|                 } |                 } | ||||||
|               } |               } | ||||||
|             ] |             ] | ||||||
| @@ -136,7 +156,7 @@ async fn basic() { | |||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|     index |     index | ||||||
|         .similar(json!({"id": "299537"}), |response, code| { |         .similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| { | ||||||
|             snapshot!(code, @"200 OK"); |             snapshot!(code, @"200 OK"); | ||||||
|             snapshot!(json_string!(response["hits"]), @r###" |             snapshot!(json_string!(response["hits"]), @r###" | ||||||
|             [ |             [ | ||||||
| @@ -145,11 +165,16 @@ async fn basic() { | |||||||
|                 "release_year": 2019, |                 "release_year": 2019, | ||||||
|                 "id": "166428", |                 "id": "166428", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": { | ||||||
|                     0.7, |                     "embeddings": [ | ||||||
|                     0.7, |                       [ | ||||||
|                     -0.4 |                         0.699999988079071, | ||||||
|  |                         0.699999988079071, | ||||||
|  |                         -0.4000000059604645 | ||||||
|                       ] |                       ] | ||||||
|  |                     ], | ||||||
|  |                     "regenerate": false | ||||||
|  |                   } | ||||||
|                 } |                 } | ||||||
|               }, |               }, | ||||||
|               { |               { | ||||||
| @@ -157,11 +182,16 @@ async fn basic() { | |||||||
|                 "release_year": 2019, |                 "release_year": 2019, | ||||||
|                 "id": "287947", |                 "id": "287947", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": { | ||||||
|                     0.8, |                     "embeddings": [ | ||||||
|                     0.4, |                       [ | ||||||
|  |                         0.800000011920929, | ||||||
|  |                         0.4000000059604645, | ||||||
|                         -0.5 |                         -0.5 | ||||||
|                       ] |                       ] | ||||||
|  |                     ], | ||||||
|  |                     "regenerate": false | ||||||
|  |                   } | ||||||
|                 } |                 } | ||||||
|               }, |               }, | ||||||
|               { |               { | ||||||
| @@ -169,11 +199,16 @@ async fn basic() { | |||||||
|                 "release_year": 2019, |                 "release_year": 2019, | ||||||
|                 "id": "522681", |                 "id": "522681", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": { | ||||||
|                     0.1, |                     "embeddings": [ | ||||||
|                     0.6, |                       [ | ||||||
|                     0.8 |                         0.10000000149011612, | ||||||
|  |                         0.6000000238418579, | ||||||
|  |                         0.800000011920929 | ||||||
|                       ] |                       ] | ||||||
|  |                     ], | ||||||
|  |                     "regenerate": false | ||||||
|  |                   } | ||||||
|                 } |                 } | ||||||
|               }, |               }, | ||||||
|               { |               { | ||||||
| @@ -181,11 +216,16 @@ async fn basic() { | |||||||
|                 "release_year": 1930, |                 "release_year": 1930, | ||||||
|                 "id": "143", |                 "id": "143", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": { | ||||||
|  |                     "embeddings": [ | ||||||
|  |                       [ | ||||||
|                         -0.5, |                         -0.5, | ||||||
|                     0.3, |                         0.30000001192092896, | ||||||
|                     0.85 |                         0.8500000238418579 | ||||||
|                       ] |                       ] | ||||||
|  |                     ], | ||||||
|  |                     "regenerate": false | ||||||
|  |                   } | ||||||
|                 } |                 } | ||||||
|               } |               } | ||||||
|             ] |             ] | ||||||
| @@ -194,6 +234,285 @@ async fn basic() { | |||||||
|         .await; |         .await; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn ranking_score_threshold() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |     let (value, code) = server.set_features(json!({"vectorStore": true})).await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     snapshot!(value, @r###" | ||||||
|  |     { | ||||||
|  |       "vectorStore": true, | ||||||
|  |       "metrics": false, | ||||||
|  |       "logsRoute": false | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |         "embedders": { | ||||||
|  |             "manual": { | ||||||
|  |                 "source": "userProvided", | ||||||
|  |                 "dimensions": 3, | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|  |         "filterableAttributes": ["title"]})) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     server.wait_task(response.uid()).await; | ||||||
|  |  | ||||||
|  |     let documents = DOCUMENTS.clone(); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(value.uid()).await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .similar( | ||||||
|  |             json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true}), | ||||||
|  |             |response, code| { | ||||||
|  |                 snapshot!(code, @"200 OK"); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4"); | ||||||
|  |                 snapshot!(json_string!(response["hits"]), @r###" | ||||||
|  |                 [ | ||||||
|  |                   { | ||||||
|  |                     "title": "Escape Room", | ||||||
|  |                     "release_year": 2019, | ||||||
|  |                     "id": "522681", | ||||||
|  |                     "_vectors": { | ||||||
|  |                       "manual": { | ||||||
|  |                         "embeddings": [ | ||||||
|  |                           [ | ||||||
|  |                             0.10000000149011612, | ||||||
|  |                             0.6000000238418579, | ||||||
|  |                             0.800000011920929 | ||||||
|  |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|  |                     }, | ||||||
|  |                     "_rankingScore": 0.890957772731781 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Captain Marvel", | ||||||
|  |                     "release_year": 2019, | ||||||
|  |                     "id": "299537", | ||||||
|  |                     "_vectors": { | ||||||
|  |                       "manual": { | ||||||
|  |                         "embeddings": [ | ||||||
|  |                           [ | ||||||
|  |                             0.6000000238418579, | ||||||
|  |                             0.800000011920929, | ||||||
|  |                             -0.20000000298023224 | ||||||
|  |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|  |                     }, | ||||||
|  |                     "_rankingScore": 0.39060014486312866 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "How to Train Your Dragon: The Hidden World", | ||||||
|  |                     "release_year": 2019, | ||||||
|  |                     "id": "166428", | ||||||
|  |                     "_vectors": { | ||||||
|  |                       "manual": { | ||||||
|  |                         "embeddings": [ | ||||||
|  |                           [ | ||||||
|  |                             0.699999988079071, | ||||||
|  |                             0.699999988079071, | ||||||
|  |                             -0.4000000059604645 | ||||||
|  |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|  |                     }, | ||||||
|  |                     "_rankingScore": 0.2819308042526245 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Shazam!", | ||||||
|  |                     "release_year": 2019, | ||||||
|  |                     "id": "287947", | ||||||
|  |                     "_vectors": { | ||||||
|  |                       "manual": { | ||||||
|  |                         "embeddings": [ | ||||||
|  |                           [ | ||||||
|  |                             0.800000011920929, | ||||||
|  |                             0.4000000059604645, | ||||||
|  |                             -0.5 | ||||||
|  |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|  |                     }, | ||||||
|  |                     "_rankingScore": 0.1662663221359253 | ||||||
|  |                   } | ||||||
|  |                 ] | ||||||
|  |                 "###); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .similar( | ||||||
|  |             json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true}), | ||||||
|  |             |response, code| { | ||||||
|  |                 snapshot!(code, @"200 OK"); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3"); | ||||||
|  |                 snapshot!(json_string!(response["hits"]), @r###" | ||||||
|  |                 [ | ||||||
|  |                   { | ||||||
|  |                     "title": "Escape Room", | ||||||
|  |                     "release_year": 2019, | ||||||
|  |                     "id": "522681", | ||||||
|  |                     "_vectors": { | ||||||
|  |                       "manual": { | ||||||
|  |                         "embeddings": [ | ||||||
|  |                           [ | ||||||
|  |                             0.10000000149011612, | ||||||
|  |                             0.6000000238418579, | ||||||
|  |                             0.800000011920929 | ||||||
|  |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|  |                     }, | ||||||
|  |                     "_rankingScore": 0.890957772731781 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Captain Marvel", | ||||||
|  |                     "release_year": 2019, | ||||||
|  |                     "id": "299537", | ||||||
|  |                     "_vectors": { | ||||||
|  |                       "manual": { | ||||||
|  |                         "embeddings": [ | ||||||
|  |                           [ | ||||||
|  |                             0.6000000238418579, | ||||||
|  |                             0.800000011920929, | ||||||
|  |                             -0.20000000298023224 | ||||||
|  |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|  |                     }, | ||||||
|  |                     "_rankingScore": 0.39060014486312866 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "How to Train Your Dragon: The Hidden World", | ||||||
|  |                     "release_year": 2019, | ||||||
|  |                     "id": "166428", | ||||||
|  |                     "_vectors": { | ||||||
|  |                       "manual": { | ||||||
|  |                         "embeddings": [ | ||||||
|  |                           [ | ||||||
|  |                             0.699999988079071, | ||||||
|  |                             0.699999988079071, | ||||||
|  |                             -0.4000000059604645 | ||||||
|  |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|  |                     }, | ||||||
|  |                     "_rankingScore": 0.2819308042526245 | ||||||
|  |                   } | ||||||
|  |                 ] | ||||||
|  |                 "###); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .similar( | ||||||
|  |             json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true}), | ||||||
|  |             |response, code| { | ||||||
|  |                 snapshot!(code, @"200 OK"); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2"); | ||||||
|  |                 snapshot!(json_string!(response["hits"]), @r###" | ||||||
|  |                 [ | ||||||
|  |                   { | ||||||
|  |                     "title": "Escape Room", | ||||||
|  |                     "release_year": 2019, | ||||||
|  |                     "id": "522681", | ||||||
|  |                     "_vectors": { | ||||||
|  |                       "manual": { | ||||||
|  |                         "embeddings": [ | ||||||
|  |                           [ | ||||||
|  |                             0.10000000149011612, | ||||||
|  |                             0.6000000238418579, | ||||||
|  |                             0.800000011920929 | ||||||
|  |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|  |                     }, | ||||||
|  |                     "_rankingScore": 0.890957772731781 | ||||||
|  |                   }, | ||||||
|  |                   { | ||||||
|  |                     "title": "Captain Marvel", | ||||||
|  |                     "release_year": 2019, | ||||||
|  |                     "id": "299537", | ||||||
|  |                     "_vectors": { | ||||||
|  |                       "manual": { | ||||||
|  |                         "embeddings": [ | ||||||
|  |                           [ | ||||||
|  |                             0.6000000238418579, | ||||||
|  |                             0.800000011920929, | ||||||
|  |                             -0.20000000298023224 | ||||||
|  |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|  |                     }, | ||||||
|  |                     "_rankingScore": 0.39060014486312866 | ||||||
|  |                   } | ||||||
|  |                 ] | ||||||
|  |                 "###); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .similar( | ||||||
|  |             json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true}), | ||||||
|  |             |response, code| { | ||||||
|  |                 snapshot!(code, @"200 OK"); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1"); | ||||||
|  |                 snapshot!(json_string!(response["hits"]), @r###" | ||||||
|  |                 [ | ||||||
|  |                   { | ||||||
|  |                     "title": "Escape Room", | ||||||
|  |                     "release_year": 2019, | ||||||
|  |                     "id": "522681", | ||||||
|  |                     "_vectors": { | ||||||
|  |                       "manual": { | ||||||
|  |                         "embeddings": [ | ||||||
|  |                           [ | ||||||
|  |                             0.10000000149011612, | ||||||
|  |                             0.6000000238418579, | ||||||
|  |                             0.800000011920929 | ||||||
|  |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|  |                     }, | ||||||
|  |                     "_rankingScore": 0.890957772731781 | ||||||
|  |                   } | ||||||
|  |                 ] | ||||||
|  |                 "###); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .similar( | ||||||
|  |             json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true}), | ||||||
|  |             |response, code| { | ||||||
|  |                 snapshot!(code, @"200 OK"); | ||||||
|  |                 snapshot!(json_string!(response["hits"]), @"[]"); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  | } | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| async fn filter() { | async fn filter() { | ||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
| @@ -227,7 +546,9 @@ async fn filter() { | |||||||
|     index.wait_task(value.uid()).await; |     index.wait_task(value.uid()).await; | ||||||
|  |  | ||||||
|     index |     index | ||||||
|         .similar(json!({"id": 522681, "filter": "release_year = 2019"}), |response, code| { |         .similar( | ||||||
|  |             json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}), | ||||||
|  |             |response, code| { | ||||||
|                 snapshot!(code, @"200 OK"); |                 snapshot!(code, @"200 OK"); | ||||||
|                 snapshot!(json_string!(response["hits"]), @r###" |                 snapshot!(json_string!(response["hits"]), @r###" | ||||||
|                 [ |                 [ | ||||||
| @@ -236,11 +557,16 @@ async fn filter() { | |||||||
|                     "release_year": 2019, |                     "release_year": 2019, | ||||||
|                     "id": "299537", |                     "id": "299537", | ||||||
|                     "_vectors": { |                     "_vectors": { | ||||||
|                   "manual": [ |                       "manual": { | ||||||
|                     0.6, |                         "embeddings": [ | ||||||
|                     0.8, |                           [ | ||||||
|                     -0.2 |                             0.6000000238418579, | ||||||
|  |                             0.800000011920929, | ||||||
|  |                             -0.20000000298023224 | ||||||
|                           ] |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|                     } |                     } | ||||||
|                   }, |                   }, | ||||||
|                   { |                   { | ||||||
| @@ -248,11 +574,16 @@ async fn filter() { | |||||||
|                     "release_year": 2019, |                     "release_year": 2019, | ||||||
|                     "id": "166428", |                     "id": "166428", | ||||||
|                     "_vectors": { |                     "_vectors": { | ||||||
|                   "manual": [ |                       "manual": { | ||||||
|                     0.7, |                         "embeddings": [ | ||||||
|                     0.7, |                           [ | ||||||
|                     -0.4 |                             0.699999988079071, | ||||||
|  |                             0.699999988079071, | ||||||
|  |                             -0.4000000059604645 | ||||||
|                           ] |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|                     } |                     } | ||||||
|                   }, |                   }, | ||||||
|                   { |                   { | ||||||
| @@ -260,20 +591,28 @@ async fn filter() { | |||||||
|                     "release_year": 2019, |                     "release_year": 2019, | ||||||
|                     "id": "287947", |                     "id": "287947", | ||||||
|                     "_vectors": { |                     "_vectors": { | ||||||
|                   "manual": [ |                       "manual": { | ||||||
|                     0.8, |                         "embeddings": [ | ||||||
|                     0.4, |                           [ | ||||||
|  |                             0.800000011920929, | ||||||
|  |                             0.4000000059604645, | ||||||
|                             -0.5 |                             -0.5 | ||||||
|                           ] |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|                     } |                     } | ||||||
|                   } |                   } | ||||||
|                 ] |                 ] | ||||||
|                 "###); |                 "###); | ||||||
|         }) |             }, | ||||||
|  |         ) | ||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|     index |     index | ||||||
|         .similar(json!({"id": 522681, "filter": "release_year < 2000"}), |response, code| { |         .similar( | ||||||
|  |             json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}), | ||||||
|  |             |response, code| { | ||||||
|                 snapshot!(code, @"200 OK"); |                 snapshot!(code, @"200 OK"); | ||||||
|                 snapshot!(json_string!(response["hits"]), @r###" |                 snapshot!(json_string!(response["hits"]), @r###" | ||||||
|                 [ |                 [ | ||||||
| @@ -282,16 +621,22 @@ async fn filter() { | |||||||
|                     "release_year": 1930, |                     "release_year": 1930, | ||||||
|                     "id": "143", |                     "id": "143", | ||||||
|                     "_vectors": { |                     "_vectors": { | ||||||
|                   "manual": [ |                       "manual": { | ||||||
|  |                         "embeddings": [ | ||||||
|  |                           [ | ||||||
|                             -0.5, |                             -0.5, | ||||||
|                     0.3, |                             0.30000001192092896, | ||||||
|                     0.85 |                             0.8500000238418579 | ||||||
|                           ] |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|                     } |                     } | ||||||
|                   } |                   } | ||||||
|                 ] |                 ] | ||||||
|                 "###); |                 "###); | ||||||
|         }) |             }, | ||||||
|  |         ) | ||||||
|         .await; |         .await; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -328,7 +673,7 @@ async fn limit_and_offset() { | |||||||
|     index.wait_task(value.uid()).await; |     index.wait_task(value.uid()).await; | ||||||
|  |  | ||||||
|     index |     index | ||||||
|         .similar(json!({"id": 143, "limit": 1}), |response, code| { |         .similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| { | ||||||
|             snapshot!(code, @"200 OK"); |             snapshot!(code, @"200 OK"); | ||||||
|             snapshot!(json_string!(response["hits"]), @r###" |             snapshot!(json_string!(response["hits"]), @r###" | ||||||
|             [ |             [ | ||||||
| @@ -337,11 +682,16 @@ async fn limit_and_offset() { | |||||||
|                 "release_year": 2019, |                 "release_year": 2019, | ||||||
|                 "id": "522681", |                 "id": "522681", | ||||||
|                 "_vectors": { |                 "_vectors": { | ||||||
|                   "manual": [ |                   "manual": { | ||||||
|                     0.1, |                     "embeddings": [ | ||||||
|                     0.6, |                       [ | ||||||
|                     0.8 |                         0.10000000149011612, | ||||||
|  |                         0.6000000238418579, | ||||||
|  |                         0.800000011920929 | ||||||
|                       ] |                       ] | ||||||
|  |                     ], | ||||||
|  |                     "regenerate": false | ||||||
|  |                   } | ||||||
|                 } |                 } | ||||||
|               } |               } | ||||||
|             ] |             ] | ||||||
| @@ -350,7 +700,9 @@ async fn limit_and_offset() { | |||||||
|         .await; |         .await; | ||||||
|  |  | ||||||
|     index |     index | ||||||
|         .similar(json!({"id": 143, "limit": 1, "offset": 1}), |response, code| { |         .similar( | ||||||
|  |             json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}), | ||||||
|  |             |response, code| { | ||||||
|                 snapshot!(code, @"200 OK"); |                 snapshot!(code, @"200 OK"); | ||||||
|                 snapshot!(json_string!(response["hits"]), @r###" |                 snapshot!(json_string!(response["hits"]), @r###" | ||||||
|                 [ |                 [ | ||||||
| @@ -359,15 +711,21 @@ async fn limit_and_offset() { | |||||||
|                     "release_year": 2019, |                     "release_year": 2019, | ||||||
|                     "id": "299537", |                     "id": "299537", | ||||||
|                     "_vectors": { |                     "_vectors": { | ||||||
|                   "manual": [ |                       "manual": { | ||||||
|                     0.6, |                         "embeddings": [ | ||||||
|                     0.8, |                           [ | ||||||
|                     -0.2 |                             0.6000000238418579, | ||||||
|  |                             0.800000011920929, | ||||||
|  |                             -0.20000000298023224 | ||||||
|                           ] |                           ] | ||||||
|  |                         ], | ||||||
|  |                         "regenerate": false | ||||||
|  |                       } | ||||||
|                     } |                     } | ||||||
|                   } |                   } | ||||||
|                 ] |                 ] | ||||||
|                 "###); |                 "###); | ||||||
|         }) |             }, | ||||||
|  |         ) | ||||||
|         .await; |         .await; | ||||||
| } | } | ||||||
|   | |||||||
							
								
								
									
										588
									
								
								meilisearch/tests/vector/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										588
									
								
								meilisearch/tests/vector/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,588 @@ | |||||||
|  | mod settings; | ||||||
|  |  | ||||||
|  | use meili_snap::{json_string, snapshot}; | ||||||
|  |  | ||||||
|  | use crate::common::index::Index; | ||||||
|  | use crate::common::{GetAllDocumentsOptions, Server}; | ||||||
|  | use crate::json; | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn add_remove_user_provided() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("doggo"); | ||||||
|  |     let (value, code) = server.set_features(json!({"vectorStore": true})).await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     snapshot!(value, @r###" | ||||||
|  |     { | ||||||
|  |       "vectorStore": true, | ||||||
|  |       "metrics": false, | ||||||
|  |       "logsRoute": false | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |           "embedders": { | ||||||
|  |               "manual": { | ||||||
|  |                   "source": "userProvided", | ||||||
|  |                   "dimensions": 3, | ||||||
|  |               } | ||||||
|  |           }, | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     server.wait_task(response.uid()).await; | ||||||
|  |  | ||||||
|  |     let documents = json!([ | ||||||
|  |       {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }}, | ||||||
|  |       {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1] }}, | ||||||
|  |     ]); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(value.uid()).await; | ||||||
|  |  | ||||||
|  |     let (documents, _code) = index | ||||||
|  |         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "id": 0, | ||||||
|  |           "name": "kefir", | ||||||
|  |           "_vectors": { | ||||||
|  |             "manual": { | ||||||
|  |               "embeddings": [ | ||||||
|  |                 [ | ||||||
|  |                   0.0, | ||||||
|  |                   0.0, | ||||||
|  |                   0.0 | ||||||
|  |                 ] | ||||||
|  |               ], | ||||||
|  |               "regenerate": false | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": 1, | ||||||
|  |           "name": "echo", | ||||||
|  |           "_vectors": { | ||||||
|  |             "manual": { | ||||||
|  |               "embeddings": [ | ||||||
|  |                 [ | ||||||
|  |                   1.0, | ||||||
|  |                   1.0, | ||||||
|  |                   1.0 | ||||||
|  |                 ] | ||||||
|  |               ], | ||||||
|  |               "regenerate": false | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 2 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let documents = json!([ | ||||||
|  |       {"id": 0, "name": "kefir", "_vectors": { "manual": [10, 10, 10] }}, | ||||||
|  |       {"id": 1, "name": "echo", "_vectors": { "manual": null }}, | ||||||
|  |     ]); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(value.uid()).await; | ||||||
|  |  | ||||||
|  |     let (documents, _code) = index | ||||||
|  |         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "id": 0, | ||||||
|  |           "name": "kefir", | ||||||
|  |           "_vectors": { | ||||||
|  |             "manual": { | ||||||
|  |               "embeddings": [ | ||||||
|  |                 [ | ||||||
|  |                   10.0, | ||||||
|  |                   10.0, | ||||||
|  |                   10.0 | ||||||
|  |                 ] | ||||||
|  |               ], | ||||||
|  |               "regenerate": false | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": 1, | ||||||
|  |           "name": "echo", | ||||||
|  |           "_vectors": {} | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 2 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (value, code) = index.delete_document(0).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(value.uid()).await; | ||||||
|  |  | ||||||
|  |     let (documents, _code) = index | ||||||
|  |         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "id": 1, | ||||||
|  |           "name": "echo", | ||||||
|  |           "_vectors": {} | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 1 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | async fn generate_default_user_provided_documents(server: &Server) -> Index { | ||||||
|  |     let index = server.index("doggo"); | ||||||
|  |     let (value, code) = server.set_features(json!({"vectorStore": true})).await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     snapshot!(value, @r###" | ||||||
|  |     { | ||||||
|  |       "vectorStore": true, | ||||||
|  |       "metrics": false, | ||||||
|  |       "logsRoute": false | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |           "embedders": { | ||||||
|  |               "manual": { | ||||||
|  |                   "source": "userProvided", | ||||||
|  |                   "dimensions": 3, | ||||||
|  |               } | ||||||
|  |           }, | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     server.wait_task(response.uid()).await; | ||||||
|  |  | ||||||
|  |     let documents = json!([ | ||||||
|  |       {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }}, | ||||||
|  |       {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1] }}, | ||||||
|  |       {"id": 2, "name": "billou", "_vectors": { "manual": [[2, 2, 2], [2, 2, 3]] }}, | ||||||
|  |       {"id": 3, "name": "intel", "_vectors": { "manual": { "regenerate": false, "embeddings": [3, 3, 3] }}}, | ||||||
|  |       {"id": 4, "name": "max", "_vectors": { "manual": { "regenerate": false, "embeddings": [[4, 4, 4], [4, 4, 5]] }}}, | ||||||
|  |     ]); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     index.wait_task(value.uid()).await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn user_provided_embeddings_error() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = generate_default_user_provided_documents(&server).await; | ||||||
|  |  | ||||||
|  |     // First case, we forget to specify the `regenerate` | ||||||
|  |     let documents = | ||||||
|  |         json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [0, 0, 0] }}}); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task, @r###" | ||||||
|  |     { | ||||||
|  |       "uid": 2, | ||||||
|  |       "indexUid": "doggo", | ||||||
|  |       "status": "failed", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 0 | ||||||
|  |       }, | ||||||
|  |       "error": { | ||||||
|  |         "message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`", | ||||||
|  |         "code": "invalid_vectors_type", | ||||||
|  |         "type": "invalid_request", | ||||||
|  |         "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" | ||||||
|  |       }, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // Second case, we don't specify anything | ||||||
|  |     let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": {}}}); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task, @r###" | ||||||
|  |     { | ||||||
|  |       "uid": 3, | ||||||
|  |       "indexUid": "doggo", | ||||||
|  |       "status": "failed", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 0 | ||||||
|  |       }, | ||||||
|  |       "error": { | ||||||
|  |         "message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`", | ||||||
|  |         "code": "invalid_vectors_type", | ||||||
|  |         "type": "invalid_request", | ||||||
|  |         "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" | ||||||
|  |       }, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // Third case, we specify something wrong in place of regenerate | ||||||
|  |     let documents = | ||||||
|  |         json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": "yes please" }}}); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task, @r###" | ||||||
|  |     { | ||||||
|  |       "uid": 4, | ||||||
|  |       "indexUid": "doggo", | ||||||
|  |       "status": "failed", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 0 | ||||||
|  |       }, | ||||||
|  |       "error": { | ||||||
|  |         "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.regenerate`: expected a boolean, but found a string: `\"yes please\"`", | ||||||
|  |         "code": "invalid_vectors_type", | ||||||
|  |         "type": "invalid_request", | ||||||
|  |         "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" | ||||||
|  |       }, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let documents = | ||||||
|  |         json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": true }}}); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task, @r###" | ||||||
|  |     { | ||||||
|  |       "uid": 5, | ||||||
|  |       "indexUid": "doggo", | ||||||
|  |       "status": "failed", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 0 | ||||||
|  |       }, | ||||||
|  |       "error": { | ||||||
|  |         "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings`: expected null or an array, but found a boolean: `true`", | ||||||
|  |         "code": "invalid_vectors_type", | ||||||
|  |         "type": "invalid_request", | ||||||
|  |         "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" | ||||||
|  |       }, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let documents = | ||||||
|  |         json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [true] }}}); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task, @r###" | ||||||
|  |     { | ||||||
|  |       "uid": 6, | ||||||
|  |       "indexUid": "doggo", | ||||||
|  |       "status": "failed", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 0 | ||||||
|  |       }, | ||||||
|  |       "error": { | ||||||
|  |         "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`", | ||||||
|  |         "code": "invalid_vectors_type", | ||||||
|  |         "type": "invalid_request", | ||||||
|  |         "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" | ||||||
|  |       }, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let documents = | ||||||
|  |         json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [[true]] }}}); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task, @r###" | ||||||
|  |     { | ||||||
|  |       "uid": 7, | ||||||
|  |       "indexUid": "doggo", | ||||||
|  |       "status": "failed", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 0 | ||||||
|  |       }, | ||||||
|  |       "error": { | ||||||
|  |         "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`", | ||||||
|  |         "code": "invalid_vectors_type", | ||||||
|  |         "type": "invalid_request", | ||||||
|  |         "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" | ||||||
|  |       }, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [23, 0.1, -12], "regenerate": true }}}); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task["status"], @r###""succeeded""###); | ||||||
|  |  | ||||||
|  |     let documents = | ||||||
|  |         json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false }}}); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task["status"], @r###""succeeded""###); | ||||||
|  |  | ||||||
|  |     let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [0.1, [0.2, 0.3]] }}}); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task, @r###" | ||||||
|  |     { | ||||||
|  |       "uid": 10, | ||||||
|  |       "indexUid": "doggo", | ||||||
|  |       "status": "failed", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 0 | ||||||
|  |       }, | ||||||
|  |       "error": { | ||||||
|  |         "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`", | ||||||
|  |         "code": "invalid_vectors_type", | ||||||
|  |         "type": "invalid_request", | ||||||
|  |         "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" | ||||||
|  |       }, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, 0.2], 0.3] }}}); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task, @r###" | ||||||
|  |     { | ||||||
|  |       "uid": 11, | ||||||
|  |       "indexUid": "doggo", | ||||||
|  |       "status": "failed", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 0 | ||||||
|  |       }, | ||||||
|  |       "error": { | ||||||
|  |         "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected an array, but found a number: `0.3`", | ||||||
|  |         "code": "invalid_vectors_type", | ||||||
|  |         "type": "invalid_request", | ||||||
|  |         "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" | ||||||
|  |       }, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, true], 0.3] }}}); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task, @r###" | ||||||
|  |     { | ||||||
|  |       "uid": 12, | ||||||
|  |       "indexUid": "doggo", | ||||||
|  |       "status": "failed", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 1, | ||||||
|  |         "indexedDocuments": 0 | ||||||
|  |       }, | ||||||
|  |       "error": { | ||||||
|  |         "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`", | ||||||
|  |         "code": "invalid_vectors_type", | ||||||
|  |         "type": "invalid_request", | ||||||
|  |         "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" | ||||||
|  |       }, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn clear_documents() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = generate_default_user_provided_documents(&server).await; | ||||||
|  |  | ||||||
|  |     let (value, _code) = index.clear_all_documents().await; | ||||||
|  |     index.wait_task(value.uid()).await; | ||||||
|  |  | ||||||
|  |     // Make sure the documents DB has been cleared | ||||||
|  |     let (documents, _code) = index | ||||||
|  |         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "results": [], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 0 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // Make sure the arroy DB has been cleared | ||||||
|  |     let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; | ||||||
|  |     snapshot!(documents, @r###" | ||||||
|  |     { | ||||||
|  |       "hits": [], | ||||||
|  |       "query": "", | ||||||
|  |       "processingTimeMs": "[duration]", | ||||||
|  |       "limit": 20, | ||||||
|  |       "offset": 0, | ||||||
|  |       "estimatedTotalHits": 0, | ||||||
|  |       "semanticHitCount": 0 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn add_remove_one_vector_4588() { | ||||||
|  |     // https://github.com/meilisearch/meilisearch/issues/4588 | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("doggo"); | ||||||
|  |     let (value, code) = server.set_features(json!({"vectorStore": true})).await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     snapshot!(value, @r###" | ||||||
|  |     { | ||||||
|  |       "vectorStore": true, | ||||||
|  |       "metrics": false, | ||||||
|  |       "logsRoute": false | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |           "embedders": { | ||||||
|  |               "manual": { | ||||||
|  |                   "source": "userProvided", | ||||||
|  |                   "dimensions": 3, | ||||||
|  |               } | ||||||
|  |           }, | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = server.wait_task(response.uid()).await; | ||||||
|  |     snapshot!(task, name: "settings-processed"); | ||||||
|  |  | ||||||
|  |     let documents = json!([ | ||||||
|  |       {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }}, | ||||||
|  |     ]); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task, name: "document-added"); | ||||||
|  |  | ||||||
|  |     let documents = json!([ | ||||||
|  |       {"id": 0, "name": "kefir", "_vectors": { "manual": null }}, | ||||||
|  |     ]); | ||||||
|  |     let (value, code) = index.add_documents(documents, None).await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     let task = index.wait_task(value.uid()).await; | ||||||
|  |     snapshot!(task, name: "document-deleted"); | ||||||
|  |  | ||||||
|  |     let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await; | ||||||
|  |     snapshot!(documents, @r###" | ||||||
|  |     { | ||||||
|  |       "hits": [ | ||||||
|  |         { | ||||||
|  |           "id": 0, | ||||||
|  |           "name": "kefir" | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "query": "", | ||||||
|  |       "processingTimeMs": "[duration]", | ||||||
|  |       "limit": 20, | ||||||
|  |       "offset": 0, | ||||||
|  |       "estimatedTotalHits": 1, | ||||||
|  |       "semanticHitCount": 1 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (documents, _code) = index | ||||||
|  |         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "id": 0, | ||||||
|  |           "name": "kefir", | ||||||
|  |           "_vectors": {} | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 1 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
							
								
								
									
										228
									
								
								meilisearch/tests/vector/settings.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										228
									
								
								meilisearch/tests/vector/settings.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,228 @@ | |||||||
|  | use meili_snap::{json_string, snapshot}; | ||||||
|  |  | ||||||
|  | use crate::common::{GetAllDocumentsOptions, Server}; | ||||||
|  | use crate::json; | ||||||
|  | use crate::vector::generate_default_user_provided_documents; | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn update_embedder() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("doggo"); | ||||||
|  |     let (value, code) = server.set_features(json!({"vectorStore": true})).await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     snapshot!(value, @r###" | ||||||
|  |     { | ||||||
|  |       "vectorStore": true, | ||||||
|  |       "metrics": false, | ||||||
|  |       "logsRoute": false | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |           "embedders": { "manual": {}}, | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     server.wait_task(response.uid()).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = index | ||||||
|  |         .update_settings(json!({ | ||||||
|  |           "embedders": { | ||||||
|  |               "manual": { | ||||||
|  |                   "source": "userProvided", | ||||||
|  |                   "dimensions": 2, | ||||||
|  |               } | ||||||
|  |           }, | ||||||
|  |         })) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |  | ||||||
|  |     let ret = server.wait_task(response.uid()).await; | ||||||
|  |     snapshot!(ret, @r###" | ||||||
|  |     { | ||||||
|  |       "uid": 1, | ||||||
|  |       "indexUid": "doggo", | ||||||
|  |       "status": "succeeded", | ||||||
|  |       "type": "settingsUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "embedders": { | ||||||
|  |           "manual": { | ||||||
|  |             "source": "userProvided", | ||||||
|  |             "dimensions": 2 | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       }, | ||||||
|  |       "error": null, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn reset_embedder_documents() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = generate_default_user_provided_documents(&server).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = index.delete_settings().await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     server.wait_task(response.uid()).await; | ||||||
|  |  | ||||||
|  |     // Make sure the documents are still present | ||||||
|  |     let (documents, _code) = index | ||||||
|  |         .get_all_documents(GetAllDocumentsOptions { | ||||||
|  |             limit: None, | ||||||
|  |             offset: None, | ||||||
|  |             retrieve_vectors: false, | ||||||
|  |             fields: None, | ||||||
|  |         }) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "id": 0, | ||||||
|  |           "name": "kefir" | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": 1, | ||||||
|  |           "name": "echo" | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": 2, | ||||||
|  |           "name": "billou" | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": 3, | ||||||
|  |           "name": "intel" | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": 4, | ||||||
|  |           "name": "max" | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 5 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // Make sure we are still able to retrieve their vectors | ||||||
|  |     let (documents, _code) = index | ||||||
|  |         .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) | ||||||
|  |         .await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "id": 0, | ||||||
|  |           "name": "kefir", | ||||||
|  |           "_vectors": { | ||||||
|  |             "manual": { | ||||||
|  |               "embeddings": [ | ||||||
|  |                 [ | ||||||
|  |                   0.0, | ||||||
|  |                   0.0, | ||||||
|  |                   0.0 | ||||||
|  |                 ] | ||||||
|  |               ], | ||||||
|  |               "regenerate": false | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": 1, | ||||||
|  |           "name": "echo", | ||||||
|  |           "_vectors": { | ||||||
|  |             "manual": { | ||||||
|  |               "embeddings": [ | ||||||
|  |                 [ | ||||||
|  |                   1.0, | ||||||
|  |                   1.0, | ||||||
|  |                   1.0 | ||||||
|  |                 ] | ||||||
|  |               ], | ||||||
|  |               "regenerate": false | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": 2, | ||||||
|  |           "name": "billou", | ||||||
|  |           "_vectors": { | ||||||
|  |             "manual": { | ||||||
|  |               "embeddings": [ | ||||||
|  |                 [ | ||||||
|  |                   2.0, | ||||||
|  |                   2.0, | ||||||
|  |                   2.0 | ||||||
|  |                 ], | ||||||
|  |                 [ | ||||||
|  |                   2.0, | ||||||
|  |                   2.0, | ||||||
|  |                   3.0 | ||||||
|  |                 ] | ||||||
|  |               ], | ||||||
|  |               "regenerate": false | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": 3, | ||||||
|  |           "name": "intel", | ||||||
|  |           "_vectors": { | ||||||
|  |             "manual": { | ||||||
|  |               "embeddings": [ | ||||||
|  |                 [ | ||||||
|  |                   3.0, | ||||||
|  |                   3.0, | ||||||
|  |                   3.0 | ||||||
|  |                 ] | ||||||
|  |               ], | ||||||
|  |               "regenerate": false | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "id": 4, | ||||||
|  |           "name": "max", | ||||||
|  |           "_vectors": { | ||||||
|  |             "manual": { | ||||||
|  |               "embeddings": [ | ||||||
|  |                 [ | ||||||
|  |                   4.0, | ||||||
|  |                   4.0, | ||||||
|  |                   4.0 | ||||||
|  |                 ], | ||||||
|  |                 [ | ||||||
|  |                   4.0, | ||||||
|  |                   4.0, | ||||||
|  |                   5.0 | ||||||
|  |                 ] | ||||||
|  |               ], | ||||||
|  |               "regenerate": false | ||||||
|  |             } | ||||||
|  |           } | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 5 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // Make sure the arroy DB has been cleared | ||||||
|  |     let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Cannot find embedder with name `default`.", | ||||||
|  |       "code": "invalid_embedder", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_embedder" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
| @@ -0,0 +1,19 @@ | |||||||
|  | --- | ||||||
|  | source: meilisearch/tests/vector/mod.rs | ||||||
|  | --- | ||||||
|  | { | ||||||
|  |   "uid": 1, | ||||||
|  |   "indexUid": "doggo", | ||||||
|  |   "status": "succeeded", | ||||||
|  |   "type": "documentAdditionOrUpdate", | ||||||
|  |   "canceledBy": null, | ||||||
|  |   "details": { | ||||||
|  |     "receivedDocuments": 1, | ||||||
|  |     "indexedDocuments": 1 | ||||||
|  |   }, | ||||||
|  |   "error": null, | ||||||
|  |   "duration": "[duration]", | ||||||
|  |   "enqueuedAt": "[date]", | ||||||
|  |   "startedAt": "[date]", | ||||||
|  |   "finishedAt": "[date]" | ||||||
|  | } | ||||||
| @@ -0,0 +1,19 @@ | |||||||
|  | --- | ||||||
|  | source: meilisearch/tests/vector/mod.rs | ||||||
|  | --- | ||||||
|  | { | ||||||
|  |   "uid": 2, | ||||||
|  |   "indexUid": "doggo", | ||||||
|  |   "status": "succeeded", | ||||||
|  |   "type": "documentAdditionOrUpdate", | ||||||
|  |   "canceledBy": null, | ||||||
|  |   "details": { | ||||||
|  |     "receivedDocuments": 1, | ||||||
|  |     "indexedDocuments": 1 | ||||||
|  |   }, | ||||||
|  |   "error": null, | ||||||
|  |   "duration": "[duration]", | ||||||
|  |   "enqueuedAt": "[date]", | ||||||
|  |   "startedAt": "[date]", | ||||||
|  |   "finishedAt": "[date]" | ||||||
|  | } | ||||||
| @@ -0,0 +1,23 @@ | |||||||
|  | --- | ||||||
|  | source: meilisearch/tests/vector/mod.rs | ||||||
|  | --- | ||||||
|  | { | ||||||
|  |   "uid": 0, | ||||||
|  |   "indexUid": "doggo", | ||||||
|  |   "status": "succeeded", | ||||||
|  |   "type": "settingsUpdate", | ||||||
|  |   "canceledBy": null, | ||||||
|  |   "details": { | ||||||
|  |     "embedders": { | ||||||
|  |       "manual": { | ||||||
|  |         "source": "userProvided", | ||||||
|  |         "dimensions": 3 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   }, | ||||||
|  |   "error": null, | ||||||
|  |   "duration": "[duration]", | ||||||
|  |   "enqueuedAt": "[date]", | ||||||
|  |   "startedAt": "[date]", | ||||||
|  |   "finishedAt": "[date]" | ||||||
|  | } | ||||||
| @@ -17,7 +17,7 @@ bincode = "1.3.3" | |||||||
| bstr = "1.9.0" | bstr = "1.9.0" | ||||||
| bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] } | bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] } | ||||||
| byteorder = "1.5.0" | byteorder = "1.5.0" | ||||||
| charabia = { version = "0.8.10", default-features = false } | charabia = { version = "0.8.11", default-features = false } | ||||||
| concat-arrays = "0.1.2" | concat-arrays = "0.1.2" | ||||||
| crossbeam-channel = "0.5.11" | crossbeam-channel = "0.5.11" | ||||||
| deserr = "0.6.1" | deserr = "0.6.1" | ||||||
| @@ -44,7 +44,7 @@ once_cell = "1.19.0" | |||||||
| ordered-float = "4.2.0" | ordered-float = "4.2.0" | ||||||
| rand_pcg = { version = "0.3.1", features = ["serde1"] } | rand_pcg = { version = "0.3.1", features = ["serde1"] } | ||||||
| rayon = "1.8.0" | rayon = "1.8.0" | ||||||
| roaring = "0.10.2" | roaring = { version = "0.10.2", features = ["serde"] } | ||||||
| rstar = { version = "0.11.0", features = ["serde"] } | rstar = { version = "0.11.0", features = ["serde"] } | ||||||
| serde = { version = "1.0.195", features = ["derive"] } | serde = { version = "1.0.195", features = ["derive"] } | ||||||
| serde_json = { version = "1.0.111", features = ["preserve_order"] } | serde_json = { version = "1.0.111", features = ["preserve_order"] } | ||||||
| @@ -79,7 +79,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", | |||||||
| ] } | ] } | ||||||
| tiktoken-rs = "0.5.8" | tiktoken-rs = "0.5.8" | ||||||
| liquid = "0.26.4" | liquid = "0.26.4" | ||||||
| arroy = "0.3.1" | arroy = "0.4.0" | ||||||
| rand = "0.8.5" | rand = "0.8.5" | ||||||
| tracing = "0.1.40" | tracing = "0.1.40" | ||||||
| ureq = { version = "2.9.7", features = ["json"] } | ureq = { version = "2.9.7", features = ["json"] } | ||||||
|   | |||||||
| @@ -59,6 +59,7 @@ fn main() -> Result<(), Box<dyn Error>> { | |||||||
|                 false, |                 false, | ||||||
|                 universe, |                 universe, | ||||||
|                 &None, |                 &None, | ||||||
|  |                 &None, | ||||||
|                 GeoSortStrategy::default(), |                 GeoSortStrategy::default(), | ||||||
|                 0, |                 0, | ||||||
|                 20, |                 20, | ||||||
| @@ -66,6 +67,7 @@ fn main() -> Result<(), Box<dyn Error>> { | |||||||
|                 &mut DefaultSearchLogger, |                 &mut DefaultSearchLogger, | ||||||
|                 logger, |                 logger, | ||||||
|                 TimeBudget::max(), |                 TimeBudget::max(), | ||||||
|  |                 None, | ||||||
|             )?; |             )?; | ||||||
|             if let Some((logger, dir)) = detailed_logger { |             if let Some((logger, dir)) = detailed_logger { | ||||||
|                 logger.finish(&mut ctx, Path::new(dir))?; |                 logger.finish(&mut ctx, Path::new(dir))?; | ||||||
|   | |||||||
| @@ -119,6 +119,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco | |||||||
|     InvalidVectorDimensions { expected: usize, found: usize }, |     InvalidVectorDimensions { expected: usize, found: usize }, | ||||||
|     #[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")] |     #[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")] | ||||||
|     InvalidVectorsMapType { document_id: String, value: Value }, |     InvalidVectorsMapType { document_id: String, value: Value }, | ||||||
|  |     #[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")] | ||||||
|  |     InvalidVectorsEmbedderConf { document_id: String, error: deserr::errors::JsonError }, | ||||||
|     #[error("{0}")] |     #[error("{0}")] | ||||||
|     InvalidFilter(String), |     InvalidFilter(String), | ||||||
|     #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))] |     #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))] | ||||||
| @@ -134,6 +136,17 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco | |||||||
|         } |         } | ||||||
|     )] |     )] | ||||||
|     InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool }, |     InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool }, | ||||||
|  |     #[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}", | ||||||
|  |         .field, | ||||||
|  |         match .valid_fields.is_empty() { | ||||||
|  |             true => "This index does not have configured filterable attributes.".to_string(), | ||||||
|  |             false => format!("Available filterable attributes are: `{}{}`.", | ||||||
|  |                     valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "), | ||||||
|  |                     .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""), | ||||||
|  |                 ), | ||||||
|  |         } | ||||||
|  |     )] | ||||||
|  |     InvalidDistinctAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool }, | ||||||
|     #[error("Attribute `{}` is not facet-searchable. {}", |     #[error("Attribute `{}` is not facet-searchable. {}", | ||||||
|         .field, |         .field, | ||||||
|         match .valid_fields.is_empty() { |         match .valid_fields.is_empty() { | ||||||
| @@ -270,8 +283,9 @@ impl From<arroy::Error> for Error { | |||||||
|             arroy::Error::DatabaseFull |             arroy::Error::DatabaseFull | ||||||
|             | arroy::Error::InvalidItemAppend |             | arroy::Error::InvalidItemAppend | ||||||
|             | arroy::Error::UnmatchingDistance { .. } |             | arroy::Error::UnmatchingDistance { .. } | ||||||
|             | arroy::Error::MissingNode |             | arroy::Error::NeedBuild(_) | ||||||
|             | arroy::Error::MissingMetadata => { |             | arroy::Error::MissingKey { .. } | ||||||
|  |             | arroy::Error::MissingMetadata(_) => { | ||||||
|                 Error::InternalError(InternalError::ArroyError(value)) |                 Error::InternalError(InternalError::ArroyError(value)) | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -4,6 +4,7 @@ use std::collections::HashMap; | |||||||
|  |  | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
|  |  | ||||||
|  | use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; | ||||||
| use crate::{FieldId, FieldsIdsMap, Weight}; | use crate::{FieldId, FieldsIdsMap, Weight}; | ||||||
|  |  | ||||||
| #[derive(Debug, Default, Serialize, Deserialize)] | #[derive(Debug, Default, Serialize, Deserialize)] | ||||||
| @@ -23,7 +24,13 @@ impl FieldidsWeightsMap { | |||||||
|     /// Should only be called in the case there are NO searchable attributes. |     /// Should only be called in the case there are NO searchable attributes. | ||||||
|     /// All the fields will be inserted in the order of the fields ids map with a weight of 0. |     /// All the fields will be inserted in the order of the fields ids map with a weight of 0. | ||||||
|     pub fn from_field_id_map_without_searchable(fid_map: &FieldsIdsMap) -> Self { |     pub fn from_field_id_map_without_searchable(fid_map: &FieldsIdsMap) -> Self { | ||||||
|         FieldidsWeightsMap { map: fid_map.ids().map(|fid| (fid, 0)).collect() } |         FieldidsWeightsMap { | ||||||
|  |             map: fid_map | ||||||
|  |                 .iter() | ||||||
|  |                 .filter(|(_fid, name)| !crate::is_faceted_by(name, RESERVED_VECTORS_FIELD_NAME)) | ||||||
|  |                 .map(|(fid, _name)| (fid, 0)) | ||||||
|  |                 .collect(), | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Removes a field id from the map, returning the associated weight previously in the map. |     /// Removes a field id from the map, returning the associated weight previously in the map. | ||||||
|   | |||||||
| @@ -41,6 +41,16 @@ impl FieldsIdsMap { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Get the ids of a field and all its nested fields based on its name. | ||||||
|  |     pub fn nested_ids(&self, name: &str) -> Vec<FieldId> { | ||||||
|  |         self.names_ids | ||||||
|  |             .range(name.to_string()..) | ||||||
|  |             .take_while(|(key, _)| key.starts_with(name)) | ||||||
|  |             .filter(|(key, _)| crate::is_faceted_by(key, name)) | ||||||
|  |             .map(|(_name, id)| *id) | ||||||
|  |             .collect() | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Get the id of a field based on its name. |     /// Get the id of a field based on its name. | ||||||
|     pub fn id(&self, name: &str) -> Option<FieldId> { |     pub fn id(&self, name: &str) -> Option<FieldId> { | ||||||
|         self.names_ids.get(name).copied() |         self.names_ids.get(name).copied() | ||||||
| @@ -126,4 +136,32 @@ mod tests { | |||||||
|         assert_eq!(iter.next(), Some((3, "title"))); |         assert_eq!(iter.next(), Some((3, "title"))); | ||||||
|         assert_eq!(iter.next(), None); |         assert_eq!(iter.next(), None); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn nested_fields() { | ||||||
|  |         let mut map = FieldsIdsMap::new(); | ||||||
|  |  | ||||||
|  |         assert_eq!(map.insert("id"), Some(0)); | ||||||
|  |         assert_eq!(map.insert("doggo"), Some(1)); | ||||||
|  |         assert_eq!(map.insert("doggo.name"), Some(2)); | ||||||
|  |         assert_eq!(map.insert("doggolution"), Some(3)); | ||||||
|  |         assert_eq!(map.insert("doggo.breed.name"), Some(4)); | ||||||
|  |         assert_eq!(map.insert("description"), Some(5)); | ||||||
|  |  | ||||||
|  |         insta::assert_debug_snapshot!(map.nested_ids("doggo"), @r###" | ||||||
|  |         [ | ||||||
|  |             1, | ||||||
|  |             4, | ||||||
|  |             2, | ||||||
|  |         ] | ||||||
|  |         "###); | ||||||
|  |  | ||||||
|  |         insta::assert_debug_snapshot!(map.nested_ids("doggo.breed"), @r###" | ||||||
|  |         [ | ||||||
|  |             4, | ||||||
|  |         ] | ||||||
|  |         "###); | ||||||
|  |  | ||||||
|  |         insta::assert_debug_snapshot!(map.nested_ids("_vector"), @"[]"); | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -47,6 +47,12 @@ pub struct FacetGroupValue { | |||||||
|     pub bitmap: RoaringBitmap, |     pub bitmap: RoaringBitmap, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[derive(Debug)] | ||||||
|  | pub struct FacetGroupLazyValue<'b> { | ||||||
|  |     pub size: u8, | ||||||
|  |     pub bitmap_bytes: &'b [u8], | ||||||
|  | } | ||||||
|  |  | ||||||
| pub struct FacetGroupKeyCodec<T> { | pub struct FacetGroupKeyCodec<T> { | ||||||
|     _phantom: PhantomData<T>, |     _phantom: PhantomData<T>, | ||||||
| } | } | ||||||
| @@ -69,6 +75,7 @@ where | |||||||
|         Ok(Cow::Owned(v)) |         Ok(Cow::Owned(v)) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T> | impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T> | ||||||
| where | where | ||||||
|     T: BytesDecode<'a>, |     T: BytesDecode<'a>, | ||||||
| @@ -84,6 +91,7 @@ where | |||||||
| } | } | ||||||
|  |  | ||||||
| pub struct FacetGroupValueCodec; | pub struct FacetGroupValueCodec; | ||||||
|  |  | ||||||
| impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { | impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { | ||||||
|     type EItem = FacetGroupValue; |     type EItem = FacetGroupValue; | ||||||
|  |  | ||||||
| @@ -93,11 +101,23 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { | |||||||
|         Ok(Cow::Owned(v)) |         Ok(Cow::Owned(v)) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { | impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { | ||||||
|     type DItem = FacetGroupValue; |     type DItem = FacetGroupValue; | ||||||
|  |  | ||||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { |     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||||
|         let size = bytes[0]; |         let size = bytes[0]; | ||||||
|         let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?; |         let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?; | ||||||
|         Ok(FacetGroupValue { size, bitmap }) |         Ok(FacetGroupValue { size, bitmap }) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | pub struct FacetGroupLazyValueCodec; | ||||||
|  |  | ||||||
|  | impl<'a> heed::BytesDecode<'a> for FacetGroupLazyValueCodec { | ||||||
|  |     type DItem = FacetGroupLazyValue<'a>; | ||||||
|  |  | ||||||
|  |     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||||
|  |         Ok(FacetGroupLazyValue { size: bytes[0], bitmap_bytes: &bytes[1..] }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|   | |||||||
| @@ -1,5 +1,5 @@ | |||||||
| use std::borrow::Cow; | use std::borrow::Cow; | ||||||
| use std::io; | use std::io::{self, Cursor}; | ||||||
| use std::mem::size_of; | use std::mem::size_of; | ||||||
|  |  | ||||||
| use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; | use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; | ||||||
| @@ -57,6 +57,24 @@ impl CboRoaringBitmapCodec { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn intersection_with_serialized( | ||||||
|  |         mut bytes: &[u8], | ||||||
|  |         other: &RoaringBitmap, | ||||||
|  |     ) -> io::Result<RoaringBitmap> { | ||||||
|  |         // See above `deserialize_from` method for implementation details. | ||||||
|  |         if bytes.len() <= THRESHOLD * size_of::<u32>() { | ||||||
|  |             let mut bitmap = RoaringBitmap::new(); | ||||||
|  |             while let Ok(integer) = bytes.read_u32::<NativeEndian>() { | ||||||
|  |                 if other.contains(integer) { | ||||||
|  |                     bitmap.insert(integer); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             Ok(bitmap) | ||||||
|  |         } else { | ||||||
|  |             other.intersection_with_serialized_unchecked(Cursor::new(bytes)) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Merge serialized CboRoaringBitmaps in a buffer. |     /// Merge serialized CboRoaringBitmaps in a buffer. | ||||||
|     /// |     /// | ||||||
|     /// if the merged values length is under the threshold, values are directly |     /// if the merged values length is under the threshold, values are directly | ||||||
|   | |||||||
| @@ -9,6 +9,7 @@ use heed::types::*; | |||||||
| use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified}; | use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified}; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use rstar::RTree; | use rstar::RTree; | ||||||
|  | use serde::{Deserialize, Serialize}; | ||||||
| use time::OffsetDateTime; | use time::OffsetDateTime; | ||||||
|  |  | ||||||
| use crate::documents::PrimaryKey; | use crate::documents::PrimaryKey; | ||||||
| @@ -23,6 +24,7 @@ use crate::heed_codec::{ | |||||||
| }; | }; | ||||||
| use crate::order_by_map::OrderByMap; | use crate::order_by_map::OrderByMap; | ||||||
| use crate::proximity::ProximityPrecision; | use crate::proximity::ProximityPrecision; | ||||||
|  | use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; | ||||||
| use crate::vector::{Embedding, EmbeddingConfig}; | use crate::vector::{Embedding, EmbeddingConfig}; | ||||||
| use crate::{ | use crate::{ | ||||||
|     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, |     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, | ||||||
| @@ -644,6 +646,7 @@ impl Index { | |||||||
|         &self, |         &self, | ||||||
|         wtxn: &mut RwTxn, |         wtxn: &mut RwTxn, | ||||||
|         user_fields: &[&str], |         user_fields: &[&str], | ||||||
|  |         non_searchable_fields_ids: &[FieldId], | ||||||
|         fields_ids_map: &FieldsIdsMap, |         fields_ids_map: &FieldsIdsMap, | ||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
|         // We can write the user defined searchable fields as-is. |         // We can write the user defined searchable fields as-is. | ||||||
| @@ -662,6 +665,7 @@ impl Index { | |||||||
|             for (weight, user_field) in user_fields.iter().enumerate() { |             for (weight, user_field) in user_fields.iter().enumerate() { | ||||||
|                 if crate::is_faceted_by(field_from_map, user_field) |                 if crate::is_faceted_by(field_from_map, user_field) | ||||||
|                     && !real_fields.contains(&field_from_map) |                     && !real_fields.contains(&field_from_map) | ||||||
|  |                     && !non_searchable_fields_ids.contains(&id) | ||||||
|                 { |                 { | ||||||
|                     real_fields.push(field_from_map); |                     real_fields.push(field_from_map); | ||||||
|  |  | ||||||
| @@ -708,6 +712,7 @@ impl Index { | |||||||
|                 Ok(self |                 Ok(self | ||||||
|                     .fields_ids_map(rtxn)? |                     .fields_ids_map(rtxn)? | ||||||
|                     .names() |                     .names() | ||||||
|  |                     .filter(|name| !crate::is_faceted_by(name, RESERVED_VECTORS_FIELD_NAME)) | ||||||
|                     .map(|field| Cow::Owned(field.to_string())) |                     .map(|field| Cow::Owned(field.to_string())) | ||||||
|                     .collect()) |                     .collect()) | ||||||
|             }) |             }) | ||||||
| @@ -1568,12 +1573,16 @@ impl Index { | |||||||
|         Ok(script_language) |         Ok(script_language) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Put the embedding configs: | ||||||
|  |     /// 1. The name of the embedder | ||||||
|  |     /// 2. The configuration option for this embedder | ||||||
|  |     /// 3. The list of documents with a user provided embedding | ||||||
|     pub(crate) fn put_embedding_configs( |     pub(crate) fn put_embedding_configs( | ||||||
|         &self, |         &self, | ||||||
|         wtxn: &mut RwTxn<'_>, |         wtxn: &mut RwTxn<'_>, | ||||||
|         configs: Vec<(String, EmbeddingConfig)>, |         configs: Vec<IndexEmbeddingConfig>, | ||||||
|     ) -> heed::Result<()> { |     ) -> heed::Result<()> { | ||||||
|         self.main.remap_types::<Str, SerdeJson<Vec<(String, EmbeddingConfig)>>>().put( |         self.main.remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>().put( | ||||||
|             wtxn, |             wtxn, | ||||||
|             main_key::EMBEDDING_CONFIGS, |             main_key::EMBEDDING_CONFIGS, | ||||||
|             &configs, |             &configs, | ||||||
| @@ -1584,13 +1593,10 @@ impl Index { | |||||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::EMBEDDING_CONFIGS) |         self.main.remap_key_type::<Str>().delete(wtxn, main_key::EMBEDDING_CONFIGS) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn embedding_configs( |     pub fn embedding_configs(&self, rtxn: &RoTxn<'_>) -> Result<Vec<IndexEmbeddingConfig>> { | ||||||
|         &self, |  | ||||||
|         rtxn: &RoTxn<'_>, |  | ||||||
|     ) -> Result<Vec<(String, crate::vector::EmbeddingConfig)>> { |  | ||||||
|         Ok(self |         Ok(self | ||||||
|             .main |             .main | ||||||
|             .remap_types::<Str, SerdeJson<Vec<(String, EmbeddingConfig)>>>() |             .remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>() | ||||||
|             .get(rtxn, main_key::EMBEDDING_CONFIGS)? |             .get(rtxn, main_key::EMBEDDING_CONFIGS)? | ||||||
|             .unwrap_or_default()) |             .unwrap_or_default()) | ||||||
|     } |     } | ||||||
| @@ -1604,7 +1610,7 @@ impl Index { | |||||||
|             arroy::Reader::open(rtxn, k, self.vector_arroy) |             arroy::Reader::open(rtxn, k, self.vector_arroy) | ||||||
|                 .map(Some) |                 .map(Some) | ||||||
|                 .or_else(|e| match e { |                 .or_else(|e| match e { | ||||||
|                     arroy::Error::MissingMetadata => Ok(None), |                     arroy::Error::MissingMetadata(_) => Ok(None), | ||||||
|                     e => Err(e.into()), |                     e => Err(e.into()), | ||||||
|                 }) |                 }) | ||||||
|                 .transpose() |                 .transpose() | ||||||
| @@ -1637,7 +1643,7 @@ impl Index { | |||||||
|                 let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy) |                 let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy) | ||||||
|                     .map(Some) |                     .map(Some) | ||||||
|                     .or_else(|e| match e { |                     .or_else(|e| match e { | ||||||
|                         arroy::Error::MissingMetadata => Ok(None), |                         arroy::Error::MissingMetadata(_) => Ok(None), | ||||||
|                         e => Err(e), |                         e => Err(e), | ||||||
|                     }) |                     }) | ||||||
|                     .transpose(); |                     .transpose(); | ||||||
| @@ -1662,6 +1668,13 @@ impl Index { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, Deserialize, Serialize)] | ||||||
|  | pub struct IndexEmbeddingConfig { | ||||||
|  |     pub name: String, | ||||||
|  |     pub config: EmbeddingConfig, | ||||||
|  |     pub user_provided: RoaringBitmap, | ||||||
|  | } | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| pub(crate) mod tests { | pub(crate) mod tests { | ||||||
|     use std::collections::HashSet; |     use std::collections::HashSet; | ||||||
| @@ -1669,15 +1682,17 @@ pub(crate) mod tests { | |||||||
|  |  | ||||||
|     use big_s::S; |     use big_s::S; | ||||||
|     use heed::{EnvOpenOptions, RwTxn}; |     use heed::{EnvOpenOptions, RwTxn}; | ||||||
|     use maplit::hashset; |     use maplit::{btreemap, hashset}; | ||||||
|     use tempfile::TempDir; |     use tempfile::TempDir; | ||||||
|  |  | ||||||
|     use crate::documents::DocumentsBatchReader; |     use crate::documents::DocumentsBatchReader; | ||||||
|     use crate::error::{Error, InternalError}; |     use crate::error::{Error, InternalError}; | ||||||
|     use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; |     use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; | ||||||
|     use crate::update::{ |     use crate::update::{ | ||||||
|         self, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings, |         self, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting, | ||||||
|  |         Settings, | ||||||
|     }; |     }; | ||||||
|  |     use crate::vector::settings::{EmbedderSource, EmbeddingSettings}; | ||||||
|     use crate::{db_snap, obkv_to_json, Filter, Index, Search, SearchResult}; |     use crate::{db_snap, obkv_to_json, Filter, Index, Search, SearchResult}; | ||||||
|  |  | ||||||
|     pub(crate) struct TempIndex { |     pub(crate) struct TempIndex { | ||||||
| @@ -2783,4 +2798,95 @@ pub(crate) mod tests { | |||||||
|         ] |         ] | ||||||
|         "###); |         "###); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn vectors_are_never_indexed_as_searchable_or_filterable() { | ||||||
|  |         let index = TempIndex::new(); | ||||||
|  |  | ||||||
|  |         index | ||||||
|  |             .add_documents(documents!([ | ||||||
|  |                 { "id": 0, "_vectors": { "doggo": [2345] } }, | ||||||
|  |                 { "id": 1, "_vectors": { "doggo": [6789] } }, | ||||||
|  |             ])) | ||||||
|  |             .unwrap(); | ||||||
|  |  | ||||||
|  |         db_snap!(index, fields_ids_map, @r###" | ||||||
|  |         0   id               | | ||||||
|  |         1   _vectors         | | ||||||
|  |         2   _vectors.doggo   | | ||||||
|  |         "###); | ||||||
|  |         db_snap!(index, searchable_fields, @r###"["id"]"###); | ||||||
|  |         db_snap!(index, fieldids_weights_map, @r###" | ||||||
|  |         fid weight | ||||||
|  |         0   0   | | ||||||
|  |         "###); | ||||||
|  |  | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let mut search = index.search(&rtxn); | ||||||
|  |         let results = search.query("2345").execute().unwrap(); | ||||||
|  |         assert!(results.candidates.is_empty()); | ||||||
|  |         drop(rtxn); | ||||||
|  |  | ||||||
|  |         index | ||||||
|  |             .update_settings(|settings| { | ||||||
|  |                 settings.set_searchable_fields(vec![S("_vectors"), S("_vectors.doggo")]); | ||||||
|  |                 settings.set_filterable_fields(hashset![S("_vectors"), S("_vectors.doggo")]); | ||||||
|  |             }) | ||||||
|  |             .unwrap(); | ||||||
|  |  | ||||||
|  |         db_snap!(index, fields_ids_map, @r###" | ||||||
|  |         0   id               | | ||||||
|  |         1   _vectors         | | ||||||
|  |         2   _vectors.doggo   | | ||||||
|  |         "###); | ||||||
|  |         db_snap!(index, searchable_fields, @"[]"); | ||||||
|  |         db_snap!(index, fieldids_weights_map, @r###" | ||||||
|  |         fid weight | ||||||
|  |         "###); | ||||||
|  |  | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let mut search = index.search(&rtxn); | ||||||
|  |         let results = search.query("2345").execute().unwrap(); | ||||||
|  |         assert!(results.candidates.is_empty()); | ||||||
|  |  | ||||||
|  |         let mut search = index.search(&rtxn); | ||||||
|  |         let results = search | ||||||
|  |             .filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap()) | ||||||
|  |             .execute() | ||||||
|  |             .unwrap(); | ||||||
|  |         assert!(results.candidates.is_empty()); | ||||||
|  |  | ||||||
|  |         index | ||||||
|  |             .update_settings(|settings| { | ||||||
|  |                 settings.set_embedder_settings(btreemap! { | ||||||
|  |                     S("doggo") => Setting::Set(EmbeddingSettings { | ||||||
|  |                         dimensions: Setting::Set(1), | ||||||
|  |                         source: Setting::Set(EmbedderSource::UserProvided), | ||||||
|  |                         ..EmbeddingSettings::default()}), | ||||||
|  |                 }); | ||||||
|  |             }) | ||||||
|  |             .unwrap(); | ||||||
|  |  | ||||||
|  |         db_snap!(index, fields_ids_map, @r###" | ||||||
|  |         0   id               | | ||||||
|  |         1   _vectors         | | ||||||
|  |         2   _vectors.doggo   | | ||||||
|  |         "###); | ||||||
|  |         db_snap!(index, searchable_fields, @"[]"); | ||||||
|  |         db_snap!(index, fieldids_weights_map, @r###" | ||||||
|  |         fid weight | ||||||
|  |         "###); | ||||||
|  |  | ||||||
|  |         let rtxn = index.read_txn().unwrap(); | ||||||
|  |         let mut search = index.search(&rtxn); | ||||||
|  |         let results = search.query("2345").execute().unwrap(); | ||||||
|  |         assert!(results.candidates.is_empty()); | ||||||
|  |  | ||||||
|  |         let mut search = index.search(&rtxn); | ||||||
|  |         let results = search | ||||||
|  |             .filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap()) | ||||||
|  |             .execute() | ||||||
|  |             .unwrap(); | ||||||
|  |         assert!(results.candidates.is_empty()); | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -6,9 +6,11 @@ use heed::Result; | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use super::{get_first_facet_value, get_highest_level}; | use super::{get_first_facet_value, get_highest_level}; | ||||||
| use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; | use crate::heed_codec::facet::{ | ||||||
|  |     FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec, | ||||||
|  | }; | ||||||
| use crate::heed_codec::BytesRefCodec; | use crate::heed_codec::BytesRefCodec; | ||||||
| use crate::DocumentId; | use crate::{CboRoaringBitmapCodec, DocumentId}; | ||||||
|  |  | ||||||
| /// Call the given closure on the facet distribution of the candidate documents. | /// Call the given closure on the facet distribution of the candidate documents. | ||||||
| /// | /// | ||||||
| @@ -31,14 +33,11 @@ pub fn lexicographically_iterate_over_facet_distribution<'t, CB>( | |||||||
| where | where | ||||||
|     CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>, |     CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>, | ||||||
| { | { | ||||||
|  |     let db = db.remap_data_type::<FacetGroupLazyValueCodec>(); | ||||||
|     let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback }; |     let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback }; | ||||||
|     let highest_level = get_highest_level( |     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||||
|         rtxn, |  | ||||||
|         db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(), |  | ||||||
|         field_id, |  | ||||||
|     )?; |  | ||||||
|  |  | ||||||
|     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? { |     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? { | ||||||
|         fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; |         fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } else { |     } else { | ||||||
| @@ -75,13 +74,10 @@ where | |||||||
|  |  | ||||||
|     // Represents the list of keys that we must explore. |     // Represents the list of keys that we must explore. | ||||||
|     let mut heap = BinaryHeap::new(); |     let mut heap = BinaryHeap::new(); | ||||||
|     let highest_level = get_highest_level( |     let db = db.remap_data_type::<FacetGroupLazyValueCodec>(); | ||||||
|         rtxn, |     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||||
|         db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(), |  | ||||||
|         field_id, |  | ||||||
|     )?; |  | ||||||
|  |  | ||||||
|     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? { |     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? { | ||||||
|         // We first fill the heap with values from the highest level |         // We first fill the heap with values from the highest level | ||||||
|         let starting_key = |         let starting_key = | ||||||
|             FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; |             FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; | ||||||
| @@ -92,7 +88,10 @@ where | |||||||
|             if key.field_id != field_id { |             if key.field_id != field_id { | ||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
|             let intersection = value.bitmap & candidates; |             let intersection = CboRoaringBitmapCodec::intersection_with_serialized( | ||||||
|  |                 value.bitmap_bytes, | ||||||
|  |                 candidates, | ||||||
|  |             )?; | ||||||
|             let count = intersection.len(); |             let count = intersection.len(); | ||||||
|             if count != 0 { |             if count != 0 { | ||||||
|                 heap.push(LevelEntry { |                 heap.push(LevelEntry { | ||||||
| @@ -121,7 +120,10 @@ where | |||||||
|                     if key.field_id != field_id { |                     if key.field_id != field_id { | ||||||
|                         break; |                         break; | ||||||
|                     } |                     } | ||||||
|                     let intersection = value.bitmap & candidates; |                     let intersection = CboRoaringBitmapCodec::intersection_with_serialized( | ||||||
|  |                         value.bitmap_bytes, | ||||||
|  |                         candidates, | ||||||
|  |                     )?; | ||||||
|                     let count = intersection.len(); |                     let count = intersection.len(); | ||||||
|                     if count != 0 { |                     if count != 0 { | ||||||
|                         heap.push(LevelEntry { |                         heap.push(LevelEntry { | ||||||
| @@ -146,7 +148,7 @@ where | |||||||
|     CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>, |     CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>, | ||||||
| { | { | ||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     callback: CB, |     callback: CB, | ||||||
| } | } | ||||||
| @@ -171,7 +173,10 @@ where | |||||||
|             if key.field_id != self.field_id { |             if key.field_id != self.field_id { | ||||||
|                 return Ok(ControlFlow::Break(())); |                 return Ok(ControlFlow::Break(())); | ||||||
|             } |             } | ||||||
|             let docids_in_common = value.bitmap & candidates; |             let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized( | ||||||
|  |                 value.bitmap_bytes, | ||||||
|  |                 candidates, | ||||||
|  |             )?; | ||||||
|             if !docids_in_common.is_empty() { |             if !docids_in_common.is_empty() { | ||||||
|                 let any_docid_in_common = docids_in_common.min().unwrap(); |                 let any_docid_in_common = docids_in_common.min().unwrap(); | ||||||
|                 match (self.callback)(key.left_bound, docids_in_common.len(), any_docid_in_common)? |                 match (self.callback)(key.left_bound, docids_in_common.len(), any_docid_in_common)? | ||||||
| @@ -205,7 +210,10 @@ where | |||||||
|             if key.field_id != self.field_id { |             if key.field_id != self.field_id { | ||||||
|                 return Ok(ControlFlow::Break(())); |                 return Ok(ControlFlow::Break(())); | ||||||
|             } |             } | ||||||
|             let docids_in_common = value.bitmap & candidates; |             let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized( | ||||||
|  |                 value.bitmap_bytes, | ||||||
|  |                 candidates, | ||||||
|  |             )?; | ||||||
|             if !docids_in_common.is_empty() { |             if !docids_in_common.is_empty() { | ||||||
|                 let cf = self.iterate( |                 let cf = self.iterate( | ||||||
|                     &docids_in_common, |                     &docids_in_common, | ||||||
|   | |||||||
| @@ -4,9 +4,11 @@ use heed::BytesEncode; | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; | ||||||
| use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; | use crate::heed_codec::facet::{ | ||||||
|  |     FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec, | ||||||
|  | }; | ||||||
| use crate::heed_codec::BytesRefCodec; | use crate::heed_codec::BytesRefCodec; | ||||||
| use crate::Result; | use crate::{CboRoaringBitmapCodec, Result}; | ||||||
|  |  | ||||||
| /// Find all the document ids for which the given field contains a value contained within | /// Find all the document ids for which the given field contains a value contained within | ||||||
| /// the two bounds. | /// the two bounds. | ||||||
| @@ -16,6 +18,7 @@ pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>( | |||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, |     left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, | ||||||
|     right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, |     right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, | ||||||
|  |     universe: Option<&RoaringBitmap>, | ||||||
|     docids: &mut RoaringBitmap, |     docids: &mut RoaringBitmap, | ||||||
| ) -> Result<()> | ) -> Result<()> | ||||||
| where | where | ||||||
| @@ -46,13 +49,15 @@ where | |||||||
|         } |         } | ||||||
|         Bound::Unbounded => Bound::Unbounded, |         Bound::Unbounded => Bound::Unbounded, | ||||||
|     }; |     }; | ||||||
|     let db = db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(); |     let db = db.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>(); | ||||||
|     let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids }; |     let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, universe, docids }; | ||||||
|     let highest_level = get_highest_level(rtxn, db, field_id)?; |     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||||
|  |  | ||||||
|     if let Some(starting_left_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? { |     if let Some(starting_left_bound) = | ||||||
|  |         get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? | ||||||
|  |     { | ||||||
|         let rightmost_bound = |         let rightmost_bound = | ||||||
|             Bound::Included(get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded |             Bound::Included(get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded | ||||||
|         let group_size = usize::MAX; |         let group_size = usize::MAX; | ||||||
|         f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?; |         f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?; | ||||||
|         Ok(()) |         Ok(()) | ||||||
| @@ -64,12 +69,16 @@ where | |||||||
| /// Fetch the document ids that have a facet with a value between the two given bounds | /// Fetch the document ids that have a facet with a value between the two given bounds | ||||||
| struct FacetRangeSearch<'t, 'b, 'bitmap> { | struct FacetRangeSearch<'t, 'b, 'bitmap> { | ||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     left: Bound<&'b [u8]>, |     left: Bound<&'b [u8]>, | ||||||
|     right: Bound<&'b [u8]>, |     right: Bound<&'b [u8]>, | ||||||
|  |     /// The subset of documents ids that are useful for this search. | ||||||
|  |     /// Great performance optimizations can be achieved by only fetching values matching this subset. | ||||||
|  |     universe: Option<&'bitmap RoaringBitmap>, | ||||||
|     docids: &'bitmap mut RoaringBitmap, |     docids: &'bitmap mut RoaringBitmap, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { | impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { | ||||||
|     fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> { |     fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> { | ||||||
|         let left_key = |         let left_key = | ||||||
| @@ -104,7 +113,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { | |||||||
|             } |             } | ||||||
|  |  | ||||||
|             if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) { |             if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) { | ||||||
|                 *self.docids |= value.bitmap; |                 *self.docids |= match self.universe { | ||||||
|  |                     Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized( | ||||||
|  |                         value.bitmap_bytes, | ||||||
|  |                         universe, | ||||||
|  |                     )?, | ||||||
|  |                     None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?, | ||||||
|  |                 }; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         Ok(()) |         Ok(()) | ||||||
| @@ -195,7 +210,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { | |||||||
|                 left_condition && right_condition |                 left_condition && right_condition | ||||||
|             }; |             }; | ||||||
|             if should_take_whole_group { |             if should_take_whole_group { | ||||||
|                 *self.docids |= &previous_value.bitmap; |                 *self.docids |= match self.universe { | ||||||
|  |                     Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized( | ||||||
|  |                         previous_value.bitmap_bytes, | ||||||
|  |                         universe, | ||||||
|  |                     )?, | ||||||
|  |                     None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?, | ||||||
|  |                 }; | ||||||
|                 previous_key = next_key; |                 previous_key = next_key; | ||||||
|                 previous_value = next_value; |                 previous_value = next_value; | ||||||
|                 continue; |                 continue; | ||||||
| @@ -291,7 +312,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { | |||||||
|             left_condition && right_condition |             left_condition && right_condition | ||||||
|         }; |         }; | ||||||
|         if should_take_whole_group { |         if should_take_whole_group { | ||||||
|             *self.docids |= &previous_value.bitmap; |             *self.docids |= match self.universe { | ||||||
|  |                 Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized( | ||||||
|  |                     previous_value.bitmap_bytes, | ||||||
|  |                     universe, | ||||||
|  |                 )?, | ||||||
|  |                 None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?, | ||||||
|  |             }; | ||||||
|         } else { |         } else { | ||||||
|             let level = level - 1; |             let level = level - 1; | ||||||
|             let starting_left_bound = previous_key.left_bound; |             let starting_left_bound = previous_key.left_bound; | ||||||
| @@ -365,6 +392,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -384,6 +412,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -418,6 +447,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -439,6 +469,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -474,6 +505,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -499,6 +531,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -537,6 +570,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -556,6 +590,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -571,6 +606,7 @@ mod tests { | |||||||
|                 0, |                 0, | ||||||
|                 &Bound::Unbounded, |                 &Bound::Unbounded, | ||||||
|                 &Bound::Unbounded, |                 &Bound::Unbounded, | ||||||
|  |                 None, | ||||||
|                 &mut docids, |                 &mut docids, | ||||||
|             ) |             ) | ||||||
|             .unwrap(); |             .unwrap(); | ||||||
| @@ -586,6 +622,7 @@ mod tests { | |||||||
|                 1, |                 1, | ||||||
|                 &Bound::Unbounded, |                 &Bound::Unbounded, | ||||||
|                 &Bound::Unbounded, |                 &Bound::Unbounded, | ||||||
|  |                 None, | ||||||
|                 &mut docids, |                 &mut docids, | ||||||
|             ) |             ) | ||||||
|             .unwrap(); |             .unwrap(); | ||||||
| @@ -621,6 +658,7 @@ mod tests { | |||||||
|                     0, |                     0, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
| @@ -634,6 +672,7 @@ mod tests { | |||||||
|                     1, |                     1, | ||||||
|                     &start, |                     &start, | ||||||
|                     &end, |                     &end, | ||||||
|  |                     None, | ||||||
|                     &mut docids, |                     &mut docids, | ||||||
|                 ) |                 ) | ||||||
|                 .unwrap(); |                 .unwrap(); | ||||||
|   | |||||||
| @@ -36,7 +36,7 @@ pub fn ascending_facet_sort<'t>( | |||||||
|     candidates: RoaringBitmap, |     candidates: RoaringBitmap, | ||||||
| ) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> { | ) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> { | ||||||
|     let highest_level = get_highest_level(rtxn, db, field_id)?; |     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||||
|     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? { |     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? { | ||||||
|         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; |         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; | ||||||
|         let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); |         let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -19,9 +19,9 @@ pub fn descending_facet_sort<'t>( | |||||||
|     candidates: RoaringBitmap, |     candidates: RoaringBitmap, | ||||||
| ) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> { | ) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> { | ||||||
|     let highest_level = get_highest_level(rtxn, db, field_id)?; |     let highest_level = get_highest_level(rtxn, db, field_id)?; | ||||||
|     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec>(rtxn, db, field_id)? { |     if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? { | ||||||
|         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; |         let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; | ||||||
|         let last_bound = get_last_facet_value::<BytesRefCodec>(rtxn, db, field_id)?.unwrap(); |         let last_bound = get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap(); | ||||||
|         let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; |         let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; | ||||||
|         let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); |         let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); | ||||||
|         Ok(itertools::Either::Left(DescendingFacetSort { |         Ok(itertools::Either::Left(DescendingFacetSort { | ||||||
|   | |||||||
| @@ -4,7 +4,7 @@ use std::ops::Bound::{self, Excluded, Included}; | |||||||
|  |  | ||||||
| use either::Either; | use either::Either; | ||||||
| pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token}; | pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token}; | ||||||
| use roaring::RoaringBitmap; | use roaring::{MultiOps, RoaringBitmap}; | ||||||
| use serde_json::Value; | use serde_json::Value; | ||||||
|  |  | ||||||
| use super::facet_range_search; | use super::facet_range_search; | ||||||
| @@ -224,14 +224,14 @@ impl<'a> Filter<'a> { | |||||||
|     pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> { |     pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> { | ||||||
|         // to avoid doing this for each recursive call we're going to do it ONCE ahead of time |         // to avoid doing this for each recursive call we're going to do it ONCE ahead of time | ||||||
|         let filterable_fields = index.filterable_fields(rtxn)?; |         let filterable_fields = index.filterable_fields(rtxn)?; | ||||||
|  |         self.inner_evaluate(rtxn, index, &filterable_fields, None) | ||||||
|         self.inner_evaluate(rtxn, index, &filterable_fields) |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn evaluate_operator( |     fn evaluate_operator( | ||||||
|         rtxn: &heed::RoTxn, |         rtxn: &heed::RoTxn, | ||||||
|         index: &Index, |         index: &Index, | ||||||
|         field_id: FieldId, |         field_id: FieldId, | ||||||
|  |         universe: Option<&RoaringBitmap>, | ||||||
|         operator: &Condition<'a>, |         operator: &Condition<'a>, | ||||||
|     ) -> Result<RoaringBitmap> { |     ) -> Result<RoaringBitmap> { | ||||||
|         let numbers_db = index.facet_id_f64_docids; |         let numbers_db = index.facet_id_f64_docids; | ||||||
| @@ -291,14 +291,22 @@ impl<'a> Filter<'a> { | |||||||
|             } |             } | ||||||
|             Condition::NotEqual(val) => { |             Condition::NotEqual(val) => { | ||||||
|                 let operator = Condition::Equal(val.clone()); |                 let operator = Condition::Equal(val.clone()); | ||||||
|                 let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?; |                 let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?; | ||||||
|                 let all_ids = index.documents_ids(rtxn)?; |                 let all_ids = index.documents_ids(rtxn)?; | ||||||
|                 return Ok(all_ids - docids); |                 return Ok(all_ids - docids); | ||||||
|             } |             } | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         let mut output = RoaringBitmap::new(); |         let mut output = RoaringBitmap::new(); | ||||||
|         Self::explore_facet_number_levels(rtxn, numbers_db, field_id, left, right, &mut output)?; |         Self::explore_facet_number_levels( | ||||||
|  |             rtxn, | ||||||
|  |             numbers_db, | ||||||
|  |             field_id, | ||||||
|  |             left, | ||||||
|  |             right, | ||||||
|  |             universe, | ||||||
|  |             &mut output, | ||||||
|  |         )?; | ||||||
|         Ok(output) |         Ok(output) | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -310,6 +318,7 @@ impl<'a> Filter<'a> { | |||||||
|         field_id: FieldId, |         field_id: FieldId, | ||||||
|         left: Bound<f64>, |         left: Bound<f64>, | ||||||
|         right: Bound<f64>, |         right: Bound<f64>, | ||||||
|  |         universe: Option<&RoaringBitmap>, | ||||||
|         output: &mut RoaringBitmap, |         output: &mut RoaringBitmap, | ||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
|         match (left, right) { |         match (left, right) { | ||||||
| @@ -321,7 +330,7 @@ impl<'a> Filter<'a> { | |||||||
|             (_, _) => (), |             (_, _) => (), | ||||||
|         } |         } | ||||||
|         facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>( |         facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>( | ||||||
|             rtxn, db, field_id, &left, &right, output, |             rtxn, db, field_id, &left, &right, universe, output, | ||||||
|         )?; |         )?; | ||||||
|  |  | ||||||
|         Ok(()) |         Ok(()) | ||||||
| @@ -332,31 +341,37 @@ impl<'a> Filter<'a> { | |||||||
|         rtxn: &heed::RoTxn, |         rtxn: &heed::RoTxn, | ||||||
|         index: &Index, |         index: &Index, | ||||||
|         filterable_fields: &HashSet<String>, |         filterable_fields: &HashSet<String>, | ||||||
|  |         universe: Option<&RoaringBitmap>, | ||||||
|     ) -> Result<RoaringBitmap> { |     ) -> Result<RoaringBitmap> { | ||||||
|  |         if universe.map_or(false, |u| u.is_empty()) { | ||||||
|  |             return Ok(RoaringBitmap::new()); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         match &self.condition { |         match &self.condition { | ||||||
|             FilterCondition::Not(f) => { |             FilterCondition::Not(f) => { | ||||||
|                 let all_ids = index.documents_ids(rtxn)?; |  | ||||||
|                 let selected = Self::inner_evaluate( |                 let selected = Self::inner_evaluate( | ||||||
|                     &(f.as_ref().clone()).into(), |                     &(f.as_ref().clone()).into(), | ||||||
|                     rtxn, |                     rtxn, | ||||||
|                     index, |                     index, | ||||||
|                     filterable_fields, |                     filterable_fields, | ||||||
|  |                     universe, | ||||||
|                 )?; |                 )?; | ||||||
|  |                 match universe { | ||||||
|  |                     Some(universe) => Ok(universe - selected), | ||||||
|  |                     None => { | ||||||
|  |                         let all_ids = index.documents_ids(rtxn)?; | ||||||
|                         Ok(all_ids - selected) |                         Ok(all_ids - selected) | ||||||
|                     } |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|             FilterCondition::In { fid, els } => { |             FilterCondition::In { fid, els } => { | ||||||
|                 if crate::is_faceted(fid.value(), filterable_fields) { |                 if crate::is_faceted(fid.value(), filterable_fields) { | ||||||
|                     let field_ids_map = index.fields_ids_map(rtxn)?; |                     let field_ids_map = index.fields_ids_map(rtxn)?; | ||||||
|  |  | ||||||
|                     if let Some(fid) = field_ids_map.id(fid.value()) { |                     if let Some(fid) = field_ids_map.id(fid.value()) { | ||||||
|                         let mut bitmap = RoaringBitmap::new(); |                         els.iter() | ||||||
|  |                             .map(|el| Condition::Equal(el.clone())) | ||||||
|                         for el in els { |                             .map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op)) | ||||||
|                             let op = Condition::Equal(el.clone()); |                             .union() | ||||||
|                             let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?; |  | ||||||
|                             bitmap |= el_bitmap; |  | ||||||
|                         } |  | ||||||
|                         Ok(bitmap) |  | ||||||
|                     } else { |                     } else { | ||||||
|                         Ok(RoaringBitmap::new()) |                         Ok(RoaringBitmap::new()) | ||||||
|                     } |                     } | ||||||
| @@ -371,7 +386,7 @@ impl<'a> Filter<'a> { | |||||||
|                 if crate::is_faceted(fid.value(), filterable_fields) { |                 if crate::is_faceted(fid.value(), filterable_fields) { | ||||||
|                     let field_ids_map = index.fields_ids_map(rtxn)?; |                     let field_ids_map = index.fields_ids_map(rtxn)?; | ||||||
|                     if let Some(fid) = field_ids_map.id(fid.value()) { |                     if let Some(fid) = field_ids_map.id(fid.value()) { | ||||||
|                         Self::evaluate_operator(rtxn, index, fid, op) |                         Self::evaluate_operator(rtxn, index, fid, universe, op) | ||||||
|                     } else { |                     } else { | ||||||
|                         Ok(RoaringBitmap::new()) |                         Ok(RoaringBitmap::new()) | ||||||
|                     } |                     } | ||||||
| @@ -382,14 +397,11 @@ impl<'a> Filter<'a> { | |||||||
|                     }))? |                     }))? | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|             FilterCondition::Or(subfilters) => { |             FilterCondition::Or(subfilters) => subfilters | ||||||
|                 let mut bitmap = RoaringBitmap::new(); |                 .iter() | ||||||
|                 for f in subfilters { |                 .cloned() | ||||||
|                     bitmap |= |                 .map(|f| Self::inner_evaluate(&f.into(), rtxn, index, filterable_fields, universe)) | ||||||
|                         Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?; |                 .union(), | ||||||
|                 } |  | ||||||
|                 Ok(bitmap) |  | ||||||
|             } |  | ||||||
|             FilterCondition::And(subfilters) => { |             FilterCondition::And(subfilters) => { | ||||||
|                 let mut subfilters_iter = subfilters.iter(); |                 let mut subfilters_iter = subfilters.iter(); | ||||||
|                 if let Some(first_subfilter) = subfilters_iter.next() { |                 if let Some(first_subfilter) = subfilters_iter.next() { | ||||||
| @@ -398,16 +410,21 @@ impl<'a> Filter<'a> { | |||||||
|                         rtxn, |                         rtxn, | ||||||
|                         index, |                         index, | ||||||
|                         filterable_fields, |                         filterable_fields, | ||||||
|  |                         universe, | ||||||
|                     )?; |                     )?; | ||||||
|                     for f in subfilters_iter { |                     for f in subfilters_iter { | ||||||
|                         if bitmap.is_empty() { |                         if bitmap.is_empty() { | ||||||
|                             return Ok(bitmap); |                             return Ok(bitmap); | ||||||
|                         } |                         } | ||||||
|  |                         // TODO We are doing the intersections two times, | ||||||
|  |                         //      it could be more efficient | ||||||
|  |                         //      Can't I just replace this `&=` by an `=`? | ||||||
|                         bitmap &= Self::inner_evaluate( |                         bitmap &= Self::inner_evaluate( | ||||||
|                             &(f.clone()).into(), |                             &(f.clone()).into(), | ||||||
|                             rtxn, |                             rtxn, | ||||||
|                             index, |                             index, | ||||||
|                             filterable_fields, |                             filterable_fields, | ||||||
|  |                             Some(&bitmap), | ||||||
|                         )?; |                         )?; | ||||||
|                     } |                     } | ||||||
|                     Ok(bitmap) |                     Ok(bitmap) | ||||||
| @@ -507,6 +524,7 @@ impl<'a> Filter<'a> { | |||||||
|                         rtxn, |                         rtxn, | ||||||
|                         index, |                         index, | ||||||
|                         filterable_fields, |                         filterable_fields, | ||||||
|  |                         universe, | ||||||
|                     )?; |                     )?; | ||||||
|  |  | ||||||
|                     let geo_lng_token = Token::new( |                     let geo_lng_token = Token::new( | ||||||
| @@ -539,6 +557,7 @@ impl<'a> Filter<'a> { | |||||||
|                             rtxn, |                             rtxn, | ||||||
|                             index, |                             index, | ||||||
|                             filterable_fields, |                             filterable_fields, | ||||||
|  |                             universe, | ||||||
|                         )?; |                         )?; | ||||||
|  |  | ||||||
|                         let condition_right = FilterCondition::Condition { |                         let condition_right = FilterCondition::Condition { | ||||||
| @@ -552,6 +571,7 @@ impl<'a> Filter<'a> { | |||||||
|                             rtxn, |                             rtxn, | ||||||
|                             index, |                             index, | ||||||
|                             filterable_fields, |                             filterable_fields, | ||||||
|  |                             universe, | ||||||
|                         )?; |                         )?; | ||||||
|  |  | ||||||
|                         left | right |                         left | right | ||||||
| @@ -567,6 +587,7 @@ impl<'a> Filter<'a> { | |||||||
|                             rtxn, |                             rtxn, | ||||||
|                             index, |                             index, | ||||||
|                             filterable_fields, |                             filterable_fields, | ||||||
|  |                             universe, | ||||||
|                         )? |                         )? | ||||||
|                     }; |                     }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -7,7 +7,7 @@ use roaring::RoaringBitmap; | |||||||
| pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; | pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; | ||||||
| pub use self::filter::{BadGeoError, Filter}; | pub use self::filter::{BadGeoError, Filter}; | ||||||
| pub use self::search::{FacetValueHit, SearchForFacetValues}; | pub use self::search::{FacetValueHit, SearchForFacetValues}; | ||||||
| use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec}; | use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; | ||||||
| use crate::heed_codec::BytesRefCodec; | use crate::heed_codec::BytesRefCodec; | ||||||
| use crate::{Index, Result}; | use crate::{Index, Result}; | ||||||
|  |  | ||||||
| @@ -54,9 +54,9 @@ pub fn facet_max_value<'t>( | |||||||
| } | } | ||||||
|  |  | ||||||
| /// Get the first facet value in the facet database | /// Get the first facet value in the facet database | ||||||
| pub(crate) fn get_first_facet_value<'t, BoundCodec>( | pub(crate) fn get_first_facet_value<'t, BoundCodec, DC>( | ||||||
|     txn: &'t RoTxn, |     txn: &'t RoTxn, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
| ) -> heed::Result<Option<BoundCodec::DItem>> | ) -> heed::Result<Option<BoundCodec::DItem>> | ||||||
| where | where | ||||||
| @@ -78,9 +78,9 @@ where | |||||||
| } | } | ||||||
|  |  | ||||||
| /// Get the last facet value in the facet database | /// Get the last facet value in the facet database | ||||||
| pub(crate) fn get_last_facet_value<'t, BoundCodec>( | pub(crate) fn get_last_facet_value<'t, BoundCodec, DC>( | ||||||
|     txn: &'t RoTxn, |     txn: &'t RoTxn, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
| ) -> heed::Result<Option<BoundCodec::DItem>> | ) -> heed::Result<Option<BoundCodec::DItem>> | ||||||
| where | where | ||||||
| @@ -102,9 +102,9 @@ where | |||||||
| } | } | ||||||
|  |  | ||||||
| /// Get the height of the highest level in the facet database | /// Get the height of the highest level in the facet database | ||||||
| pub(crate) fn get_highest_level<'t>( | pub(crate) fn get_highest_level<'t, DC>( | ||||||
|     txn: &'t RoTxn<'t>, |     txn: &'t RoTxn<'t>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
| ) -> heed::Result<u8> { | ) -> heed::Result<u8> { | ||||||
|     let field_id_prefix = &field_id.to_be_bytes(); |     let field_id_prefix = &field_id.to_be_bytes(); | ||||||
|   | |||||||
| @@ -159,6 +159,7 @@ impl<'a> Search<'a> { | |||||||
|             offset: 0, |             offset: 0, | ||||||
|             limit: self.limit + self.offset, |             limit: self.limit + self.offset, | ||||||
|             sort_criteria: self.sort_criteria.clone(), |             sort_criteria: self.sort_criteria.clone(), | ||||||
|  |             distinct: self.distinct.clone(), | ||||||
|             searchable_attributes: self.searchable_attributes, |             searchable_attributes: self.searchable_attributes, | ||||||
|             geo_strategy: self.geo_strategy, |             geo_strategy: self.geo_strategy, | ||||||
|             terms_matching_strategy: self.terms_matching_strategy, |             terms_matching_strategy: self.terms_matching_strategy, | ||||||
| @@ -169,6 +170,7 @@ impl<'a> Search<'a> { | |||||||
|             index: self.index, |             index: self.index, | ||||||
|             semantic: self.semantic.clone(), |             semantic: self.semantic.clone(), | ||||||
|             time_budget: self.time_budget.clone(), |             time_budget: self.time_budget.clone(), | ||||||
|  |             ranking_score_threshold: self.ranking_score_threshold, | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         let semantic = search.semantic.take(); |         let semantic = search.semantic.take(); | ||||||
| @@ -176,16 +178,16 @@ impl<'a> Search<'a> { | |||||||
|  |  | ||||||
|         // completely skip semantic search if the results of the keyword search are good enough |         // completely skip semantic search if the results of the keyword search are good enough | ||||||
|         if self.results_good_enough(&keyword_results, semantic_ratio) { |         if self.results_good_enough(&keyword_results, semantic_ratio) { | ||||||
|             return Ok((keyword_results, Some(0))); |             return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // no vector search against placeholder search |         // no vector search against placeholder search | ||||||
|         let Some(query) = search.query.take() else { |         let Some(query) = search.query.take() else { | ||||||
|             return Ok((keyword_results, Some(0))); |             return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); | ||||||
|         }; |         }; | ||||||
|         // no embedder, no semantic search |         // no embedder, no semantic search | ||||||
|         let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else { |         let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else { | ||||||
|             return Ok((keyword_results, Some(0))); |             return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         let vector_query = match vector { |         let vector_query = match vector { | ||||||
| @@ -237,3 +239,44 @@ impl<'a> Search<'a> { | |||||||
|         true |         true | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Builds the value returned when the hybrid search only uses the keyword results. | ||||||
|  | /// | ||||||
|  | /// `documents_ids` and `document_scores` are paginated in lockstep: the first `offset` | ||||||
|  | /// entries are skipped and at most `limit` of the remaining entries are kept, in their | ||||||
|  | /// original order. Every other field of the `SearchResult` is passed through untouched. | ||||||
|  | /// | ||||||
|  | /// The second element of the returned tuple is always `Some(0)`. | ||||||
|  | fn return_keyword_results( | ||||||
|  |     limit: usize, | ||||||
|  |     offset: usize, | ||||||
|  |     SearchResult { | ||||||
|  |         matching_words, | ||||||
|  |         candidates, | ||||||
|  |         documents_ids, | ||||||
|  |         document_scores, | ||||||
|  |         degraded, | ||||||
|  |         used_negative_operator, | ||||||
|  |     }: SearchResult, | ||||||
|  | ) -> (SearchResult, Option<u32>) { | ||||||
|  |     // `skip` + `take` rather than `rotate_left` + `truncate`: rotating only moves the | ||||||
|  |     // skipped entries to the tail, so they ended up back in the page whenever fewer than | ||||||
|  |     // `limit` entries remained after `offset`. An `offset` at or past the end simply | ||||||
|  |     // yields empty vectors, so no separate bounds check is needed. | ||||||
|  |     let documents_ids = documents_ids.into_iter().skip(offset).take(limit).collect(); | ||||||
|  |     let document_scores = document_scores.into_iter().skip(offset).take(limit).collect(); | ||||||
|  |  | ||||||
|  |     ( | ||||||
|  |         SearchResult { | ||||||
|  |             matching_words, | ||||||
|  |             candidates, | ||||||
|  |             documents_ids, | ||||||
|  |             document_scores, | ||||||
|  |             degraded, | ||||||
|  |             used_negative_operator, | ||||||
|  |         }, | ||||||
|  |         Some(0), | ||||||
|  |     ) | ||||||
|  | } | ||||||
|   | |||||||
| @@ -11,8 +11,8 @@ use self::new::{execute_vector_search, PartialSearchResult}; | |||||||
| use crate::score_details::{ScoreDetails, ScoringStrategy}; | use crate::score_details::{ScoreDetails, ScoringStrategy}; | ||||||
| use crate::vector::Embedder; | use crate::vector::Embedder; | ||||||
| use crate::{ | use crate::{ | ||||||
|     execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, |     execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Error, Index, | ||||||
|     SearchContext, TimeBudget, |     Result, SearchContext, TimeBudget, UserError, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| // Building these factories is not free. | // Building these factories is not free. | ||||||
| @@ -40,6 +40,7 @@ pub struct Search<'a> { | |||||||
|     offset: usize, |     offset: usize, | ||||||
|     limit: usize, |     limit: usize, | ||||||
|     sort_criteria: Option<Vec<AscDesc>>, |     sort_criteria: Option<Vec<AscDesc>>, | ||||||
|  |     distinct: Option<String>, | ||||||
|     searchable_attributes: Option<&'a [String]>, |     searchable_attributes: Option<&'a [String]>, | ||||||
|     geo_strategy: new::GeoSortStrategy, |     geo_strategy: new::GeoSortStrategy, | ||||||
|     terms_matching_strategy: TermsMatchingStrategy, |     terms_matching_strategy: TermsMatchingStrategy, | ||||||
| @@ -50,6 +51,7 @@ pub struct Search<'a> { | |||||||
|     index: &'a Index, |     index: &'a Index, | ||||||
|     semantic: Option<SemanticSearch>, |     semantic: Option<SemanticSearch>, | ||||||
|     time_budget: TimeBudget, |     time_budget: TimeBudget, | ||||||
|  |     ranking_score_threshold: Option<f64>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a> Search<'a> { | impl<'a> Search<'a> { | ||||||
| @@ -60,6 +62,7 @@ impl<'a> Search<'a> { | |||||||
|             offset: 0, |             offset: 0, | ||||||
|             limit: 20, |             limit: 20, | ||||||
|             sort_criteria: None, |             sort_criteria: None, | ||||||
|  |             distinct: None, | ||||||
|             searchable_attributes: None, |             searchable_attributes: None, | ||||||
|             geo_strategy: new::GeoSortStrategy::default(), |             geo_strategy: new::GeoSortStrategy::default(), | ||||||
|             terms_matching_strategy: TermsMatchingStrategy::default(), |             terms_matching_strategy: TermsMatchingStrategy::default(), | ||||||
| @@ -70,6 +73,7 @@ impl<'a> Search<'a> { | |||||||
|             index, |             index, | ||||||
|             semantic: None, |             semantic: None, | ||||||
|             time_budget: TimeBudget::max(), |             time_budget: TimeBudget::max(), | ||||||
|  |             ranking_score_threshold: None, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -103,6 +107,11 @@ impl<'a> Search<'a> { | |||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn distinct(&mut self, distinct: String) -> &mut Search<'a> { | ||||||
|  |         self.distinct = Some(distinct); | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> { |     pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> { | ||||||
|         self.searchable_attributes = Some(searchable); |         self.searchable_attributes = Some(searchable); | ||||||
|         self |         self | ||||||
| @@ -146,6 +155,11 @@ impl<'a> Search<'a> { | |||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn ranking_score_threshold(&mut self, ranking_score_threshold: f64) -> &mut Search<'a> { | ||||||
|  |         self.ranking_score_threshold = Some(ranking_score_threshold); | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> { |     pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> { | ||||||
|         if has_vector_search { |         if has_vector_search { | ||||||
|             let ctx = SearchContext::new(self.index, self.rtxn)?; |             let ctx = SearchContext::new(self.index, self.rtxn)?; | ||||||
| @@ -162,6 +176,19 @@ impl<'a> Search<'a> { | |||||||
|             ctx.attributes_to_search_on(searchable_attributes)?; |             ctx.attributes_to_search_on(searchable_attributes)?; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         if let Some(distinct) = &self.distinct { | ||||||
|  |             let filterable_fields = ctx.index.filterable_fields(ctx.txn)?; | ||||||
|  |             if !crate::is_faceted(distinct, &filterable_fields) { | ||||||
|  |                 let (valid_fields, hidden_fields) = | ||||||
|  |                     ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?; | ||||||
|  |                 return Err(Error::UserError(UserError::InvalidDistinctAttribute { | ||||||
|  |                     field: distinct.clone(), | ||||||
|  |                     valid_fields, | ||||||
|  |                     hidden_fields, | ||||||
|  |                 })); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|         let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?; |         let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?; | ||||||
|         let PartialSearchResult { |         let PartialSearchResult { | ||||||
|             located_query_terms, |             located_query_terms, | ||||||
| @@ -178,12 +205,14 @@ impl<'a> Search<'a> { | |||||||
|                     self.scoring_strategy, |                     self.scoring_strategy, | ||||||
|                     universe, |                     universe, | ||||||
|                     &self.sort_criteria, |                     &self.sort_criteria, | ||||||
|  |                     &self.distinct, | ||||||
|                     self.geo_strategy, |                     self.geo_strategy, | ||||||
|                     self.offset, |                     self.offset, | ||||||
|                     self.limit, |                     self.limit, | ||||||
|                     embedder_name, |                     embedder_name, | ||||||
|                     embedder, |                     embedder, | ||||||
|                     self.time_budget.clone(), |                     self.time_budget.clone(), | ||||||
|  |                     self.ranking_score_threshold, | ||||||
|                 )? |                 )? | ||||||
|             } |             } | ||||||
|             _ => execute_search( |             _ => execute_search( | ||||||
| @@ -194,6 +223,7 @@ impl<'a> Search<'a> { | |||||||
|                 self.exhaustive_number_hits, |                 self.exhaustive_number_hits, | ||||||
|                 universe, |                 universe, | ||||||
|                 &self.sort_criteria, |                 &self.sort_criteria, | ||||||
|  |                 &self.distinct, | ||||||
|                 self.geo_strategy, |                 self.geo_strategy, | ||||||
|                 self.offset, |                 self.offset, | ||||||
|                 self.limit, |                 self.limit, | ||||||
| @@ -201,6 +231,7 @@ impl<'a> Search<'a> { | |||||||
|                 &mut DefaultSearchLogger, |                 &mut DefaultSearchLogger, | ||||||
|                 &mut DefaultSearchLogger, |                 &mut DefaultSearchLogger, | ||||||
|                 self.time_budget.clone(), |                 self.time_budget.clone(), | ||||||
|  |                 self.ranking_score_threshold, | ||||||
|             )?, |             )?, | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
| @@ -229,6 +260,7 @@ impl fmt::Debug for Search<'_> { | |||||||
|             offset, |             offset, | ||||||
|             limit, |             limit, | ||||||
|             sort_criteria, |             sort_criteria, | ||||||
|  |             distinct, | ||||||
|             searchable_attributes, |             searchable_attributes, | ||||||
|             geo_strategy: _, |             geo_strategy: _, | ||||||
|             terms_matching_strategy, |             terms_matching_strategy, | ||||||
| @@ -239,6 +271,7 @@ impl fmt::Debug for Search<'_> { | |||||||
|             index: _, |             index: _, | ||||||
|             semantic, |             semantic, | ||||||
|             time_budget, |             time_budget, | ||||||
|  |             ranking_score_threshold, | ||||||
|         } = self; |         } = self; | ||||||
|         f.debug_struct("Search") |         f.debug_struct("Search") | ||||||
|             .field("query", query) |             .field("query", query) | ||||||
| @@ -247,6 +280,7 @@ impl fmt::Debug for Search<'_> { | |||||||
|             .field("offset", offset) |             .field("offset", offset) | ||||||
|             .field("limit", limit) |             .field("limit", limit) | ||||||
|             .field("sort_criteria", sort_criteria) |             .field("sort_criteria", sort_criteria) | ||||||
|  |             .field("distinct", distinct) | ||||||
|             .field("searchable_attributes", searchable_attributes) |             .field("searchable_attributes", searchable_attributes) | ||||||
|             .field("terms_matching_strategy", terms_matching_strategy) |             .field("terms_matching_strategy", terms_matching_strategy) | ||||||
|             .field("scoring_strategy", scoring_strategy) |             .field("scoring_strategy", scoring_strategy) | ||||||
| @@ -257,6 +291,7 @@ impl fmt::Debug for Search<'_> { | |||||||
|                 &semantic.as_ref().map(|semantic| &semantic.embedder_name), |                 &semantic.as_ref().map(|semantic| &semantic.embedder_name), | ||||||
|             ) |             ) | ||||||
|             .field("time_budget", time_budget) |             .field("time_budget", time_budget) | ||||||
|  |             .field("ranking_score_threshold", ranking_score_threshold) | ||||||
|             .finish() |             .finish() | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -22,18 +22,25 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|     ctx: &mut SearchContext<'ctx>, |     ctx: &mut SearchContext<'ctx>, | ||||||
|     mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>, |     mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>, | ||||||
|     query: &Q, |     query: &Q, | ||||||
|  |     distinct: Option<&str>, | ||||||
|     universe: &RoaringBitmap, |     universe: &RoaringBitmap, | ||||||
|     from: usize, |     from: usize, | ||||||
|     length: usize, |     length: usize, | ||||||
|     scoring_strategy: ScoringStrategy, |     scoring_strategy: ScoringStrategy, | ||||||
|     logger: &mut dyn SearchLogger<Q>, |     logger: &mut dyn SearchLogger<Q>, | ||||||
|     time_budget: TimeBudget, |     time_budget: TimeBudget, | ||||||
|  |     ranking_score_threshold: Option<f64>, | ||||||
| ) -> Result<BucketSortOutput> { | ) -> Result<BucketSortOutput> { | ||||||
|     logger.initial_query(query); |     logger.initial_query(query); | ||||||
|     logger.ranking_rules(&ranking_rules); |     logger.ranking_rules(&ranking_rules); | ||||||
|     logger.initial_universe(universe); |     logger.initial_universe(universe); | ||||||
|  |  | ||||||
|     let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? { |     let distinct_field = match distinct { | ||||||
|  |         Some(distinct) => Some(distinct), | ||||||
|  |         None => ctx.index.distinct_field(ctx.txn)?, | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     let distinct_fid = if let Some(field) = distinct_field { | ||||||
|         ctx.index.fields_ids_map(ctx.txn)?.id(field) |         ctx.index.fields_ids_map(ctx.txn)?.id(field) | ||||||
|     } else { |     } else { | ||||||
|         None |         None | ||||||
| @@ -164,7 +171,19 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|             loop { |             loop { | ||||||
|                 let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); |                 let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); | ||||||
|                 ranking_rule_scores.push(ScoreDetails::Skipped); |                 ranking_rule_scores.push(ScoreDetails::Skipped); | ||||||
|  |  | ||||||
|  |                 // remove candidates from the universe without adding them to result if their score is below the threshold | ||||||
|  |                 if let Some(ranking_score_threshold) = ranking_score_threshold { | ||||||
|  |                     let current_score = ScoreDetails::global_score(ranking_rule_scores.iter()); | ||||||
|  |                     if current_score < ranking_score_threshold { | ||||||
|  |                         all_candidates -= bucket | &ranking_rule_universes[cur_ranking_rule_index]; | ||||||
|  |                         back!(); | ||||||
|  |                         continue; | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|                 maybe_add_to_results!(bucket); |                 maybe_add_to_results!(bucket); | ||||||
|  |  | ||||||
|                 ranking_rule_scores.pop(); |                 ranking_rule_scores.pop(); | ||||||
|  |  | ||||||
|                 if cur_ranking_rule_index == 0 { |                 if cur_ranking_rule_index == 0 { | ||||||
| @@ -220,6 +239,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|         debug_assert!( |         debug_assert!( | ||||||
|             ranking_rule_universes[cur_ranking_rule_index].is_superset(&next_bucket.candidates) |             ranking_rule_universes[cur_ranking_rule_index].is_superset(&next_bucket.candidates) | ||||||
|         ); |         ); | ||||||
|  |  | ||||||
|  |         // remove candidates from the universe without adding them to result if their score is below the threshold | ||||||
|  |         if let Some(ranking_score_threshold) = ranking_score_threshold { | ||||||
|  |             let current_score = ScoreDetails::global_score(ranking_rule_scores.iter()); | ||||||
|  |             if current_score < ranking_score_threshold { | ||||||
|  |                 all_candidates -= | ||||||
|  |                     next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index]; | ||||||
|  |                 back!(); | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|         ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates; |         ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates; | ||||||
|  |  | ||||||
|         if cur_ranking_rule_index == ranking_rules_len - 1 |         if cur_ranking_rule_index == ranking_rules_len - 1 | ||||||
|   | |||||||
| @@ -516,6 +516,7 @@ mod tests { | |||||||
|                 false, |                 false, | ||||||
|                 universe, |                 universe, | ||||||
|                 &None, |                 &None, | ||||||
|  |                 &None, | ||||||
|                 crate::search::new::GeoSortStrategy::default(), |                 crate::search::new::GeoSortStrategy::default(), | ||||||
|                 0, |                 0, | ||||||
|                 100, |                 100, | ||||||
| @@ -523,6 +524,7 @@ mod tests { | |||||||
|                 &mut crate::DefaultSearchLogger, |                 &mut crate::DefaultSearchLogger, | ||||||
|                 &mut crate::DefaultSearchLogger, |                 &mut crate::DefaultSearchLogger, | ||||||
|                 TimeBudget::max(), |                 TimeBudget::max(), | ||||||
|  |                 None, | ||||||
|             ) |             ) | ||||||
|             .unwrap(); |             .unwrap(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -548,6 +548,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>( | |||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[tracing::instrument(level = "trace", skip_all, target = "search")] | ||||||
| pub fn filtered_universe( | pub fn filtered_universe( | ||||||
|     index: &Index, |     index: &Index, | ||||||
|     txn: &RoTxn<'_>, |     txn: &RoTxn<'_>, | ||||||
| @@ -567,12 +568,14 @@ pub fn execute_vector_search( | |||||||
|     scoring_strategy: ScoringStrategy, |     scoring_strategy: ScoringStrategy, | ||||||
|     universe: RoaringBitmap, |     universe: RoaringBitmap, | ||||||
|     sort_criteria: &Option<Vec<AscDesc>>, |     sort_criteria: &Option<Vec<AscDesc>>, | ||||||
|  |     distinct: &Option<String>, | ||||||
|     geo_strategy: geo_sort::Strategy, |     geo_strategy: geo_sort::Strategy, | ||||||
|     from: usize, |     from: usize, | ||||||
|     length: usize, |     length: usize, | ||||||
|     embedder_name: &str, |     embedder_name: &str, | ||||||
|     embedder: &Embedder, |     embedder: &Embedder, | ||||||
|     time_budget: TimeBudget, |     time_budget: TimeBudget, | ||||||
|  |     ranking_score_threshold: Option<f64>, | ||||||
| ) -> Result<PartialSearchResult> { | ) -> Result<PartialSearchResult> { | ||||||
|     check_sort_criteria(ctx, sort_criteria.as_ref())?; |     check_sort_criteria(ctx, sort_criteria.as_ref())?; | ||||||
|  |  | ||||||
| @@ -596,12 +599,14 @@ pub fn execute_vector_search( | |||||||
|         ctx, |         ctx, | ||||||
|         ranking_rules, |         ranking_rules, | ||||||
|         &PlaceholderQuery, |         &PlaceholderQuery, | ||||||
|  |         distinct.as_deref(), | ||||||
|         &universe, |         &universe, | ||||||
|         from, |         from, | ||||||
|         length, |         length, | ||||||
|         scoring_strategy, |         scoring_strategy, | ||||||
|         placeholder_search_logger, |         placeholder_search_logger, | ||||||
|         time_budget, |         time_budget, | ||||||
|  |         ranking_score_threshold, | ||||||
|     )?; |     )?; | ||||||
|  |  | ||||||
|     Ok(PartialSearchResult { |     Ok(PartialSearchResult { | ||||||
| @@ -624,6 +629,7 @@ pub fn execute_search( | |||||||
|     exhaustive_number_hits: bool, |     exhaustive_number_hits: bool, | ||||||
|     mut universe: RoaringBitmap, |     mut universe: RoaringBitmap, | ||||||
|     sort_criteria: &Option<Vec<AscDesc>>, |     sort_criteria: &Option<Vec<AscDesc>>, | ||||||
|  |     distinct: &Option<String>, | ||||||
|     geo_strategy: geo_sort::Strategy, |     geo_strategy: geo_sort::Strategy, | ||||||
|     from: usize, |     from: usize, | ||||||
|     length: usize, |     length: usize, | ||||||
| @@ -631,6 +637,7 @@ pub fn execute_search( | |||||||
|     placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>, |     placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>, | ||||||
|     query_graph_logger: &mut dyn SearchLogger<QueryGraph>, |     query_graph_logger: &mut dyn SearchLogger<QueryGraph>, | ||||||
|     time_budget: TimeBudget, |     time_budget: TimeBudget, | ||||||
|  |     ranking_score_threshold: Option<f64>, | ||||||
| ) -> Result<PartialSearchResult> { | ) -> Result<PartialSearchResult> { | ||||||
|     check_sort_criteria(ctx, sort_criteria.as_ref())?; |     check_sort_criteria(ctx, sort_criteria.as_ref())?; | ||||||
|  |  | ||||||
| @@ -713,12 +720,14 @@ pub fn execute_search( | |||||||
|             ctx, |             ctx, | ||||||
|             ranking_rules, |             ranking_rules, | ||||||
|             &graph, |             &graph, | ||||||
|  |             distinct.as_deref(), | ||||||
|             &universe, |             &universe, | ||||||
|             from, |             from, | ||||||
|             length, |             length, | ||||||
|             scoring_strategy, |             scoring_strategy, | ||||||
|             query_graph_logger, |             query_graph_logger, | ||||||
|             time_budget, |             time_budget, | ||||||
|  |             ranking_score_threshold, | ||||||
|         )? |         )? | ||||||
|     } else { |     } else { | ||||||
|         let ranking_rules = |         let ranking_rules = | ||||||
| @@ -727,12 +736,14 @@ pub fn execute_search( | |||||||
|             ctx, |             ctx, | ||||||
|             ranking_rules, |             ranking_rules, | ||||||
|             &PlaceholderQuery, |             &PlaceholderQuery, | ||||||
|  |             distinct.as_deref(), | ||||||
|             &universe, |             &universe, | ||||||
|             from, |             from, | ||||||
|             length, |             length, | ||||||
|             scoring_strategy, |             scoring_strategy, | ||||||
|             placeholder_search_logger, |             placeholder_search_logger, | ||||||
|             time_budget, |             time_budget, | ||||||
|  |             ranking_score_threshold, | ||||||
|         )? |         )? | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
| @@ -742,7 +753,12 @@ pub fn execute_search( | |||||||
|     // The candidates is the universe unless the exhaustive number of hits |     // The candidates is the universe unless the exhaustive number of hits | ||||||
|     // is requested and a distinct attribute is set. |     // is requested and a distinct attribute is set. | ||||||
|     if exhaustive_number_hits { |     if exhaustive_number_hits { | ||||||
|         if let Some(f) = ctx.index.distinct_field(ctx.txn)? { |         let distinct_field = match distinct.as_deref() { | ||||||
|  |             Some(distinct) => Some(distinct), | ||||||
|  |             None => ctx.index.distinct_field(ctx.txn)?, | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         if let Some(f) = distinct_field { | ||||||
|             if let Some(distinct_fid) = fields_ids_map.id(f) { |             if let Some(distinct_fid) = fields_ids_map.id(f) { | ||||||
|                 all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining; |                 all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining; | ||||||
|             } |             } | ||||||
|   | |||||||
| @@ -205,8 +205,18 @@ fn create_index() -> TempIndex { | |||||||
|     index |     index | ||||||
| } | } | ||||||
|  |  | ||||||
| fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec<String> { | fn verify_distinct( | ||||||
|     let vs = collect_field_values(index, txn, index.distinct_field(txn).unwrap().unwrap(), docids); |     index: &Index, | ||||||
|  |     txn: &RoTxn, | ||||||
|  |     distinct: Option<&str>, | ||||||
|  |     docids: &[u32], | ||||||
|  | ) -> Vec<String> { | ||||||
|  |     let vs = collect_field_values( | ||||||
|  |         index, | ||||||
|  |         txn, | ||||||
|  |         distinct.or_else(|| index.distinct_field(txn).unwrap()).unwrap(), | ||||||
|  |         docids, | ||||||
|  |     ); | ||||||
|  |  | ||||||
|     let mut unique = HashSet::new(); |     let mut unique = HashSet::new(); | ||||||
|     for v in vs.iter() { |     for v in vs.iter() { | ||||||
| @@ -223,12 +233,49 @@ fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec<String> { | |||||||
| fn test_distinct_placeholder_no_ranking_rules() { | fn test_distinct_placeholder_no_ranking_rules() { | ||||||
|     let index = create_index(); |     let index = create_index(); | ||||||
|  |  | ||||||
|  |     // Set the letter as filterable and unset the distinct attribute. | ||||||
|  |     index | ||||||
|  |         .update_settings(|s| { | ||||||
|  |             s.set_filterable_fields(hashset! { S("letter") }); | ||||||
|  |             s.reset_distinct_field(); | ||||||
|  |         }) | ||||||
|  |         .unwrap(); | ||||||
|  |  | ||||||
|  |     let txn = index.read_txn().unwrap(); | ||||||
|  |  | ||||||
|  |     let mut s = Search::new(&txn, &index); | ||||||
|  |     s.distinct(S("letter")); | ||||||
|  |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|  |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]"); | ||||||
|  |     let distinct_values = verify_distinct(&index, &txn, Some("letter"), &documents_ids); | ||||||
|  |     insta::assert_debug_snapshot!(distinct_values, @r###" | ||||||
|  |     [ | ||||||
|  |         "\"A\"", | ||||||
|  |         "\"B\"", | ||||||
|  |         "\"C\"", | ||||||
|  |         "\"D\"", | ||||||
|  |         "\"E\"", | ||||||
|  |         "\"F\"", | ||||||
|  |         "\"G\"", | ||||||
|  |         "\"H\"", | ||||||
|  |         "\"I\"", | ||||||
|  |         "__does_not_exist__", | ||||||
|  |         "__does_not_exist__", | ||||||
|  |         "__does_not_exist__", | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[test] | ||||||
|  | fn test_distinct_at_search_placeholder_no_ranking_rules() { | ||||||
|  |     let index = create_index(); | ||||||
|  |  | ||||||
|     let txn = index.read_txn().unwrap(); |     let txn = index.read_txn().unwrap(); | ||||||
|  |  | ||||||
|     let s = Search::new(&txn, &index); |     let s = Search::new(&txn, &index); | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]"); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]"); | ||||||
|     let distinct_values = verify_distinct(&index, &txn, &documents_ids); |     let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); | ||||||
|     insta::assert_debug_snapshot!(distinct_values, @r###" |     insta::assert_debug_snapshot!(distinct_values, @r###" | ||||||
|     [ |     [ | ||||||
|         "\"A\"", |         "\"A\"", | ||||||
| @@ -263,7 +310,7 @@ fn test_distinct_placeholder_sort() { | |||||||
|  |  | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]"); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]"); | ||||||
|     let distinct_values = verify_distinct(&index, &txn, &documents_ids); |     let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); | ||||||
|     insta::assert_debug_snapshot!(distinct_values, @r###" |     insta::assert_debug_snapshot!(distinct_values, @r###" | ||||||
|     [ |     [ | ||||||
|         "\"E\"", |         "\"E\"", | ||||||
| @@ -303,7 +350,7 @@ fn test_distinct_placeholder_sort() { | |||||||
|  |  | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 20, 18, 15, 9, 8, 5, 2, 0, 24, 25, 26]"); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 20, 18, 15, 9, 8, 5, 2, 0, 24, 25, 26]"); | ||||||
|     let distinct_values = verify_distinct(&index, &txn, &documents_ids); |     let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); | ||||||
|     insta::assert_debug_snapshot!(distinct_values, @r###" |     insta::assert_debug_snapshot!(distinct_values, @r###" | ||||||
|     [ |     [ | ||||||
|         "\"I\"", |         "\"I\"", | ||||||
| @@ -346,7 +393,7 @@ fn test_distinct_placeholder_sort() { | |||||||
|  |  | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 20, 19, 17, 14, 8, 7, 4, 1, 26, 25, 24]"); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 20, 19, 17, 14, 8, 7, 4, 1, 26, 25, 24]"); | ||||||
|     let distinct_values = verify_distinct(&index, &txn, &documents_ids); |     let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); | ||||||
|     insta::assert_debug_snapshot!(distinct_values, @r###" |     insta::assert_debug_snapshot!(distinct_values, @r###" | ||||||
|     [ |     [ | ||||||
|         "\"I\"", |         "\"I\"", | ||||||
| @@ -399,7 +446,7 @@ fn test_distinct_words() { | |||||||
|  |  | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 26, 5, 8, 9, 15, 18, 20, 21, 25, 24]"); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 26, 5, 8, 9, 15, 18, 20, 21, 25, 24]"); | ||||||
|     let distinct_values = verify_distinct(&index, &txn, &documents_ids); |     let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); | ||||||
|     insta::assert_debug_snapshot!(distinct_values, @r###" |     insta::assert_debug_snapshot!(distinct_values, @r###" | ||||||
|     [ |     [ | ||||||
|         "\"A\"", |         "\"A\"", | ||||||
| @@ -453,7 +500,7 @@ fn test_distinct_sort_words() { | |||||||
|  |  | ||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[22, 20, 19, 16, 9, 8, 7, 3, 1, 26, 25, 24]"); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[22, 20, 19, 16, 9, 8, 7, 3, 1, 26, 25, 24]"); | ||||||
|     let distinct_values = verify_distinct(&index, &txn, &documents_ids); |     let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); | ||||||
|     insta::assert_debug_snapshot!(distinct_values, @r###" |     insta::assert_debug_snapshot!(distinct_values, @r###" | ||||||
|     [ |     [ | ||||||
|         "\"I\"", |         "\"I\"", | ||||||
| @@ -549,7 +596,7 @@ fn test_distinct_typo() { | |||||||
|     let SearchResult { documents_ids, .. } = s.execute().unwrap(); |     let SearchResult { documents_ids, .. } = s.execute().unwrap(); | ||||||
|     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 26, 0, 7, 8, 9, 15, 22, 18, 20, 25, 24]"); |     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 26, 0, 7, 8, 9, 15, 22, 18, 20, 25, 24]"); | ||||||
|  |  | ||||||
|     let distinct_values = verify_distinct(&index, &txn, &documents_ids); |     let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); | ||||||
|     insta::assert_debug_snapshot!(distinct_values, @r###" |     insta::assert_debug_snapshot!(distinct_values, @r###" | ||||||
|     [ |     [ | ||||||
|         "\"B\"", |         "\"B\"", | ||||||
|   | |||||||
| @@ -1,244 +0,0 @@ | |||||||
| --- |  | ||||||
| source: milli/src/search/new/tests/attribute_fid.rs |  | ||||||
| expression: "format!(\"{document_ids_scores:#?}\")" |  | ||||||
| --- |  | ||||||
| [ |  | ||||||
|     ( |  | ||||||
|         2, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 19, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 91, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         6, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 15, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 81, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         5, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 14, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 79, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         4, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 13, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 77, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         3, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 12, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 83, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         9, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 11, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 75, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         8, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 10, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 79, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         7, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 10, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 73, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         11, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 7, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 77, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         10, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 6, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 81, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         13, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 6, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 81, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         12, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 6, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 78, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         14, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 5, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 75, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
|     ( |  | ||||||
|         0, |  | ||||||
|         [ |  | ||||||
|             Fid( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 1, |  | ||||||
|                     max_rank: 19, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|             Position( |  | ||||||
|                 Rank { |  | ||||||
|                     rank: 91, |  | ||||||
|                     max_rank: 91, |  | ||||||
|                 }, |  | ||||||
|             ), |  | ||||||
|         ], |  | ||||||
|     ), |  | ||||||
| ] |  | ||||||
| @@ -13,7 +13,7 @@ use std::collections::BTreeSet; | |||||||
| use std::iter::FromIterator; | use std::iter::FromIterator; | ||||||
|  |  | ||||||
| use crate::index::tests::TempIndex; | use crate::index::tests::TempIndex; | ||||||
| use crate::{db_snap, Search, SearchResult, TermsMatchingStrategy}; | use crate::{Search, SearchResult, TermsMatchingStrategy}; | ||||||
|  |  | ||||||
| fn create_index() -> TempIndex { | fn create_index() -> TempIndex { | ||||||
|     let index = TempIndex::new(); |     let index = TempIndex::new(); | ||||||
| @@ -66,9 +66,10 @@ fn create_index() -> TempIndex { | |||||||
| } | } | ||||||
|  |  | ||||||
| #[test] | #[test] | ||||||
|  | #[cfg(not(feature = "swedish-recomposition"))] | ||||||
| fn test_stop_words_not_indexed() { | fn test_stop_words_not_indexed() { | ||||||
|     let index = create_index(); |     let index = create_index(); | ||||||
|     db_snap!(index, word_docids, @"6288f9d7db3703b02c57025eb4a69264"); |     crate::db_snap!(index, word_docids, @"6288f9d7db3703b02c57025eb4a69264"); | ||||||
| } | } | ||||||
|  |  | ||||||
| #[test] | #[test] | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ pub struct Similar<'a> { | |||||||
|     index: &'a Index, |     index: &'a Index, | ||||||
|     embedder_name: String, |     embedder_name: String, | ||||||
|     embedder: Arc<Embedder>, |     embedder: Arc<Embedder>, | ||||||
|  |     ranking_score_threshold: Option<f64>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a> Similar<'a> { | impl<'a> Similar<'a> { | ||||||
| @@ -29,7 +30,17 @@ impl<'a> Similar<'a> { | |||||||
|         embedder_name: String, |         embedder_name: String, | ||||||
|         embedder: Arc<Embedder>, |         embedder: Arc<Embedder>, | ||||||
|     ) -> Self { |     ) -> Self { | ||||||
|         Self { id, filter: None, offset, limit, rtxn, index, embedder_name, embedder } |         Self { | ||||||
|  |             id, | ||||||
|  |             filter: None, | ||||||
|  |             offset, | ||||||
|  |             limit, | ||||||
|  |             rtxn, | ||||||
|  |             index, | ||||||
|  |             embedder_name, | ||||||
|  |             embedder, | ||||||
|  |             ranking_score_threshold: None, | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn filter(&mut self, filter: Filter<'a>) -> &mut Self { |     pub fn filter(&mut self, filter: Filter<'a>) -> &mut Self { | ||||||
| @@ -37,8 +48,18 @@ impl<'a> Similar<'a> { | |||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn ranking_score_threshold(&mut self, ranking_score_threshold: f64) -> &mut Self { | ||||||
|  |         self.ranking_score_threshold = Some(ranking_score_threshold); | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn execute(&self) -> Result<SearchResult> { |     pub fn execute(&self) -> Result<SearchResult> { | ||||||
|         let universe = filtered_universe(self.index, self.rtxn, &self.filter)?; |         let mut universe = filtered_universe(self.index, self.rtxn, &self.filter)?; | ||||||
|  |  | ||||||
|  |         // we never want to receive the docid | ||||||
|  |         universe.remove(self.id); | ||||||
|  |  | ||||||
|  |         let universe = universe; | ||||||
|  |  | ||||||
|         let embedder_index = |         let embedder_index = | ||||||
|             self.index |             self.index | ||||||
| @@ -77,6 +98,8 @@ impl<'a> Similar<'a> { | |||||||
|         let mut documents_seen = RoaringBitmap::new(); |         let mut documents_seen = RoaringBitmap::new(); | ||||||
|         documents_seen.insert(self.id); |         documents_seen.insert(self.id); | ||||||
|  |  | ||||||
|  |         let mut candidates = universe; | ||||||
|  |  | ||||||
|         for (docid, distance) in results |         for (docid, distance) in results | ||||||
|             .into_iter() |             .into_iter() | ||||||
|             // skip documents we've already seen & mark that we saw the current document |             // skip documents we've already seen & mark that we saw the current document | ||||||
| @@ -85,8 +108,6 @@ impl<'a> Similar<'a> { | |||||||
|             // take **after** filter and skip so that we get exactly limit elements if available |             // take **after** filter and skip so that we get exactly limit elements if available | ||||||
|             .take(self.limit) |             .take(self.limit) | ||||||
|         { |         { | ||||||
|             documents_ids.push(docid); |  | ||||||
|  |  | ||||||
|             let score = 1.0 - distance; |             let score = 1.0 - distance; | ||||||
|             let score = self |             let score = self | ||||||
|                 .embedder |                 .embedder | ||||||
| @@ -94,14 +115,28 @@ impl<'a> Similar<'a> { | |||||||
|                 .map(|distribution| distribution.shift(score)) |                 .map(|distribution| distribution.shift(score)) | ||||||
|                 .unwrap_or(score); |                 .unwrap_or(score); | ||||||
|  |  | ||||||
|             let score = ScoreDetails::Vector(score_details::Vector { similarity: Some(score) }); |             let score_details = | ||||||
|  |                 vec![ScoreDetails::Vector(score_details::Vector { similarity: Some(score) })]; | ||||||
|  |  | ||||||
|             document_scores.push(vec![score]); |             let score = ScoreDetails::global_score(score_details.iter()); | ||||||
|  |  | ||||||
|  |             if let Some(ranking_score_threshold) = &self.ranking_score_threshold { | ||||||
|  |                 if score < *ranking_score_threshold { | ||||||
|  |                     // this document is no longer a candidate | ||||||
|  |                     candidates.remove(docid); | ||||||
|  |                     // any document after this one is no longer a candidate either, so restrict the set to documents already seen. | ||||||
|  |                     candidates &= documents_seen; | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             documents_ids.push(docid); | ||||||
|  |             document_scores.push(score_details); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         Ok(SearchResult { |         Ok(SearchResult { | ||||||
|             matching_words: Default::default(), |             matching_words: Default::default(), | ||||||
|             candidates: universe, |             candidates, | ||||||
|             documents_ids, |             documents_ids, | ||||||
|             document_scores, |             document_scores, | ||||||
|             degraded: false, |             degraded: false, | ||||||
|   | |||||||
| @@ -1,7 +0,0 @@ | |||||||
| --- |  | ||||||
| source: milli/src/index.rs |  | ||||||
| --- |  | ||||||
| age              1      | |  | ||||||
| id               2      | |  | ||||||
| name             2      | |  | ||||||
|  |  | ||||||
| @@ -1,7 +0,0 @@ | |||||||
| --- |  | ||||||
| source: milli/src/index.rs |  | ||||||
| --- |  | ||||||
| age              1      | |  | ||||||
| id               2      | |  | ||||||
| name             2      | |  | ||||||
|  |  | ||||||
| @@ -64,6 +64,13 @@ impl<'t, 'i> ClearDocuments<'t, 'i> { | |||||||
|         self.index.delete_geo_rtree(self.wtxn)?; |         self.index.delete_geo_rtree(self.wtxn)?; | ||||||
|         self.index.delete_geo_faceted_documents_ids(self.wtxn)?; |         self.index.delete_geo_faceted_documents_ids(self.wtxn)?; | ||||||
|  |  | ||||||
|  |         // Remove all user-provided bits from the configs | ||||||
|  |         let mut configs = self.index.embedding_configs(self.wtxn)?; | ||||||
|  |         for config in configs.iter_mut() { | ||||||
|  |             config.user_provided.clear(); | ||||||
|  |         } | ||||||
|  |         self.index.put_embedding_configs(self.wtxn, configs)?; | ||||||
|  |  | ||||||
|         // Clear the other databases. |         // Clear the other databases. | ||||||
|         external_documents_ids.clear(self.wtxn)?; |         external_documents_ids.clear(self.wtxn)?; | ||||||
|         word_docids.clear(self.wtxn)?; |         word_docids.clear(self.wtxn)?; | ||||||
|   | |||||||
| @@ -8,18 +8,19 @@ use std::sync::Arc; | |||||||
|  |  | ||||||
| use bytemuck::cast_slice; | use bytemuck::cast_slice; | ||||||
| use grenad::Writer; | use grenad::Writer; | ||||||
| use itertools::EitherOrBoth; |  | ||||||
| use ordered_float::OrderedFloat; | use ordered_float::OrderedFloat; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
| use serde_json::Value; | use serde_json::Value; | ||||||
|  |  | ||||||
| use super::helpers::{create_writer, writer_into_reader, GrenadParameters}; | use super::helpers::{create_writer, writer_into_reader, GrenadParameters}; | ||||||
|  | use crate::index::IndexEmbeddingConfig; | ||||||
| use crate::prompt::Prompt; | use crate::prompt::Prompt; | ||||||
| use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; | use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; | ||||||
| use crate::update::index_documents::helpers::try_split_at; |  | ||||||
| use crate::update::settings::InnerIndexSettingsDiff; | use crate::update::settings::InnerIndexSettingsDiff; | ||||||
| use crate::vector::parsed_vectors::{ParsedVectorsDiff, RESERVED_VECTORS_FIELD_NAME}; | use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME}; | ||||||
|  | use crate::vector::settings::{EmbedderAction, ReindexAction}; | ||||||
| use crate::vector::Embedder; | use crate::vector::Embedder; | ||||||
| use crate::{DocumentId, Result, ThreadPoolNoAbort}; | use crate::{try_split_array_at, DocumentId, FieldId, FieldsIdsMap, Result, ThreadPoolNoAbort}; | ||||||
|  |  | ||||||
| /// The length of the elements that are always in the buffer when inserting new values. | /// The length of the elements that are always in the buffer when inserting new values. | ||||||
| const TRUNCATE_SIZE: usize = size_of::<DocumentId>(); | const TRUNCATE_SIZE: usize = size_of::<DocumentId>(); | ||||||
| @@ -35,6 +36,8 @@ pub struct ExtractedVectorPoints { | |||||||
|     // embedder |     // embedder | ||||||
|     pub embedder_name: String, |     pub embedder_name: String, | ||||||
|     pub embedder: Arc<Embedder>, |     pub embedder: Arc<Embedder>, | ||||||
|  |     pub add_to_user_provided: RoaringBitmap, | ||||||
|  |     pub remove_from_user_provided: RoaringBitmap, | ||||||
| } | } | ||||||
|  |  | ||||||
| enum VectorStateDelta { | enum VectorStateDelta { | ||||||
| @@ -42,12 +45,7 @@ enum VectorStateDelta { | |||||||
|     // Remove all vectors, generated or manual, from this document |     // Remove all vectors, generated or manual, from this document | ||||||
|     NowRemoved, |     NowRemoved, | ||||||
|  |  | ||||||
|     // Add the manually specified vectors, passed in the other grenad |     NowManual(Vec<Vec<f32>>), | ||||||
|     // Remove any previously generated vectors |  | ||||||
|     // Note: changing the value of the manually specified vector **should not record** this delta |  | ||||||
|     WasGeneratedNowManual(Vec<Vec<f32>>), |  | ||||||
|  |  | ||||||
|     ManualDelta(Vec<Vec<f32>>, Vec<Vec<f32>>), |  | ||||||
|  |  | ||||||
|     // Add the vector computed from the specified prompt |     // Add the vector computed from the specified prompt | ||||||
|     // Remove any previous vector |     // Remove any previous vector | ||||||
| @@ -56,14 +54,12 @@ enum VectorStateDelta { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl VectorStateDelta { | impl VectorStateDelta { | ||||||
|     fn into_values(self) -> (bool, String, (Vec<Vec<f32>>, Vec<Vec<f32>>)) { |     fn into_values(self) -> (bool, String, Vec<Vec<f32>>) { | ||||||
|         match self { |         match self { | ||||||
|             VectorStateDelta::NoChange => Default::default(), |             VectorStateDelta::NoChange => Default::default(), | ||||||
|             VectorStateDelta::NowRemoved => (true, Default::default(), Default::default()), |             VectorStateDelta::NowRemoved => (true, Default::default(), Default::default()), | ||||||
|             VectorStateDelta::WasGeneratedNowManual(add) => { |             // We always delete the previous vectors | ||||||
|                 (true, Default::default(), (Default::default(), add)) |             VectorStateDelta::NowManual(add) => (true, Default::default(), add), | ||||||
|             } |  | ||||||
|             VectorStateDelta::ManualDelta(del, add) => (false, Default::default(), (del, add)), |  | ||||||
|             VectorStateDelta::NowGenerated(prompt) => (true, prompt, Default::default()), |             VectorStateDelta::NowGenerated(prompt) => (true, prompt, Default::default()), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -74,12 +70,27 @@ struct EmbedderVectorExtractor { | |||||||
|     embedder: Arc<Embedder>, |     embedder: Arc<Embedder>, | ||||||
|     prompt: Arc<Prompt>, |     prompt: Arc<Prompt>, | ||||||
|  |  | ||||||
|     // (docid, _index) -> KvWriterDelAdd -> Vector |  | ||||||
|     manual_vectors_writer: Writer<BufWriter<File>>, |  | ||||||
|     // (docid) -> (prompt) |     // (docid) -> (prompt) | ||||||
|     prompts_writer: Writer<BufWriter<File>>, |     prompts_writer: Writer<BufWriter<File>>, | ||||||
|     // (docid) -> () |     // (docid) -> () | ||||||
|     remove_vectors_writer: Writer<BufWriter<File>>, |     remove_vectors_writer: Writer<BufWriter<File>>, | ||||||
|  |     // (docid, _index) -> KvWriterDelAdd -> Vector | ||||||
|  |     manual_vectors_writer: Writer<BufWriter<File>>, | ||||||
|  |     // The docids of the documents that contains a user defined embedding | ||||||
|  |     add_to_user_provided: RoaringBitmap, | ||||||
|  |  | ||||||
|  |     action: ExtractionAction, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | struct DocumentOperation { | ||||||
|  |     // The docids of the documents that contains an auto-generated embedding | ||||||
|  |     remove_from_user_provided: RoaringBitmap, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | enum ExtractionAction { | ||||||
|  |     SettingsFullReindex, | ||||||
|  |     SettingsRegeneratePrompts { old_prompt: Arc<Prompt> }, | ||||||
|  |     DocumentOperation(DocumentOperation), | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Extracts the embedding vector contained in each document under the `_vectors` field. | /// Extracts the embedding vector contained in each document under the `_vectors` field. | ||||||
| @@ -89,6 +100,7 @@ struct EmbedderVectorExtractor { | |||||||
| pub fn extract_vector_points<R: io::Read + io::Seek>( | pub fn extract_vector_points<R: io::Read + io::Seek>( | ||||||
|     obkv_documents: grenad::Reader<R>, |     obkv_documents: grenad::Reader<R>, | ||||||
|     indexer: GrenadParameters, |     indexer: GrenadParameters, | ||||||
|  |     embedders_configs: &[IndexEmbeddingConfig], | ||||||
|     settings_diff: &InnerIndexSettingsDiff, |     settings_diff: &InnerIndexSettingsDiff, | ||||||
| ) -> Result<Vec<ExtractedVectorPoints>> { | ) -> Result<Vec<ExtractedVectorPoints>> { | ||||||
|     let reindex_vectors = settings_diff.reindex_vectors(); |     let reindex_vectors = settings_diff.reindex_vectors(); | ||||||
| @@ -97,15 +109,75 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | |||||||
|     let new_fields_ids_map = &settings_diff.new.fields_ids_map; |     let new_fields_ids_map = &settings_diff.new.fields_ids_map; | ||||||
|     // the vector field id may have changed |     // the vector field id may have changed | ||||||
|     let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME); |     let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME); | ||||||
|     // filter the old vector fid if the settings has been changed forcing reindexing. |  | ||||||
|     let old_vectors_fid = old_vectors_fid.filter(|_| !reindex_vectors); |  | ||||||
|  |  | ||||||
|     let new_vectors_fid = new_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME); |     let new_vectors_fid = new_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME); | ||||||
|  |  | ||||||
|     let mut extractors = Vec::new(); |     let mut extractors = Vec::new(); | ||||||
|     for (embedder_name, (embedder, prompt)) in |  | ||||||
|         settings_diff.new.embedding_configs.clone().into_iter() |     let mut configs = settings_diff.new.embedding_configs.clone().into_inner(); | ||||||
|     { |     let old_configs = &settings_diff.old.embedding_configs; | ||||||
|  |  | ||||||
|  |     if reindex_vectors { | ||||||
|  |         for (name, action) in settings_diff.embedding_config_updates.iter() { | ||||||
|  |             match action { | ||||||
|  |                 EmbedderAction::WriteBackToDocuments(_) => continue, // already deleted | ||||||
|  |                 EmbedderAction::Reindex(action) => { | ||||||
|  |                     let Some((embedder_name, (embedder, prompt))) = configs.remove_entry(name) | ||||||
|  |                     else { | ||||||
|  |                         tracing::error!(embedder = name, "Requested embedder config not found"); | ||||||
|  |                         continue; | ||||||
|  |                     }; | ||||||
|  |  | ||||||
|  |                     // (docid, _index) -> KvWriterDelAdd -> Vector | ||||||
|  |                     let manual_vectors_writer = create_writer( | ||||||
|  |                         indexer.chunk_compression_type, | ||||||
|  |                         indexer.chunk_compression_level, | ||||||
|  |                         tempfile::tempfile()?, | ||||||
|  |                     ); | ||||||
|  |  | ||||||
|  |                     // (docid) -> (prompt) | ||||||
|  |                     let prompts_writer = create_writer( | ||||||
|  |                         indexer.chunk_compression_type, | ||||||
|  |                         indexer.chunk_compression_level, | ||||||
|  |                         tempfile::tempfile()?, | ||||||
|  |                     ); | ||||||
|  |  | ||||||
|  |                     // (docid) -> () | ||||||
|  |                     let remove_vectors_writer = create_writer( | ||||||
|  |                         indexer.chunk_compression_type, | ||||||
|  |                         indexer.chunk_compression_level, | ||||||
|  |                         tempfile::tempfile()?, | ||||||
|  |                     ); | ||||||
|  |  | ||||||
|  |                     let action = match action { | ||||||
|  |                         ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex, | ||||||
|  |                         ReindexAction::RegeneratePrompts => { | ||||||
|  |                             let Some((_, old_prompt)) = old_configs.get(name) else { | ||||||
|  |                                 tracing::error!(embedder = name, "Old embedder config not found"); | ||||||
|  |                                 continue; | ||||||
|  |                             }; | ||||||
|  |  | ||||||
|  |                             ExtractionAction::SettingsRegeneratePrompts { old_prompt } | ||||||
|  |                         } | ||||||
|  |                     }; | ||||||
|  |  | ||||||
|  |                     extractors.push(EmbedderVectorExtractor { | ||||||
|  |                         embedder_name, | ||||||
|  |                         embedder, | ||||||
|  |                         prompt, | ||||||
|  |                         prompts_writer, | ||||||
|  |                         remove_vectors_writer, | ||||||
|  |                         manual_vectors_writer, | ||||||
|  |                         add_to_user_provided: RoaringBitmap::new(), | ||||||
|  |                         action, | ||||||
|  |                     }); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } else { | ||||||
|  |         // document operation | ||||||
|  |  | ||||||
|  |         for (embedder_name, (embedder, prompt)) in configs.into_iter() { | ||||||
|             // (docid, _index) -> KvWriterDelAdd -> Vector |             // (docid, _index) -> KvWriterDelAdd -> Vector | ||||||
|             let manual_vectors_writer = create_writer( |             let manual_vectors_writer = create_writer( | ||||||
|                 indexer.chunk_compression_type, |                 indexer.chunk_compression_type, | ||||||
| @@ -131,75 +203,185 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | |||||||
|                 embedder_name, |                 embedder_name, | ||||||
|                 embedder, |                 embedder, | ||||||
|                 prompt, |                 prompt, | ||||||
|             manual_vectors_writer, |  | ||||||
|                 prompts_writer, |                 prompts_writer, | ||||||
|                 remove_vectors_writer, |                 remove_vectors_writer, | ||||||
|  |                 manual_vectors_writer, | ||||||
|  |                 add_to_user_provided: RoaringBitmap::new(), | ||||||
|  |                 action: ExtractionAction::DocumentOperation(DocumentOperation { | ||||||
|  |                     remove_from_user_provided: RoaringBitmap::new(), | ||||||
|  |                 }), | ||||||
|             }); |             }); | ||||||
|         } |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     let mut key_buffer = Vec::new(); |     let mut key_buffer = Vec::new(); | ||||||
|     let mut cursor = obkv_documents.into_cursor()?; |     let mut cursor = obkv_documents.into_cursor()?; | ||||||
|     while let Some((key, value)) = cursor.move_on_next()? { |     while let Some((key, value)) = cursor.move_on_next()? { | ||||||
|         // this must always be serialized as (docid, external_docid); |         // this must always be serialized as (docid, external_docid); | ||||||
|  |         const SIZE_OF_DOCUMENTID: usize = std::mem::size_of::<DocumentId>(); | ||||||
|         let (docid_bytes, external_id_bytes) = |         let (docid_bytes, external_id_bytes) = | ||||||
|             try_split_at(key, std::mem::size_of::<DocumentId>()).unwrap(); |             try_split_array_at::<u8, SIZE_OF_DOCUMENTID>(key).unwrap(); | ||||||
|         debug_assert!(from_utf8(external_id_bytes).is_ok()); |         debug_assert!(from_utf8(external_id_bytes).is_ok()); | ||||||
|  |         let docid = DocumentId::from_be_bytes(docid_bytes); | ||||||
|  |  | ||||||
|         let obkv = obkv::KvReader::new(value); |         let obkv = obkv::KvReader::new(value); | ||||||
|         key_buffer.clear(); |         key_buffer.clear(); | ||||||
|         key_buffer.extend_from_slice(docid_bytes); |         key_buffer.extend_from_slice(docid_bytes.as_slice()); | ||||||
|  |  | ||||||
|         // since we only need the primary key when we throw an error we create this getter to |         // since we only need the primary key when we throw an error we create this getter to | ||||||
|         // lazily get it when needed |         // lazily get it when needed | ||||||
|         let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() }; |         let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() }; | ||||||
|  |  | ||||||
|         let mut parsed_vectors = ParsedVectorsDiff::new(obkv, old_vectors_fid, new_vectors_fid) |         let mut parsed_vectors = ParsedVectorsDiff::new( | ||||||
|  |             docid, | ||||||
|  |             embedders_configs, | ||||||
|  |             obkv, | ||||||
|  |             old_vectors_fid, | ||||||
|  |             new_vectors_fid, | ||||||
|  |         ) | ||||||
|         .map_err(|error| error.to_crate_error(document_id().to_string()))?; |         .map_err(|error| error.to_crate_error(document_id().to_string()))?; | ||||||
|  |  | ||||||
|         for EmbedderVectorExtractor { |         for EmbedderVectorExtractor { | ||||||
|             embedder_name, |             embedder_name, | ||||||
|             embedder: _, |             embedder: _, | ||||||
|             prompt, |             prompt, | ||||||
|             manual_vectors_writer, |  | ||||||
|             prompts_writer, |             prompts_writer, | ||||||
|             remove_vectors_writer, |             remove_vectors_writer, | ||||||
|  |             manual_vectors_writer, | ||||||
|  |             add_to_user_provided, | ||||||
|  |             action, | ||||||
|         } in extractors.iter_mut() |         } in extractors.iter_mut() | ||||||
|         { |         { | ||||||
|             let delta = match parsed_vectors.remove(embedder_name) { |             let (old, new) = parsed_vectors.remove(embedder_name); | ||||||
|                 (Some(old), Some(new)) => { |             let delta = match action { | ||||||
|                     // no autogeneration |                 ExtractionAction::SettingsFullReindex => match old { | ||||||
|                     let del_vectors = old.into_array_of_vectors(); |                     // A full reindex can be triggered either by: | ||||||
|                     let add_vectors = new.into_array_of_vectors(); |                     // 1. a new embedder | ||||||
|  |                     // 2. an existing embedder changed so that it must regenerate all generated embeddings. | ||||||
|  |                     // For a new embedder, there can be `_vectors.embedder` embeddings to add to the DB | ||||||
|  |                     VectorState::Inline(vectors) => { | ||||||
|  |                         if !vectors.must_regenerate() { | ||||||
|  |                             add_to_user_provided.insert(docid); | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         match vectors.into_array_of_vectors() { | ||||||
|  |                             Some(add_vectors) => { | ||||||
|                                 if add_vectors.len() > usize::from(u8::MAX) { |                                 if add_vectors.len() > usize::from(u8::MAX) { | ||||||
|                         return Err(crate::Error::UserError(crate::UserError::TooManyVectors( |                                     return Err(crate::Error::UserError( | ||||||
|  |                                         crate::UserError::TooManyVectors( | ||||||
|                                             document_id().to_string(), |                                             document_id().to_string(), | ||||||
|                                             add_vectors.len(), |                                             add_vectors.len(), | ||||||
|                         ))); |                                         ), | ||||||
|  |                                     )); | ||||||
|                                 } |                                 } | ||||||
|  |                                 VectorStateDelta::NowManual(add_vectors) | ||||||
|                     VectorStateDelta::ManualDelta(del_vectors, add_vectors) |  | ||||||
|                             } |                             } | ||||||
|                 (Some(_old), None) => { |                             None => VectorStateDelta::NoChange, | ||||||
|                     // Do we keep this document? |                         } | ||||||
|                     let document_is_kept = obkv |                     } | ||||||
|                         .iter() |                     // this happens only when an existing embedder changed. We cannot regenerate userProvided vectors | ||||||
|                         .map(|(_, deladd)| KvReaderDelAdd::new(deladd)) |                     VectorState::Manual => VectorStateDelta::NoChange, | ||||||
|                         .any(|deladd| deladd.get(DelAdd::Addition).is_some()); |                     // generated vectors must be regenerated | ||||||
|                     if document_is_kept { |                     VectorState::Generated => regenerate_prompt(obkv, prompt, new_fields_ids_map)?, | ||||||
|                         // becomes autogenerated |                 }, | ||||||
|                         VectorStateDelta::NowGenerated(prompt.render( |                 // prompt regeneration is only triggered for existing embedders | ||||||
|  |                 ExtractionAction::SettingsRegeneratePrompts { old_prompt } => { | ||||||
|  |                     if old.must_regenerate() { | ||||||
|  |                         regenerate_if_prompt_changed( | ||||||
|                             obkv, |                             obkv, | ||||||
|                             DelAdd::Addition, |                             (old_prompt, prompt), | ||||||
|                             new_fields_ids_map, |                             (old_fields_ids_map, new_fields_ids_map), | ||||||
|                         )?) |                         )? | ||||||
|                     } else { |                     } else { | ||||||
|                         VectorStateDelta::NowRemoved |                         // we can simply ignore user provided vectors as they are not regenerated and are | ||||||
|  |                         // already in the DB since this is an existing embedder | ||||||
|  |                         VectorStateDelta::NoChange | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|                 (None, Some(new)) => { |                 ExtractionAction::DocumentOperation(DocumentOperation { | ||||||
|                     // was possibly autogenerated, remove all vectors for that document |                     remove_from_user_provided, | ||||||
|                     let add_vectors = new.into_array_of_vectors(); |                 }) => extract_vector_document_diff( | ||||||
|  |                     docid, | ||||||
|  |                     obkv, | ||||||
|  |                     prompt, | ||||||
|  |                     (add_to_user_provided, remove_from_user_provided), | ||||||
|  |                     (old, new), | ||||||
|  |                     (old_fields_ids_map, new_fields_ids_map), | ||||||
|  |                     document_id, | ||||||
|  |                 )?, | ||||||
|  |             }; | ||||||
|  |             // and we finally push the unique vectors into the writer | ||||||
|  |             push_vectors_diff( | ||||||
|  |                 remove_vectors_writer, | ||||||
|  |                 prompts_writer, | ||||||
|  |                 manual_vectors_writer, | ||||||
|  |                 &mut key_buffer, | ||||||
|  |                 delta, | ||||||
|  |             )?; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     let mut results = Vec::new(); | ||||||
|  |  | ||||||
|  |     for EmbedderVectorExtractor { | ||||||
|  |         embedder_name, | ||||||
|  |         embedder, | ||||||
|  |         prompt: _, | ||||||
|  |         prompts_writer, | ||||||
|  |         remove_vectors_writer, | ||||||
|  |         action, | ||||||
|  |         manual_vectors_writer, | ||||||
|  |         add_to_user_provided, | ||||||
|  |     } in extractors | ||||||
|  |     { | ||||||
|  |         let remove_from_user_provided = | ||||||
|  |             if let ExtractionAction::DocumentOperation(DocumentOperation { | ||||||
|  |                 remove_from_user_provided, | ||||||
|  |             }) = action | ||||||
|  |             { | ||||||
|  |                 remove_from_user_provided | ||||||
|  |             } else { | ||||||
|  |                 Default::default() | ||||||
|  |             }; | ||||||
|  |  | ||||||
|  |         results.push(ExtractedVectorPoints { | ||||||
|  |             manual_vectors: writer_into_reader(manual_vectors_writer)?, | ||||||
|  |             remove_vectors: writer_into_reader(remove_vectors_writer)?, | ||||||
|  |             prompts: writer_into_reader(prompts_writer)?, | ||||||
|  |             embedder, | ||||||
|  |             embedder_name, | ||||||
|  |             add_to_user_provided, | ||||||
|  |             remove_from_user_provided, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     Ok(results) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | fn extract_vector_document_diff( | ||||||
|  |     docid: DocumentId, | ||||||
|  |     obkv: obkv::KvReader<'_, FieldId>, | ||||||
|  |     prompt: &Prompt, | ||||||
|  |     (add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap), | ||||||
|  |     (old, new): (VectorState, VectorState), | ||||||
|  |     (old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap), | ||||||
|  |     document_id: impl Fn() -> Value, | ||||||
|  | ) -> Result<VectorStateDelta> { | ||||||
|  |     match (old.must_regenerate(), new.must_regenerate()) { | ||||||
|  |         (true, true) | (false, false) => {} | ||||||
|  |         (true, false) => { | ||||||
|  |             add_to_user_provided.insert(docid); | ||||||
|  |         } | ||||||
|  |         (false, true) => { | ||||||
|  |             remove_from_user_provided.insert(docid); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     let delta = match (old, new) { | ||||||
|  |         // regardless of the previous state, if a document now contains inline _vectors, they must | ||||||
|  |         // be extracted manually | ||||||
|  |         (_old, VectorState::Inline(new)) => match new.into_array_of_vectors() { | ||||||
|  |             Some(add_vectors) => { | ||||||
|                 if add_vectors.len() > usize::from(u8::MAX) { |                 if add_vectors.len() > usize::from(u8::MAX) { | ||||||
|                     return Err(crate::Error::UserError(crate::UserError::TooManyVectors( |                     return Err(crate::Error::UserError(crate::UserError::TooManyVectors( | ||||||
|                         document_id().to_string(), |                         document_id().to_string(), | ||||||
| @@ -207,9 +389,13 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | |||||||
|                     ))); |                     ))); | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                     VectorStateDelta::WasGeneratedNowManual(add_vectors) |                 VectorStateDelta::NowManual(add_vectors) | ||||||
|             } |             } | ||||||
|                 (None, None) => { |             None => VectorStateDelta::NoChange, | ||||||
|  |         }, | ||||||
|  |         // no `_vectors` anywhere, we check for document removal and otherwise we regenerate the prompt if the | ||||||
|  |         // document changed | ||||||
|  |         (VectorState::Generated, VectorState::Generated) => { | ||||||
|             // Do we keep this document? |             // Do we keep this document? | ||||||
|             let document_is_kept = obkv |             let document_is_kept = obkv | ||||||
|                 .iter() |                 .iter() | ||||||
| @@ -218,16 +404,10 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | |||||||
|  |  | ||||||
|             if document_is_kept { |             if document_is_kept { | ||||||
|                 // Don't give up if the old prompt was failing |                 // Don't give up if the old prompt was failing | ||||||
|                         let old_prompt = Some(&prompt) |                 let old_prompt = Some(&prompt).map(|p| { | ||||||
|                             // TODO: this filter works because we erase the vec database when a embedding setting changes. |                     p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default() | ||||||
|                             // When vector pipeline will be optimized, this should be removed. |  | ||||||
|                             .filter(|_| !settings_diff.reindex_vectors()) |  | ||||||
|                             .map(|p| { |  | ||||||
|                                 p.render(obkv, DelAdd::Deletion, old_fields_ids_map) |  | ||||||
|                                     .unwrap_or_default() |  | ||||||
|                 }); |                 }); | ||||||
|                         let new_prompt = |                 let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; | ||||||
|                             prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; |  | ||||||
|                 if old_prompt.as_ref() != Some(&new_prompt) { |                 if old_prompt.as_ref() != Some(&new_prompt) { | ||||||
|                     let old_prompt = old_prompt.unwrap_or_default(); |                     let old_prompt = old_prompt.unwrap_or_default(); | ||||||
|                     tracing::trace!( |                     tracing::trace!( | ||||||
| @@ -242,63 +422,90 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | |||||||
|                 VectorStateDelta::NowRemoved |                 VectorStateDelta::NowRemoved | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |         // inline to the left is not supposed to be possible because the embedder is not new, so `_vectors` was removed from | ||||||
|  |         // the previous version of the document. | ||||||
|  |         // Manual -> Generated is also not possible without an Inline to the right (which is handled above) | ||||||
|  |         // Generated -> Generated is handled above, so not possible | ||||||
|  |         // As a result, this code is unreachable | ||||||
|  |         (_not_generated, VectorState::Generated) => { | ||||||
|  |             // Do we keep this document? | ||||||
|  |             let document_is_kept = obkv | ||||||
|  |                 .iter() | ||||||
|  |                 .map(|(_, deladd)| KvReaderDelAdd::new(deladd)) | ||||||
|  |                 .any(|deladd| deladd.get(DelAdd::Addition).is_some()); | ||||||
|  |             if document_is_kept { | ||||||
|  |                 // becomes autogenerated | ||||||
|  |                 VectorStateDelta::NowGenerated(prompt.render( | ||||||
|  |                     obkv, | ||||||
|  |                     DelAdd::Addition, | ||||||
|  |                     new_fields_ids_map, | ||||||
|  |                 )?) | ||||||
|  |             } else { | ||||||
|  |                 // make sure the document is always removed from user provided on removal | ||||||
|  |                 remove_from_user_provided.insert(docid); | ||||||
|  |                 VectorStateDelta::NowRemoved | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         // inline to the left is not possible because the embedder is not new, and so `_vectors` was removed from the previous | ||||||
|  |         // version of the document. | ||||||
|  |         // however the Rust type system cannot know that. | ||||||
|  |         (_manual, VectorState::Manual) => { | ||||||
|  |             // Do we keep this document? | ||||||
|  |             let document_is_kept = obkv | ||||||
|  |                 .iter() | ||||||
|  |                 .map(|(_, deladd)| KvReaderDelAdd::new(deladd)) | ||||||
|  |                 .any(|deladd| deladd.get(DelAdd::Addition).is_some()); | ||||||
|  |             if document_is_kept { | ||||||
|  |                 // if the new version of documents has the vectors in the DB, | ||||||
|  |                 // then they are user-provided and nothing possibly changed | ||||||
|  |                 VectorStateDelta::NoChange | ||||||
|  |             } else { | ||||||
|  |                 // make sure the document is always removed from user provided on removal | ||||||
|  |                 remove_from_user_provided.insert(docid); | ||||||
|  |                 VectorStateDelta::NowRemoved | ||||||
|  |             } | ||||||
|  |         } | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|             // and we finally push the unique vectors into the writer |     Ok(delta) | ||||||
|             push_vectors_diff( |  | ||||||
|                 remove_vectors_writer, |  | ||||||
|                 prompts_writer, |  | ||||||
|                 manual_vectors_writer, |  | ||||||
|                 &mut key_buffer, |  | ||||||
|                 delta, |  | ||||||
|                 reindex_vectors, |  | ||||||
|             )?; |  | ||||||
|         } |  | ||||||
| } | } | ||||||
|  |  | ||||||
|     let mut results = Vec::new(); | fn regenerate_if_prompt_changed( | ||||||
|  |     obkv: obkv::KvReader<'_, FieldId>, | ||||||
|  |     (old_prompt, new_prompt): (&Prompt, &Prompt), | ||||||
|  |     (old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap), | ||||||
|  | ) -> Result<VectorStateDelta> { | ||||||
|  |     let old_prompt = | ||||||
|  |         old_prompt.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or(Default::default()); | ||||||
|  |     let new_prompt = new_prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; | ||||||
|  |  | ||||||
|     for EmbedderVectorExtractor { |     if new_prompt == old_prompt { | ||||||
|         embedder_name, |         return Ok(VectorStateDelta::NoChange); | ||||||
|         embedder, |     } | ||||||
|         prompt: _, |     Ok(VectorStateDelta::NowGenerated(new_prompt)) | ||||||
|         manual_vectors_writer, |  | ||||||
|         prompts_writer, |  | ||||||
|         remove_vectors_writer, |  | ||||||
|     } in extractors |  | ||||||
|     { |  | ||||||
|         results.push(ExtractedVectorPoints { |  | ||||||
|             // docid, _index -> KvWriterDelAdd -> Vector |  | ||||||
|             manual_vectors: writer_into_reader(manual_vectors_writer)?, |  | ||||||
|             // docid -> () |  | ||||||
|             remove_vectors: writer_into_reader(remove_vectors_writer)?, |  | ||||||
|             // docid -> prompt |  | ||||||
|             prompts: writer_into_reader(prompts_writer)?, |  | ||||||
|  |  | ||||||
|             embedder, |  | ||||||
|             embedder_name, |  | ||||||
|         }) |  | ||||||
| } | } | ||||||
|  |  | ||||||
|     Ok(results) | fn regenerate_prompt( | ||||||
|  |     obkv: obkv::KvReader<'_, FieldId>, | ||||||
|  |     prompt: &Prompt, | ||||||
|  |     new_fields_ids_map: &FieldsIdsMap, | ||||||
|  | ) -> Result<VectorStateDelta> { | ||||||
|  |     let prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; | ||||||
|  |  | ||||||
|  |     Ok(VectorStateDelta::NowGenerated(prompt)) | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Computes the diff between both Del and Add numbers and | /// We cannot compute the diff between both Del and Add vectors. | ||||||
| /// only inserts the parts that differ in the sorter. | /// We'll push every vector and compute the difference later in TypedChunk. | ||||||
| fn push_vectors_diff( | fn push_vectors_diff( | ||||||
|     remove_vectors_writer: &mut Writer<BufWriter<File>>, |     remove_vectors_writer: &mut Writer<BufWriter<File>>, | ||||||
|     prompts_writer: &mut Writer<BufWriter<File>>, |     prompts_writer: &mut Writer<BufWriter<File>>, | ||||||
|     manual_vectors_writer: &mut Writer<BufWriter<File>>, |     manual_vectors_writer: &mut Writer<BufWriter<File>>, | ||||||
|     key_buffer: &mut Vec<u8>, |     key_buffer: &mut Vec<u8>, | ||||||
|     delta: VectorStateDelta, |     delta: VectorStateDelta, | ||||||
|     reindex_vectors: bool, |  | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
|     let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values(); |     let (must_remove, prompt, mut add_vectors) = delta.into_values(); | ||||||
|     if must_remove |     if must_remove { | ||||||
|     // TODO: the below condition works because we erase the vec database when a embedding setting changes. |  | ||||||
|     // When vector pipeline will be optimized, this should be removed. |  | ||||||
|     && !reindex_vectors |  | ||||||
|     { |  | ||||||
|         key_buffer.truncate(TRUNCATE_SIZE); |         key_buffer.truncate(TRUNCATE_SIZE); | ||||||
|         remove_vectors_writer.insert(&key_buffer, [])?; |         remove_vectors_writer.insert(&key_buffer, [])?; | ||||||
|     } |     } | ||||||
| @@ -308,36 +515,16 @@ fn push_vectors_diff( | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     // We sort and dedup the vectors |     // We sort and dedup the vectors | ||||||
|     del_vectors.sort_unstable_by(|a, b| compare_vectors(a, b)); |  | ||||||
|     add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b)); |     add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b)); | ||||||
|     del_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq()); |  | ||||||
|     add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq()); |     add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq()); | ||||||
|  |  | ||||||
|     let merged_vectors_iter = |  | ||||||
|         itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add)); |  | ||||||
|  |  | ||||||
|     // insert vectors into the writer |     // insert vectors into the writer | ||||||
|     for (i, eob) in merged_vectors_iter.into_iter().enumerate().take(u16::MAX as usize) { |     for (i, vector) in add_vectors.into_iter().enumerate().take(u16::MAX as usize) { | ||||||
|         // Generate the key by extending the unique index to it. |         // Generate the key by extending the unique index to it. | ||||||
|         key_buffer.truncate(TRUNCATE_SIZE); |         key_buffer.truncate(TRUNCATE_SIZE); | ||||||
|         let index = u16::try_from(i).unwrap(); |         let index = u16::try_from(i).unwrap(); | ||||||
|         key_buffer.extend_from_slice(&index.to_be_bytes()); |         key_buffer.extend_from_slice(&index.to_be_bytes()); | ||||||
|  |  | ||||||
|         match eob { |  | ||||||
|             EitherOrBoth::Both(_, _) => (), // no need to touch anything |  | ||||||
|             EitherOrBoth::Left(vector) => { |  | ||||||
|                 // TODO: the below condition works because we erase the vec database when a embedding setting changes. |  | ||||||
|                 // When vector pipeline will be optimized, this should be removed. |  | ||||||
|                 if !reindex_vectors { |  | ||||||
|                     // We insert only the Del part of the Obkv to inform |  | ||||||
|                     // that we only want to remove all those vectors. |  | ||||||
|                     let mut obkv = KvWriterDelAdd::memory(); |  | ||||||
|                     obkv.insert(DelAdd::Deletion, cast_slice(&vector))?; |  | ||||||
|                     let bytes = obkv.into_inner()?; |  | ||||||
|                     manual_vectors_writer.insert(&key_buffer, bytes)?; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|             EitherOrBoth::Right(vector) => { |  | ||||||
|         // We insert only the Add part of the Obkv to inform |         // We insert only the Add part of the Obkv to inform | ||||||
|         // that we only want to remove all those vectors. |         // that we only want to remove all those vectors. | ||||||
|         let mut obkv = KvWriterDelAdd::memory(); |         let mut obkv = KvWriterDelAdd::memory(); | ||||||
| @@ -345,8 +532,6 @@ fn push_vectors_diff( | |||||||
|         let bytes = obkv.into_inner()?; |         let bytes = obkv.into_inner()?; | ||||||
|         manual_vectors_writer.insert(&key_buffer, bytes)?; |         manual_vectors_writer.insert(&key_buffer, bytes)?; | ||||||
|     } |     } | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|   | |||||||
| @@ -11,7 +11,7 @@ mod extract_word_position_docids; | |||||||
|  |  | ||||||
| use std::fs::File; | use std::fs::File; | ||||||
| use std::io::BufReader; | use std::io::BufReader; | ||||||
| use std::sync::Arc; | use std::sync::{Arc, OnceLock}; | ||||||
|  |  | ||||||
| use crossbeam_channel::Sender; | use crossbeam_channel::Sender; | ||||||
| use rayon::prelude::*; | use rayon::prelude::*; | ||||||
| @@ -30,8 +30,9 @@ use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids | |||||||
| use self::extract_word_position_docids::extract_word_position_docids; | use self::extract_word_position_docids::extract_word_position_docids; | ||||||
| use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters}; | use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters}; | ||||||
| use super::{helpers, TypedChunk}; | use super::{helpers, TypedChunk}; | ||||||
|  | use crate::index::IndexEmbeddingConfig; | ||||||
| use crate::update::settings::InnerIndexSettingsDiff; | use crate::update::settings::InnerIndexSettingsDiff; | ||||||
| use crate::{FieldId, Result, ThreadPoolNoAbortBuilder}; | use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder}; | ||||||
|  |  | ||||||
| /// Extract data for each databases from obkv documents in parallel. | /// Extract data for each databases from obkv documents in parallel. | ||||||
| /// Send data in grenad file over provided Sender. | /// Send data in grenad file over provided Sender. | ||||||
| @@ -43,6 +44,7 @@ pub(crate) fn data_from_obkv_documents( | |||||||
|     indexer: GrenadParameters, |     indexer: GrenadParameters, | ||||||
|     lmdb_writer_sx: Sender<Result<TypedChunk>>, |     lmdb_writer_sx: Sender<Result<TypedChunk>>, | ||||||
|     primary_key_id: FieldId, |     primary_key_id: FieldId, | ||||||
|  |     embedders_configs: Arc<Vec<IndexEmbeddingConfig>>, | ||||||
|     settings_diff: Arc<InnerIndexSettingsDiff>, |     settings_diff: Arc<InnerIndexSettingsDiff>, | ||||||
|     max_positions_per_attributes: Option<u32>, |     max_positions_per_attributes: Option<u32>, | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
| @@ -55,6 +57,7 @@ pub(crate) fn data_from_obkv_documents( | |||||||
|                         original_documents_chunk, |                         original_documents_chunk, | ||||||
|                         indexer, |                         indexer, | ||||||
|                         lmdb_writer_sx.clone(), |                         lmdb_writer_sx.clone(), | ||||||
|  |                         embedders_configs.clone(), | ||||||
|                         settings_diff.clone(), |                         settings_diff.clone(), | ||||||
|                     ) |                     ) | ||||||
|                 }) |                 }) | ||||||
| @@ -204,33 +207,47 @@ fn run_extraction_task<FE, FS, M>( | |||||||
|     }) |     }) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | fn request_threads() -> &'static ThreadPoolNoAbort { | ||||||
|  |     static REQUEST_THREADS: OnceLock<ThreadPoolNoAbort> = OnceLock::new(); | ||||||
|  |  | ||||||
|  |     REQUEST_THREADS.get_or_init(|| { | ||||||
|  |         ThreadPoolNoAbortBuilder::new() | ||||||
|  |             .num_threads(crate::vector::REQUEST_PARALLELISM) | ||||||
|  |             .thread_name(|index| format!("embedding-request-{index}")) | ||||||
|  |             .build() | ||||||
|  |             .unwrap() | ||||||
|  |     }) | ||||||
|  | } | ||||||
|  |  | ||||||
| /// Extract chunked data and send it into lmdb_writer_sx sender: | /// Extract chunked data and send it into lmdb_writer_sx sender: | ||||||
| /// - documents | /// - documents | ||||||
| fn send_original_documents_data( | fn send_original_documents_data( | ||||||
|     original_documents_chunk: Result<grenad::Reader<BufReader<File>>>, |     original_documents_chunk: Result<grenad::Reader<BufReader<File>>>, | ||||||
|     indexer: GrenadParameters, |     indexer: GrenadParameters, | ||||||
|     lmdb_writer_sx: Sender<Result<TypedChunk>>, |     lmdb_writer_sx: Sender<Result<TypedChunk>>, | ||||||
|  |     embedders_configs: Arc<Vec<IndexEmbeddingConfig>>, | ||||||
|     settings_diff: Arc<InnerIndexSettingsDiff>, |     settings_diff: Arc<InnerIndexSettingsDiff>, | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
|     let original_documents_chunk = |     let original_documents_chunk = | ||||||
|         original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?; |         original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?; | ||||||
|  |  | ||||||
|     let request_threads = ThreadPoolNoAbortBuilder::new() |  | ||||||
|         .num_threads(crate::vector::REQUEST_PARALLELISM) |  | ||||||
|         .thread_name(|index| format!("embedding-request-{index}")) |  | ||||||
|         .build()?; |  | ||||||
|  |  | ||||||
|     let index_vectors = (settings_diff.reindex_vectors() || !settings_diff.settings_update_only()) |     let index_vectors = (settings_diff.reindex_vectors() || !settings_diff.settings_update_only()) | ||||||
|         // no point in indexing vectors without embedders |         // no point in indexing vectors without embedders | ||||||
|         && (!settings_diff.new.embedding_configs.inner_as_ref().is_empty()); |         && (!settings_diff.new.embedding_configs.inner_as_ref().is_empty()); | ||||||
|  |  | ||||||
|     if index_vectors { |     if index_vectors { | ||||||
|         let settings_diff = settings_diff.clone(); |         let settings_diff = settings_diff.clone(); | ||||||
|  |         let embedders_configs = embedders_configs.clone(); | ||||||
|  |  | ||||||
|         let original_documents_chunk = original_documents_chunk.clone(); |         let original_documents_chunk = original_documents_chunk.clone(); | ||||||
|         let lmdb_writer_sx = lmdb_writer_sx.clone(); |         let lmdb_writer_sx = lmdb_writer_sx.clone(); | ||||||
|         rayon::spawn(move || { |         rayon::spawn(move || { | ||||||
|             match extract_vector_points(original_documents_chunk.clone(), indexer, &settings_diff) { |             match extract_vector_points( | ||||||
|  |                 original_documents_chunk.clone(), | ||||||
|  |                 indexer, | ||||||
|  |                 &embedders_configs, | ||||||
|  |                 &settings_diff, | ||||||
|  |             ) { | ||||||
|                 Ok(extracted_vectors) => { |                 Ok(extracted_vectors) => { | ||||||
|                     for ExtractedVectorPoints { |                     for ExtractedVectorPoints { | ||||||
|                         manual_vectors, |                         manual_vectors, | ||||||
| @@ -238,13 +255,15 @@ fn send_original_documents_data( | |||||||
|                         prompts, |                         prompts, | ||||||
|                         embedder_name, |                         embedder_name, | ||||||
|                         embedder, |                         embedder, | ||||||
|  |                         add_to_user_provided, | ||||||
|  |                         remove_from_user_provided, | ||||||
|                     } in extracted_vectors |                     } in extracted_vectors | ||||||
|                     { |                     { | ||||||
|                         let embeddings = match extract_embeddings( |                         let embeddings = match extract_embeddings( | ||||||
|                             prompts, |                             prompts, | ||||||
|                             indexer, |                             indexer, | ||||||
|                             embedder.clone(), |                             embedder.clone(), | ||||||
|                             &request_threads, |                             request_threads(), | ||||||
|                         ) { |                         ) { | ||||||
|                             Ok(results) => Some(results), |                             Ok(results) => Some(results), | ||||||
|                             Err(error) => { |                             Err(error) => { | ||||||
| @@ -262,6 +281,8 @@ fn send_original_documents_data( | |||||||
|                                 expected_dimension: embedder.dimensions(), |                                 expected_dimension: embedder.dimensions(), | ||||||
|                                 manual_vectors, |                                 manual_vectors, | ||||||
|                                 embedder_name, |                                 embedder_name, | ||||||
|  |                                 add_to_user_provided, | ||||||
|  |                                 remove_from_user_provided, | ||||||
|                             })); |                             })); | ||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
|   | |||||||
| @@ -286,6 +286,7 @@ where | |||||||
|         settings_diff.new.recompute_searchables(self.wtxn, self.index)?; |         settings_diff.new.recompute_searchables(self.wtxn, self.index)?; | ||||||
|  |  | ||||||
|         let settings_diff = Arc::new(settings_diff); |         let settings_diff = Arc::new(settings_diff); | ||||||
|  |         let embedders_configs = Arc::new(self.index.embedding_configs(self.wtxn)?); | ||||||
|  |  | ||||||
|         let backup_pool; |         let backup_pool; | ||||||
|         let pool = match self.indexer_config.thread_pool { |         let pool = match self.indexer_config.thread_pool { | ||||||
| @@ -399,6 +400,7 @@ where | |||||||
|                         pool_params, |                         pool_params, | ||||||
|                         lmdb_writer_sx.clone(), |                         lmdb_writer_sx.clone(), | ||||||
|                         primary_key_id, |                         primary_key_id, | ||||||
|  |                         embedders_configs.clone(), | ||||||
|                         settings_diff_cloned, |                         settings_diff_cloned, | ||||||
|                         max_positions_per_attributes, |                         max_positions_per_attributes, | ||||||
|                     ) |                     ) | ||||||
| @@ -501,6 +503,8 @@ where | |||||||
|                                 embeddings, |                                 embeddings, | ||||||
|                                 manual_vectors, |                                 manual_vectors, | ||||||
|                                 embedder_name, |                                 embedder_name, | ||||||
|  |                                 add_to_user_provided, | ||||||
|  |                                 remove_from_user_provided, | ||||||
|                             } => { |                             } => { | ||||||
|                                 dimension.insert(embedder_name.clone(), expected_dimension); |                                 dimension.insert(embedder_name.clone(), expected_dimension); | ||||||
|                                 TypedChunk::VectorPoints { |                                 TypedChunk::VectorPoints { | ||||||
| @@ -509,6 +513,8 @@ where | |||||||
|                                     expected_dimension, |                                     expected_dimension, | ||||||
|                                     manual_vectors, |                                     manual_vectors, | ||||||
|                                     embedder_name, |                                     embedder_name, | ||||||
|  |                                     add_to_user_provided, | ||||||
|  |                                     remove_from_user_provided, | ||||||
|                                 } |                                 } | ||||||
|                             } |                             } | ||||||
|                             otherwise => otherwise, |                             otherwise => otherwise, | ||||||
| @@ -541,10 +547,11 @@ where | |||||||
|             pool.install(|| { |             pool.install(|| { | ||||||
|                 for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { |                 for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { | ||||||
|                     let writer = arroy::Writer::new(vector_arroy, k, dimension); |                     let writer = arroy::Writer::new(vector_arroy, k, dimension); | ||||||
|                     if writer.is_empty(wtxn)? { |                     if writer.need_build(wtxn)? { | ||||||
|  |                         writer.build(wtxn, &mut rng, None)?; | ||||||
|  |                     } else if writer.is_empty(wtxn)? { | ||||||
|                         break; |                         break; | ||||||
|                     } |                     } | ||||||
|                     writer.build(wtxn, &mut rng, None)?; |  | ||||||
|                 } |                 } | ||||||
|                 Result::Ok(()) |                 Result::Ok(()) | ||||||
|             }) |             }) | ||||||
| @@ -781,6 +788,7 @@ mod tests { | |||||||
|     use super::*; |     use super::*; | ||||||
|     use crate::documents::documents_batch_reader_from_objects; |     use crate::documents::documents_batch_reader_from_objects; | ||||||
|     use crate::index::tests::TempIndex; |     use crate::index::tests::TempIndex; | ||||||
|  |     use crate::index::IndexEmbeddingConfig; | ||||||
|     use crate::search::TermsMatchingStrategy; |     use crate::search::TermsMatchingStrategy; | ||||||
|     use crate::update::Setting; |     use crate::update::Setting; | ||||||
|     use crate::{db_snap, Filter, Search}; |     use crate::{db_snap, Filter, Search}; | ||||||
| @@ -2616,10 +2624,12 @@ mod tests { | |||||||
|  |  | ||||||
|         let rtxn = index.read_txn().unwrap(); |         let rtxn = index.read_txn().unwrap(); | ||||||
|         let mut embedding_configs = index.embedding_configs(&rtxn).unwrap(); |         let mut embedding_configs = index.embedding_configs(&rtxn).unwrap(); | ||||||
|         let (embedder_name, embedder) = embedding_configs.pop().unwrap(); |         let IndexEmbeddingConfig { name: embedder_name, config: embedder, user_provided } = | ||||||
|  |             embedding_configs.pop().unwrap(); | ||||||
|  |         insta::assert_snapshot!(embedder_name, @"manual"); | ||||||
|  |         insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[0, 1, 2]>"); | ||||||
|         let embedder = |         let embedder = | ||||||
|             std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap()); |             std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap()); | ||||||
|         assert_eq!("manual", embedder_name); |  | ||||||
|         let res = index |         let res = index | ||||||
|             .search(&rtxn) |             .search(&rtxn) | ||||||
|             .semantic(embedder_name, embedder, Some([0.0, 1.0, 2.0].to_vec())) |             .semantic(embedder_name, embedder, Some([0.0, 1.0, 2.0].to_vec())) | ||||||
|   | |||||||
| @@ -1,7 +1,7 @@ | |||||||
| use std::borrow::Cow; | use std::borrow::Cow; | ||||||
| use std::collections::btree_map::Entry as BEntry; | use std::collections::btree_map::Entry as BEntry; | ||||||
| use std::collections::hash_map::Entry as HEntry; | use std::collections::hash_map::Entry as HEntry; | ||||||
| use std::collections::{HashMap, HashSet}; | use std::collections::{BTreeMap, HashMap, HashSet}; | ||||||
| use std::fs::File; | use std::fs::File; | ||||||
| use std::io::{Read, Seek}; | use std::io::{Read, Seek}; | ||||||
|  |  | ||||||
| @@ -27,6 +27,8 @@ use crate::update::del_add::{ | |||||||
| use crate::update::index_documents::GrenadParameters; | use crate::update::index_documents::GrenadParameters; | ||||||
| use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; | use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; | ||||||
| use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; | use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; | ||||||
|  | use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; | ||||||
|  | use crate::vector::settings::{EmbedderAction, WriteBackToDocuments}; | ||||||
| use crate::{ | use crate::{ | ||||||
|     is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, |     is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, | ||||||
| }; | }; | ||||||
| @@ -806,13 +808,13 @@ impl<'a, 'i> Transform<'a, 'i> { | |||||||
|         let mut new_inner_settings = old_inner_settings.clone(); |         let mut new_inner_settings = old_inner_settings.clone(); | ||||||
|         new_inner_settings.fields_ids_map = fields_ids_map; |         new_inner_settings.fields_ids_map = fields_ids_map; | ||||||
|  |  | ||||||
|         let embedding_configs_updated = false; |         let embedding_config_updates = Default::default(); | ||||||
|         let settings_update_only = false; |         let settings_update_only = false; | ||||||
|         let settings_diff = InnerIndexSettingsDiff::new( |         let settings_diff = InnerIndexSettingsDiff::new( | ||||||
|             old_inner_settings, |             old_inner_settings, | ||||||
|             new_inner_settings, |             new_inner_settings, | ||||||
|             primary_key_id, |             primary_key_id, | ||||||
|             embedding_configs_updated, |             embedding_config_updates, | ||||||
|             settings_update_only, |             settings_update_only, | ||||||
|         ); |         ); | ||||||
|  |  | ||||||
| @@ -833,10 +835,13 @@ impl<'a, 'i> Transform<'a, 'i> { | |||||||
|     /// Rebind the field_ids of the provided document to their values |     /// Rebind the field_ids of the provided document to their values | ||||||
|     /// based on the field_ids_maps difference between the old and the new settings, |     /// based on the field_ids_maps difference between the old and the new settings, | ||||||
|     /// then fill the provided buffers with delta documents using KvWritterDelAdd. |     /// then fill the provided buffers with delta documents using KvWritterDelAdd. | ||||||
|  |     #[allow(clippy::too_many_arguments)] // need the vectors + fid, feel free to create a struct xo xo | ||||||
|     fn rebind_existing_document( |     fn rebind_existing_document( | ||||||
|         old_obkv: KvReader<FieldId>, |         old_obkv: KvReader<FieldId>, | ||||||
|         settings_diff: &InnerIndexSettingsDiff, |         settings_diff: &InnerIndexSettingsDiff, | ||||||
|         modified_faceted_fields: &HashSet<String>, |         modified_faceted_fields: &HashSet<String>, | ||||||
|  |         mut injected_vectors: serde_json::Map<String, serde_json::Value>, | ||||||
|  |         old_vectors_fid: Option<FieldId>, | ||||||
|         original_obkv_buffer: Option<&mut Vec<u8>>, |         original_obkv_buffer: Option<&mut Vec<u8>>, | ||||||
|         flattened_obkv_buffer: Option<&mut Vec<u8>>, |         flattened_obkv_buffer: Option<&mut Vec<u8>>, | ||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
| @@ -859,9 +864,49 @@ impl<'a, 'i> Transform<'a, 'i> { | |||||||
|  |  | ||||||
|         // The operations that we must perform on the different fields. |         // The operations that we must perform on the different fields. | ||||||
|         let mut operations = HashMap::new(); |         let mut operations = HashMap::new(); | ||||||
|  |         let mut error_seen = false; | ||||||
|  |  | ||||||
|         let mut obkv_writer = KvWriter::<_, FieldId>::memory(); |         let mut obkv_writer = KvWriter::<_, FieldId>::memory(); | ||||||
|         for (id, val) in old_obkv.iter() { |         'write_fid: for (id, val) in old_obkv.iter() { | ||||||
|  |             if !injected_vectors.is_empty() { | ||||||
|  |                 'inject_vectors: { | ||||||
|  |                     let Some(vectors_fid) = old_vectors_fid else { break 'inject_vectors }; | ||||||
|  |  | ||||||
|  |                     if id < vectors_fid { | ||||||
|  |                         break 'inject_vectors; | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     let mut existing_vectors = if id == vectors_fid { | ||||||
|  |                         let existing_vectors: std::result::Result< | ||||||
|  |                             serde_json::Map<String, serde_json::Value>, | ||||||
|  |                             serde_json::Error, | ||||||
|  |                         > = serde_json::from_slice(val); | ||||||
|  |  | ||||||
|  |                         match existing_vectors { | ||||||
|  |                             Ok(existing_vectors) => existing_vectors, | ||||||
|  |                             Err(error) => { | ||||||
|  |                                 if !error_seen { | ||||||
|  |                                     tracing::error!(%error, "Unexpected `_vectors` field that is not a map. Treating as an empty map"); | ||||||
|  |                                     error_seen = true; | ||||||
|  |                                 } | ||||||
|  |                                 Default::default() | ||||||
|  |                             } | ||||||
|  |                         } | ||||||
|  |                     } else { | ||||||
|  |                         Default::default() | ||||||
|  |                     }; | ||||||
|  |  | ||||||
|  |                     existing_vectors.append(&mut injected_vectors); | ||||||
|  |  | ||||||
|  |                     operations.insert(vectors_fid, DelAddOperation::DeletionAndAddition); | ||||||
|  |                     obkv_writer | ||||||
|  |                         .insert(vectors_fid, serde_json::to_vec(&existing_vectors).unwrap())?; | ||||||
|  |                     if id == vectors_fid { | ||||||
|  |                         continue 'write_fid; | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|             if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors { |             if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors { | ||||||
|                 operations.insert(id, DelAddOperation::DeletionAndAddition); |                 operations.insert(id, DelAddOperation::DeletionAndAddition); | ||||||
|                 obkv_writer.insert(id, val)?; |                 obkv_writer.insert(id, val)?; | ||||||
| @@ -870,6 +915,15 @@ impl<'a, 'i> Transform<'a, 'i> { | |||||||
|                 obkv_writer.insert(id, val)?; |                 obkv_writer.insert(id, val)?; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |         if !injected_vectors.is_empty() { | ||||||
|  |             'inject_vectors: { | ||||||
|  |                 let Some(vectors_fid) = old_vectors_fid else { break 'inject_vectors }; | ||||||
|  |  | ||||||
|  |                 operations.insert(vectors_fid, DelAddOperation::DeletionAndAddition); | ||||||
|  |                 obkv_writer.insert(vectors_fid, serde_json::to_vec(&injected_vectors).unwrap())?; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|         let data = obkv_writer.into_inner()?; |         let data = obkv_writer.into_inner()?; | ||||||
|         let obkv = KvReader::<FieldId>::new(&data); |         let obkv = KvReader::<FieldId>::new(&data); | ||||||
|  |  | ||||||
| @@ -935,6 +989,35 @@ impl<'a, 'i> Transform<'a, 'i> { | |||||||
|             None |             None | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|  |         let readers: Result< | ||||||
|  |             BTreeMap<&str, (Vec<arroy::Reader<arroy::distances::Angular>>, &RoaringBitmap)>, | ||||||
|  |         > = settings_diff | ||||||
|  |             .embedding_config_updates | ||||||
|  |             .iter() | ||||||
|  |             .filter_map(|(name, action)| { | ||||||
|  |                 if let EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { | ||||||
|  |                     embedder_id, | ||||||
|  |                     user_provided, | ||||||
|  |                 }) = action | ||||||
|  |                 { | ||||||
|  |                     let readers: Result<Vec<_>> = | ||||||
|  |                         self.index.arroy_readers(wtxn, *embedder_id).collect(); | ||||||
|  |                     match readers { | ||||||
|  |                         Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))), | ||||||
|  |                         Err(error) => Some(Err(error)), | ||||||
|  |                     } | ||||||
|  |                 } else { | ||||||
|  |                     None | ||||||
|  |                 } | ||||||
|  |             }) | ||||||
|  |             .collect(); | ||||||
|  |         let readers = readers?; | ||||||
|  |  | ||||||
|  |         let old_vectors_fid = settings_diff | ||||||
|  |             .old | ||||||
|  |             .fields_ids_map | ||||||
|  |             .id(crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME); | ||||||
|  |  | ||||||
|         // We initialize the sorter with the user indexing settings. |         // We initialize the sorter with the user indexing settings. | ||||||
|         let mut flattened_sorter = |         let mut flattened_sorter = | ||||||
|             if settings_diff.reindex_searchable() || settings_diff.reindex_facets() { |             if settings_diff.reindex_searchable() || settings_diff.reindex_facets() { | ||||||
| @@ -961,10 +1044,50 @@ impl<'a, 'i> Transform<'a, 'i> { | |||||||
|                     InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None }, |                     InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None }, | ||||||
|                 )?; |                 )?; | ||||||
|  |  | ||||||
|  |                 let injected_vectors: std::result::Result< | ||||||
|  |                     serde_json::Map<String, serde_json::Value>, | ||||||
|  |                     arroy::Error, | ||||||
|  |                 > = readers | ||||||
|  |                     .iter() | ||||||
|  |                     .filter_map(|(name, (readers, user_provided))| { | ||||||
|  |                         if !user_provided.contains(docid) { | ||||||
|  |                             return None; | ||||||
|  |                         } | ||||||
|  |                         let mut vectors = Vec::new(); | ||||||
|  |                         for reader in readers { | ||||||
|  |                             let Some(vector) = reader.item_vector(wtxn, docid).transpose() else { | ||||||
|  |                                 break; | ||||||
|  |                             }; | ||||||
|  |  | ||||||
|  |                             match vector { | ||||||
|  |                                 Ok(vector) => vectors.push(vector), | ||||||
|  |                                 Err(error) => return Some(Err(error)), | ||||||
|  |                             } | ||||||
|  |                         } | ||||||
|  |                         if vectors.is_empty() { | ||||||
|  |                             return None; | ||||||
|  |                         } | ||||||
|  |                         Some(Ok(( | ||||||
|  |                             name.to_string(), | ||||||
|  |                             serde_json::to_value(ExplicitVectors { | ||||||
|  |                                 embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors( | ||||||
|  |                                     vectors, | ||||||
|  |                                 )), | ||||||
|  |                                 regenerate: false, | ||||||
|  |                             }) | ||||||
|  |                             .unwrap(), | ||||||
|  |                         ))) | ||||||
|  |                     }) | ||||||
|  |                     .collect(); | ||||||
|  |  | ||||||
|  |                 let injected_vectors = injected_vectors?; | ||||||
|  |  | ||||||
|                 Self::rebind_existing_document( |                 Self::rebind_existing_document( | ||||||
|                     old_obkv, |                     old_obkv, | ||||||
|                     &settings_diff, |                     &settings_diff, | ||||||
|                     &modified_faceted_fields, |                     &modified_faceted_fields, | ||||||
|  |                     injected_vectors, | ||||||
|  |                     old_vectors_fid, | ||||||
|                     Some(&mut original_obkv_buffer).filter(|_| original_sorter.is_some()), |                     Some(&mut original_obkv_buffer).filter(|_| original_sorter.is_some()), | ||||||
|                     Some(&mut flattened_obkv_buffer).filter(|_| flattened_sorter.is_some()), |                     Some(&mut flattened_obkv_buffer).filter(|_| flattened_sorter.is_some()), | ||||||
|                 )?; |                 )?; | ||||||
| @@ -981,6 +1104,23 @@ impl<'a, 'i> Transform<'a, 'i> { | |||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         let mut writers = Vec::new(); | ||||||
|  |  | ||||||
|  |         // delete all vectors from the embedders that need removal | ||||||
|  |         for (_, (readers, _)) in readers { | ||||||
|  |             for reader in readers { | ||||||
|  |                 let dimensions = reader.dimensions(); | ||||||
|  |                 let arroy_index = reader.index(); | ||||||
|  |                 drop(reader); | ||||||
|  |                 let writer = arroy::Writer::new(self.index.vector_arroy, arroy_index, dimensions); | ||||||
|  |                 writers.push(writer); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         for writer in writers { | ||||||
|  |             writer.clear(wtxn)?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|         let grenad_params = GrenadParameters { |         let grenad_params = GrenadParameters { | ||||||
|             chunk_compression_type: self.indexer_settings.chunk_compression_type, |             chunk_compression_type: self.indexer_settings.chunk_compression_type, | ||||||
|             chunk_compression_level: self.indexer_settings.chunk_compression_level, |             chunk_compression_level: self.indexer_settings.chunk_compression_level, | ||||||
|   | |||||||
| @@ -20,6 +20,7 @@ use super::MergeFn; | |||||||
| use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind}; | use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind}; | ||||||
| use crate::facet::FacetType; | use crate::facet::FacetType; | ||||||
| use crate::index::db_name::DOCUMENTS; | use crate::index::db_name::DOCUMENTS; | ||||||
|  | use crate::index::IndexEmbeddingConfig; | ||||||
| use crate::proximity::MAX_DISTANCE; | use crate::proximity::MAX_DISTANCE; | ||||||
| use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd}; | use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd}; | ||||||
| use crate::update::facet::FacetsUpdate; | use crate::update::facet::FacetsUpdate; | ||||||
| @@ -90,6 +91,8 @@ pub(crate) enum TypedChunk { | |||||||
|         expected_dimension: usize, |         expected_dimension: usize, | ||||||
|         manual_vectors: grenad::Reader<BufReader<File>>, |         manual_vectors: grenad::Reader<BufReader<File>>, | ||||||
|         embedder_name: String, |         embedder_name: String, | ||||||
|  |         add_to_user_provided: RoaringBitmap, | ||||||
|  |         remove_from_user_provided: RoaringBitmap, | ||||||
|     }, |     }, | ||||||
|     ScriptLanguageDocids(HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>), |     ScriptLanguageDocids(HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>), | ||||||
| } | } | ||||||
| @@ -154,8 +157,11 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|             let mut docids = index.documents_ids(wtxn)?; |             let mut docids = index.documents_ids(wtxn)?; | ||||||
|             let mut iter = merger.into_stream_merger_iter()?; |             let mut iter = merger.into_stream_merger_iter()?; | ||||||
|  |  | ||||||
|             let embedders: BTreeSet<_> = |             let embedders: BTreeSet<_> = index | ||||||
|                 index.embedding_configs(wtxn)?.into_iter().map(|(k, _v)| k).collect(); |                 .embedding_configs(wtxn)? | ||||||
|  |                 .into_iter() | ||||||
|  |                 .map(|IndexEmbeddingConfig { name, .. }| name) | ||||||
|  |                 .collect(); | ||||||
|             let mut vectors_buffer = Vec::new(); |             let mut vectors_buffer = Vec::new(); | ||||||
|             while let Some((key, reader)) = iter.next()? { |             while let Some((key, reader)) = iter.next()? { | ||||||
|                 let mut writer: KvWriter<_, FieldId> = KvWriter::memory(); |                 let mut writer: KvWriter<_, FieldId> = KvWriter::memory(); | ||||||
| @@ -181,7 +187,7 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|                                     // if the `_vectors` field cannot be parsed as map of vectors, just write it as-is |                                     // if the `_vectors` field cannot be parsed as map of vectors, just write it as-is | ||||||
|                                     break 'vectors Some(addition); |                                     break 'vectors Some(addition); | ||||||
|                                 }; |                                 }; | ||||||
|                                 vectors.retain_user_provided_vectors(&embedders); |                                 vectors.retain_not_embedded_vectors(&embedders); | ||||||
|                                 let crate::vector::parsed_vectors::ParsedVectors(vectors) = vectors; |                                 let crate::vector::parsed_vectors::ParsedVectors(vectors) = vectors; | ||||||
|                                 if vectors.is_empty() { |                                 if vectors.is_empty() { | ||||||
|                                     // skip writing empty `_vectors` map |                                     // skip writing empty `_vectors` map | ||||||
| @@ -619,6 +625,8 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|             let mut remove_vectors_builder = MergerBuilder::new(keep_first as MergeFn); |             let mut remove_vectors_builder = MergerBuilder::new(keep_first as MergeFn); | ||||||
|             let mut manual_vectors_builder = MergerBuilder::new(keep_first as MergeFn); |             let mut manual_vectors_builder = MergerBuilder::new(keep_first as MergeFn); | ||||||
|             let mut embeddings_builder = MergerBuilder::new(keep_first as MergeFn); |             let mut embeddings_builder = MergerBuilder::new(keep_first as MergeFn); | ||||||
|  |             let mut add_to_user_provided = RoaringBitmap::new(); | ||||||
|  |             let mut remove_from_user_provided = RoaringBitmap::new(); | ||||||
|             let mut params = None; |             let mut params = None; | ||||||
|             for typed_chunk in typed_chunks { |             for typed_chunk in typed_chunks { | ||||||
|                 let TypedChunk::VectorPoints { |                 let TypedChunk::VectorPoints { | ||||||
| @@ -627,6 +635,8 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|                     embeddings, |                     embeddings, | ||||||
|                     expected_dimension, |                     expected_dimension, | ||||||
|                     embedder_name, |                     embedder_name, | ||||||
|  |                     add_to_user_provided: aud, | ||||||
|  |                     remove_from_user_provided: rud, | ||||||
|                 } = typed_chunk |                 } = typed_chunk | ||||||
|                 else { |                 else { | ||||||
|                     unreachable!(); |                     unreachable!(); | ||||||
| @@ -639,11 +649,23 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|                 if let Some(embeddings) = embeddings { |                 if let Some(embeddings) = embeddings { | ||||||
|                     embeddings_builder.push(embeddings.into_cursor()?); |                     embeddings_builder.push(embeddings.into_cursor()?); | ||||||
|                 } |                 } | ||||||
|  |                 add_to_user_provided |= aud; | ||||||
|  |                 remove_from_user_provided |= rud; | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // typed chunks has always at least 1 chunk. |             // typed chunks has always at least 1 chunk. | ||||||
|             let Some((expected_dimension, embedder_name)) = params else { unreachable!() }; |             let Some((expected_dimension, embedder_name)) = params else { unreachable!() }; | ||||||
|  |  | ||||||
|  |             let mut embedding_configs = index.embedding_configs(wtxn)?; | ||||||
|  |             let index_embedder_config = embedding_configs | ||||||
|  |                 .iter_mut() | ||||||
|  |                 .find(|IndexEmbeddingConfig { name, .. }| name == &embedder_name) | ||||||
|  |                 .unwrap(); | ||||||
|  |             index_embedder_config.user_provided -= remove_from_user_provided; | ||||||
|  |             index_embedder_config.user_provided |= add_to_user_provided; | ||||||
|  |  | ||||||
|  |             index.put_embedding_configs(wtxn, embedding_configs)?; | ||||||
|  |  | ||||||
|             let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( |             let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( | ||||||
|                 InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, |                 InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, | ||||||
|             )?; |             )?; | ||||||
|   | |||||||
| @@ -6,6 +6,7 @@ use std::sync::Arc; | |||||||
| use charabia::{Normalize, Tokenizer, TokenizerBuilder}; | use charabia::{Normalize, Tokenizer, TokenizerBuilder}; | ||||||
| use deserr::{DeserializeError, Deserr}; | use deserr::{DeserializeError, Deserr}; | ||||||
| use itertools::{EitherOrBoth, Itertools}; | use itertools::{EitherOrBoth, Itertools}; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
| use serde::{Deserialize, Deserializer, Serialize, Serializer}; | use serde::{Deserialize, Deserializer, Serialize, Serializer}; | ||||||
| use time::OffsetDateTime; | use time::OffsetDateTime; | ||||||
|  |  | ||||||
| @@ -14,12 +15,18 @@ use super::index_documents::{IndexDocumentsConfig, Transform}; | |||||||
| use super::IndexerConfig; | use super::IndexerConfig; | ||||||
| use crate::criterion::Criterion; | use crate::criterion::Criterion; | ||||||
| use crate::error::UserError; | use crate::error::UserError; | ||||||
| use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; | use crate::index::{ | ||||||
|  |     IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS, | ||||||
|  | }; | ||||||
| use crate::order_by_map::OrderByMap; | use crate::order_by_map::OrderByMap; | ||||||
| use crate::proximity::ProximityPrecision; | use crate::proximity::ProximityPrecision; | ||||||
| use crate::update::index_documents::IndexDocumentsMethod; | use crate::update::index_documents::IndexDocumentsMethod; | ||||||
| use crate::update::{IndexDocuments, UpdateIndexingStep}; | use crate::update::{IndexDocuments, UpdateIndexingStep}; | ||||||
| use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings}; | use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; | ||||||
|  | use crate::vector::settings::{ | ||||||
|  |     check_set, check_unset, EmbedderAction, EmbedderSource, EmbeddingSettings, ReindexAction, | ||||||
|  |     WriteBackToDocuments, | ||||||
|  | }; | ||||||
| use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; | use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; | ||||||
| use crate::{FieldId, FieldsIdsMap, Index, Result}; | use crate::{FieldId, FieldsIdsMap, Index, Result}; | ||||||
|  |  | ||||||
| @@ -490,6 +497,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | |||||||
|                 self.index.put_all_searchable_fields_from_fields_ids_map( |                 self.index.put_all_searchable_fields_from_fields_ids_map( | ||||||
|                     self.wtxn, |                     self.wtxn, | ||||||
|                     &names, |                     &names, | ||||||
|  |                     &fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME), | ||||||
|                     &fields_ids_map, |                     &fields_ids_map, | ||||||
|                 )?; |                 )?; | ||||||
|                 self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; |                 self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; | ||||||
| @@ -919,92 +927,177 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | |||||||
|         Ok(changed) |         Ok(changed) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn update_embedding_configs(&mut self) -> Result<bool> { |     fn update_embedding_configs(&mut self) -> Result<BTreeMap<String, EmbedderAction>> { | ||||||
|         let update = match std::mem::take(&mut self.embedder_settings) { |         match std::mem::take(&mut self.embedder_settings) { | ||||||
|             Setting::Set(configs) => { |             Setting::Set(configs) => self.update_embedding_configs_set(configs), | ||||||
|                 let mut changed = false; |             Setting::Reset => { | ||||||
|  |                 // all vectors should be written back to documents | ||||||
|                 let old_configs = self.index.embedding_configs(self.wtxn)?; |                 let old_configs = self.index.embedding_configs(self.wtxn)?; | ||||||
|                 let old_configs: BTreeMap<String, Setting<EmbeddingSettings>> = |                 let remove_all: Result<BTreeMap<String, EmbedderAction>> = old_configs | ||||||
|                     old_configs.into_iter().map(|(k, v)| (k, Setting::Set(v.into()))).collect(); |                     .into_iter() | ||||||
|  |                     .map(|IndexEmbeddingConfig { name, config: _, user_provided }| -> Result<_> { | ||||||
|  |                         let embedder_id = | ||||||
|  |                             self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or( | ||||||
|  |                                 crate::InternalError::DatabaseMissingEntry { | ||||||
|  |                                     db_name: crate::index::db_name::VECTOR_EMBEDDER_CATEGORY_ID, | ||||||
|  |                                     key: None, | ||||||
|  |                                 }, | ||||||
|  |                             )?; | ||||||
|  |                         Ok(( | ||||||
|  |                             name, | ||||||
|  |                             EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { | ||||||
|  |                                 embedder_id, | ||||||
|  |                                 user_provided, | ||||||
|  |                             }), | ||||||
|  |                         )) | ||||||
|  |                     }) | ||||||
|  |                     .collect(); | ||||||
|  |  | ||||||
|                 let mut new_configs = BTreeMap::new(); |                 let remove_all = remove_all?; | ||||||
|  |  | ||||||
|  |                 self.index.embedder_category_id.clear(self.wtxn)?; | ||||||
|  |                 self.index.delete_embedding_configs(self.wtxn)?; | ||||||
|  |                 Ok(remove_all) | ||||||
|  |             } | ||||||
|  |             Setting::NotSet => Ok(Default::default()), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn update_embedding_configs_set( | ||||||
|  |         &mut self, | ||||||
|  |         configs: BTreeMap<String, Setting<EmbeddingSettings>>, | ||||||
|  |     ) -> Result<BTreeMap<String, EmbedderAction>> { | ||||||
|  |         use crate::vector::settings::SettingsDiff; | ||||||
|  |  | ||||||
|  |         let old_configs = self.index.embedding_configs(self.wtxn)?; | ||||||
|  |         let old_configs: BTreeMap<String, (EmbeddingSettings, RoaringBitmap)> = old_configs | ||||||
|  |             .into_iter() | ||||||
|  |             .map(|IndexEmbeddingConfig { name, config, user_provided }| { | ||||||
|  |                 (name, (config.into(), user_provided)) | ||||||
|  |             }) | ||||||
|  |             .collect(); | ||||||
|  |         let mut updated_configs = BTreeMap::new(); | ||||||
|  |         let mut embedder_actions = BTreeMap::new(); | ||||||
|         for joined in old_configs |         for joined in old_configs | ||||||
|             .into_iter() |             .into_iter() | ||||||
|             .merge_join_by(configs.into_iter(), |(left, _), (right, _)| left.cmp(right)) |             .merge_join_by(configs.into_iter(), |(left, _), (right, _)| left.cmp(right)) | ||||||
|         { |         { | ||||||
|             match joined { |             match joined { | ||||||
|                 // updated config |                 // updated config | ||||||
|                         EitherOrBoth::Both((name, mut old), (_, new)) => { |                 EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => { | ||||||
|                             changed |= EmbeddingSettings::apply_and_need_reindex(&mut old, new); |                     let settings_diff = SettingsDiff::from_settings(old, new); | ||||||
|                             if changed { |                     match settings_diff { | ||||||
|                                 tracing::debug!(embedder = name, "need reindex"); |                         SettingsDiff::Remove => { | ||||||
|                             } else { |                             tracing::debug!( | ||||||
|                                 tracing::debug!(embedder = name, "skip reindex"); |                                 embedder = name, | ||||||
|  |                                 user_provided = user_provided.len(), | ||||||
|  |                                 "removing embedder" | ||||||
|  |                             ); | ||||||
|  |                             let embedder_id = | ||||||
|  |                                 self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or( | ||||||
|  |                                     crate::InternalError::DatabaseMissingEntry { | ||||||
|  |                                         db_name: crate::index::db_name::VECTOR_EMBEDDER_CATEGORY_ID, | ||||||
|  |                                         key: None, | ||||||
|  |                                     }, | ||||||
|  |                                 )?; | ||||||
|  |                             // free id immediately | ||||||
|  |                             self.index.embedder_category_id.delete(self.wtxn, &name)?; | ||||||
|  |                             embedder_actions.insert( | ||||||
|  |                                 name, | ||||||
|  |                                 EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { | ||||||
|  |                                     embedder_id, | ||||||
|  |                                     user_provided, | ||||||
|  |                                 }), | ||||||
|  |                             ); | ||||||
|  |                         } | ||||||
|  |                         SettingsDiff::Reindex { action, updated_settings } => { | ||||||
|  |                             tracing::debug!( | ||||||
|  |                                 embedder = name, | ||||||
|  |                                 user_provided = user_provided.len(), | ||||||
|  |                                 ?action, | ||||||
|  |                                 "reindex embedder" | ||||||
|  |                             ); | ||||||
|  |                             embedder_actions.insert(name.clone(), EmbedderAction::Reindex(action)); | ||||||
|  |                             let new = | ||||||
|  |                                 validate_embedding_settings(Setting::Set(updated_settings), &name)?; | ||||||
|  |                             updated_configs.insert(name, (new, user_provided)); | ||||||
|  |                         } | ||||||
|  |                         SettingsDiff::UpdateWithoutReindex { updated_settings } => { | ||||||
|  |                             tracing::debug!( | ||||||
|  |                                 embedder = name, | ||||||
|  |                                 user_provided = user_provided.len(), | ||||||
|  |                                 "update without reindex embedder" | ||||||
|  |                             ); | ||||||
|  |                             let new = | ||||||
|  |                                 validate_embedding_settings(Setting::Set(updated_settings), &name)?; | ||||||
|  |                             updated_configs.insert(name, (new, user_provided)); | ||||||
|  |                         } | ||||||
|                     } |                     } | ||||||
|                             let new = validate_embedding_settings(old, &name)?; |  | ||||||
|                             new_configs.insert(name, new); |  | ||||||
|                 } |                 } | ||||||
|                 // unchanged config |                 // unchanged config | ||||||
|                         EitherOrBoth::Left((name, setting)) => { |                 EitherOrBoth::Left((name, (setting, user_provided))) => { | ||||||
|                             new_configs.insert(name, setting); |                     tracing::debug!(embedder = name, "unchanged embedder"); | ||||||
|  |                     updated_configs.insert(name, (Setting::Set(setting), user_provided)); | ||||||
|                 } |                 } | ||||||
|                 // new config |                 // new config | ||||||
|                 EitherOrBoth::Right((name, mut setting)) => { |                 EitherOrBoth::Right((name, mut setting)) => { | ||||||
|  |                     tracing::debug!(embedder = name, "new embedder"); | ||||||
|                     // apply the default source in case the source was not set so that it gets validated |                     // apply the default source in case the source was not set so that it gets validated | ||||||
|                             crate::vector::settings::EmbeddingSettings::apply_default_source( |                     crate::vector::settings::EmbeddingSettings::apply_default_source(&mut setting); | ||||||
|                                 &mut setting, |  | ||||||
|                             ); |  | ||||||
|                     crate::vector::settings::EmbeddingSettings::apply_default_openai_model( |                     crate::vector::settings::EmbeddingSettings::apply_default_openai_model( | ||||||
|                         &mut setting, |                         &mut setting, | ||||||
|                     ); |                     ); | ||||||
|                     let setting = validate_embedding_settings(setting, &name)?; |                     let setting = validate_embedding_settings(setting, &name)?; | ||||||
|                             changed = true; |                     embedder_actions | ||||||
|                             new_configs.insert(name, setting); |                         .insert(name.clone(), EmbedderAction::Reindex(ReindexAction::FullReindex)); | ||||||
|  |                     updated_configs.insert(name, (setting, RoaringBitmap::new())); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|                 let new_configs: Vec<(String, EmbeddingConfig)> = new_configs |         let mut free_indices: [bool; u8::MAX as usize] = [true; u8::MAX as usize]; | ||||||
|  |         for res in self.index.embedder_category_id.iter(self.wtxn)? { | ||||||
|  |             let (_name, id) = res?; | ||||||
|  |             free_indices[id as usize] = false; | ||||||
|  |         } | ||||||
|  |         let mut free_indices = free_indices.iter_mut().enumerate(); | ||||||
|  |         let mut find_free_index = | ||||||
|  |             move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8); | ||||||
|  |         for (name, action) in embedder_actions.iter() { | ||||||
|  |             match action { | ||||||
|  |                 EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { | ||||||
|  |                     /* cannot be a new embedder, so has to have an id already */ | ||||||
|  |                 } | ||||||
|  |                 EmbedderAction::Reindex(ReindexAction::FullReindex) => { | ||||||
|  |                     if self.index.embedder_category_id.get(self.wtxn, name)?.is_none() { | ||||||
|  |                         let id = find_free_index() | ||||||
|  |                             .ok_or(UserError::TooManyEmbedders(updated_configs.len()))?; | ||||||
|  |                         tracing::debug!(embedder = name, id, "assigning free id to new embedder"); | ||||||
|  |                         self.index.embedder_category_id.put(self.wtxn, name, &id)?; | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 EmbedderAction::WriteBackToDocuments(_) => { /* already removed */ } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         let updated_configs: Vec<IndexEmbeddingConfig> = updated_configs | ||||||
|             .into_iter() |             .into_iter() | ||||||
|                     .filter_map(|(name, setting)| match setting { |             .filter_map(|(name, (config, user_provided))| match config { | ||||||
|                         Setting::Set(value) => Some((name, value.into())), |                 Setting::Set(config) => { | ||||||
|  |                     Some(IndexEmbeddingConfig { name, config: config.into(), user_provided }) | ||||||
|  |                 } | ||||||
|                 Setting::Reset => None, |                 Setting::Reset => None, | ||||||
|                         Setting::NotSet => Some((name, EmbeddingSettings::default().into())), |                 Setting::NotSet => Some(IndexEmbeddingConfig { | ||||||
|  |                     name, | ||||||
|  |                     config: EmbeddingSettings::default().into(), | ||||||
|  |                     user_provided, | ||||||
|  |                 }), | ||||||
|             }) |             }) | ||||||
|             .collect(); |             .collect(); | ||||||
|  |         if updated_configs.is_empty() { | ||||||
|                 self.index.embedder_category_id.clear(self.wtxn)?; |  | ||||||
|                 for (index, (embedder_name, _)) in new_configs.iter().enumerate() { |  | ||||||
|                     self.index.embedder_category_id.put_with_flags( |  | ||||||
|                         self.wtxn, |  | ||||||
|                         heed::PutFlags::APPEND, |  | ||||||
|                         embedder_name, |  | ||||||
|                         &index |  | ||||||
|                             .try_into() |  | ||||||
|                             .map_err(|_| UserError::TooManyEmbedders(new_configs.len()))?, |  | ||||||
|                     )?; |  | ||||||
|                 } |  | ||||||
|  |  | ||||||
|                 if new_configs.is_empty() { |  | ||||||
|             self.index.delete_embedding_configs(self.wtxn)?; |             self.index.delete_embedding_configs(self.wtxn)?; | ||||||
|         } else { |         } else { | ||||||
|                     self.index.put_embedding_configs(self.wtxn, new_configs)?; |             self.index.put_embedding_configs(self.wtxn, updated_configs)?; | ||||||
|         } |         } | ||||||
|                 changed |         Ok(embedder_actions) | ||||||
|             } |  | ||||||
|             Setting::Reset => { |  | ||||||
|                 self.index.delete_embedding_configs(self.wtxn)?; |  | ||||||
|                 true |  | ||||||
|             } |  | ||||||
|             Setting::NotSet => false, |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         // if any changes force a reindexing |  | ||||||
|         // clear the vector database. |  | ||||||
|         if update { |  | ||||||
|             self.index.vector_arroy.clear(self.wtxn)?; |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         Ok(update) |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn update_search_cutoff(&mut self) -> Result<bool> { |     fn update_search_cutoff(&mut self) -> Result<bool> { | ||||||
| @@ -1058,13 +1151,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | |||||||
|         self.update_searchable()?; |         self.update_searchable()?; | ||||||
|         self.update_exact_attributes()?; |         self.update_exact_attributes()?; | ||||||
|         self.update_proximity_precision()?; |         self.update_proximity_precision()?; | ||||||
|         // TODO: very rough approximation of the needs for reindexing where any change will result in |  | ||||||
|         // a full reindexing. |         let embedding_config_updates = self.update_embedding_configs()?; | ||||||
|         // What can be done instead: |  | ||||||
|         // 1. Only change the distance on a distance change |  | ||||||
|         // 2. Only change the name -> embedder mapping on a name change |  | ||||||
|         // 3. Keep the old vectors but reattempt indexing on a prompt change: only actually changed prompt will need embedding + storage |  | ||||||
|         let embedding_configs_updated = self.update_embedding_configs()?; |  | ||||||
|  |  | ||||||
|         let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn)?; |         let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn)?; | ||||||
|         new_inner_settings.recompute_facets(self.wtxn, self.index)?; |         new_inner_settings.recompute_facets(self.wtxn, self.index)?; | ||||||
| @@ -1078,7 +1166,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | |||||||
|             old_inner_settings, |             old_inner_settings, | ||||||
|             new_inner_settings, |             new_inner_settings, | ||||||
|             primary_key_id, |             primary_key_id, | ||||||
|             embedding_configs_updated, |             embedding_config_updates, | ||||||
|             settings_update_only, |             settings_update_only, | ||||||
|         ); |         ); | ||||||
|  |  | ||||||
| @@ -1094,8 +1182,7 @@ pub struct InnerIndexSettingsDiff { | |||||||
|     pub(crate) old: InnerIndexSettings, |     pub(crate) old: InnerIndexSettings, | ||||||
|     pub(crate) new: InnerIndexSettings, |     pub(crate) new: InnerIndexSettings, | ||||||
|     pub(crate) primary_key_id: Option<FieldId>, |     pub(crate) primary_key_id: Option<FieldId>, | ||||||
|     // TODO: compare directly the embedders. |     pub(crate) embedding_config_updates: BTreeMap<String, EmbedderAction>, | ||||||
|     pub(crate) embedding_configs_updated: bool, |  | ||||||
|     pub(crate) settings_update_only: bool, |     pub(crate) settings_update_only: bool, | ||||||
|     /// The set of only the additional searchable fields. |     /// The set of only the additional searchable fields. | ||||||
|     /// If any other searchable field has been modified, is set to None. |     /// If any other searchable field has been modified, is set to None. | ||||||
| @@ -1116,7 +1203,7 @@ impl InnerIndexSettingsDiff { | |||||||
|         old_settings: InnerIndexSettings, |         old_settings: InnerIndexSettings, | ||||||
|         new_settings: InnerIndexSettings, |         new_settings: InnerIndexSettings, | ||||||
|         primary_key_id: Option<FieldId>, |         primary_key_id: Option<FieldId>, | ||||||
|         embedding_configs_updated: bool, |         embedding_config_updates: BTreeMap<String, EmbedderAction>, | ||||||
|         settings_update_only: bool, |         settings_update_only: bool, | ||||||
|     ) -> Self { |     ) -> Self { | ||||||
|         let only_additional_fields = match ( |         let only_additional_fields = match ( | ||||||
| @@ -1153,7 +1240,7 @@ impl InnerIndexSettingsDiff { | |||||||
|             old: old_settings, |             old: old_settings, | ||||||
|             new: new_settings, |             new: new_settings, | ||||||
|             primary_key_id, |             primary_key_id, | ||||||
|             embedding_configs_updated, |             embedding_config_updates, | ||||||
|             settings_update_only, |             settings_update_only, | ||||||
|             only_additional_fields, |             only_additional_fields, | ||||||
|             cache_reindex_searchable_without_user_defined, |             cache_reindex_searchable_without_user_defined, | ||||||
| @@ -1220,7 +1307,7 @@ impl InnerIndexSettingsDiff { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn reindex_vectors(&self) -> bool { |     pub fn reindex_vectors(&self) -> bool { | ||||||
|         self.embedding_configs_updated |         !self.embedding_config_updates.is_empty() | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn settings_update_only(&self) -> bool { |     pub fn settings_update_only(&self) -> bool { | ||||||
| @@ -1252,6 +1339,8 @@ pub(crate) struct InnerIndexSettings { | |||||||
|     pub embedding_configs: EmbeddingConfigs, |     pub embedding_configs: EmbeddingConfigs, | ||||||
|     pub existing_fields: HashSet<String>, |     pub existing_fields: HashSet<String>, | ||||||
|     pub geo_fields_ids: Option<(FieldId, FieldId)>, |     pub geo_fields_ids: Option<(FieldId, FieldId)>, | ||||||
|  |     pub non_searchable_fields_ids: Vec<FieldId>, | ||||||
|  |     pub non_faceted_fields_ids: Vec<FieldId>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl InnerIndexSettings { | impl InnerIndexSettings { | ||||||
| @@ -1265,8 +1354,8 @@ impl InnerIndexSettings { | |||||||
|         let user_defined_searchable_fields = |         let user_defined_searchable_fields = | ||||||
|             user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect()); |             user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect()); | ||||||
|         let user_defined_faceted_fields = index.user_defined_faceted_fields(rtxn)?; |         let user_defined_faceted_fields = index.user_defined_faceted_fields(rtxn)?; | ||||||
|         let searchable_fields_ids = index.searchable_fields_ids(rtxn)?; |         let mut searchable_fields_ids = index.searchable_fields_ids(rtxn)?; | ||||||
|         let faceted_fields_ids = index.faceted_fields_ids(rtxn)?; |         let mut faceted_fields_ids = index.faceted_fields_ids(rtxn)?; | ||||||
|         let exact_attributes = index.exact_attributes_ids(rtxn)?; |         let exact_attributes = index.exact_attributes_ids(rtxn)?; | ||||||
|         let proximity_precision = index.proximity_precision(rtxn)?.unwrap_or_default(); |         let proximity_precision = index.proximity_precision(rtxn)?.unwrap_or_default(); | ||||||
|         let embedding_configs = embedders(index.embedding_configs(rtxn)?)?; |         let embedding_configs = embedders(index.embedding_configs(rtxn)?)?; | ||||||
| @@ -1294,6 +1383,10 @@ impl InnerIndexSettings { | |||||||
|             None => None, |             None => None, | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|  |         let vectors_fids = fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME); | ||||||
|  |         searchable_fields_ids.retain(|id| !vectors_fids.contains(id)); | ||||||
|  |         faceted_fields_ids.retain(|id| !vectors_fids.contains(id)); | ||||||
|  |  | ||||||
|         Ok(Self { |         Ok(Self { | ||||||
|             stop_words, |             stop_words, | ||||||
|             allowed_separators, |             allowed_separators, | ||||||
| @@ -1308,6 +1401,8 @@ impl InnerIndexSettings { | |||||||
|             embedding_configs, |             embedding_configs, | ||||||
|             existing_fields, |             existing_fields, | ||||||
|             geo_fields_ids, |             geo_fields_ids, | ||||||
|  |             non_searchable_fields_ids: vectors_fids.clone(), | ||||||
|  |             non_faceted_fields_ids: vectors_fids.clone(), | ||||||
|         }) |         }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -1315,9 +1410,10 @@ impl InnerIndexSettings { | |||||||
|     pub fn recompute_facets(&mut self, wtxn: &mut heed::RwTxn, index: &Index) -> Result<()> { |     pub fn recompute_facets(&mut self, wtxn: &mut heed::RwTxn, index: &Index) -> Result<()> { | ||||||
|         let new_facets = self |         let new_facets = self | ||||||
|             .fields_ids_map |             .fields_ids_map | ||||||
|             .names() |             .iter() | ||||||
|             .filter(|&field| crate::is_faceted(field, &self.user_defined_faceted_fields)) |             .filter(|(fid, _field)| !self.non_faceted_fields_ids.contains(fid)) | ||||||
|             .map(|field| field.to_string()) |             .filter(|(_fid, field)| crate::is_faceted(field, &self.user_defined_faceted_fields)) | ||||||
|  |             .map(|(_fid, field)| field.to_string()) | ||||||
|             .collect(); |             .collect(); | ||||||
|         index.put_faceted_fields(wtxn, &new_facets)?; |         index.put_faceted_fields(wtxn, &new_facets)?; | ||||||
|  |  | ||||||
| @@ -1337,6 +1433,7 @@ impl InnerIndexSettings { | |||||||
|             index.put_all_searchable_fields_from_fields_ids_map( |             index.put_all_searchable_fields_from_fields_ids_map( | ||||||
|                 wtxn, |                 wtxn, | ||||||
|                 &searchable_fields, |                 &searchable_fields, | ||||||
|  |                 &self.non_searchable_fields_ids, | ||||||
|                 &self.fields_ids_map, |                 &self.fields_ids_map, | ||||||
|             )?; |             )?; | ||||||
|         } |         } | ||||||
| @@ -1347,10 +1444,15 @@ impl InnerIndexSettings { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| fn embedders(embedding_configs: Vec<(String, EmbeddingConfig)>) -> Result<EmbeddingConfigs> { | fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingConfigs> { | ||||||
|     let res: Result<_> = embedding_configs |     let res: Result<_> = embedding_configs | ||||||
|         .into_iter() |         .into_iter() | ||||||
|         .map(|(name, EmbeddingConfig { embedder_options, prompt })| { |         .map( | ||||||
|  |             |IndexEmbeddingConfig { | ||||||
|  |                  name, | ||||||
|  |                  config: EmbeddingConfig { embedder_options, prompt }, | ||||||
|  |                  .. | ||||||
|  |              }| { | ||||||
|                 let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?); |                 let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?); | ||||||
|  |  | ||||||
|                 let embedder = Arc::new( |                 let embedder = Arc::new( | ||||||
| @@ -1359,7 +1461,8 @@ fn embedders(embedding_configs: Vec<(String, EmbeddingConfig)>) -> Result<Embedd | |||||||
|                         .map_err(crate::Error::from)?, |                         .map_err(crate::Error::from)?, | ||||||
|                 ); |                 ); | ||||||
|                 Ok((name, (embedder, prompt))) |                 Ok((name, (embedder, prompt))) | ||||||
|         }) |             }, | ||||||
|  |         ) | ||||||
|         .collect(); |         .collect(); | ||||||
|     res.map(EmbeddingConfigs::new) |     res.map(EmbeddingConfigs::new) | ||||||
| } | } | ||||||
|   | |||||||
| @@ -152,6 +152,10 @@ impl EmbeddingConfigs { | |||||||
|         &self.0 |         &self.0 | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>)> { | ||||||
|  |         self.0 | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Get the name of the default embedder configuration. |     /// Get the name of the default embedder configuration. | ||||||
|     /// |     /// | ||||||
|     /// The default embedder is determined as follows: |     /// The default embedder is determined as follows: | ||||||
|   | |||||||
| @@ -1,51 +1,119 @@ | |||||||
| use std::collections::{BTreeMap, BTreeSet}; | use std::collections::{BTreeMap, BTreeSet}; | ||||||
|  |  | ||||||
|  | use deserr::{take_cf_content, DeserializeError, Deserr, Sequence}; | ||||||
| use obkv::KvReader; | use obkv::KvReader; | ||||||
| use serde_json::{from_slice, Value}; | use serde_json::{from_slice, Value}; | ||||||
|  |  | ||||||
| use super::Embedding; | use super::Embedding; | ||||||
|  | use crate::index::IndexEmbeddingConfig; | ||||||
| use crate::update::del_add::{DelAdd, KvReaderDelAdd}; | use crate::update::del_add::{DelAdd, KvReaderDelAdd}; | ||||||
| use crate::{FieldId, InternalError, UserError}; | use crate::{DocumentId, FieldId, InternalError, UserError}; | ||||||
|  |  | ||||||
| pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors"; | pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors"; | ||||||
|  |  | ||||||
| #[derive(serde::Serialize, serde::Deserialize, Debug)] | #[derive(serde::Serialize, Debug)] | ||||||
| #[serde(untagged)] | #[serde(untagged)] | ||||||
| pub enum Vectors { | pub enum Vectors { | ||||||
|     ImplicitlyUserProvided(VectorOrArrayOfVectors), |     ImplicitlyUserProvided(VectorOrArrayOfVectors), | ||||||
|     Explicit(ExplicitVectors), |     Explicit(ExplicitVectors), | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl<E: DeserializeError> Deserr<E> for Vectors { | ||||||
|  |     fn deserialize_from_value<V: deserr::IntoValue>( | ||||||
|  |         value: deserr::Value<V>, | ||||||
|  |         location: deserr::ValuePointerRef, | ||||||
|  |     ) -> Result<Self, E> { | ||||||
|  |         match value { | ||||||
|  |             deserr::Value::Sequence(_) | deserr::Value::Null => { | ||||||
|  |                 Ok(Vectors::ImplicitlyUserProvided(VectorOrArrayOfVectors::deserialize_from_value( | ||||||
|  |                     value, location, | ||||||
|  |                 )?)) | ||||||
|  |             } | ||||||
|  |             deserr::Value::Map(_) => { | ||||||
|  |                 Ok(Vectors::Explicit(ExplicitVectors::deserialize_from_value(value, location)?)) | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             value => Err(take_cf_content(E::error( | ||||||
|  |                 None, | ||||||
|  |                 deserr::ErrorKind::IncorrectValueKind { | ||||||
|  |                     actual: value, | ||||||
|  |                     accepted: &[ | ||||||
|  |                         deserr::ValueKind::Sequence, | ||||||
|  |                         deserr::ValueKind::Map, | ||||||
|  |                         deserr::ValueKind::Null, | ||||||
|  |                     ], | ||||||
|  |                 }, | ||||||
|  |                 location, | ||||||
|  |             ))), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| impl Vectors { | impl Vectors { | ||||||
|     pub fn into_array_of_vectors(self) -> Vec<Embedding> { |     pub fn must_regenerate(&self) -> bool { | ||||||
|         match self { |         match self { | ||||||
|             Vectors::ImplicitlyUserProvided(embeddings) |             Vectors::ImplicitlyUserProvided(_) => false, | ||||||
|             | Vectors::Explicit(ExplicitVectors { embeddings, user_provided: _ }) => { |             Vectors::Explicit(ExplicitVectors { regenerate, .. }) => *regenerate, | ||||||
|                 embeddings.into_array_of_vectors().unwrap_or_default() |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn into_array_of_vectors(self) -> Option<Vec<Embedding>> { | ||||||
|  |         match self { | ||||||
|  |             Vectors::ImplicitlyUserProvided(embeddings) => { | ||||||
|  |                 Some(embeddings.into_array_of_vectors().unwrap_or_default()) | ||||||
|  |             } | ||||||
|  |             Vectors::Explicit(ExplicitVectors { embeddings, regenerate: _ }) => { | ||||||
|  |                 embeddings.map(|embeddings| embeddings.into_array_of_vectors().unwrap_or_default()) | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(serde::Serialize, serde::Deserialize, Debug)] | #[derive(serde::Serialize, Deserr, Debug)] | ||||||
| #[serde(rename_all = "camelCase")] | #[serde(rename_all = "camelCase")] | ||||||
| pub struct ExplicitVectors { | pub struct ExplicitVectors { | ||||||
|     pub embeddings: VectorOrArrayOfVectors, |     #[serde(default)] | ||||||
|     pub user_provided: bool, |     #[deserr(default)] | ||||||
|  |     pub embeddings: Option<VectorOrArrayOfVectors>, | ||||||
|  |     pub regenerate: bool, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub enum VectorState { | ||||||
|  |     Inline(Vectors), | ||||||
|  |     Manual, | ||||||
|  |     Generated, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl VectorState { | ||||||
|  |     pub fn must_regenerate(&self) -> bool { | ||||||
|  |         match self { | ||||||
|  |             VectorState::Inline(vectors) => vectors.must_regenerate(), | ||||||
|  |             VectorState::Manual => false, | ||||||
|  |             VectorState::Generated => true, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub enum VectorsState { | ||||||
|  |     NoVectorsFid, | ||||||
|  |     NoVectorsFieldInDocument, | ||||||
|  |     Vectors(BTreeMap<String, Vectors>), | ||||||
| } | } | ||||||
|  |  | ||||||
| pub struct ParsedVectorsDiff { | pub struct ParsedVectorsDiff { | ||||||
|     pub old: Option<BTreeMap<String, Vectors>>, |     old: BTreeMap<String, VectorState>, | ||||||
|     pub new: Option<BTreeMap<String, Vectors>>, |     new: VectorsState, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl ParsedVectorsDiff { | impl ParsedVectorsDiff { | ||||||
|     pub fn new( |     pub fn new( | ||||||
|  |         docid: DocumentId, | ||||||
|  |         embedders_configs: &[IndexEmbeddingConfig], | ||||||
|         documents_diff: KvReader<'_, FieldId>, |         documents_diff: KvReader<'_, FieldId>, | ||||||
|         old_vectors_fid: Option<FieldId>, |         old_vectors_fid: Option<FieldId>, | ||||||
|         new_vectors_fid: Option<FieldId>, |         new_vectors_fid: Option<FieldId>, | ||||||
|     ) -> Result<Self, Error> { |     ) -> Result<Self, Error> { | ||||||
|         let old = match old_vectors_fid |         let mut old = match old_vectors_fid | ||||||
|             .and_then(|vectors_fid| documents_diff.get(vectors_fid)) |             .and_then(|vectors_fid| documents_diff.get(vectors_fid)) | ||||||
|             .map(KvReaderDelAdd::new) |             .map(KvReaderDelAdd::new) | ||||||
|             .map(|obkv| to_vector_map(obkv, DelAdd::Deletion)) |             .map(|obkv| to_vector_map(obkv, DelAdd::Deletion)) | ||||||
| @@ -61,48 +129,84 @@ impl ParsedVectorsDiff { | |||||||
|                 return Err(error); |                 return Err(error); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         .flatten(); |         .flatten().map_or(BTreeMap::default(), |del| del.into_iter().map(|(name, vec)| (name, VectorState::Inline(vec))).collect()); | ||||||
|         let new = new_vectors_fid |         for embedding_config in embedders_configs { | ||||||
|             .and_then(|vectors_fid| documents_diff.get(vectors_fid)) |             if embedding_config.user_provided.contains(docid) { | ||||||
|             .map(KvReaderDelAdd::new) |                 old.entry(embedding_config.name.to_string()).or_insert(VectorState::Manual); | ||||||
|             .map(|obkv| to_vector_map(obkv, DelAdd::Addition)) |             } | ||||||
|             .transpose()? |         } | ||||||
|             .flatten(); |  | ||||||
|  |         let new = 'new: { | ||||||
|  |             let Some(new_vectors_fid) = new_vectors_fid else { | ||||||
|  |                 break 'new VectorsState::NoVectorsFid; | ||||||
|  |             }; | ||||||
|  |             let Some(bytes) = documents_diff.get(new_vectors_fid) else { | ||||||
|  |                 break 'new VectorsState::NoVectorsFieldInDocument; | ||||||
|  |             }; | ||||||
|  |             let obkv = KvReaderDelAdd::new(bytes); | ||||||
|  |             match to_vector_map(obkv, DelAdd::Addition)? { | ||||||
|  |                 Some(new) => VectorsState::Vectors(new), | ||||||
|  |                 None => VectorsState::NoVectorsFieldInDocument, | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  |  | ||||||
|         Ok(Self { old, new }) |         Ok(Self { old, new }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn remove(&mut self, embedder_name: &str) -> (Option<Vectors>, Option<Vectors>) { |     pub fn remove(&mut self, embedder_name: &str) -> (VectorState, VectorState) { | ||||||
|         let old = self.old.as_mut().and_then(|old| old.remove(embedder_name)); |         let old = self.old.remove(embedder_name).unwrap_or(VectorState::Generated); | ||||||
|         let new = self.new.as_mut().and_then(|new| new.remove(embedder_name)); |         let state_from_old = match old { | ||||||
|  |             // assume a userProvided is still userProvided | ||||||
|  |             VectorState::Manual => VectorState::Manual, | ||||||
|  |             // generated is still generated | ||||||
|  |             VectorState::Generated => VectorState::Generated, | ||||||
|  |             // weird case that shouldn't happen were the previous docs version is inline, | ||||||
|  |             // but it was removed in the new version | ||||||
|  |             // Since it is not in the new version, we switch to generated | ||||||
|  |             VectorState::Inline(_) => VectorState::Generated, | ||||||
|  |         }; | ||||||
|  |         let new = match &mut self.new { | ||||||
|  |             VectorsState::Vectors(new) => { | ||||||
|  |                 new.remove(embedder_name).map(VectorState::Inline).unwrap_or(state_from_old) | ||||||
|  |             } | ||||||
|  |             _ => | ||||||
|  |             // if no `_vectors` field is present in the new document, | ||||||
|  |             // the state depends on the previous version of the document | ||||||
|  |             { | ||||||
|  |                 state_from_old | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  |  | ||||||
|         (old, new) |         (old, new) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub struct ParsedVectors(pub BTreeMap<String, Vectors>); | pub struct ParsedVectors(pub BTreeMap<String, Vectors>); | ||||||
|  |  | ||||||
| impl ParsedVectors { | impl<E: DeserializeError> Deserr<E> for ParsedVectors { | ||||||
|     pub fn from_bytes(value: &[u8]) -> Result<Self, Error> { |     fn deserialize_from_value<V: deserr::IntoValue>( | ||||||
|         let Ok(value) = from_slice(value) else { |         value: deserr::Value<V>, | ||||||
|             let value = from_slice(value).map_err(Error::InternalSerdeJson)?; |         location: deserr::ValuePointerRef, | ||||||
|             return Err(Error::InvalidMap(value)); |     ) -> Result<Self, E> { | ||||||
|         }; |         let value = <BTreeMap<String, Vectors>>::deserialize_from_value(value, location)?; | ||||||
|         Ok(ParsedVectors(value)) |         Ok(ParsedVectors(value)) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn retain_user_provided_vectors(&mut self, embedders: &BTreeSet<String>) { |  | ||||||
|         self.0.retain(|k, v| match v { |  | ||||||
|             Vectors::ImplicitlyUserProvided(_) => true, |  | ||||||
|             Vectors::Explicit(ExplicitVectors { embeddings: _, user_provided }) => { |  | ||||||
|                 *user_provided |  | ||||||
|                 // if the embedder is not in the config, then never touch it |  | ||||||
|                 || !embedders.contains(k) |  | ||||||
| } | } | ||||||
|         }); |  | ||||||
|  | impl ParsedVectors { | ||||||
|  |     pub fn from_bytes(value: &[u8]) -> Result<Self, Error> { | ||||||
|  |         let value: serde_json::Value = from_slice(value).map_err(Error::InternalSerdeJson)?; | ||||||
|  |         deserr::deserialize(value).map_err(|error| Error::InvalidEmbedderConf { error }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn retain_not_embedded_vectors(&mut self, embedders: &BTreeSet<String>) { | ||||||
|  |         self.0.retain(|k, _v| !embedders.contains(k)) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub enum Error { | pub enum Error { | ||||||
|     InvalidMap(Value), |     InvalidMap(Value), | ||||||
|  |     InvalidEmbedderConf { error: deserr::errors::JsonError }, | ||||||
|     InternalSerdeJson(serde_json::Error), |     InternalSerdeJson(serde_json::Error), | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -112,6 +216,12 @@ impl Error { | |||||||
|             Error::InvalidMap(value) => { |             Error::InvalidMap(value) => { | ||||||
|                 crate::Error::UserError(UserError::InvalidVectorsMapType { document_id, value }) |                 crate::Error::UserError(UserError::InvalidVectorsMapType { document_id, value }) | ||||||
|             } |             } | ||||||
|  |             Error::InvalidEmbedderConf { error } => { | ||||||
|  |                 crate::Error::UserError(UserError::InvalidVectorsEmbedderConf { | ||||||
|  |                     document_id, | ||||||
|  |                     error, | ||||||
|  |                 }) | ||||||
|  |             } | ||||||
|             Error::InternalSerdeJson(error) => { |             Error::InternalSerdeJson(error) => { | ||||||
|                 crate::Error::InternalError(InternalError::SerdeJson(error)) |                 crate::Error::InternalError(InternalError::SerdeJson(error)) | ||||||
|             } |             } | ||||||
| @@ -132,13 +242,84 @@ fn to_vector_map( | |||||||
| } | } | ||||||
|  |  | ||||||
| /// Represents either a vector or an array of multiple vectors. | /// Represents either a vector or an array of multiple vectors. | ||||||
| #[derive(serde::Serialize, serde::Deserialize, Debug)] | #[derive(serde::Serialize, Debug)] | ||||||
| #[serde(transparent)] | #[serde(transparent)] | ||||||
| pub struct VectorOrArrayOfVectors { | pub struct VectorOrArrayOfVectors { | ||||||
|     #[serde(with = "either::serde_untagged_optional")] |     #[serde(with = "either::serde_untagged_optional")] | ||||||
|     inner: Option<either::Either<Vec<Embedding>, Embedding>>, |     inner: Option<either::Either<Vec<Embedding>, Embedding>>, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl<E: DeserializeError> Deserr<E> for VectorOrArrayOfVectors { | ||||||
|  |     fn deserialize_from_value<V: deserr::IntoValue>( | ||||||
|  |         value: deserr::Value<V>, | ||||||
|  |         location: deserr::ValuePointerRef, | ||||||
|  |     ) -> Result<Self, E> { | ||||||
|  |         match value { | ||||||
|  |             deserr::Value::Null => Ok(VectorOrArrayOfVectors { inner: None }), | ||||||
|  |             deserr::Value::Sequence(seq) => { | ||||||
|  |                 let mut iter = seq.into_iter(); | ||||||
|  |                 match iter.next().map(|v| v.into_value()) { | ||||||
|  |                     None => { | ||||||
|  |                         // With the strange way serde serialize the `Either`, we must send the left part | ||||||
|  |                         // otherwise it'll consider we returned [[]] | ||||||
|  |                         Ok(VectorOrArrayOfVectors { inner: Some(either::Either::Left(Vec::new())) }) | ||||||
|  |                     } | ||||||
|  |                     Some(val @ deserr::Value::Sequence(_)) => { | ||||||
|  |                         let first = Embedding::deserialize_from_value(val, location.push_index(0))?; | ||||||
|  |                         let mut collect = vec![first]; | ||||||
|  |                         let mut tail = iter | ||||||
|  |                             .enumerate() | ||||||
|  |                             .map(|(i, v)| { | ||||||
|  |                                 Embedding::deserialize_from_value( | ||||||
|  |                                     v.into_value(), | ||||||
|  |                                     location.push_index(i + 1), | ||||||
|  |                                 ) | ||||||
|  |                             }) | ||||||
|  |                             .collect::<Result<Vec<_>, _>>()?; | ||||||
|  |                         collect.append(&mut tail); | ||||||
|  |  | ||||||
|  |                         Ok(VectorOrArrayOfVectors { inner: Some(either::Either::Left(collect)) }) | ||||||
|  |                     } | ||||||
|  |                     Some( | ||||||
|  |                         val @ deserr::Value::Integer(_) | ||||||
|  |                         | val @ deserr::Value::NegativeInteger(_) | ||||||
|  |                         | val @ deserr::Value::Float(_), | ||||||
|  |                     ) => { | ||||||
|  |                         let first = <f32>::deserialize_from_value(val, location.push_index(0))?; | ||||||
|  |                         let mut embedding = iter | ||||||
|  |                             .enumerate() | ||||||
|  |                             .map(|(i, v)| { | ||||||
|  |                                 <f32>::deserialize_from_value( | ||||||
|  |                                     v.into_value(), | ||||||
|  |                                     location.push_index(i + 1), | ||||||
|  |                                 ) | ||||||
|  |                             }) | ||||||
|  |                             .collect::<Result<Vec<_>, _>>()?; | ||||||
|  |                         embedding.insert(0, first); | ||||||
|  |                         Ok(VectorOrArrayOfVectors { inner: Some(either::Either::Right(embedding)) }) | ||||||
|  |                     } | ||||||
|  |                     Some(value) => Err(take_cf_content(E::error( | ||||||
|  |                         None, | ||||||
|  |                         deserr::ErrorKind::IncorrectValueKind { | ||||||
|  |                             actual: value, | ||||||
|  |                             accepted: &[deserr::ValueKind::Sequence, deserr::ValueKind::Float], | ||||||
|  |                         }, | ||||||
|  |                         location.push_index(0), | ||||||
|  |                     ))), | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             value => Err(take_cf_content(E::error( | ||||||
|  |                 None, | ||||||
|  |                 deserr::ErrorKind::IncorrectValueKind { | ||||||
|  |                     actual: value, | ||||||
|  |                     accepted: &[deserr::ValueKind::Sequence, deserr::ValueKind::Null], | ||||||
|  |                 }, | ||||||
|  |                 location, | ||||||
|  |             ))), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| impl VectorOrArrayOfVectors { | impl VectorOrArrayOfVectors { | ||||||
|     pub fn into_array_of_vectors(self) -> Option<Vec<Embedding>> { |     pub fn into_array_of_vectors(self) -> Option<Vec<Embedding>> { | ||||||
|         match self.inner? { |         match self.inner? { | ||||||
| @@ -150,21 +331,41 @@ impl VectorOrArrayOfVectors { | |||||||
|     pub fn from_array_of_vectors(array_of_vec: Vec<Embedding>) -> Self { |     pub fn from_array_of_vectors(array_of_vec: Vec<Embedding>) -> Self { | ||||||
|         Self { inner: Some(either::Either::Left(array_of_vec)) } |         Self { inner: Some(either::Either::Left(array_of_vec)) } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn from_vector(vec: Embedding) -> Self { | ||||||
|  |         Self { inner: Some(either::Either::Right(vec)) } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl From<Embedding> for VectorOrArrayOfVectors { | ||||||
|  |     fn from(vec: Embedding) -> Self { | ||||||
|  |         Self::from_vector(vec) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl From<Vec<Embedding>> for VectorOrArrayOfVectors { | ||||||
|  |     fn from(vec: Vec<Embedding>) -> Self { | ||||||
|  |         Self::from_array_of_vectors(vec) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| mod test { | mod test { | ||||||
|     use super::VectorOrArrayOfVectors; |     use super::VectorOrArrayOfVectors; | ||||||
|  |  | ||||||
|  |     fn embedding_from_str(s: &str) -> Result<VectorOrArrayOfVectors, deserr::errors::JsonError> { | ||||||
|  |         let value: serde_json::Value = serde_json::from_str(s).unwrap(); | ||||||
|  |         deserr::deserialize(value) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn array_of_vectors() { |     fn array_of_vectors() { | ||||||
|         let null: VectorOrArrayOfVectors = serde_json::from_str("null").unwrap(); |         let null = embedding_from_str("null").unwrap(); | ||||||
|         let empty: VectorOrArrayOfVectors = serde_json::from_str("[]").unwrap(); |         let empty = embedding_from_str("[]").unwrap(); | ||||||
|         let one: VectorOrArrayOfVectors = serde_json::from_str("[0.1]").unwrap(); |         let one = embedding_from_str("[0.1]").unwrap(); | ||||||
|         let two: VectorOrArrayOfVectors = serde_json::from_str("[0.1, 0.2]").unwrap(); |         let two = embedding_from_str("[0.1, 0.2]").unwrap(); | ||||||
|         let one_vec: VectorOrArrayOfVectors = serde_json::from_str("[[0.1, 0.2]]").unwrap(); |         let one_vec = embedding_from_str("[[0.1, 0.2]]").unwrap(); | ||||||
|         let two_vecs: VectorOrArrayOfVectors = |         let two_vecs = embedding_from_str("[[0.1, 0.2], [0.3, 0.4]]").unwrap(); | ||||||
|             serde_json::from_str("[[0.1, 0.2], [0.3, 0.4]]").unwrap(); |  | ||||||
|  |  | ||||||
|         insta::assert_json_snapshot!(null.into_array_of_vectors(), @"null"); |         insta::assert_json_snapshot!(null.into_array_of_vectors(), @"null"); | ||||||
|         insta::assert_json_snapshot!(empty.into_array_of_vectors(), @"[]"); |         insta::assert_json_snapshot!(empty.into_array_of_vectors(), @"[]"); | ||||||
|   | |||||||
| @@ -1,4 +1,5 @@ | |||||||
| use deserr::Deserr; | use deserr::Deserr; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
|  |  | ||||||
| use super::rest::InputType; | use super::rest::InputType; | ||||||
| @@ -72,6 +73,238 @@ pub fn check_unset<T>( | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Indicates what action should take place during a reindexing operation for an embedder | ||||||
|  | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] | ||||||
|  | pub enum ReindexAction { | ||||||
|  |     /// An indexing operation should take place for this embedder, keeping existing vectors | ||||||
|  |     /// and checking whether the document template changed or not | ||||||
|  |     RegeneratePrompts, | ||||||
|  |     /// An indexing operation should take place for all documents for this embedder, removing existing vectors | ||||||
|  |     /// (except userProvided ones) | ||||||
|  |     FullReindex, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub enum SettingsDiff { | ||||||
|  |     Remove, | ||||||
|  |     Reindex { action: ReindexAction, updated_settings: EmbeddingSettings }, | ||||||
|  |     UpdateWithoutReindex { updated_settings: EmbeddingSettings }, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub enum EmbedderAction { | ||||||
|  |     WriteBackToDocuments(WriteBackToDocuments), | ||||||
|  |     Reindex(ReindexAction), | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub struct WriteBackToDocuments { | ||||||
|  |     pub embedder_id: u8, | ||||||
|  |     pub user_provided: RoaringBitmap, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl SettingsDiff { | ||||||
|  |     pub fn from_settings(old: EmbeddingSettings, new: Setting<EmbeddingSettings>) -> Self { | ||||||
|  |         match new { | ||||||
|  |             Setting::Set(new) => { | ||||||
|  |                 let EmbeddingSettings { | ||||||
|  |                     mut source, | ||||||
|  |                     mut model, | ||||||
|  |                     mut revision, | ||||||
|  |                     mut api_key, | ||||||
|  |                     mut dimensions, | ||||||
|  |                     mut document_template, | ||||||
|  |                     mut url, | ||||||
|  |                     mut query, | ||||||
|  |                     mut input_field, | ||||||
|  |                     mut path_to_embeddings, | ||||||
|  |                     mut embedding_object, | ||||||
|  |                     mut input_type, | ||||||
|  |                     mut distribution, | ||||||
|  |                 } = old; | ||||||
|  |  | ||||||
|  |                 let EmbeddingSettings { | ||||||
|  |                     source: new_source, | ||||||
|  |                     model: new_model, | ||||||
|  |                     revision: new_revision, | ||||||
|  |                     api_key: new_api_key, | ||||||
|  |                     dimensions: new_dimensions, | ||||||
|  |                     document_template: new_document_template, | ||||||
|  |                     url: new_url, | ||||||
|  |                     query: new_query, | ||||||
|  |                     input_field: new_input_field, | ||||||
|  |                     path_to_embeddings: new_path_to_embeddings, | ||||||
|  |                     embedding_object: new_embedding_object, | ||||||
|  |                     input_type: new_input_type, | ||||||
|  |                     distribution: new_distribution, | ||||||
|  |                 } = new; | ||||||
|  |  | ||||||
|  |                 let mut reindex_action = None; | ||||||
|  |  | ||||||
|  |                 // **Warning**: do not use short-circuiting || here, we want all these operations applied | ||||||
|  |                 if source.apply(new_source) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                     // when the source changes, we need to reapply the default settings for the new source | ||||||
|  |                     apply_default_for_source( | ||||||
|  |                         &source, | ||||||
|  |                         &mut model, | ||||||
|  |                         &mut revision, | ||||||
|  |                         &mut dimensions, | ||||||
|  |                         &mut url, | ||||||
|  |                         &mut query, | ||||||
|  |                         &mut input_field, | ||||||
|  |                         &mut path_to_embeddings, | ||||||
|  |                         &mut embedding_object, | ||||||
|  |                         &mut input_type, | ||||||
|  |                         &mut document_template, | ||||||
|  |                     ) | ||||||
|  |                 } | ||||||
|  |                 if model.apply(new_model) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if revision.apply(new_revision) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if dimensions.apply(new_dimensions) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if url.apply(new_url) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if query.apply(new_query) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if input_field.apply(new_input_field) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if path_to_embeddings.apply(new_path_to_embeddings) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if embedding_object.apply(new_embedding_object) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if input_type.apply(new_input_type) { | ||||||
|  |                     ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); | ||||||
|  |                 } | ||||||
|  |                 if document_template.apply(new_document_template) { | ||||||
|  |                     ReindexAction::push_action( | ||||||
|  |                         &mut reindex_action, | ||||||
|  |                         ReindexAction::RegeneratePrompts, | ||||||
|  |                     ); | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 distribution.apply(new_distribution); | ||||||
|  |                 api_key.apply(new_api_key); | ||||||
|  |  | ||||||
|  |                 let updated_settings = EmbeddingSettings { | ||||||
|  |                     source, | ||||||
|  |                     model, | ||||||
|  |                     revision, | ||||||
|  |                     api_key, | ||||||
|  |                     dimensions, | ||||||
|  |                     document_template, | ||||||
|  |                     url, | ||||||
|  |                     query, | ||||||
|  |                     input_field, | ||||||
|  |                     path_to_embeddings, | ||||||
|  |                     embedding_object, | ||||||
|  |                     input_type, | ||||||
|  |                     distribution, | ||||||
|  |                 }; | ||||||
|  |  | ||||||
|  |                 match reindex_action { | ||||||
|  |                     Some(action) => Self::Reindex { action, updated_settings }, | ||||||
|  |                     None => Self::UpdateWithoutReindex { updated_settings }, | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             Setting::Reset => Self::Remove, | ||||||
|  |             Setting::NotSet => Self::UpdateWithoutReindex { updated_settings: old }, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl ReindexAction { | ||||||
|  |     fn push_action(this: &mut Option<Self>, other: Self) { | ||||||
|  |         *this = match (*this, other) { | ||||||
|  |             (_, ReindexAction::FullReindex) => Some(ReindexAction::FullReindex), | ||||||
|  |             (Some(ReindexAction::FullReindex), _) => Some(ReindexAction::FullReindex), | ||||||
|  |             (_, ReindexAction::RegeneratePrompts) => Some(ReindexAction::RegeneratePrompts), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[allow(clippy::too_many_arguments)] // private function | ||||||
|  | fn apply_default_for_source( | ||||||
|  |     source: &Setting<EmbedderSource>, | ||||||
|  |     model: &mut Setting<String>, | ||||||
|  |     revision: &mut Setting<String>, | ||||||
|  |     dimensions: &mut Setting<usize>, | ||||||
|  |     url: &mut Setting<String>, | ||||||
|  |     query: &mut Setting<serde_json::Value>, | ||||||
|  |     input_field: &mut Setting<Vec<String>>, | ||||||
|  |     path_to_embeddings: &mut Setting<Vec<String>>, | ||||||
|  |     embedding_object: &mut Setting<Vec<String>>, | ||||||
|  |     input_type: &mut Setting<InputType>, | ||||||
|  |     document_template: &mut Setting<String>, | ||||||
|  | ) { | ||||||
|  |     match source { | ||||||
|  |         Setting::Set(EmbedderSource::HuggingFace) => { | ||||||
|  |             *model = Setting::Reset; | ||||||
|  |             *revision = Setting::Reset; | ||||||
|  |             *dimensions = Setting::NotSet; | ||||||
|  |             *url = Setting::NotSet; | ||||||
|  |             *query = Setting::NotSet; | ||||||
|  |             *input_field = Setting::NotSet; | ||||||
|  |             *path_to_embeddings = Setting::NotSet; | ||||||
|  |             *embedding_object = Setting::NotSet; | ||||||
|  |             *input_type = Setting::NotSet; | ||||||
|  |         } | ||||||
|  |         Setting::Set(EmbedderSource::Ollama) => { | ||||||
|  |             *model = Setting::Reset; | ||||||
|  |             *revision = Setting::NotSet; | ||||||
|  |             *dimensions = Setting::Reset; | ||||||
|  |             *url = Setting::NotSet; | ||||||
|  |             *query = Setting::NotSet; | ||||||
|  |             *input_field = Setting::NotSet; | ||||||
|  |             *path_to_embeddings = Setting::NotSet; | ||||||
|  |             *embedding_object = Setting::NotSet; | ||||||
|  |             *input_type = Setting::NotSet; | ||||||
|  |         } | ||||||
|  |         Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => { | ||||||
|  |             *model = Setting::Reset; | ||||||
|  |             *revision = Setting::NotSet; | ||||||
|  |             *dimensions = Setting::NotSet; | ||||||
|  |             *url = Setting::NotSet; | ||||||
|  |             *query = Setting::NotSet; | ||||||
|  |             *input_field = Setting::NotSet; | ||||||
|  |             *path_to_embeddings = Setting::NotSet; | ||||||
|  |             *embedding_object = Setting::NotSet; | ||||||
|  |             *input_type = Setting::NotSet; | ||||||
|  |         } | ||||||
|  |         Setting::Set(EmbedderSource::Rest) => { | ||||||
|  |             *model = Setting::NotSet; | ||||||
|  |             *revision = Setting::NotSet; | ||||||
|  |             *dimensions = Setting::Reset; | ||||||
|  |             *url = Setting::Reset; | ||||||
|  |             *query = Setting::Reset; | ||||||
|  |             *input_field = Setting::Reset; | ||||||
|  |             *path_to_embeddings = Setting::Reset; | ||||||
|  |             *embedding_object = Setting::Reset; | ||||||
|  |             *input_type = Setting::Reset; | ||||||
|  |         } | ||||||
|  |         Setting::Set(EmbedderSource::UserProvided) => { | ||||||
|  |             *model = Setting::NotSet; | ||||||
|  |             *revision = Setting::NotSet; | ||||||
|  |             *dimensions = Setting::Reset; | ||||||
|  |             *url = Setting::NotSet; | ||||||
|  |             *query = Setting::NotSet; | ||||||
|  |             *input_field = Setting::NotSet; | ||||||
|  |             *path_to_embeddings = Setting::NotSet; | ||||||
|  |             *embedding_object = Setting::NotSet; | ||||||
|  |             *input_type = Setting::NotSet; | ||||||
|  |             *document_template = Setting::NotSet; | ||||||
|  |         } | ||||||
|  |         Setting::NotSet => {} | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| pub fn check_set<T>( | pub fn check_set<T>( | ||||||
|     key: &Setting<T>, |     key: &Setting<T>, | ||||||
|     field: &'static str, |     field: &'static str, | ||||||
| @@ -210,66 +443,6 @@ impl EmbeddingSettings { | |||||||
|             *model = Setting::Set(openai::EmbeddingModel::default().name().to_owned()) |             *model = Setting::Set(openai::EmbeddingModel::default().name().to_owned()) | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub(crate) fn apply_and_need_reindex( |  | ||||||
|         old: &mut Setting<EmbeddingSettings>, |  | ||||||
|         new: Setting<EmbeddingSettings>, |  | ||||||
|     ) -> bool { |  | ||||||
|         match (old, new) { |  | ||||||
|             ( |  | ||||||
|                 Setting::Set(EmbeddingSettings { |  | ||||||
|                     source: old_source, |  | ||||||
|                     model: old_model, |  | ||||||
|                     revision: old_revision, |  | ||||||
|                     api_key: old_api_key, |  | ||||||
|                     dimensions: old_dimensions, |  | ||||||
|                     document_template: old_document_template, |  | ||||||
|                     url: old_url, |  | ||||||
|                     query: old_query, |  | ||||||
|                     input_field: old_input_field, |  | ||||||
|                     path_to_embeddings: old_path_to_embeddings, |  | ||||||
|                     embedding_object: old_embedding_object, |  | ||||||
|                     input_type: old_input_type, |  | ||||||
|                     distribution: old_distribution, |  | ||||||
|                 }), |  | ||||||
|                 Setting::Set(EmbeddingSettings { |  | ||||||
|                     source: new_source, |  | ||||||
|                     model: new_model, |  | ||||||
|                     revision: new_revision, |  | ||||||
|                     api_key: new_api_key, |  | ||||||
|                     dimensions: new_dimensions, |  | ||||||
|                     document_template: new_document_template, |  | ||||||
|                     url: new_url, |  | ||||||
|                     query: new_query, |  | ||||||
|                     input_field: new_input_field, |  | ||||||
|                     path_to_embeddings: new_path_to_embeddings, |  | ||||||
|                     embedding_object: new_embedding_object, |  | ||||||
|                     input_type: new_input_type, |  | ||||||
|                     distribution: new_distribution, |  | ||||||
|                 }), |  | ||||||
|             ) => { |  | ||||||
|                 let mut needs_reindex = false; |  | ||||||
|  |  | ||||||
|                 needs_reindex |= old_source.apply(new_source); |  | ||||||
|                 needs_reindex |= old_model.apply(new_model); |  | ||||||
|                 needs_reindex |= old_revision.apply(new_revision); |  | ||||||
|                 needs_reindex |= old_dimensions.apply(new_dimensions); |  | ||||||
|                 needs_reindex |= old_document_template.apply(new_document_template); |  | ||||||
|                 needs_reindex |= old_url.apply(new_url); |  | ||||||
|                 needs_reindex |= old_query.apply(new_query); |  | ||||||
|                 needs_reindex |= old_input_field.apply(new_input_field); |  | ||||||
|                 needs_reindex |= old_path_to_embeddings.apply(new_path_to_embeddings); |  | ||||||
|                 needs_reindex |= old_embedding_object.apply(new_embedding_object); |  | ||||||
|                 needs_reindex |= old_input_type.apply(new_input_type); |  | ||||||
|  |  | ||||||
|                 old_distribution.apply(new_distribution); |  | ||||||
|                 old_api_key.apply(new_api_key); |  | ||||||
|                 needs_reindex |  | ||||||
|             } |  | ||||||
|             (Setting::Reset, Setting::Reset) | (_, Setting::NotSet) => false, |  | ||||||
|             _ => true, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] | #[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user