mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-12-13 07:57:02 +00:00
Compare commits
17 Commits
stream-dum
...
tmp-v1.8.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ee99196c92 | ||
|
|
aeef2bae33 | ||
|
|
7f7d2d0449 | ||
|
|
e100292417 | ||
|
|
ba75d23bfe | ||
|
|
7fbb3bf8e8 | ||
|
|
9066a446a3 | ||
|
|
f762307838 | ||
|
|
3e94a90722 | ||
|
|
fc7e817221 | ||
|
|
0f78703b85 | ||
|
|
c668043c4f | ||
|
|
5a305bfdea | ||
|
|
f4dd73ec8c | ||
|
|
66dce4600d | ||
|
|
fe51ceca6d | ||
|
|
88174b8ae4 |
38
Cargo.lock
generated
38
Cargo.lock
generated
@@ -494,7 +494,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "benchmarks"
|
name = "benchmarks"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"bytes",
|
"bytes",
|
||||||
@@ -639,7 +639,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "build-info"
|
name = "build-info"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"time",
|
"time",
|
||||||
@@ -889,9 +889,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "charabia"
|
name = "charabia"
|
||||||
version = "0.8.9"
|
version = "0.8.10"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f6a65052f308636e5d5e1777f0dbc07919f5fbac24b6c8ad3e140472e5520de9"
|
checksum = "933f20f2269b24d32fd5503e7b3c268af902190daf8d9d2b73ed2e75d77c00b4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick",
|
"aho-corasick",
|
||||||
"cow-utils",
|
"cow-utils",
|
||||||
@@ -1539,7 +1539,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dump"
|
name = "dump"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"big_s",
|
"big_s",
|
||||||
@@ -1787,7 +1787,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "file-store"
|
name = "file-store"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"faux",
|
"faux",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
@@ -1810,7 +1810,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "filter-parser"
|
name = "filter-parser"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"insta",
|
"insta",
|
||||||
"nom",
|
"nom",
|
||||||
@@ -1830,7 +1830,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "flatten-serde-json"
|
name = "flatten-serde-json"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"criterion",
|
"criterion",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
@@ -1948,7 +1948,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fuzzers"
|
name = "fuzzers"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arbitrary",
|
"arbitrary",
|
||||||
"clap",
|
"clap",
|
||||||
@@ -2442,7 +2442,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "index-scheduler"
|
name = "index-scheduler"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"big_s",
|
"big_s",
|
||||||
@@ -2638,7 +2638,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "json-depth-checker"
|
name = "json-depth-checker"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"criterion",
|
"criterion",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
@@ -3275,7 +3275,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meili-snap"
|
name = "meili-snap"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"insta",
|
"insta",
|
||||||
"md5",
|
"md5",
|
||||||
@@ -3284,7 +3284,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilisearch"
|
name = "meilisearch"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix-cors",
|
"actix-cors",
|
||||||
"actix-http",
|
"actix-http",
|
||||||
@@ -3377,7 +3377,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilisearch-auth"
|
name = "meilisearch-auth"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"base64 0.21.7",
|
"base64 0.21.7",
|
||||||
"enum-iterator",
|
"enum-iterator",
|
||||||
@@ -3396,7 +3396,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilisearch-types"
|
name = "meilisearch-types"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix-web",
|
"actix-web",
|
||||||
"anyhow",
|
"anyhow",
|
||||||
@@ -3426,7 +3426,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilitool"
|
name = "meilitool"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"clap",
|
"clap",
|
||||||
@@ -3465,7 +3465,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "milli"
|
name = "milli"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arroy",
|
"arroy",
|
||||||
"big_s",
|
"big_s",
|
||||||
@@ -3906,7 +3906,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "permissive-json-pointer"
|
name = "permissive-json-pointer"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"big_s",
|
"big_s",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
@@ -6074,7 +6074,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "xtask"
|
name = "xtask"
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"build-info",
|
"build-info",
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ members = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
version = "1.8.0"
|
version = "1.8.1"
|
||||||
authors = [
|
authors = [
|
||||||
"Quentin de Quelen <quentin@dequelen.me>",
|
"Quentin de Quelen <quentin@dequelen.me>",
|
||||||
"Clément Renault <clement@meilisearch.com>",
|
"Clément Renault <clement@meilisearch.com>",
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ use serde::{Deserialize, Serialize};
|
|||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
mod error;
|
mod error;
|
||||||
mod new_writer;
|
|
||||||
mod reader;
|
mod reader;
|
||||||
mod writer;
|
mod writer;
|
||||||
|
|
||||||
|
|||||||
@@ -1,251 +0,0 @@
|
|||||||
use std::fs::File;
|
|
||||||
use std::io::{Read, Seek, Write};
|
|
||||||
use std::path::Path;
|
|
||||||
use std::result::Result as StdResult;
|
|
||||||
|
|
||||||
use flate2::write::GzEncoder;
|
|
||||||
use flate2::Compression;
|
|
||||||
use meilisearch_types::milli::documents::{
|
|
||||||
obkv_to_object, DocumentsBatchCursor, DocumentsBatchIndex, DocumentsBatchReader,
|
|
||||||
};
|
|
||||||
use tar::{Builder as TarBuilder, Header};
|
|
||||||
use time::OffsetDateTime;
|
|
||||||
use uuid::Uuid;
|
|
||||||
|
|
||||||
use crate::{Key, Metadata, Result, TaskId, CURRENT_DUMP_VERSION};
|
|
||||||
|
|
||||||
pub struct DumpWriter<W: Write> {
|
|
||||||
tar: TarBuilder<GzEncoder<W>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<W: Write> DumpWriter<W> {
|
|
||||||
pub fn new(instance_uuid: Option<Uuid>, writer: W) -> Result<Self> {
|
|
||||||
/// TODO: should we use a BuffWriter?
|
|
||||||
let gz_encoder = GzEncoder::new(writer, Compression::default());
|
|
||||||
let mut tar = TarBuilder::new(gz_encoder);
|
|
||||||
|
|
||||||
let mut header = Header::new_gnu();
|
|
||||||
|
|
||||||
// Append metadata into metadata.json.
|
|
||||||
let metadata = Metadata {
|
|
||||||
dump_version: CURRENT_DUMP_VERSION,
|
|
||||||
db_version: env!("CARGO_PKG_VERSION").to_string(),
|
|
||||||
dump_date: OffsetDateTime::now_utc(),
|
|
||||||
};
|
|
||||||
let data = serde_json::to_string(&metadata).unwrap();
|
|
||||||
header.set_size(data.len() as u64);
|
|
||||||
tar.append_data(&mut header, "metadata.json", data.as_bytes()).unwrap();
|
|
||||||
|
|
||||||
// Append instance uid into instance_uid.uuid.
|
|
||||||
if let Some(instance_uuid) = instance_uuid {
|
|
||||||
let data = instance_uuid.as_hyphenated().to_string();
|
|
||||||
header.set_size(data.len() as u64);
|
|
||||||
tar.append_data(&mut header, "instance_uid.uuid", data.as_bytes()).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Self { tar })
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn dump_keys(&mut self, keys: &[Key]) -> Result<()> {
|
|
||||||
let mut buffer = Vec::new();
|
|
||||||
for key in keys {
|
|
||||||
serde_json::to_writer(&mut buffer, key)?;
|
|
||||||
buffer.push(b'\n');
|
|
||||||
}
|
|
||||||
let mut header = Header::new_gnu();
|
|
||||||
header.set_path("keys.jsonl");
|
|
||||||
header.set_size(buffer.len() as u64);
|
|
||||||
|
|
||||||
self.tar.append(&mut header, buffer.as_slice())?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn create_tasks(&mut self) -> Result<FileWriter<W>> {
|
|
||||||
FileWriter::new(&mut self.tar, "tasks/queue.jsonl")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn dump_update_file<R: Read + Seek>(
|
|
||||||
&mut self,
|
|
||||||
task_uid: TaskId,
|
|
||||||
update_file: DocumentsBatchReader<R>,
|
|
||||||
) -> Result<()> {
|
|
||||||
let path = format!("tasks/update_files/{}.jsonl", task_uid);
|
|
||||||
let mut fw = FileWriter::new(&mut self.tar, path)?;
|
|
||||||
let mut serializer = UpdateFileSerializer::new(update_file);
|
|
||||||
fw.calculate_len(SerializerIteratorReader::new(&mut serializer))?;
|
|
||||||
serializer.reset();
|
|
||||||
fw.write_data(SerializerIteratorReader::new(&mut serializer))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
trait SerializerIterator {
|
|
||||||
fn next_serialize_into(&mut self, buffer: &mut Vec<u8>) -> StdResult<bool, std::io::Error>;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct SerializerIteratorReader<'i, I: SerializerIterator> {
|
|
||||||
iterator: &'i mut I,
|
|
||||||
buffer: Vec<u8>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<I: SerializerIterator> Read for SerializerIteratorReader<'_, I> {
|
|
||||||
fn read(&mut self, buf: &mut [u8]) -> StdResult<usize, std::io::Error> {
|
|
||||||
let mut size = 0;
|
|
||||||
loop {
|
|
||||||
// if the inner buffer is empty, fill it with a new document.
|
|
||||||
if self.buffer.is_empty() {
|
|
||||||
if !self.iterator.next_serialize_into(&mut self.buffer)? {
|
|
||||||
// nothing more to write, return the written size.
|
|
||||||
return Ok(size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let doc_size = self.buffer.len();
|
|
||||||
let remaining_size = buf[size..].len();
|
|
||||||
if remaining_size < doc_size {
|
|
||||||
// if the serialized document size exceed the buf size,
|
|
||||||
// drain the inner buffer filling the remaining space.
|
|
||||||
buf[size..].copy_from_slice(&self.buffer[..remaining_size]);
|
|
||||||
self.buffer.drain(..remaining_size);
|
|
||||||
|
|
||||||
// then return.
|
|
||||||
return Ok(buf.len());
|
|
||||||
} else {
|
|
||||||
// otherwise write the whole inner buffer into the buf, clear it and continue.
|
|
||||||
buf[size..][..doc_size].copy_from_slice(&self.buffer);
|
|
||||||
size += doc_size;
|
|
||||||
self.buffer.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'i, I: SerializerIterator> SerializerIteratorReader<'i, I> {
|
|
||||||
fn new(iterator: &'i mut I) -> Self {
|
|
||||||
Self { iterator, buffer: Vec::new() }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct UpdateFileSerializer<R: Read> {
|
|
||||||
cursor: DocumentsBatchCursor<R>,
|
|
||||||
documents_batch_index: DocumentsBatchIndex,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<R: Read + Seek> SerializerIterator for UpdateFileSerializer<R> {
|
|
||||||
fn next_serialize_into(&mut self, buffer: &mut Vec<u8>) -> StdResult<bool, std::io::Error> {
|
|
||||||
/// TODO: don't unwrap, original version: `cursor.next_document().map_err(milli::Error::from)?`
|
|
||||||
match self.cursor.next_document().unwrap() {
|
|
||||||
Some(doc) => {
|
|
||||||
/// TODO: don't unwrap
|
|
||||||
let json_value = obkv_to_object(&doc, &self.documents_batch_index).unwrap();
|
|
||||||
serde_json::to_writer(&mut *buffer, &json_value)?;
|
|
||||||
buffer.push(b'\n');
|
|
||||||
Ok(true)
|
|
||||||
}
|
|
||||||
None => Ok(false),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<R: Read + Seek> UpdateFileSerializer<R> {
|
|
||||||
fn new(reader: DocumentsBatchReader<R>) -> Self {
|
|
||||||
let (cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
|
|
||||||
|
|
||||||
Self { cursor, documents_batch_index }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Resets the cursor to be able to read from the start again.
|
|
||||||
pub fn reset(&mut self) {
|
|
||||||
self.cursor.reset();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct FileWriter<'a, W: Write> {
|
|
||||||
header: Header,
|
|
||||||
tar: &'a mut TarBuilder<GzEncoder<W>>,
|
|
||||||
size: Option<u64>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, W: Write> FileWriter<'a, W> {
|
|
||||||
pub(crate) fn new<P: AsRef<Path>>(
|
|
||||||
tar: &'a mut TarBuilder<GzEncoder<W>>,
|
|
||||||
path: P,
|
|
||||||
) -> Result<Self> {
|
|
||||||
let mut header = Header::new_gnu();
|
|
||||||
header.set_path(path);
|
|
||||||
Ok(Self { header, tar, size: None })
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn calculate_len<R: Read>(&mut self, mut reader: R) -> Result<u64> {
|
|
||||||
let mut calculator = SizeCalculatorWriter::new();
|
|
||||||
std::io::copy(&mut reader, &mut calculator)?;
|
|
||||||
let size = calculator.into_inner();
|
|
||||||
self.size = Some(size);
|
|
||||||
|
|
||||||
Ok(size)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn write_data<R: Read>(mut self, reader: R) -> Result<()> {
|
|
||||||
let expected_size =
|
|
||||||
self.size.expect("calculate_len must be called before writing the data.");
|
|
||||||
self.header.set_size(expected_size);
|
|
||||||
|
|
||||||
let mut scr = SizeCalculatorReader::new(reader);
|
|
||||||
self.tar.append(&mut self.header, &mut scr)?;
|
|
||||||
assert_eq!(
|
|
||||||
expected_size,
|
|
||||||
scr.into_inner(),
|
|
||||||
"Provided data size is different from the pre-calculated size."
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct SizeCalculatorWriter {
|
|
||||||
size: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SizeCalculatorWriter {
|
|
||||||
fn new() -> Self {
|
|
||||||
Self { size: 0 }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn into_inner(self) -> u64 {
|
|
||||||
self.size as u64
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Write for SizeCalculatorWriter {
|
|
||||||
fn write(&mut self, buf: &[u8]) -> StdResult<usize, std::io::Error> {
|
|
||||||
self.size += buf.len();
|
|
||||||
Ok(self.size)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct SizeCalculatorReader<R: Read> {
|
|
||||||
size: usize,
|
|
||||||
reader: R,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<R: Read> SizeCalculatorReader<R> {
|
|
||||||
fn new(reader: R) -> Self {
|
|
||||||
Self { size: 0, reader }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn into_inner(self) -> u64 {
|
|
||||||
self.size as u64
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<R: Read> Read for SizeCalculatorReader<R> {
|
|
||||||
fn read(&mut self, buf: &mut [u8]) -> StdResult<usize, std::io::Error> {
|
|
||||||
let size = self.reader.read(buf)?;
|
|
||||||
self.size += size;
|
|
||||||
|
|
||||||
Ok(size)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -567,16 +567,16 @@ impl IndexScheduler {
|
|||||||
|
|
||||||
tracing::debug!("index budget: {budget}B");
|
tracing::debug!("index budget: {budget}B");
|
||||||
let mut index_count = budget / base_map_size;
|
let mut index_count = budget / base_map_size;
|
||||||
if index_count < 2 {
|
if index_count < 3 {
|
||||||
// take a bit less than half than the budget to make sure we can always afford to open an index
|
// take a bit less than half than the budget to make sure we can always afford to open an index
|
||||||
let map_size = (budget * 2) / 5;
|
let map_size = (budget * 2) / 5;
|
||||||
// single index of max budget
|
// single index of max budget
|
||||||
tracing::debug!("1 index of {map_size}B can be opened simultaneously.");
|
tracing::debug!("1 index of {map_size}B can be opened simultaneously.");
|
||||||
return IndexBudget { map_size, index_count: 1, task_db_size };
|
return IndexBudget { map_size, index_count: 1, task_db_size };
|
||||||
}
|
}
|
||||||
// give us some space for an additional index when the cache is already full
|
// give us some space for additional indexes when the cache is already full
|
||||||
// decrement is OK because index_count >= 2.
|
// decrement is OK because index_count >= 3.
|
||||||
index_count -= 1;
|
index_count -= 2;
|
||||||
if index_count > max_index_count {
|
if index_count > max_index_count {
|
||||||
index_count = max_index_count;
|
index_count = max_index_count;
|
||||||
}
|
}
|
||||||
@@ -1834,7 +1834,7 @@ mod tests {
|
|||||||
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
||||||
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
|
||||||
enable_mdb_writemap: false,
|
enable_mdb_writemap: false,
|
||||||
index_growth_amount: 1000 * 1000, // 1 MB
|
index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB
|
||||||
index_count: 5,
|
index_count: 5,
|
||||||
indexer_config,
|
indexer_config,
|
||||||
autobatching_enabled: true,
|
autobatching_enabled: true,
|
||||||
|
|||||||
@@ -57,3 +57,5 @@ greek = ["milli/greek"]
|
|||||||
khmer = ["milli/khmer"]
|
khmer = ["milli/khmer"]
|
||||||
# allow vietnamese specialized tokenization
|
# allow vietnamese specialized tokenization
|
||||||
vietnamese = ["milli/vietnamese"]
|
vietnamese = ["milli/vietnamese"]
|
||||||
|
# force swedish character recomposition
|
||||||
|
swedish-recomposition = ["milli/swedish-recomposition"]
|
||||||
|
|||||||
@@ -156,6 +156,7 @@ thai = ["meilisearch-types/thai"]
|
|||||||
greek = ["meilisearch-types/greek"]
|
greek = ["meilisearch-types/greek"]
|
||||||
khmer = ["meilisearch-types/khmer"]
|
khmer = ["meilisearch-types/khmer"]
|
||||||
vietnamese = ["meilisearch-types/vietnamese"]
|
vietnamese = ["meilisearch-types/vietnamese"]
|
||||||
|
swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
|
||||||
|
|
||||||
[package.metadata.mini-dashboard]
|
[package.metadata.mini-dashboard]
|
||||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
|
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
|
||||||
|
|||||||
@@ -367,12 +367,6 @@ async fn get_version(
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
|
||||||
struct KeysResponse {
|
|
||||||
private: Option<String>,
|
|
||||||
public: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn get_health(
|
pub async fn get_health(
|
||||||
index_scheduler: Data<IndexScheduler>,
|
index_scheduler: Data<IndexScheduler>,
|
||||||
auth_controller: Data<AuthController>,
|
auth_controller: Data<AuthController>,
|
||||||
|
|||||||
@@ -117,3 +117,69 @@ async fn geo_bounding_box_with_string_and_number() {
|
|||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn bug_4640() {
|
||||||
|
// https://github.com/meilisearch/meilisearch/issues/4640
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
index.add_documents(documents, None).await;
|
||||||
|
index.update_settings_filterable_attributes(json!(["_geo"])).await;
|
||||||
|
let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await;
|
||||||
|
index.wait_task(ret.uid()).await;
|
||||||
|
|
||||||
|
// Sort the document with the second one first
|
||||||
|
index
|
||||||
|
.search(
|
||||||
|
json!({
|
||||||
|
"sort": ["_geoPoint(45.4777599, 9.1967508):asc"],
|
||||||
|
}),
|
||||||
|
|response, code| {
|
||||||
|
assert_eq!(code, 200, "{}", response);
|
||||||
|
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
|
||||||
|
{
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"name": "La Bella Italia",
|
||||||
|
"address": "456 Elm Street, Townsville",
|
||||||
|
"type": "Italian",
|
||||||
|
"rating": 9,
|
||||||
|
"_geo": {
|
||||||
|
"lat": "45.4777599",
|
||||||
|
"lng": "9.1967508"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"name": "Taco Truck",
|
||||||
|
"address": "444 Salsa Street, Burritoville",
|
||||||
|
"type": "Mexican",
|
||||||
|
"rating": 9,
|
||||||
|
"_geo": {
|
||||||
|
"lat": 34.0522,
|
||||||
|
"lng": -118.2437
|
||||||
|
},
|
||||||
|
"_geoDistance": 9714063
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"name": "Crêpe Truck",
|
||||||
|
"address": "2 Billig Avenue, Rouenville",
|
||||||
|
"type": "French",
|
||||||
|
"rating": 10
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[time]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 3
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ bincode = "1.3.3"
|
|||||||
bstr = "1.9.0"
|
bstr = "1.9.0"
|
||||||
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
|
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
|
||||||
byteorder = "1.5.0"
|
byteorder = "1.5.0"
|
||||||
charabia = { version = "0.8.9", default-features = false }
|
charabia = { version = "0.8.10", default-features = false }
|
||||||
concat-arrays = "0.1.2"
|
concat-arrays = "0.1.2"
|
||||||
crossbeam-channel = "0.5.11"
|
crossbeam-channel = "0.5.11"
|
||||||
deserr = "0.6.1"
|
deserr = "0.6.1"
|
||||||
@@ -136,7 +136,11 @@ greek = ["charabia/greek"]
|
|||||||
# allow khmer specialized tokenization
|
# allow khmer specialized tokenization
|
||||||
khmer = ["charabia/khmer"]
|
khmer = ["charabia/khmer"]
|
||||||
|
|
||||||
|
# allow vietnamese specialized tokenization
|
||||||
vietnamese = ["charabia/vietnamese"]
|
vietnamese = ["charabia/vietnamese"]
|
||||||
|
|
||||||
|
# force swedish character recomposition
|
||||||
|
swedish-recomposition = ["charabia/swedish-recomposition"]
|
||||||
|
|
||||||
# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
|
# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
|
||||||
cuda = ["candle-core/cuda"]
|
cuda = ["candle-core/cuda"]
|
||||||
|
|||||||
@@ -45,7 +45,6 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||||||
obkv_documents: grenad::Reader<R>,
|
obkv_documents: grenad::Reader<R>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
settings_diff: &InnerIndexSettingsDiff,
|
settings_diff: &InnerIndexSettingsDiff,
|
||||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
|
||||||
) -> Result<ExtractedFacetValues> {
|
) -> Result<ExtractedFacetValues> {
|
||||||
puffin::profile_function!();
|
puffin::profile_function!();
|
||||||
|
|
||||||
@@ -127,12 +126,18 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||||||
add_exists.insert(document);
|
add_exists.insert(document);
|
||||||
}
|
}
|
||||||
|
|
||||||
let geo_support =
|
let del_geo_support = settings_diff
|
||||||
geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
|
.old
|
||||||
|
.geo_fields_ids
|
||||||
|
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
|
||||||
|
let add_geo_support = settings_diff
|
||||||
|
.new
|
||||||
|
.geo_fields_ids
|
||||||
|
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
|
||||||
let del_filterable_values =
|
let del_filterable_values =
|
||||||
del_value.map(|value| extract_facet_values(&value, geo_support));
|
del_value.map(|value| extract_facet_values(&value, del_geo_support));
|
||||||
let add_filterable_values =
|
let add_filterable_values =
|
||||||
add_value.map(|value| extract_facet_values(&value, geo_support));
|
add_value.map(|value| extract_facet_values(&value, add_geo_support));
|
||||||
|
|
||||||
// Those closures are just here to simplify things a bit.
|
// Those closures are just here to simplify things a bit.
|
||||||
let mut insert_numbers_diff = |del_numbers, add_numbers| {
|
let mut insert_numbers_diff = |del_numbers, add_numbers| {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
|||||||
use crate::error::GeoError;
|
use crate::error::GeoError;
|
||||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||||
use crate::update::index_documents::extract_finite_float_from_value;
|
use crate::update::index_documents::extract_finite_float_from_value;
|
||||||
|
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
|
||||||
use crate::{FieldId, InternalError, Result};
|
use crate::{FieldId, InternalError, Result};
|
||||||
|
|
||||||
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
|
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
|
||||||
@@ -18,7 +19,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
|
|||||||
obkv_documents: grenad::Reader<R>,
|
obkv_documents: grenad::Reader<R>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
primary_key_id: FieldId,
|
primary_key_id: FieldId,
|
||||||
(lat_fid, lng_fid): (FieldId, FieldId),
|
settings_diff: &InnerIndexSettingsDiff,
|
||||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||||
puffin::profile_function!();
|
puffin::profile_function!();
|
||||||
|
|
||||||
@@ -40,47 +41,27 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
|
|||||||
serde_json::from_slice(document_id).unwrap()
|
serde_json::from_slice(document_id).unwrap()
|
||||||
};
|
};
|
||||||
|
|
||||||
// first we get the two fields
|
// extract old version
|
||||||
match (obkv.get(lat_fid), obkv.get(lng_fid)) {
|
let del_lat_lng =
|
||||||
(Some(lat), Some(lng)) => {
|
extract_lat_lng(&obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
|
||||||
let deladd_lat_obkv = KvReaderDelAdd::new(lat);
|
// extract new version
|
||||||
let deladd_lng_obkv = KvReaderDelAdd::new(lng);
|
let add_lat_lng =
|
||||||
|
extract_lat_lng(&obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
|
||||||
|
|
||||||
// then we extract the values
|
if del_lat_lng != add_lat_lng {
|
||||||
let del_lat_lng = deladd_lat_obkv
|
let mut obkv = KvWriterDelAdd::memory();
|
||||||
.get(DelAdd::Deletion)
|
if let Some([lat, lng]) = del_lat_lng {
|
||||||
.zip(deladd_lng_obkv.get(DelAdd::Deletion))
|
#[allow(clippy::drop_non_drop)]
|
||||||
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
|
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
|
||||||
.transpose()?;
|
obkv.insert(DelAdd::Deletion, bytes)?;
|
||||||
let add_lat_lng = deladd_lat_obkv
|
|
||||||
.get(DelAdd::Addition)
|
|
||||||
.zip(deladd_lng_obkv.get(DelAdd::Addition))
|
|
||||||
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
|
|
||||||
.transpose()?;
|
|
||||||
|
|
||||||
if del_lat_lng != add_lat_lng {
|
|
||||||
let mut obkv = KvWriterDelAdd::memory();
|
|
||||||
if let Some([lat, lng]) = del_lat_lng {
|
|
||||||
#[allow(clippy::drop_non_drop)]
|
|
||||||
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
|
|
||||||
obkv.insert(DelAdd::Deletion, bytes)?;
|
|
||||||
}
|
|
||||||
if let Some([lat, lng]) = add_lat_lng {
|
|
||||||
#[allow(clippy::drop_non_drop)]
|
|
||||||
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
|
|
||||||
obkv.insert(DelAdd::Addition, bytes)?;
|
|
||||||
}
|
|
||||||
let bytes = obkv.into_inner()?;
|
|
||||||
writer.insert(docid_bytes, bytes)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
(None, Some(_)) => {
|
if let Some([lat, lng]) = add_lat_lng {
|
||||||
return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
|
#[allow(clippy::drop_non_drop)]
|
||||||
|
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
|
||||||
|
obkv.insert(DelAdd::Addition, bytes)?;
|
||||||
}
|
}
|
||||||
(Some(_), None) => {
|
let bytes = obkv.into_inner()?;
|
||||||
return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
|
writer.insert(docid_bytes, bytes)?;
|
||||||
}
|
|
||||||
(None, None) => (),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -88,16 +69,37 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Extract the finite floats lat and lng from two bytes slices.
|
/// Extract the finite floats lat and lng from two bytes slices.
|
||||||
fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> {
|
fn extract_lat_lng(
|
||||||
let lat = extract_finite_float_from_value(
|
document: &obkv::KvReader<FieldId>,
|
||||||
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
|
settings: &InnerIndexSettings,
|
||||||
)
|
deladd: DelAdd,
|
||||||
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
|
document_id: impl Fn() -> Value,
|
||||||
|
) -> Result<Option<[f64; 2]>> {
|
||||||
|
match settings.geo_fields_ids {
|
||||||
|
Some((lat_fid, lng_fid)) => {
|
||||||
|
let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
|
||||||
|
let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
|
||||||
|
let (lat, lng) = match (lat, lng) {
|
||||||
|
(Some(lat), Some(lng)) => (lat, lng),
|
||||||
|
(Some(_), None) => {
|
||||||
|
return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
|
||||||
|
}
|
||||||
|
(None, Some(_)) => {
|
||||||
|
return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
|
||||||
|
}
|
||||||
|
(None, None) => return Ok(None),
|
||||||
|
};
|
||||||
|
let lat = extract_finite_float_from_value(
|
||||||
|
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
|
||||||
|
)
|
||||||
|
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
|
||||||
|
|
||||||
let lng = extract_finite_float_from_value(
|
let lng = extract_finite_float_from_value(
|
||||||
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
|
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
|
||||||
)
|
)
|
||||||
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
|
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
|
||||||
|
Ok(Some([lat, lng]))
|
||||||
Ok([lat, lng])
|
}
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -43,7 +43,6 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||||
primary_key_id: FieldId,
|
primary_key_id: FieldId,
|
||||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
|
||||||
settings_diff: Arc<InnerIndexSettingsDiff>,
|
settings_diff: Arc<InnerIndexSettingsDiff>,
|
||||||
max_positions_per_attributes: Option<u32>,
|
max_positions_per_attributes: Option<u32>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
@@ -72,7 +71,6 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
indexer,
|
indexer,
|
||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx.clone(),
|
||||||
primary_key_id,
|
primary_key_id,
|
||||||
geo_fields_ids,
|
|
||||||
settings_diff.clone(),
|
settings_diff.clone(),
|
||||||
max_positions_per_attributes,
|
max_positions_per_attributes,
|
||||||
)
|
)
|
||||||
@@ -300,7 +298,6 @@ fn send_and_extract_flattened_documents_data(
|
|||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||||
primary_key_id: FieldId,
|
primary_key_id: FieldId,
|
||||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
|
||||||
settings_diff: Arc<InnerIndexSettingsDiff>,
|
settings_diff: Arc<InnerIndexSettingsDiff>,
|
||||||
max_positions_per_attributes: Option<u32>,
|
max_positions_per_attributes: Option<u32>,
|
||||||
) -> Result<(
|
) -> Result<(
|
||||||
@@ -310,12 +307,13 @@ fn send_and_extract_flattened_documents_data(
|
|||||||
let flattened_documents_chunk =
|
let flattened_documents_chunk =
|
||||||
flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
|
flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
|
||||||
|
|
||||||
if let Some(geo_fields_ids) = geo_fields_ids {
|
if settings_diff.run_geo_indexing() {
|
||||||
let documents_chunk_cloned = flattened_documents_chunk.clone();
|
let documents_chunk_cloned = flattened_documents_chunk.clone();
|
||||||
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
|
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
|
||||||
|
let settings_diff = settings_diff.clone();
|
||||||
rayon::spawn(move || {
|
rayon::spawn(move || {
|
||||||
let result =
|
let result =
|
||||||
extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_fields_ids);
|
extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, &settings_diff);
|
||||||
let _ = match result {
|
let _ = match result {
|
||||||
Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
|
Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
|
||||||
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
|
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
|
||||||
@@ -354,7 +352,6 @@ fn send_and_extract_flattened_documents_data(
|
|||||||
flattened_documents_chunk.clone(),
|
flattened_documents_chunk.clone(),
|
||||||
indexer,
|
indexer,
|
||||||
&settings_diff,
|
&settings_diff,
|
||||||
geo_fields_ids,
|
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// send fid_docid_facet_numbers_chunk to DB writer
|
// send fid_docid_facet_numbers_chunk to DB writer
|
||||||
|
|||||||
@@ -324,28 +324,6 @@ where
|
|||||||
// get the primary key field id
|
// get the primary key field id
|
||||||
let primary_key_id = settings_diff.new.fields_ids_map.id(&primary_key).unwrap();
|
let primary_key_id = settings_diff.new.fields_ids_map.id(&primary_key).unwrap();
|
||||||
|
|
||||||
// get the fid of the `_geo.lat` and `_geo.lng` fields.
|
|
||||||
let mut field_id_map = self.index.fields_ids_map(self.wtxn)?;
|
|
||||||
|
|
||||||
// self.index.fields_ids_map($a)? ==>> field_id_map
|
|
||||||
let geo_fields_ids = match field_id_map.id("_geo") {
|
|
||||||
Some(gfid) => {
|
|
||||||
let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
|
|
||||||
let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
|
|
||||||
// if `_geo` is faceted then we get the `lat` and `lng`
|
|
||||||
if is_sortable || is_filterable {
|
|
||||||
let field_ids = field_id_map
|
|
||||||
.insert("_geo.lat")
|
|
||||||
.zip(field_id_map.insert("_geo.lng"))
|
|
||||||
.ok_or(UserError::AttributeLimitReached)?;
|
|
||||||
Some(field_ids)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
let pool_params = GrenadParameters {
|
let pool_params = GrenadParameters {
|
||||||
chunk_compression_type: self.indexer_config.chunk_compression_type,
|
chunk_compression_type: self.indexer_config.chunk_compression_type,
|
||||||
chunk_compression_level: self.indexer_config.chunk_compression_level,
|
chunk_compression_level: self.indexer_config.chunk_compression_level,
|
||||||
@@ -412,7 +390,6 @@ where
|
|||||||
pool_params,
|
pool_params,
|
||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx.clone(),
|
||||||
primary_key_id,
|
primary_key_id,
|
||||||
geo_fields_ids,
|
|
||||||
settings_diff.clone(),
|
settings_diff.clone(),
|
||||||
max_positions_per_attributes,
|
max_positions_per_attributes,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1161,6 +1161,11 @@ impl InnerIndexSettingsDiff {
|
|||||||
pub fn settings_update_only(&self) -> bool {
|
pub fn settings_update_only(&self) -> bool {
|
||||||
self.settings_update_only
|
self.settings_update_only
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn run_geo_indexing(&self) -> bool {
|
||||||
|
self.old.geo_fields_ids != self.new.geo_fields_ids
|
||||||
|
|| (!self.settings_update_only && self.new.geo_fields_ids.is_some())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@@ -1177,6 +1182,7 @@ pub(crate) struct InnerIndexSettings {
|
|||||||
pub proximity_precision: ProximityPrecision,
|
pub proximity_precision: ProximityPrecision,
|
||||||
pub embedding_configs: EmbeddingConfigs,
|
pub embedding_configs: EmbeddingConfigs,
|
||||||
pub existing_fields: HashSet<String>,
|
pub existing_fields: HashSet<String>,
|
||||||
|
pub geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl InnerIndexSettings {
|
impl InnerIndexSettings {
|
||||||
@@ -1185,7 +1191,7 @@ impl InnerIndexSettings {
|
|||||||
let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap());
|
let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap());
|
||||||
let allowed_separators = index.allowed_separators(rtxn)?;
|
let allowed_separators = index.allowed_separators(rtxn)?;
|
||||||
let dictionary = index.dictionary(rtxn)?;
|
let dictionary = index.dictionary(rtxn)?;
|
||||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
let mut fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
|
let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
|
||||||
let user_defined_searchable_fields =
|
let user_defined_searchable_fields =
|
||||||
user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
|
user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
|
||||||
@@ -1200,6 +1206,24 @@ impl InnerIndexSettings {
|
|||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|(field, count)| (count != 0).then_some(field))
|
.filter_map(|(field, count)| (count != 0).then_some(field))
|
||||||
.collect();
|
.collect();
|
||||||
|
// index.fields_ids_map($a)? ==>> fields_ids_map
|
||||||
|
let geo_fields_ids = match fields_ids_map.id("_geo") {
|
||||||
|
Some(gfid) => {
|
||||||
|
let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
|
||||||
|
let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
|
||||||
|
// if `_geo` is faceted then we get the `lat` and `lng`
|
||||||
|
if is_sortable || is_filterable {
|
||||||
|
let field_ids = fields_ids_map
|
||||||
|
.insert("_geo.lat")
|
||||||
|
.zip(fields_ids_map.insert("_geo.lng"))
|
||||||
|
.ok_or(UserError::AttributeLimitReached)?;
|
||||||
|
Some(field_ids)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
stop_words,
|
stop_words,
|
||||||
@@ -1214,6 +1238,7 @@ impl InnerIndexSettings {
|
|||||||
proximity_precision,
|
proximity_precision,
|
||||||
embedding_configs,
|
embedding_configs,
|
||||||
existing_fields,
|
existing_fields,
|
||||||
|
geo_fields_ids,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -301,10 +301,14 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
fn from(value: EmbeddingConfig) -> Self {
|
fn from(value: EmbeddingConfig) -> Self {
|
||||||
let EmbeddingConfig { embedder_options, prompt } = value;
|
let EmbeddingConfig { embedder_options, prompt } = value;
|
||||||
match embedder_options {
|
match embedder_options {
|
||||||
super::EmbedderOptions::HuggingFace(options) => Self {
|
super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions {
|
||||||
|
model,
|
||||||
|
revision,
|
||||||
|
distribution,
|
||||||
|
}) => Self {
|
||||||
source: Setting::Set(EmbedderSource::HuggingFace),
|
source: Setting::Set(EmbedderSource::HuggingFace),
|
||||||
model: Setting::Set(options.model),
|
model: Setting::Set(model),
|
||||||
revision: options.revision.map(Setting::Set).unwrap_or_default(),
|
revision: revision.map(Setting::Set).unwrap_or_default(),
|
||||||
api_key: Setting::NotSet,
|
api_key: Setting::NotSet,
|
||||||
dimensions: Setting::NotSet,
|
dimensions: Setting::NotSet,
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
@@ -314,14 +318,19 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
path_to_embeddings: Setting::NotSet,
|
path_to_embeddings: Setting::NotSet,
|
||||||
embedding_object: Setting::NotSet,
|
embedding_object: Setting::NotSet,
|
||||||
input_type: Setting::NotSet,
|
input_type: Setting::NotSet,
|
||||||
distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
|
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::OpenAi(options) => Self {
|
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
||||||
|
api_key,
|
||||||
|
embedding_model,
|
||||||
|
dimensions,
|
||||||
|
distribution,
|
||||||
|
}) => Self {
|
||||||
source: Setting::Set(EmbedderSource::OpenAi),
|
source: Setting::Set(EmbedderSource::OpenAi),
|
||||||
model: Setting::Set(options.embedding_model.name().to_owned()),
|
model: Setting::Set(embedding_model.name().to_owned()),
|
||||||
revision: Setting::NotSet,
|
revision: Setting::NotSet,
|
||||||
api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
|
api_key: api_key.map(Setting::Set).unwrap_or_default(),
|
||||||
dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(),
|
dimensions: dimensions.map(Setting::Set).unwrap_or_default(),
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
url: Setting::NotSet,
|
url: Setting::NotSet,
|
||||||
query: Setting::NotSet,
|
query: Setting::NotSet,
|
||||||
@@ -329,29 +338,37 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
path_to_embeddings: Setting::NotSet,
|
path_to_embeddings: Setting::NotSet,
|
||||||
embedding_object: Setting::NotSet,
|
embedding_object: Setting::NotSet,
|
||||||
input_type: Setting::NotSet,
|
input_type: Setting::NotSet,
|
||||||
distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
|
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::Ollama(options) => Self {
|
super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
|
||||||
|
embedding_model,
|
||||||
|
url,
|
||||||
|
api_key,
|
||||||
|
distribution,
|
||||||
|
}) => Self {
|
||||||
source: Setting::Set(EmbedderSource::Ollama),
|
source: Setting::Set(EmbedderSource::Ollama),
|
||||||
model: Setting::Set(options.embedding_model.to_owned()),
|
model: Setting::Set(embedding_model),
|
||||||
revision: Setting::NotSet,
|
revision: Setting::NotSet,
|
||||||
api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
|
api_key: api_key.map(Setting::Set).unwrap_or_default(),
|
||||||
dimensions: Setting::NotSet,
|
dimensions: Setting::NotSet,
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
url: Setting::NotSet,
|
url: url.map(Setting::Set).unwrap_or_default(),
|
||||||
query: Setting::NotSet,
|
query: Setting::NotSet,
|
||||||
input_field: Setting::NotSet,
|
input_field: Setting::NotSet,
|
||||||
path_to_embeddings: Setting::NotSet,
|
path_to_embeddings: Setting::NotSet,
|
||||||
embedding_object: Setting::NotSet,
|
embedding_object: Setting::NotSet,
|
||||||
input_type: Setting::NotSet,
|
input_type: Setting::NotSet,
|
||||||
distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
|
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::UserProvided(options) => Self {
|
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
|
||||||
|
dimensions,
|
||||||
|
distribution,
|
||||||
|
}) => Self {
|
||||||
source: Setting::Set(EmbedderSource::UserProvided),
|
source: Setting::Set(EmbedderSource::UserProvided),
|
||||||
model: Setting::NotSet,
|
model: Setting::NotSet,
|
||||||
revision: Setting::NotSet,
|
revision: Setting::NotSet,
|
||||||
api_key: Setting::NotSet,
|
api_key: Setting::NotSet,
|
||||||
dimensions: Setting::Set(options.dimensions),
|
dimensions: Setting::Set(dimensions),
|
||||||
document_template: Setting::NotSet,
|
document_template: Setting::NotSet,
|
||||||
url: Setting::NotSet,
|
url: Setting::NotSet,
|
||||||
query: Setting::NotSet,
|
query: Setting::NotSet,
|
||||||
@@ -359,7 +376,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
path_to_embeddings: Setting::NotSet,
|
path_to_embeddings: Setting::NotSet,
|
||||||
embedding_object: Setting::NotSet,
|
embedding_object: Setting::NotSet,
|
||||||
input_type: Setting::NotSet,
|
input_type: Setting::NotSet,
|
||||||
distribution: options.distribution.map(Setting::Set).unwrap_or_default(),
|
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
||||||
api_key,
|
api_key,
|
||||||
|
|||||||
Reference in New Issue
Block a user