mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 00:31:02 +00:00
implement the binary quantization in meilisearch
This commit is contained in:
@ -1,8 +1,12 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arroy::distances::{Angular, BinaryQuantizedAngular};
|
||||
use arroy::ItemId;
|
||||
use deserr::{DeserializeError, Deserr};
|
||||
use heed::{RoTxn, RwTxn, Unspecified};
|
||||
use ordered_float::OrderedFloat;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use self::error::{EmbedError, NewEmbedderError};
|
||||
@ -26,6 +30,171 @@ pub type Embedding = Vec<f32>;
|
||||
|
||||
pub const REQUEST_PARALLELISM: usize = 40;
|
||||
|
||||
pub struct ArroyReader {
|
||||
quantized: bool,
|
||||
index: u16,
|
||||
database: arroy::Database<Unspecified>,
|
||||
}
|
||||
|
||||
impl ArroyReader {
|
||||
pub fn new(database: arroy::Database<Unspecified>, index: u16, quantized: bool) -> Self {
|
||||
Self { database, index, quantized }
|
||||
}
|
||||
|
||||
pub fn index(&self) -> u16 {
|
||||
self.index
|
||||
}
|
||||
|
||||
pub fn dimensions(&self, rtxn: &RoTxn) -> Result<usize, arroy::Error> {
|
||||
if self.quantized {
|
||||
Ok(arroy::Reader::open(rtxn, self.index, self.quantized_db())?.dimensions())
|
||||
} else {
|
||||
Ok(arroy::Reader::open(rtxn, self.index, self.angular_db())?.dimensions())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn quantize(
|
||||
&mut self,
|
||||
wtxn: &mut RwTxn,
|
||||
index: u16,
|
||||
dimension: usize,
|
||||
) -> Result<(), arroy::Error> {
|
||||
if !self.quantized {
|
||||
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
||||
writer.prepare_changing_distance::<BinaryQuantizedAngular>(wtxn)?;
|
||||
self.quantized = true;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension).need_build(rtxn)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension).need_build(rtxn)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build<R: rand::Rng + rand::SeedableRng>(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
rng: &mut R,
|
||||
dimension: usize,
|
||||
) -> Result<(), arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension).build(wtxn, rng, None)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension).build(wtxn, rng, None)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_item(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
dimension: usize,
|
||||
item_id: arroy::ItemId,
|
||||
vector: &[f32],
|
||||
) -> Result<(), arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension)
|
||||
.add_item(wtxn, item_id, vector)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension)
|
||||
.add_item(wtxn, item_id, vector)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn del_item(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
dimension: usize,
|
||||
item_id: arroy::ItemId,
|
||||
) -> Result<bool, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension).del_item(wtxn, item_id)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension).del_item(wtxn, item_id)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension).clear(wtxn)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension).clear(wtxn)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension).is_empty(rtxn)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension).is_empty(rtxn)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn contains_item(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
dimension: usize,
|
||||
item: arroy::ItemId,
|
||||
) -> Result<bool, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), self.index, dimension).contains_item(rtxn, item)
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), self.index, dimension).contains_item(rtxn, item)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn nns_by_item(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
item: ItemId,
|
||||
limit: usize,
|
||||
filter: Option<&RoaringBitmap>,
|
||||
) -> Result<Option<Vec<(ItemId, f32)>>, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Reader::open(rtxn, self.index, self.quantized_db())?
|
||||
.nns_by_item(rtxn, item, limit, None, None, filter)
|
||||
} else {
|
||||
arroy::Reader::open(rtxn, self.index, self.angular_db())?
|
||||
.nns_by_item(rtxn, item, limit, None, None, filter)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn nns_by_vector(
|
||||
&self,
|
||||
txn: &RoTxn,
|
||||
item: &[f32],
|
||||
limit: usize,
|
||||
filter: Option<&RoaringBitmap>,
|
||||
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Reader::open(txn, self.index, self.quantized_db())?
|
||||
.nns_by_vector(txn, item, limit, None, None, filter)
|
||||
} else {
|
||||
arroy::Reader::open(txn, self.index, self.angular_db())?
|
||||
.nns_by_vector(txn, item, limit, None, None, filter)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn item_vector(&self, rtxn: &RoTxn, docid: u32) -> Result<Option<Vec<f32>>, arroy::Error> {
|
||||
if self.quantized {
|
||||
arroy::Reader::open(rtxn, self.index, self.quantized_db())?.item_vector(rtxn, docid)
|
||||
} else {
|
||||
arroy::Reader::open(rtxn, self.index, self.angular_db())?.item_vector(rtxn, docid)
|
||||
}
|
||||
}
|
||||
|
||||
fn angular_db(&self) -> arroy::Database<Angular> {
|
||||
self.database.remap_data_type()
|
||||
}
|
||||
|
||||
fn quantized_db(&self) -> arroy::Database<BinaryQuantizedAngular> {
|
||||
self.database.remap_data_type()
|
||||
}
|
||||
}
|
||||
|
||||
/// One or multiple embeddings stored consecutively in a flat vector.
|
||||
pub struct Embeddings<F> {
|
||||
data: Vec<F>,
|
||||
@ -124,39 +293,48 @@ pub struct EmbeddingConfig {
|
||||
pub embedder_options: EmbedderOptions,
|
||||
/// Document template
|
||||
pub prompt: PromptData,
|
||||
/// If this embedder is binary quantized
|
||||
pub quantized: Option<bool>,
|
||||
// TODO: add metrics and anything needed
|
||||
}
|
||||
|
||||
impl EmbeddingConfig {
|
||||
pub fn quantized(&self) -> bool {
|
||||
self.quantized.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Map of embedder configurations.
|
||||
///
|
||||
/// Each configuration is mapped to a name.
|
||||
#[derive(Clone, Default)]
|
||||
pub struct EmbeddingConfigs(HashMap<String, (Arc<Embedder>, Arc<Prompt>)>);
|
||||
pub struct EmbeddingConfigs(HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>);
|
||||
|
||||
impl EmbeddingConfigs {
|
||||
/// Create the map from its internal components.
|
||||
pub fn new(data: HashMap<String, (Arc<Embedder>, Arc<Prompt>)>) -> Self {
|
||||
pub fn new(data: HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>) -> Self {
|
||||
Self(data)
|
||||
}
|
||||
|
||||
/// Get an embedder configuration and template from its name.
|
||||
pub fn get(&self, name: &str) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
|
||||
pub fn get(&self, name: &str) -> Option<(Arc<Embedder>, Arc<Prompt>, bool)> {
|
||||
self.0.get(name).cloned()
|
||||
}
|
||||
|
||||
pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>)> {
|
||||
pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
|
||||
&self.0
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>)> {
|
||||
pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoIterator for EmbeddingConfigs {
|
||||
type Item = (String, (Arc<Embedder>, Arc<Prompt>));
|
||||
type Item = (String, (Arc<Embedder>, Arc<Prompt>, bool));
|
||||
|
||||
type IntoIter = std::collections::hash_map::IntoIter<String, (Arc<Embedder>, Arc<Prompt>)>;
|
||||
type IntoIter =
|
||||
std::collections::hash_map::IntoIter<String, (Arc<Embedder>, Arc<Prompt>, bool)>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.0.into_iter()
|
||||
|
@ -32,6 +32,9 @@ pub struct EmbeddingSettings {
|
||||
pub dimensions: Setting<usize>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
pub binary_quantized: Setting<bool>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
pub document_template: Setting<String>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
@ -85,23 +88,62 @@ pub enum ReindexAction {
|
||||
|
||||
pub enum SettingsDiff {
|
||||
Remove,
|
||||
Reindex { action: ReindexAction, updated_settings: EmbeddingSettings },
|
||||
UpdateWithoutReindex { updated_settings: EmbeddingSettings },
|
||||
Reindex { action: ReindexAction, updated_settings: EmbeddingSettings, quantize: bool },
|
||||
UpdateWithoutReindex { updated_settings: EmbeddingSettings, quantize: bool },
|
||||
}
|
||||
|
||||
pub enum EmbedderAction {
|
||||
WriteBackToDocuments(WriteBackToDocuments),
|
||||
Reindex(ReindexAction),
|
||||
#[derive(Default, Debug)]
|
||||
pub struct EmbedderAction {
|
||||
pub was_quantized: bool,
|
||||
pub is_being_quantized: bool,
|
||||
pub write_back: Option<WriteBackToDocuments>,
|
||||
pub reindex: Option<ReindexAction>,
|
||||
}
|
||||
|
||||
impl EmbedderAction {
|
||||
pub fn is_being_quantized(&self) -> bool {
|
||||
self.is_being_quantized
|
||||
}
|
||||
|
||||
pub fn write_back(&self) -> Option<&WriteBackToDocuments> {
|
||||
self.write_back.as_ref()
|
||||
}
|
||||
|
||||
pub fn reindex(&self) -> Option<&ReindexAction> {
|
||||
self.reindex.as_ref()
|
||||
}
|
||||
|
||||
pub fn with_is_being_quantized(mut self, quantize: bool) -> Self {
|
||||
self.is_being_quantized = quantize;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_write_back(write_back: WriteBackToDocuments, was_quantized: bool) -> Self {
|
||||
Self {
|
||||
was_quantized,
|
||||
is_being_quantized: false,
|
||||
write_back: Some(write_back),
|
||||
reindex: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_reindex(reindex: ReindexAction, was_quantized: bool) -> Self {
|
||||
Self { was_quantized, is_being_quantized: false, write_back: None, reindex: Some(reindex) }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct WriteBackToDocuments {
|
||||
pub embedder_id: u8,
|
||||
pub user_provided: RoaringBitmap,
|
||||
}
|
||||
|
||||
impl SettingsDiff {
|
||||
pub fn from_settings(old: EmbeddingSettings, new: Setting<EmbeddingSettings>) -> Self {
|
||||
match new {
|
||||
pub fn from_settings(
|
||||
old: EmbeddingSettings,
|
||||
new: Setting<EmbeddingSettings>,
|
||||
) -> Result<Self, UserError> {
|
||||
let ret = match new {
|
||||
Setting::Set(new) => {
|
||||
let EmbeddingSettings {
|
||||
mut source,
|
||||
@ -116,6 +158,7 @@ impl SettingsDiff {
|
||||
mut distribution,
|
||||
mut headers,
|
||||
mut document_template_max_bytes,
|
||||
binary_quantized: mut binary_quantize,
|
||||
} = old;
|
||||
|
||||
let EmbeddingSettings {
|
||||
@ -131,8 +174,17 @@ impl SettingsDiff {
|
||||
distribution: new_distribution,
|
||||
headers: new_headers,
|
||||
document_template_max_bytes: new_document_template_max_bytes,
|
||||
binary_quantized: new_binary_quantize,
|
||||
} = new;
|
||||
|
||||
if matches!(binary_quantize, Setting::Set(true))
|
||||
&& matches!(new_binary_quantize, Setting::Set(false))
|
||||
{
|
||||
return Err(UserError::InvalidDisableBinaryQuantization {
|
||||
embedder_name: String::from("todo"),
|
||||
});
|
||||
}
|
||||
|
||||
let mut reindex_action = None;
|
||||
|
||||
// **Warning**: do not use short-circuiting || here, we want all these operations applied
|
||||
@ -172,6 +224,7 @@ impl SettingsDiff {
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
let binary_quantize_changed = binary_quantize.apply(new_binary_quantize);
|
||||
if url.apply(new_url) {
|
||||
match source {
|
||||
// do not regenerate on an url change in OpenAI
|
||||
@ -231,16 +284,27 @@ impl SettingsDiff {
|
||||
distribution,
|
||||
headers,
|
||||
document_template_max_bytes,
|
||||
binary_quantized: binary_quantize,
|
||||
};
|
||||
|
||||
match reindex_action {
|
||||
Some(action) => Self::Reindex { action, updated_settings },
|
||||
None => Self::UpdateWithoutReindex { updated_settings },
|
||||
Some(action) => Self::Reindex {
|
||||
action,
|
||||
updated_settings,
|
||||
quantize: binary_quantize_changed,
|
||||
},
|
||||
None => Self::UpdateWithoutReindex {
|
||||
updated_settings,
|
||||
quantize: binary_quantize_changed,
|
||||
},
|
||||
}
|
||||
}
|
||||
Setting::Reset => Self::Remove,
|
||||
Setting::NotSet => Self::UpdateWithoutReindex { updated_settings: old },
|
||||
}
|
||||
Setting::NotSet => {
|
||||
Self::UpdateWithoutReindex { updated_settings: old, quantize: false }
|
||||
}
|
||||
};
|
||||
Ok(ret)
|
||||
}
|
||||
}
|
||||
|
||||
@ -486,7 +550,7 @@ impl std::fmt::Display for EmbedderSource {
|
||||
|
||||
impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||
fn from(value: EmbeddingConfig) -> Self {
|
||||
let EmbeddingConfig { embedder_options, prompt } = value;
|
||||
let EmbeddingConfig { embedder_options, prompt, quantized } = value;
|
||||
let document_template_max_bytes =
|
||||
Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get());
|
||||
match embedder_options {
|
||||
@ -507,6 +571,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||
response: Setting::NotSet,
|
||||
headers: Setting::NotSet,
|
||||
distribution: Setting::some_or_not_set(distribution),
|
||||
binary_quantized: Setting::some_or_not_set(quantized),
|
||||
},
|
||||
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
||||
url,
|
||||
@ -527,6 +592,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||
response: Setting::NotSet,
|
||||
headers: Setting::NotSet,
|
||||
distribution: Setting::some_or_not_set(distribution),
|
||||
binary_quantized: Setting::some_or_not_set(quantized),
|
||||
},
|
||||
super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
|
||||
embedding_model,
|
||||
@ -547,6 +613,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||
response: Setting::NotSet,
|
||||
headers: Setting::NotSet,
|
||||
distribution: Setting::some_or_not_set(distribution),
|
||||
binary_quantized: Setting::some_or_not_set(quantized),
|
||||
},
|
||||
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
|
||||
dimensions,
|
||||
@ -564,6 +631,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||
response: Setting::NotSet,
|
||||
headers: Setting::NotSet,
|
||||
distribution: Setting::some_or_not_set(distribution),
|
||||
binary_quantized: Setting::some_or_not_set(quantized),
|
||||
},
|
||||
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
||||
api_key,
|
||||
@ -586,6 +654,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||
response: Setting::Set(response),
|
||||
distribution: Setting::some_or_not_set(distribution),
|
||||
headers: Setting::Set(headers),
|
||||
binary_quantized: Setting::some_or_not_set(quantized),
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -607,8 +676,11 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
||||
response,
|
||||
distribution,
|
||||
headers,
|
||||
binary_quantized,
|
||||
} = value;
|
||||
|
||||
this.quantized = binary_quantized.set();
|
||||
|
||||
if let Some(source) = source.set() {
|
||||
match source {
|
||||
EmbedderSource::OpenAi => {
|
||||
|
Reference in New Issue
Block a user