mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-26 00:01:00 +00:00
Various changes
- fixed seed for arroy - check vector dimensions as soon as it is provided to search - don't embed whitespace
This commit is contained in:
@ -154,6 +154,15 @@ impl<'a> Search<'a> {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
|
||||
if has_vector_search {
|
||||
let ctx = SearchContext::new(self.index, self.rtxn);
|
||||
filtered_universe(&ctx, &self.filter)
|
||||
} else {
|
||||
Ok(self.execute()?.candidates)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn execute(&self) -> Result<SearchResult> {
|
||||
let embedder_name;
|
||||
let embedder_name = match &self.embedder_name {
|
||||
@ -297,11 +306,16 @@ pub struct SearchForFacetValues<'a> {
|
||||
query: Option<String>,
|
||||
facet: String,
|
||||
search_query: Search<'a>,
|
||||
is_hybrid: bool,
|
||||
}
|
||||
|
||||
impl<'a> SearchForFacetValues<'a> {
|
||||
pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValues<'a> {
|
||||
SearchForFacetValues { query: None, facet, search_query }
|
||||
pub fn new(
|
||||
facet: String,
|
||||
search_query: Search<'a>,
|
||||
is_hybrid: bool,
|
||||
) -> SearchForFacetValues<'a> {
|
||||
SearchForFacetValues { query: None, facet, search_query, is_hybrid }
|
||||
}
|
||||
|
||||
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
|
||||
@ -351,7 +365,9 @@ impl<'a> SearchForFacetValues<'a> {
|
||||
None => return Ok(vec![]),
|
||||
};
|
||||
|
||||
let search_candidates = self.search_query.execute()?.candidates;
|
||||
let search_candidates = self
|
||||
.search_query
|
||||
.execute_for_candidates(self.is_hybrid || self.search_query.vector.is_some())?;
|
||||
|
||||
match self.query.as_ref() {
|
||||
Some(query) => {
|
||||
|
@ -509,7 +509,7 @@ where
|
||||
// We write the primary key field id into the main database
|
||||
self.index.put_primary_key(self.wtxn, &primary_key)?;
|
||||
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
||||
let mut rng = rand::rngs::StdRng::from_entropy();
|
||||
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
||||
|
||||
for (embedder_name, dimension) in dimension {
|
||||
let wtxn = &mut *self.wtxn;
|
||||
|
@ -7,7 +7,7 @@ use hf_hub::{Repo, RepoType};
|
||||
use tokenizers::{PaddingParams, Tokenizer};
|
||||
|
||||
pub use super::error::{EmbedError, Error, NewEmbedderError};
|
||||
use super::{Embedding, Embeddings};
|
||||
use super::{DistributionShift, Embedding, Embeddings};
|
||||
|
||||
#[derive(
|
||||
Debug,
|
||||
@ -184,4 +184,12 @@ impl Embedder {
|
||||
pub fn dimensions(&self) -> usize {
|
||||
self.dimensions
|
||||
}
|
||||
|
||||
pub fn distribution(&self) -> Option<DistributionShift> {
|
||||
if self.options.model == "BAAI/bge-base-en-v1.5" {
|
||||
Some(DistributionShift { current_mean: 0.85, current_sigma: 0.1 })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -202,6 +202,14 @@ impl Embedder {
|
||||
Embedder::UserProvided(embedder) => embedder.dimensions(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn distribution(&self) -> Option<DistributionShift> {
|
||||
match self {
|
||||
Embedder::HuggingFace(embedder) => embedder.distribution(),
|
||||
Embedder::OpenAi(embedder) => embedder.distribution(),
|
||||
Embedder::UserProvided(_embedder) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
|
@ -4,7 +4,7 @@ use reqwest::StatusCode;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::error::{EmbedError, NewEmbedderError};
|
||||
use super::{Embedding, Embeddings};
|
||||
use super::{DistributionShift, Embedding, Embeddings};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Embedder {
|
||||
@ -65,6 +65,14 @@ impl EmbeddingModel {
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn distribution(&self) -> Option<DistributionShift> {
|
||||
match self {
|
||||
EmbeddingModel::TextEmbeddingAda002 => {
|
||||
Some(DistributionShift { current_mean: 0.90, current_sigma: 0.08 })
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub const OPENAI_EMBEDDINGS_URL: &str = "https://api.openai.com/v1/embeddings";
|
||||
@ -326,6 +334,10 @@ impl Embedder {
|
||||
pub fn dimensions(&self) -> usize {
|
||||
self.options.embedding_model.dimensions()
|
||||
}
|
||||
|
||||
pub fn distribution(&self) -> Option<DistributionShift> {
|
||||
self.options.embedding_model.distribution()
|
||||
}
|
||||
}
|
||||
|
||||
// retrying in case of failure
|
||||
|
Reference in New Issue
Block a user