mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-07 13:16:31 +00:00
Merge remote-tracking branch 'origin/release-v1.16.0' into document-sorting
This commit is contained in:
@ -7,6 +7,7 @@ use roaring::RoaringBitmap;
|
||||
use crate::score_details::{ScoreDetails, ScoreValue, ScoringStrategy};
|
||||
use crate::search::new::{distinct_fid, distinct_single_docid};
|
||||
use crate::search::SemanticSearch;
|
||||
use crate::vector::SearchQuery;
|
||||
use crate::{Index, MatchingWords, Result, Search, SearchResult};
|
||||
|
||||
struct ScoreWithRatioResult {
|
||||
@ -225,12 +226,9 @@ impl Search<'_> {
|
||||
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||
}
|
||||
|
||||
// no vector search against placeholder search
|
||||
let Some(query) = search.query.take() else {
|
||||
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||
};
|
||||
// no embedder, no semantic search
|
||||
let Some(SemanticSearch { vector, embedder_name, embedder, quantized }) = semantic else {
|
||||
let Some(SemanticSearch { vector, embedder_name, embedder, quantized, media }) = semantic
|
||||
else {
|
||||
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
|
||||
};
|
||||
|
||||
@ -241,9 +239,17 @@ impl Search<'_> {
|
||||
let span = tracing::trace_span!(target: "search::hybrid", "embed_one");
|
||||
let _entered = span.enter();
|
||||
|
||||
let q = search.query.as_deref();
|
||||
let media = media.as_ref();
|
||||
|
||||
let query = match (q, media) {
|
||||
(Some(text), None) => SearchQuery::Text(text),
|
||||
(q, media) => SearchQuery::Media { q, media },
|
||||
};
|
||||
|
||||
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3);
|
||||
|
||||
match embedder.embed_search(&query, Some(deadline)) {
|
||||
match embedder.embed_search(query, Some(deadline)) {
|
||||
Ok(embedding) => embedding,
|
||||
Err(error) => {
|
||||
tracing::error!(error=%error, "Embedding failed");
|
||||
@ -257,8 +263,13 @@ impl Search<'_> {
|
||||
}
|
||||
};
|
||||
|
||||
search.semantic =
|
||||
Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder, quantized });
|
||||
search.semantic = Some(SemanticSearch {
|
||||
vector: Some(vector_query),
|
||||
embedder_name,
|
||||
embedder,
|
||||
quantized,
|
||||
media,
|
||||
});
|
||||
|
||||
// TODO: would be better to have two distinct functions at this point
|
||||
let vector_results = search.execute()?;
|
||||
|
@ -13,7 +13,7 @@ use crate::documents::GeoSortParameter;
|
||||
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
|
||||
use crate::index::MatchingStrategy;
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::vector::Embedder;
|
||||
use crate::vector::{Embedder, Embedding};
|
||||
use crate::{
|
||||
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Error, Index,
|
||||
Result, SearchContext, TimeBudget, UserError,
|
||||
@ -33,6 +33,7 @@ pub mod similar;
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SemanticSearch {
|
||||
vector: Option<Vec<f32>>,
|
||||
media: Option<serde_json::Value>,
|
||||
embedder_name: String,
|
||||
embedder: Arc<Embedder>,
|
||||
quantized: bool,
|
||||
@ -94,9 +95,10 @@ impl<'a> Search<'a> {
|
||||
embedder_name: String,
|
||||
embedder: Arc<Embedder>,
|
||||
quantized: bool,
|
||||
vector: Option<Vec<f32>>,
|
||||
vector: Option<Embedding>,
|
||||
media: Option<serde_json::Value>,
|
||||
) -> &mut Search<'a> {
|
||||
self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector });
|
||||
self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector, media });
|
||||
self
|
||||
}
|
||||
|
||||
@ -232,24 +234,28 @@ impl<'a> Search<'a> {
|
||||
degraded,
|
||||
used_negative_operator,
|
||||
} = match self.semantic.as_ref() {
|
||||
Some(SemanticSearch { vector: Some(vector), embedder_name, embedder, quantized }) => {
|
||||
execute_vector_search(
|
||||
&mut ctx,
|
||||
vector,
|
||||
self.scoring_strategy,
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
self.geo_param,
|
||||
self.offset,
|
||||
self.limit,
|
||||
embedder_name,
|
||||
embedder,
|
||||
*quantized,
|
||||
self.time_budget.clone(),
|
||||
self.ranking_score_threshold,
|
||||
)?
|
||||
}
|
||||
Some(SemanticSearch {
|
||||
vector: Some(vector),
|
||||
embedder_name,
|
||||
embedder,
|
||||
quantized,
|
||||
media: _,
|
||||
}) => execute_vector_search(
|
||||
&mut ctx,
|
||||
vector,
|
||||
self.scoring_strategy,
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
self.geo_param,
|
||||
self.offset,
|
||||
self.limit,
|
||||
embedder_name,
|
||||
embedder,
|
||||
*quantized,
|
||||
self.time_budget.clone(),
|
||||
self.ranking_score_threshold,
|
||||
)?,
|
||||
_ => execute_search(
|
||||
&mut ctx,
|
||||
self.query.as_deref(),
|
||||
|
@ -8,7 +8,7 @@ use maplit::{btreemap, hashset};
|
||||
use crate::progress::Progress;
|
||||
use crate::update::new::indexer;
|
||||
use crate::update::{IndexerConfig, Settings};
|
||||
use crate::vector::EmbeddingConfigs;
|
||||
use crate::vector::RuntimeEmbedders;
|
||||
use crate::{db_snap, Criterion, FilterableAttributesRule, Index};
|
||||
pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson");
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
@ -44,7 +44,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
S("america") => vec![S("the united states")],
|
||||
});
|
||||
builder.set_searchable_fields(vec![S("title"), S("description")]);
|
||||
builder.execute(|_| (), || false).unwrap();
|
||||
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// index documents
|
||||
@ -55,7 +55,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let embedders = EmbeddingConfigs::default();
|
||||
let embedders = RuntimeEmbedders::default();
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
|
||||
let mut file = tempfile::tempfile().unwrap();
|
||||
@ -95,6 +95,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
embedders,
|
||||
&|| false,
|
||||
&Progress::default(),
|
||||
&Default::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
@ -32,8 +32,8 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
) -> Result<Self> {
|
||||
let embedder_index = ctx
|
||||
.index
|
||||
.embedder_category_id
|
||||
.get(ctx.txn, embedder_name)?
|
||||
.embedding_configs()
|
||||
.embedder_id(ctx.txn, embedder_name)?
|
||||
.ok_or_else(|| crate::UserError::InvalidSearchEmbedder(embedder_name.to_owned()))?;
|
||||
|
||||
Ok(Self {
|
||||
|
@ -64,10 +64,13 @@ impl<'a> Similar<'a> {
|
||||
|
||||
let universe = universe;
|
||||
|
||||
let embedder_index =
|
||||
self.index.embedder_category_id.get(self.rtxn, &self.embedder_name)?.ok_or_else(
|
||||
|| crate::UserError::InvalidSimilarEmbedder(self.embedder_name.to_owned()),
|
||||
)?;
|
||||
let embedder_index = self
|
||||
.index
|
||||
.embedding_configs()
|
||||
.embedder_id(self.rtxn, &self.embedder_name)?
|
||||
.ok_or_else(|| {
|
||||
crate::UserError::InvalidSimilarEmbedder(self.embedder_name.to_owned())
|
||||
})?;
|
||||
|
||||
let reader = ArroyWrapper::new(self.index.vector_arroy, embedder_index, self.quantized);
|
||||
let results = reader.nns_by_item(
|
||||
|
Reference in New Issue
Block a user