mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-06 12:46:31 +00:00
Implement in new document indexer
This commit is contained in:
@ -12,9 +12,9 @@ use super::document::{Document, DocumentFromDb, DocumentFromVersions, Versions};
|
||||
use super::indexer::de::DeserrRawValue;
|
||||
use crate::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use crate::documents::FieldIdMapper;
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::vector::db::{EmbeddingStatus, IndexEmbeddingConfig};
|
||||
use crate::vector::parsed_vectors::{RawVectors, RawVectorsError, VectorOrArrayOfVectors};
|
||||
use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfigs};
|
||||
use crate::vector::{ArroyWrapper, Embedding, RuntimeEmbedders};
|
||||
use crate::{DocumentId, Index, InternalError, Result, UserError};
|
||||
|
||||
#[derive(Serialize)]
|
||||
@ -109,7 +109,7 @@ impl<'t> VectorDocumentFromDb<'t> {
|
||||
None => None,
|
||||
};
|
||||
|
||||
let embedding_config = index.embedding_configs(rtxn)?;
|
||||
let embedding_config = index.embedding_configs().embedding_configs(rtxn)?;
|
||||
|
||||
Ok(Some(Self { docid, embedding_config, index, vectors_field, rtxn, doc_alloc }))
|
||||
}
|
||||
@ -118,6 +118,7 @@ impl<'t> VectorDocumentFromDb<'t> {
|
||||
&self,
|
||||
embedder_id: u8,
|
||||
config: &IndexEmbeddingConfig,
|
||||
status: &EmbeddingStatus,
|
||||
) -> Result<VectorEntry<'t>> {
|
||||
let reader =
|
||||
ArroyWrapper::new(self.index.vector_arroy, embedder_id, config.config.quantized());
|
||||
@ -126,7 +127,7 @@ impl<'t> VectorDocumentFromDb<'t> {
|
||||
Ok(VectorEntry {
|
||||
has_configured_embedder: true,
|
||||
embeddings: Some(Embeddings::FromDb(vectors)),
|
||||
regenerate: !config.user_provided.contains(self.docid),
|
||||
regenerate: status.must_regenerate(self.docid),
|
||||
implicit: false,
|
||||
})
|
||||
}
|
||||
@ -137,9 +138,9 @@ impl<'t> VectorDocument<'t> for VectorDocumentFromDb<'t> {
|
||||
self.embedding_config
|
||||
.iter()
|
||||
.map(|config| {
|
||||
let embedder_id =
|
||||
self.index.embedder_category_id.get(self.rtxn, &config.name)?.unwrap();
|
||||
let entry = self.entry_from_db(embedder_id, config)?;
|
||||
let info =
|
||||
self.index.embedding_configs().embedder_info(self.rtxn, &config.name)?.unwrap();
|
||||
let entry = self.entry_from_db(info.embedder_id, config, &info.embedding_status)?;
|
||||
let config_name = self.doc_alloc.alloc_str(config.name.as_str());
|
||||
Ok((&*config_name, entry))
|
||||
})
|
||||
@ -156,11 +157,11 @@ impl<'t> VectorDocument<'t> for VectorDocumentFromDb<'t> {
|
||||
}
|
||||
|
||||
fn vectors_for_key(&self, key: &str) -> Result<Option<VectorEntry<'t>>> {
|
||||
Ok(match self.index.embedder_category_id.get(self.rtxn, key)? {
|
||||
Some(embedder_id) => {
|
||||
Ok(match self.index.embedding_configs().embedder_info(self.rtxn, key)? {
|
||||
Some(info) => {
|
||||
let config =
|
||||
self.embedding_config.iter().find(|config| config.name == key).unwrap();
|
||||
Some(self.entry_from_db(embedder_id, config)?)
|
||||
Some(self.entry_from_db(info.embedder_id, config, &info.embedding_status)?)
|
||||
}
|
||||
None => match self.vectors_field.as_ref().and_then(|obkv| obkv.get(key)) {
|
||||
Some(embedding_from_doc) => {
|
||||
@ -222,7 +223,7 @@ fn entry_from_raw_value(
|
||||
pub struct VectorDocumentFromVersions<'doc> {
|
||||
external_document_id: &'doc str,
|
||||
vectors: RawMap<'doc, FxBuildHasher>,
|
||||
embedders: &'doc EmbeddingConfigs,
|
||||
embedders: &'doc RuntimeEmbedders,
|
||||
}
|
||||
|
||||
impl<'doc> VectorDocumentFromVersions<'doc> {
|
||||
@ -230,7 +231,7 @@ impl<'doc> VectorDocumentFromVersions<'doc> {
|
||||
external_document_id: &'doc str,
|
||||
versions: &Versions<'doc>,
|
||||
bump: &'doc Bump,
|
||||
embedders: &'doc EmbeddingConfigs,
|
||||
embedders: &'doc RuntimeEmbedders,
|
||||
) -> Result<Option<Self>> {
|
||||
let document = DocumentFromVersions::new(versions);
|
||||
if let Some(vectors_field) = document.vectors_field()? {
|
||||
@ -283,7 +284,7 @@ impl<'doc> MergedVectorDocument<'doc> {
|
||||
db_fields_ids_map: &'doc Mapper,
|
||||
versions: &Versions<'doc>,
|
||||
doc_alloc: &'doc Bump,
|
||||
embedders: &'doc EmbeddingConfigs,
|
||||
embedders: &'doc RuntimeEmbedders,
|
||||
) -> Result<Option<Self>> {
|
||||
let db = VectorDocumentFromDb::new(docid, index, rtxn, db_fields_ids_map, doc_alloc)?;
|
||||
let new_doc =
|
||||
@ -295,7 +296,7 @@ impl<'doc> MergedVectorDocument<'doc> {
|
||||
external_document_id: &'doc str,
|
||||
versions: &Versions<'doc>,
|
||||
doc_alloc: &'doc Bump,
|
||||
embedders: &'doc EmbeddingConfigs,
|
||||
embedders: &'doc RuntimeEmbedders,
|
||||
) -> Result<Option<Self>> {
|
||||
let Some(new_doc) =
|
||||
VectorDocumentFromVersions::new(external_document_id, versions, doc_alloc, embedders)?
|
||||
|
Reference in New Issue
Block a user