mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-23 21:26:02 +00:00
Compare commits
4 Commits
v1.23.0
...
embedder-s
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
00a0f74afa | ||
|
|
71b5afa23b | ||
|
|
bff49cae38 | ||
|
|
17abe14bd9 |
423
crates/milli/src/bin/embedder_settings.rs
Normal file
423
crates/milli/src/bin/embedder_settings.rs
Normal file
@@ -0,0 +1,423 @@
|
|||||||
|
use std::io::Write;
|
||||||
|
|
||||||
|
use milli::vector::settings::{
|
||||||
|
EmbedderSource, EmbeddingSettings, FieldStatus, MetaEmbeddingSetting, NestingContext,
|
||||||
|
ReindexOutcome,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub trait Formatter {
|
||||||
|
fn begin_document(&mut self);
|
||||||
|
fn end_document(&mut self);
|
||||||
|
|
||||||
|
fn begin_header(&mut self);
|
||||||
|
fn put_source_header(&mut self, source: EmbedderSource);
|
||||||
|
fn end_header(&mut self);
|
||||||
|
|
||||||
|
fn begin_setting(
|
||||||
|
&mut self,
|
||||||
|
setting: MetaEmbeddingSetting,
|
||||||
|
description: &'static str,
|
||||||
|
kind: &'static str,
|
||||||
|
reindex_outcome: ReindexOutcome,
|
||||||
|
default_value: &'static str,
|
||||||
|
);
|
||||||
|
fn end_setting(&mut self, setting: MetaEmbeddingSetting);
|
||||||
|
|
||||||
|
fn put_setting_status(
|
||||||
|
&mut self,
|
||||||
|
source: EmbedderSource,
|
||||||
|
field_status_by_nesting_context: FieldStatusByNestingContext,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Formatter producing an HTML table suitable for embedding in GitHub-flavored Markdown.
pub struct GitHubMdFormatter<W> {
    /// Destination of the generated markup.
    w: W,
}

impl<W: Write> GitHubMdFormatter<W> {
    /// Wraps the writer `w`; nothing is written until a `Formatter` method is called.
    pub fn new(w: W) -> Self {
        GitHubMdFormatter { w }
    }
}
|
||||||
|
|
||||||
|
impl<W: Write> Formatter for GitHubMdFormatter<W> {
|
||||||
|
fn begin_document(&mut self) {
|
||||||
|
let s = r#"
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tbody>
|
||||||
|
"#;
|
||||||
|
write!(self.w, "{s}").unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_document(&mut self) {
|
||||||
|
write!(
|
||||||
|
self.w,
|
||||||
|
r#"
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn begin_header(&mut self) {
|
||||||
|
write!(
|
||||||
|
self.w,
|
||||||
|
r#"
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Setting</th>
|
||||||
|
<th>Description</th>
|
||||||
|
<th>Type</th>
|
||||||
|
<th>Default Value</th>
|
||||||
|
<th>Regenerate on Change</th>
|
||||||
|
<th colspan="6">Availability for source</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th colspan="5"></th>
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
fn put_source_header(&mut self, source: EmbedderSource) {
|
||||||
|
write!(
|
||||||
|
self.w,
|
||||||
|
r#"
|
||||||
|
<th>
|
||||||
|
|
||||||
|
{source}
|
||||||
|
|
||||||
|
</th>
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
fn end_header(&mut self) {
|
||||||
|
write!(
|
||||||
|
self.w,
|
||||||
|
r#"
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn begin_setting(
|
||||||
|
&mut self,
|
||||||
|
setting: MetaEmbeddingSetting,
|
||||||
|
description: &'static str,
|
||||||
|
kind: &'static str,
|
||||||
|
reindex_outcome: ReindexOutcome,
|
||||||
|
default_value: &'static str,
|
||||||
|
) {
|
||||||
|
let name = setting.name();
|
||||||
|
let reindex_outcome = match reindex_outcome {
|
||||||
|
ReindexOutcome::AlwaysReindex => "🏗️ Always",
|
||||||
|
ReindexOutcome::NeverReindex => "🌱 Never",
|
||||||
|
ReindexOutcome::ReindexSometimes(sometimes) => sometimes,
|
||||||
|
};
|
||||||
|
write!(
|
||||||
|
self.w,
|
||||||
|
r#"
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
|
||||||
|
`{name}`
|
||||||
|
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
|
||||||
|
{description}
|
||||||
|
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
|
||||||
|
{kind}
|
||||||
|
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
|
||||||
|
{default_value}
|
||||||
|
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
|
||||||
|
{reindex_outcome}
|
||||||
|
|
||||||
|
</td>
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_setting(&mut self, _setting: MetaEmbeddingSetting) {
|
||||||
|
write!(
|
||||||
|
self.w,
|
||||||
|
r#"
|
||||||
|
|
||||||
|
|
||||||
|
</tr>
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn put_setting_status(
|
||||||
|
&mut self,
|
||||||
|
_source: EmbedderSource,
|
||||||
|
field_status_by_nesting_context: FieldStatusByNestingContext,
|
||||||
|
) {
|
||||||
|
let field_status = match field_status_by_nesting_context {
|
||||||
|
FieldStatusByNestingContext::Invariant(field_status) => {
|
||||||
|
format_field_status(field_status).to_string()
|
||||||
|
}
|
||||||
|
FieldStatusByNestingContext::Variant(variant_field_status_by_nesting_context) => {
|
||||||
|
format!(
|
||||||
|
r#"
|
||||||
|
- Usually, {}
|
||||||
|
- When used in `searchEmbedder` in a `composite` embedder, {}
|
||||||
|
- When used in `indexingEmbedder` in a `composite` embedder, {}
|
||||||
|
"#,
|
||||||
|
format_field_status(variant_field_status_by_nesting_context.not_nested),
|
||||||
|
format_field_status(variant_field_status_by_nesting_context.search),
|
||||||
|
format_field_status(variant_field_status_by_nesting_context.index)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
write!(
|
||||||
|
self.w,
|
||||||
|
r#"
|
||||||
|
<td>
|
||||||
|
|
||||||
|
{field_status}
|
||||||
|
|
||||||
|
</td>
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn format_field_status(field_status: FieldStatus) -> &'static str {
|
||||||
|
match field_status {
|
||||||
|
FieldStatus::Mandatory => "🔐 **Mandatory**",
|
||||||
|
FieldStatus::Allowed => "✅ Allowed",
|
||||||
|
FieldStatus::Disallowed => "🚫 Disallowed",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct GitHubMdAvailabilityFormatter<W>(pub GitHubMdFormatter<W>);
|
||||||
|
impl<W: Write> Formatter for GitHubMdAvailabilityFormatter<W> {
|
||||||
|
fn begin_document(&mut self) {
|
||||||
|
write!(self.0.w, "## Availability of the settings depending on the selected source\n\n")
|
||||||
|
.unwrap();
|
||||||
|
self.0.begin_document();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_document(&mut self) {
|
||||||
|
self.0.end_document();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn begin_header(&mut self) {
|
||||||
|
write!(
|
||||||
|
self.0.w,
|
||||||
|
r#"
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Setting</th>
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn put_source_header(&mut self, source: EmbedderSource) {
|
||||||
|
self.0.put_source_header(source);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_header(&mut self) {
|
||||||
|
self.0.end_header();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn begin_setting(
|
||||||
|
&mut self,
|
||||||
|
setting: MetaEmbeddingSetting,
|
||||||
|
_description: &'static str,
|
||||||
|
_kind: &'static str,
|
||||||
|
_reindex_outcome: ReindexOutcome,
|
||||||
|
_default_value: &'static str,
|
||||||
|
) {
|
||||||
|
if setting == MetaEmbeddingSetting::Source {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let name = setting.name();
|
||||||
|
write!(
|
||||||
|
self.0.w,
|
||||||
|
r#"
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
|
||||||
|
`{name}`
|
||||||
|
|
||||||
|
</td>
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_setting(&mut self, setting: MetaEmbeddingSetting) {
|
||||||
|
if setting == MetaEmbeddingSetting::Source {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
self.0.end_setting(setting);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn put_setting_status(
|
||||||
|
&mut self,
|
||||||
|
source: EmbedderSource,
|
||||||
|
field_status_by_nesting_context: FieldStatusByNestingContext,
|
||||||
|
) {
|
||||||
|
self.0.put_setting_status(source, field_status_by_nesting_context);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct GitHubMdBasicFormatter<W>(pub GitHubMdFormatter<W>);
|
||||||
|
impl<W: Write> Formatter for GitHubMdBasicFormatter<W> {
|
||||||
|
fn begin_document(&mut self) {
|
||||||
|
write!(self.0.w, "## List of the embedder settings\n\n").unwrap();
|
||||||
|
self.0.begin_document();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_document(&mut self) {
|
||||||
|
self.0.end_document();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn begin_header(&mut self) {
|
||||||
|
write!(
|
||||||
|
self.0.w,
|
||||||
|
r#"
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Setting</th>
|
||||||
|
<th>Description</th>
|
||||||
|
<th>Type</th>
|
||||||
|
<th>Default Value</th>
|
||||||
|
<th>Regenerate on Change</th>
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn put_source_header(&mut self, _source: EmbedderSource) {}
|
||||||
|
|
||||||
|
fn end_header(&mut self) {
|
||||||
|
self.0.end_header();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn begin_setting(
|
||||||
|
&mut self,
|
||||||
|
setting: MetaEmbeddingSetting,
|
||||||
|
description: &'static str,
|
||||||
|
kind: &'static str,
|
||||||
|
reindex_outcome: ReindexOutcome,
|
||||||
|
default_value: &'static str,
|
||||||
|
) {
|
||||||
|
self.0.begin_setting(setting, description, kind, reindex_outcome, default_value);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_setting(&mut self, setting: MetaEmbeddingSetting) {
|
||||||
|
self.0.end_setting(setting);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn put_setting_status(
|
||||||
|
&mut self,
|
||||||
|
_source: EmbedderSource,
|
||||||
|
_field_status_by_nesting_context: FieldStatusByNestingContext,
|
||||||
|
) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum FieldStatusByNestingContext {
|
||||||
|
Invariant(FieldStatus),
|
||||||
|
Variant(VariantFieldStatusByNestingContext),
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct VariantFieldStatusByNestingContext {
|
||||||
|
not_nested: FieldStatus,
|
||||||
|
search: FieldStatus,
|
||||||
|
index: FieldStatus,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn format_settings(mut fmt: impl Formatter) {
|
||||||
|
#![allow(unused_labels)] // the labels are used as documentation
|
||||||
|
fmt.begin_document();
|
||||||
|
fmt.begin_header();
|
||||||
|
for source in enum_iterator::all::<EmbedderSource>() {
|
||||||
|
fmt.put_source_header(source);
|
||||||
|
}
|
||||||
|
fmt.end_header();
|
||||||
|
'setting: for setting in enum_iterator::all::<MetaEmbeddingSetting>() {
|
||||||
|
let description = setting.description();
|
||||||
|
let kind = setting.kind();
|
||||||
|
let reindex_outcome = setting.reindex_outcome();
|
||||||
|
let default_value = setting.default_value();
|
||||||
|
fmt.begin_setting(setting, description, kind, reindex_outcome, default_value);
|
||||||
|
|
||||||
|
'source: for source in enum_iterator::all::<EmbedderSource>() {
|
||||||
|
if setting == MetaEmbeddingSetting::Source {
|
||||||
|
break 'source;
|
||||||
|
}
|
||||||
|
let mut field_status = VariantFieldStatusByNestingContext {
|
||||||
|
not_nested: FieldStatus::Disallowed,
|
||||||
|
search: FieldStatus::Disallowed,
|
||||||
|
index: FieldStatus::Disallowed,
|
||||||
|
};
|
||||||
|
'nesting: for nesting_context in enum_iterator::all::<NestingContext>() {
|
||||||
|
let status = EmbeddingSettings::field_status(source, setting, nesting_context);
|
||||||
|
|
||||||
|
match nesting_context {
|
||||||
|
NestingContext::NotNested => {
|
||||||
|
field_status.not_nested = status;
|
||||||
|
}
|
||||||
|
NestingContext::Search => {
|
||||||
|
field_status.search = status;
|
||||||
|
}
|
||||||
|
NestingContext::Indexing => {
|
||||||
|
field_status.index = status;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let field_status_by_nesting_context = if field_status.index == field_status.search
|
||||||
|
&& field_status.search == field_status.not_nested
|
||||||
|
{
|
||||||
|
FieldStatusByNestingContext::Invariant(field_status.not_nested)
|
||||||
|
} else {
|
||||||
|
FieldStatusByNestingContext::Variant(field_status)
|
||||||
|
};
|
||||||
|
fmt.put_setting_status(source, field_status_by_nesting_context);
|
||||||
|
}
|
||||||
|
fmt.end_setting(setting);
|
||||||
|
}
|
||||||
|
fmt.end_document();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let mut std_out = std::io::stdout().lock();
|
||||||
|
|
||||||
|
write!(
|
||||||
|
&mut std_out,
|
||||||
|
"The tables below have been generated by calling `cargo run --bin embedder_settings`\n\n"
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let formatter = GitHubMdFormatter::new(&mut std_out);
|
||||||
|
let formatter = GitHubMdBasicFormatter(formatter);
|
||||||
|
format_settings(formatter);
|
||||||
|
|
||||||
|
write!(&mut std_out, "\n\n").unwrap();
|
||||||
|
|
||||||
|
let formatter = GitHubMdFormatter::new(&mut std_out);
|
||||||
|
let formatter = GitHubMdAvailabilityFormatter(formatter);
|
||||||
|
format_settings(formatter);
|
||||||
|
}
|
||||||
@@ -65,7 +65,7 @@ fn default_template() -> liquid::Template {
|
|||||||
new_template(default_template_text()).unwrap()
|
new_template(default_template_text()).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_template_text() -> &'static str {
|
pub(crate) fn default_template_text() -> &'static str {
|
||||||
"{% for field in fields %}\
|
"{% for field in fields %}\
|
||||||
{% if field.is_searchable and field.value != nil %}\
|
{% if field.is_searchable and field.value != nil %}\
|
||||||
{{ field.name }}: {{ field.value }}\n\
|
{{ field.name }}: {{ field.value }}\n\
|
||||||
|
|||||||
@@ -1065,13 +1065,14 @@ fn apply_default_for_source(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) enum FieldStatus {
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub enum FieldStatus {
|
||||||
Mandatory,
|
Mandatory,
|
||||||
Allowed,
|
Allowed,
|
||||||
Disallowed,
|
Disallowed,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy, enum_iterator::Sequence)]
|
||||||
pub enum NestingContext {
|
pub enum NestingContext {
|
||||||
NotNested,
|
NotNested,
|
||||||
Search,
|
Search,
|
||||||
@@ -1108,7 +1109,7 @@ impl NestingContext {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, enum_iterator::Sequence)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, enum_iterator::Sequence)]
|
||||||
pub enum MetaEmbeddingSetting {
|
pub enum MetaEmbeddingSetting {
|
||||||
Source,
|
Source,
|
||||||
Model,
|
Model,
|
||||||
@@ -1128,8 +1129,14 @@ pub enum MetaEmbeddingSetting {
|
|||||||
BinaryQuantized,
|
BinaryQuantized,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether changing the value of a setting causes embeddings to be regenerated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReindexOutcome {
    /// Changing the setting always regenerates embeddings.
    AlwaysReindex,
    /// Changing the setting never regenerates embeddings.
    NeverReindex,
    /// Regeneration depends on the circumstances, which are described by the attached text.
    ReindexSometimes(&'static str),
}
|
||||||
|
|
||||||
impl MetaEmbeddingSetting {
|
impl MetaEmbeddingSetting {
|
||||||
pub(crate) fn name(&self) -> &'static str {
|
pub fn name(&self) -> &'static str {
|
||||||
use MetaEmbeddingSetting::*;
|
use MetaEmbeddingSetting::*;
|
||||||
match self {
|
match self {
|
||||||
Source => "source",
|
Source => "source",
|
||||||
@@ -1150,6 +1157,159 @@ impl MetaEmbeddingSetting {
|
|||||||
BinaryQuantized => "binaryQuantized",
|
BinaryQuantized => "binaryQuantized",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn description(&self) -> &'static str {
|
||||||
|
use MetaEmbeddingSetting::*;
|
||||||
|
match self {
|
||||||
|
Source => {
|
||||||
|
r#"
|
||||||
|
The source used to provide the embeddings.
|
||||||
|
|
||||||
|
Which embedder parameters are available and mandatory is determined by the value of this setting.
|
||||||
|
"#
|
||||||
|
}
|
||||||
|
Model => "The name of the model to use.",
|
||||||
|
Revision => {
|
||||||
|
r#"
|
||||||
|
The revision (commit SHA1) of the model to use.
|
||||||
|
|
||||||
|
If unspecified, Meilisearch picks the latest revision of the model.
|
||||||
|
"#
|
||||||
|
}
|
||||||
|
Pooling => "The pooling method to use.",
|
||||||
|
ApiKey => "The API key to pass to the remote embedder while making requests.",
|
||||||
|
Dimensions => "The expected dimensions of the embeddings produced by this embedder.",
|
||||||
|
DocumentTemplate => {
|
||||||
|
r#"
|
||||||
|
A liquid template used to render documents to a text that can be embedded.
|
||||||
|
|
||||||
|
Meillisearch interpolates the template for each document and sends the resulting text to the embedder.
|
||||||
|
The embedder then generates document vectors based on this text.
|
||||||
|
"#
|
||||||
|
}
|
||||||
|
DocumentTemplateMaxBytes => {
|
||||||
|
"Rendered texts are truncated to this size before embedding."
|
||||||
|
}
|
||||||
|
Url => "URL to reach the remote embedder.",
|
||||||
|
Request => "Template request to send to the remote embedder.",
|
||||||
|
Response => "Template response indicating how to find the embeddings in the response from the remote embedder.",
|
||||||
|
Headers => "Additional headers to send to the remote embedder.",
|
||||||
|
SearchEmbedder => "Embedder settings for the embedder used at search time.",
|
||||||
|
IndexingEmbedder => "Embedder settings for the embedder used at indexing time.",
|
||||||
|
Distribution => "Affine transformation applied to the semantic score to make it more comparable to the ranking score.",
|
||||||
|
BinaryQuantized => r#"
|
||||||
|
Whether to binary quantize the embeddings of this embedder.
|
||||||
|
|
||||||
|
Binary quantized embeddings are smaller than regular embeddings, which improves
|
||||||
|
disk usage and retrieval speed, at the cost of relevancy.
|
||||||
|
"#,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn kind(&self) -> &'static str {
|
||||||
|
use MetaEmbeddingSetting::*;
|
||||||
|
match self {
|
||||||
|
Source => {
|
||||||
|
r#""openAi" | "huggingFace" | "userProvided" | "ollama" | "rest" | "composite""#
|
||||||
|
}
|
||||||
|
Model => "string",
|
||||||
|
Revision => "string",
|
||||||
|
Pooling => r#""useModel" | "forceCls" | "forceMean""#,
|
||||||
|
ApiKey => "string",
|
||||||
|
Dimensions => "number",
|
||||||
|
DocumentTemplate => "string",
|
||||||
|
DocumentTemplateMaxBytes => "number",
|
||||||
|
Url => "string",
|
||||||
|
Request => "any",
|
||||||
|
Response => "any",
|
||||||
|
Headers => "object",
|
||||||
|
SearchEmbedder => "object",
|
||||||
|
IndexingEmbedder => "object",
|
||||||
|
Distribution => "object",
|
||||||
|
BinaryQuantized => "boolean",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn default_value(&self) -> &'static str {
|
||||||
|
use MetaEmbeddingSetting::*;
|
||||||
|
match self {
|
||||||
|
Source => r#""openAi""#,
|
||||||
|
Model => {
|
||||||
|
r#"
|
||||||
|
- For source `openAi`, defaults to "text-embedding-3-small"
|
||||||
|
- For source `huggingFace`, defaults to "BAAI/bge-base-en-v1.5"
|
||||||
|
"#
|
||||||
|
}
|
||||||
|
Revision => {
|
||||||
|
r#"
|
||||||
|
- When `model` is set to default, defaults to "617ca489d9e86b49b8167676d8220688b99db36e"
|
||||||
|
- Otherwise, defaults to `null`
|
||||||
|
"#
|
||||||
|
}
|
||||||
|
Pooling => r#""useModel""#,
|
||||||
|
ApiKey => "`null`",
|
||||||
|
Dimensions => "`null`",
|
||||||
|
DocumentTemplate => crate::prompt::default_template_text(),
|
||||||
|
DocumentTemplateMaxBytes => "400",
|
||||||
|
Url => "`null`",
|
||||||
|
Request => "`null`",
|
||||||
|
Response => "`null`",
|
||||||
|
Headers => "`null`",
|
||||||
|
SearchEmbedder => "`null`",
|
||||||
|
IndexingEmbedder => "`null`",
|
||||||
|
Distribution => "`null`",
|
||||||
|
BinaryQuantized => "`false`",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn reindex_outcome(&self) -> ReindexOutcome {
|
||||||
|
use MetaEmbeddingSetting::*;
|
||||||
|
match self {
|
||||||
|
Source => ReindexOutcome::AlwaysReindex,
|
||||||
|
Model => ReindexOutcome::AlwaysReindex,
|
||||||
|
Revision => ReindexOutcome::AlwaysReindex,
|
||||||
|
Pooling => ReindexOutcome::AlwaysReindex,
|
||||||
|
ApiKey => ReindexOutcome::NeverReindex,
|
||||||
|
Dimensions => ReindexOutcome::ReindexSometimes(
|
||||||
|
r#"
|
||||||
|
- 🏗️ When the source is `openAi`, changing the value of this parameter always regenerates embeddings
|
||||||
|
- 🌱 For other sources, changing the value of this parameter never regenerates embeddings
|
||||||
|
"#,
|
||||||
|
),
|
||||||
|
DocumentTemplate => ReindexOutcome::ReindexSometimes(
|
||||||
|
r#"
|
||||||
|
- 🏗️ When modified, embeddings are regenerated for documents whose rendering through the template produces a different text.
|
||||||
|
"#,
|
||||||
|
),
|
||||||
|
DocumentTemplateMaxBytes => ReindexOutcome::ReindexSometimes(
|
||||||
|
r#"
|
||||||
|
- 🏗️ When increased, embeddings are regenerated for documents whose rendering through the template produces a different text.
|
||||||
|
- 🌱 When decreased, embeddings are never regenerated
|
||||||
|
"#,
|
||||||
|
),
|
||||||
|
Url => ReindexOutcome::ReindexSometimes(
|
||||||
|
r#"
|
||||||
|
- 🌱 When modified for source `openAi`, embeddings are never regenerated
|
||||||
|
- 🏗️ When modified for sources `ollama` and `rest`, embeddings are always regenerated
|
||||||
|
"#,
|
||||||
|
),
|
||||||
|
Request => ReindexOutcome::AlwaysReindex,
|
||||||
|
Response => ReindexOutcome::AlwaysReindex,
|
||||||
|
Headers => ReindexOutcome::NeverReindex,
|
||||||
|
SearchEmbedder => ReindexOutcome::NeverReindex,
|
||||||
|
IndexingEmbedder => ReindexOutcome::ReindexSometimes(
|
||||||
|
r#"
|
||||||
|
- Embedding are regenerated when the setting modified in the indexing embedder require regeneration.
|
||||||
|
"#,
|
||||||
|
),
|
||||||
|
Distribution => ReindexOutcome::NeverReindex,
|
||||||
|
BinaryQuantized => ReindexOutcome::ReindexSometimes(
|
||||||
|
r#"
|
||||||
|
- Embeddings are not regenerated, but the binary quantization takes time during indexing.
|
||||||
|
"#,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EmbeddingSettings {
|
impl EmbeddingSettings {
|
||||||
@@ -1311,7 +1471,7 @@ impl EmbeddingSettings {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn field_status(
|
pub fn field_status(
|
||||||
source: EmbedderSource,
|
source: EmbedderSource,
|
||||||
field: MetaEmbeddingSetting,
|
field: MetaEmbeddingSetting,
|
||||||
context: NestingContext,
|
context: NestingContext,
|
||||||
|
|||||||
5
docs/README.md
Normal file
5
docs/README.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
This directory is meant for auto-generated documentation for internal purposes.
|
||||||
|
|
||||||
|
Please refer to <https://meilisearch.com/docs> for the public documentation of Meilisearch.
|
||||||
|
|
||||||
|
- [Embedder settings auto-generated description](./embedder_settings.md)
|
||||||
1398
docs/embedder_settings.md
Normal file
1398
docs/embedder_settings.md
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user