Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-07-21 13:51:05 +00:00
Compare commits: v1.6.0-rc. ... prototype- (5 commits)

Commits:
bceaf4f981
d29b301618
a6fa0b97ec
38abfec611
84a5c304fc
@@ -600,11 +600,12 @@ pub fn settings(
         ),
     };
 
-    let embedders = index
+    let embedders: BTreeMap<_, _> = index
         .embedding_configs(rtxn)?
         .into_iter()
         .map(|(name, config)| (name, Setting::Set(config.into())))
         .collect();
+    let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
 
     Ok(Settings {
         displayed_attributes: match displayed_attributes {
@@ -631,7 +632,7 @@ pub fn settings(
         typo_tolerance: Setting::Set(typo_tolerance),
         faceting: Setting::Set(faceting),
         pagination: Setting::Set(pagination),
-        embedders: Setting::Set(embedders),
+        embedders,
         _kind: PhantomData,
     })
 }
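The two hunks above change `pub fn settings` so that an index with no embedding configuration reports `embedders` as `Setting::NotSet` instead of `Setting::Set` of an empty map. Since `NotSet` fields are skipped during serialization, the `"embedders"` key disappears entirely from the settings payload. A minimal sketch of that tri-state pattern (simplified stand-in types; the real `Setting` lives in meilisearch-types and also has a `Reset` variant):

```rust
use std::collections::BTreeMap;

use serde::Serialize;

// Simplified stand-in for the tri-state setting type used above.
#[derive(Serialize)]
#[serde(untagged)]
enum Setting<T> {
    Set(T),
    NotSet,
}

impl<T> Setting<T> {
    fn is_not_set(&self) -> bool {
        matches!(self, Setting::NotSet)
    }
}

#[derive(Serialize)]
struct Settings {
    // The whole key is dropped from the JSON output when the value is NotSet.
    #[serde(skip_serializing_if = "Setting::is_not_set")]
    embedders: Setting<BTreeMap<String, String>>,
}

fn main() {
    let no_embedders = Settings { embedders: Setting::NotSet };
    // Prints `{}`: no "embedders" key at all, which is what the updated
    // dump and settings tests below now expect.
    println!("{}", serde_json::to_string(&no_embedders).unwrap());
}
```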
@@ -77,8 +77,7 @@ async fn import_dump_v1_movie_raw() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
   }
   "###
   );

@@ -239,8 +238,7 @@ async fn import_dump_v1_movie_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
   }
   "###
   );

@@ -387,8 +385,7 @@ async fn import_dump_v1_rubygems_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
   }
   "###
   );

@@ -521,8 +518,7 @@ async fn import_dump_v2_movie_raw() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
   }
   "###
   );

@@ -667,8 +663,7 @@ async fn import_dump_v2_movie_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
   }
   "###
   );

@@ -812,8 +807,7 @@ async fn import_dump_v2_rubygems_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
   }
   "###
   );

@@ -946,8 +940,7 @@ async fn import_dump_v3_movie_raw() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
   }
   "###
   );

@@ -1092,8 +1085,7 @@ async fn import_dump_v3_movie_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
   }
   "###
   );

@@ -1237,8 +1229,7 @@ async fn import_dump_v3_rubygems_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
   }
   "###
   );

@@ -1371,8 +1362,7 @@ async fn import_dump_v4_movie_raw() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
   }
   "###
   );

@@ -1517,8 +1507,7 @@ async fn import_dump_v4_movie_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
   }
   "###
   );

@@ -1662,8 +1651,7 @@ async fn import_dump_v4_rubygems_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
   }
   "###
   );

@@ -1907,8 +1895,7 @@ async fn import_dump_v6_containing_experimental_features() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
   }
   "###);
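All thirteen dump-import hunks above make the same snapshot update: with `embedders` unset by default, the inline JSON snapshots of the imported settings lose their trailing `"embedders": {}` entry. A hedged miniature of the inline-snapshot style these tests use, written against the `insta` crate directly (Meilisearch wraps it in its own `meili_snap` helpers, so the names here are illustrative):

```rust
use serde_json::json;

#[test]
fn settings_snapshot() {
    let settings = json!({
        "pagination": { "maxTotalHits": 1000 }
    });
    // The expected value lives inline in the source; when serialization
    // changes shape (e.g. a key stops being emitted), `cargo insta review`
    // rewrites these blocks, which is what this diff reflects.
    insta::assert_json_snapshot!(settings, @r###"
    {
      "pagination": {
        "maxTotalHits": 1000
      }
    }
    "###);
}
```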
@@ -54,7 +54,7 @@ async fn get_settings() {
     let (response, code) = index.settings().await;
     assert_eq!(code, 200);
     let settings = response.as_object().unwrap();
-    assert_eq!(settings.keys().len(), 16);
+    assert_eq!(settings.keys().len(), 15);
     assert_eq!(settings["displayedAttributes"], json!(["*"]));
     assert_eq!(settings["searchableAttributes"], json!(["*"]));
     assert_eq!(settings["filterableAttributes"], json!([]));

@@ -83,7 +83,6 @@ async fn get_settings() {
         "maxTotalHits": 1000,
       })
     );
-    assert_eq!(settings["embedders"], json!({}));
     assert_eq!(settings["proximityPrecision"], json!("byWord"));
 }
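A quick way to see why the count drops from 16 to 15: the response is a JSON object, and a key that is never serialized simply isn't among `keys()`. A trivial illustration with hypothetical values:

```rust
use serde_json::json;

fn main() {
    // With "embedders" omitted from serialization, it neither shows up in
    // keys() nor as a retrievable value.
    let settings = json!({
        "displayedAttributes": ["*"],
        "pagination": { "maxTotalHits": 1000 }
    });
    let object = settings.as_object().unwrap();
    assert_eq!(object.keys().len(), 2);
    assert!(object.get("embedders").is_none());
}
```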
@@ -61,6 +61,7 @@ impl FacetsUpdateIncremental {
         }
     }
 
+    #[logging_timer::time("FacetsUpdateIncremental::{}")]
     pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> {
         let mut cursor = self.delta_data.into_cursor()?;
         while let Some((key, value)) = cursor.move_on_next()? {
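The only change here is the new `#[logging_timer::time(...)]` attribute, which wraps `execute` in a timer that logs the elapsed time when the function returns; the `{}` in the pattern is substituted with the function name. A minimal sketch of how the crate is typically wired up (the `env_logger` setup is an assumption; any backend for the `log` facade works):

```rust
use std::thread::sleep;
use std::time::Duration;

// Times the function and emits a log record on exit; `{}` becomes the
// function name, so this logs under a "FacetsUpdateIncremental::execute"
// style label.
#[logging_timer::time("FacetsUpdateIncremental::{}")]
fn execute() {
    sleep(Duration::from_millis(10));
}

fn main() {
    env_logger::init(); // logging_timer logs through the `log` facade
    execute();
}
```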
@@ -170,91 +170,91 @@ impl<'i> FacetsUpdate<'i> {
             incremental_update.execute(wtxn)?;
         }
 
-        // We clear the list of normalized-for-search facets
-        // and the previous FSTs to compute everything from scratch
-        self.index.facet_id_normalized_string_strings.clear(wtxn)?;
-        self.index.facet_id_string_fst.clear(wtxn)?;
+        // // We clear the list of normalized-for-search facets
+        // // and the previous FSTs to compute everything from scratch
+        // self.index.facet_id_normalized_string_strings.clear(wtxn)?;
+        // self.index.facet_id_string_fst.clear(wtxn)?;
 
-        // As we can't use the same write transaction to read and write in two different databases
-        // we must create a temporary sorter that we will write into LMDB afterward.
-        // As multiple unnormalized facet values can become the same normalized facet value
-        // we must merge them together.
-        let mut sorter = create_sorter(
-            SortAlgorithm::Unstable,
-            merge_btreeset_string,
-            CompressionType::None,
-            None,
-            None,
-            None,
-        );
+        // // As we can't use the same write transaction to read and write in two different databases
+        // // we must create a temporary sorter that we will write into LMDB afterward.
+        // // As multiple unnormalized facet values can become the same normalized facet value
+        // // we must merge them together.
+        // let mut sorter = create_sorter(
+        //     SortAlgorithm::Unstable,
+        //     merge_btreeset_string,
+        //     CompressionType::None,
+        //     None,
+        //     None,
+        //     None,
+        // );
 
-        // We iterate on the list of original, semi-normalized, facet values
-        // and normalize them for search, inserting them in LMDB in any given order.
-        let options = NormalizerOption { lossy: true, ..Default::default() };
-        let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
-        for result in database.iter(wtxn)? {
-            let (facet_group_key, ()) = result?;
-            if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
-                let mut normalized_facet = left_bound.normalize(&options);
-                let normalized_truncated_facet: String;
-                if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
-                    normalized_truncated_facet = normalized_facet
-                        .char_indices()
-                        .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
-                        .map(|(_, c)| c)
-                        .collect();
-                    normalized_facet = normalized_truncated_facet.into();
-                }
-                let set = BTreeSet::from_iter(std::iter::once(left_bound));
-                let key = (field_id, normalized_facet.as_ref());
-                let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
-                let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
-                sorter.insert(key, val)?;
-            }
-        }
+        // // We iterate on the list of original, semi-normalized, facet values
+        // // and normalize them for search, inserting them in LMDB in any given order.
+        // let options = NormalizerOption { lossy: true, ..Default::default() };
+        // let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
+        // for result in database.iter(wtxn)? {
+        //     let (facet_group_key, ()) = result?;
+        //     if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
+        //         let mut normalized_facet = left_bound.normalize(&options);
+        //         let normalized_truncated_facet: String;
+        //         if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
+        //             normalized_truncated_facet = normalized_facet
+        //                 .char_indices()
+        //                 .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
+        //                 .map(|(_, c)| c)
+        //                 .collect();
+        //             normalized_facet = normalized_truncated_facet.into();
+        //         }
+        //         let set = BTreeSet::from_iter(std::iter::once(left_bound));
+        //         let key = (field_id, normalized_facet.as_ref());
+        //         let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
+        //         let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
+        //         sorter.insert(key, val)?;
+        //     }
+        // }
 
-        // In this loop we don't need to take care of merging bitmaps
-        // as the grenad sorter already merged them for us.
-        let mut merger_iter = sorter.into_stream_merger_iter()?;
-        while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
-            self.index.facet_id_normalized_string_strings.remap_types::<Bytes, Bytes>().put(
-                wtxn,
-                key_bytes,
-                btreeset_bytes,
-            )?;
-        }
+        // // In this loop we don't need to take care of merging bitmaps
+        // // as the grenad sorter already merged them for us.
+        // let mut merger_iter = sorter.into_stream_merger_iter()?;
+        // while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? {
+        //     self.index.facet_id_normalized_string_strings.remap_types::<Bytes, Bytes>().put(
+        //         wtxn,
+        //         key_bytes,
+        //         btreeset_bytes,
+        //     )?;
+        // }
 
-        // We compute one FST by string facet
-        let mut text_fsts = vec![];
-        let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
-        let database =
-            self.index.facet_id_normalized_string_strings.remap_data_type::<DecodeIgnore>();
-        for result in database.iter(wtxn)? {
-            let ((field_id, normalized_facet), _) = result?;
-            current_fst = match current_fst.take() {
-                Some((fid, fst_builder)) if fid != field_id => {
-                    let fst = fst_builder.into_set();
-                    text_fsts.push((fid, fst));
-                    Some((field_id, fst::SetBuilder::memory()))
-                }
-                Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
-                None => Some((field_id, fst::SetBuilder::memory())),
-            };
+        // // We compute one FST by string facet
+        // let mut text_fsts = vec![];
+        // let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
+        // let database =
+        //     self.index.facet_id_normalized_string_strings.remap_data_type::<DecodeIgnore>();
+        // for result in database.iter(wtxn)? {
+        //     let ((field_id, normalized_facet), _) = result?;
+        //     current_fst = match current_fst.take() {
+        //         Some((fid, fst_builder)) if fid != field_id => {
+        //             let fst = fst_builder.into_set();
+        //             text_fsts.push((fid, fst));
+        //             Some((field_id, fst::SetBuilder::memory()))
+        //         }
+        //         Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
+        //         None => Some((field_id, fst::SetBuilder::memory())),
+        //     };
 
-            if let Some((_, fst_builder)) = current_fst.as_mut() {
-                fst_builder.insert(normalized_facet)?;
-            }
-        }
+        //     if let Some((_, fst_builder)) = current_fst.as_mut() {
+        //         fst_builder.insert(normalized_facet)?;
+        //     }
+        // }
 
-        if let Some((field_id, fst_builder)) = current_fst {
-            let fst = fst_builder.into_set();
-            text_fsts.push((field_id, fst));
-        }
+        // if let Some((field_id, fst_builder)) = current_fst {
+        //     let fst = fst_builder.into_set();
+        //     text_fsts.push((field_id, fst));
+        // }
 
-        // We write those FSTs in LMDB now
-        for (field_id, fst) in text_fsts {
-            self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
-        }
+        // // We write those FSTs in LMDB now
+        // for (field_id, fst) in text_fsts {
+        //     self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
+        // }
 
         Ok(())
     }
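This last hunk is the prototype's core change: it comments out the whole rebuild of the search-normalized facet strings, i.e. clearing `facet_id_normalized_string_strings` and `facet_id_string_fst`, re-normalizing every level-0 facet value through a grenad sorter, and building one `fst::Set` per field. For reference, a condensed, self-contained sketch of the per-field FST grouping step being disabled (in-memory input instead of an LMDB iteration, so the types are simplified):

```rust
use fst::SetBuilder;

// Builds one FST per field id from (field_id, normalized facet) pairs.
// Input must be sorted by field id and then by string, exactly like an
// iteration over the facet_id_normalized_string_strings database would be.
fn build_fsts(
    entries: &[(u16, &str)],
) -> Result<Vec<(u16, fst::Set<Vec<u8>>)>, fst::Error> {
    let mut text_fsts = Vec::new();
    let mut current: Option<(u16, SetBuilder<Vec<u8>>)> = None;

    for &(field_id, normalized) in entries {
        // Flush the finished builder whenever the field id changes.
        current = match current.take() {
            Some((fid, builder)) if fid != field_id => {
                text_fsts.push((fid, builder.into_set()));
                Some((field_id, SetBuilder::memory()))
            }
            Some(state) => Some(state),
            None => Some((field_id, SetBuilder::memory())),
        };
        if let Some((_, builder)) = current.as_mut() {
            builder.insert(normalized)?;
        }
    }
    // Flush the builder of the last field.
    if let Some((fid, builder)) = current {
        text_fsts.push((fid, builder.into_set()));
    }
    Ok(text_fsts)
}

fn main() -> Result<(), fst::Error> {
    let fsts = build_fsts(&[(0, "blue"), (0, "red"), (1, "ghibli")])?;
    assert_eq!(fsts.len(), 2);
    assert!(fsts[0].1.contains("red"));
    Ok(())
}
```

The grouping leans on the sorted iteration order of the source database, which is why the real code can stream one FST per field straight out of LMDB without buffering per-field sets.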