Merge remote-tracking branch 'milli/main' into bring-v1-changes

Kerollmops
2023-02-06 16:48:10 +01:00
15 changed files with 171 additions and 88 deletions

@@ -575,8 +575,8 @@ fn remove_from_word_docids(
 }
 
 fn remove_docids_from_field_id_docid_facet_value(
-    index: &'_ Index,
-    wtxn: &'_ mut heed::RwTxn,
+    index: &Index,
+    wtxn: &mut heed::RwTxn,
     facet_type: FacetType,
     field_id: FieldId,
     to_remove: &RoaringBitmap,
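
This hunk, like the facet-update hunks below, only drops the anonymous `'_` lifetime: in argument position `'_` is the explicit placeholder for a lifetime the compiler elides anyway, so the two spellings are equivalent and the change is purely cosmetic. A minimal sketch of that equivalence, with stub types standing in for milli's `Index` and heed's `RwTxn`:

    struct Index;
    struct RwTxn;

    // Elided form: the compiler assigns a fresh lifetime to each reference.
    fn with_elision(_index: &Index, _wtxn: &mut RwTxn) {}

    // Placeholder form: `'_` names the same elided lifetime explicitly.
    fn with_placeholder(_index: &'_ Index, _wtxn: &'_ mut RwTxn) {}

    fn main() {
        let index = Index;
        let mut wtxn = RwTxn;
        with_elision(&index, &mut wtxn);
        with_placeholder(&index, &mut wtxn);
    }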

@@ -159,7 +159,7 @@ impl FacetsUpdateIncrementalInner {
     /// See documentation of `insert_in_level`
     fn insert_in_level_0(
         &self,
-        txn: &'_ mut RwTxn,
+        txn: &mut RwTxn,
         field_id: u16,
         facet_value: &[u8],
         docids: &RoaringBitmap,
@@ -213,7 +213,7 @@ impl FacetsUpdateIncrementalInner {
     /// of the parent node should be incremented.
     fn insert_in_level(
         &self,
-        txn: &'_ mut RwTxn,
+        txn: &mut RwTxn,
         field_id: u16,
         level: u8,
         facet_value: &[u8],
@@ -350,7 +350,7 @@ impl FacetsUpdateIncrementalInner {
     /// Insert the given facet value and corresponding document ids in the database.
     pub fn insert(
         &self,
-        txn: &'_ mut RwTxn,
+        txn: &mut RwTxn,
         field_id: u16,
         facet_value: &[u8],
         docids: &RoaringBitmap,
@@ -472,7 +472,7 @@ impl FacetsUpdateIncrementalInner {
     /// its left bound as well.
     fn delete_in_level(
         &self,
-        txn: &'_ mut RwTxn,
+        txn: &mut RwTxn,
         field_id: u16,
         level: u8,
         facet_value: &[u8],
@@ -531,7 +531,7 @@ impl FacetsUpdateIncrementalInner {
 
     fn delete_in_level_0(
         &self,
-        txn: &'_ mut RwTxn,
+        txn: &mut RwTxn,
         field_id: u16,
         facet_value: &[u8],
         docids: &RoaringBitmap,
@@ -559,7 +559,7 @@ impl FacetsUpdateIncrementalInner {
 
     pub fn delete(
         &self,
-        txn: &'_ mut RwTxn,
+        txn: &mut RwTxn,
         field_id: u16,
         facet_value: &[u8],
         docids: &RoaringBitmap,

@@ -98,7 +98,12 @@ pub fn enrich_documents_batch<R: Read + Seek>(
 
     // If the settings specifies that a _geo field must be used therefore we must check the
     // validity of it in all the documents of this batch and this is when we return `Some`.
     let geo_field_id = match documents_batch_index.id("_geo") {
-        Some(geo_field_id) if index.sortable_fields(rtxn)?.contains("_geo") => Some(geo_field_id),
+        Some(geo_field_id)
+            if index.sortable_fields(rtxn)?.contains("_geo")
+                || index.filterable_fields(rtxn)?.contains("_geo") =>
+        {
+            Some(geo_field_id)
+        }
         _otherwise => None,
     };
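
With the extra `||` arm, `_geo` is now validated whenever the field is sortable or filterable, not only sortable. A standalone sketch of just the guard logic, where the two `HashSet`s stand in for the results of `index.sortable_fields(rtxn)?` and `index.filterable_fields(rtxn)?`, and the free function `geo_field_id` is a hypothetical name:

    use std::collections::HashSet;

    fn geo_field_id(
        geo_id: Option<u16>,
        sortable_fields: &HashSet<String>,
        filterable_fields: &HashSet<String>,
    ) -> Option<u16> {
        match geo_id {
            // Keep the field id (and thus validate documents) as soon as
            // `_geo` participates in sorting or filtering.
            Some(id)
                if sortable_fields.contains("_geo")
                    || filterable_fields.contains("_geo") =>
            {
                Some(id)
            }
            _otherwise => None,
        }
    }

    fn main() {
        let sortable = HashSet::new();
        let filterable = HashSet::from(["_geo".to_string()]);
        // Before this change the filterable-only case returned None.
        assert_eq!(geo_field_id(Some(0), &sortable, &filterable), Some(0));
        assert_eq!(geo_field_id(Some(0), &sortable, &HashSet::new()), None);
    }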
@@ -367,11 +372,17 @@ pub fn extract_finite_float_from_value(value: Value) -> StdResult<f64, Value> {
 
 pub fn validate_geo_from_json(id: &DocumentId, bytes: &[u8]) -> Result<StdResult<(), GeoError>> {
     use GeoError::*;
-    let debug_id = || Value::from(id.debug());
+    let debug_id = || {
+        serde_json::from_slice(id.value().as_bytes()).unwrap_or_else(|_| Value::from(id.debug()))
+    };
     match serde_json::from_slice(bytes).map_err(InternalError::SerdeJson)? {
         Value::Object(mut object) => match (object.remove("lat"), object.remove("lng")) {
             (Some(lat), Some(lng)) => {
                 match (extract_finite_float_from_value(lat), extract_finite_float_from_value(lng)) {
+                    (Ok(_), Ok(_)) if !object.is_empty() => Ok(Err(UnexpectedExtraFields {
+                        document_id: debug_id(),
+                        value: object.into(),
+                    })),
                     (Ok(_), Ok(_)) => Ok(Ok(())),
                     (Err(value), Ok(_)) => Ok(Err(BadLatitude { document_id: debug_id(), value })),
                     (Ok(_), Err(value)) => Ok(Err(BadLongitude { document_id: debug_id(), value })),
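
The new first match arm turns any key left in the `_geo` object after `lat` and `lng` are removed into an `UnexpectedExtraFields` error instead of silently ignoring it. A self-contained approximation of that behaviour (errors reduced to plain strings and the finite-float checks omitted, so this is illustrative rather than milli's real API):

    use serde_json::{json, Map, Value};

    // Simplified stand-in for validate_geo_from_json: `lat` and `lng` are
    // popped out of the object, and any key left over is now an error.
    fn validate_geo(mut object: Map<String, Value>) -> Result<(), String> {
        match (object.remove("lat"), object.remove("lng")) {
            (Some(_), Some(_)) if !object.is_empty() => {
                Err(format!("unexpected extra fields: {}", Value::from(object)))
            }
            (Some(_), Some(_)) => Ok(()),
            _ => Err("missing lat or lng".to_string()),
        }
    }

    fn main() {
        let ok = json!({ "lat": 31.0, "lng": 42.0 });
        let extra = json!({ "lat": 31.0, "lng": 42.0, "altitude": 10.0 });
        assert!(validate_geo(ok.as_object().unwrap().clone()).is_ok());
        assert!(validate_geo(extra.as_object().unwrap().clone()).is_err());
    }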

@@ -965,34 +965,6 @@ mod tests {
             .unwrap();
     }
 
-    #[test]
-    fn index_all_flavour_of_geo() {
-        let mut index = TempIndex::new();
-
-        index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
-        index
-            .update_settings(|settings| {
-                settings.set_filterable_fields(hashset!(S("_geo")));
-            })
-            .unwrap();
-
-        index
-            .add_documents(documents!([
-                { "id": 0, "_geo": { "lat": 31, "lng": [42] } },
-                { "id": 1, "_geo": { "lat": "31" }, "_geo.lng": 42 },
-                { "id": 2, "_geo": { "lng": "42" }, "_geo.lat": "31" },
-                { "id": 3, "_geo.lat": 31, "_geo.lng": "42" },
-            ]))
-            .unwrap();
-
-        let rtxn = index.read_txn().unwrap();
-        let mut search = crate::Search::new(&rtxn, &index);
-        search.filter(crate::Filter::from_str("_geoRadius(31, 42, 0.000001)").unwrap().unwrap());
-
-        let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
-        assert_eq!(documents_ids, vec![0, 1, 2, 3]);
-    }
-
     #[test]
     fn geo_error() {
         let mut index = TempIndex::new();

@@ -37,9 +37,6 @@ where
             _ => T::deserialize_from_value(value, location).map(Setting::Set),
         }
     }
-    fn default() -> Option<Self> {
-        Some(Self::NotSet)
-    }
 }
 
 impl<T> Default for Setting<T> {
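
Removing the deserialization-specific `default()` is safe because the plain `Default` impl in the trailing context already yields `NotSet` for absent values. A sketch of that remaining shape, assuming the three variants milli's `Setting` exposes:

    #[derive(Debug, PartialEq)]
    pub enum Setting<T> {
        Set(T),
        Reset,
        NotSet,
    }

    impl<T> Default for Setting<T> {
        // A missing setting still materializes as NotSet through the
        // ordinary Default impl, so no deserr-specific default is needed.
        fn default() -> Self {
            Self::NotSet
        }
    }

    fn main() {
        assert_eq!(Setting::<u32>::default(), Setting::NotSet);
    }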

@@ -140,16 +140,20 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> {
 
         // We remove all the entries that are no more required in this word prefix position
         // docids database.
-        let mut iter =
-            self.index.word_prefix_position_docids.iter_mut(self.wtxn)?.lazily_decode_data();
-        while let Some(((prefix, _), _)) = iter.next().transpose()? {
-            if del_prefix_fst_words.contains(prefix.as_bytes()) {
-                unsafe { iter.del_current()? };
+        // We also avoid iterating over the whole `word_prefix_position_docids` database if we know in
+        // advance that the `if del_prefix_fst_words.contains(prefix.as_bytes()) {` condition below
+        // will always be false (i.e. if `del_prefix_fst_words` is empty).
+        if !del_prefix_fst_words.is_empty() {
+            let mut iter =
+                self.index.word_prefix_position_docids.iter_mut(self.wtxn)?.lazily_decode_data();
+            while let Some(((prefix, _), _)) = iter.next().transpose()? {
+                if del_prefix_fst_words.contains(prefix.as_bytes()) {
+                    unsafe { iter.del_current()? };
+                }
             }
+            drop(iter);
         }
-        drop(iter);
 
         // We finally write all the word prefix position docids into the LMDB database.
         sorter_into_lmdb_database(
             self.wtxn,
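
The added guard is a plain short-circuit: when `del_prefix_fst_words` is empty, the `contains` test inside the loop can never succeed, so the whole cursor walk over the database is skipped. The same pattern in a self-contained form, with a `HashSet` standing in for the prefix words to delete and a `Vec` for the database entries:

    use std::collections::HashSet;

    fn prune_prefixes(entries: &mut Vec<String>, del_prefixes: &HashSet<String>) {
        // Mirrors the change above: with an empty deletion set the
        // membership test would always be false, so skip the scan.
        if !del_prefixes.is_empty() {
            entries.retain(|prefix| !del_prefixes.contains(prefix));
        }
    }

    fn main() {
        let mut entries = vec!["he".to_string(), "wo".to_string()];
        prune_prefixes(&mut entries, &HashSet::new()); // no iteration needed
        assert_eq!(entries.len(), 2);

        let dels = HashSet::from(["he".to_string()]);
        prune_prefixes(&mut entries, &dels);
        assert_eq!(entries, vec!["wo".to_string()]);
    }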