mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Implementing an IS EMPTY filter
This commit is contained in:
		| @@ -21,6 +21,7 @@ pub enum Condition<'a> { | |||||||
|     Equal(Token<'a>), |     Equal(Token<'a>), | ||||||
|     NotEqual(Token<'a>), |     NotEqual(Token<'a>), | ||||||
|     Null, |     Null, | ||||||
|  |     Empty, | ||||||
|     Exists, |     Exists, | ||||||
|     LowerThan(Token<'a>), |     LowerThan(Token<'a>), | ||||||
|     LowerThanOrEqual(Token<'a>), |     LowerThanOrEqual(Token<'a>), | ||||||
| @@ -61,6 +62,22 @@ pub fn parse_is_not_null(input: Span) -> IResult<FilterCondition> { | |||||||
|     Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null })))) |     Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null })))) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// empty          = value "IS" WS+ "EMPTY" | ||||||
|  | pub fn parse_is_empty(input: Span) -> IResult<FilterCondition> { | ||||||
|  |     let (input, key) = parse_value(input)?; | ||||||
|  |  | ||||||
|  |     let (input, _) = tuple((tag("IS"), multispace1, tag("EMPTY")))(input)?; | ||||||
|  |     Ok((input, FilterCondition::Condition { fid: key, op: Empty })) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// empty          = value "IS" WS+ "NOT" WS+ "EMPTY" | ||||||
|  | pub fn parse_is_not_empty(input: Span) -> IResult<FilterCondition> { | ||||||
|  |     let (input, key) = parse_value(input)?; | ||||||
|  |  | ||||||
|  |     let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("EMPTY")))(input)?; | ||||||
|  |     Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Empty })))) | ||||||
|  | } | ||||||
|  |  | ||||||
| /// exist          = value "EXISTS" | /// exist          = value "EXISTS" | ||||||
| pub fn parse_exists(input: Span) -> IResult<FilterCondition> { | pub fn parse_exists(input: Span) -> IResult<FilterCondition> { | ||||||
|     let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?; |     let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?; | ||||||
|   | |||||||
| @@ -143,11 +143,9 @@ impl<'a> Display for Error<'a> { | |||||||
|             ErrorKind::MissingClosingDelimiter(c) => { |             ErrorKind::MissingClosingDelimiter(c) => { | ||||||
|                 writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)? |                 writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)? | ||||||
|             } |             } | ||||||
|             ErrorKind::InvalidPrimary if input.trim().is_empty() => { |  | ||||||
|                 writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")? |  | ||||||
|             } |  | ||||||
|             ErrorKind::InvalidPrimary => { |             ErrorKind::InvalidPrimary => { | ||||||
|                 writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)? |                 let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) }; | ||||||
|  |                 writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)? | ||||||
|             } |             } | ||||||
|             ErrorKind::ExpectedEof => { |             ErrorKind::ExpectedEof => { | ||||||
|                 writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)? |                 writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)? | ||||||
|   | |||||||
| @@ -47,7 +47,10 @@ mod value; | |||||||
| use std::fmt::Debug; | use std::fmt::Debug; | ||||||
|  |  | ||||||
| pub use condition::{parse_condition, parse_to, Condition}; | pub use condition::{parse_condition, parse_to, Condition}; | ||||||
| use condition::{parse_exists, parse_is_not_null, parse_is_null, parse_not_exists}; | use condition::{ | ||||||
|  |     parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null, | ||||||
|  |     parse_not_exists, | ||||||
|  | }; | ||||||
| use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; | use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; | ||||||
| pub use error::{Error, ErrorKind}; | pub use error::{Error, ErrorKind}; | ||||||
| use nom::branch::alt; | use nom::branch::alt; | ||||||
| @@ -416,6 +419,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> { | |||||||
|         parse_condition, |         parse_condition, | ||||||
|         parse_is_null, |         parse_is_null, | ||||||
|         parse_is_not_null, |         parse_is_not_null, | ||||||
|  |         parse_is_empty, | ||||||
|  |         parse_is_not_empty, | ||||||
|         parse_exists, |         parse_exists, | ||||||
|         parse_not_exists, |         parse_not_exists, | ||||||
|         parse_to, |         parse_to, | ||||||
| @@ -509,6 +514,13 @@ pub mod tests { | |||||||
|         insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL"); |         insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL"); | ||||||
|         insta::assert_display_snapshot!(p("subscribers  IS   NOT   NULL"), @"NOT ({subscribers} IS NULL)"); |         insta::assert_display_snapshot!(p("subscribers  IS   NOT   NULL"), @"NOT ({subscribers} IS NULL)"); | ||||||
|  |  | ||||||
|  |         // Test EMPTY + NOT EMPTY | ||||||
|  |         insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY"); | ||||||
|  |         insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)"); | ||||||
|  |         insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)"); | ||||||
|  |         insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY"); | ||||||
|  |         insta::assert_display_snapshot!(p("subscribers  IS   NOT   EMPTY"), @"NOT ({subscribers} IS EMPTY)"); | ||||||
|  |  | ||||||
|         // Test EXISTS + NOT EXISTS |         // Test EXISTS + NOT EXISTS | ||||||
|         insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS"); |         insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS"); | ||||||
|         insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)"); |         insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)"); | ||||||
| @@ -587,7 +599,7 @@ pub mod tests { | |||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
|         insta::assert_display_snapshot!(p("'OR'"), @r###" |         insta::assert_display_snapshot!(p("'OR'"), @r###" | ||||||
|         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. | ||||||
|         1:5 'OR' |         1:5 'OR' | ||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
| @@ -597,12 +609,12 @@ pub mod tests { | |||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
|         insta::assert_display_snapshot!(p("channel Ponce"), @r###" |         insta::assert_display_snapshot!(p("channel Ponce"), @r###" | ||||||
|         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. | ||||||
|         1:14 channel Ponce |         1:14 channel Ponce | ||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
|         insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###" |         insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###" | ||||||
|         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. | ||||||
|         19:19 channel = Ponce OR |         19:19 channel = Ponce OR | ||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
| @@ -667,12 +679,12 @@ pub mod tests { | |||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
|         insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###" |         insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###" | ||||||
|         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. | ||||||
|         1:17 colour NOT EXIST |         1:17 colour NOT EXIST | ||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
|         insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###" |         insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###" | ||||||
|         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. | ||||||
|         1:23 subscribers 100 TO1000 |         1:23 subscribers 100 TO1000 | ||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
| @@ -812,6 +824,7 @@ impl<'a> std::fmt::Display for Condition<'a> { | |||||||
|             Condition::Equal(token) => write!(f, "= {token}"), |             Condition::Equal(token) => write!(f, "= {token}"), | ||||||
|             Condition::NotEqual(token) => write!(f, "!= {token}"), |             Condition::NotEqual(token) => write!(f, "!= {token}"), | ||||||
|             Condition::Null => write!(f, "IS NULL"), |             Condition::Null => write!(f, "IS NULL"), | ||||||
|  |             Condition::Empty => write!(f, "IS EMPTY"), | ||||||
|             Condition::Exists => write!(f, "EXISTS"), |             Condition::Exists => write!(f, "EXISTS"), | ||||||
|             Condition::LowerThan(token) => write!(f, "< {token}"), |             Condition::LowerThan(token) => write!(f, "< {token}"), | ||||||
|             Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"), |             Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"), | ||||||
|   | |||||||
| @@ -188,6 +188,7 @@ fn is_keyword(s: &str) -> bool { | |||||||
|             | "EXISTS" |             | "EXISTS" | ||||||
|             | "IS" |             | "IS" | ||||||
|             | "NULL" |             | "NULL" | ||||||
|  |             | "EMPTY" | ||||||
|             | "_geoRadius" |             | "_geoRadius" | ||||||
|             | "_geoBoundingBox" |             | "_geoBoundingBox" | ||||||
|     ) |     ) | ||||||
|   | |||||||
| @@ -547,7 +547,7 @@ async fn filter_invalid_syntax_object() { | |||||||
|     index.wait_task(1).await; |     index.wait_task(1).await; | ||||||
|  |  | ||||||
|     let expected_response = json!({ |     let expected_response = json!({ | ||||||
|         "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", |         "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", | ||||||
|         "code": "invalid_search_filter", |         "code": "invalid_search_filter", | ||||||
|         "type": "invalid_request", |         "type": "invalid_request", | ||||||
|         "link": "https://docs.meilisearch.com/errors#invalid_search_filter" |         "link": "https://docs.meilisearch.com/errors#invalid_search_filter" | ||||||
| @@ -572,7 +572,7 @@ async fn filter_invalid_syntax_array() { | |||||||
|     index.wait_task(1).await; |     index.wait_task(1).await; | ||||||
|  |  | ||||||
|     let expected_response = json!({ |     let expected_response = json!({ | ||||||
|         "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", |         "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", | ||||||
|         "code": "invalid_search_filter", |         "code": "invalid_search_filter", | ||||||
|         "type": "invalid_request", |         "type": "invalid_request", | ||||||
|         "link": "https://docs.meilisearch.com/errors#invalid_search_filter" |         "link": "https://docs.meilisearch.com/errors#invalid_search_filter" | ||||||
|   | |||||||
| @@ -81,6 +81,7 @@ pub mod db_name { | |||||||
|     pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids"; |     pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids"; | ||||||
|     pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids"; |     pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids"; | ||||||
|     pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids"; |     pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids"; | ||||||
|  |     pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids"; | ||||||
|     pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids"; |     pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids"; | ||||||
|     pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; |     pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; | ||||||
|     pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; |     pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; | ||||||
| @@ -130,9 +131,10 @@ pub struct Index { | |||||||
|  |  | ||||||
|     /// Maps the facet field id and the docids for which this field exists |     /// Maps the facet field id and the docids for which this field exists | ||||||
|     pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, |     pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, | ||||||
|  |  | ||||||
|     /// Maps the facet field id and the docids for which this field is set as null |     /// Maps the facet field id and the docids for which this field is set as null | ||||||
|     pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, |     pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, | ||||||
|  |     /// Maps the facet field id and the docids for which this field is considered empty | ||||||
|  |     pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, | ||||||
|  |  | ||||||
|     /// Maps the facet field id and ranges of numbers with the docids that corresponds to them. |     /// Maps the facet field id and ranges of numbers with the docids that corresponds to them. | ||||||
|     pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, |     pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, | ||||||
| @@ -157,7 +159,7 @@ impl Index { | |||||||
|     ) -> Result<Index> { |     ) -> Result<Index> { | ||||||
|         use db_name::*; |         use db_name::*; | ||||||
|  |  | ||||||
|         options.max_dbs(20); |         options.max_dbs(21); | ||||||
|         unsafe { options.flag(Flags::MdbAlwaysFreePages) }; |         unsafe { options.flag(Flags::MdbAlwaysFreePages) }; | ||||||
|  |  | ||||||
|         let env = options.open(path)?; |         let env = options.open(path)?; | ||||||
| @@ -180,6 +182,7 @@ impl Index { | |||||||
|         let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?; |         let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?; | ||||||
|         let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?; |         let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?; | ||||||
|         let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?; |         let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?; | ||||||
|  |         let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?; | ||||||
|  |  | ||||||
|         let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?; |         let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?; | ||||||
|         let field_id_docid_facet_strings = |         let field_id_docid_facet_strings = | ||||||
| @@ -207,6 +210,7 @@ impl Index { | |||||||
|             facet_id_string_docids, |             facet_id_string_docids, | ||||||
|             facet_id_exists_docids, |             facet_id_exists_docids, | ||||||
|             facet_id_is_null_docids, |             facet_id_is_null_docids, | ||||||
|  |             facet_id_is_empty_docids, | ||||||
|             field_id_docid_facet_f64s, |             field_id_docid_facet_f64s, | ||||||
|             field_id_docid_facet_strings, |             field_id_docid_facet_strings, | ||||||
|             documents, |             documents, | ||||||
| @@ -851,6 +855,18 @@ impl Index { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Retrieve all the documents in which this field id is present and considered empty | ||||||
|  |     pub fn empty_faceted_documents_ids( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         field_id: FieldId, | ||||||
|  |     ) -> heed::Result<RoaringBitmap> { | ||||||
|  |         match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? { | ||||||
|  |             Some(docids) => Ok(docids), | ||||||
|  |             None => Ok(RoaringBitmap::new()), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Retrieve all the documents which contain this field id |     /// Retrieve all the documents which contain this field id | ||||||
|     pub fn exists_faceted_documents_ids( |     pub fn exists_faceted_documents_ids( | ||||||
|         &self, |         &self, | ||||||
|   | |||||||
| @@ -223,6 +223,10 @@ impl<'a> Filter<'a> { | |||||||
|                 let is_null = index.null_faceted_documents_ids(rtxn, field_id)?; |                 let is_null = index.null_faceted_documents_ids(rtxn, field_id)?; | ||||||
|                 return Ok(is_null); |                 return Ok(is_null); | ||||||
|             } |             } | ||||||
|  |             Condition::Empty => { | ||||||
|  |                 let is_empty = index.empty_faceted_documents_ids(rtxn, field_id)?; | ||||||
|  |                 return Ok(is_empty); | ||||||
|  |             } | ||||||
|             Condition::Exists => { |             Condition::Exists => { | ||||||
|                 let exist = index.exists_faceted_documents_ids(rtxn, field_id)?; |                 let exist = index.exists_faceted_documents_ids(rtxn, field_id)?; | ||||||
|                 return Ok(exist); |                 return Ok(exist); | ||||||
|   | |||||||
| @@ -35,6 +35,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { | |||||||
|             facet_id_string_docids, |             facet_id_string_docids, | ||||||
|             facet_id_exists_docids, |             facet_id_exists_docids, | ||||||
|             facet_id_is_null_docids, |             facet_id_is_null_docids, | ||||||
|  |             facet_id_is_empty_docids, | ||||||
|             field_id_docid_facet_f64s, |             field_id_docid_facet_f64s, | ||||||
|             field_id_docid_facet_strings, |             field_id_docid_facet_strings, | ||||||
|             documents, |             documents, | ||||||
| @@ -88,6 +89,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { | |||||||
|         facet_id_f64_docids.clear(self.wtxn)?; |         facet_id_f64_docids.clear(self.wtxn)?; | ||||||
|         facet_id_exists_docids.clear(self.wtxn)?; |         facet_id_exists_docids.clear(self.wtxn)?; | ||||||
|         facet_id_is_null_docids.clear(self.wtxn)?; |         facet_id_is_null_docids.clear(self.wtxn)?; | ||||||
|  |         facet_id_is_empty_docids.clear(self.wtxn)?; | ||||||
|         facet_id_string_docids.clear(self.wtxn)?; |         facet_id_string_docids.clear(self.wtxn)?; | ||||||
|         field_id_docid_facet_f64s.clear(self.wtxn)?; |         field_id_docid_facet_f64s.clear(self.wtxn)?; | ||||||
|         field_id_docid_facet_strings.clear(self.wtxn)?; |         field_id_docid_facet_strings.clear(self.wtxn)?; | ||||||
|   | |||||||
| @@ -246,6 +246,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|             script_language_docids, |             script_language_docids, | ||||||
|             facet_id_exists_docids, |             facet_id_exists_docids, | ||||||
|             facet_id_is_null_docids, |             facet_id_is_null_docids, | ||||||
|  |             facet_id_is_empty_docids, | ||||||
|             documents, |             documents, | ||||||
|         } = self.index; |         } = self.index; | ||||||
|  |  | ||||||
| @@ -531,6 +532,13 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|             &self.to_delete_docids, |             &self.to_delete_docids, | ||||||
|         )?; |         )?; | ||||||
|  |  | ||||||
|  |         // We delete the documents ids that are under the facet field id values. | ||||||
|  |         remove_docids_from_facet_id_docids( | ||||||
|  |             self.wtxn, | ||||||
|  |             facet_id_is_empty_docids, | ||||||
|  |             &self.to_delete_docids, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|         self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?; |         self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?; | ||||||
|  |  | ||||||
|         Ok(DetailedDocumentDeletionResult { |         Ok(DetailedDocumentDeletionResult { | ||||||
|   | |||||||
| @@ -21,6 +21,7 @@ pub struct ExtractedFacetValues { | |||||||
|     pub docid_fid_facet_numbers_chunk: grenad::Reader<File>, |     pub docid_fid_facet_numbers_chunk: grenad::Reader<File>, | ||||||
|     pub docid_fid_facet_strings_chunk: grenad::Reader<File>, |     pub docid_fid_facet_strings_chunk: grenad::Reader<File>, | ||||||
|     pub fid_facet_is_null_docids_chunk: grenad::Reader<File>, |     pub fid_facet_is_null_docids_chunk: grenad::Reader<File>, | ||||||
|  |     pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>, | ||||||
|     pub fid_facet_exists_docids_chunk: grenad::Reader<File>, |     pub fid_facet_exists_docids_chunk: grenad::Reader<File>, | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -56,6 +57,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( | |||||||
|  |  | ||||||
|     let mut facet_exists_docids = BTreeMap::<FieldId, RoaringBitmap>::new(); |     let mut facet_exists_docids = BTreeMap::<FieldId, RoaringBitmap>::new(); | ||||||
|     let mut facet_is_null_docids = BTreeMap::<FieldId, RoaringBitmap>::new(); |     let mut facet_is_null_docids = BTreeMap::<FieldId, RoaringBitmap>::new(); | ||||||
|  |     let mut facet_is_empty_docids = BTreeMap::<FieldId, RoaringBitmap>::new(); | ||||||
|  |  | ||||||
|     let mut key_buffer = Vec::new(); |     let mut key_buffer = Vec::new(); | ||||||
|     let mut cursor = obkv_documents.into_cursor()?; |     let mut cursor = obkv_documents.into_cursor()?; | ||||||
| @@ -80,10 +82,14 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( | |||||||
|                 key_buffer.extend_from_slice(docid_bytes); |                 key_buffer.extend_from_slice(docid_bytes); | ||||||
|  |  | ||||||
|                 let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?; |                 let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?; | ||||||
|  |  | ||||||
|                 match extract_facet_values(&value) { |                 match extract_facet_values(&value) { | ||||||
|                     FilterableValues::Null => { |                     FilterableValues::Null => { | ||||||
|                         facet_is_null_docids.entry(field_id).or_default().insert(document); |                         facet_is_null_docids.entry(field_id).or_default().insert(document); | ||||||
|                     } |                     } | ||||||
|  |                     FilterableValues::Empty => { | ||||||
|  |                         facet_is_empty_docids.entry(field_id).or_default().insert(document); | ||||||
|  |                     } | ||||||
|                     FilterableValues::Values { numbers, strings } => { |                     FilterableValues::Values { numbers, strings } => { | ||||||
|                         // insert facet numbers in sorter |                         // insert facet numbers in sorter | ||||||
|                         for number in numbers { |                         for number in numbers { | ||||||
| @@ -140,22 +146,34 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( | |||||||
|     } |     } | ||||||
|     let facet_is_null_docids_reader = writer_into_reader(facet_is_null_docids_writer)?; |     let facet_is_null_docids_reader = writer_into_reader(facet_is_null_docids_writer)?; | ||||||
|  |  | ||||||
|  |     let mut facet_is_empty_docids_writer = create_writer( | ||||||
|  |         indexer.chunk_compression_type, | ||||||
|  |         indexer.chunk_compression_level, | ||||||
|  |         tempfile::tempfile()?, | ||||||
|  |     ); | ||||||
|  |     for (fid, bitmap) in facet_is_empty_docids.into_iter() { | ||||||
|  |         let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap(); | ||||||
|  |         facet_is_empty_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?; | ||||||
|  |     } | ||||||
|  |     let facet_is_empty_docids_reader = writer_into_reader(facet_is_empty_docids_writer)?; | ||||||
|  |  | ||||||
|     Ok(ExtractedFacetValues { |     Ok(ExtractedFacetValues { | ||||||
|         docid_fid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?, |         docid_fid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?, | ||||||
|         docid_fid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?, |         docid_fid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?, | ||||||
|         fid_facet_is_null_docids_chunk: facet_is_null_docids_reader, |         fid_facet_is_null_docids_chunk: facet_is_null_docids_reader, | ||||||
|  |         fid_facet_is_empty_docids_chunk: facet_is_empty_docids_reader, | ||||||
|         fid_facet_exists_docids_chunk: facet_exists_docids_reader, |         fid_facet_exists_docids_chunk: facet_exists_docids_reader, | ||||||
|     }) |     }) | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Represent what a document field contains. | /// Represent what a document field contains. | ||||||
| enum FilterableValues { | enum FilterableValues { | ||||||
|  |     /// Corresponds to the JSON `null` value. | ||||||
|     Null, |     Null, | ||||||
|  |     /// Corresponds to an empty string `""`, an empty array `[]`, or an empty object `{}`. | ||||||
|  |     Empty, | ||||||
|     /// Represents all the numbers and strings values found in this document field. |     /// Represents all the numbers and strings values found in this document field. | ||||||
|     Values { |     Values { numbers: Vec<f64>, strings: Vec<(String, String)> }, | ||||||
|         numbers: Vec<f64>, |  | ||||||
|         strings: Vec<(String, String)>, |  | ||||||
|     }, |  | ||||||
| } | } | ||||||
|  |  | ||||||
| fn extract_facet_values(value: &Value) -> FilterableValues { | fn extract_facet_values(value: &Value) -> FilterableValues { | ||||||
| @@ -192,6 +210,9 @@ fn extract_facet_values(value: &Value) -> FilterableValues { | |||||||
|  |  | ||||||
|     match value { |     match value { | ||||||
|         Value::Null => FilterableValues::Null, |         Value::Null => FilterableValues::Null, | ||||||
|  |         Value::String(s) if s.is_empty() => FilterableValues::Empty, | ||||||
|  |         Value::Array(a) if a.is_empty() => FilterableValues::Empty, | ||||||
|  |         Value::Object(o) if o.is_empty() => FilterableValues::Empty, | ||||||
|         otherwise => { |         otherwise => { | ||||||
|             let mut numbers = Vec::new(); |             let mut numbers = Vec::new(); | ||||||
|             let mut strings = Vec::new(); |             let mut strings = Vec::new(); | ||||||
|   | |||||||
| @@ -55,7 +55,8 @@ pub(crate) fn data_from_obkv_documents( | |||||||
|         .collect::<Result<()>>()?; |         .collect::<Result<()>>()?; | ||||||
|  |  | ||||||
|     #[allow(clippy::type_complexity)] |     #[allow(clippy::type_complexity)] | ||||||
|     let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, Vec<_>))))> = flattened_obkv_chunks |     let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, Vec<_>)))))> = | ||||||
|  |         flattened_obkv_chunks | ||||||
|             .par_bridge() |             .par_bridge() | ||||||
|             .map(|flattened_obkv_chunks| { |             .map(|flattened_obkv_chunks| { | ||||||
|                 send_and_extract_flattened_documents_data( |                 send_and_extract_flattened_documents_data( | ||||||
| @@ -78,7 +79,10 @@ pub(crate) fn data_from_obkv_documents( | |||||||
|             docid_fid_facet_numbers_chunks, |             docid_fid_facet_numbers_chunks, | ||||||
|             ( |             ( | ||||||
|                 docid_fid_facet_strings_chunks, |                 docid_fid_facet_strings_chunks, | ||||||
|                 (facet_is_null_docids_chunks, facet_exists_docids_chunks), |                 ( | ||||||
|  |                     facet_is_null_docids_chunks, | ||||||
|  |                     (facet_is_empty_docids_chunks, facet_exists_docids_chunks), | ||||||
|  |                 ), | ||||||
|             ), |             ), | ||||||
|         ), |         ), | ||||||
|     ) = result?; |     ) = result?; | ||||||
| @@ -115,6 +119,22 @@ pub(crate) fn data_from_obkv_documents( | |||||||
|         }); |         }); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     // merge facet_is_empty_docids and send them as a typed chunk | ||||||
|  |     { | ||||||
|  |         let lmdb_writer_sx = lmdb_writer_sx.clone(); | ||||||
|  |         rayon::spawn(move || { | ||||||
|  |             debug!("merge {} database", "facet-id-is-empty-docids"); | ||||||
|  |             match facet_is_empty_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) { | ||||||
|  |                 Ok(reader) => { | ||||||
|  |                     let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader))); | ||||||
|  |                 } | ||||||
|  |                 Err(e) => { | ||||||
|  |                     let _ = lmdb_writer_sx.send(Err(e)); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         }); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>( |     spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>( | ||||||
|         docid_word_positions_chunks.clone(), |         docid_word_positions_chunks.clone(), | ||||||
|         indexer, |         indexer, | ||||||
| @@ -254,7 +274,10 @@ fn send_and_extract_flattened_documents_data( | |||||||
|     grenad::Reader<CursorClonableMmap>, |     grenad::Reader<CursorClonableMmap>, | ||||||
|     ( |     ( | ||||||
|         grenad::Reader<CursorClonableMmap>, |         grenad::Reader<CursorClonableMmap>, | ||||||
|         (grenad::Reader<CursorClonableMmap>, (grenad::Reader<File>, grenad::Reader<File>)), |         ( | ||||||
|  |             grenad::Reader<CursorClonableMmap>, | ||||||
|  |             (grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)), | ||||||
|  |         ), | ||||||
|     ), |     ), | ||||||
| )> { | )> { | ||||||
|     let flattened_documents_chunk = |     let flattened_documents_chunk = | ||||||
| @@ -304,6 +327,7 @@ fn send_and_extract_flattened_documents_data( | |||||||
|                     docid_fid_facet_numbers_chunk, |                     docid_fid_facet_numbers_chunk, | ||||||
|                     docid_fid_facet_strings_chunk, |                     docid_fid_facet_strings_chunk, | ||||||
|                     fid_facet_is_null_docids_chunk, |                     fid_facet_is_null_docids_chunk, | ||||||
|  |                     fid_facet_is_empty_docids_chunk, | ||||||
|                     fid_facet_exists_docids_chunk, |                     fid_facet_exists_docids_chunk, | ||||||
|                 } = extract_fid_docid_facet_values( |                 } = extract_fid_docid_facet_values( | ||||||
|                     flattened_documents_chunk.clone(), |                     flattened_documents_chunk.clone(), | ||||||
| @@ -331,7 +355,10 @@ fn send_and_extract_flattened_documents_data( | |||||||
|                     docid_fid_facet_numbers_chunk, |                     docid_fid_facet_numbers_chunk, | ||||||
|                     ( |                     ( | ||||||
|                         docid_fid_facet_strings_chunk, |                         docid_fid_facet_strings_chunk, | ||||||
|                         (fid_facet_is_null_docids_chunk, fid_facet_exists_docids_chunk), |                         ( | ||||||
|  |                             fid_facet_is_null_docids_chunk, | ||||||
|  |                             (fid_facet_is_empty_docids_chunk, fid_facet_exists_docids_chunk), | ||||||
|  |                         ), | ||||||
|                     ), |                     ), | ||||||
|                 )) |                 )) | ||||||
|             }, |             }, | ||||||
|   | |||||||
| @@ -40,6 +40,7 @@ pub(crate) enum TypedChunk { | |||||||
|     FieldIdFacetNumberDocids(grenad::Reader<File>), |     FieldIdFacetNumberDocids(grenad::Reader<File>), | ||||||
|     FieldIdFacetExistsDocids(grenad::Reader<File>), |     FieldIdFacetExistsDocids(grenad::Reader<File>), | ||||||
|     FieldIdFacetIsNullDocids(grenad::Reader<File>), |     FieldIdFacetIsNullDocids(grenad::Reader<File>), | ||||||
|  |     FieldIdFacetIsEmptyDocids(grenad::Reader<File>), | ||||||
|     GeoPoints(grenad::Reader<File>), |     GeoPoints(grenad::Reader<File>), | ||||||
|     ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>), |     ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>), | ||||||
| } | } | ||||||
| @@ -173,6 +174,17 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|             )?; |             )?; | ||||||
|             is_merged_database = true; |             is_merged_database = true; | ||||||
|         } |         } | ||||||
|  |         TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => { | ||||||
|  |             append_entries_into_database( | ||||||
|  |                 facet_id_is_empty_docids, | ||||||
|  |                 &index.facet_id_is_empty_docids, | ||||||
|  |                 wtxn, | ||||||
|  |                 index_is_empty, | ||||||
|  |                 |value, _buffer| Ok(value), | ||||||
|  |                 merge_cbo_roaring_bitmaps, | ||||||
|  |             )?; | ||||||
|  |             is_merged_database = true; | ||||||
|  |         } | ||||||
|         TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => { |         TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => { | ||||||
|             append_entries_into_database( |             append_entries_into_database( | ||||||
|                 word_pair_proximity_docids_iter, |                 word_pair_proximity_docids_iter, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user