mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Merge #3571
3571: Introduce two filters to select documents with `null` and empty fields r=irevoire a=Kerollmops # Pull Request ## Related issue This PR implements the `X IS NULL`, `X IS NOT NULL`, `X IS EMPTY`, `X IS NOT EMPTY` filters that [this comment](https://github.com/meilisearch/product/discussions/539#discussioncomment-5115884) is describing in a very detailed manner. ## What does this PR do? ### `IS NULL` and `IS NOT NULL` This PR will be exposed as a prototype for now. Below is the copy/pasted version of a spec that defines this filter. - `IS NULL` matches fields that `EXISTS` AND `= IS NULL` - `IS NOT NULL` matches fields that `NOT EXISTS` OR `!= IS NULL` 1. `{"name": "A", "price": null}` 2. `{"name": "A", "price": 10}` 3. `{"name": "A"}` `price IS NULL` would match 1 `price IS NOT NULL` or `NOT price IS NULL` would match 2,3 `price EXISTS` would match 1, 2 `price NOT EXISTS` or `NOT price EXISTS` would match 3 common query : `(price EXISTS) AND (price IS NOT NULL)` would match 2 ### `IS EMPTY` and `IS NOT EMPTY` - `IS EMPTY` matches Array `[]`, Object `{}`, or String `""` fields that `EXISTS` and are empty - `IS NOT EMPTY` matches fields that `NOT EXISTS` OR are not empty. 1. `{"name": "A", "tags": null}` 2. `{"name": "A", "tags": [null]}` 3. `{"name": "A", "tags": []}` 4. `{"name": "A", "tags": ["hello","world"]}` 5. `{"name": "A", "tags": [""]}` 6. `{"name": "A"}` 7. `{"name": "A", "tags": {}}` 8. `{"name": "A", "tags": {"t1":"v1"}}` 9. `{"name": "A", "tags": {"t1":""}}` 10. `{"name": "A", "tags": ""}` `tags IS EMPTY` would match 3,7,10 `tags IS NOT EMPTY` or `NOT tags IS EMPTY` would match 1,2,4,5,6,8,9 `tags IS NULL` would match 1 `tags IS NOT NULL` or `NOT tags IS NULL` would match 2,3,4,5,6,7,8,9,10 `tags EXISTS` would match 1,2,3,4,5,7,8,9,10 `tags NOT EXISTS` or `NOT tags EXISTS` would match 6 common query : `(tags EXISTS) AND (tags IS NOT NULL) AND (tags IS NOT EMPTY)` would match 2,4,5,8,9 ## What should the reviewer do? - Check that I tested the filters - Check that I deleted the ids of the documents when deleting documents Co-authored-by: Clément Renault <clement@meilisearch.com> Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
		| @@ -20,6 +20,8 @@ pub enum Condition<'a> { | |||||||
|     GreaterThanOrEqual(Token<'a>), |     GreaterThanOrEqual(Token<'a>), | ||||||
|     Equal(Token<'a>), |     Equal(Token<'a>), | ||||||
|     NotEqual(Token<'a>), |     NotEqual(Token<'a>), | ||||||
|  |     Null, | ||||||
|  |     Empty, | ||||||
|     Exists, |     Exists, | ||||||
|     LowerThan(Token<'a>), |     LowerThan(Token<'a>), | ||||||
|     LowerThanOrEqual(Token<'a>), |     LowerThanOrEqual(Token<'a>), | ||||||
| @@ -44,6 +46,38 @@ pub fn parse_condition(input: Span) -> IResult<FilterCondition> { | |||||||
|     Ok((input, condition)) |     Ok((input, condition)) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// null          = value "IS" WS+ "NULL" | ||||||
|  | pub fn parse_is_null(input: Span) -> IResult<FilterCondition> { | ||||||
|  |     let (input, key) = parse_value(input)?; | ||||||
|  |  | ||||||
|  |     let (input, _) = tuple((tag("IS"), multispace1, tag("NULL")))(input)?; | ||||||
|  |     Ok((input, FilterCondition::Condition { fid: key, op: Null })) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// null          = value "IS" WS+ "NOT" WS+ "NULL" | ||||||
|  | pub fn parse_is_not_null(input: Span) -> IResult<FilterCondition> { | ||||||
|  |     let (input, key) = parse_value(input)?; | ||||||
|  |  | ||||||
|  |     let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("NULL")))(input)?; | ||||||
|  |     Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null })))) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// empty          = value "IS" WS+ "EMPTY" | ||||||
|  | pub fn parse_is_empty(input: Span) -> IResult<FilterCondition> { | ||||||
|  |     let (input, key) = parse_value(input)?; | ||||||
|  |  | ||||||
|  |     let (input, _) = tuple((tag("IS"), multispace1, tag("EMPTY")))(input)?; | ||||||
|  |     Ok((input, FilterCondition::Condition { fid: key, op: Empty })) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// empty          = value "IS" WS+ "NOT" WS+ "EMPTY" | ||||||
|  | pub fn parse_is_not_empty(input: Span) -> IResult<FilterCondition> { | ||||||
|  |     let (input, key) = parse_value(input)?; | ||||||
|  |  | ||||||
|  |     let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("EMPTY")))(input)?; | ||||||
|  |     Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Empty })))) | ||||||
|  | } | ||||||
|  |  | ||||||
| /// exist          = value "EXISTS" | /// exist          = value "EXISTS" | ||||||
| pub fn parse_exists(input: Span) -> IResult<FilterCondition> { | pub fn parse_exists(input: Span) -> IResult<FilterCondition> { | ||||||
|     let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?; |     let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?; | ||||||
|   | |||||||
| @@ -143,11 +143,9 @@ impl<'a> Display for Error<'a> { | |||||||
|             ErrorKind::MissingClosingDelimiter(c) => { |             ErrorKind::MissingClosingDelimiter(c) => { | ||||||
|                 writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)? |                 writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)? | ||||||
|             } |             } | ||||||
|             ErrorKind::InvalidPrimary if input.trim().is_empty() => { |  | ||||||
|                 writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")? |  | ||||||
|             } |  | ||||||
|             ErrorKind::InvalidPrimary => { |             ErrorKind::InvalidPrimary => { | ||||||
|                 writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)? |                 let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) }; | ||||||
|  |                 writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)? | ||||||
|             } |             } | ||||||
|             ErrorKind::ExpectedEof => { |             ErrorKind::ExpectedEof => { | ||||||
|                 writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)? |                 writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)? | ||||||
|   | |||||||
| @@ -47,7 +47,10 @@ mod value; | |||||||
| use std::fmt::Debug; | use std::fmt::Debug; | ||||||
|  |  | ||||||
| pub use condition::{parse_condition, parse_to, Condition}; | pub use condition::{parse_condition, parse_to, Condition}; | ||||||
| use condition::{parse_exists, parse_not_exists}; | use condition::{ | ||||||
|  |     parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null, | ||||||
|  |     parse_not_exists, | ||||||
|  | }; | ||||||
| use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; | use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; | ||||||
| pub use error::{Error, ErrorKind}; | pub use error::{Error, ErrorKind}; | ||||||
| use nom::branch::alt; | use nom::branch::alt; | ||||||
| @@ -442,6 +445,10 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> { | |||||||
|         parse_in, |         parse_in, | ||||||
|         parse_not_in, |         parse_not_in, | ||||||
|         parse_condition, |         parse_condition, | ||||||
|  |         parse_is_null, | ||||||
|  |         parse_is_not_null, | ||||||
|  |         parse_is_empty, | ||||||
|  |         parse_is_not_empty, | ||||||
|         parse_exists, |         parse_exists, | ||||||
|         parse_not_exists, |         parse_not_exists, | ||||||
|         parse_to, |         parse_to, | ||||||
| @@ -526,14 +533,30 @@ pub mod tests { | |||||||
|         insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}"); |         insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}"); | ||||||
|         insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}"); |         insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}"); | ||||||
|  |  | ||||||
|         // Test NOT + EXISTS |         // Test NOT | ||||||
|         insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS"); |  | ||||||
|         insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})"); |         insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})"); | ||||||
|  |         insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})"); | ||||||
|  |  | ||||||
|  |         // Test NULL + NOT NULL | ||||||
|  |         insta::assert_display_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL"); | ||||||
|  |         insta::assert_display_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)"); | ||||||
|  |         insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)"); | ||||||
|  |         insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL"); | ||||||
|  |         insta::assert_display_snapshot!(p("subscribers  IS   NOT   NULL"), @"NOT ({subscribers} IS NULL)"); | ||||||
|  |  | ||||||
|  |         // Test EMPTY + NOT EMPTY | ||||||
|  |         insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY"); | ||||||
|  |         insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)"); | ||||||
|  |         insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)"); | ||||||
|  |         insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY"); | ||||||
|  |         insta::assert_display_snapshot!(p("subscribers  IS   NOT   EMPTY"), @"NOT ({subscribers} IS EMPTY)"); | ||||||
|  |  | ||||||
|  |         // Test EXISTS + NOT EXITS | ||||||
|  |         insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS"); | ||||||
|         insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)"); |         insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)"); | ||||||
|         insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); |         insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); | ||||||
|         insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS"); |         insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS"); | ||||||
|         insta::assert_display_snapshot!(p("subscribers NOT   EXISTS"), @"NOT ({subscribers} EXISTS)"); |         insta::assert_display_snapshot!(p("subscribers NOT   EXISTS"), @"NOT ({subscribers} EXISTS)"); | ||||||
|         insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})"); |  | ||||||
|  |  | ||||||
|         // Test nested NOT |         // Test nested NOT | ||||||
|         insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}"); |         insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}"); | ||||||
| @@ -606,7 +629,7 @@ pub mod tests { | |||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
|         insta::assert_display_snapshot!(p("'OR'"), @r###" |         insta::assert_display_snapshot!(p("'OR'"), @r###" | ||||||
|         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. | ||||||
|         1:5 'OR' |         1:5 'OR' | ||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
| @@ -616,12 +639,12 @@ pub mod tests { | |||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
|         insta::assert_display_snapshot!(p("channel Ponce"), @r###" |         insta::assert_display_snapshot!(p("channel Ponce"), @r###" | ||||||
|         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. | ||||||
|         1:14 channel Ponce |         1:14 channel Ponce | ||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
|         insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###" |         insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###" | ||||||
|         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. | ||||||
|         19:19 channel = Ponce OR |         19:19 channel = Ponce OR | ||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
| @@ -706,12 +729,12 @@ pub mod tests { | |||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
|         insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###" |         insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###" | ||||||
|         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. | ||||||
|         1:17 colour NOT EXIST |         1:17 colour NOT EXIST | ||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
|         insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###" |         insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###" | ||||||
|         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. | ||||||
|         1:23 subscribers 100 TO1000 |         1:23 subscribers 100 TO1000 | ||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
| @@ -772,6 +795,39 @@ pub mod tests { | |||||||
|         Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes. |         Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes. | ||||||
|         5:7 NOT OR EXISTS AND EXISTS NOT EXISTS |         5:7 NOT OR EXISTS AND EXISTS NOT EXISTS | ||||||
|         "###); |         "###); | ||||||
|  |  | ||||||
|  |         insta::assert_display_snapshot!(p(r#"value NULL"#), @r###" | ||||||
|  |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`. | ||||||
|  |         1:11 value NULL | ||||||
|  |         "###); | ||||||
|  |         insta::assert_display_snapshot!(p(r#"value NOT NULL"#), @r###" | ||||||
|  |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`. | ||||||
|  |         1:15 value NOT NULL | ||||||
|  |         "###); | ||||||
|  |         insta::assert_display_snapshot!(p(r#"value EMPTY"#), @r###" | ||||||
|  |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`. | ||||||
|  |         1:12 value EMPTY | ||||||
|  |         "###); | ||||||
|  |         insta::assert_display_snapshot!(p(r#"value NOT EMPTY"#), @r###" | ||||||
|  |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`. | ||||||
|  |         1:16 value NOT EMPTY | ||||||
|  |         "###); | ||||||
|  |         insta::assert_display_snapshot!(p(r#"value IS"#), @r###" | ||||||
|  |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`. | ||||||
|  |         1:9 value IS | ||||||
|  |         "###); | ||||||
|  |         insta::assert_display_snapshot!(p(r#"value IS NOT"#), @r###" | ||||||
|  |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`. | ||||||
|  |         1:13 value IS NOT | ||||||
|  |         "###); | ||||||
|  |         insta::assert_display_snapshot!(p(r#"value IS EXISTS"#), @r###" | ||||||
|  |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`. | ||||||
|  |         1:16 value IS EXISTS | ||||||
|  |         "###); | ||||||
|  |         insta::assert_display_snapshot!(p(r#"value IS NOT EXISTS"#), @r###" | ||||||
|  |         Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`. | ||||||
|  |         1:20 value IS NOT EXISTS | ||||||
|  |         "###); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
| @@ -853,6 +909,8 @@ impl<'a> std::fmt::Display for Condition<'a> { | |||||||
|             Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"), |             Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"), | ||||||
|             Condition::Equal(token) => write!(f, "= {token}"), |             Condition::Equal(token) => write!(f, "= {token}"), | ||||||
|             Condition::NotEqual(token) => write!(f, "!= {token}"), |             Condition::NotEqual(token) => write!(f, "!= {token}"), | ||||||
|  |             Condition::Null => write!(f, "IS NULL"), | ||||||
|  |             Condition::Empty => write!(f, "IS EMPTY"), | ||||||
|             Condition::Exists => write!(f, "EXISTS"), |             Condition::Exists => write!(f, "EXISTS"), | ||||||
|             Condition::LowerThan(token) => write!(f, "< {token}"), |             Condition::LowerThan(token) => write!(f, "< {token}"), | ||||||
|             Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"), |             Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"), | ||||||
|   | |||||||
| @@ -183,7 +183,20 @@ fn is_syntax_component(c: char) -> bool { | |||||||
| } | } | ||||||
|  |  | ||||||
| fn is_keyword(s: &str) -> bool { | fn is_keyword(s: &str) -> bool { | ||||||
|     matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius" | "_geoBoundingBox") |     matches!( | ||||||
|  |         s, | ||||||
|  |         "AND" | ||||||
|  |             | "OR" | ||||||
|  |             | "IN" | ||||||
|  |             | "NOT" | ||||||
|  |             | "TO" | ||||||
|  |             | "EXISTS" | ||||||
|  |             | "IS" | ||||||
|  |             | "NULL" | ||||||
|  |             | "EMPTY" | ||||||
|  |             | "_geoRadius" | ||||||
|  |             | "_geoBoundingBox" | ||||||
|  |     ) | ||||||
| } | } | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
|   | |||||||
| @@ -4,51 +4,56 @@ use serde_json::{Map, Value}; | |||||||
|  |  | ||||||
| pub fn flatten(json: &Map<String, Value>) -> Map<String, Value> { | pub fn flatten(json: &Map<String, Value>) -> Map<String, Value> { | ||||||
|     let mut obj = Map::new(); |     let mut obj = Map::new(); | ||||||
|     let mut all_keys = vec![]; |     let mut all_entries = vec![]; | ||||||
|     insert_object(&mut obj, None, json, &mut all_keys); |     insert_object(&mut obj, None, json, &mut all_entries); | ||||||
|     for key in all_keys { |     for (key, old_val) in all_entries { | ||||||
|         obj.entry(key).or_insert(Value::Array(vec![])); |         obj.entry(key).or_insert(old_val.clone()); | ||||||
|     } |     } | ||||||
|     obj |     obj | ||||||
| } | } | ||||||
|  |  | ||||||
| fn insert_object( | fn insert_object<'a>( | ||||||
|     base_json: &mut Map<String, Value>, |     base_json: &mut Map<String, Value>, | ||||||
|     base_key: Option<&str>, |     base_key: Option<&str>, | ||||||
|     object: &Map<String, Value>, |     object: &'a Map<String, Value>, | ||||||
|     all_keys: &mut Vec<String>, |     all_entries: &mut Vec<(String, &'a Value)>, | ||||||
| ) { | ) { | ||||||
|     for (key, value) in object { |     for (key, value) in object { | ||||||
|         let new_key = base_key.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}")); |         let new_key = base_key.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}")); | ||||||
|         all_keys.push(new_key.clone()); |         all_entries.push((new_key.clone(), value)); | ||||||
|         if let Some(array) = value.as_array() { |         if let Some(array) = value.as_array() { | ||||||
|             insert_array(base_json, &new_key, array, all_keys); |             insert_array(base_json, &new_key, array, all_entries); | ||||||
|         } else if let Some(object) = value.as_object() { |         } else if let Some(object) = value.as_object() { | ||||||
|             insert_object(base_json, Some(&new_key), object, all_keys); |             insert_object(base_json, Some(&new_key), object, all_entries); | ||||||
|         } else { |         } else { | ||||||
|             insert_value(base_json, &new_key, value.clone()); |             insert_value(base_json, &new_key, value.clone(), false); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| fn insert_array( | fn insert_array<'a>( | ||||||
|     base_json: &mut Map<String, Value>, |     base_json: &mut Map<String, Value>, | ||||||
|     base_key: &str, |     base_key: &str, | ||||||
|     array: &Vec<Value>, |     array: &'a Vec<Value>, | ||||||
|     all_keys: &mut Vec<String>, |     all_entries: &mut Vec<(String, &'a Value)>, | ||||||
| ) { | ) { | ||||||
|     for value in array { |     for value in array { | ||||||
|         if let Some(object) = value.as_object() { |         if let Some(object) = value.as_object() { | ||||||
|             insert_object(base_json, Some(base_key), object, all_keys); |             insert_object(base_json, Some(base_key), object, all_entries); | ||||||
|         } else if let Some(sub_array) = value.as_array() { |         } else if let Some(sub_array) = value.as_array() { | ||||||
|             insert_array(base_json, base_key, sub_array, all_keys); |             insert_array(base_json, base_key, sub_array, all_entries); | ||||||
|         } else { |         } else { | ||||||
|             insert_value(base_json, base_key, value.clone()); |             insert_value(base_json, base_key, value.clone(), true); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value) { | fn insert_value( | ||||||
|  |     base_json: &mut Map<String, Value>, | ||||||
|  |     key: &str, | ||||||
|  |     to_insert: Value, | ||||||
|  |     came_from_array: bool, | ||||||
|  | ) { | ||||||
|     debug_assert!(!to_insert.is_object()); |     debug_assert!(!to_insert.is_object()); | ||||||
|     debug_assert!(!to_insert.is_array()); |     debug_assert!(!to_insert.is_array()); | ||||||
|  |  | ||||||
| @@ -63,6 +68,8 @@ fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value) | |||||||
|             base_json[key] = Value::Array(vec![value, to_insert]); |             base_json[key] = Value::Array(vec![value, to_insert]); | ||||||
|         } |         } | ||||||
|         // if it does not exist we can push the value untouched |         // if it does not exist we can push the value untouched | ||||||
|  |     } else if came_from_array { | ||||||
|  |         base_json.insert(key.to_string(), Value::Array(vec![to_insert])); | ||||||
|     } else { |     } else { | ||||||
|         base_json.insert(key.to_string(), to_insert); |         base_json.insert(key.to_string(), to_insert); | ||||||
|     } |     } | ||||||
| @@ -113,7 +120,11 @@ mod tests { | |||||||
|         assert_eq!( |         assert_eq!( | ||||||
|             &flat, |             &flat, | ||||||
|             json!({ |             json!({ | ||||||
|                 "a": [], |                 "a": { | ||||||
|  |                   "b": "c", | ||||||
|  |                   "d": "e", | ||||||
|  |                   "f": "g" | ||||||
|  |                 }, | ||||||
|                 "a.b": "c", |                 "a.b": "c", | ||||||
|                 "a.d": "e", |                 "a.d": "e", | ||||||
|                 "a.f": "g" |                 "a.f": "g" | ||||||
| @@ -164,7 +175,7 @@ mod tests { | |||||||
|         assert_eq!( |         assert_eq!( | ||||||
|             &flat, |             &flat, | ||||||
|             json!({ |             json!({ | ||||||
|                 "a": 42, |                 "a": [42], | ||||||
|                 "a.b": ["c", "d", "e"], |                 "a.b": ["c", "d", "e"], | ||||||
|             }) |             }) | ||||||
|             .as_object() |             .as_object() | ||||||
| @@ -186,7 +197,7 @@ mod tests { | |||||||
|         assert_eq!( |         assert_eq!( | ||||||
|             &flat, |             &flat, | ||||||
|             json!({ |             json!({ | ||||||
|                 "a": null, |                 "a": [null], | ||||||
|                 "a.b": ["c", "d", "e"], |                 "a.b": ["c", "d", "e"], | ||||||
|             }) |             }) | ||||||
|             .as_object() |             .as_object() | ||||||
| @@ -208,7 +219,9 @@ mod tests { | |||||||
|         assert_eq!( |         assert_eq!( | ||||||
|             &flat, |             &flat, | ||||||
|             json!({ |             json!({ | ||||||
|                 "a": [], |                 "a": { | ||||||
|  |                     "b": "c" | ||||||
|  |                 }, | ||||||
|                 "a.b": ["c", "d"], |                 "a.b": ["c", "d"], | ||||||
|             }) |             }) | ||||||
|             .as_object() |             .as_object() | ||||||
| @@ -234,7 +247,7 @@ mod tests { | |||||||
|             json!({ |             json!({ | ||||||
|                 "a.b": ["c", "d", "f"], |                 "a.b": ["c", "d", "f"], | ||||||
|                 "a.c": "e", |                 "a.c": "e", | ||||||
|                 "a": 35, |                 "a": [35], | ||||||
|             }) |             }) | ||||||
|             .as_object() |             .as_object() | ||||||
|             .unwrap() |             .unwrap() | ||||||
| @@ -302,4 +315,53 @@ mod tests { | |||||||
|             .unwrap() |             .unwrap() | ||||||
|         ); |         ); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn flatten_nested_values_keep_original_values() { | ||||||
|  |         let mut base: Value = json!({ | ||||||
|  |             "tags": { | ||||||
|  |                 "t1": "v1" | ||||||
|  |             }, | ||||||
|  |             "prices": { | ||||||
|  |                 "p1": [null], | ||||||
|  |                 "p1000": {"tamo": {"le": {}}} | ||||||
|  |             }, | ||||||
|  |             "kiki": [[]] | ||||||
|  |         }); | ||||||
|  |         let json = std::mem::take(base.as_object_mut().unwrap()); | ||||||
|  |         let flat = flatten(&json); | ||||||
|  |  | ||||||
|  |         println!("{}", serde_json::to_string_pretty(&flat).unwrap()); | ||||||
|  |  | ||||||
|  |         assert_eq!( | ||||||
|  |             &flat, | ||||||
|  |             json!({ | ||||||
|  |               "prices": { | ||||||
|  |                 "p1": [null], | ||||||
|  |                 "p1000": { | ||||||
|  |                   "tamo": { | ||||||
|  |                     "le": {} | ||||||
|  |                   } | ||||||
|  |                 } | ||||||
|  |               }, | ||||||
|  |               "prices.p1": [null], | ||||||
|  |               "prices.p1000": { | ||||||
|  |                 "tamo": { | ||||||
|  |                   "le": {} | ||||||
|  |                 } | ||||||
|  |               }, | ||||||
|  |               "prices.p1000.tamo": { | ||||||
|  |                 "le": {} | ||||||
|  |               }, | ||||||
|  |               "prices.p1000.tamo.le": {}, | ||||||
|  |               "tags": { | ||||||
|  |                 "t1": "v1" | ||||||
|  |               }, | ||||||
|  |               "tags.t1": "v1", | ||||||
|  |               "kiki": [[]] | ||||||
|  |             }) | ||||||
|  |             .as_object() | ||||||
|  |             .unwrap() | ||||||
|  |         ); | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -547,7 +547,7 @@ async fn filter_invalid_syntax_object() { | |||||||
|     index.wait_task(1).await; |     index.wait_task(1).await; | ||||||
|  |  | ||||||
|     let expected_response = json!({ |     let expected_response = json!({ | ||||||
|         "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", |         "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", | ||||||
|         "code": "invalid_search_filter", |         "code": "invalid_search_filter", | ||||||
|         "type": "invalid_request", |         "type": "invalid_request", | ||||||
|         "link": "https://docs.meilisearch.com/errors#invalid_search_filter" |         "link": "https://docs.meilisearch.com/errors#invalid_search_filter" | ||||||
| @@ -572,7 +572,7 @@ async fn filter_invalid_syntax_array() { | |||||||
|     index.wait_task(1).await; |     index.wait_task(1).await; | ||||||
|  |  | ||||||
|     let expected_response = json!({ |     let expected_response = json!({ | ||||||
|         "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", |         "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", | ||||||
|         "code": "invalid_search_filter", |         "code": "invalid_search_filter", | ||||||
|         "type": "invalid_request", |         "type": "invalid_request", | ||||||
|         "link": "https://docs.meilisearch.com/errors#invalid_search_filter" |         "link": "https://docs.meilisearch.com/errors#invalid_search_filter" | ||||||
|   | |||||||
| @@ -80,6 +80,8 @@ pub mod db_name { | |||||||
|     pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids"; |     pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids"; | ||||||
|     pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids"; |     pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids"; | ||||||
|     pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids"; |     pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids"; | ||||||
|  |     pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids"; | ||||||
|  |     pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids"; | ||||||
|     pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids"; |     pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids"; | ||||||
|     pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; |     pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; | ||||||
|     pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; |     pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; | ||||||
| @@ -129,6 +131,10 @@ pub struct Index { | |||||||
|  |  | ||||||
|     /// Maps the facet field id and the docids for which this field exists |     /// Maps the facet field id and the docids for which this field exists | ||||||
|     pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, |     pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, | ||||||
|  |     /// Maps the facet field id and the docids for which this field is set as null | ||||||
|  |     pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, | ||||||
|  |     /// Maps the facet field id and the docids for which this field is considered empty | ||||||
|  |     pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, | ||||||
|  |  | ||||||
|     /// Maps the facet field id and ranges of numbers with the docids that corresponds to them. |     /// Maps the facet field id and ranges of numbers with the docids that corresponds to them. | ||||||
|     pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, |     pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, | ||||||
| @@ -153,7 +159,7 @@ impl Index { | |||||||
|     ) -> Result<Index> { |     ) -> Result<Index> { | ||||||
|         use db_name::*; |         use db_name::*; | ||||||
|  |  | ||||||
|         options.max_dbs(19); |         options.max_dbs(21); | ||||||
|         unsafe { options.flag(Flags::MdbAlwaysFreePages) }; |         unsafe { options.flag(Flags::MdbAlwaysFreePages) }; | ||||||
|  |  | ||||||
|         let env = options.open(path)?; |         let env = options.open(path)?; | ||||||
| @@ -175,6 +181,8 @@ impl Index { | |||||||
|         let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?; |         let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?; | ||||||
|         let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?; |         let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?; | ||||||
|         let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?; |         let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?; | ||||||
|  |         let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?; | ||||||
|  |         let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?; | ||||||
|  |  | ||||||
|         let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?; |         let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?; | ||||||
|         let field_id_docid_facet_strings = |         let field_id_docid_facet_strings = | ||||||
| @@ -201,6 +209,8 @@ impl Index { | |||||||
|             facet_id_f64_docids, |             facet_id_f64_docids, | ||||||
|             facet_id_string_docids, |             facet_id_string_docids, | ||||||
|             facet_id_exists_docids, |             facet_id_exists_docids, | ||||||
|  |             facet_id_is_null_docids, | ||||||
|  |             facet_id_is_empty_docids, | ||||||
|             field_id_docid_facet_f64s, |             field_id_docid_facet_f64s, | ||||||
|             field_id_docid_facet_strings, |             field_id_docid_facet_strings, | ||||||
|             documents, |             documents, | ||||||
| @@ -833,6 +843,30 @@ impl Index { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Retrieve all the documents which contain this field id set as null | ||||||
|  |     pub fn null_faceted_documents_ids( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         field_id: FieldId, | ||||||
|  |     ) -> heed::Result<RoaringBitmap> { | ||||||
|  |         match self.facet_id_is_null_docids.get(rtxn, &BEU16::new(field_id))? { | ||||||
|  |             Some(docids) => Ok(docids), | ||||||
|  |             None => Ok(RoaringBitmap::new()), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Retrieve all the documents which contain this field id and that is considered empty | ||||||
|  |     pub fn empty_faceted_documents_ids( | ||||||
|  |         &self, | ||||||
|  |         rtxn: &RoTxn, | ||||||
|  |         field_id: FieldId, | ||||||
|  |     ) -> heed::Result<RoaringBitmap> { | ||||||
|  |         match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? { | ||||||
|  |             Some(docids) => Ok(docids), | ||||||
|  |             None => Ok(RoaringBitmap::new()), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Retrieve all the documents which contain this field id |     /// Retrieve all the documents which contain this field id | ||||||
|     pub fn exists_faceted_documents_ids( |     pub fn exists_faceted_documents_ids( | ||||||
|         &self, |         &self, | ||||||
|   | |||||||
| @@ -211,6 +211,14 @@ impl<'a> Filter<'a> { | |||||||
|             Condition::Between { from, to } => { |             Condition::Between { from, to } => { | ||||||
|                 (Included(from.parse_finite_float()?), Included(to.parse_finite_float()?)) |                 (Included(from.parse_finite_float()?), Included(to.parse_finite_float()?)) | ||||||
|             } |             } | ||||||
|  |             Condition::Null => { | ||||||
|  |                 let is_null = index.null_faceted_documents_ids(rtxn, field_id)?; | ||||||
|  |                 return Ok(is_null); | ||||||
|  |             } | ||||||
|  |             Condition::Empty => { | ||||||
|  |                 let is_empty = index.empty_faceted_documents_ids(rtxn, field_id)?; | ||||||
|  |                 return Ok(is_empty); | ||||||
|  |             } | ||||||
|             Condition::Exists => { |             Condition::Exists => { | ||||||
|                 let exist = index.exists_faceted_documents_ids(rtxn, field_id)?; |                 let exist = index.exists_faceted_documents_ids(rtxn, field_id)?; | ||||||
|                 return Ok(exist); |                 return Ok(exist); | ||||||
|   | |||||||
| @@ -271,6 +271,16 @@ pub fn snap_facet_id_exists_docids(index: &Index) -> String { | |||||||
|         &format!("{facet_id:<3} {}", display_bitmap(&docids)) |         &format!("{facet_id:<3} {}", display_bitmap(&docids)) | ||||||
|     }) |     }) | ||||||
| } | } | ||||||
|  | pub fn snap_facet_id_is_null_docids(index: &Index) -> String { | ||||||
|  |     make_db_snap_from_iter!(index, facet_id_is_null_docids, |(facet_id, docids)| { | ||||||
|  |         &format!("{facet_id:<3} {}", display_bitmap(&docids)) | ||||||
|  |     }) | ||||||
|  | } | ||||||
|  | pub fn snap_facet_id_is_empty_docids(index: &Index) -> String { | ||||||
|  |     make_db_snap_from_iter!(index, facet_id_is_empty_docids, |(facet_id, docids)| { | ||||||
|  |         &format!("{facet_id:<3} {}", display_bitmap(&docids)) | ||||||
|  |     }) | ||||||
|  | } | ||||||
| pub fn snap_facet_id_string_docids(index: &Index) -> String { | pub fn snap_facet_id_string_docids(index: &Index) -> String { | ||||||
|     make_db_snap_from_iter!(index, facet_id_string_docids, |( |     make_db_snap_from_iter!(index, facet_id_string_docids, |( | ||||||
|         FacetGroupKey { field_id, level, left_bound }, |         FacetGroupKey { field_id, level, left_bound }, | ||||||
| @@ -495,6 +505,12 @@ macro_rules! full_snap_of_db { | |||||||
|     ($index:ident, facet_id_exists_docids) => {{ |     ($index:ident, facet_id_exists_docids) => {{ | ||||||
|         $crate::snapshot_tests::snap_facet_id_exists_docids(&$index) |         $crate::snapshot_tests::snap_facet_id_exists_docids(&$index) | ||||||
|     }}; |     }}; | ||||||
|  |     ($index:ident, facet_id_is_null_docids) => {{ | ||||||
|  |         $crate::snapshot_tests::snap_facet_id_is_null_docids(&$index) | ||||||
|  |     }}; | ||||||
|  |     ($index:ident, facet_id_is_empty_docids) => {{ | ||||||
|  |         $crate::snapshot_tests::snap_facet_id_is_empty_docids(&$index) | ||||||
|  |     }}; | ||||||
|     ($index:ident, documents_ids) => {{ |     ($index:ident, documents_ids) => {{ | ||||||
|         $crate::snapshot_tests::snap_documents_ids(&$index) |         $crate::snapshot_tests::snap_documents_ids(&$index) | ||||||
|     }}; |     }}; | ||||||
|   | |||||||
| @@ -34,6 +34,8 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { | |||||||
|             facet_id_f64_docids, |             facet_id_f64_docids, | ||||||
|             facet_id_string_docids, |             facet_id_string_docids, | ||||||
|             facet_id_exists_docids, |             facet_id_exists_docids, | ||||||
|  |             facet_id_is_null_docids, | ||||||
|  |             facet_id_is_empty_docids, | ||||||
|             field_id_docid_facet_f64s, |             field_id_docid_facet_f64s, | ||||||
|             field_id_docid_facet_strings, |             field_id_docid_facet_strings, | ||||||
|             documents, |             documents, | ||||||
| @@ -86,6 +88,8 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { | |||||||
|         script_language_docids.clear(self.wtxn)?; |         script_language_docids.clear(self.wtxn)?; | ||||||
|         facet_id_f64_docids.clear(self.wtxn)?; |         facet_id_f64_docids.clear(self.wtxn)?; | ||||||
|         facet_id_exists_docids.clear(self.wtxn)?; |         facet_id_exists_docids.clear(self.wtxn)?; | ||||||
|  |         facet_id_is_null_docids.clear(self.wtxn)?; | ||||||
|  |         facet_id_is_empty_docids.clear(self.wtxn)?; | ||||||
|         facet_id_string_docids.clear(self.wtxn)?; |         facet_id_string_docids.clear(self.wtxn)?; | ||||||
|         field_id_docid_facet_f64s.clear(self.wtxn)?; |         field_id_docid_facet_f64s.clear(self.wtxn)?; | ||||||
|         field_id_docid_facet_strings.clear(self.wtxn)?; |         field_id_docid_facet_strings.clear(self.wtxn)?; | ||||||
|   | |||||||
| @@ -245,6 +245,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|             field_id_docid_facet_strings: _, |             field_id_docid_facet_strings: _, | ||||||
|             script_language_docids, |             script_language_docids, | ||||||
|             facet_id_exists_docids, |             facet_id_exists_docids, | ||||||
|  |             facet_id_is_null_docids, | ||||||
|  |             facet_id_is_empty_docids, | ||||||
|             documents, |             documents, | ||||||
|         } = self.index; |         } = self.index; | ||||||
|  |  | ||||||
| @@ -517,12 +519,26 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { | |||||||
|  |  | ||||||
|         drop(iter); |         drop(iter); | ||||||
|         // We delete the documents ids that are under the facet field id values. |         // We delete the documents ids that are under the facet field id values. | ||||||
|         remove_docids_from_facet_id_exists_docids( |         remove_docids_from_facet_id_docids( | ||||||
|             self.wtxn, |             self.wtxn, | ||||||
|             facet_id_exists_docids, |             facet_id_exists_docids, | ||||||
|             &self.to_delete_docids, |             &self.to_delete_docids, | ||||||
|         )?; |         )?; | ||||||
|  |  | ||||||
|  |         // We delete the documents ids that are under the facet field id values. | ||||||
|  |         remove_docids_from_facet_id_docids( | ||||||
|  |             self.wtxn, | ||||||
|  |             facet_id_is_null_docids, | ||||||
|  |             &self.to_delete_docids, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|  |         // We delete the documents ids that are under the facet field id values. | ||||||
|  |         remove_docids_from_facet_id_docids( | ||||||
|  |             self.wtxn, | ||||||
|  |             facet_id_is_empty_docids, | ||||||
|  |             &self.to_delete_docids, | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|         self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?; |         self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?; | ||||||
|  |  | ||||||
|         Ok(DetailedDocumentDeletionResult { |         Ok(DetailedDocumentDeletionResult { | ||||||
| @@ -625,7 +641,7 @@ fn remove_docids_from_field_id_docid_facet_value( | |||||||
|     Ok(all_affected_facet_values) |     Ok(all_affected_facet_values) | ||||||
| } | } | ||||||
|  |  | ||||||
| fn remove_docids_from_facet_id_exists_docids<'a, C>( | fn remove_docids_from_facet_id_docids<'a, C>( | ||||||
|     wtxn: &'a mut heed::RwTxn, |     wtxn: &'a mut heed::RwTxn, | ||||||
|     db: &heed::Database<C, CboRoaringBitmapCodec>, |     db: &heed::Database<C, CboRoaringBitmapCodec>, | ||||||
|     to_remove: &RoaringBitmap, |     to_remove: &RoaringBitmap, | ||||||
|   | |||||||
| @@ -181,7 +181,7 @@ fn json_to_string<'a>(value: &'a Value, buffer: &'a mut String) -> Option<&'a st | |||||||
|     fn inner(value: &Value, output: &mut String) -> bool { |     fn inner(value: &Value, output: &mut String) -> bool { | ||||||
|         use std::fmt::Write; |         use std::fmt::Write; | ||||||
|         match value { |         match value { | ||||||
|             Value::Null => false, |             Value::Null | Value::Object(_) => false, | ||||||
|             Value::Bool(boolean) => write!(output, "{}", boolean).is_ok(), |             Value::Bool(boolean) => write!(output, "{}", boolean).is_ok(), | ||||||
|             Value::Number(number) => write!(output, "{}", number).is_ok(), |             Value::Number(number) => write!(output, "{}", number).is_ok(), | ||||||
|             Value::String(string) => write!(output, "{}", string).is_ok(), |             Value::String(string) => write!(output, "{}", string).is_ok(), | ||||||
| @@ -196,23 +196,6 @@ fn json_to_string<'a>(value: &'a Value, buffer: &'a mut String) -> Option<&'a st | |||||||
|                 // check that at least one value was written |                 // check that at least one value was written | ||||||
|                 count != 0 |                 count != 0 | ||||||
|             } |             } | ||||||
|             Value::Object(object) => { |  | ||||||
|                 let mut buffer = String::new(); |  | ||||||
|                 let mut count = 0; |  | ||||||
|                 for (key, value) in object { |  | ||||||
|                     buffer.clear(); |  | ||||||
|                     let _ = write!(&mut buffer, "{}: ", key); |  | ||||||
|                     if inner(value, &mut buffer) { |  | ||||||
|                         buffer.push_str(". "); |  | ||||||
|                         // We write the "key: value. " pair only when |  | ||||||
|                         // we are sure that the value can be written. |  | ||||||
|                         output.push_str(&buffer); |  | ||||||
|                         count += 1; |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|                 // check that at least one value was written |  | ||||||
|                 count != 0 |  | ||||||
|             } |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -7,7 +7,7 @@ use std::mem::size_of; | |||||||
| use heed::zerocopy::AsBytes; | use heed::zerocopy::AsBytes; | ||||||
| use heed::BytesEncode; | use heed::BytesEncode; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use serde_json::Value; | use serde_json::{from_slice, Value}; | ||||||
|  |  | ||||||
| use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters}; | use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters}; | ||||||
| use crate::error::InternalError; | use crate::error::InternalError; | ||||||
| @@ -15,6 +15,15 @@ use crate::facet::value_encoding::f64_into_bytes; | |||||||
| use crate::update::index_documents::{create_writer, writer_into_reader}; | use crate::update::index_documents::{create_writer, writer_into_reader}; | ||||||
| use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH}; | use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH}; | ||||||
|  |  | ||||||
|  | /// The extracted facet values stored in grenad files by type. | ||||||
|  | pub struct ExtractedFacetValues { | ||||||
|  |     pub docid_fid_facet_numbers_chunk: grenad::Reader<File>, | ||||||
|  |     pub docid_fid_facet_strings_chunk: grenad::Reader<File>, | ||||||
|  |     pub fid_facet_is_null_docids_chunk: grenad::Reader<File>, | ||||||
|  |     pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>, | ||||||
|  |     pub fid_facet_exists_docids_chunk: grenad::Reader<File>, | ||||||
|  | } | ||||||
|  |  | ||||||
| /// Extracts the facet values of each faceted field of each document. | /// Extracts the facet values of each faceted field of each document. | ||||||
| /// | /// | ||||||
| /// Returns the generated grenad reader containing the docid the fid and the orginal value as key | /// Returns the generated grenad reader containing the docid the fid and the orginal value as key | ||||||
| @@ -24,7 +33,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( | |||||||
|     obkv_documents: grenad::Reader<R>, |     obkv_documents: grenad::Reader<R>, | ||||||
|     indexer: GrenadParameters, |     indexer: GrenadParameters, | ||||||
|     faceted_fields: &HashSet<FieldId>, |     faceted_fields: &HashSet<FieldId>, | ||||||
| ) -> Result<(grenad::Reader<File>, grenad::Reader<File>, grenad::Reader<File>)> { | ) -> Result<ExtractedFacetValues> { | ||||||
|     let max_memory = indexer.max_memory_by_thread(); |     let max_memory = indexer.max_memory_by_thread(); | ||||||
|  |  | ||||||
|     let mut fid_docid_facet_numbers_sorter = create_sorter( |     let mut fid_docid_facet_numbers_sorter = create_sorter( | ||||||
| @@ -46,6 +55,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( | |||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let mut facet_exists_docids = BTreeMap::<FieldId, RoaringBitmap>::new(); |     let mut facet_exists_docids = BTreeMap::<FieldId, RoaringBitmap>::new(); | ||||||
|  |     let mut facet_is_null_docids = BTreeMap::<FieldId, RoaringBitmap>::new(); | ||||||
|  |     let mut facet_is_empty_docids = BTreeMap::<FieldId, RoaringBitmap>::new(); | ||||||
|  |  | ||||||
|     let mut key_buffer = Vec::new(); |     let mut key_buffer = Vec::new(); | ||||||
|     let mut cursor = obkv_documents.into_cursor()?; |     let mut cursor = obkv_documents.into_cursor()?; | ||||||
| @@ -69,33 +80,44 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( | |||||||
|                 // For the other extraction tasks, prefix the key with the field_id and the document_id |                 // For the other extraction tasks, prefix the key with the field_id and the document_id | ||||||
|                 key_buffer.extend_from_slice(docid_bytes); |                 key_buffer.extend_from_slice(docid_bytes); | ||||||
|  |  | ||||||
|                 let value = |                 let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?; | ||||||
|                     serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?; |  | ||||||
|  |  | ||||||
|                 let (numbers, strings) = extract_facet_values(&value); |                 match extract_facet_values(&value) { | ||||||
|  |                     FilterableValues::Null => { | ||||||
|                 // insert facet numbers in sorter |                         facet_is_null_docids.entry(field_id).or_default().insert(document); | ||||||
|                 for number in numbers { |  | ||||||
|                     key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>()); |  | ||||||
|                     if let Some(value_bytes) = f64_into_bytes(number) { |  | ||||||
|                         key_buffer.extend_from_slice(&value_bytes); |  | ||||||
|                         key_buffer.extend_from_slice(&number.to_be_bytes()); |  | ||||||
|  |  | ||||||
|                         fid_docid_facet_numbers_sorter.insert(&key_buffer, ().as_bytes())?; |  | ||||||
|                     } |                     } | ||||||
|                 } |                     FilterableValues::Empty => { | ||||||
|  |                         facet_is_empty_docids.entry(field_id).or_default().insert(document); | ||||||
|  |                     } | ||||||
|  |                     FilterableValues::Values { numbers, strings } => { | ||||||
|  |                         // insert facet numbers in sorter | ||||||
|  |                         for number in numbers { | ||||||
|  |                             key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>()); | ||||||
|  |                             if let Some(value_bytes) = f64_into_bytes(number) { | ||||||
|  |                                 key_buffer.extend_from_slice(&value_bytes); | ||||||
|  |                                 key_buffer.extend_from_slice(&number.to_be_bytes()); | ||||||
|  |  | ||||||
|                 // insert normalized and original facet string in sorter |                                 fid_docid_facet_numbers_sorter | ||||||
|                 for (normalized, original) in strings.into_iter().filter(|(n, _)| !n.is_empty()) { |                                     .insert(&key_buffer, ().as_bytes())?; | ||||||
|                     let normalised_truncated_value: String = normalized |                             } | ||||||
|                         .char_indices() |                         } | ||||||
|                         .take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH) |  | ||||||
|                         .map(|(_, c)| c) |  | ||||||
|                         .collect(); |  | ||||||
|  |  | ||||||
|                     key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>()); |                         // insert normalized and original facet string in sorter | ||||||
|                     key_buffer.extend_from_slice(normalised_truncated_value.as_bytes()); |                         for (normalized, original) in | ||||||
|                     fid_docid_facet_strings_sorter.insert(&key_buffer, original.as_bytes())?; |                             strings.into_iter().filter(|(n, _)| !n.is_empty()) | ||||||
|  |                         { | ||||||
|  |                             let normalized_truncated_value: String = normalized | ||||||
|  |                                 .char_indices() | ||||||
|  |                                 .take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH) | ||||||
|  |                                 .map(|(_, c)| c) | ||||||
|  |                                 .collect(); | ||||||
|  |  | ||||||
|  |                             key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>()); | ||||||
|  |                             key_buffer.extend_from_slice(normalized_truncated_value.as_bytes()); | ||||||
|  |                             fid_docid_facet_strings_sorter | ||||||
|  |                                 .insert(&key_buffer, original.as_bytes())?; | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| @@ -112,14 +134,48 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( | |||||||
|     } |     } | ||||||
|     let facet_exists_docids_reader = writer_into_reader(facet_exists_docids_writer)?; |     let facet_exists_docids_reader = writer_into_reader(facet_exists_docids_writer)?; | ||||||
|  |  | ||||||
|     Ok(( |     let mut facet_is_null_docids_writer = create_writer( | ||||||
|         sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?, |         indexer.chunk_compression_type, | ||||||
|         sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?, |         indexer.chunk_compression_level, | ||||||
|         facet_exists_docids_reader, |         tempfile::tempfile()?, | ||||||
|     )) |     ); | ||||||
|  |     for (fid, bitmap) in facet_is_null_docids.into_iter() { | ||||||
|  |         let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap(); | ||||||
|  |         facet_is_null_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?; | ||||||
|  |     } | ||||||
|  |     let facet_is_null_docids_reader = writer_into_reader(facet_is_null_docids_writer)?; | ||||||
|  |  | ||||||
|  |     let mut facet_is_empty_docids_writer = create_writer( | ||||||
|  |         indexer.chunk_compression_type, | ||||||
|  |         indexer.chunk_compression_level, | ||||||
|  |         tempfile::tempfile()?, | ||||||
|  |     ); | ||||||
|  |     for (fid, bitmap) in facet_is_empty_docids.into_iter() { | ||||||
|  |         let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap(); | ||||||
|  |         facet_is_empty_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?; | ||||||
|  |     } | ||||||
|  |     let facet_is_empty_docids_reader = writer_into_reader(facet_is_empty_docids_writer)?; | ||||||
|  |  | ||||||
|  |     Ok(ExtractedFacetValues { | ||||||
|  |         docid_fid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?, | ||||||
|  |         docid_fid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?, | ||||||
|  |         fid_facet_is_null_docids_chunk: facet_is_null_docids_reader, | ||||||
|  |         fid_facet_is_empty_docids_chunk: facet_is_empty_docids_reader, | ||||||
|  |         fid_facet_exists_docids_chunk: facet_exists_docids_reader, | ||||||
|  |     }) | ||||||
| } | } | ||||||
|  |  | ||||||
| fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<(String, String)>) { | /// Represent what a document field contains. | ||||||
|  | enum FilterableValues { | ||||||
|  |     /// Corresponds to the JSON `null` value. | ||||||
|  |     Null, | ||||||
|  |     /// Corresponds to either, an empty string `""`, an empty array `[]`, or an empty object `{}`. | ||||||
|  |     Empty, | ||||||
|  |     /// Represents all the numbers and strings values found in this document field. | ||||||
|  |     Values { numbers: Vec<f64>, strings: Vec<(String, String)> }, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | fn extract_facet_values(value: &Value) -> FilterableValues { | ||||||
|     fn inner_extract_facet_values( |     fn inner_extract_facet_values( | ||||||
|         value: &Value, |         value: &Value, | ||||||
|         can_recurse: bool, |         can_recurse: bool, | ||||||
| @@ -149,9 +205,16 @@ fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<(String, String)>) { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     let mut facet_number_values = Vec::new(); |     match value { | ||||||
|     let mut facet_string_values = Vec::new(); |         Value::Null => FilterableValues::Null, | ||||||
|     inner_extract_facet_values(value, true, &mut facet_number_values, &mut facet_string_values); |         Value::String(s) if s.is_empty() => FilterableValues::Empty, | ||||||
|  |         Value::Array(a) if a.is_empty() => FilterableValues::Empty, | ||||||
|     (facet_number_values, facet_string_values) |         Value::Object(o) if o.is_empty() => FilterableValues::Empty, | ||||||
|  |         otherwise => { | ||||||
|  |             let mut numbers = Vec::new(); | ||||||
|  |             let mut strings = Vec::new(); | ||||||
|  |             inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings); | ||||||
|  |             FilterableValues::Values { numbers, strings } | ||||||
|  |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -18,7 +18,7 @@ use rayon::prelude::*; | |||||||
| use self::extract_docid_word_positions::extract_docid_word_positions; | use self::extract_docid_word_positions::extract_docid_word_positions; | ||||||
| use self::extract_facet_number_docids::extract_facet_number_docids; | use self::extract_facet_number_docids::extract_facet_number_docids; | ||||||
| use self::extract_facet_string_docids::extract_facet_string_docids; | use self::extract_facet_string_docids::extract_facet_string_docids; | ||||||
| use self::extract_fid_docid_facet_values::extract_fid_docid_facet_values; | use self::extract_fid_docid_facet_values::{extract_fid_docid_facet_values, ExtractedFacetValues}; | ||||||
| use self::extract_fid_word_count_docids::extract_fid_word_count_docids; | use self::extract_fid_word_count_docids::extract_fid_word_count_docids; | ||||||
| use self::extract_geo_points::extract_geo_points; | use self::extract_geo_points::extract_geo_points; | ||||||
| use self::extract_word_docids::extract_word_docids; | use self::extract_word_docids::extract_word_docids; | ||||||
| @@ -55,28 +55,35 @@ pub(crate) fn data_from_obkv_documents( | |||||||
|         .collect::<Result<()>>()?; |         .collect::<Result<()>>()?; | ||||||
|  |  | ||||||
|     #[allow(clippy::type_complexity)] |     #[allow(clippy::type_complexity)] | ||||||
|     let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, Vec<_>)))> = flattened_obkv_chunks |     let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, Vec<_>)))))> = | ||||||
|         .par_bridge() |         flattened_obkv_chunks | ||||||
|         .map(|flattened_obkv_chunks| { |             .par_bridge() | ||||||
|             send_and_extract_flattened_documents_data( |             .map(|flattened_obkv_chunks| { | ||||||
|                 flattened_obkv_chunks, |                 send_and_extract_flattened_documents_data( | ||||||
|                 indexer, |                     flattened_obkv_chunks, | ||||||
|                 lmdb_writer_sx.clone(), |                     indexer, | ||||||
|                 &searchable_fields, |                     lmdb_writer_sx.clone(), | ||||||
|                 &faceted_fields, |                     &searchable_fields, | ||||||
|                 primary_key_id, |                     &faceted_fields, | ||||||
|                 geo_fields_ids, |                     primary_key_id, | ||||||
|                 &stop_words, |                     geo_fields_ids, | ||||||
|                 max_positions_per_attributes, |                     &stop_words, | ||||||
|             ) |                     max_positions_per_attributes, | ||||||
|         }) |                 ) | ||||||
|         .collect(); |             }) | ||||||
|  |             .collect(); | ||||||
|  |  | ||||||
|     let ( |     let ( | ||||||
|         docid_word_positions_chunks, |         docid_word_positions_chunks, | ||||||
|         ( |         ( | ||||||
|             docid_fid_facet_numbers_chunks, |             docid_fid_facet_numbers_chunks, | ||||||
|             (docid_fid_facet_strings_chunks, facet_exists_docids_chunks), |             ( | ||||||
|  |                 docid_fid_facet_strings_chunks, | ||||||
|  |                 ( | ||||||
|  |                     facet_is_null_docids_chunks, | ||||||
|  |                     (facet_is_empty_docids_chunks, facet_exists_docids_chunks), | ||||||
|  |                 ), | ||||||
|  |             ), | ||||||
|         ), |         ), | ||||||
|     ) = result?; |     ) = result?; | ||||||
|  |  | ||||||
| @@ -96,6 +103,38 @@ pub(crate) fn data_from_obkv_documents( | |||||||
|         }); |         }); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     // merge facet_is_null_docids and send them as a typed chunk | ||||||
|  |     { | ||||||
|  |         let lmdb_writer_sx = lmdb_writer_sx.clone(); | ||||||
|  |         rayon::spawn(move || { | ||||||
|  |             debug!("merge {} database", "facet-id-is-null-docids"); | ||||||
|  |             match facet_is_null_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) { | ||||||
|  |                 Ok(reader) => { | ||||||
|  |                     let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader))); | ||||||
|  |                 } | ||||||
|  |                 Err(e) => { | ||||||
|  |                     let _ = lmdb_writer_sx.send(Err(e)); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         }); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // merge facet_is_empty_docids and send them as a typed chunk | ||||||
|  |     { | ||||||
|  |         let lmdb_writer_sx = lmdb_writer_sx.clone(); | ||||||
|  |         rayon::spawn(move || { | ||||||
|  |             debug!("merge {} database", "facet-id-is-empty-docids"); | ||||||
|  |             match facet_is_empty_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) { | ||||||
|  |                 Ok(reader) => { | ||||||
|  |                     let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader))); | ||||||
|  |                 } | ||||||
|  |                 Err(e) => { | ||||||
|  |                     let _ = lmdb_writer_sx.send(Err(e)); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         }); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>( |     spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>( | ||||||
|         docid_word_positions_chunks.clone(), |         docid_word_positions_chunks.clone(), | ||||||
|         indexer, |         indexer, | ||||||
| @@ -235,7 +274,10 @@ fn send_and_extract_flattened_documents_data( | |||||||
|     grenad::Reader<CursorClonableMmap>, |     grenad::Reader<CursorClonableMmap>, | ||||||
|     ( |     ( | ||||||
|         grenad::Reader<CursorClonableMmap>, |         grenad::Reader<CursorClonableMmap>, | ||||||
|         (grenad::Reader<CursorClonableMmap>, grenad::Reader<File>), |         ( | ||||||
|  |             grenad::Reader<CursorClonableMmap>, | ||||||
|  |             (grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)), | ||||||
|  |         ), | ||||||
|     ), |     ), | ||||||
| )> { | )> { | ||||||
|     let flattened_documents_chunk = |     let flattened_documents_chunk = | ||||||
| @@ -281,11 +323,13 @@ fn send_and_extract_flattened_documents_data( | |||||||
|                 Ok(docid_word_positions_chunk) |                 Ok(docid_word_positions_chunk) | ||||||
|             }, |             }, | ||||||
|             || { |             || { | ||||||
|                 let ( |                 let ExtractedFacetValues { | ||||||
|                     docid_fid_facet_numbers_chunk, |                     docid_fid_facet_numbers_chunk, | ||||||
|                     docid_fid_facet_strings_chunk, |                     docid_fid_facet_strings_chunk, | ||||||
|  |                     fid_facet_is_null_docids_chunk, | ||||||
|  |                     fid_facet_is_empty_docids_chunk, | ||||||
|                     fid_facet_exists_docids_chunk, |                     fid_facet_exists_docids_chunk, | ||||||
|                 ) = extract_fid_docid_facet_values( |                 } = extract_fid_docid_facet_values( | ||||||
|                     flattened_documents_chunk.clone(), |                     flattened_documents_chunk.clone(), | ||||||
|                     indexer, |                     indexer, | ||||||
|                     faceted_fields, |                     faceted_fields, | ||||||
| @@ -309,7 +353,13 @@ fn send_and_extract_flattened_documents_data( | |||||||
|  |  | ||||||
|                 Ok(( |                 Ok(( | ||||||
|                     docid_fid_facet_numbers_chunk, |                     docid_fid_facet_numbers_chunk, | ||||||
|                     (docid_fid_facet_strings_chunk, fid_facet_exists_docids_chunk), |                     ( | ||||||
|  |                         docid_fid_facet_strings_chunk, | ||||||
|  |                         ( | ||||||
|  |                             fid_facet_is_null_docids_chunk, | ||||||
|  |                             (fid_facet_is_empty_docids_chunk, fid_facet_exists_docids_chunk), | ||||||
|  |                         ), | ||||||
|  |                     ), | ||||||
|                 )) |                 )) | ||||||
|             }, |             }, | ||||||
|         ); |         ); | ||||||
|   | |||||||
| @@ -1757,6 +1757,187 @@ mod tests { | |||||||
|         check_ok(&index); |         check_ok(&index); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn index_documents_check_is_null_database() { | ||||||
|  |         let content = || { | ||||||
|  |             documents!([ | ||||||
|  |                 { | ||||||
|  |                     "id": 0, | ||||||
|  |                     "colour": null, | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 1, | ||||||
|  |                     "colour": [null], // must not be returned | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 6, | ||||||
|  |                     "colour": { | ||||||
|  |                         "green": null | ||||||
|  |                     } | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 7, | ||||||
|  |                     "colour": { | ||||||
|  |                         "green": { | ||||||
|  |                             "blue": null | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 8, | ||||||
|  |                     "colour": 0, | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 9, | ||||||
|  |                     "colour": [] | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 10, | ||||||
|  |                     "colour": {} | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 12, | ||||||
|  |                     "colour": [1] | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 13 | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 14, | ||||||
|  |                     "colour": { | ||||||
|  |                         "green": 1 | ||||||
|  |                     } | ||||||
|  |                 }, | ||||||
|  |                 { | ||||||
|  |                     "id": 15, | ||||||
|  |                     "colour": { | ||||||
|  |                         "green": { | ||||||
|  |                             "blue": [] | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             ]) | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let check_ok = |index: &Index| { | ||||||
|  |             let rtxn = index.read_txn().unwrap(); | ||||||
|  |             let facets = index.faceted_fields(&rtxn).unwrap(); | ||||||
|  |             assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue"))); | ||||||
|  |  | ||||||
|  |             let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap(); | ||||||
|  |             let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); | ||||||
|  |             let colour_blue_id = | ||||||
|  |                 index.fields_ids_map(&rtxn).unwrap().id("colour.green.blue").unwrap(); | ||||||
|  |  | ||||||
|  |             let bitmap_null_colour = | ||||||
|  |                 index.facet_id_is_null_docids.get(&rtxn, &BEU16::new(colour_id)).unwrap().unwrap(); | ||||||
|  |             assert_eq!(bitmap_null_colour.into_iter().collect::<Vec<_>>(), vec![0]); | ||||||
|  |  | ||||||
|  |             let bitmap_colour_green = index | ||||||
|  |                 .facet_id_is_null_docids | ||||||
|  |                 .get(&rtxn, &BEU16::new(colour_green_id)) | ||||||
|  |                 .unwrap() | ||||||
|  |                 .unwrap(); | ||||||
|  |             assert_eq!(bitmap_colour_green.into_iter().collect::<Vec<_>>(), vec![2]); | ||||||
|  |  | ||||||
|  |             let bitmap_colour_blue = index | ||||||
|  |                 .facet_id_is_null_docids | ||||||
|  |                 .get(&rtxn, &BEU16::new(colour_blue_id)) | ||||||
|  |                 .unwrap() | ||||||
|  |                 .unwrap(); | ||||||
|  |             assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]); | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let faceted_fields = hashset!(S("colour")); | ||||||
|  |  | ||||||
|  |         let index = TempIndex::new(); | ||||||
|  |         index.add_documents(content()).unwrap(); | ||||||
|  |         index | ||||||
|  |             .update_settings(|settings| { | ||||||
|  |                 settings.set_filterable_fields(faceted_fields.clone()); | ||||||
|  |             }) | ||||||
|  |             .unwrap(); | ||||||
|  |         check_ok(&index); | ||||||
|  |  | ||||||
|  |         let index = TempIndex::new(); | ||||||
|  |         index | ||||||
|  |             .update_settings(|settings| { | ||||||
|  |                 settings.set_filterable_fields(faceted_fields.clone()); | ||||||
|  |             }) | ||||||
|  |             .unwrap(); | ||||||
|  |         index.add_documents(content()).unwrap(); | ||||||
|  |         check_ok(&index); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     #[test] | ||||||
|  |     fn index_documents_check_is_empty_database() { | ||||||
|  |         let content = || { | ||||||
|  |             documents!([ | ||||||
|  |                 {"id": 0, "tags": null }, | ||||||
|  |                 {"id": 1, "tags": [null] }, | ||||||
|  |                 {"id": 2, "tags": [] }, | ||||||
|  |                 {"id": 3, "tags": ["hello","world"] }, | ||||||
|  |                 {"id": 4, "tags": [""] }, | ||||||
|  |                 {"id": 5 }, | ||||||
|  |                 {"id": 6, "tags": {} }, | ||||||
|  |                 {"id": 7, "tags": {"green": "cool"} }, | ||||||
|  |                 {"id": 8, "tags": {"green": ""} }, | ||||||
|  |                 {"id": 9, "tags": "" }, | ||||||
|  |                 {"id": 10, "tags": { "green": null } }, | ||||||
|  |                 {"id": 11, "tags": { "green": { "blue": null } } }, | ||||||
|  |                 {"id": 12, "tags": { "green": { "blue": [] } } } | ||||||
|  |             ]) | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let check_ok = |index: &Index| { | ||||||
|  |             let rtxn = index.read_txn().unwrap(); | ||||||
|  |             let facets = index.faceted_fields(&rtxn).unwrap(); | ||||||
|  |             assert_eq!(facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue"))); | ||||||
|  |  | ||||||
|  |             let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap(); | ||||||
|  |             let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap(); | ||||||
|  |             let tags_blue_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green.blue").unwrap(); | ||||||
|  |  | ||||||
|  |             let bitmap_empty_tags = | ||||||
|  |                 index.facet_id_is_empty_docids.get(&rtxn, &BEU16::new(tags_id)).unwrap().unwrap(); | ||||||
|  |             assert_eq!(bitmap_empty_tags.into_iter().collect::<Vec<_>>(), vec![2, 6, 9]); | ||||||
|  |  | ||||||
|  |             let bitmap_tags_green = index | ||||||
|  |                 .facet_id_is_empty_docids | ||||||
|  |                 .get(&rtxn, &BEU16::new(tags_green_id)) | ||||||
|  |                 .unwrap() | ||||||
|  |                 .unwrap(); | ||||||
|  |             assert_eq!(bitmap_tags_green.into_iter().collect::<Vec<_>>(), vec![8]); | ||||||
|  |  | ||||||
|  |             let bitmap_tags_blue = index | ||||||
|  |                 .facet_id_is_empty_docids | ||||||
|  |                 .get(&rtxn, &BEU16::new(tags_blue_id)) | ||||||
|  |                 .unwrap() | ||||||
|  |                 .unwrap(); | ||||||
|  |             assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]); | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let faceted_fields = hashset!(S("tags")); | ||||||
|  |  | ||||||
|  |         let index = TempIndex::new(); | ||||||
|  |         index.add_documents(content()).unwrap(); | ||||||
|  |         index | ||||||
|  |             .update_settings(|settings| { | ||||||
|  |                 settings.set_filterable_fields(faceted_fields.clone()); | ||||||
|  |             }) | ||||||
|  |             .unwrap(); | ||||||
|  |         check_ok(&index); | ||||||
|  |  | ||||||
|  |         let index = TempIndex::new(); | ||||||
|  |         index | ||||||
|  |             .update_settings(|settings| { | ||||||
|  |                 settings.set_filterable_fields(faceted_fields.clone()); | ||||||
|  |             }) | ||||||
|  |             .unwrap(); | ||||||
|  |         index.add_documents(content()).unwrap(); | ||||||
|  |         check_ok(&index); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn primary_key_must_not_contain_floats() { |     fn primary_key_must_not_contain_floats() { | ||||||
|         let index = TempIndex::new_with_map_size(4096 * 100); |         let index = TempIndex::new_with_map_size(4096 * 100); | ||||||
|   | |||||||
| @@ -39,6 +39,8 @@ pub(crate) enum TypedChunk { | |||||||
|     FieldIdFacetStringDocids(grenad::Reader<File>), |     FieldIdFacetStringDocids(grenad::Reader<File>), | ||||||
|     FieldIdFacetNumberDocids(grenad::Reader<File>), |     FieldIdFacetNumberDocids(grenad::Reader<File>), | ||||||
|     FieldIdFacetExistsDocids(grenad::Reader<File>), |     FieldIdFacetExistsDocids(grenad::Reader<File>), | ||||||
|  |     FieldIdFacetIsNullDocids(grenad::Reader<File>), | ||||||
|  |     FieldIdFacetIsEmptyDocids(grenad::Reader<File>), | ||||||
|     GeoPoints(grenad::Reader<File>), |     GeoPoints(grenad::Reader<File>), | ||||||
|     ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>), |     ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>), | ||||||
| } | } | ||||||
| @@ -161,6 +163,28 @@ pub(crate) fn write_typed_chunk_into_index( | |||||||
|             )?; |             )?; | ||||||
|             is_merged_database = true; |             is_merged_database = true; | ||||||
|         } |         } | ||||||
|  |         TypedChunk::FieldIdFacetIsNullDocids(facet_id_is_null_docids) => { | ||||||
|  |             append_entries_into_database( | ||||||
|  |                 facet_id_is_null_docids, | ||||||
|  |                 &index.facet_id_is_null_docids, | ||||||
|  |                 wtxn, | ||||||
|  |                 index_is_empty, | ||||||
|  |                 |value, _buffer| Ok(value), | ||||||
|  |                 merge_cbo_roaring_bitmaps, | ||||||
|  |             )?; | ||||||
|  |             is_merged_database = true; | ||||||
|  |         } | ||||||
|  |         TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => { | ||||||
|  |             append_entries_into_database( | ||||||
|  |                 facet_id_is_empty_docids, | ||||||
|  |                 &index.facet_id_is_empty_docids, | ||||||
|  |                 wtxn, | ||||||
|  |                 index_is_empty, | ||||||
|  |                 |value, _buffer| Ok(value), | ||||||
|  |                 merge_cbo_roaring_bitmaps, | ||||||
|  |             )?; | ||||||
|  |             is_merged_database = true; | ||||||
|  |         } | ||||||
|         TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => { |         TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => { | ||||||
|             append_entries_into_database( |             append_entries_into_database( | ||||||
|                 word_pair_proximity_docids_iter, |                 word_pair_proximity_docids_iter, | ||||||
|   | |||||||
| @@ -82,10 +82,23 @@ test_filter!( | |||||||
|     vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank<3", "asc_desc_rank<1"])] |     vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank<3", "asc_desc_rank<1"])] | ||||||
| ); | ); | ||||||
| test_filter!(exists_filter_1, vec![Right("opt1 EXISTS")]); | test_filter!(exists_filter_1, vec![Right("opt1 EXISTS")]); | ||||||
|  | test_filter!(exists_filter_2, vec![Right("opt1.opt2 EXISTS")]); | ||||||
| test_filter!(exists_filter_1_not, vec![Right("opt1 NOT EXISTS")]); | test_filter!(exists_filter_1_not, vec![Right("opt1 NOT EXISTS")]); | ||||||
| test_filter!(exists_filter_1_not_alt, vec![Right("NOT opt1 EXISTS")]); | test_filter!(exists_filter_1_not_alt, vec![Right("NOT opt1 EXISTS")]); | ||||||
| test_filter!(exists_filter_1_double_not, vec![Right("NOT opt1 NOT EXISTS")]); | test_filter!(exists_filter_1_double_not, vec![Right("NOT opt1 NOT EXISTS")]); | ||||||
|  |  | ||||||
|  | test_filter!(null_filter_1, vec![Right("opt1 IS NULL")]); | ||||||
|  | test_filter!(null_filter_2, vec![Right("opt1.opt2 IS NULL")]); | ||||||
|  | test_filter!(null_filter_1_not, vec![Right("opt1 IS NOT NULL")]); | ||||||
|  | test_filter!(null_filter_1_not_alt, vec![Right("NOT opt1 IS NULL")]); | ||||||
|  | test_filter!(null_filter_1_double_not, vec![Right("NOT opt1 IS NOT NULL")]); | ||||||
|  |  | ||||||
|  | test_filter!(empty_filter_1, vec![Right("opt1 IS EMPTY")]); | ||||||
|  | test_filter!(empty_filter_2, vec![Right("opt1.opt2 IS EMPTY")]); | ||||||
|  | test_filter!(empty_filter_1_not, vec![Right("opt1 IS NOT EMPTY")]); | ||||||
|  | test_filter!(empty_filter_1_not_alt, vec![Right("NOT opt1 IS EMPTY")]); | ||||||
|  | test_filter!(empty_filter_1_double_not, vec![Right("NOT opt1 IS NOT EMPTY")]); | ||||||
|  |  | ||||||
| test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]); | test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]); | ||||||
| test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]); | test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]); | ||||||
| test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]); | test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]); | ||||||
|   | |||||||
| @@ -205,6 +205,30 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> { | |||||||
|         } else if let Some(opt1) = &document.opt1 { |         } else if let Some(opt1) = &document.opt1 { | ||||||
|             id = contains_key_rec(opt1, "opt2").then(|| document.id.clone()); |             id = contains_key_rec(opt1, "opt2").then(|| document.id.clone()); | ||||||
|         } |         } | ||||||
|  |     } else if matches!(filter, "opt1 IS NULL" | "NOT opt1 IS NOT NULL") { | ||||||
|  |         id = document.opt1.as_ref().map_or(false, |v| v.is_null()).then(|| document.id.clone()); | ||||||
|  |     } else if matches!(filter, "NOT opt1 IS NULL" | "opt1 IS NOT NULL") { | ||||||
|  |         id = document.opt1.as_ref().map_or(true, |v| !v.is_null()).then(|| document.id.clone()); | ||||||
|  |     } else if matches!(filter, "opt1.opt2 IS NULL") { | ||||||
|  |         if document.opt1opt2.as_ref().map_or(false, |v| v.is_null()) { | ||||||
|  |             id = Some(document.id.clone()); | ||||||
|  |         } else if let Some(opt1) = &document.opt1 { | ||||||
|  |             if !opt1.is_null() { | ||||||
|  |                 id = contains_null_rec(opt1, "opt2").then(|| document.id.clone()); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } else if matches!(filter, "opt1 IS EMPTY" | "NOT opt1 IS NOT EMPTY") { | ||||||
|  |         id = document.opt1.as_ref().map_or(false, is_empty_value).then(|| document.id.clone()); | ||||||
|  |     } else if matches!(filter, "NOT opt1 IS EMPTY" | "opt1 IS NOT EMPTY") { | ||||||
|  |         id = document | ||||||
|  |             .opt1 | ||||||
|  |             .as_ref() | ||||||
|  |             .map_or(true, |v| !is_empty_value(v)) | ||||||
|  |             .then(|| document.id.clone()); | ||||||
|  |     } else if matches!(filter, "opt1.opt2 IS EMPTY") { | ||||||
|  |         if document.opt1opt2.as_ref().map_or(false, is_empty_value) { | ||||||
|  |             id = Some(document.id.clone()); | ||||||
|  |         } | ||||||
|     } else if matches!( |     } else if matches!( | ||||||
|         filter, |         filter, | ||||||
|         "tag_in IN[1, 2, 3, four, five]" | "NOT tag_in NOT IN[1, 2, 3, four, five]" |         "tag_in IN[1, 2, 3, four, five]" | "NOT tag_in NOT IN[1, 2, 3, four, five]" | ||||||
| @@ -218,6 +242,15 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> { | |||||||
|     id |     id | ||||||
| } | } | ||||||
|  |  | ||||||
|  | pub fn is_empty_value(v: &serde_json::Value) -> bool { | ||||||
|  |     match v { | ||||||
|  |         serde_json::Value::String(s) => s.is_empty(), | ||||||
|  |         serde_json::Value::Array(a) => a.is_empty(), | ||||||
|  |         serde_json::Value::Object(o) => o.is_empty(), | ||||||
|  |         _ => false, | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool { | pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool { | ||||||
|     match v { |     match v { | ||||||
|         serde_json::Value::Array(v) => { |         serde_json::Value::Array(v) => { | ||||||
| @@ -240,6 +273,28 @@ pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | pub fn contains_null_rec(v: &serde_json::Value, key: &str) -> bool { | ||||||
|  |     match v { | ||||||
|  |         serde_json::Value::Object(v) => { | ||||||
|  |             for (k, v) in v.iter() { | ||||||
|  |                 if k == key && v.is_null() || contains_null_rec(v, key) { | ||||||
|  |                     return true; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             false | ||||||
|  |         } | ||||||
|  |         serde_json::Value::Array(v) => { | ||||||
|  |             for v in v.iter() { | ||||||
|  |                 if contains_null_rec(v, key) { | ||||||
|  |                     return true; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             false | ||||||
|  |         } | ||||||
|  |         _ => false, | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| pub fn expected_filtered_ids(filters: Vec<Either<Vec<&str>, &str>>) -> HashSet<String> { | pub fn expected_filtered_ids(filters: Vec<Either<Vec<&str>, &str>>) -> HashSet<String> { | ||||||
|     let dataset: Vec<TestDocument> = |     let dataset: Vec<TestDocument> = | ||||||
|         serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect(); |         serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect(); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user