mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 09:56:28 +00:00 
			
		
		
		
	Merge #338
338: Fix string fields sorting r=Kerollmops a=shekhirin Resolves https://github.com/meilisearch/milli/issues/333 <details> <summary>curl checks</summary> ```console ➜ ~ curl -s 'localhost:7700/indexes/movies/search' -d '{"sort": ["title:asc"], "limit": 30}' | jq -r '.hits | map(.title)[]' #1 Cheerleader Camp #Horror #RealityHigh #Roxy #SquadGoals $5 a Day $9.99 '71 (2) (500) Days of Summer (Girl)Friend *batteries not included ...And God Created Woman ...And Justice for All ...E fuori nevica! .45 1 1 Mile To You 1 Night 10 10 Cloverfield Lane 10 giorni senza mamma 10 Items or Less 10 Rillington Place 10 Rules for Sleeping Around 10 Things I Hate About You 10 to Midnight 10 Years 10,000 BC 10,000 Saints ➜ ~ curl -s 'localhost:7700/indexes/movies/search' -d '{"sort": ["title:desc"], "limit": 30}' | jq -r '.hits | map(.title)[]' 크게 될 놈 왓칭 뷰티플 마인드 노무현과 바보들 ハニー Счастье – это… Часть 2 СОТКА Смотри мою любовь Позивний 'Бандерас' Лошо момиче Күлүк Хомус Куда течет море Каникулы президента Ακίνητο Ποτάμι Üç Harfliler: Beddua È nata una Star? Æon Flux Ága À propos de Nice À Nos Amours À l'aventure ¡Three Amigos! Zulu Dawn Zulu Zulu Zu: Warriors from the Magic Mountain Zu Warriors Zorro Zorba the Greek Zootopia ``` </details> Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
This commit is contained in:
		@@ -243,24 +243,27 @@ impl<'t> Iterator for FacetStringGroupRevRange<'t> {
 | 
				
			|||||||
    type Item = heed::Result<((NonZeroU8, u32, u32), (Option<(&'t str, &'t str)>, RoaringBitmap))>;
 | 
					    type Item = heed::Result<((NonZeroU8, u32, u32), (Option<(&'t str, &'t str)>, RoaringBitmap))>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fn next(&mut self) -> Option<Self::Item> {
 | 
					    fn next(&mut self) -> Option<Self::Item> {
 | 
				
			||||||
        match self.iter.next() {
 | 
					        loop {
 | 
				
			||||||
            Some(Ok(((_fid, level, left, right), docids))) => {
 | 
					            match self.iter.next() {
 | 
				
			||||||
                let must_be_returned = match self.end {
 | 
					                Some(Ok(((_fid, level, left, right), docids))) => {
 | 
				
			||||||
                    Included(end) => right <= end,
 | 
					                    let must_be_returned = match self.end {
 | 
				
			||||||
                    Excluded(end) => right < end,
 | 
					                        Included(end) => right <= end,
 | 
				
			||||||
                    Unbounded => true,
 | 
					                        Excluded(end) => right < end,
 | 
				
			||||||
                };
 | 
					                        Unbounded => true,
 | 
				
			||||||
                if must_be_returned {
 | 
					                    };
 | 
				
			||||||
                    match docids.decode() {
 | 
					                    if must_be_returned {
 | 
				
			||||||
                        Ok((bounds, docids)) => Some(Ok(((level, left, right), (bounds, docids)))),
 | 
					                        match docids.decode() {
 | 
				
			||||||
                        Err(e) => Some(Err(e)),
 | 
					                            Ok((bounds, docids)) => {
 | 
				
			||||||
 | 
					                                return Some(Ok(((level, left, right), (bounds, docids))))
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                            Err(e) => return Some(Err(e)),
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                } else {
 | 
					                    continue;
 | 
				
			||||||
                    None
 | 
					 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					                Some(Err(e)) => return Some(Err(e)),
 | 
				
			||||||
 | 
					                None => return None,
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            Some(Err(e)) => Some(Err(e)),
 | 
					 | 
				
			||||||
            None => None,
 | 
					 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -545,18 +548,18 @@ impl<'t> Iterator for FacetStringIter<'t> {
 | 
				
			|||||||
            // the algorithm less complex to understand.
 | 
					            // the algorithm less complex to understand.
 | 
				
			||||||
            let last = match last {
 | 
					            let last = match last {
 | 
				
			||||||
                Left(ascending) => match ascending {
 | 
					                Left(ascending) => match ascending {
 | 
				
			||||||
                    Left(last) => Left(Left(last)),
 | 
					                    Left(group) => Left(Left(group)),
 | 
				
			||||||
                    Right(last) => Right(Left(last)),
 | 
					                    Right(zero_level) => Right(Left(zero_level)),
 | 
				
			||||||
                },
 | 
					                },
 | 
				
			||||||
                Right(descending) => match descending {
 | 
					                Right(descending) => match descending {
 | 
				
			||||||
                    Left(last) => Left(Right(last)),
 | 
					                    Left(group) => Left(Right(group)),
 | 
				
			||||||
                    Right(last) => Right(Right(last)),
 | 
					                    Right(zero_level) => Right(Right(zero_level)),
 | 
				
			||||||
                },
 | 
					                },
 | 
				
			||||||
            };
 | 
					            };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            match last {
 | 
					            match last {
 | 
				
			||||||
                Left(last) => {
 | 
					                Left(group) => {
 | 
				
			||||||
                    for result in last {
 | 
					                    for result in group {
 | 
				
			||||||
                        match result {
 | 
					                        match result {
 | 
				
			||||||
                            Ok(((level, left, right), (string_bounds, mut docids))) => {
 | 
					                            Ok(((level, left, right), (string_bounds, mut docids))) => {
 | 
				
			||||||
                                docids &= &*documents_ids;
 | 
					                                docids &= &*documents_ids;
 | 
				
			||||||
@@ -566,6 +569,27 @@ impl<'t> Iterator for FacetStringIter<'t> {
 | 
				
			|||||||
                                    }
 | 
					                                    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                                    let result = if is_ascending {
 | 
					                                    let result = if is_ascending {
 | 
				
			||||||
 | 
					                                        match string_bounds {
 | 
				
			||||||
 | 
					                                            Some((left, right)) => FacetStringLevelZeroRange::new(
 | 
				
			||||||
 | 
					                                                self.rtxn,
 | 
				
			||||||
 | 
					                                                self.db,
 | 
				
			||||||
 | 
					                                                self.field_id,
 | 
				
			||||||
 | 
					                                                Included(left),
 | 
				
			||||||
 | 
					                                                Included(right),
 | 
				
			||||||
 | 
					                                            )
 | 
				
			||||||
 | 
					                                            .map(Right),
 | 
				
			||||||
 | 
					                                            None => FacetStringGroupRange::new(
 | 
				
			||||||
 | 
					                                                self.rtxn,
 | 
				
			||||||
 | 
					                                                self.db,
 | 
				
			||||||
 | 
					                                                self.field_id,
 | 
				
			||||||
 | 
					                                                NonZeroU8::new(level.get() - 1).unwrap(),
 | 
				
			||||||
 | 
					                                                Included(left),
 | 
				
			||||||
 | 
					                                                Included(right),
 | 
				
			||||||
 | 
					                                            )
 | 
				
			||||||
 | 
					                                            .map(Left),
 | 
				
			||||||
 | 
					                                        }
 | 
				
			||||||
 | 
					                                        .map(Left)
 | 
				
			||||||
 | 
					                                    } else {
 | 
				
			||||||
                                        match string_bounds {
 | 
					                                        match string_bounds {
 | 
				
			||||||
                                            Some((left, right)) => {
 | 
					                                            Some((left, right)) => {
 | 
				
			||||||
                                                FacetStringLevelZeroRevRange::new(
 | 
					                                                FacetStringLevelZeroRevRange::new(
 | 
				
			||||||
@@ -588,27 +612,6 @@ impl<'t> Iterator for FacetStringIter<'t> {
 | 
				
			|||||||
                                            .map(Left),
 | 
					                                            .map(Left),
 | 
				
			||||||
                                        }
 | 
					                                        }
 | 
				
			||||||
                                        .map(Right)
 | 
					                                        .map(Right)
 | 
				
			||||||
                                    } else {
 | 
					 | 
				
			||||||
                                        match string_bounds {
 | 
					 | 
				
			||||||
                                            Some((left, right)) => FacetStringLevelZeroRange::new(
 | 
					 | 
				
			||||||
                                                self.rtxn,
 | 
					 | 
				
			||||||
                                                self.db,
 | 
					 | 
				
			||||||
                                                self.field_id,
 | 
					 | 
				
			||||||
                                                Included(left),
 | 
					 | 
				
			||||||
                                                Included(right),
 | 
					 | 
				
			||||||
                                            )
 | 
					 | 
				
			||||||
                                            .map(Right),
 | 
					 | 
				
			||||||
                                            None => FacetStringGroupRange::new(
 | 
					 | 
				
			||||||
                                                self.rtxn,
 | 
					 | 
				
			||||||
                                                self.db,
 | 
					 | 
				
			||||||
                                                self.field_id,
 | 
					 | 
				
			||||||
                                                NonZeroU8::new(level.get() - 1).unwrap(),
 | 
					 | 
				
			||||||
                                                Included(left),
 | 
					 | 
				
			||||||
                                                Included(right),
 | 
					 | 
				
			||||||
                                            )
 | 
					 | 
				
			||||||
                                            .map(Left),
 | 
					 | 
				
			||||||
                                        }
 | 
					 | 
				
			||||||
                                        .map(Left)
 | 
					 | 
				
			||||||
                                    };
 | 
					                                    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                                    match result {
 | 
					                                    match result {
 | 
				
			||||||
@@ -624,9 +627,9 @@ impl<'t> Iterator for FacetStringIter<'t> {
 | 
				
			|||||||
                        }
 | 
					                        }
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                Right(last) => {
 | 
					                Right(zero_level) => {
 | 
				
			||||||
                    // level zero only
 | 
					                    // level zero only
 | 
				
			||||||
                    for result in last {
 | 
					                    for result in zero_level {
 | 
				
			||||||
                        match result {
 | 
					                        match result {
 | 
				
			||||||
                            Ok((normalized, original, mut docids)) => {
 | 
					                            Ok((normalized, original, mut docids)) => {
 | 
				
			||||||
                                docids &= &*documents_ids;
 | 
					                                docids &= &*documents_ids;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,6 +1,12 @@
 | 
				
			|||||||
 | 
					use std::cmp::Reverse;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use big_s::S;
 | 
					use big_s::S;
 | 
				
			||||||
use milli::update::Settings;
 | 
					use heed::EnvOpenOptions;
 | 
				
			||||||
use milli::{AscDesc, Criterion, Search, SearchResult};
 | 
					use itertools::Itertools;
 | 
				
			||||||
 | 
					use maplit::hashset;
 | 
				
			||||||
 | 
					use milli::update::{Settings, UpdateBuilder, UpdateFormat};
 | 
				
			||||||
 | 
					use milli::{AscDesc, Criterion, Index, Search, SearchResult};
 | 
				
			||||||
 | 
					use rand::Rng;
 | 
				
			||||||
use Criterion::*;
 | 
					use Criterion::*;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
 | 
					use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
 | 
				
			||||||
@@ -9,6 +15,7 @@ const ALLOW_TYPOS: bool = true;
 | 
				
			|||||||
const DISALLOW_TYPOS: bool = false;
 | 
					const DISALLOW_TYPOS: bool = false;
 | 
				
			||||||
const ALLOW_OPTIONAL_WORDS: bool = true;
 | 
					const ALLOW_OPTIONAL_WORDS: bool = true;
 | 
				
			||||||
const DISALLOW_OPTIONAL_WORDS: bool = false;
 | 
					const DISALLOW_OPTIONAL_WORDS: bool = false;
 | 
				
			||||||
 | 
					const ASC_DESC_CANDIDATES_THRESHOLD: usize = 1000;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
macro_rules! test_criterion {
 | 
					macro_rules! test_criterion {
 | 
				
			||||||
    ($func:ident, $optional_word:ident, $authorize_typos:ident, $criteria:expr, $sort_criteria:expr) => {
 | 
					    ($func:ident, $optional_word:ident, $authorize_typos:ident, $criteria:expr, $sort_criteria:expr) => {
 | 
				
			||||||
@@ -357,3 +364,93 @@ fn criteria_mixup() {
 | 
				
			|||||||
        assert_eq!(documents_ids, expected_external_ids);
 | 
					        assert_eq!(documents_ids, expected_external_ids);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[test]
 | 
				
			||||||
 | 
					fn criteria_ascdesc() {
 | 
				
			||||||
 | 
					    let path = tempfile::tempdir().unwrap();
 | 
				
			||||||
 | 
					    let mut options = EnvOpenOptions::new();
 | 
				
			||||||
 | 
					    options.map_size(10 * 1024 * 1024); // 10 MB
 | 
				
			||||||
 | 
					    let index = Index::new(options, &path).unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut wtxn = index.write_txn().unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let mut builder = Settings::new(&mut wtxn, &index, 0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    builder.set_sortable_fields(hashset! {
 | 
				
			||||||
 | 
					        S("name"),
 | 
				
			||||||
 | 
					        S("age"),
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					    builder.execute(|_, _| ()).unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // index documents
 | 
				
			||||||
 | 
					    let mut builder = UpdateBuilder::new(0);
 | 
				
			||||||
 | 
					    builder.max_memory(10 * 1024 * 1024); // 10MiB
 | 
				
			||||||
 | 
					    let mut builder = builder.index_documents(&mut wtxn, &index);
 | 
				
			||||||
 | 
					    builder.update_format(UpdateFormat::Csv);
 | 
				
			||||||
 | 
					    builder.enable_autogenerate_docids();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let content = [
 | 
				
			||||||
 | 
					        vec![S("name,age")],
 | 
				
			||||||
 | 
					        (0..ASC_DESC_CANDIDATES_THRESHOLD + 1)
 | 
				
			||||||
 | 
					            .map(|_| {
 | 
				
			||||||
 | 
					                let mut rng = rand::thread_rng();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let age = rng.gen::<u32>().to_string();
 | 
				
			||||||
 | 
					                let name = rng
 | 
				
			||||||
 | 
					                    .sample_iter(&rand::distributions::Alphanumeric)
 | 
				
			||||||
 | 
					                    .map(char::from)
 | 
				
			||||||
 | 
					                    .filter(|c| *c >= 'a' && *c <= 'z')
 | 
				
			||||||
 | 
					                    .take(10)
 | 
				
			||||||
 | 
					                    .collect::<String>();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                format!("{},{}", name, age)
 | 
				
			||||||
 | 
					            })
 | 
				
			||||||
 | 
					            .collect::<Vec<_>>(),
 | 
				
			||||||
 | 
					    ]
 | 
				
			||||||
 | 
					    .iter()
 | 
				
			||||||
 | 
					    .flatten()
 | 
				
			||||||
 | 
					    .join("\n");
 | 
				
			||||||
 | 
					    builder.execute(content.as_bytes(), |_, _| ()).unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    wtxn.commit().unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let rtxn = index.read_txn().unwrap();
 | 
				
			||||||
 | 
					    let documents = index.all_documents(&rtxn).unwrap().map(|doc| doc.unwrap()).collect::<Vec<_>>();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for criterion in [Asc(S("name")), Desc(S("name")), Asc(S("age")), Desc(S("age"))] {
 | 
				
			||||||
 | 
					        eprintln!("Testing with criterion: {:?}", &criterion);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let mut wtxn = index.write_txn().unwrap();
 | 
				
			||||||
 | 
					        let mut builder = Settings::new(&mut wtxn, &index, 0);
 | 
				
			||||||
 | 
					        builder.set_criteria(vec![criterion.to_string()]);
 | 
				
			||||||
 | 
					        builder.execute(|_, _| ()).unwrap();
 | 
				
			||||||
 | 
					        wtxn.commit().unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let mut rtxn = index.read_txn().unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let mut search = Search::new(&mut rtxn, &index);
 | 
				
			||||||
 | 
					        search.limit(ASC_DESC_CANDIDATES_THRESHOLD + 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let SearchResult { documents_ids, .. } = search.execute().unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let expected_document_ids = match criterion {
 | 
				
			||||||
 | 
					            Asc(field_name) if field_name == "name" => {
 | 
				
			||||||
 | 
					                documents.iter().sorted_by_key(|(_, obkv)| obkv.get(0).unwrap())
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Desc(field_name) if field_name == "name" => {
 | 
				
			||||||
 | 
					                documents.iter().sorted_by_key(|(_, obkv)| Reverse(obkv.get(0).unwrap()))
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Asc(field_name) if field_name == "name" => {
 | 
				
			||||||
 | 
					                documents.iter().sorted_by_key(|(_, obkv)| obkv.get(1).unwrap())
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            Desc(field_name) if field_name == "name" => {
 | 
				
			||||||
 | 
					                documents.iter().sorted_by_key(|(_, obkv)| Reverse(obkv.get(1).unwrap()))
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            _ => continue,
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        .map(|(id, _)| *id)
 | 
				
			||||||
 | 
					        .collect::<Vec<_>>();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        assert_eq!(documents_ids, expected_document_ids);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user