Commit Graph

11485 Commits

Author SHA1 Message Date
Loïc Lecrenier
ef75a77464 Fix undefined behaviour caused by reusing key from the database
New full snapshot:
---
source: milli/src/update/word_prefix_pair_proximity_docids.rs
---
5                a    1  [101, ]
5                a    2  [101, ]
5                am   1  [101, ]
5                b    4  [101, ]
5                be   4  [101, ]
am               a    3  [101, ]
amazing          a    1  [100, ]
amazing          a    2  [100, ]
amazing          a    3  [100, ]
amazing          an   1  [100, ]
amazing          an   2  [100, ]
amazing          b    2  [100, ]
amazing          be   2  [100, ]
an               a    1  [100, ]
an               a    2  [100, 202, ]
an               am   1  [100, ]
an               an   2  [100, ]
an               b    3  [100, ]
an               be   3  [100, ]
and              a    2  [100, ]
and              a    3  [100, ]
and              a    4  [100, ]
and              am   2  [100, ]
and              an   3  [100, ]
and              b    1  [100, ]
and              be   1  [100, ]
at               a    1  [100, 202, ]
at               a    2  [100, 101, ]
at               a    3  [100, ]
at               am   2  [100, 101, ]
at               an   1  [100, 202, ]
at               an   3  [100, ]
at               b    3  [101, ]
at               b    4  [100, ]
at               be   3  [101, ]
at               be   4  [100, ]
beautiful        a    2  [100, ]
beautiful        a    3  [100, ]
beautiful        a    4  [100, ]
beautiful        am   3  [100, ]
beautiful        an   2  [100, ]
beautiful        an   4  [100, ]
bell             a    2  [101, ]
bell             a    4  [101, ]
bell             am   4  [101, ]
extraordinary    a    2  [202, ]
extraordinary    a    3  [202, ]
extraordinary    an   2  [202, ]
house            a    3  [100, 202, ]
house            a    4  [100, 202, ]
house            am   4  [100, ]
house            an   3  [100, 202, ]
house            b    2  [100, ]
house            be   2  [100, ]
rings            a    1  [101, ]
rings            a    3  [101, ]
rings            am   3  [101, ]
rings            b    2  [101, ]
rings            be   2  [101, ]
the              a    3  [101, ]
the              b    1  [101, ]
the              be   1  [101, ]
2022-08-17 12:17:45 +02:00
Loïc Lecrenier
7309111433 Don't run block code in doc tests of word_pair_proximity_docids 2022-08-17 12:17:18 +02:00
Loïc Lecrenier
f6f8f543e1 Run cargo fmt 2022-08-17 12:17:18 +02:00
Loïc Lecrenier
34c991ea02 Add newlines in documentation of word_prefix_pair_proximity_docids 2022-08-17 12:17:18 +02:00
Loïc Lecrenier
06f3fd8c6d Add more comments to WordPrefixPairProximityDocids::execute 2022-08-17 12:17:18 +02:00
Loïc Lecrenier
474500362c Update wpppd snapshots
New snapshot (yes, it's wrong as well, it will get fixed later):

---
source: milli/src/update/word_prefix_pair_proximity_docids.rs
---
5                a    1  [101, ]
5                a    2  [101, ]
5                am   1  [101, ]
5                b    4  [101, ]
5                be   4  [101, ]
am               a    3  [101, ]
amazing          a    1  [100, ]
amazing          a    2  [100, ]
amazing          a    3  [100, ]
amazing          an   1  [100, ]
amazing          an   2  [100, ]
amazing          b    2  [100, ]
amazing          be   2  [100, ]
an               a    1  [100, ]
an               a    2  [100, 202, ]
an               am   1  [100, ]
an               b    3  [100, ]
an               be   3  [100, ]
and              a    2  [100, ]
and              a    3  [100, ]
and              a    4  [100, ]
and              b    1  [100, ]
and              be   1  [100, ]
                 d\0  0  [100, 202, ]
an               an   2  [100, ]
and              am   2  [100, ]
and              an   3  [100, ]
at               a    2  [100, 101, ]
at               a    3  [100, ]
at               am   2  [100, 101, ]
at               an   1  [100, 202, ]
at               an   3  [100, ]
at               b    3  [101, ]
at               b    4  [100, ]
at               be   3  [101, ]
at               be   4  [100, ]
beautiful        a    2  [100, ]
beautiful        a    3  [100, ]
beautiful        a    4  [100, ]
beautiful        am   3  [100, ]
beautiful        an   2  [100, ]
beautiful        an   4  [100, ]
bell             a    2  [101, ]
bell             a    4  [101, ]
bell             am   4  [101, ]
extraordinary    a    2  [202, ]
extraordinary    a    3  [202, ]
extraordinary    an   2  [202, ]
house            a    4  [100, 202, ]
house            a    4  [100, ]
house            am   4  [100, ]
house            an   3  [100, 202, ]
house            b    2  [100, ]
house            be   2  [100, ]
rings            a    1  [101, ]
rings            a    3  [101, ]
rings            am   3  [101, ]
rings            b    2  [101, ]
rings            be   2  [101, ]
the              a    3  [101, ]
the              b    1  [101, ]
the              be   1  [101, ]
2022-08-17 12:17:18 +02:00
Loïc Lecrenier
ea4a96761c Move content of readme for WordPrefixPairProximityDocids into the code 2022-08-17 12:05:37 +02:00
Loïc Lecrenier
220921628b Simplify and document WordPrefixPairProximityDocIds::execute 2022-08-17 11:59:19 +02:00
Loïc Lecrenier
044356d221 Optimise WordPrefixPairProximityDocIds merge operation 2022-08-17 11:59:18 +02:00
Loïc Lecrenier
d350114159 Add tests for WordPrefixPairProximityDocIds 2022-08-17 11:59:15 +02:00
Loïc Lecrenier
86807ca848 Refactor word prefix pair proximity indexation further 2022-08-17 11:59:13 +02:00
Loïc Lecrenier
306593144d Refactor word prefix pair proximity indexation 2022-08-17 11:59:00 +02:00
Loïc Lecrenier
5d59bfde8a Sort Cargo.toml dependencies 2022-08-17 11:46:56 +02:00
bors[bot]
f55034ed54 Merge #606
606: Make binaries faster on release profile through better compile options r=Kerollmops a=loiclec

Using `codegen-units = 1` and `lto = 'thin'` makes the compile time a bit longer, but also produces faster binaries.

I'd like to run milli's benchmark with these options, so that we can see whether it is worth enabling on meilisearch.

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-17 08:57:24 +00:00
Loïc Lecrenier
03e679b634 Make binaries faster on release profile through better compile options 2022-08-17 10:29:33 +02:00
Loïc Lecrenier
f20e588ec1 Make sure there is one newline at eof in cargo.toml 2022-08-17 07:44:33 +02:00
Loïc Lecrenier
20be69e1b9 Always use mimalloc as the global allocator 2022-08-16 20:09:36 +02:00
bors[bot]
22874ce300 Merge #2664
2664: 🐞 fix: Support https in print_launch_resume r=irevoire a=evpeople

fix #2660

# Pull Request

## What does this PR do?
Fixes #2660 
<!-- Please link the issue you're trying to fix with this PR, if none then please create an issue first. -->

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: evpeople <hangcaihui@gmail.com>
2022-08-16 14:40:20 +00:00
bors[bot]
b5f91b91c3 Merge #2523
2523: Improve the tasks error reporting when processed in batches r=irevoire a=Kerollmops

This fixes #2478 by changing the behavior of the task handler when there is an error in a batch of document addition or update.

What changes is that when there is a user error in a task in a batch we now report this task as failed with the right error message but we continue to process the other tasks. A user error can be when a geo field is invalid, a document id is invalid, or missing.

fixes #2582, #2478

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-08-16 14:15:30 +00:00
ManyTheFish
b6e6a08f7d Fix CI test 2022-08-16 15:14:01 +02:00
bors[bot]
8198bb9da2 Merge #2665
2665: 📎 makes clippy happy r=Kerollmops a=irevoire



Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-08-16 12:01:56 +00:00
bors[bot]
293a246af8 Merge #601
601: Introduce snapshot tests r=Kerollmops a=loiclec

# Pull Request
## What does this PR do?
Introduce snapshot tests into milli, by using the `insta` crate. This implements the idea described by #597 

See: [insta.rs](https://insta.rs)

## Design
There is now a new file, `snapshot_tests.rs`, which is compiled only under `#[cfg(test)]`. It exposes the `db_snap!` macro, which is used to snapshot the content of a database.

When running `cargo test`, `insta` will check that the value of the current snapshot is the same as the previous one (on the file system). If they are the same, the test passes. If they are different, the test fails and you are asked to review the new snapshot to approve or reject it.

We don't want to save very large snapshots to the file system, because it will pollute the git repository and increase its size too much. Instead, we only save their `md5` hashes under the name `<snapshot_name>.hash.snap`. There is a new environment variable called `MILLI_TEST_FULL_SNAPS` which can be set to `true` in order to *also* save the full content of the snapshot under the name `<snapshot_name>.full.snap`. However, snapshots with the extension `.full.snap` are never saved to the git repository.

## Example
```rust
// In e.g. facets.rs
#[test]
fn my_test() {
    // create an index
    let index = TempIndex::new():
    index.add_documents(...);
    index.update_settings(|settings| ...);
    
    // then snapshot the content of one of its databases
    // the snapshot will be saved at the current folder under facets.rs/my_test/facet_id_string_docids.snap
    db_snap!(index, facet_id_string_docids);

    index.add_documents(...);   

    // we can also name the snapshot to ensure there is no conflict
    // this snapshot will be saved at facets.rs/my_test/updated/facet_id_string_docids.snap
    db_snap!(index, facet_id_string, docids, "updated");
    
    // and we can also use "inline" snapshots, which insert their content in the given string literal
    db_snap!(index, field_distributions, `@"");`
    // once the snapshot is approved, it will automatically get transformed to, e.g.:
    // db_snap!(index, field_distributions, `@"`
    // my_facet        21
    // other_field     3
    // ");
    
    // now let's add **many** documents
    index.add_documents(...);
    
    // because the snapshot is too big, its hash is saved instead
    // if the MILLI_TEST_FULL_SNAPS env variable is set to true, then the full snapshot will also be saved
    // at facets.rs/my_test/large/facet_id_string_docids.full.snap
    db_snap!(index, facet_id_string_docids, "large", `@"5348bbc46b5384455b6a900666d2a502");`
}
```

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-16 11:57:09 +00:00
Loïc Lecrenier
dea00311b6 Add type annotations to remove compiler error 2022-08-16 09:19:30 +02:00
Irevoire
68a7d6bc61 reformat 2022-08-12 15:11:01 +02:00
Irevoire
83e20027fd 📎 makes clippy happy 2022-08-12 14:18:27 +02:00
evpeople
f21a4d61da 🌈 style(http/main.rs): 2022-08-12 16:16:23 +08:00
evpeople
12538d5a44 🐞 fix: Support https when print_lanuch_resume
fix #2660
2022-08-11 21:29:18 +08:00
ManyTheFish
ae174c2cca Fix task serialization 2022-08-11 13:35:35 +02:00
bors[bot]
e6b806e0cf Merge #2662
2662: Fix(cli): Clamp databases max size to a multiple of system page size r=Kerollmops a=ManyTheFish

fix #2659


Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-08-11 08:49:24 +00:00
ManyTheFish
cf955a77db Fix(cli): Clamp databases max size to a multiple of system page size
fix #2659
2022-08-11 10:44:47 +02:00
ManyTheFish
3a48de136e Add autobatching test 2022-08-10 17:02:29 +02:00
Loïc Lecrenier
fb2b6c0c28 Use mimalloc for benchmarks on all platforms 2022-08-10 16:56:42 +02:00
Loïc Lecrenier
6f49126223 Fix db_snap macro with inline parameter 2022-08-10 15:55:22 +02:00
Loïc Lecrenier
12920f2a4f Fix paths of snapshot tests 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
4b7fd4dfae Update insta version 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
ce560fdcb5 Add documentation for db_snap! 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
748bb86b5b cargo fmt 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
051f24f674 Switch to snapshot tests for search/matches/mod.rs 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
d2e01528a6 Switch to snapshot tests for search/criteria/typo.rs 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
a9c7d82693 Switch to snapshot tests for search/criteria/attribute.rs 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
4bba2f41d7 Switch to snapshot tests for query_tree.rs 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
8ac24d3114 Cargo fmt + fix compiler warnings/error 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
6066256689 Add snapshot tests for indexing of word_prefix_pair_proximity_docids 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
3a734af159 Add snapshot tests for Facets::execute 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
b9907997e4 Remove old snapshot tests code 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
ef889ade5d Refactor snapshot tests 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
334098a7e0 Add index snapshot test helper function 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
8f73251012 Use mimalloc for benchmarks on macOS 2022-08-10 13:30:56 +02:00
ManyTheFish
b389be48a0 Factorize phrase computation 2022-08-08 10:37:31 +02:00
bors[bot]
950d8e4c44 Merge #600
600: Simplify some unit tests r=ManyTheFish a=loiclec

# Pull Request

## What does this PR do?
Simplify the code that is used in unit tests to create and modify an index. Basically, the following code:
```rust
  let path = tempfile::tempdir().unwrap();
  let mut options = EnvOpenOptions::new();
  options.map_size(10 * 1024 * 1024); // 10 MB
  let index = Index::new(options, &path).unwrap();

  let mut wtxn = index.write_txn().unwrap();
  let content = documents!([
      { "id": 0, "name": "kevin" },
  ]);
  let config = IndexerConfig::default();
  let indexing_config = IndexDocumentsConfig::default();
  let builder =
      IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
  let (builder, user_error) = builder.add_documents(content).unwrap();
  user_error.unwrap();
  builder.execute().unwrap();
  wtxn.commit.unwrap();

  let mut wtxn = index.write_txn().unwrap();
  let config = IndexerConfig::default();
  let mut builder = Settings::new(&mut wtxn, &index, &config);
  builder.set_primary_key(S("docid"));
  builder.set_filterable_fields(hashset! { S("label") });
  builder.execute(|_| ()).unwrap();
  wtxn.commit().unwrap();
```
becomes:
```rust
let index = TempIndex::new():
index.add_documents(documents!(
      { "id": 0, "name": "kevin" },
)).unwrap();
index.update_settings(|settings| {
    settings.set_primary_key(S("docid"));
    settings.set_filterable_fields(hashset! { S("label") });
}).unwrap();
```

Then there is a bunch of options to modify the indexing configs, the map size, to reuse a transaction, etc. For example:
```rust
let mut index = TempIndex::new_with_map_size(1000 * 4096 * 10);
index.index_documents_config.autogenerate_docids = true;
let mut wtxn = index.write_txn().unwrap();
index.update_settings_using_wtxn(&mut wtxn, |settings| {
    settings.set_primary_key(S("docids"));
}).unwrap();
wtxn.commit().unwrap();
```

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
2022-08-04 10:19:42 +00:00