Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-11-23 05:06:11 +00:00)

Compare commits — 3 commits on branch codex/add-...try-roarin
| Author | SHA1 | Date |
|---|---|---|
|  | f747964f65 |  |
|  | 47f6ab1279 |  |
|  | 67b08a180f |  |
@@ -1,26 +1,28 @@
 ---
-name: New feature issue
-about: ⚠️ Should only be used by the internal Meili team ⚠️
+name: New sprint issue
+about: ⚠️ Should only be used by the engine team ⚠️
 title: ''
-labels: 'impacts docs, impacts integrations'
+labels: 'missing usage in PRD, impacts docs'
 assignees: ''

 ---

 Related product team resources: [PRD]() (_internal only_)
 Related product discussion:

 ## Motivation

 <!---Copy/paste the information in PRD or briefly detail the product motivation. Ask product team if any hesitation.-->

 ## Usage

 <!---Link to the public part of the PRD, or to the related product discussion for experimental features-->

 TBD

 ## TODO

 <!---If necessary, create a list with technical/product steps-->

 ### Are you modifying a database?

 - [ ] If not, add the `no db change` label to your PR, and you're good to merge.
 - [ ] If yes, add the `db change` label to your PR. You'll receive a message explaining you what to do.

@@ -52,5 +54,5 @@ TBD

 ## Impacted teams

-<!---Ping the related teams. Ask on Slack if any hesitation-->
-<!---@meilisearch/docs-team and @meilisearch/integration-team when there is any API change, e.g. settings addition-->
+<!---Ping the related teams. Ask for the engine manager if any hesitation-->
+<!---@meilisearch/docs-team when there is any API change, e.g. settings addition-->
.github/pull_request_template.md (vendored) — 16 lines
@@ -1,16 +0,0 @@ (file removed — every line deleted)
## Related issue

Fixes #...

## Requirements

⚠️ Ensure the following requirements before merging ⚠️
- [ ] Automated tests have been added.
- [ ] If some tests cannot be automated, manual rigorous tests should be applied.
- [ ] ⚠️ If there is any change in the DB:
  - [ ] Test that any impacted DB still works as expected after using `--experimental-dumpless-upgrade` on a DB created with the last released Meilisearch
  - [ ] Test that during the upgrade, **search is still available** (artificially make the upgrade longer if needed)
  - [ ] Set the `db change` label.
- [ ] If necessary, the feature have been tested in the Cloud production environment (with [prototypes](./documentation/prototypes.md)) and the Cloud UI is ready.
- [ ] If necessary, the [documentation](https://github.com/meilisearch/documentation) related to the implemented feature in the PR is ready.
- [ ] If necessary, the [integrations](https://github.com/meilisearch/integration-guides) related to the implemented feature in the PR are ready.
.github/release-draft-template.yml (vendored) — 33 lines
@@ -1,33 +0,0 @@ (file removed — every line deleted)
name-template: 'v$RESOLVED_VERSION'
tag-template: 'v$RESOLVED_VERSION'
exclude-labels:
  - 'skip changelog'
version-resolver:
  minor:
    labels:
      - 'enhancement'
  default: patch
categories:
  - title: '⚠️ Breaking changes'
    label: 'breaking-change'
  - title: '🚀 Enhancements'
    label: 'enhancement'
  - title: '🐛 Bug Fixes'
    label: 'bug'
  - title: '🔒 Security'
    label: 'security'
  - title: '⚙️ Maintenance/misc'
    label:
      - 'maintenance'
      - 'documentation'
template: |
  $CHANGES

  ❤️ Huge thanks to our contributors: $CONTRIBUTORS.
no-changes-template: 'Changes are coming soon 😎'
sort-direction: 'ascending'
replacers:
  - search: '/(?:and )?@dependabot-preview(?:\[bot\])?,?/g'
    replace: ''
  - search: '/(?:and )?@dependabot(?:\[bot\])?,?/g'
    replace: ''
.github/templates/dependency-issue.md (vendored) — 22 lines
@@ -1,22 +0,0 @@ (file removed — every line deleted)
This issue is about updating Meilisearch dependencies:
- [ ] Update Meilisearch dependencies with the help of `cargo +nightly udeps --all-targets` (remove unused dependencies) and `cargo upgrade` (upgrade dependencies versions) - ⚠️ Some repositories may contain subdirectories (like heed, charabia, or deserr). Take care of updating these in the main crate as well. This won't be done automatically by `cargo upgrade`.
  - [ ] [deserr](https://github.com/meilisearch/deserr)
  - [ ] [charabia](https://github.com/meilisearch/charabia/)
  - [ ] [heed](https://github.com/meilisearch/heed/)
  - [ ] [roaring-rs](https://github.com/RoaringBitmap/roaring-rs/)
  - [ ] [obkv](https://github.com/meilisearch/obkv)
  - [ ] [grenad](https://github.com/meilisearch/grenad/)
  - [ ] [arroy](https://github.com/meilisearch/arroy/)
  - [ ] [segment](https://github.com/meilisearch/segment)
  - [ ] [bumparaw-collections](https://github.com/meilisearch/bumparaw-collections)
  - [ ] [bbqueue](https://github.com/meilisearch/bbqueue)
  - [ ] Finally, [Meilisearch](https://github.com/meilisearch/MeiliSearch)
- [ ] If new Rust versions have been released, update the minimal Rust version in use at Meilisearch:
  - [ ] in this [GitHub Action file](https://github.com/meilisearch/meilisearch/blob/main/.github/workflows/test-suite.yml), by changing the `toolchain` field of the `rustfmt` job to the latest available nightly (of the day before or the current day).
  - [ ] in every [GitHub Action files](https://github.com/meilisearch/meilisearch/blob/main/.github/workflows), by changing all the `dtolnay/rust-toolchain@` references to use the latest stable version.
  - [ ] in this [`rust-toolchain.toml`](https://github.com/meilisearch/meilisearch/blob/main/rust-toolchain.toml), by changing the `channel` field to the latest stable version.
  - [ ] in the [Dockerfile](https://github.com/meilisearch/meilisearch/blob/main/Dockerfile), by changing the base image to `rust:<target_rust_version>-alpine<alpine_version>`. Check that the image exists on [Dockerhub](https://hub.docker.com/_/rust/tags?page=1&name=alpine). Also, build and run the image to check everything still works!

⚠️ This issue should be prioritized to avoid any deprecation and vulnerability issues.

The GitHub action dependencies are managed by [Dependabot](https://github.com/meilisearch/meilisearch/blob/main/.github/dependabot.yml), so no need to update them when solving this issue.
.github/workflows/check-valid-milestone.yml (vendored, new file) — 100 lines
@@ -0,0 +1,100 @@ (new file — every line added)
name: PR Milestone Check

on:
  pull_request:
    types: [opened, reopened, edited, synchronize, milestoned, demilestoned]
    branches:
      - "main"
      - "release-v*.*.*"

jobs:
  check-milestone:
    name: Check PR Milestone
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Validate PR milestone
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            // Get PR number directly from the event payload
            const prNumber = context.payload.pull_request.number;

            // Get PR details
            const { data: prData } = await github.rest.pulls.get({
              owner: 'meilisearch',
              repo: 'meilisearch',
              pull_number: prNumber
            });

            // Get base branch name
            const baseBranch = prData.base.ref;
            console.log(`Base branch: ${baseBranch}`);

            // Get PR milestone
            const prMilestone = prData.milestone;
            if (!prMilestone) {
              core.setFailed('PR must have a milestone assigned');
              return;
            }
            console.log(`PR milestone: ${prMilestone.title}`);

            // Validate milestone format: vx.y.z
            const milestoneRegex = /^v\d+\.\d+\.\d+$/;
            if (!milestoneRegex.test(prMilestone.title)) {
              core.setFailed(`Milestone "${prMilestone.title}" does not follow the required format vx.y.z`);
              return;
            }

            // For main branch PRs, check if the milestone is the highest one
            if (baseBranch === 'main') {
              // Get all milestones
              const { data: milestones } = await github.rest.issues.listMilestones({
                owner: 'meilisearch',
                repo: 'meilisearch',
                state: 'open',
                sort: 'due_on',
                direction: 'desc'
              });

              // Sort milestones by version number (vx.y.z)
              const sortedMilestones = milestones
                .filter(m => milestoneRegex.test(m.title))
                .sort((a, b) => {
                  const versionA = a.title.substring(1).split('.').map(Number);
                  const versionB = b.title.substring(1).split('.').map(Number);

                  // Compare major version
                  if (versionA[0] !== versionB[0]) return versionB[0] - versionA[0];
                  // Compare minor version
                  if (versionA[1] !== versionB[1]) return versionB[1] - versionA[1];
                  // Compare patch version
                  return versionB[2] - versionA[2];
                });

              if (sortedMilestones.length === 0) {
                core.setFailed('No valid milestones found in the repository. Please create at least one milestone with the format vx.y.z');
                return;
              }

              const highestMilestone = sortedMilestones[0];
              console.log(`Highest milestone: ${highestMilestone.title}`);

              if (prMilestone.title !== highestMilestone.title) {
                core.setFailed(`PRs targeting the main branch must use the highest milestone (${highestMilestone.title}), but this PR uses ${prMilestone.title}`);
                return;
              }
            } else {
              // For release branches, the milestone should match the branch version
              const branchVersion = baseBranch.substring(8); // remove 'release-'
              if (prMilestone.title !== branchVersion) {
                core.setFailed(`PRs targeting release branch "${baseBranch}" must use the matching milestone "${branchVersion}", but this PR uses "${prMilestone.title}"`);
                return;
              }
            }

            console.log('PR milestone validation passed!');
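The sort in the workflow above is a plain numeric comparison of the three `vX.Y.Z` components, newest first. A minimal Rust sketch of the same comparison, for readers who want the logic outside the GitHub Actions context (illustrative only; `parse_version` and `highest_milestone` are hypothetical helpers, not part of this diff):

```rust
/// Parse "vX.Y.Z" into (X, Y, Z); returns None for invalid titles,
/// mirroring the workflow's `milestoneRegex` filter.
fn parse_version(title: &str) -> Option<(u64, u64, u64)> {
    let mut parts = title.strip_prefix('v')?.splitn(3, '.');
    let major = parts.next()?.parse().ok()?;
    let minor = parts.next()?.parse().ok()?;
    let patch = parts.next()?.parse().ok()?;
    Some((major, minor, patch))
}

/// Pick the highest valid milestone, comparing major, then minor, then patch
/// (tuple ordering in Rust does exactly that).
fn highest_milestone<'a>(titles: &[&'a str]) -> Option<&'a str> {
    titles
        .iter()
        .filter_map(|t| parse_version(t).map(|v| (v, *t)))
        .max_by_key(|(v, _)| *v)
        .map(|(_, t)| t)
}

fn main() {
    let titles = ["v1.15.2", "v1.17.0", "v1.16.0", "not-a-version"];
    assert_eq!(highest_milestone(&titles), Some("v1.17.0"));
}
```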
.github/workflows/dependency-issue.yml (vendored) — 2 lines
@@ -15,7 +15,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - name: Download the issue template
-        run: curl -s https://raw.githubusercontent.com/meilisearch/meilisearch/main/.github/templates/dependency-issue.md > $ISSUE_TEMPLATE
+        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/dependency-issue.md > $ISSUE_TEMPLATE
       - name: Create issue
         run: |
           gh issue create \
.github/workflows/flaky-tests.yml (vendored) — 2 lines
@@ -3,7 +3,7 @@ name: Look for flaky tests
 on:
   workflow_dispatch:
   schedule:
-    - cron: '0 4 * * *' # Every day at 4:00AM
+    - cron: "0 12 * * FRI" # Every Friday at 12:00PM

 jobs:
   flaky:
.github/workflows/milestone-workflow.yml (vendored, new file) — 224 lines
@@ -0,0 +1,224 @@ (new file — every line added)
name: Milestone's workflow

# /!\ No git flow are handled here

# For each Milestone created (not opened!), and if the release is NOT a patch release (only the patch changed)
# - the roadmap issue is created, see https://github.com/meilisearch/engine-team/blob/main/issue-templates/roadmap-issue.md
# - the changelog issue is created, see https://github.com/meilisearch/engine-team/blob/main/issue-templates/changelog-issue.md
# - update the ruleset to add the current release version to the list of allowed versions and be able to use the merge queue.

# For each Milestone closed
# - the `release_version` label is created
# - this label is applied to all issues/PRs in the Milestone

on:
  milestone:
    types: [created, closed]

env:
  MILESTONE_VERSION: ${{ github.event.milestone.title }}
  MILESTONE_URL: ${{ github.event.milestone.html_url }}
  MILESTONE_DUE_ON: ${{ github.event.milestone.due_on }}
  GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}

jobs:
  # -----------------
  # MILESTONE CREATED
  # -----------------

  get-release-version:
    if: github.event.action == 'created'
    runs-on: ubuntu-latest
    outputs:
      is-patch: ${{ steps.check-patch.outputs.is-patch }}
    steps:
      - uses: actions/checkout@v3
      - name: Check if this release is a patch release only
        id: check-patch
        run: |
          echo version: $MILESTONE_VERSION
          if [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.0$ ]]; then
            echo 'This is NOT a patch release'
            echo "is-patch=false" >> $GITHUB_OUTPUT
          elif [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
            echo 'This is a patch release'
            echo "is-patch=true" >> $GITHUB_OUTPUT
          else
            echo "Not a valid format of release, check the Milestone's title."
            echo 'Should be vX.Y.Z'
            exit 1
          fi

  create-roadmap-issue:
    needs: get-release-version
    # Create the roadmap issue if the release is not only a patch release
    if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
    runs-on: ubuntu-latest
    env:
      ISSUE_TEMPLATE: issue-template.md
    steps:
      - uses: actions/checkout@v3
      - name: Download the issue template
        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/roadmap-issue.md > $ISSUE_TEMPLATE
      - name: Replace all empty occurrences in the templates
        run: |
          # Replace all <<version>> occurrences
          sed -i "s/<<version>>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE

          # Replace all <<milestone_id>> occurrences
          milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7)
          sed -i "s/<<milestone_id>>/$milestone_id/g" $ISSUE_TEMPLATE

          # Replace release date if exists
          if [[ ! -z $MILESTONE_DUE_ON ]]; then
            date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1)
            sed -i "s/Release date\: 20XX-XX-XX/Release date\: $date/g" $ISSUE_TEMPLATE
          fi
      - name: Create the issue
        run: |
          gh issue create \
            --title "$MILESTONE_VERSION ROADMAP" \
            --label 'epic,impacts docs,impacts integrations,impacts cloud' \
            --body-file $ISSUE_TEMPLATE \
            --milestone $MILESTONE_VERSION

  create-changelog-issue:
    needs: get-release-version
    # Create the changelog issue if the release is not only a patch release
    if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
    runs-on: ubuntu-latest
    env:
      ISSUE_TEMPLATE: issue-template.md
    steps:
      - uses: actions/checkout@v3
      - name: Download the issue template
        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/changelog-issue.md > $ISSUE_TEMPLATE
      - name: Replace all empty occurrences in the templates
        run: |
          # Replace all <<version>> occurrences
          sed -i "s/<<version>>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE

          # Replace all <<milestone_id>> occurrences
          milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7)
          sed -i "s/<<milestone_id>>/$milestone_id/g" $ISSUE_TEMPLATE
      - name: Create the issue
        run: |
          gh issue create \
            --title "Create release changelogs for $MILESTONE_VERSION" \
            --label 'impacts docs,documentation' \
            --body-file $ISSUE_TEMPLATE \
            --milestone $MILESTONE_VERSION \
            --assignee curquiza

  create-update-version-issue:
    needs: get-release-version
    # Create the update-version issue even if the release is a patch release
    if: github.event.action == 'created'
    runs-on: ubuntu-latest
    env:
      ISSUE_TEMPLATE: issue-template.md
    steps:
      - uses: actions/checkout@v3
      - name: Download the issue template
        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-version-issue.md > $ISSUE_TEMPLATE
      - name: Create the issue
        run: |
          gh issue create \
            --title "Update version in Cargo.toml for $MILESTONE_VERSION" \
            --label 'maintenance' \
            --body-file $ISSUE_TEMPLATE \
            --milestone $MILESTONE_VERSION

  create-update-openapi-issue:
    needs: get-release-version
    # Create the openAPI issue if the release is not only a patch release
    if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false'
    runs-on: ubuntu-latest
    env:
      ISSUE_TEMPLATE: issue-template.md
    steps:
      - uses: actions/checkout@v3
      - name: Download the issue template
        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-openapi-issue.md > $ISSUE_TEMPLATE
      - name: Create the issue
        run: |
          gh issue create \
            --title "Update Open API file for $MILESTONE_VERSION" \
            --label 'maintenance' \
            --body-file $ISSUE_TEMPLATE \
            --milestone $MILESTONE_VERSION

  update-ruleset:
    runs-on: ubuntu-latest
    if: github.event.action == 'created'
    steps:
      - uses: actions/checkout@v3
      - name: Install jq
        run: |
          sudo apt-get update
          sudo apt-get install -y jq
      - name: Update ruleset
        env:
          # gh api repos/meilisearch/meilisearch/rulesets --jq '.[] | {name: .name, id: .id}'
          RULESET_ID: 4253297
          BRANCH_NAME: ${{ github.event.inputs.branch_name }}
        run: |
          echo "RULESET_ID: ${{ env.RULESET_ID }}"
          echo "BRANCH_NAME: ${{ env.BRANCH_NAME }}"

          # Get current ruleset conditions
          CONDITIONS=$(gh api repos/meilisearch/meilisearch/rulesets/${{ env.RULESET_ID }} --jq '{ conditions: .conditions }')

          # Update the conditions by appending the milestone version
          UPDATED_CONDITIONS=$(echo $CONDITIONS | jq '.conditions.ref_name.include += ["refs/heads/release-'${{ env.MILESTONE_VERSION }}'"]')

          # Update the ruleset from stdin (-)
          echo $UPDATED_CONDITIONS |
            gh api repos/meilisearch/meilisearch/rulesets/${{ env.RULESET_ID }} \
              --method PUT \
              -H "Accept: application/vnd.github+json" \
              -H "X-GitHub-Api-Version: 2022-11-28" \
              --input -

  # ----------------
  # MILESTONE CLOSED
  # ----------------

  create-release-label:
    if: github.event.action == 'closed'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Create the ${{ env.MILESTONE_VERSION }} label
        run: |
          label_description="PRs/issues solved in $MILESTONE_VERSION"
          if [[ ! -z $MILESTONE_DUE_ON ]]; then
            date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1)
            label_description="$label_description released on $date"
          fi

          gh api repos/meilisearch/meilisearch/labels \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -f name="$MILESTONE_VERSION" \
            -f description="$label_description" \
            -f color='ff5ba3'

  labelize-all-milestone-content:
    if: github.event.action == 'closed'
    needs: create-release-label
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Add label ${{ env.MILESTONE_VERSION }} to all PRs in the Milestone
        run: |
          prs=$(gh pr list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}')
          for pr in $prs; do
            gh pr edit $pr --add-label $MILESTONE_VERSION
          done
      - name: Add label ${{ env.MILESTONE_VERSION }} to all issues in the Milestone
        run: |
          issues=$(gh issue list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}')
          for issue in $issues; do
            gh issue edit $issue --add-label $MILESTONE_VERSION
          done
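The `get-release-version` job above classifies `vX.Y.0` as a non-patch release and any other valid `vX.Y.Z` as a patch release. The same classification in Rust, as a small illustrative sketch (not part of the workflow; it assumes the `regex` crate):

```rust
use regex::Regex;

/// Mirrors the workflow's bash test: `vX.Y.0` means a new minor/major
/// release, any other valid `vX.Y.Z` is a patch, anything else is invalid.
fn is_patch_release(milestone: &str) -> Result<bool, String> {
    let non_patch = Regex::new(r"^v[0-9]+\.[0-9]+\.0$").unwrap();
    let any_release = Regex::new(r"^v[0-9]+\.[0-9]+\.[0-9]+$").unwrap();
    if non_patch.is_match(milestone) {
        Ok(false)
    } else if any_release.is_match(milestone) {
        Ok(true)
    } else {
        Err(format!("invalid milestone title {milestone:?}, should be vX.Y.Z"))
    }
}

fn main() {
    assert_eq!(is_patch_release("v1.16.0"), Ok(false));
    assert_eq!(is_patch_release("v1.15.2"), Ok(true));
    assert!(is_patch_release("1.15").is_err());
}
```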
.github/workflows/publish-apt-brew-pkg.yml (vendored) — 2 lines
@@ -32,7 +32,7 @@ jobs:
       - name: Build deb package
         run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
       - name: Upload debian pkg to release
-        uses: svenstaro/upload-release-action@2.11.2
+        uses: svenstaro/upload-release-action@2.7.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/debian/meilisearch.deb
.github/workflows/publish-binaries.yml (vendored) — 8 lines
@@ -51,7 +51,7 @@ jobs:
       # No need to upload binaries for dry run (cron)
       - name: Upload binaries to release
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.11.2
+        uses: svenstaro/upload-release-action@2.7.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/release/meilisearch
@@ -81,7 +81,7 @@ jobs:
       # No need to upload binaries for dry run (cron)
       - name: Upload binaries to release
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.11.2
+        uses: svenstaro/upload-release-action@2.7.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/release/${{ matrix.artifact_name }}
@@ -113,7 +113,7 @@ jobs:
       - name: Upload the binary to release
         # No need to upload binaries for dry run (cron)
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.11.2
+        uses: svenstaro/upload-release-action@2.7.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/${{ matrix.target }}/release/meilisearch
@@ -178,7 +178,7 @@ jobs:
       - name: Upload the binary to release
         # No need to upload binaries for dry run (cron)
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.11.2
+        uses: svenstaro/upload-release-action@2.7.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/${{ matrix.target }}/release/meilisearch
.github/workflows/publish-docker-images.yml (vendored) — 17 lines
@@ -16,8 +16,6 @@ on:
 jobs:
   docker:
     runs-on: docker
-    permissions:
-      id-token: write # This is needed to use Cosign in keyless mode
     steps:
       - uses: actions/checkout@v3
@@ -64,9 +62,6 @@ jobs:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3

-      - name: Install cosign
-        uses: sigstore/cosign-installer@d58896d6a1865668819e1d91763c7751a165e159 # tag=v3.9.2
-
       - name: Login to Docker Hub
         uses: docker/login-action@v3
         with:
@@ -90,7 +85,6 @@ jobs:
       - name: Build and push
         uses: docker/build-push-action@v6
-        id: build-and-push
         with:
           push: true
           platforms: linux/amd64,linux/arm64
@@ -100,17 +94,6 @@ jobs:
             COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
             GIT_TAG=${{ github.ref_name }}

-      - name: Sign the images with GitHub OIDC Token
-        env:
-          DIGEST: ${{ steps.build-and-push.outputs.digest }}
-          TAGS: ${{ steps.meta.outputs.tags }}
-        run: |
-          images=""
-          for tag in ${TAGS}; do
-            images+="${tag}@${DIGEST} "
-          done
-          cosign sign --yes ${images}
-
       # /!\ Don't touch this without checking with Cloud team
       - name: Send CI information to Cloud team
         # Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
.github/workflows/release-drafter.yml (vendored) — 20 lines
@@ -1,20 +0,0 @@ (file removed — every line deleted)
name: Release Drafter

permissions:
  contents: read
  pull-requests: write

on:
  push:
    branches:
      - main

jobs:
  update_release_draft:
    runs-on: ubuntu-latest
    steps:
      - uses: release-drafter/release-drafter@v6
        with:
          config-name: release-draft-template.yml
        env:
          GITHUB_TOKEN: ${{ secrets.RELEASE_DRAFTER_TOKEN }}
.github/workflows/sdks-tests.yml (vendored) — 14 lines
@@ -9,7 +9,7 @@ on:
         required: false
         default: nightly
   schedule:
-    - cron: '0 6 * * *' # Every day at 6:00am
+    - cron: "0 6 * * MON" # Every Monday at 6:00AM

 env:
   MEILI_MASTER_KEY: 'masterKey'
@@ -114,7 +114,7 @@ jobs:
            dep ensure
          fi
      - name: Run integration tests
-       run: go test --race -v ./integration
+       run: go test -v ./...

  meilisearch-java-tests:
    needs: define-docker-image
@@ -344,23 +344,15 @@
         MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
       ports:
         - '7700:7700'
-    env:
-      RAILS_VERSION: '7.0'
     steps:
       - uses: actions/checkout@v3
         with:
           repository: meilisearch/meilisearch-rails
-      - name: Install SQLite dependencies
-        run: sudo apt-get update && sudo apt-get install -y libsqlite3-dev
-      - name: Set up Ruby
+      - name: Set up Ruby 3
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: 3
          bundler-cache: true
-      - name: Start MongoDB
-        uses: supercharge/mongodb-github-action@1.12.0
-        with:
-          mongodb-version: 8.0
      - name: Run tests
        run: bundle exec rspec
.github/workflows/test-suite.yml (vendored) — 12 lines
@@ -3,7 +3,7 @@ name: Test suite
 on:
   workflow_dispatch:
   schedule:
-    # Every day at 5:00am
+    # Everyday at 5:00am
     - cron: "0 5 * * *"
   pull_request:
   merge_group:
@@ -29,7 +29,7 @@ jobs:
       - name: Setup test with Rust stable
         uses: dtolnay/rust-toolchain@1.85
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.8.0
+        uses: Swatinem/rust-cache@v2.7.8
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
         with:
@@ -51,7 +51,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.8.0
+        uses: Swatinem/rust-cache@v2.7.8
       - uses: dtolnay/rust-toolchain@1.85
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
@@ -155,7 +155,7 @@ jobs:
           apt-get install build-essential -y
       - uses: dtolnay/rust-toolchain@1.85
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.8.0
+        uses: Swatinem/rust-cache@v2.7.8
       - name: Run tests in debug
         uses: actions-rs/cargo@v1
         with:
@@ -172,7 +172,7 @@ jobs:
           profile: minimal
           components: clippy
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.8.0
+        uses: Swatinem/rust-cache@v2.7.8
       - name: Run cargo clippy
         uses: actions-rs/cargo@v1
         with:
@@ -191,7 +191,7 @@ jobs:
           override: true
           components: rustfmt
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.8.0
+        uses: Swatinem/rust-cache@v2.7.8
       - name: Run cargo fmt
         # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
         # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
.gitignore (vendored) — 13 lines
@@ -5,27 +5,18 @@
 **/*.json_lines
 **/*.rs.bk
 /*.mdb
-/*.ms
+/data.ms
 /snapshots
 /dumps
 /bench
 /_xtask_benchmark.ms
 /benchmarks
-.DS_Store

 # Snapshots
 ## ... large
 *.full.snap
 ## ... unreviewed
 *.snap.new
 ## ... pending
 *.pending-snap

-# Tmp files
-.tmp*
-
-# Database snapshot
-crates/meilisearch/db.snapshot
-
 # Fuzzcheck data for the facet indexing fuzz test
 crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/
@@ -106,13 +106,7 @@ Run `cargo xtask --help` from the root of the repository to find out what is ava
 #### Update the openAPI file if the API changed

 To update the openAPI file in the code, see [sprint_issue.md](https://github.com/meilisearch/meilisearch/blob/main/.github/ISSUE_TEMPLATE/sprint_issue.md#reminders-when-modifying-the-api).

-If you want to update the openAPI file on the [open-api repository](https://github.com/meilisearch/open-api):
-- Pull the latest version of the latest rc of Meilisearch `git checkout release-vX.Y.Z; git pull`
-- Starts Meilisearch with the `swagger` feature flag: `cargo run --features swagger`
-- On a browser, open the following URL: http://localhost:7700/scalar
-- Click the « Download openAPI file »
-- Open a PR replacing [this file](https://github.com/meilisearch/open-api/blob/main/open-api.json) with the one downloaded
+If you want to update the openAPI file on the [open-api repository](https://github.com/meilisearch/open-api), see [update-openapi-issue.md](https://github.com/meilisearch/engine-team/blob/main/issue-templates/update-openapi-issue.md).

 ### Logging
@@ -166,37 +160,25 @@ Some notes on GitHub PRs:
 The draft PRs are recommended when you want to show that you are working on something and make your work visible.
-- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [GitHub Merge Queues](https://github.blog/news-insights/product-news/github-merge-queue-is-generally-available/) to automatically enforce this requirement without the PR author having to rebase manually.

 ## Merging PRs

 This project uses GitHub Merge Queues that helps us manage pull requests merging.

-Before merging a PR, the maintainer should ensure the following requirements are met
-- Automated tests have been added.
-- If some tests cannot be automated, manual rigorous tests should be applied.
-- ⚠️ If there is an change in the DB: it's mandatory to manually test the `--experimental-dumpless-upgrade` on a DB of the previous Meilisearch minor version (e.g. v1.13 for the v1.14 release).
-- If necessary, the feature have been tested in the Cloud production environment (with [prototypes](./documentation/prototypes.md)) and the Cloud UI is ready.
-- If necessary, the [documentation](https://github.com/meilisearch/documentation) related to the implemented feature in the PR is ready.
-- If necessary, the [integrations](https://github.com/meilisearch/integration-guides) related to the implemented feature in the PR are ready.
-
-## Publish Process (for internal team only)
+## Release Process (for internal team only)

 Meilisearch tools follow the [Semantic Versioning Convention](https://semver.org/).

-### How to publish a new release
+### Automation to rebase and Merge the PRs

-The full Meilisearch release process is described in [this guide](./documentation/release.md).
+This project uses GitHub Merge Queues that helps us manage pull requests merging.

+### How to Publish a new Release
+
+The full Meilisearch release process is described in [this guide](https://github.com/meilisearch/engine-team/blob/main/resources/meilisearch-release.md). Please follow it carefully before doing any release.

 ### How to publish a prototype

 Depending on the developed feature, you might need to provide a prototyped version of Meilisearch to make it easier to test by the users.

 This happens in two steps:
-- [Release the prototype](./documentation/prototypes.md#how-to-publish-a-prototype)
-- [Communicate about it](./documentation/prototypes.md#communication)
-
-### How to implement and publish an experimental feature
-
-Here is our [guidelines and process](./documentation/experimental-features.md) to implement and publish an experimental feature.
+- [Release the prototype](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#how-to-publish-a-prototype)
+- [Communicate about it](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#communication)

+### Release assets
Cargo.lock (generated) — 672 lines. File diff suppressed because it is too large.
@@ -22,7 +22,7 @@ members = [
 ]

 [workspace.package]
-version = "1.17.0"
+version = "1.15.2"
 authors = [
   "Quentin de Quelen <quentin@dequelen.me>",
   "Clément Renault <clement@meilisearch.com>",
@@ -49,3 +49,6 @@ opt-level = 3
 opt-level = 3
 [profile.dev.package.roaring]
 opt-level = 3
+
+[patch.crates-io]
+roaring = { git = "https://github.com/RoaringBitmap/roaring-rs.git" }
@@ -119,6 +119,6 @@ Meilisearch is, and will always be, open-source! If you want to contribute to th
 Meilisearch releases and their associated binaries are available on the project's [releases page](https://github.com/meilisearch/meilisearch/releases).

-The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](./documentation/versioning-policy.md).
+The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md).

 Differently from the binaries, crates in this repository are not currently available on [crates.io](https://crates.io/) and do not follow [SemVer conventions](https://semver.org).
@@ -11,27 +11,27 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-anyhow = "1.0.98"
-bumpalo = "3.18.1"
+anyhow = "1.0.95"
+bumpalo = "3.16.0"
 csv = "1.3.1"
-memmap2 = "0.9.7"
+memmap2 = "0.9.5"
 milli = { path = "../milli" }
-mimalloc = { version = "0.1.47", default-features = false }
-serde_json = { version = "1.0.140", features = ["preserve_order"] }
-tempfile = "3.20.0"
+mimalloc = { version = "0.1.43", default-features = false }
+serde_json = { version = "1.0.135", features = ["preserve_order"] }
+tempfile = "3.15.0"

 [dev-dependencies]
-criterion = { version = "0.6.0", features = ["html_reports"] }
+criterion = { version = "0.5.1", features = ["html_reports"] }
 rand = "0.8.5"
 rand_chacha = "0.3.1"
-roaring = "0.10.12"
+roaring = "0.10.10"

 [build-dependencies]
-anyhow = "1.0.98"
-bytes = "1.10.1"
-convert_case = "0.8.0"
-flate2 = "1.1.2"
-reqwest = { version = "0.12.20", features = ["blocking", "rustls-tls"], default-features = false }
+anyhow = "1.0.95"
+bytes = "1.9.0"
+convert_case = "0.6.0"
+flate2 = "1.0.35"
+reqwest = { version = "0.12.15", features = ["blocking", "rustls-tls"], default-features = false }

 [features]
 default = ["milli/all-tokenizations"]
@@ -51,11 +51,3 @@ harness = false
 [[bench]]
 name = "indexing"
 harness = false
-
-[[bench]]
-name = "sort"
-harness = false
-
-[[bench]]
-name = "filter_starts_with"
-harness = false
@@ -1,66 +0,0 @@ (file removed — every line deleted)
mod datasets_paths;
mod utils;

use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use milli::FilterableAttributesRule;
use utils::Conf;

#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

fn base_conf(builder: &mut Settings) {
    let displayed_fields = ["geonameid", "name"].iter().map(|s| s.to_string()).collect();
    builder.set_displayed_fields(displayed_fields);

    let filterable_fields =
        ["name"].iter().map(|s| FilterableAttributesRule::Field(s.to_string())).collect();
    builder.set_filterable_fields(filterable_fields);
}

#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
    dataset: datasets_paths::SMOL_ALL_COUNTRIES,
    dataset_format: "jsonl",
    queries: &[
        "",
    ],
    configure: base_conf,
    primary_key: Some("geonameid"),
    ..Conf::BASE
};

fn filter_starts_with(c: &mut criterion::Criterion) {
    #[rustfmt::skip]
    let confs = &[
        utils::Conf {
            group_name: "1 letter",
            filter: Some("name STARTS WITH e"),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "2 letters",
            filter: Some("name STARTS WITH es"),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "3 letters",
            filter: Some("name STARTS WITH est"),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "6 letters",
            filter: Some("name STARTS WITH estoni"),
            ..BASE_CONF
        }
    ];

    utils::run_benches(c, confs);
}

criterion_group!(benches, filter_starts_with);
criterion_main!(benches);
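For readers unfamiliar with the harness used by the removed bench above: `utils::Conf` bundles a dataset, settings, and queries, and `run_benches` (its diff appears further down) turns each entry into a criterion benchmark group. A rough standalone sketch of the same pattern with stock criterion, illustrative only — `search_with_filter` is a hypothetical stand-in for the milli search call, not part of this diff:

```rust
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};

// Hypothetical stand-in for the milli search-with-filter call being benchmarked.
fn search_with_filter(filter: &str) -> usize {
    filter.len() // placeholder work
}

fn filter_starts_with(c: &mut Criterion) {
    let mut group = c.benchmark_group("filter_starts_with");
    // One benchmark per filter length, as in the removed file.
    for filter in ["name STARTS WITH e", "name STARTS WITH es", "name STARTS WITH est"] {
        group.bench_with_input(BenchmarkId::from_parameter(filter), &filter, |b, &filter| {
            b.iter(|| search_with_filter(filter));
        });
    }
    group.finish();
}

criterion_group!(benches, filter_starts_with);
criterion_main!(benches);
```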
@@ -11,7 +11,7 @@ use milli::heed::{EnvOpenOptions, RwTxn};
 use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexerConfig, Settings};
-use milli::vector::RuntimeEmbedders;
+use milli::vector::EmbeddingConfigs;
 use milli::{FilterableAttributesRule, Index};
 use rand::seq::SliceRandom;
 use rand_chacha::rand_core::SeedableRng;
@@ -65,7 +65,7 @@ fn setup_settings<'t>(
     let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
     builder.set_sortable_fields(sortable_fields);

-    builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
+    builder.execute(|_| (), || false).unwrap();
 }

 fn setup_index_with_settings(
The remaining thirty-one hunks of this file all apply the same mechanical change to an `indexer::index` call: `RuntimeEmbedders::default()` becomes `EmbeddingConfigs::default()`, and the trailing `&Default::default()` argument is dropped. The first hunk in full:

@@ -166,10 +166,9 @@ fn indexing_songs_default(c: &mut Criterion) {
             new_fields_ids_map,
             primary_key,
             &document_changes,
-            RuntimeEmbedders::default(),
+            EmbeddingConfigs::default(),
             &|| false,
             &Progress::default(),
-            &Default::default(),
         )
         .unwrap();

The identical substitution repeats in:

@@ -233,10 +232,9 @@ fn reindexing_songs_default(c: &mut Criterion) {
@@ -278,10 +276,9 @@ fn reindexing_songs_default(c: &mut Criterion) {
@@ -347,10 +344,9 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
@@ -424,10 +420,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
@@ -469,10 +464,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
@@ -510,10 +504,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
@@ -578,10 +571,9 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
@@ -645,10 +637,9 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
@@ -712,10 +703,9 @@ fn indexing_wiki(c: &mut Criterion) {
@@ -778,10 +768,9 @@ fn reindexing_wiki(c: &mut Criterion) {
@@ -823,10 +812,9 @@ fn reindexing_wiki(c: &mut Criterion) {
@@ -891,10 +879,9 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
@@ -968,10 +955,9 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
@@ -1014,10 +1000,9 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
@@ -1056,10 +1041,9 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
@@ -1123,10 +1107,9 @@ fn indexing_movies_default(c: &mut Criterion) {
@@ -1189,10 +1172,9 @@ fn reindexing_movies_default(c: &mut Criterion) {
@@ -1234,10 +1216,9 @@ fn reindexing_movies_default(c: &mut Criterion) {
@@ -1302,10 +1283,9 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
@@ -1351,10 +1331,9 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi (this call site passes `Some(primary_key)` instead of `primary_key`)
@@ -1416,10 +1395,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
@@ -1461,10 +1439,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
@@ -1502,10 +1479,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
@@ -1592,10 +1568,9 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
@@ -1683,10 +1658,9 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
@@ -1766,10 +1740,9 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
@@ -1833,10 +1806,9 @@ fn indexing_geo(c: &mut Criterion) {
@@ -1899,10 +1871,9 @@ fn reindexing_geo(c: &mut Criterion) {
@@ -1944,10 +1915,9 @@ fn reindexing_geo(c: &mut Criterion) {
@@ -2012,10 +1982,9 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
@@ -1,114 +0,0 @@ (file removed — every line deleted)
//! This benchmark module is used to compare the performance of sorting documents in /search VS /documents
//!
//! The tests/benchmarks were designed in the context of a query returning only 20 documents.

mod datasets_paths;
mod utils;

use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;

#[cfg(not(windows))]
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

fn base_conf(builder: &mut Settings) {
    let displayed_fields =
        ["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"]
            .iter()
            .map(|s| s.to_string())
            .collect();
    builder.set_displayed_fields(displayed_fields);

    let sortable_fields =
        ["_geo", "name", "population", "elevation", "timezone", "modification-date"]
            .iter()
            .map(|s| s.to_string())
            .collect();
    builder.set_sortable_fields(sortable_fields);
}

#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
    dataset: datasets_paths::SMOL_ALL_COUNTRIES,
    dataset_format: "jsonl",
    configure: base_conf,
    primary_key: Some("geonameid"),
    queries: &[""],
    offsets: &[
        Some((0, 20)), // The most common query in the real world
        Some((0, 500)), // A query that ranges over many documents
        Some((980, 20)), // The worst query that could happen in the real world
        Some((800_000, 20)) // The worst query
    ],
    get_documents: true,
    ..Conf::BASE
};

fn bench_sort(c: &mut criterion::Criterion) {
    #[rustfmt::skip]
    let confs = &[
        utils::Conf {
            group_name: "without sort",
            sort: None,
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many different values",
            sort: Some(vec!["name:asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many similar values",
            sort: Some(vec!["timezone:desc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many similar then different values",
            sort: Some(vec!["timezone:desc", "name:asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many different then similar values",
            sort: Some(vec!["timezone:desc", "name:asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "geo sort",
            sample_size: Some(10),
            sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many similar values then geo sort",
            sample_size: Some(50),
            sort: Some(vec!["timezone:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many different values then geo sort",
            sample_size: Some(50),
            sort: Some(vec!["name:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
            ..BASE_CONF
        },

        utils::Conf {
            group_name: "sort on many fields",
            sort: Some(vec!["population:asc", "name:asc", "elevation:asc", "timezone:asc"]),
            ..BASE_CONF
        },
    ];

    utils::run_benches(c, confs);
}

criterion_group!(benches, bench_sort);
criterion_main!(benches);
@@ -9,12 +9,11 @@ use anyhow::Context;
 use bumpalo::Bump;
 use criterion::BenchmarkId;
 use memmap2::Mmap;
-use milli::documents::sort::recursive_sort;
 use milli::heed::EnvOpenOptions;
 use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexerConfig, Settings};
-use milli::vector::RuntimeEmbedders;
+use milli::vector::EmbeddingConfigs;
 use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
 use serde_json::Value;
@@ -36,12 +35,6 @@ pub struct Conf<'a> {
     pub configure: fn(&mut Settings),
     pub filter: Option<&'a str>,
     pub sort: Option<Vec<&'a str>>,
-    /// set to skip documents (offset, limit)
-    pub offsets: &'a [Option<(usize, usize)>],
-    /// enable if you want to bench getting documents without querying
-    pub get_documents: bool,
-    /// configure the benchmark sample size
-    pub sample_size: Option<usize>,
     /// enable or disable the optional words on the query
     pub optional_words: bool,
     /// primary key, if there is None we'll auto-generate docids for every documents
@@ -59,9 +52,6 @@ impl Conf<'_> {
         configure: |_| (),
         filter: None,
         sort: None,
-        offsets: &[None],
-        get_documents: false,
-        sample_size: None,
         optional_words: true,
         primary_key: None,
     };
@@ -100,7 +90,7 @@ pub fn base_setup(conf: &Conf) -> Index {
     (conf.configure)(&mut builder);

-    builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
+    builder.execute(|_| (), || false).unwrap();
     wtxn.commit().unwrap();

     let config = IndexerConfig::default();
@@ -135,10 +125,9 @@ pub fn base_setup(conf: &Conf) -> Index {
         new_fields_ids_map,
         primary_key,
         &document_changes,
-        RuntimeEmbedders::default(),
+        EmbeddingConfigs::default(),
         &|| false,
         &Progress::default(),
-        &Default::default(),
     )
     .unwrap();
@@ -155,79 +144,25 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
||||
let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap();
|
||||
let name = format!("{}: {}", file_name, conf.group_name);
|
||||
let mut group = c.benchmark_group(&name);
|
||||
if let Some(sample_size) = conf.sample_size {
|
||||
group.sample_size(sample_size);
|
||||
}
|
||||
|
||||
for &query in conf.queries {
|
||||
for offset in conf.offsets {
|
||||
let parameter = match offset {
|
||||
None => query.to_string(),
|
||||
Some((offset, limit)) => format!("{query}[{offset}:{limit}]"),
|
||||
};
|
||||
group.bench_with_input(
|
||||
BenchmarkId::from_parameter(parameter),
|
||||
&query,
|
||||
|b, &query| {
|
||||
b.iter(|| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut search = index.search(&rtxn);
|
||||
search
|
||||
.query(query)
|
||||
.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||
if let Some(filter) = conf.filter {
|
||||
let filter = Filter::from_str(filter).unwrap().unwrap();
|
||||
search.filter(filter);
|
||||
}
|
||||
if let Some(sort) = &conf.sort {
|
||||
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
|
||||
search.sort_criteria(sort);
|
||||
}
|
||||
if let Some((offset, limit)) = offset {
|
||||
search.offset(*offset).limit(*limit);
|
||||
}
|
||||
|
||||
let _ids = search.execute().unwrap();
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if conf.get_documents {
|
||||
for offset in conf.offsets {
|
||||
let parameter = match offset {
|
||||
None => String::from("get_documents"),
|
||||
Some((offset, limit)) => format!("get_documents[{offset}:{limit}]"),
|
||||
};
|
||||
group.bench_with_input(BenchmarkId::from_parameter(parameter), &(), |b, &()| {
|
||||
b.iter(|| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
if let Some(sort) = &conf.sort {
|
||||
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
|
||||
let all_docs = index.documents_ids(&rtxn).unwrap();
|
||||
let facet_sort =
|
||||
recursive_sort(&index, &rtxn, sort, &all_docs).unwrap();
|
||||
let iter = facet_sort.iter().unwrap();
|
||||
if let Some((offset, limit)) = offset {
|
||||
let _results = iter.skip(*offset).take(*limit).collect::<Vec<_>>();
|
||||
} else {
|
||||
let _results = iter.collect::<Vec<_>>();
|
||||
}
|
||||
} else {
|
||||
let all_docs = index.documents_ids(&rtxn).unwrap();
|
||||
if let Some((offset, limit)) = offset {
|
||||
let _results =
|
||||
all_docs.iter().skip(*offset).take(*limit).collect::<Vec<_>>();
|
||||
} else {
|
||||
let _results = all_docs.iter().collect::<Vec<_>>();
|
||||
}
|
||||
}
|
||||
});
|
||||
group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
|
||||
b.iter(|| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut search = index.search(&rtxn);
|
||||
search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
|
||||
if let Some(filter) = conf.filter {
|
||||
let filter = Filter::from_str(filter).unwrap().unwrap();
|
||||
search.filter(filter);
|
||||
}
|
||||
if let Some(sort) = &conf.sort {
|
||||
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
|
||||
search.sort_criteria(sort);
|
||||
}
|
||||
let _ids = search.execute().unwrap();
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
|
||||
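The hunks above rework how the sort benchmarks drive criterion: each (query, offset) pair becomes one parameterized input of a benchmark group. As a reference for the pattern only (not Meilisearch code), here is a minimal sketch of a parameterized criterion group; the `reverse` function and the input list are made up for illustration:

```rust
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};

// Hypothetical function under test.
fn reverse(s: &str) -> String {
    s.chars().rev().collect()
}

fn bench_reverse(c: &mut Criterion) {
    let mut group = c.benchmark_group("reverse");
    // One measurement per input, labeled by the parameter value.
    for input in ["kafka", "ruse", "mingus"] {
        group.bench_with_input(BenchmarkId::from_parameter(input), &input, |b, &s| {
            b.iter(|| reverse(s));
        });
    }
    group.finish();
}

criterion_group!(benches, bench_reverse);
criterion_main!(benches);
```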
@@ -67,7 +67,7 @@ fn main() -> anyhow::Result<()> {
writeln!(
&mut manifest_paths_file,
r#"pub const {}: &str = {:?};"#,
dataset.to_case(Case::UpperSnake),
dataset.to_case(Case::ScreamingSnake),
out_file.display(),
)?;

@@ -11,8 +11,8 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
time = { version = "0.3.41", features = ["parsing"] }
time = { version = "0.3.37", features = ["parsing"] }

[build-dependencies]
anyhow = "1.0.98"
vergen-git2 = "1.0.7"
anyhow = "1.0.95"
vergen-git2 = "1.0.2"

@@ -11,21 +11,21 @@ readme.workspace = true
license.workspace = true

[dependencies]
anyhow = "1.0.98"
flate2 = "1.1.2"
http = "1.3.1"
anyhow = "1.0.95"
flate2 = "1.0.35"
http = "1.2.0"
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.21.3"
once_cell = "1.20.2"
regex = "1.11.1"
roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tar = "0.4.44"
tempfile = "3.20.0"
thiserror = "2.0.12"
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tar = "0.4.43"
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.41"
uuid = { version = "1.17.0", features = ["serde", "v4"] }
uuid = { version = "1.11.0", features = ["serde", "v4"] }

[dev-dependencies]
big_s = "1.0.2"

@@ -1,17 +1,12 @@
#![allow(clippy::type_complexity)]
#![allow(clippy::wrong_self_convention)]

use std::collections::BTreeMap;

use meilisearch_types::batches::BatchId;
use meilisearch_types::byte_unit::Byte;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::Key;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::{
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
};
use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId};
use meilisearch_types::InstanceUid;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
@@ -146,12 +141,6 @@ pub enum KindDump {
instance_uid: Option<InstanceUid>,
},
SnapshotCreation,
Export {
url: String,
api_key: Option<String>,
payload_size: Option<Byte>,
indexes: BTreeMap<String, ExportIndexSettings>,
},
UpgradeDatabase {
from: (u32, u32, u32),
},
@@ -224,15 +213,6 @@ impl From<KindWithContent> for KindDump {
KindDump::DumpCreation { keys, instance_uid }
}
KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
KindWithContent::Export { url, api_key, payload_size, indexes } => KindDump::Export {
url,
api_key,
payload_size,
indexes: indexes
.into_iter()
.map(|(pattern, settings)| (pattern.to_string(), settings))
.collect(),
},
KindWithContent::UpgradeDatabase { from: version } => {
KindDump::UpgradeDatabase { from: version }
}
@@ -349,7 +329,6 @@ pub(crate) mod test {
write_channel_congestion: None,
internal_database_sizes: Default::default(),
},
embedder_stats: Default::default(),
enqueued_at: Some(BatchEnqueuedAt {
earliest: datetime!(2022-11-11 0:00 UTC),
oldest: datetime!(2022-11-11 0:00 UTC),

@@ -1,4 +1,3 @@
use std::fs::File;
use std::str::FromStr;

use super::v2_to_v3::CompatV2ToV3;
@@ -95,10 +94,6 @@ impl CompatIndexV1ToV2 {
self.from.documents().map(|it| Box::new(it) as Box<dyn Iterator<Item = _>>)
}

pub fn documents_file(&self) -> &File {
self.from.documents_file()
}

pub fn settings(&mut self) -> Result<v2::settings::Settings<v2::settings::Checked>> {
Ok(v2::settings::Settings::<v2::settings::Unchecked>::from(self.from.settings()?).check())
}

@@ -1,4 +1,3 @@
use std::fs::File;
use std::str::FromStr;

use time::OffsetDateTime;
@@ -123,13 +122,6 @@ impl CompatIndexV2ToV3 {
}
}

pub fn documents_file(&self) -> &File {
match self {
CompatIndexV2ToV3::V2(v2) => v2.documents_file(),
CompatIndexV2ToV3::Compat(compat) => compat.documents_file(),
}
}

pub fn settings(&mut self) -> Result<v3::Settings<v3::Checked>> {
let settings = match self {
CompatIndexV2ToV3::V2(from) => from.settings()?,

@@ -1,5 +1,3 @@
use std::fs::File;

use super::v2_to_v3::{CompatIndexV2ToV3, CompatV2ToV3};
use super::v4_to_v5::CompatV4ToV5;
use crate::reader::{v3, v4, UpdateFile};
@@ -254,13 +252,6 @@ impl CompatIndexV3ToV4 {
}
}

pub fn documents_file(&self) -> &File {
match self {
CompatIndexV3ToV4::V3(v3) => v3.documents_file(),
CompatIndexV3ToV4::Compat(compat) => compat.documents_file(),
}
}

pub fn settings(&mut self) -> Result<v4::Settings<v4::Checked>> {
Ok(match self {
CompatIndexV3ToV4::V3(v3) => {

@@ -1,5 +1,3 @@
use std::fs::File;

use super::v3_to_v4::{CompatIndexV3ToV4, CompatV3ToV4};
use super::v5_to_v6::CompatV5ToV6;
use crate::reader::{v4, v5, Document};
@@ -243,13 +241,6 @@ impl CompatIndexV4ToV5 {
}
}

pub fn documents_file(&self) -> &File {
match self {
CompatIndexV4ToV5::V4(v4) => v4.documents_file(),
CompatIndexV4ToV5::Compat(compat) => compat.documents_file(),
}
}

pub fn settings(&mut self) -> Result<v5::Settings<v5::Checked>> {
match self {
CompatIndexV4ToV5::V4(v4) => Ok(v5::Settings::from(v4.settings()?).check()),

@@ -1,4 +1,3 @@
use std::fs::File;
use std::num::NonZeroUsize;
use std::str::FromStr;

@@ -202,10 +201,6 @@ impl CompatV5ToV6 {
pub fn network(&self) -> Result<Option<&v6::Network>> {
Ok(None)
}

pub fn webhooks(&self) -> Option<&v6::Webhooks> {
None
}
}

pub enum CompatIndexV5ToV6 {
@@ -248,13 +243,6 @@ impl CompatIndexV5ToV6 {
}
}

pub fn documents_file(&self) -> &File {
match self {
CompatIndexV5ToV6::V5(v5) => v5.documents_file(),
CompatIndexV5ToV6::Compat(compat) => compat.documents_file(),
}
}

pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
match self {
CompatIndexV5ToV6::V5(v5) => Ok(v6::Settings::from(v5.settings()?).check()),

@@ -116,15 +116,6 @@ impl DumpReader {
}
}

pub fn chat_completions_settings(
&mut self,
) -> Result<Box<dyn Iterator<Item = Result<(String, v6::ChatCompletionSettings)>> + '_>> {
match self {
DumpReader::Current(current) => current.chat_completions_settings(),
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
}
}

pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
match self {
DumpReader::Current(current) => Ok(current.features()),
@@ -138,13 +129,6 @@ impl DumpReader {
DumpReader::Compat(compat) => compat.network(),
}
}

pub fn webhooks(&self) -> Option<&v6::Webhooks> {
match self {
DumpReader::Current(current) => current.webhooks(),
DumpReader::Compat(compat) => compat.webhooks(),
}
}
}

impl From<V6Reader> for DumpReader {
@@ -199,14 +183,6 @@ impl DumpIndexReader {
}
}

/// A reference to a file in the NDJSON format containing all the documents of the index
pub fn documents_file(&self) -> &File {
match self {
DumpIndexReader::Current(v6) => v6.documents_file(),
DumpIndexReader::Compat(compat) => compat.documents_file(),
}
}

pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
match self {
DumpIndexReader::Current(v6) => v6.settings(),
@@ -372,7 +348,6 @@ pub(crate) mod test {

assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
assert_eq!(dump.network().unwrap(), None);
assert_eq!(dump.webhooks(), None);
}

#[test]
@@ -443,43 +418,6 @@ pub(crate) mod test {
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().search_api_key.as_ref().unwrap(), @"foo");
}

#[test]
fn import_dump_v6_webhooks() {
let dump = File::open("tests/assets/v6-with-webhooks.dump").unwrap();
let dump = DumpReader::open(dump).unwrap();

// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2025-07-31 9:21:30.479544 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @r"
Some(
cb887dcc-34b3-48d1-addd-9815ae721a81,
)
");

// webhooks
let webhooks = dump.webhooks().unwrap();
insta::assert_json_snapshot!(webhooks, @r#"
{
"webhooks": {
"627ea538-733d-4545-8d2d-03526eb381ce": {
"url": "https://example.com/authorization-less",
"headers": {}
},
"771b0a28-ef28-4082-b984-536f82958c65": {
"url": "https://example.com/hook",
"headers": {
"authorization": "TOKEN"
}
},
"f3583083-f8a7-4cbf-a5e7-fb3f1e28a7e9": {
"url": "https://third.com",
"headers": {}
}
}
}
"#);
}

#[test]
fn import_dump_v5() {
let dump = File::open("tests/assets/v5.dump").unwrap();

@@ -72,10 +72,6 @@ impl V1IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}

pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}

pub fn settings(&mut self) -> Result<self::settings::Settings> {
Ok(serde_json::from_reader(&mut self.settings)?)
}

@@ -203,10 +203,6 @@ impl V2IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}

pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}

pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}

@@ -215,10 +215,6 @@ impl V3IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}

pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}

pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}

@@ -210,10 +210,6 @@ impl V4IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}

pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}

pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}

@@ -247,10 +247,6 @@ impl V5IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}

pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}

pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}
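The dump-compat hunks above all share one shape: each `CompatIndexVxToVy` is an enum that wraps either the native reader of version y or the previous compat layer, and every accessor dispatches with a `match`. A minimal sketch of that layering, with stand-in types rather than the real dump API:

```rust
struct V1Reader; // stand-in for the older native reader
struct V2Reader; // stand-in for the newer native reader

impl V1Reader {
    fn settings(&self) -> String {
        "v1 settings".to_string()
    }
}

// Wraps either a native v2 reader or a v1 reader upgraded on the fly.
enum CompatV1ToV2 {
    V2(V2Reader),
    Compat(V1Reader),
}

impl CompatV1ToV2 {
    fn settings(&self) -> String {
        match self {
            // Native readers answer directly...
            CompatV1ToV2::V2(_) => "v2 settings".to_string(),
            // ...older ones are converted lazily, call by call.
            CompatV1ToV2::Compat(v1) => format!("upgraded: {}", v1.settings()),
        }
    }
}

fn main() {
    let reader = CompatV1ToV2::Compat(V1Reader);
    println!("{}", reader.settings());
}
```

Chaining one such enum per version step lets a very old dump reach the current format by stacking cheap adapters instead of rewriting the dump on disk.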
@@ -1,4 +1,3 @@
use std::ffi::OsStr;
use std::fs::{self, File};
use std::io::{BufRead, BufReader, ErrorKind};
use std::path::Path;
@@ -22,10 +21,8 @@ pub type Unchecked = meilisearch_types::settings::Unchecked;
pub type Task = crate::TaskDump;
pub type Batch = meilisearch_types::batches::Batch;
pub type Key = meilisearch_types::keys::Key;
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
pub type Network = meilisearch_types::features::Network;
pub type Webhooks = meilisearch_types::webhooks::WebhooksDumpView;

// ===== Other types to clarify the code of the compat module
// everything related to the tasks
@@ -60,7 +57,6 @@ pub struct V6Reader {
keys: BufReader<File>,
features: Option<RuntimeTogglableFeatures>,
network: Option<Network>,
webhooks: Option<Webhooks>,
}

impl V6Reader {
@@ -95,8 +91,8 @@ impl V6Reader {
Err(e) => return Err(e.into()),
};

let network = match fs::read(dump.path().join("network.json")) {
Ok(network_file) => Some(serde_json::from_reader(&*network_file)?),
let network_file = match fs::read(dump.path().join("network.json")) {
Ok(network_file) => Some(network_file),
Err(error) => match error.kind() {
// Allows the file to be missing, this will only result in all experimental features disabled.
ErrorKind::NotFound => {
@@ -106,16 +102,10 @@ impl V6Reader {
_ => return Err(error.into()),
},
};

let webhooks = match fs::read(dump.path().join("webhooks.json")) {
Ok(webhooks_file) => Some(serde_json::from_reader(&*webhooks_file)?),
Err(error) => match error.kind() {
ErrorKind::NotFound => {
debug!("`webhooks.json` not found in dump");
None
}
_ => return Err(error.into()),
},
let network = if let Some(network_file) = network_file {
Some(serde_json::from_reader(&*network_file)?)
} else {
None
};

Ok(V6Reader {
@@ -127,7 +117,6 @@ impl V6Reader {
features,
network,
dump,
webhooks,
})
}
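In the hunk above, `network.json` and `webhooks.json` are optional dump entries: `ErrorKind::NotFound` is mapped to `None`, while any other I/O error still aborts. A minimal sketch of that pattern using only std, anyhow, and serde_json (the file path is arbitrary):

```rust
use std::io::ErrorKind;

fn read_optional_json(path: &std::path::Path) -> anyhow::Result<Option<serde_json::Value>> {
    match std::fs::read(path) {
        // Present: parse it, propagating parse errors.
        Ok(bytes) => Ok(Some(serde_json::from_slice(&bytes)?)),
        // Absent: treat as "this feature is not in the dump".
        Err(e) if e.kind() == ErrorKind::NotFound => Ok(None),
        // Anything else (permissions, I/O) is a real failure.
        Err(e) => Err(e.into()),
    }
}
```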
@@ -203,34 +192,6 @@ impl V6Reader {
)
}

pub fn chat_completions_settings(
&mut self,
) -> Result<Box<dyn Iterator<Item = Result<(String, ChatCompletionSettings)>> + '_>> {
let entries = match fs::read_dir(self.dump.path().join("chat-completions-settings")) {
Ok(entries) => entries,
Err(e) if e.kind() == ErrorKind::NotFound => return Ok(Box::new(std::iter::empty())),
Err(e) => return Err(e.into()),
};
Ok(Box::new(
entries
.map(|entry| -> Result<Option<_>> {
let entry = entry?;
let file_name = entry.file_name();
let path = Path::new(&file_name);
if entry.file_type()?.is_file() && path.extension() == Some(OsStr::new("json"))
{
let name = path.file_stem().unwrap().to_str().unwrap().to_string();
let file = File::open(entry.path())?;
let settings = serde_json::from_reader(file)?;
Ok(Some((name, settings)))
} else {
Ok(None)
}
})
.filter_map(|entry| entry.transpose()),
))
}

pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
self.features
}
@@ -238,10 +199,6 @@ impl V6Reader {
pub fn network(&self) -> Option<&Network> {
self.network.as_ref()
}

pub fn webhooks(&self) -> Option<&Webhooks> {
self.webhooks.as_ref()
}
}

pub struct UpdateFile {
@@ -297,10 +254,6 @@ impl V6IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}

pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}

pub fn settings(&mut self) -> Result<Settings<Checked>> {
let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
patch_embedders(&mut settings);

@@ -5,10 +5,9 @@ use std::path::PathBuf;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::features::{ChatCompletionSettings, Network, RuntimeTogglableFeatures};
use meilisearch_types::features::{Network, RuntimeTogglableFeatures};
use meilisearch_types::keys::Key;
use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::webhooks::WebhooksDumpView;
use serde_json::{Map, Value};
use tempfile::TempDir;
use time::OffsetDateTime;
@@ -52,10 +51,6 @@ impl DumpWriter {
KeyWriter::new(self.dir.path().to_path_buf())
}

pub fn create_chat_completions_settings(&self) -> Result<ChatCompletionsSettingsWriter> {
ChatCompletionsSettingsWriter::new(self.dir.path().join("chat-completions-settings"))
}

pub fn create_tasks_queue(&self) -> Result<TaskWriter> {
TaskWriter::new(self.dir.path().join("tasks"))
}
@@ -75,13 +70,6 @@ impl DumpWriter {
Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?)
}

pub fn create_webhooks(&self, webhooks: WebhooksDumpView) -> Result<()> {
Ok(std::fs::write(
self.dir.path().join("webhooks.json"),
serde_json::to_string(&webhooks)?,
)?)
}

pub fn persist_to(self, mut writer: impl Write) -> Result<()> {
let gz_encoder = GzEncoder::new(&mut writer, Compression::default());
let mut tar_encoder = tar::Builder::new(gz_encoder);
@@ -116,24 +104,6 @@ impl KeyWriter {
}
}

pub struct ChatCompletionsSettingsWriter {
path: PathBuf,
}

impl ChatCompletionsSettingsWriter {
pub(crate) fn new(path: PathBuf) -> Result<Self> {
std::fs::create_dir(&path)?;
Ok(ChatCompletionsSettingsWriter { path })
}

pub fn push_settings(&mut self, name: &str, settings: &ChatCompletionSettings) -> Result<()> {
let mut settings_file = File::create(self.path.join(name).with_extension("json"))?;
serde_json::to_writer(&mut settings_file, &settings)?;
settings_file.flush()?;
Ok(())
}
}

pub struct TaskWriter {
queue: BufWriter<File>,
update_files: PathBuf,

Binary file not shown.
@@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true

[dependencies]
tempfile = "3.20.0"
thiserror = "2.0.12"
tempfile = "3.15.0"
thiserror = "2.0.9"
tracing = "0.1.41"
uuid = { version = "1.17.0", features = ["serde", "v4"] }
uuid = { version = "1.11.0", features = ["serde", "v4"] }

@@ -14,7 +14,7 @@ license.workspace = true
[dependencies]
nom = "7.1.3"
nom_locate = "4.2.0"
unescaper = "0.1.6"
unescaper = "0.1.5"

[dev-dependencies]
# fixed version due to format breakages in v1.40

@@ -165,9 +165,9 @@ impl<'a> FilterCondition<'a> {
| Condition::Exists
| Condition::LowerThan(_)
| Condition::LowerThanOrEqual(_)
| Condition::Between { .. }
| Condition::StartsWith { .. } => None,
Condition::Contains { keyword, word: _ } => Some(keyword),
| Condition::Between { .. } => None,
Condition::Contains { keyword, word: _ }
| Condition::StartsWith { keyword, word: _ } => Some(keyword),
},
FilterCondition::Not(this) => this.use_contains_operator(),
FilterCondition::Or(seq) | FilterCondition::And(seq) => {

@@ -16,7 +16,7 @@ license.workspace = true
serde_json = "1.0"

[dev-dependencies]
criterion = { version = "0.6.0", features = ["html_reports"] }
criterion = { version = "0.5.1", features = ["html_reports"] }

[[bench]]
name = "benchmarks"

@@ -12,11 +12,11 @@ license.workspace = true

[dependencies]
arbitrary = { version = "1.4.1", features = ["derive"] }
bumpalo = "3.18.1"
clap = { version = "4.5.40", features = ["derive"] }
either = "1.15.0"
bumpalo = "3.16.0"
clap = { version = "4.5.24", features = ["derive"] }
either = "1.13.0"
fastrand = "2.3.0"
milli = { path = "../milli" }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tempfile = "3.20.0"
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tempfile = "3.15.0"

@@ -13,7 +13,7 @@ use milli::heed::EnvOpenOptions;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::IndexerConfig;
use milli::vector::RuntimeEmbedders;
use milli::vector::EmbeddingConfigs;
use milli::Index;
use serde_json::Value;
use tempfile::TempDir;
@@ -89,7 +89,7 @@ fn main() {
let mut new_fields_ids_map = db_fields_ids_map.clone();

let indexer_alloc = Bump::new();
let embedders = RuntimeEmbedders::default();
let embedders = EmbeddingConfigs::default();
let mut indexer = indexer::DocumentOperation::new();

let mut operations = Vec::new();
@@ -144,7 +144,6 @@ fn main() {
embedders,
&|| false,
&Progress::default(),
&Default::default(),
)
.unwrap();

@@ -11,31 +11,31 @@ edition.workspace = true
license.workspace = true

[dependencies]
anyhow = "1.0.98"
anyhow = "1.0.95"
bincode = "1.3.3"
byte-unit = "5.1.6"
bumpalo = "3.18.1"
bumpalo = "3.16.0"
bumparaw-collections = "0.1.4"
convert_case = "0.8.0"
convert_case = "0.6.0"
csv = "1.3.1"
derive_builder = "0.20.2"
dump = { path = "../dump" }
enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.1.2"
indexmap = "2.9.0"
flate2 = "1.0.35"
indexmap = "2.7.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.7"
memmap2 = "0.9.5"
page_size = "0.6.0"
rayon = "1.10.0"
roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.138", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.20.0"
thiserror = "2.0.12"
time = { version = "0.3.41", features = [
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = [
"serde-well-known",
"formatting",
"parsing",
@@ -43,8 +43,7 @@ time = { version = "0.3.41", features = [
] }
tracing = "0.1.41"
ureq = "2.12.1"
uuid = { version = "1.17.0", features = ["serde", "v4"] }
backoff = "0.4.0"
uuid = { version = "1.11.0", features = ["serde", "v4"] }

[dev-dependencies]
big_s = "1.0.2"

@@ -4,7 +4,6 @@ use std::io;
use dump::{KindDump, TaskDump, UpdateFile};
use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::RwTxn;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
@@ -212,23 +211,6 @@ impl<'a> Dump<'a> {
KindWithContent::DumpCreation { keys, instance_uid }
}
KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
KindDump::Export { url, api_key, payload_size, indexes } => {
KindWithContent::Export {
url,
api_key,
payload_size,
indexes: indexes
.into_iter()
.map(|(pattern, settings)| {
Ok((
IndexUidPattern::try_from(pattern)
.map_err(|_| Error::CorruptedDump)?,
settings,
))
})
.collect::<Result<_, Error>>()?,
}
}
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
},
};

@@ -151,10 +151,6 @@ pub enum Error {
CorruptedTaskQueue,
#[error(transparent)]
DatabaseUpgrade(Box<Self>),
#[error(transparent)]
Export(Box<Self>),
#[error("Failed to export documents to remote server {code} ({type}): {message} <{link}>")]
FromRemoteWhenExporting { message: String, code: String, r#type: String, link: String },
#[error("Failed to rollback for index `{index}`: {rollback_outcome} ")]
RollbackFailed { index: String, rollback_outcome: RollbackOutcome },
#[error(transparent)]
@@ -216,7 +212,6 @@ impl Error {
| Error::BatchNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
| Error::FromRemoteWhenExporting { .. }
| Error::AbortedTask
| Error::Dump(_)
| Error::Heed(_)
@@ -226,7 +221,6 @@ impl Error {
| Error::IoError(_)
| Error::Persist(_)
| Error::FeatureNotEnabled(_)
| Error::Export(_)
| Error::Anyhow(_) => true,
Error::CreateBatch(_)
| Error::CorruptedTaskQueue
@@ -288,7 +282,6 @@ impl ErrorCode for Error {
Error::Dump(e) => e.error_code(),
Error::Milli { error, .. } => error.error_code(),
Error::ProcessBatchPanicked(_) => Code::Internal,
Error::FromRemoteWhenExporting { .. } => Code::Internal,
Error::Heed(e) => e.error_code(),
Error::HeedTransaction(e) => e.error_code(),
Error::FileStore(e) => e.error_code(),
@@ -301,7 +294,6 @@ impl ErrorCode for Error {
Error::CorruptedTaskQueue => Code::Internal,
Error::CorruptedDump => Code::Internal,
Error::DatabaseUpgrade(_) => Code::Internal,
Error::Export(_) => Code::Internal,
Error::RollbackFailed { .. } => Code::Internal,
Error::UnrecoverableError(_) => Code::Internal,
Error::IndexSchedulerVersionMismatch { .. } => Code::Internal,

@@ -85,7 +85,7 @@ impl RoFeatures {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Using `CONTAINS` in a filter",
disabled_action: "Using `CONTAINS` or `STARTS WITH` in a filter",
feature: "contains filter",
issue_link: "https://github.com/orgs/meilisearch/discussions/763",
}
@@ -144,19 +144,6 @@ impl RoFeatures {
.into())
}
}

pub fn check_multimodal(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.multimodal {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
feature: "multimodal",
issue_link: "https://github.com/orgs/meilisearch/discussions/846",
}
.into())
}
}
}

impl FeatureData {
@@ -182,7 +169,6 @@ impl FeatureData {
..persisted_features
}));

// Once this is stabilized, network should be stored along with webhooks in index-scheduler's persisted database
let network_db = runtime_features_db.remap_data_type::<SerdeJson<Network>>();
let network: Network = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default();

@@ -71,7 +71,7 @@ pub struct IndexMapper {
/// Path to the folder where the LMDB environments of each index are.
base_path: PathBuf,
/// The map size an index is opened with on the first time.
pub(crate) index_base_map_size: usize,
index_base_map_size: usize,
/// The quantity by which the map size of an index is incremented upon reopening, in bytes.
index_growth_amount: usize,
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.

@@ -20,17 +20,16 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {

let IndexScheduler {
cleanup_enabled: _,
experimental_no_edition_2024_for_dumps: _,
processing_tasks,
env,
version,
queue,
scheduler,
persisted,

index_mapper,
features: _,
webhooks: _,
webhook_url: _,
webhook_authorization_header: _,
test_breakpoint_sdr: _,
planned_failures: _,
run_loop_iteration: _,
@@ -62,13 +61,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
}
snap.push_str("\n----------------------------------------------------------------------\n");

let persisted_db_snapshot = snapshot_persisted_db(&rtxn, persisted);
if !persisted_db_snapshot.is_empty() {
snap.push_str("### Persisted:\n");
snap.push_str(&persisted_db_snapshot);
snap.push_str("----------------------------------------------------------------------\n");
}

snap.push_str("### All Tasks:\n");
snap.push_str(&snapshot_all_tasks(&rtxn, queue.tasks.all_tasks));
snap.push_str("----------------------------------------------------------------------\n");
@@ -207,16 +199,6 @@ pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec
snap
}

pub fn snapshot_persisted_db(rtxn: &RoTxn, db: &Database<Str, Str>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
let (key, value) = next.unwrap();
snap.push_str(&format!("{key}: {value}\n"));
}
snap
}

pub fn snapshot_task(task: &Task) -> String {
let mut snap = String::new();
let Task {
@@ -307,9 +289,6 @@ fn snapshot_details(d: &Details) -> String {
Details::IndexSwap { swaps } => {
format!("{{ swaps: {swaps:?} }}")
}
Details::Export { url, api_key, payload_size, indexes } => {
format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}")
}
Details::UpgradeDatabase { from, to } => {
format!("{{ from: {from:?}, to: {to:?} }}")
}
@@ -328,7 +307,6 @@ pub fn snapshot_status(
}
snap
}

pub fn snapshot_kind(rtxn: &RoTxn, db: Database<SerdeBincode<Kind>, RoaringBitmapCodec>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
@@ -349,7 +327,6 @@ pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>)
}
snap
}

pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
@@ -366,7 +343,6 @@ pub fn snapshot_batch(batch: &Batch) -> String {
uid,
details,
stats,
embedder_stats,
started_at,
finished_at,
progress: _,
@@ -390,12 +366,6 @@ pub fn snapshot_batch(batch: &Batch) -> String {
snap.push_str(&format!("uid: {uid}, "));
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
if !embedder_stats.skip_serializing() {
snap.push_str(&format!(
"embedder stats: {}, ",
serde_json::to_string(&embedder_stats).unwrap()
));
}
snap.push_str(&format!("stop reason: {}, ", serde_json::to_string(&stop_reason).unwrap()));
snap.push('}');
snap

@@ -57,24 +57,18 @@ use meilisearch_types::features::{
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::{DecodeIgnore, SerdeJson, Str, I128};
use meilisearch_types::heed::{self, Database, Env, RoTxn, WithoutTls};
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::json_template::JsonTemplate;
use meilisearch_types::milli::vector::{
Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
};
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
use meilisearch_types::milli::{self, Index};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{KindWithContent, Task};
use meilisearch_types::webhooks::{Webhook, WebhooksDumpView, WebhooksView};
use milli::vector::db::IndexEmbeddingConfig;
use processing::ProcessingTasks;
pub use queue::Query;
use queue::Queue;
use roaring::RoaringBitmap;
use scheduler::Scheduler;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;
use versioning::Versioning;

use crate::index_mapper::IndexMapper;
@@ -83,15 +77,7 @@ use crate::utils::clamp_to_page_size;
pub(crate) type BEI128 = I128<BE>;

const TASK_SCHEDULER_SIZE_THRESHOLD_PERCENT_INT: u64 = 40;

mod db_name {
pub const CHAT_SETTINGS: &str = "chat-settings";
pub const PERSISTED: &str = "persisted";
}

mod db_keys {
pub const WEBHOOKS: &str = "webhooks";
}
const CHAT_SETTINGS_DB_NAME: &str = "chat-settings";

#[derive(Debug)]
pub struct IndexSchedulerOptions {
@@ -109,10 +95,10 @@ pub struct IndexSchedulerOptions {
pub snapshots_path: PathBuf,
/// The path to the folder containing the dumps.
pub dumps_path: PathBuf,
/// The webhook url that was set by the CLI.
pub cli_webhook_url: Option<String>,
/// The Authorization header to send to the webhook URL that was set by the CLI.
pub cli_webhook_authorization: Option<String>,
/// The URL on which we must send the tasks statuses
pub webhook_url: Option<String>,
/// The value we will send into the Authorization HTTP header on the webhook URL
pub webhook_authorization_header: Option<String>,
/// The maximum size, in bytes, of the task index.
pub task_db_size: usize,
/// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
@@ -179,14 +165,10 @@ pub struct IndexScheduler {
/// Whether we should automatically cleanup the task queue or not.
pub(crate) cleanup_enabled: bool,

/// Whether we should use the old document indexer or the new one.
pub(crate) experimental_no_edition_2024_for_dumps: bool,

/// A database to store single-keyed data that is persisted across restarts.
persisted: Database<Str, Str>,

/// Webhook, loaded and stored in the `persisted` database
webhooks: Arc<Webhooks>,
/// The webhook url we should send tasks to after processing every batches.
pub(crate) webhook_url: Option<String>,
/// The Authorization header to send to the webhook URL.
pub(crate) webhook_authorization_header: Option<String>,

/// A map to retrieve the runtime representation of an embedder depending on its configuration.
///
@@ -225,10 +207,8 @@ impl IndexScheduler {

index_mapper: self.index_mapper.clone(),
cleanup_enabled: self.cleanup_enabled,
experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps,
persisted: self.persisted,

webhooks: self.webhooks.clone(),
webhook_url: self.webhook_url.clone(),
webhook_authorization_header: self.webhook_authorization_header.clone(),
embedders: self.embedders.clone(),
#[cfg(test)]
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
@@ -247,7 +227,6 @@ impl IndexScheduler {
+ IndexMapper::nb_db()
+ features::FeatureData::nb_db()
+ 1 // chat-prompts
+ 1 // persisted
}

/// Create an index scheduler and start its run loop.
@@ -298,18 +277,10 @@ impl IndexScheduler {
let version = versioning::Versioning::new(&env, from_db_version)?;

let mut wtxn = env.write_txn()?;

let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?;
let queue = Queue::new(&env, &mut wtxn, &options)?;
let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?;
let chat_settings = env.create_database(&mut wtxn, Some(db_name::CHAT_SETTINGS))?;

let persisted = env.create_database(&mut wtxn, Some(db_name::PERSISTED))?;
let webhooks_db = persisted.remap_data_type::<SerdeJson<Webhooks>>();
let mut webhooks = webhooks_db.get(&wtxn, db_keys::WEBHOOKS)?.unwrap_or_default();
webhooks
.with_cli(options.cli_webhook_url.clone(), options.cli_webhook_authorization.clone());

let chat_settings = env.create_database(&mut wtxn, Some(CHAT_SETTINGS_DB_NAME))?;
wtxn.commit()?;

// allow unreachable_code to get rids of the warning in the case of a test build.
@@ -322,11 +293,8 @@ impl IndexScheduler {
index_mapper,
env,
cleanup_enabled: options.cleanup_enabled,
experimental_no_edition_2024_for_dumps: options
.indexer_config
.experimental_no_edition_2024_for_dumps,
persisted,
webhooks: Arc::new(webhooks),
webhook_url: options.webhook_url,
webhook_authorization_header: options.webhook_authorization_header,
embedders: Default::default(),

#[cfg(test)]
@@ -623,11 +591,6 @@ impl IndexScheduler {
Ok(nbr_index_processing_tasks > 0)
}

/// Whether the index should use the old document indexer.
pub fn no_edition_2024_for_dumps(&self) -> bool {
self.experimental_no_edition_2024_for_dumps
}

/// Return the tasks matching the query from the user's point of view along
/// with the total number of tasks matching the query, ignoring from and limit.
///
@@ -774,92 +737,86 @@ impl IndexScheduler {
Ok(())
}

/// Once the tasks changes have been committed we must send all the tasks that were updated to our webhooks
fn notify_webhooks(&self, updated: RoaringBitmap) {
struct TaskReader<'a, 'b> {
rtxn: &'a RoTxn<'a>,
index_scheduler: &'a IndexScheduler,
tasks: &'b mut roaring::bitmap::Iter<'b>,
buffer: Vec<u8>,
written: usize,
}
/// Once the tasks changes have been committed we must send all the tasks that were updated to our webhook if there is one.
fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
if let Some(ref url) = self.webhook_url {
struct TaskReader<'a, 'b> {
rtxn: &'a RoTxn<'a>,
index_scheduler: &'a IndexScheduler,
tasks: &'b mut roaring::bitmap::Iter<'b>,
buffer: Vec<u8>,
written: usize,
}

impl Read for TaskReader<'_, '_> {
fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
if self.buffer.is_empty() {
match self.tasks.next() {
None => return Ok(0),
Some(task_id) => {
let task = self
.index_scheduler
.queue
.tasks
.get_task(self.rtxn, task_id)
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
.ok_or_else(|| {
io::Error::new(io::ErrorKind::Other, Error::CorruptedTaskQueue)
})?;
impl Read for TaskReader<'_, '_> {
fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
if self.buffer.is_empty() {
match self.tasks.next() {
None => return Ok(0),
Some(task_id) => {
let task = self
.index_scheduler
.queue
.tasks
.get_task(self.rtxn, task_id)
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
.ok_or_else(|| {
io::Error::new(
io::ErrorKind::Other,
Error::CorruptedTaskQueue,
)
})?;

serde_json::to_writer(&mut self.buffer, &TaskView::from_task(&task))?;
self.buffer.push(b'\n');
serde_json::to_writer(
&mut self.buffer,
&TaskView::from_task(&task),
)?;
self.buffer.push(b'\n');
}
}
}

let mut to_write = &self.buffer[self.written..];
let wrote = io::copy(&mut to_write, &mut buf)?;
self.written += wrote as usize;

// we wrote everything and must refresh our buffer on the next call
if self.written == self.buffer.len() {
self.written = 0;
self.buffer.clear();
}

Ok(wrote as usize)
}
}

let mut to_write = &self.buffer[self.written..];
let wrote = io::copy(&mut to_write, &mut buf)?;
self.written += wrote as usize;
let rtxn = self.env.read_txn()?;

// we wrote everything and must refresh our buffer on the next call
if self.written == self.buffer.len() {
self.written = 0;
self.buffer.clear();
}
let task_reader = TaskReader {
rtxn: &rtxn,
index_scheduler: self,
tasks: &mut updated.into_iter(),
buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
written: 0,
};

Ok(wrote as usize)
// let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
let request = ureq::post(url)
.timeout(Duration::from_secs(30))
.set("Content-Encoding", "gzip")
.set("Content-Type", "application/x-ndjson");
let request = match &self.webhook_authorization_header {
Some(header) => request.set("Authorization", header),
None => request,
};

if let Err(e) = request.send(reader) {
tracing::error!("While sending data to the webhook: {e}");
}
}

let webhooks = self.webhooks.get_all();
if webhooks.is_empty() {
return;
}
let this = self.private_clone();
// We must take the RoTxn before entering the thread::spawn otherwise another batch may be
// processed before we had the time to take our txn.
let rtxn = match self.env.clone().static_read_txn() {
Ok(rtxn) => rtxn,
Err(e) => {
tracing::error!("Couldn't get an rtxn to notify the webhook: {e}");
return;
}
};

std::thread::spawn(move || {
for (uuid, Webhook { url, headers }) in webhooks.iter() {
let task_reader = TaskReader {
rtxn: &rtxn,
index_scheduler: &this,
tasks: &mut updated.iter(),
buffer: Vec::with_capacity(page_size::get()),
written: 0,
};

let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());

let mut request = ureq::post(url)
.timeout(Duration::from_secs(30))
.set("Content-Encoding", "gzip")
.set("Content-Type", "application/x-ndjson");
for (header_name, header_value) in headers.iter() {
request = request.set(header_name, header_value);
}

if let Err(e) = request.send(reader) {
tracing::error!("While sending data to the webhook {uuid}: {e}");
}
}
});
Ok(())
}

pub fn index_stats(&self, index_uid: &str) -> Result<IndexStats> {
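The rewritten webhook notifier above streams every updated task as gzipped NDJSON to each registered webhook without buffering the whole payload: `TaskReader` implements `Read` by serializing one task per buffer refill, and flate2's read-side encoder compresses on the fly. A reduced sketch of the same streaming idea, with a string standing in for tasks pulled lazily from the queue:

```rust
use std::io::{BufReader, Cursor, Read};

use flate2::read::GzEncoder;
use flate2::Compression;

fn main() -> std::io::Result<()> {
    // Stand-in for the NDJSON that TaskReader would produce lazily.
    let ndjson = "{\"uid\":1}\n{\"uid\":2}\n";
    let source = Cursor::new(ndjson.as_bytes());

    // Compress while reading; nothing is materialized up front.
    let mut gz = GzEncoder::new(BufReader::new(source), Compression::default());
    let mut compressed = Vec::new();
    gz.read_to_end(&mut compressed)?;

    // A real sender would hand the encoder straight to an HTTP client that
    // accepts a `Read` body, as the code above does with `request.send(reader)`.
    println!("compressed {} bytes", compressed.len());
    Ok(())
}
```

Running the sends on a spawned thread with a long-lived read transaction, as the new code does, keeps webhook latency out of the scheduler's tick.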
@@ -890,69 +847,33 @@ impl IndexScheduler {
self.features.network()
}

pub fn update_runtime_webhooks(&self, runtime: RuntimeWebhooks) -> Result<()> {
let webhooks = Webhooks::from_runtime(runtime);
let mut wtxn = self.env.write_txn()?;
let webhooks_db = self.persisted.remap_data_type::<SerdeJson<Webhooks>>();
webhooks_db.put(&mut wtxn, db_keys::WEBHOOKS, &webhooks)?;
wtxn.commit()?;
self.webhooks.update_runtime(webhooks.into_runtime());
Ok(())
}

pub fn webhooks_dump_view(&self) -> WebhooksDumpView {
// We must not dump the cli api key
WebhooksDumpView { webhooks: self.webhooks.get_runtime() }
}

pub fn webhooks_view(&self) -> WebhooksView {
WebhooksView { webhooks: self.webhooks.get_all() }
}

pub fn retrieve_runtime_webhooks(&self) -> RuntimeWebhooks {
self.webhooks.get_runtime()
}

pub fn embedders(
&self,
index_uid: String,
embedding_configs: Vec<IndexEmbeddingConfig>,
) -> Result<RuntimeEmbedders> {
) -> Result<EmbeddingConfigs> {
let res: Result<_> = embedding_configs
.into_iter()
.map(
|IndexEmbeddingConfig {
name,
config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
fragments,
}|
-> Result<(String, Arc<RuntimeEmbedder>)> {
let document_template = prompt
.try_into()
.map_err(meilisearch_types::milli::Error::from)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;

let fragments = fragments
.into_inner()
.into_iter()
.map(|fragment| {
let value = embedder_options.fragment(&fragment.name).unwrap();
let template = JsonTemplate::new(value.clone()).unwrap();
RuntimeFragment { name: fragment.name, id: fragment.id, template }
})
.collect();
..
}| {
let prompt = Arc::new(
prompt
.try_into()
.map_err(meilisearch_types::milli::Error::from)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
);
// optimistically return existing embedder
{
let embedders = self.embedders.read().unwrap();
if let Some(embedder) = embedders.get(&embedder_options) {
let runtime = Arc::new(RuntimeEmbedder::new(
embedder.clone(),
document_template,
fragments,
quantized.unwrap_or_default(),
return Ok((
name,
(embedder.clone(), prompt, quantized.unwrap_or_default()),
));

return Ok((name, runtime));
}
}

@@ -968,19 +889,11 @@ impl IndexScheduler {
let mut embedders = self.embedders.write().unwrap();
embedders.insert(embedder_options, embedder.clone());
}

let runtime = Arc::new(RuntimeEmbedder::new(
embedder.clone(),
document_template,
fragments,
quantized.unwrap_or_default(),
));

Ok((name, runtime))
Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
},
)
.collect();
res.map(RuntimeEmbedders::new)
res.map(EmbeddingConfigs::new)
}

pub fn chat_settings(&self, uid: &str) -> Result<Option<ChatCompletionSettings>> {
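The `embedders` method above is a double-checked cache: look the embedder options up under a read lock, and only take the write lock (and pay the construction cost) on a miss. A generic sketch of that locking pattern, independent of the milli types; the key and value types here are stand-ins:

```rust
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

#[derive(Default)]
struct Cache {
    inner: RwLock<HashMap<String, Arc<String>>>,
}

impl Cache {
    fn get_or_build(&self, key: &str) -> Arc<String> {
        // Fast path: shared lock, optimistic hit.
        if let Some(v) = self.inner.read().unwrap().get(key) {
            return v.clone();
        }
        // Slow path: build, then publish under the write lock. If two threads
        // race, or_insert keeps the first value so callers still agree.
        let built = Arc::new(format!("built for {key}")); // stand-in for Embedder::new
        self.inner.write().unwrap().entry(key.to_string()).or_insert(built).clone()
    }
}

fn main() {
    let cache = Cache::default();
    let a = cache.get_or_build("openai:small");
    let b = cache.get_or_build("openai:small");
    assert!(Arc::ptr_eq(&a, &b)); // the second call reuses the cached value
}
```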
@@ -1041,72 +954,3 @@ pub struct IndexStats {
/// Internal stats computed from the index.
pub inner_stats: index_mapper::IndexStats,
}

/// These structure are not meant to be exposed to the end user, if needed, use the meilisearch-types::webhooks structure instead.
/// /!\ Everytime you deserialize this structure you should fill the cli_webhook later on with the `with_cli` method. /!\
#[derive(Debug, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
struct Webhooks {
// The cli webhook should *never* be stored in a database.
// It represent a state that only exists for this execution of meilisearch
#[serde(skip)]
pub cli: Option<CliWebhook>,

#[serde(default)]
pub runtime: RwLock<RuntimeWebhooks>,
}

type RuntimeWebhooks = BTreeMap<Uuid, Webhook>;

impl Webhooks {
pub fn with_cli(&mut self, url: Option<String>, auth: Option<String>) {
if let Some(url) = url {
let webhook = CliWebhook { url, auth };
self.cli = Some(webhook);
}
}

pub fn from_runtime(webhooks: RuntimeWebhooks) -> Self {
Self { cli: None, runtime: RwLock::new(webhooks) }
}

pub fn into_runtime(self) -> RuntimeWebhooks {
// safe because we own self and it cannot be cloned
self.runtime.into_inner().unwrap()
}

pub fn update_runtime(&self, webhooks: RuntimeWebhooks) {
*self.runtime.write().unwrap() = webhooks;
}

/// Returns all the webhooks in an unified view. The cli webhook is represented with an uuid set to 0
pub fn get_all(&self) -> BTreeMap<Uuid, Webhook> {
self.cli
.as_ref()
.map(|wh| (Uuid::nil(), Webhook::from(wh)))
.into_iter()
.chain(self.runtime.read().unwrap().iter().map(|(uuid, wh)| (*uuid, wh.clone())))
.collect()
}

/// Returns all the runtime webhooks.
pub fn get_runtime(&self) -> BTreeMap<Uuid, Webhook> {
self.runtime.read().unwrap().iter().map(|(uuid, wh)| (*uuid, wh.clone())).collect()
}
}

#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
struct CliWebhook {
pub url: String,
pub auth: Option<String>,
}

impl From<&CliWebhook> for Webhook {
fn from(webhook: &CliWebhook) -> Self {
let mut headers = BTreeMap::new();
if let Some(ref auth) = webhook.auth {
headers.insert("Authorization".to_string(), auth.to_string());
}
Self { url: webhook.url.to_string(), headers }
}
}
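`Webhooks::get_all` above merges the CLI-provided webhook (keyed by the nil UUID and, thanks to `#[serde(skip)]`, never persisted or dumped) with the runtime webhooks stored in the database. A small sketch of that merge, using a plain map and string URLs in place of the persisted state:

```rust
use std::collections::BTreeMap;

use uuid::Uuid;

fn main() {
    // Stand-in for the optional CLI webhook: always uuid 0.
    let cli: Option<(Uuid, &str)> = Some((Uuid::nil(), "https://cli.example/hook"));

    // Stand-in for the runtime webhooks loaded from the database.
    let mut runtime = BTreeMap::new();
    runtime.insert(Uuid::new_v4(), "https://stored.example/hook");

    // CLI entry first, then everything from the database.
    let all: BTreeMap<Uuid, &str> = cli.into_iter().chain(runtime).collect();
    for (uuid, url) in &all {
        println!("{uuid}: {url}");
    }
}
```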
@@ -103,12 +103,10 @@ make_enum_progress! {
|
||||
pub enum DumpCreationProgress {
|
||||
StartTheDumpCreation,
|
||||
DumpTheApiKeys,
|
||||
DumpTheChatCompletionSettings,
|
||||
DumpTheTasks,
|
||||
DumpTheBatches,
|
||||
DumpTheIndexes,
|
||||
DumpTheExperimentalFeatures,
|
||||
DumpTheWebhooks,
|
||||
CompressTheDump,
|
||||
}
|
||||
}
|
||||
@@ -177,17 +175,8 @@ make_enum_progress! {
|
||||
}
|
||||
}
|
||||
|
||||
make_enum_progress! {
|
||||
pub enum Export {
|
||||
EnsuringCorrectnessOfTheTarget,
|
||||
ExportingTheSettings,
|
||||
ExportingTheDocuments,
|
||||
}
|
||||
}
|
||||
|
||||
make_atomic_progress!(Task alias AtomicTaskStep => "task" );
|
||||
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
|
||||
make_atomic_progress!(Index alias AtomicIndexStep => "index" );
|
||||
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
|
||||
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
|
||||
|
||||
|
||||
@@ -179,7 +179,6 @@ impl BatchQueue {
|
||||
progress: None,
|
||||
details: batch.details,
|
||||
stats: batch.stats,
|
||||
embedder_stats: batch.embedder_stats.as_ref().into(),
|
||||
started_at: batch.started_at,
|
||||
finished_at: batch.finished_at,
|
||||
enqueued_at: batch.enqueued_at,
|
||||
|
||||
@@ -71,7 +71,6 @@ impl From<KindWithContent> for AutobatchKind {
|
||||
KindWithContent::TaskCancelation { .. }
|
||||
| KindWithContent::TaskDeletion { .. }
|
||||
| KindWithContent::DumpCreation { .. }
|
||||
| KindWithContent::Export { .. }
|
||||
| KindWithContent::UpgradeDatabase { .. }
|
||||
| KindWithContent::SnapshotCreation => {
|
||||
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
|
||||
|
||||
@@ -1,5 +1,4 @@
use std::fmt;
use std::io::ErrorKind;

use meilisearch_types::heed::RoTxn;
use meilisearch_types::milli::update::IndexDocumentsMethod;

@@ -48,9 +47,6 @@ pub(crate) enum Batch {
    IndexSwap {
        task: Task,
    },
    Export {
        task: Task,
    },
    UpgradeDatabase {
        tasks: Vec<Task>,
    },

@@ -107,7 +103,6 @@ impl Batch {
            Batch::TaskCancelation { task, .. }
            | Batch::Dump(task)
            | Batch::IndexCreation { task, .. }
            | Batch::Export { task }
            | Batch::IndexUpdate { task, .. } => {
                RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
            }

@@ -147,7 +142,6 @@ impl Batch {
            | TaskDeletions(_)
            | SnapshotCreation(_)
            | Dump(_)
            | Export { .. }
            | UpgradeDatabase { .. }
            | IndexSwap { .. } => None,
            IndexOperation { op, .. } => Some(op.index_uid()),

@@ -173,7 +167,6 @@ impl fmt::Display for Batch {
            Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
            Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
            Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
            Batch::Export { .. } => f.write_str("Export")?,
            Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
        };
        match index_uid {

@@ -433,10 +426,9 @@ impl IndexScheduler {
    /// 0. We get the *last* task to cancel.
    /// 1. We get the tasks to upgrade.
    /// 2. We get the *next* task to delete.
    /// 3. We get the *next* export to process.
    /// 4. We get the *next* snapshot to process.
    /// 5. We get the *next* dump to process.
    /// 6. We get the *next* tasks to process for a specific index.
    /// 3. We get the *next* snapshot to process.
    /// 4. We get the *next* dump to process.
    /// 5. We get the *next* tasks to process for a specific index.
    #[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
    pub(crate) fn create_next_batch(
        &self,

@@ -508,17 +500,7 @@ impl IndexScheduler {
            return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
        }

        // 3. we batch the export.
        let to_export = self.queue.tasks.get_kind(rtxn, Kind::Export)? & enqueued;
        if !to_export.is_empty() {
            let task_id = to_export.iter().next().expect("There must be at least one export task");
            let mut task = self.queue.tasks.get_task(rtxn, task_id)?.unwrap();
            current_batch.processing([&mut task]);
            current_batch.reason(BatchStopReason::TaskKindCannotBeBatched { kind: Kind::Export });
            return Ok(Some((Batch::Export { task }, current_batch)));
        }

        // 4. we batch the snapshot.
        // 3. we batch the snapshot.
        let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
        if !to_snapshot.is_empty() {
            let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;

@@ -528,7 +510,7 @@ impl IndexScheduler {
            return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
        }

        // 5. we batch the dumps.
        // 4. we batch the dumps.
        let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
        if let Some(to_dump) = to_dump.min() {
            let mut task =

@@ -541,7 +523,7 @@ impl IndexScheduler {
            return Ok(Some((Batch::Dump(task), current_batch)));
        }

        // 6. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
        // 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
        let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
        let mut task =
            self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;

@@ -595,11 +577,7 @@ impl IndexScheduler {
            .and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;

        if let Some(uuid) = task.content_uuid() {
            let content_size = match self.queue.file_store.compute_size(uuid) {
                Ok(content_size) => content_size,
                Err(file_store::Error::IoError(err)) if err.kind() == ErrorKind::NotFound => 0,
                Err(otherwise) => return Err(otherwise.into()),
            };
            let content_size = self.queue.file_store.compute_size(uuid)?;
            total_size = total_size.saturating_add(content_size);
        }
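The doc comment above fixes the scheduling priority; with the export step dropped, the scan can be pictured as "first prioritised kind present wins, otherwise fall back to per-index work". A minimal sketch under assumed names (not the actual scheduler types):

```rust
#[derive(Clone, Copy, PartialEq, Eq)]
enum Kind {
    TaskCancelation,
    UpgradeDatabase,
    TaskDeletion,
    SnapshotCreation,
    DumpCreation,
    IndexOperation,
}

fn next_kind(enqueued: &[Kind]) -> Option<Kind> {
    // Scan in priority order; the first kind present in the queue wins.
    [
        Kind::TaskCancelation,
        Kind::UpgradeDatabase,
        Kind::TaskDeletion,
        Kind::SnapshotCreation,
        Kind::DumpCreation,
    ]
    .into_iter()
    .find(|kind| enqueued.contains(kind))
    // Otherwise batch ordinary per-index operations.
    .or_else(|| enqueued.first().copied())
}
```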
@@ -4,7 +4,6 @@ mod autobatcher_test;
mod create_batch;
mod process_batch;
mod process_dump_creation;
mod process_export;
mod process_index_operation;
mod process_snapshot_creation;
mod process_upgrade;

@@ -446,7 +445,8 @@ impl IndexScheduler {
                Ok(())
            })?;

            self.notify_webhooks(ids);
            // We shouldn't crash the tick function if we can't send data to the webhook.
            let _ = self.notify_webhook(&ids);

            #[cfg(test)]
            self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing);
@@ -162,13 +162,8 @@ impl IndexScheduler {
                    .set_currently_updating_index(Some((index_uid.clone(), index.clone())));

                let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
                let (tasks, congestion) = self.apply_index_operation(
                    &mut index_wtxn,
                    &index,
                    op,
                    &progress,
                    current_batch.embedder_stats.clone(),
                )?;
                let (tasks, congestion) =
                    self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?;

                {
                    progress.update_progress(FinalizingIndexStep::Committing);

@@ -243,12 +238,10 @@ impl IndexScheduler {
                );
                builder.set_primary_key(primary_key);
                let must_stop_processing = self.scheduler.must_stop_processing.clone();

                builder
                    .execute(
                        &|| must_stop_processing.get(),
                        &progress,
                        current_batch.embedder_stats.clone(),
                        |indexing_step| tracing::debug!(update = ?indexing_step),
                        || must_stop_processing.get(),
                    )
                    .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
                index_wtxn.commit()?;

@@ -368,46 +361,6 @@ impl IndexScheduler {
                task.status = Status::Succeeded;
                Ok((vec![task], ProcessBatchInfo::default()))
            }
            Batch::Export { mut task } => {
                let KindWithContent::Export { url, api_key, payload_size, indexes } = &task.kind
                else {
                    unreachable!()
                };

                let ret = catch_unwind(AssertUnwindSafe(|| {
                    self.process_export(
                        url,
                        api_key.as_deref(),
                        payload_size.as_ref(),
                        indexes,
                        progress,
                    )
                }));

                let stats = match ret {
                    Ok(Ok(stats)) => stats,
                    Ok(Err(Error::AbortedTask)) => return Err(Error::AbortedTask),
                    Ok(Err(e)) => return Err(Error::Export(Box::new(e))),
                    Err(e) => {
                        let msg = match e.downcast_ref::<&'static str>() {
                            Some(s) => *s,
                            None => match e.downcast_ref::<String>() {
                                Some(s) => &s[..],
                                None => "Box<dyn Any>",
                            },
                        };
                        return Err(Error::Export(Box::new(Error::ProcessBatchPanicked(
                            msg.to_string(),
                        ))));
                    }
                };

                task.status = Status::Succeeded;
                if let Some(Details::Export { indexes, .. }) = task.details.as_mut() {
                    *indexes = stats;
                }
                Ok((vec![task], ProcessBatchInfo::default()))
            }
            Batch::UpgradeDatabase { mut tasks } => {
                let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
                    unreachable!();

@@ -755,11 +708,9 @@ impl IndexScheduler {
            from.1,
            from.2
        );
        let ret = catch_unwind(std::panic::AssertUnwindSafe(|| {
        match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
            self.process_rollback(from, progress)
        }));

        match ret {
        })) {
            Ok(Ok(())) => {}
            Ok(Err(err)) => return Err(Error::DatabaseUpgrade(Box::new(err))),
            Err(e) => {
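Both the export and upgrade arms above run their work under `catch_unwind` so that a panic in one batch is turned into a task error instead of killing the scheduler loop; the payload downcasts cover the two types a Rust panic usually carries, `&'static str` and `String`. A standalone sketch of the same containment pattern (names are illustrative):

```rust
use std::panic::{catch_unwind, AssertUnwindSafe};

fn run_contained(step: impl FnOnce() -> Result<(), String>) -> Result<(), String> {
    match catch_unwind(AssertUnwindSafe(step)) {
        Ok(result) => result,
        Err(payload) => {
            // Panic payloads are `Box<dyn Any>`; try the two common cases.
            let msg = payload
                .downcast_ref::<&'static str>()
                .copied()
                .map(str::to_owned)
                .or_else(|| payload.downcast_ref::<String>().cloned())
                .unwrap_or_else(|| "Box<dyn Any>".to_owned());
            Err(format!("the step panicked: {msg}"))
        }
    }
}
```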
@@ -5,7 +5,6 @@ use std::sync::atomic::Ordering;

use dump::IndexMetadata;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{self};

@@ -44,16 +43,7 @@ impl IndexScheduler {

        let rtxn = self.env.read_txn()?;

        // 2. dump the chat completion settings
        // TODO should I skip the export if the chat completion has been disabled?
        progress.update_progress(DumpCreationProgress::DumpTheChatCompletionSettings);
        let mut dump_chat_completion_settings = dump.create_chat_completions_settings()?;
        for result in self.chat_settings.iter(&rtxn)? {
            let (name, chat_settings) = result?;
            dump_chat_completion_settings.push_settings(name, &chat_settings)?;
        }

        // 3. dump the tasks
        // 2. dump the tasks
        progress.update_progress(DumpCreationProgress::DumpTheTasks);
        let mut dump_tasks = dump.create_tasks_queue()?;

@@ -91,7 +81,7 @@ impl IndexScheduler {

            let mut dump_content_file = dump_tasks.push_task(&t.into())?;

            // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
            // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
            if let Some(content_file) = content_file {
                if self.scheduler.must_stop_processing.get() {
                    return Err(Error::AbortedTask);

@@ -115,7 +105,7 @@ impl IndexScheduler {
        }
        dump_tasks.flush()?;

        // 4. dump the batches
        // 3. dump the batches
        progress.update_progress(DumpCreationProgress::DumpTheBatches);
        let mut dump_batches = dump.create_batches_queue()?;

@@ -148,7 +138,7 @@ impl IndexScheduler {
        }
        dump_batches.flush()?;

        // 5. Dump the indexes
        // 4. Dump the indexes
        progress.update_progress(DumpCreationProgress::DumpTheIndexes);
        let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
        let mut count = 0;

@@ -175,6 +165,9 @@ impl IndexScheduler {

            let fields_ids_map = index.fields_ids_map(&rtxn)?;
            let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
            let embedding_configs = index
                .embedding_configs(&rtxn)
                .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;

            let nb_documents = index
                .number_of_documents(&rtxn)

@@ -185,7 +178,7 @@ impl IndexScheduler {
            let documents = index
                .all_documents(&rtxn)
                .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
            // 5.1. Dump the documents
            // 4.1. Dump the documents
            for ret in documents {
                if self.scheduler.must_stop_processing.get() {
                    return Err(Error::AbortedTask);

@@ -228,21 +221,16 @@ impl IndexScheduler {
                    return Err(Error::from_milli(user_err, Some(uid.to_string())));
                };

                for (
                    embedder_name,
                    EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
                ) in embeddings
                {
                for (embedder_name, embeddings) in embeddings {
                    let user_provided = embedding_configs
                        .iter()
                        .find(|conf| conf.name == embedder_name)
                        .is_some_and(|conf| conf.user_provided.contains(id));
                    let embeddings = ExplicitVectors {
                        embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
                            embeddings,
                        )),
                        regenerate: regenerate &&
                            // Meilisearch does not handle dumps with fragments well: as the fragments
                            // are marked as user-provided,
                            // all embeddings would be regenerated on any settings change or document update.
                            // To prevent this, we mark embeddings as non-regenerate in this case.
                            !has_fragments,
                        regenerate: !user_provided,
                    };
                    vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
                }

@@ -252,7 +240,7 @@ impl IndexScheduler {
                    atomic.fetch_add(1, Ordering::Relaxed);
                }

                // 5.2. Dump the settings
                // 4.2. Dump the settings
                let settings = meilisearch_types::settings::settings(
                    index,
                    &rtxn,

@@ -263,18 +251,13 @@ impl IndexScheduler {
            Ok(())
        })?;

        // 6. Dump experimental feature settings
        // 5. Dump experimental feature settings
        progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
        let features = self.features().runtime_features();
        dump.create_experimental_features(features)?;
        let network = self.network();
        dump.create_network(network)?;

        // 7. Dump the webhooks
        progress.update_progress(DumpCreationProgress::DumpTheWebhooks);
        let webhooks = self.webhooks_dump_view();
        dump.create_webhooks(webhooks)?;

        let dump_uid = started_at.format(format_description!(
            "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
        )).unwrap();
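The `dump_uid` is simply the dump's start time rendered as `yyyymmdd-hhmmssmmm`. A minimal sketch of the same formatting, assuming the `time` crate with its `formatting` and `macros` features enabled:

```rust
use time::macros::format_description;
use time::OffsetDateTime;

// E.g. 2025-01-02 03:04:05.678 UTC becomes "20250102-030405678".
fn dump_uid(started_at: OffsetDateTime) -> String {
    started_at
        .format(format_description!(
            "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
        ))
        .expect("a well-formed format description")
}
```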
@@ -1,377 +0,0 @@
use std::collections::BTreeMap;
use std::io::{self, Write as _};
use std::sync::atomic;
use std::time::Duration;

use backoff::ExponentialBackoff;
use byte_unit::Byte;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::update::{request_threads, Setting};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{self, obkv_to_json, Filter, InternalError};
use meilisearch_types::settings::{self, SecretPolicy};
use meilisearch_types::tasks::{DetailsExportIndexSettings, ExportIndexSettings};
use serde::Deserialize;
use ureq::{json, Response};

use super::MustStopProcessing;
use crate::processing::AtomicDocumentStep;
use crate::{Error, IndexScheduler, Result};

impl IndexScheduler {
    pub(super) fn process_export(
        &self,
        base_url: &str,
        api_key: Option<&str>,
        payload_size: Option<&Byte>,
        indexes: &BTreeMap<IndexUidPattern, ExportIndexSettings>,
        progress: Progress,
    ) -> Result<BTreeMap<IndexUidPattern, DetailsExportIndexSettings>> {
        #[cfg(test)]
        self.maybe_fail(crate::test_utils::FailureLocation::ProcessExport)?;

        let indexes: Vec<_> = self
            .index_names()?
            .into_iter()
            .flat_map(|uid| {
                indexes
                    .iter()
                    .find(|(pattern, _)| pattern.matches_str(&uid))
                    .map(|(pattern, settings)| (pattern, uid, settings))
            })
            .collect();

        let mut output = BTreeMap::new();
        let agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build();
        let must_stop_processing = self.scheduler.must_stop_processing.clone();
        for (i, (_pattern, uid, export_settings)) in indexes.iter().enumerate() {
            if must_stop_processing.get() {
                return Err(Error::AbortedTask);
            }

            progress.update_progress(VariableNameStep::<ExportIndex>::new(
                format!("Exporting index `{uid}`"),
                i as u32,
                indexes.len() as u32,
            ));

            let ExportIndexSettings { filter, override_settings } = export_settings;
            let index = self.index(uid)?;
            let index_rtxn = index.read_txn()?;
            let bearer = api_key.map(|api_key| format!("Bearer {api_key}"));

            // First, check if the index already exists
            let url = format!("{base_url}/indexes/{uid}");
            let response = retry(&must_stop_processing, || {
                let mut request = agent.get(&url);
                if let Some(bearer) = &bearer {
                    request = request.set("Authorization", bearer);
                }

                request.send_bytes(Default::default()).map_err(into_backoff_error)
            });
            let index_exists = match response {
                Ok(response) => response.status() == 200,
                Err(Error::FromRemoteWhenExporting { code, .. }) if code == "index_not_found" => {
                    false
                }
                Err(e) => return Err(e),
            };

            let primary_key = index
                .primary_key(&index_rtxn)
                .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;

            // Create the index
            if !index_exists {
                let url = format!("{base_url}/indexes");
                retry(&must_stop_processing, || {
                    let mut request = agent.post(&url);
                    if let Some(bearer) = &bearer {
                        request = request.set("Authorization", bearer);
                    }
                    let index_param = json!({ "uid": uid, "primaryKey": primary_key });
                    request.send_json(&index_param).map_err(into_backoff_error)
                })?;
            }

            // Patch the index primary key
            if index_exists && *override_settings {
                let url = format!("{base_url}/indexes/{uid}");
                retry(&must_stop_processing, || {
                    let mut request = agent.patch(&url);
                    if let Some(bearer) = &bearer {
                        request = request.set("Authorization", bearer);
                    }
                    let index_param = json!({ "primaryKey": primary_key });
                    request.send_json(&index_param).map_err(into_backoff_error)
                })?;
            }

            // Send the index settings
            if !index_exists || *override_settings {
                let mut settings =
                    settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets)
                        .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
                // Remove the experimental chat setting if not enabled
                if self.features().check_chat_completions("exporting chat settings").is_err() {
                    settings.chat = Setting::NotSet;
                }
                // Retry logic for sending settings
                let url = format!("{base_url}/indexes/{uid}/settings");
                retry(&must_stop_processing, || {
                    let mut request = agent.patch(&url);
                    if let Some(bearer) = bearer.as_ref() {
                        request = request.set("Authorization", bearer);
                    }
                    request.send_json(settings.clone()).map_err(into_backoff_error)
                })?;
            }

            let filter = filter
                .as_ref()
                .map(Filter::from_json)
                .transpose()
                .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
                .flatten();

            let filter_universe = filter
                .map(|f| f.evaluate(&index_rtxn, &index))
                .transpose()
                .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
            let whole_universe = index
                .documents_ids(&index_rtxn)
                .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;
            let universe = filter_universe.unwrap_or(whole_universe);

            let fields_ids_map = index.fields_ids_map(&index_rtxn)?;
            let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

            // We don't need to keep this one alive as we will
            // spawn many threads to process the documents
            drop(index_rtxn);

            let total_documents = universe.len() as u32;
            let (step, progress_step) = AtomicDocumentStep::new(total_documents);
            progress.update_progress(progress_step);

            output.insert(
                IndexUidPattern::new_unchecked(uid.clone()),
                DetailsExportIndexSettings {
                    settings: (*export_settings).clone(),
                    matched_documents: Some(total_documents as u64),
                },
            );

            let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(20 * 1024 * 1024); // defaults to 20 MiB
            let documents_url = format!("{base_url}/indexes/{uid}/documents");

            let results = request_threads()
                .broadcast(|ctx| {
                    let index_rtxn = index
                        .read_txn()
                        .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;

                    let mut buffer = Vec::new();
                    let mut tmp_buffer = Vec::new();
                    let mut compressed_buffer = Vec::new();
                    for (i, docid) in universe.iter().enumerate() {
                        if i % ctx.num_threads() != ctx.index() {
                            continue;
                        }

                        let document = index
                            .document(&index_rtxn, docid)
                            .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;

                        let mut document = obkv_to_json(&all_fields, &fields_ids_map, document)
                            .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;

                        // TODO definitely factorize this code
                        'inject_vectors: {
                            let embeddings = index
                                .embeddings(&index_rtxn, docid)
                                .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;

                            if embeddings.is_empty() {
                                break 'inject_vectors;
                            }

                            let vectors = document
                                .entry(RESERVED_VECTORS_FIELD_NAME)
                                .or_insert(serde_json::Value::Object(Default::default()));

                            let serde_json::Value::Object(vectors) = vectors else {
                                return Err(Error::from_milli(
                                    milli::Error::UserError(
                                        milli::UserError::InvalidVectorsMapType {
                                            document_id: {
                                                if let Ok(Some(Ok(index))) = index
                                                    .external_id_of(
                                                        &index_rtxn,
                                                        std::iter::once(docid),
                                                    )
                                                    .map(|it| it.into_iter().next())
                                                {
                                                    index
                                                } else {
                                                    format!("internal docid={docid}")
                                                }
                                            },
                                            value: vectors.clone(),
                                        },
                                    ),
                                    Some(uid.to_string()),
                                ));
                            };

                            for (
                                embedder_name,
                                EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
                            ) in embeddings
                            {
                                let embeddings = ExplicitVectors {
                                    embeddings: Some(
                                        VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
                                    ),
                                    regenerate: regenerate &&
                                        // Meilisearch does not handle dumps with fragments well: as the fragments
                                        // are marked as user-provided,
                                        // all embeddings would be regenerated on any settings change or document update.
                                        // To prevent this, we mark embeddings as non-regenerate in this case.
                                        !has_fragments,
                                };
                                vectors.insert(
                                    embedder_name,
                                    serde_json::to_value(embeddings).unwrap(),
                                );
                            }
                        }

                        tmp_buffer.clear();
                        serde_json::to_writer(&mut tmp_buffer, &document)
                            .map_err(milli::InternalError::from)
                            .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?;

                        // Make sure we put at least one document in the buffer even
                        // though we might go above the buffer limit before sending
                        if !buffer.is_empty() && buffer.len() + tmp_buffer.len() > limit {
                            // We compress the documents before sending them
                            let mut encoder =
                                GzEncoder::new(&mut compressed_buffer, Compression::default());
                            encoder
                                .write_all(&buffer)
                                .map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;
                            encoder
                                .finish()
                                .map_err(|e| Error::from_milli(e.into(), Some(uid.clone())))?;

                            retry(&must_stop_processing, || {
                                let mut request = agent.post(&documents_url);
                                request = request.set("Content-Type", "application/x-ndjson");
                                request = request.set("Content-Encoding", "gzip");
                                if let Some(bearer) = &bearer {
                                    request = request.set("Authorization", bearer);
                                }
                                request.send_bytes(&compressed_buffer).map_err(into_backoff_error)
                            })?;
                            buffer.clear();
                            compressed_buffer.clear();
                        }
                        buffer.extend_from_slice(&tmp_buffer);

                        if i > 0 && i % 100 == 0 {
                            step.fetch_add(100, atomic::Ordering::Relaxed);
                        }
                    }

                    retry(&must_stop_processing, || {
                        let mut request = agent.post(&documents_url);
                        request = request.set("Content-Type", "application/x-ndjson");
                        if let Some(bearer) = &bearer {
                            request = request.set("Authorization", bearer);
                        }
                        request.send_bytes(&buffer).map_err(into_backoff_error)
                    })?;

                    Ok(())
                })
                .map_err(|e| {
                    Error::from_milli(
                        milli::Error::InternalError(InternalError::PanicInThreadPool(e)),
                        Some(uid.to_string()),
                    )
                })?;
            for result in results {
                result?;
            }

            step.store(total_documents, atomic::Ordering::Relaxed);
        }

        Ok(output)
    }
}

fn retry<F>(must_stop_processing: &MustStopProcessing, send_request: F) -> Result<ureq::Response>
where
    F: Fn() -> Result<ureq::Response, backoff::Error<ureq::Error>>,
{
    match backoff::retry(ExponentialBackoff::default(), || {
        if must_stop_processing.get() {
            return Err(backoff::Error::Permanent(ureq::Error::Status(
                u16::MAX,
                // 444: Connection Closed Without Response
                Response::new(444, "Abort", "Aborted task").unwrap(),
            )));
        }
        send_request()
    }) {
        Ok(response) => Ok(response),
        Err(backoff::Error::Permanent(e)) => Err(ureq_error_into_error(e)),
        Err(backoff::Error::Transient { err, retry_after: _ }) => Err(ureq_error_into_error(err)),
    }
}

fn into_backoff_error(err: ureq::Error) -> backoff::Error<ureq::Error> {
    match err {
        // These status codes must trigger an automatic retry
        // <https://www.restapitutorial.com/advanced/responses/retries>
        ureq::Error::Status(408 | 429 | 500 | 502 | 503 | 504, _) => {
            backoff::Error::Transient { err, retry_after: None }
        }
        ureq::Error::Status(_, _) => backoff::Error::Permanent(err),
        ureq::Error::Transport(_) => backoff::Error::Transient { err, retry_after: None },
    }
}

/// Converts a `ureq::Error` into an `Error`.
fn ureq_error_into_error(error: ureq::Error) -> Error {
    #[derive(Deserialize)]
    struct MeiliError {
        message: String,
        code: String,
        r#type: String,
        link: String,
    }

    match error {
        // This is a workaround to handle task abortion - the error propagation path
        // makes it difficult to cleanly surface the abortion at this level.
        ureq::Error::Status(u16::MAX, _) => Error::AbortedTask,
        ureq::Error::Status(_, response) => match response.into_json() {
            Ok(MeiliError { message, code, r#type, link }) => {
                Error::FromRemoteWhenExporting { message, code, r#type, link }
            }
            Err(e) => e.into(),
        },
        ureq::Error::Transport(transport) => io::Error::new(io::ErrorKind::Other, transport).into(),
    }
}

enum ExportIndex {}
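Usage sketch for the `retry`/`into_backoff_error` pair above: transient transport failures and 408/429/5xx responses are retried with exponential backoff, any other status fails fast, and flipping `must_stop_processing` aborts the whole loop. The function below and its URL are illustrative, not part of the original file:

```rust
fn ping(
    agent: &ureq::Agent,
    url: &str,
    must_stop_processing: &MustStopProcessing,
) -> Result<u16> {
    // `retry` re-invokes the closure until it succeeds, the backoff gives
    // up, or the stop flag is raised.
    let response = retry(must_stop_processing, || {
        agent.get(url).call().map_err(into_backoff_error)
    })?;
    Ok(response.status())
}
```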
@@ -1,10 +1,8 @@
use std::sync::Arc;

use bumpalo::collections::CollectIn;
use bumpalo::Bump;
use meilisearch_types::heed::RwTxn;
use meilisearch_types::milli::documents::PrimaryKey;
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
use meilisearch_types::milli::update::DocumentAdditionResult;
use meilisearch_types::milli::{self, ChannelCongestion, Filter};

@@ -26,7 +24,7 @@ impl IndexScheduler {
    /// The list of processed tasks.
    #[tracing::instrument(
        level = "trace",
        skip(self, index_wtxn, index, progress, embedder_stats),
        skip(self, index_wtxn, index, progress),
        target = "indexing::scheduler"
    )]
    pub(crate) fn apply_index_operation<'i>(

@@ -35,7 +33,6 @@ impl IndexScheduler {
        index: &'i Index,
        operation: IndexOperation,
        progress: &Progress,
        embedder_stats: Arc<EmbedderStats>,
    ) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
        let indexer_alloc = Bump::new();
        let started_processing_at = std::time::Instant::now();

@@ -89,9 +86,8 @@ impl IndexScheduler {
                let mut content_files_iter = content_files.iter();
                let mut indexer = indexer::DocumentOperation::new();
                let embedders = index
                    .embedding_configs()
                    .embedding_configs(index_wtxn)
                    .map_err(|e| Error::from_milli(e.into(), Some(index_uid.clone())))?;
                    .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
                let embedders = self.embedders(index_uid.clone(), embedders)?;
                for operation in operations {
                    match operation {

@@ -181,7 +177,6 @@ impl IndexScheduler {
                        embedders,
                        &|| must_stop_processing.get(),
                        progress,
                        &embedder_stats,
                    )
                    .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
                );

@@ -275,9 +270,8 @@ impl IndexScheduler {
                    })
                    .unwrap()?;
                let embedders = index
                    .embedding_configs()
                    .embedding_configs(index_wtxn)
                    .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
                    .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
                let embedders = self.embedders(index_uid.clone(), embedders)?;

                progress.update_progress(DocumentEditionProgress::Indexing);

@@ -294,7 +288,6 @@ impl IndexScheduler {
                        embedders,
                        &|| must_stop_processing.get(),
                        progress,
                        &embedder_stats,
                    )
                    .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
                );

@@ -425,9 +418,8 @@ impl IndexScheduler {
                indexer.delete_documents_by_docids(to_delete);
                let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
                let embedders = index
                    .embedding_configs()
                    .embedding_configs(index_wtxn)
                    .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
                    .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
                let embedders = self.embedders(index_uid.clone(), embedders)?;

                progress.update_progress(DocumentDeletionProgress::Indexing);

@@ -444,7 +436,6 @@ impl IndexScheduler {
                        embedders,
                        &|| must_stop_processing.get(),
                        progress,
                        &embedder_stats,
                    )
                    .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
                );

@@ -477,11 +468,14 @@ impl IndexScheduler {
                }

                progress.update_progress(SettingsProgress::ApplyTheSettings);
                let congestion = builder
                    .execute(&|| must_stop_processing.get(), progress, embedder_stats)
                builder
                    .execute(
                        |indexing_step| tracing::debug!(update = ?indexing_step),
                        || must_stop_processing.get(),
                    )
                    .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;

                Ok((tasks, congestion))
                Ok((tasks, None))
            }
            IndexOperation::DocumentClearAndSetting {
                index_uid,

@@ -497,7 +491,6 @@ impl IndexScheduler {
                    tasks: cleared_tasks,
                },
                progress,
                embedder_stats.clone(),
            )?;

            let (settings_tasks, _congestion) = self.apply_index_operation(

@@ -505,7 +498,6 @@ impl IndexScheduler {
                index,
                IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
                progress,
                embedder_stats,
            )?;

            let mut tasks = settings_tasks;
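The `embedder_stats: Arc<EmbedderStats>` parameter removed here threads one shared, atomically updated stats handle through every indexing step. A self-contained sketch of why an `Arc` of atomics fits that job; the `EmbedderStats` field below is an assumption, not the real struct:

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

#[derive(Default)]
struct EmbedderStats {
    total_requests: AtomicUsize,
}

fn main() {
    let stats = Arc::new(EmbedderStats::default());
    // Each worker gets a cheap clone of the handle, not of the data.
    let for_worker = Arc::clone(&stats);
    std::thread::spawn(move || {
        for_worker.total_requests.fetch_add(1, Ordering::Relaxed);
    })
    .join()
    .unwrap();
    assert_eq!(stats.total_requests.load(Ordering::Relaxed), 1);
}
```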
@@ -7,73 +7,9 @@ use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::{compression, VERSION_FILE_NAME};

use crate::heed::EnvOpenOptions;
use crate::processing::{AtomicUpdateFileStep, SnapshotCreationProgress};
use crate::queue::TaskQueue;
use crate::{Error, IndexScheduler, Result};

/// # Safety
///
/// See [`EnvOpenOptions::open`].
unsafe fn remove_tasks(
    tasks: &[Task],
    dst: &std::path::Path,
    index_base_map_size: usize,
) -> Result<()> {
    let env_options = EnvOpenOptions::new();
    let mut env_options = env_options.read_txn_without_tls();
    let env = env_options.max_dbs(TaskQueue::nb_db()).map_size(index_base_map_size).open(dst)?;
    let mut wtxn = env.write_txn()?;
    let task_queue = TaskQueue::new(&env, &mut wtxn)?;

    // Destructuring to ensure the code below gets updated if a database gets added in the future.
    let TaskQueue {
        all_tasks,
        status,
        kind,
        index_tasks: _, // snapshot creation tasks are not index tasks
        canceled_by,
        enqueued_at,
        started_at,
        finished_at,
    } = task_queue;

    for task in tasks {
        all_tasks.delete(&mut wtxn, &task.uid)?;

        let mut tasks = status.get(&wtxn, &task.status)?.unwrap_or_default();
        tasks.remove(task.uid);
        status.put(&mut wtxn, &task.status, &tasks)?;

        let mut tasks = kind.get(&wtxn, &task.kind.as_kind())?.unwrap_or_default();
        tasks.remove(task.uid);
        kind.put(&mut wtxn, &task.kind.as_kind(), &tasks)?;

        canceled_by.delete(&mut wtxn, &task.uid)?;

        let timestamp = task.enqueued_at.unix_timestamp_nanos();
        let mut tasks = enqueued_at.get(&wtxn, &timestamp)?.unwrap_or_default();
        tasks.remove(task.uid);
        enqueued_at.put(&mut wtxn, &timestamp, &tasks)?;

        if let Some(task_started_at) = task.started_at {
            let timestamp = task_started_at.unix_timestamp_nanos();
            let mut tasks = started_at.get(&wtxn, &timestamp)?.unwrap_or_default();
            tasks.remove(task.uid);
            started_at.put(&mut wtxn, &timestamp, &tasks)?;
        }

        if let Some(task_finished_at) = task.finished_at {
            let timestamp = task_finished_at.unix_timestamp_nanos();
            let mut tasks = finished_at.get(&wtxn, &timestamp)?.unwrap_or_default();
            tasks.remove(task.uid);
            finished_at.put(&mut wtxn, &timestamp, &tasks)?;
        }
    }
    wtxn.commit()?;
    Ok(())
}

impl IndexScheduler {
    pub(super) fn process_snapshot(
        &self,

@@ -112,26 +48,14 @@ impl IndexScheduler {
        };
        self.env.copy_to_path(dst.join("data.mdb"), compaction_option)?;

        // 2.2 Remove the current snapshot tasks
        //
        // This is done to ensure that the tasks are not processed again when the snapshot is imported
        //
        // # Safety
        //
        // This is safe because we open the env file we just created in a temporary directory.
        // We are sure it's not being used by any other process nor thread.
        unsafe {
            remove_tasks(&tasks, &dst, self.index_mapper.index_base_map_size)?;
        }

        // 2.3 Create a read transaction on the index-scheduler
        // 2.2 Create a read transaction on the index-scheduler
        let rtxn = self.env.read_txn()?;

        // 2.4 Create the update files directory
        // 2.3 Create the update files directory
        let update_files_dir = temp_snapshot_dir.path().join("update_files");
        fs::create_dir_all(&update_files_dir)?;

        // 2.5 Only copy the update files of the enqueued tasks
        // 2.4 Only copy the update files of the enqueued tasks
        progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
        let enqueued = self.queue.tasks.get_status(&rtxn, Status::Enqueued)?;
        let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32);
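`remove_tasks` applies the same read-modify-write dance to every bitmap-valued database: load the `RoaringBitmap` stored under a key, drop the task id, write the bitmap back. A toy sketch of that pattern with an in-memory map standing in for the heed databases (uses the `roaring` crate):

```rust
use std::collections::HashMap;

use roaring::RoaringBitmap;

fn remove_from_index(
    db: &mut HashMap<&'static str, RoaringBitmap>,
    key: &'static str,
    task_uid: u32,
) {
    // Missing keys behave like an empty bitmap, mirroring `unwrap_or_default`.
    let mut tasks = db.get(key).cloned().unwrap_or_default();
    tasks.remove(task_uid);
    db.insert(key, tasks);
}
```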
@@ -1,17 +0,0 @@
---
source: crates/index-scheduler/src/scheduler/test.rs
expression: config.embedder_options
---
{
  "Rest": {
    "api_key": "My super secret",
    "distribution": null,
    "dimensions": 4,
    "url": "http://localhost:7777",
    "request": "{{text}}",
    "search_fragments": {},
    "indexing_fragments": {},
    "response": "{{embedding}}",
    "headers": {}
  }
}

@@ -1,12 +0,0 @@
---
source: crates/index-scheduler/src/scheduler/test_embedders.rs
expression: simple_hf_config.embedder_options
---
{
  "HuggingFace": {
    "model": "sentence-transformers/all-MiniLM-L6-v2",
    "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
    "distribution": null,
    "pooling": "useModel"
  }
}

@@ -1,15 +0,0 @@
---
source: crates/index-scheduler/src/scheduler/test_embedders.rs
expression: doc
---
{
  "doggo": "Intel",
  "breed": "beagle",
  "_vectors": {
    "noise": [
      0.1,
      0.2,
      0.3
    ]
  }
}

@@ -1,15 +0,0 @@
---
source: crates/index-scheduler/src/scheduler/test_embedders.rs
expression: doc
---
{
  "doggo": "kefir",
  "breed": "patou",
  "_vectors": {
    "noise": [
      0.1,
      0.2,
      0.3
    ]
  }
}

@@ -1,17 +1,12 @@
---
source: crates/index-scheduler/src/scheduler/test_embedders.rs
expression: fakerest_config.embedder_options
expression: simple_hf_config.embedder_options
---
{
  "Rest": {
    "api_key": "My super secret",
  "HuggingFace": {
    "model": "sentence-transformers/all-MiniLM-L6-v2",
    "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
    "distribution": null,
    "dimensions": 384,
    "url": "http://localhost:7777",
    "request": "{{text}}",
    "search_fragments": {},
    "indexing_fragments": {},
    "response": "{{embedding}}",
    "headers": {}
    "pooling": "useModel"
  }
}
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, indexing_fragments: NotSet, search_fragments: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 17, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.17.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 17, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 17, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 17, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.17.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 17, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.17.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 17, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 15, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.17.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.15.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

@@ -3,11 +3,11 @@ use std::collections::BTreeMap;
use big_s::S;
use meili_snap::{json_string, snapshot};
use meilisearch_auth::AuthFilter;
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::IndexDocumentsMethod::*;
use meilisearch_types::milli::{self};
use meilisearch_types::settings::SettingEmbeddingSettings;
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
use milli::vector::db::IndexEmbeddingConfig;
use roaring::RoaringBitmap;

use crate::insta_snapshot::snapshot_index_scheduler;
@@ -690,20 +690,11 @@ fn test_settings_update() {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();

let embedders = index.embedding_configs();
let configs = embedders.embedding_configs(&rtxn).unwrap();
let IndexEmbeddingConfig { name, config, fragments } = configs.first().unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
let configs = index.embedding_configs(&rtxn).unwrap();
let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap();
insta::assert_snapshot!(name, @"default");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(config.embedder_options);
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
}

#[test]
@@ -741,7 +732,6 @@ fn basic_get_stats() {
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"export": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
@@ -775,7 +765,6 @@ fn basic_get_stats() {
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"export": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
@@ -816,7 +805,6 @@ fn basic_get_stats() {
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"export": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
@@ -858,7 +846,6 @@ fn basic_get_stats() {
"documentDeletion": 0,
"documentEdition": 0,
"dumpCreation": 0,
"export": 0,
"indexCreation": 3,
"indexDeletion": 0,
"indexSwap": 0,
@@ -3,15 +3,13 @@ use std::collections::BTreeMap;
use big_s::S;
use insta::assert_json_snapshot;
use meili_snap::{json_string, snapshot};
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
use meilisearch_types::milli::vector::SearchQuery;
use meilisearch_types::milli::{self, obkv_to_json};
use meilisearch_types::settings::{SettingEmbeddingSettings, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent;
use milli::update::IndexDocumentsMethod::*;
use milli::vector::db::IndexEmbeddingConfig;

use crate::insta_snapshot::snapshot_index_scheduler;
use crate::test_utils::read_json;
@@ -87,51 +85,28 @@ fn import_vectors() {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();

let embedders = index.embedding_configs();
let configs = embedders.embedding_configs(&rtxn).unwrap();
let configs = index.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: fakerest_config, fragments } =
let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } =
configs.get(0).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(fakerest_config.embedder_options);
let fakerest_name = name.clone();

let IndexEmbeddingConfig { name, config: simple_hf_config, fragments } =
let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } =
configs.get(1).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"1");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
let simple_hf_name = name.clone();

let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
let hf_runtime = configs.get(&simple_hf_name).unwrap();
let hf_embedder = &hf_runtime.embedder;
let beagle_embed = hf_embedder
.embed_search(SearchQuery::Text("Intel the beagle best doggo"), None)
.unwrap();
let lab_embed =
hf_embedder.embed_search(SearchQuery::Text("Max the lab best doggo"), None).unwrap();
let patou_embed = hf_embedder
.embed_search(SearchQuery::Text("kefir the patou best doggo"), None)
.unwrap();
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed = hf_embedder.embed_search("Intel the beagle best doggo", None).unwrap();
let lab_embed = hf_embedder.embed_search("Max the lab best doggo", None).unwrap();
let patou_embed = hf_embedder.embed_search("kefir the patou best doggo", None).unwrap();
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
};
@@ -191,38 +166,22 @@ fn import_vectors() {
let rtxn = index.read_txn().unwrap();

// Ensure the document has been inserted into the relevant bitmap
let embedders = index.embedding_configs();
let configs = embedders.embedding_configs(&rtxn).unwrap();
let configs = index.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>");
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
configs.get(0).unwrap();
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");

let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"1");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");

let embeddings = index.embeddings(&rtxn, 0).unwrap();

assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");

let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -280,41 +239,25 @@ fn import_vectors() {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();

let embedders = index.embedding_configs();
// Ensure the document has been inserted into the relevant bitmap
let configs = embedders.embedding_configs(&rtxn).unwrap();
let configs = index.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>");
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
configs.get(0).unwrap();
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");

let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"1");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");

let embeddings = index.embeddings(&rtxn, 0).unwrap();

// automatically changed to patou because set to regenerate
assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == patou_embed, @"true");
assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true");
// remained beagle
assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");

let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -456,8 +399,8 @@ fn import_vectors_first_and_embedder_later() {
.collect::<Vec<_>>();
// all the vectors linked to the newly specified embedder have been removed
// Only the unknown embedders stay in the document DB
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1,2,3]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4,5]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
let conf = index.embedding_configs(&rtxn).unwrap();
// even though we specified the vector for the ID 3, it shouldn't be marked
// as user provided since we explicitly marked it as NOT user provided.
snapshot!(format!("{conf:#?}"), @r###"
@@ -483,28 +426,19 @@ fn import_vectors_first_and_embedder_later() {
},
quantized: None,
},
fragments: FragmentConfigs(
[],
),
user_provided: RoaringBitmap<[1, 2]>,
},
]
"###);
let info =
index.embedding_configs().embedder_info(&rtxn, "my_doggo_embedder").unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");

insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[1, 2, 3]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[1, 2]>");

let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
assert!(!embeddings.is_empty(), "{embeddings:?}");
let embedding = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty(), "{embedding:?}");

// the document with the id 3 should keep its original embedding
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
let embeddings = &embeddings["my_doggo_embedder"];

snapshot!(embeddings.len(), @"1");
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
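Judging by the names and the asserted values, the two bitmaps above track different things: `user_provided_docids()` records every document that shipped a vector in `_vectors`, while `skip_regenerate_docids()` records the subset whose vectors must not be regenerated. A self-contained sketch of the relationship the assertions pin down, built directly with the roaring crate:

    // Hedged sketch: hand-built bitmaps reproducing the asserted state above.
    use roaring::RoaringBitmap;

    fn main() {
        let user_provided: RoaringBitmap = [1u32, 2, 3].into_iter().collect();
        let skip_regenerate: RoaringBitmap = [1u32, 2].into_iter().collect();
        // Doc 3 supplied a vector but still asked for regeneration, so it is
        // user provided without being exempt from re-embedding.
        let regenerated_anyway = &user_provided - &skip_regenerate;
        assert_eq!(regenerated_anyway.iter().collect::<Vec<u32>>(), vec![3]);
    }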
@@ -559,7 +493,7 @@ fn import_vectors_first_and_embedder_later() {
"###);

let embeddings = index.embeddings(&rtxn, docid).unwrap();
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
let embedding = &embeddings["my_doggo_embedder"];

assert!(!embedding.is_empty());
assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
@@ -567,7 +501,7 @@ fn import_vectors_first_and_embedder_later() {
// the document with the id 4 should generate an embedding
let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
let embedding = &embeddings["my_doggo_embedder"];

assert!(!embedding.is_empty());
}
@@ -669,35 +603,33 @@ fn delete_document_containing_vector() {
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
let conf = index.embedding_configs(&rtxn).unwrap();
snapshot!(format!("{conf:#?}"), @r###"
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
),
prompt: PromptData {
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
),
prompt: PromptData {
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
),
},
quantized: None,
},
quantized: None,
user_provided: RoaringBitmap<[0]>,
},
fragments: FragmentConfigs(
[],
),
},
]
"###);
]
"###);
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["manual"];
let embedding = &embeddings["manual"];
assert!(!embedding.is_empty(), "{embedding:?}");

index_scheduler
@@ -715,32 +647,30 @@ fn delete_document_containing_vector() {
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
let conf = index.embedding_configs(&rtxn).unwrap();
snapshot!(format!("{conf:#?}"), @r###"
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
),
prompt: PromptData {
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
),
prompt: PromptData {
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
),
},
quantized: None,
},
quantized: None,
user_provided: RoaringBitmap<[]>,
},
fragments: FragmentConfigs(
[],
),
},
]
"###);
]
"###);
}

#[test]
@@ -870,7 +800,7 @@ fn delete_embedder_with_user_provided_vectors() {
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"regenerate":false,"embeddings":[[0.0,0.0,0.0]]}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"regenerate":false,"embeddings":[[1.0,1.0,1.0]]}}}]"###);
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir","_vectors":{"manual":{"embeddings":[[0.0,0.0,0.0]],"regenerate":false}}},{"id":1,"doggo":"intel","_vectors":{"manual":{"embeddings":[[1.0,1.0,1.0]],"regenerate":false}}}]"###);
}

{
@@ -905,6 +835,6 @@ fn delete_embedder_with_user_provided_vectors() {
.collect::<Vec<_>>();

// FIXME: redaction
snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"regenerate\":false,\"embeddings\":[[0.0,0.0,0.0]]},\"my_doggo_embedder\":{\"regenerate\":false,\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]]}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"regenerate\":false,\"embeddings\":[[1.0,1.0,1.0]]}}}]""###);
snapshot!(json_string!(serde_json::to_string(&documents).unwrap(), { "[]._vectors.doggo_embedder.embeddings" => "[vector]" }), @r###""[{\"id\":0,\"doggo\":\"kefir\",\"_vectors\":{\"manual\":{\"embeddings\":[[0.0,0.0,0.0]],\"regenerate\":false},\"my_doggo_embedder\":{\"embeddings\":[[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]],\"regenerate\":false}}},{\"id\":1,\"doggo\":\"intel\",\"_vectors\":{\"manual\":{\"embeddings\":[[1.0,1.0,1.0]],\"regenerate\":false}}}]""###);
}
}

@@ -37,7 +37,6 @@ pub(crate) enum FailureLocation {
InsideCreateBatch,
InsideProcessBatch,
PanicInsideProcessBatch,
ProcessExport,
ProcessUpgrade,
AcquiringWtxn,
UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 },
@@ -98,8 +97,8 @@ impl IndexScheduler {
indexes_path: tempdir.path().join("indexes"),
snapshots_path: tempdir.path().join("snapshots"),
dumps_path: tempdir.path().join("dumps"),
cli_webhook_url: None,
cli_webhook_authorization: None,
webhook_url: None,
webhook_authorization_header: None,
task_db_size: 1000 * 1000 * 10, // 10 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false,

@@ -39,7 +39,6 @@ pub fn upgrade_index_scheduler(
(1, 13, _) => 0,
(1, 14, _) => 0,
(1, 15, _) => 0,
(1, 16, _) => 0,
(major, minor, patch) => {
if major > current_major
|| (major == current_major && minor > current_minor)
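The hunk above extends the dumpless-upgrade dispatch in `upgrade_index_scheduler`: every known `(major, minor, patch)` maps to the first migration step to run (0 here, meaning no scheduler migration is needed), and the catch-all guard rejects databases newer than the running binary. A hedged sketch of that shape; the constants and error type are placeholders, not the real ones:

    // Hedged sketch of the version-dispatch pattern; not the actual function.
    const CURRENT_MAJOR: u32 = 1;
    const CURRENT_MINOR: u32 = 16;

    fn first_migration_step(major: u32, minor: u32, _patch: u32) -> Result<usize, String> {
        match (major, minor) {
            // Known source versions need no scheduler migration.
            (1, 13) | (1, 14) | (1, 15) | (1, 16) => Ok(0),
            (major, minor) => {
                if major > CURRENT_MAJOR || (major == CURRENT_MAJOR && minor > CURRENT_MINOR) {
                    // A database created by a newer Meilisearch cannot be opened here.
                    Err(format!("cannot upgrade from v{major}.{minor}"))
                } else {
                    Ok(0)
                }
            }
        }
    }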
@@ -1,9 +1,7 @@
//! Utility functions on the DBs. Mainly getter and setters.

use crate::milli::progress::EmbedderStats;
use std::collections::{BTreeSet, HashSet};
use std::ops::Bound;
use std::sync::Arc;

use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
@@ -29,7 +27,6 @@ pub struct ProcessingBatch {
pub uid: BatchId,
pub details: DetailsView,
pub stats: BatchStats,
pub embedder_stats: Arc<EmbedderStats>,

pub statuses: HashSet<Status>,
pub kinds: HashSet<Kind>,
@@ -51,7 +48,6 @@ impl ProcessingBatch {
uid,
details: DetailsView::default(),
stats: BatchStats::default(),
embedder_stats: Default::default(),

statuses,
kinds: HashSet::default(),
@@ -150,7 +146,6 @@ impl ProcessingBatch {
progress: None,
details: self.details.clone(),
stats: self.stats.clone(),
embedder_stats: self.embedder_stats.as_ref().into(),
started_at: self.started_at,
finished_at: self.finished_at,
enqueued_at: self.enqueued_at,
@@ -278,7 +273,6 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
K::TaskCancelation { .. }
| K::TaskDeletion { .. }
| K::DumpCreation { .. }
| K::Export { .. }
| K::UpgradeDatabase { .. }
| K::SnapshotCreation => (),
};
@@ -606,9 +600,6 @@ impl crate::IndexScheduler {
Details::Dump { dump_uid: _ } => {
assert_eq!(kind.as_kind(), Kind::DumpCreation);
}
Details::Export { url: _, api_key: _, payload_size: _, indexes: _ } => {
assert_eq!(kind.as_kind(), Kind::Export);
}
Details::UpgradeDatabase { from: _, to: _ } => {
assert_eq!(kind.as_kind(), Kind::UpgradeDatabase);
}
@@ -15,7 +15,7 @@ license.workspace = true
serde_json = "1.0"

[dev-dependencies]
criterion = "0.6.0"
criterion = "0.5.1"

[[bench]]
name = "depth"

@@ -14,6 +14,6 @@ license.workspace = true
# fixed version due to format breakages in v1.40
insta = { version = "=1.39.0", features = ["json", "redactions"] }
md5 = "0.7.0"
once_cell = "1.21"
once_cell = "1.20"
regex-lite = "0.1.6"
uuid = { version = "1.17.0", features = ["v4"] }

@@ -17,10 +17,10 @@ hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.12", features = ["serde"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
sha2 = "0.10.9"
thiserror = "2.0.12"
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.17.0", features = ["serde", "v4"] }
roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
sha2 = "0.10.8"
thiserror = "2.0.9"
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.11.0", features = ["serde", "v4"] }

@@ -158,7 +158,7 @@ impl AuthController {
self.store.delete_all_keys()
}

/// Insert a key directly into the store.
/// Delete all the keys in the DB.
pub fn raw_insert_key(&mut self, key: Key) -> Result<()> {
self.store.put_api_key(key)?;
Ok(())
@@ -351,7 +351,6 @@ pub struct IndexSearchRules {

fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
store.put_api_key(Key::default_chat())?;
store.put_api_key(Key::default_read_only_admin())?;
store.put_api_key(Key::default_admin())?;
store.put_api_key(Key::default_search())?;
@@ -88,13 +88,7 @@ impl HeedAuthStore {
let mut actions = HashSet::new();
for action in &key.actions {
match action {
Action::All => {
actions.extend(enum_iterator::all::<Action>());
actions.remove(&Action::AllGet);
}
Action::AllGet => {
actions.extend(enum_iterator::all::<Action>().filter(|a| a.is_read()))
}
Action::All => actions.extend(enum_iterator::all::<Action>()),
Action::DocumentsAll => {
actions.extend(
[Action::DocumentsGet, Action::DocumentsDelete, Action::DocumentsAdd]
@@ -137,14 +131,6 @@ impl HeedAuthStore {
Action::ChatsSettingsAll => {
actions.extend([Action::ChatsSettingsGet, Action::ChatsSettingsUpdate]);
}
Action::WebhooksAll => {
actions.extend([
Action::WebhooksGet,
Action::WebhooksUpdate,
Action::WebhooksDelete,
Action::WebhooksCreate,
]);
}
other => {
actions.insert(*other);
}
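The first hunk above is the behavioural change in the key-action expansion: the `All` wildcard now expands to every concrete action except the new `AllGet`, while `AllGet` keeps only actions whose `is_read()` returns true. A toy model of that read-only expansion; `ToyAction` and its methods stand in for the real `Action` enum from meilisearch-types:

    // Hedged toy model of the "AllGet"-style wildcard expansion.
    use std::collections::HashSet;

    #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
    enum ToyAction {
        DocumentsGet,
        DocumentsAdd,
        KeysDelete,
    }

    impl ToyAction {
        fn all() -> [ToyAction; 3] {
            [ToyAction::DocumentsGet, ToyAction::DocumentsAdd, ToyAction::KeysDelete]
        }
        fn is_read(self) -> bool {
            matches!(self, ToyAction::DocumentsGet)
        }
    }

    fn main() {
        // Expand the read-only wildcard: keep only the read actions.
        let read_only: HashSet<ToyAction> =
            ToyAction::all().into_iter().filter(|a| a.is_read()).collect();
        assert_eq!(read_only.len(), 1);
    }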
@@ -11,38 +11,37 @@ edition.workspace = true
license.workspace = true

[dependencies]
actix-web = { version = "4.11.0", default-features = false }
anyhow = "1.0.98"
bumpalo = "3.18.1"
actix-web = { version = "4.9.0", default-features = false }
anyhow = "1.0.95"
bumpalo = "3.16.0"
bumparaw-collections = "0.1.4"
byte-unit = { version = "5.1.6", features = ["serde"] }
convert_case = "0.8.0"
convert_case = "0.6.0"
csv = "1.3.1"
deserr = { version = "0.6.3", features = ["actix-web"] }
either = { version = "1.15.0", features = ["serde"] }
either = { version = "1.13.0", features = ["serde"] }
enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.1.2"
flate2 = "1.0.35"
fst = "0.4.7"
memmap2 = "0.9.7"
memmap2 = "0.9.5"
milli = { path = "../milli" }
roaring = { version = "0.10.12", features = ["serde"] }
rustc-hash = "2.1.1"
serde = { version = "1.0.219", features = ["derive"] }
roaring = { version = "0.10.10", features = ["serde"] }
rustc-hash = "2.1.0"
serde = { version = "1.0.217", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = { version = "1.0.140", features = ["preserve_order"] }
tar = "0.4.44"
tempfile = "3.20.0"
thiserror = "2.0.12"
time = { version = "0.3.41", features = [
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tar = "0.4.43"
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tokio = "1.45"
utoipa = { version = "5.4.0", features = ["macros"] }
uuid = { version = "1.17.0", features = ["serde", "v4"] }
tokio = "1.43"
utoipa = { version = "5.3.1", features = ["macros"] }
uuid = { version = "1.11.0", features = ["serde", "v4"] }

[dev-dependencies]
# fixed version due to format breakages in v1.40
@@ -3,7 +3,7 @@ use serde::Serialize;
use time::{Duration, OffsetDateTime};
use utoipa::ToSchema;

use crate::batches::{Batch, BatchId, BatchStats, EmbedderStatsView};
use crate::batches::{Batch, BatchId, BatchStats};
use crate::task_view::DetailsView;
use crate::tasks::serialize_duration;

@@ -14,7 +14,7 @@ pub struct BatchView {
pub uid: BatchId,
pub progress: Option<ProgressView>,
pub details: DetailsView,
pub stats: BatchStatsView,
pub stats: BatchStats,
#[serde(serialize_with = "serialize_duration", default)]
pub duration: Option<Duration>,
#[serde(with = "time::serde::rfc3339", default)]
@@ -25,26 +25,13 @@ pub struct BatchView {
pub batch_strategy: String,
}

#[derive(Debug, Clone, Serialize, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct BatchStatsView {
#[serde(flatten)]
pub stats: BatchStats,
#[serde(skip_serializing_if = "EmbedderStatsView::skip_serializing", default)]
pub embedder_requests: EmbedderStatsView,
}

impl BatchView {
pub fn from_batch(batch: &Batch) -> Self {
Self {
uid: batch.uid,
progress: batch.progress.clone(),
details: batch.details.clone(),
stats: BatchStatsView {
stats: batch.stats.clone(),
embedder_requests: batch.embedder_stats.clone(),
},
stats: batch.stats.clone(),
duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at),
started_at: batch.started_at,
finished_at: batch.finished_at,
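`BatchStatsView` exists so the batch `stats` object keeps its old JSON layout while optionally gaining embedder metrics: `#[serde(flatten)]` inlines the `BatchStats` fields at the top level, and `skip_serializing_if` drops the embedder section when it has nothing to report. A self-contained sketch of that serde combination, with stand-in types:

    // Hedged sketch; ToyStats/ToyStatsView stand in for BatchStats/BatchStatsView.
    use serde::Serialize;

    #[derive(Serialize, Default)]
    #[serde(rename_all = "camelCase")]
    struct ToyStats {
        total_nb_tasks: u64,
    }

    #[derive(Serialize, Default)]
    #[serde(rename_all = "camelCase")]
    struct ToyStatsView {
        #[serde(flatten)]
        stats: ToyStats,
        #[serde(skip_serializing_if = "Option::is_none")]
        embedder_requests: Option<u64>,
    }

    fn main() {
        // Prints {"totalNbTasks":0}: flattened fields surface at the top level
        // and the empty embedder section is omitted entirely.
        println!("{}", serde_json::to_string(&ToyStatsView::default()).unwrap());
    }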
@@ -1,6 +1,6 @@
use std::collections::BTreeMap;

use milli::progress::{EmbedderStats, ProgressView};
use milli::progress::ProgressView;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use utoipa::ToSchema;
@@ -19,8 +19,6 @@ pub struct Batch {
pub progress: Option<ProgressView>,
pub details: DetailsView,
pub stats: BatchStats,
#[serde(skip_serializing_if = "EmbedderStatsView::skip_serializing", default)]
pub embedder_stats: EmbedderStatsView,

#[serde(with = "time::serde::rfc3339")]
pub started_at: OffsetDateTime,
@@ -45,7 +43,6 @@ impl PartialEq for Batch {
progress,
details,
stats,
embedder_stats,
started_at,
finished_at,
enqueued_at,
@@ -56,7 +53,6 @@ impl PartialEq for Batch {
&& progress.is_none() == other.progress.is_none()
&& details == &other.details
&& stats == &other.stats
&& embedder_stats == &other.embedder_stats
&& started_at == &other.started_at
&& finished_at == &other.finished_at
&& enqueued_at == &other.enqueued_at
@@ -87,30 +83,3 @@ pub struct BatchStats {
#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
pub internal_database_sizes: serde_json::Map<String, serde_json::Value>,
}

#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")]
pub struct EmbedderStatsView {
pub total: usize,
pub failed: usize,
#[serde(skip_serializing_if = "Option::is_none", default)]
pub last_error: Option<String>,
}

impl From<&EmbedderStats> for EmbedderStatsView {
fn from(stats: &EmbedderStats) -> Self {
let errors = stats.errors.read().unwrap_or_else(|p| p.into_inner());
Self {
total: stats.total_count.load(std::sync::atomic::Ordering::Relaxed),
failed: errors.1 as usize,
last_error: errors.0.clone(),
}
}
}

impl EmbedderStatsView {
pub fn skip_serializing(&self) -> bool {
self.total == 0 && self.failed == 0 && self.last_error.is_none()
}
}
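The `From<&EmbedderStats>` impl above snapshots live counters, an atomic total plus a lock-protected `(last_error, failed_count)` pair, into the plain serializable view, recovering a poisoned lock rather than propagating it. A sketch of the same pattern; `ToyEmbedderStats` only mimics the shape implied by the impl, since `EmbedderStats` itself lives in milli:

    // Hedged sketch of the counter-snapshot pattern used by the impl above.
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::RwLock;

    struct ToyEmbedderStats {
        total_count: AtomicUsize,
        errors: RwLock<(Option<String>, u32)>,
    }

    fn snapshot(stats: &ToyEmbedderStats) -> (usize, usize, Option<String>) {
        // Recover a poisoned lock instead of propagating the error.
        let errors = stats.errors.read().unwrap_or_else(|p| p.into_inner());
        (
            stats.total_count.load(Ordering::Relaxed),
            errors.1 as usize,
            errors.0.clone(),
        )
    }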
@@ -237,7 +237,6 @@ InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQU
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
@@ -302,7 +301,6 @@ InvalidFacetSearchQuery , InvalidRequest , BAD_REQU
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
FacetSearchDisabled , InvalidRequest , BAD_REQUEST ;
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
InvalidSearchMedia , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ;
@@ -310,7 +308,6 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
@@ -392,13 +389,6 @@ InvalidDocumentEditionContext , InvalidRequest , BAD_REQU
InvalidDocumentEditionFunctionFilter , InvalidRequest , BAD_REQUEST ;
EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST ;
InvalidSettingsIndexChat , InvalidRequest , BAD_REQUEST ;
// Export
InvalidExportUrl , InvalidRequest , BAD_REQUEST ;
InvalidExportApiKey , InvalidRequest , BAD_REQUEST ;
InvalidExportPayloadSize , InvalidRequest , BAD_REQUEST ;
InvalidExportIndexesPatterns , InvalidRequest , BAD_REQUEST ;
InvalidExportIndexFilter , InvalidRequest , BAD_REQUEST ;
InvalidExportIndexOverrideSettings , InvalidRequest , BAD_REQUEST ;
// Experimental features - Chat Completions
UnimplementedExternalFunctionCalling , InvalidRequest , NOT_IMPLEMENTED ;
UnimplementedNonStreamingChatCompletions , InvalidRequest , NOT_IMPLEMENTED ;
@@ -416,18 +406,8 @@ InvalidChatCompletionPrompts , InvalidRequest , BAD_REQU
InvalidChatCompletionSystemPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchDescriptionPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchQueryParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchFilterParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionSearchIndexUidParamPrompt , InvalidRequest , BAD_REQUEST ;
InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST ;
// Webhooks
InvalidWebhooks , InvalidRequest , BAD_REQUEST ;
InvalidWebhookUrl , InvalidRequest , BAD_REQUEST ;
InvalidWebhookHeaders , InvalidRequest , BAD_REQUEST ;
ImmutableWebhook , InvalidRequest , BAD_REQUEST ;
InvalidWebhookUuid , InvalidRequest , BAD_REQUEST ;
WebhookNotFound , InvalidRequest , NOT_FOUND ;
ImmutableWebhookUuid , InvalidRequest , BAD_REQUEST ;
ImmutableWebhookIsEditable , InvalidRequest , BAD_REQUEST
InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST
}

impl ErrorCode for JoinError {
@@ -477,7 +457,6 @@ impl ErrorCode for milli::Error {
| UserError::MissingSourceForNested { .. }
| UserError::InvalidSettingsEmbedder { .. } => Code::InvalidSettingsEmbedders,
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
UserError::TooManyFragments(_) => Code::InvalidSettingsEmbedders,
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
UserError::MultiplePrimaryKeyCandidatesFound { .. } => {
@@ -487,8 +466,7 @@ impl ErrorCode for milli::Error {
UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct,
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSearchSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidDocumentSortableAttribute { .. } => Code::InvalidDocumentSort,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidSearchableAttribute { .. } => {
Code::InvalidSearchAttributesToSearchOn
}
@@ -504,8 +482,7 @@ impl ErrorCode for milli::Error {
UserError::InvalidVectorsMapType { .. }
| UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors,
UserError::SortError { search: true, .. } => Code::InvalidSearchSort,
UserError::SortError { search: false, .. } => Code::InvalidDocumentSort,
UserError::SortError(_) => Code::InvalidSearchSort,
UserError::InvalidMinTypoWordLenSetting(_, _) => {
Code::InvalidSettingsTypoTolerance
}
@@ -4,11 +4,10 @@ use serde::{Deserialize, Serialize};

use crate::error::{Code, ResponseError};

pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search. Meilisearch doesn't use the colon (:) syntax to filter but rather the equal (=) one. Separate filters from query and keep the q parameter empty if needed. Same for the filter parameter: keep it empty if need be. If you need to find documents that CONTAINS keywords simply put the keywords in the q parameter do no use a filter for this purpose. Whenever you get an error, read the error message and fix your error. ";
pub const DEFAULT_CHAT_SYSTEM_PROMPT: &str = "You are a highly capable research assistant with access to powerful search tools. IMPORTANT INSTRUCTIONS:1. When answering questions, you MUST make multiple tool calls (at least 2-3) to gather comprehensive information.2. Use different search queries for each tool call - vary keywords, rephrase questions, and explore different semantic angles to ensure broad coverage.3. Always explicitly announce BEFORE making each tool call by saying: \"I'll search for [specific information] now.\"4. Combine information from ALL tool calls to provide complete, nuanced answers rather than relying on a single source.5. For complex topics, break down your research into multiple targeted queries rather than using a single generic search.";
pub const DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT: &str =
"Query: 'best story about Rust before 2018' with year: 2018, 2020, 2021\nlabel: analysis, golang, javascript\ntype: story, link\nvote: 300, 298, 278\n: {\"q\": \"\", \"filter\": \"category = Rust AND type = story AND year < 2018 AND vote > 100\"}\nQuery: 'A black or green car that can go fast with red brakes' with maxspeed_kmh: 200, 150, 130\ncolor: black, grey, red, green\nbrand: Toyota, Renault, Jeep, Ferrari\n: {\"q\": \"red brakes\", \"filter\": \"maxspeed_kmh > 150 AND color IN ['black', green]\"}\nQuery: 'Superman movie released in 2018 or after' with year: 2018, 2020, 2021\ngenres: Drama, Comedy, Adventure, Fiction\n: {\"q\":\"Superman\",\"filter\":\"genres IN [Adventure, Fiction] AND year >= 2018\"}";
"Search the database for relevant JSON documents using an optional query.";
pub const DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT: &str = "The search query string used to find relevant documents in the index. This should contain keywords or phrases that best represent what the user is looking for. More specific queries will yield more precise results.";
pub const DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT: &str = "The search filter string used to find relevant documents in the index. It supports parentheses, `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox`. Here is an example: \"price > 100 AND category = 'electronics'\". The following is a list of fields that can be filtered on: ";
pub const DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT: &str = "The name of the index to search within. An index is a collection of documents organized for search. Selecting the right index ensures the most relevant results for the user query.";
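The rewritten prompts above teach the model Meilisearch's filter dialect: comparisons use `=` rather than `:`, keywords go in `q`, and constraints go in `filter`, either of which may stay empty. A sketch of a request body following those rules, reusing field names from the prompt's own examples:

    // Hedged sketch: field names ("maxspeed_kmh", "color") come from the
    // prompt's own worked example, not from a real index.
    fn main() {
        let search_request = serde_json::json!({
            // keywords only; no filter syntax inside q
            "q": "red brakes",
            // constraints use `=`, comparisons, and IN, never `:`
            "filter": "maxspeed_kmh > 150 AND color IN ['black', 'green']"
        });
        println!("{search_request}");
    }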
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
@@ -22,7 +21,6 @@ pub struct RuntimeTogglableFeatures {
pub get_task_documents_route: bool,
pub composite_embedders: bool,
pub chat_completions: bool,
pub multimodal: bool,
}

#[derive(Default, Debug, Clone, Copy)]
@@ -116,6 +114,7 @@ pub enum ChatCompletionSource {
OpenAi,
AzureOpenAi,
Mistral,
Gemini,
VLlm,
}

@@ -135,6 +134,7 @@ impl ChatCompletionSource {
AzureOpenAi if Self::old_openai_model(model) => System,
AzureOpenAi => Developer,
Mistral => System,
Gemini => System,
VLlm => System,
}
}
@@ -154,6 +154,7 @@ impl ChatCompletionSource {
match self {
OpenAi => Some("https://api.openai.com/v1/"),
Mistral => Some("https://api.mistral.ai/v1/"),
Gemini => Some("https://generativelanguage.googleapis.com/v1beta/openai/"),
AzureOpenAi | VLlm => None,
}
}
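The last hunk adds Gemini's OpenAI-compatible endpoint to the per-source defaults; `None` for `AzureOpenAi` and `VLlm` presumably means the base URL must come from user configuration. The method name is not visible in this hunk, so the sketch below assumes a signature like `fn base_url(&self) -> Option<&'static str>`:

    // Hedged usage sketch; `source_default` would come from the assumed
    // base_url() method, `configured` from a user-supplied setting.
    fn resolve_base_url(
        source_default: Option<&'static str>,
        configured: Option<String>,
    ) -> Option<String> {
        // A user-configured URL wins; otherwise fall back to the source default,
        // which after this change exists for OpenAi, Mistral, and Gemini.
        configured.or_else(|| source_default.map(str::to_string))
    }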
@@ -162,31 +163,18 @@ impl ChatCompletionSource {
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct ChatCompletionPrompts {
#[serde(default)]
pub system: String,
#[serde(default)]
pub search_description: String,
#[serde(default)]
pub search_q_param: String,
#[serde(default = "default_search_filter_param")]
pub search_filter_param: String,
#[serde(default)]
pub search_index_uid_param: String,
}

/// This function is used when the search_filter_param is not provided,
/// which can happen when the database comes from v1.15.
fn default_search_filter_param() -> String {
DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string()
}

impl Default for ChatCompletionPrompts {
fn default() -> Self {
Self {
system: DEFAULT_CHAT_SYSTEM_PROMPT.to_string(),
search_description: DEFAULT_CHAT_SEARCH_DESCRIPTION_PROMPT.to_string(),
search_q_param: DEFAULT_CHAT_SEARCH_Q_PARAM_PROMPT.to_string(),
search_filter_param: DEFAULT_CHAT_SEARCH_FILTER_PARAM_PROMPT.to_string(),
search_index_uid_param: DEFAULT_CHAT_SEARCH_INDEX_UID_PARAM_PROMPT.to_string(),
}
}
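The `#[serde(default = "default_search_filter_param")]` attribute above is what keeps databases from v1.15 loadable: when the stored prompts object lacks a `searchFilterParam` key, serde calls the function instead of failing. A self-contained sketch of that field-level default mechanism with a stand-in struct:

    // Hedged sketch of serde's field-level default-function mechanism;
    // ToyPrompts stands in for ChatCompletionPrompts.
    use serde::Deserialize;

    fn default_filter_prompt() -> String {
        "fallback filter prompt".to_string()
    }

    #[derive(Deserialize, Debug)]
    #[serde(rename_all = "camelCase")]
    struct ToyPrompts {
        #[serde(default)]
        system: String,
        #[serde(default = "default_filter_prompt")]
        search_filter_param: String,
    }

    fn main() {
        // A payload written before the field existed still deserializes;
        // the missing key is backfilled by default_filter_prompt().
        let old_payload = r#"{ "system": "stored system prompt" }"#;
        let prompts: ToyPrompts = serde_json::from_str(old_payload).unwrap();
        assert_eq!(prompts.search_filter_param, "fallback filter prompt");
        println!("{prompts:?}");
    }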
Some files were not shown because too many files have changed in this diff.