mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-30 01:35:36 +00:00
Compare commits
60 Commits
document-j
...
new-search
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fce046d84d | ||
|
|
3fc507bb44 | ||
|
|
fdbcd033fb | ||
|
|
aaab49baca | ||
|
|
0d0d6e8099 | ||
|
|
c1e351c92b | ||
|
|
67cab4cc9d | ||
|
|
f30a37b0fe | ||
|
|
a78a9f80dd | ||
|
|
439fee5434 | ||
|
|
9e858590e0 | ||
|
|
29eebd5f93 | ||
|
|
07da6edbdf | ||
|
|
22b83042e6 | ||
|
|
52ab13906a | ||
|
|
29bec8efd4 | ||
|
|
6947a8990b | ||
|
|
fbb2bb0c73 | ||
|
|
15918f53a9 | ||
|
|
d7f5f3a0a3 | ||
|
|
1afbf35f27 | ||
|
|
d7675233d5 | ||
|
|
c63c1ac32b | ||
|
|
6171dcde0d | ||
|
|
04bc134324 | ||
|
|
8ff39d927d | ||
|
|
ffd461c800 | ||
|
|
9134d27980 | ||
|
|
f60242979f | ||
|
|
d347417cfd | ||
|
|
55d54afd69 | ||
|
|
dca7679c47 | ||
|
|
a34b692396 | ||
|
|
63829b62e9 | ||
|
|
44c8252ad5 | ||
|
|
19ae428890 | ||
|
|
7adcb657ae | ||
|
|
9624768976 | ||
|
|
5025acfd2a | ||
|
|
4bbfdccc3e | ||
|
|
a5b24b54b8 | ||
|
|
461e69c143 | ||
|
|
915aeafefe | ||
|
|
408529d8b2 | ||
|
|
1724ab6d94 | ||
|
|
49a500a342 | ||
|
|
f26eabcfa1 | ||
|
|
b468c090f3 | ||
|
|
c14114840e | ||
|
|
7933d1f9ea | ||
|
|
d5a5372aba | ||
|
|
0d5e176dc2 | ||
|
|
d6f36a773d | ||
|
|
a8d55562e9 | ||
|
|
40d649ec9e | ||
|
|
c272ac8204 | ||
|
|
e18c677f0e | ||
|
|
84a288da57 | ||
|
|
cbfc325b56 | ||
|
|
ea640b076e |
160
.github/workflows/publish-docker-images.yml
vendored
160
.github/workflows/publish-docker-images.yml
vendored
@@ -14,10 +14,105 @@ on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
docker:
|
||||
runs-on: docker
|
||||
build:
|
||||
runs-on: ${{ matrix.runner }}
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
platform: [amd64, arm64]
|
||||
edition: [community, enterprise]
|
||||
include:
|
||||
- platform: amd64
|
||||
runner: ubuntu-24.04
|
||||
- platform: arm64
|
||||
runner: ubuntu-24.04-arm
|
||||
- edition: community
|
||||
registry: getmeili/meilisearch
|
||||
feature-flag: ""
|
||||
- edition: enterprise
|
||||
registry: getmeili/meilisearch-enterprise
|
||||
feature-flag: "--features enterprise"
|
||||
|
||||
permissions: {}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
platform=linux/${{ matrix.platform }}
|
||||
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
platforms: linux/${{ matrix.platform }}
|
||||
install: true
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ matrix.registry }}
|
||||
# Prevent `latest` to be updated for each new tag pushed.
|
||||
# We need latest and `vX.Y` tags to only be pushed for the stable Meilisearch releases.
|
||||
flavor: latest=false
|
||||
tags: |
|
||||
type=ref,event=tag
|
||||
type=raw,value=nightly,enable=${{ github.event_name != 'push' }}
|
||||
type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
|
||||
type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
|
||||
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
|
||||
|
||||
- name: Build and push by digest
|
||||
uses: docker/build-push-action@v6
|
||||
id: build-and-push
|
||||
with:
|
||||
platforms: linux/${{ matrix.platform }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
tags: ${{ matrix.registry }}
|
||||
outputs: type=image,push-by-digest=true,name-canonical=true,push=true
|
||||
build-args: |
|
||||
COMMIT_SHA=${{ github.sha }}
|
||||
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
|
||||
GIT_TAG=${{ github.ref_name }}
|
||||
EXTRA_ARGS=${{ matrix.feature-flag }}
|
||||
|
||||
- name: Export digest
|
||||
run: |
|
||||
mkdir -p ${{ runner.temp }}/digests
|
||||
digest="${{ steps.build-and-push.outputs.digest }}"
|
||||
touch "${{ runner.temp }}/digests/${digest#sha256:}"
|
||||
|
||||
- name: Upload digest
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: digests-${{ matrix.edition }}-${{ env.PLATFORM_PAIR }}
|
||||
path: ${{ runner.temp }}/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
merge:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
edition: [community, enterprise]
|
||||
include:
|
||||
- edition: community
|
||||
registry: getmeili/meilisearch
|
||||
- edition: enterprise
|
||||
registry: getmeili/meilisearch-enterprise
|
||||
needs:
|
||||
- build
|
||||
|
||||
permissions:
|
||||
id-token: write # This is needed to use Cosign in keyless mode
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
@@ -58,26 +153,30 @@ jobs:
|
||||
|
||||
echo "date=$commit_date" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Install cosign
|
||||
uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # tag=v3.10.0
|
||||
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: ${{ runner.temp }}/digests
|
||||
pattern: digests-${{ matrix.edition }}-*
|
||||
merge-multiple: true
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: getmeili/meilisearch
|
||||
images: ${{ matrix.registry }}
|
||||
# Prevent `latest` to be updated for each new tag pushed.
|
||||
# We need latest and `vX.Y` tags to only be pushed for the stable Meilisearch releases.
|
||||
flavor: latest=false
|
||||
@@ -88,26 +187,24 @@ jobs:
|
||||
type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
|
||||
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
id: build-and-push
|
||||
with:
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
build-args: |
|
||||
COMMIT_SHA=${{ github.sha }}
|
||||
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
|
||||
GIT_TAG=${{ github.ref_name }}
|
||||
- name: Create manifest list and push
|
||||
working-directory: ${{ runner.temp }}/digests
|
||||
run: |
|
||||
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
|
||||
$(printf '${{ matrix.registry }}@sha256:%s ' *)
|
||||
|
||||
- name: Inspect image to fetch digest to sign
|
||||
run: |
|
||||
digest=$(docker buildx imagetools inspect --format='{{ json .Manifest }}' ${{ matrix.registry }}:${{ steps.meta.outputs.version }} | jq -r '.digest')
|
||||
echo "DIGEST=${digest}" >> $GITHUB_ENV
|
||||
|
||||
- name: Sign the images with GitHub OIDC Token
|
||||
env:
|
||||
DIGEST: ${{ steps.build-and-push.outputs.digest }}
|
||||
TAGS: ${{ steps.meta.outputs.tags }}
|
||||
run: |
|
||||
images=""
|
||||
for tag in ${TAGS}; do
|
||||
images+="${tag}@${DIGEST} "
|
||||
images+="${tag}@${{ env.DIGEST }} "
|
||||
done
|
||||
cosign sign --yes ${images}
|
||||
|
||||
@@ -121,22 +218,3 @@ jobs:
|
||||
repository: meilisearch/meilisearch-cloud
|
||||
event-type: cloud-docker-build
|
||||
client-payload: '{ "meilisearch_version": "${{ github.ref_name }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }'
|
||||
|
||||
# Send notification to Swarmia to notify of a deployment: https://app.swarmia.com
|
||||
# - name: 'Setup jq'
|
||||
# uses: dcarbone/install-jq-action
|
||||
# - name: Send deployment to Swarmia
|
||||
# if: github.event_name == 'push' && success()
|
||||
# run: |
|
||||
# JSON_STRING=$( jq --null-input --compact-output \
|
||||
# --arg version "${{ github.ref_name }}" \
|
||||
# --arg appName "meilisearch" \
|
||||
# --arg environment "production" \
|
||||
# --arg commitSha "${{ github.sha }}" \
|
||||
# --arg repositoryFullName "${{ github.repository }}" \
|
||||
# '{"version": $version, "appName": $appName, "environment": $environment, "commitSha": $commitSha, "repositoryFullName": $repositoryFullName}' )
|
||||
|
||||
# curl -H "Authorization: ${{ secrets.SWARMIA_DEPLOYMENTS_AUTHORIZATION }}" \
|
||||
# -H "Content-Type: application/json" \
|
||||
# -d "$JSON_STRING" \
|
||||
# https://hook.swarmia.com/deployments
|
||||
|
||||
176
.github/workflows/publish-release-assets.yml
vendored
176
.github/workflows/publish-release-assets.yml
vendored
@@ -32,157 +32,61 @@ jobs:
|
||||
if: github.event_name == 'release' && steps.check-tag-format.outputs.stable == 'true'
|
||||
run: bash .github/scripts/check-release.sh
|
||||
|
||||
publish-linux:
|
||||
name: Publish binary for Linux
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/meilisearch
|
||||
asset_name: meilisearch-linux-amd64
|
||||
tag: ${{ github.ref }}
|
||||
|
||||
publish-macos-windows:
|
||||
name: Publish binary for ${{ matrix.os }}
|
||||
publish-binaries:
|
||||
name: Publish binary for ${{ matrix.release }} ${{ matrix.edition }} edition
|
||||
runs-on: ${{ matrix.os }}
|
||||
needs: check-version
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-14, windows-2022]
|
||||
edition: [community, enterprise]
|
||||
release:
|
||||
[macos-amd64, macos-aarch64, windows, linux-amd64, linux-aarch64]
|
||||
include:
|
||||
- os: macos-14
|
||||
artifact_name: meilisearch
|
||||
asset_name: meilisearch-macos-amd64
|
||||
- os: windows-2022
|
||||
artifact_name: meilisearch.exe
|
||||
asset_name: meilisearch-windows-amd64.exe
|
||||
- edition: "community"
|
||||
feature-flag: ""
|
||||
edition-suffix: ""
|
||||
- edition: "enterprise"
|
||||
feature-flag: "--features enterprise"
|
||||
edition-suffix: "enterprise-"
|
||||
- release: macos-amd64
|
||||
os: macos-15-intel
|
||||
binary_path: release/meilisearch
|
||||
asset_name: macos-amd64
|
||||
extra-args: ""
|
||||
- release: macos-aarch64
|
||||
os: macos-14
|
||||
binary_path: aarch64-apple-darwin/release/meilisearch
|
||||
asset_name: macos-apple-silicon
|
||||
extra-args: "--target aarch64-apple-darwin"
|
||||
- release: windows
|
||||
os: windows-2022
|
||||
binary_path: release/meilisearch.exe
|
||||
asset_name: windows-amd64.exe
|
||||
extra-args: ""
|
||||
- release: linux-amd64
|
||||
os: ubuntu-22.04
|
||||
binary_path: release/meilisearch
|
||||
asset_name: linux-amd64
|
||||
extra-args: "--target x86_64-unknown-linux-gnu"
|
||||
- release: linux-aarch64
|
||||
os: ubuntu-22.04-arm
|
||||
binary_path: aarch64-unknown-linux-gnu/release/meilisearch
|
||||
asset_name: linux-aarch64
|
||||
extra-args: "--target aarch64-unknown-linux-gnu"
|
||||
needs: check-version
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
run: cargo build --release --locked ${{ matrix.feature-flag }} ${{ matrix.extra-args }}
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/${{ matrix.artifact_name }}
|
||||
asset_name: ${{ matrix.asset_name }}
|
||||
tag: ${{ github.ref }}
|
||||
|
||||
publish-macos-apple-silicon:
|
||||
name: Publish binary for macOS silicon
|
||||
runs-on: macos-14
|
||||
needs: check-version
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- target: aarch64-apple-darwin
|
||||
asset_name: meilisearch-macos-apple-silicon
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
- name: Cargo build
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: build
|
||||
args: --release --target ${{ matrix.target }}
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
asset_name: ${{ matrix.asset_name }}
|
||||
tag: ${{ github.ref }}
|
||||
|
||||
publish-aarch64:
|
||||
name: Publish binary for aarch64
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
env:
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- target: aarch64-unknown-linux-gnu
|
||||
asset_name: meilisearch-linux-aarch64
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update -y && apt upgrade -y
|
||||
apt-get install -y curl build-essential gcc-aarch64-linux-gnu
|
||||
- name: Set up Docker for cross compilation
|
||||
run: |
|
||||
apt-get install -y curl apt-transport-https ca-certificates software-properties-common
|
||||
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
|
||||
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
|
||||
apt-get update -y && apt-get install -y docker-ce
|
||||
- name: Installing Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@1.89
|
||||
with:
|
||||
profile: minimal
|
||||
target: ${{ matrix.target }}
|
||||
- name: Configure target aarch64 GNU
|
||||
## Environment variable is not passed using env:
|
||||
## LD gold won't work with MUSL
|
||||
# env:
|
||||
# JEMALLOC_SYS_WITH_LG_PAGE: 16
|
||||
# RUSTFLAGS: '-Clink-arg=-fuse-ld=gold'
|
||||
run: |
|
||||
echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config
|
||||
echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config
|
||||
echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV
|
||||
- name: Install a default toolchain that will be used to build cargo cross
|
||||
run: |
|
||||
rustup default stable
|
||||
- name: Cargo build
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: build
|
||||
use-cross: true
|
||||
args: --release --target ${{ matrix.target }}
|
||||
env:
|
||||
CROSS_DOCKER_IN_DOCKER: true
|
||||
- name: List target output files
|
||||
run: ls -lR ./target
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
asset_name: ${{ matrix.asset_name }}
|
||||
file: target/${{ matrix.binary_path }}
|
||||
asset_name: meilisearch-${{ matrix.edition-suffix }}${{ matrix.asset_name }}
|
||||
tag: ${{ github.ref }}
|
||||
|
||||
publish-openapi-file:
|
||||
|
||||
24
.github/workflows/sdks-tests.yml
vendored
24
.github/workflows/sdks-tests.yml
vendored
@@ -68,7 +68,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
@@ -92,7 +92,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
@@ -122,7 +122,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
@@ -149,7 +149,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
@@ -184,7 +184,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
@@ -213,7 +213,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
@@ -238,7 +238,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
@@ -263,7 +263,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
@@ -284,7 +284,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
@@ -307,7 +307,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
@@ -338,7 +338,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
@@ -370,7 +370,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
image: getmeili/meilisearch-enterprise:${{ needs.define-docker-image.outputs.docker-image }}
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
|
||||
39
.github/workflows/test-suite.yml
vendored
39
.github/workflows/test-suite.yml
vendored
@@ -15,8 +15,12 @@ env:
|
||||
|
||||
jobs:
|
||||
test-linux:
|
||||
name: Tests on ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
name: Tests on Ubuntu
|
||||
runs-on: ${{ matrix.runner }}
|
||||
strategy:
|
||||
matrix:
|
||||
runner: [ubuntu-24.04, ubuntu-24.04-arm]
|
||||
features: ["", "--features enterprise"]
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
@@ -39,7 +43,7 @@ jobs:
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all
|
||||
args: --locked --release --all ${{ matrix.features }}
|
||||
|
||||
test-others:
|
||||
name: Tests on ${{ matrix.os }}
|
||||
@@ -48,6 +52,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-14, windows-2022]
|
||||
features: ["", "--features enterprise"]
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Cache dependencies
|
||||
@@ -62,7 +67,7 @@ jobs:
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all
|
||||
args: --locked --release --all ${{ matrix.features }}
|
||||
|
||||
test-all-features:
|
||||
name: Tests almost all features
|
||||
@@ -88,6 +93,9 @@ jobs:
|
||||
ollama-ubuntu:
|
||||
name: Test with Ollama
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
features: ["", "--features enterprise"]
|
||||
env:
|
||||
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
|
||||
steps:
|
||||
@@ -115,7 +123,7 @@ jobs:
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all --features test-ollama ollama
|
||||
args: --locked --release --all --features test-ollama ollama ${{ matrix.features }}
|
||||
|
||||
test-disabled-tokenization:
|
||||
name: Test disabled tokenization
|
||||
@@ -143,28 +151,25 @@ jobs:
|
||||
# We run tests in debug also, to make sure that the debug_assertions are hit
|
||||
test-debug:
|
||||
name: Run tests in debug
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-22.04 to compile with glibc 2.35
|
||||
image: ubuntu:22.04
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
features: ["", "--features enterprise"]
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.8.0
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --all
|
||||
args: --locked --all ${{ matrix.features }}
|
||||
|
||||
clippy:
|
||||
name: Run Clippy
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
features: ["", "--features enterprise"]
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
@@ -177,7 +182,7 @@ jobs:
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: clippy
|
||||
args: --all-targets -- --deny warnings
|
||||
args: --all-targets ${{ matrix.features }} -- --deny warnings
|
||||
|
||||
fmt:
|
||||
name: Run Rustfmt
|
||||
|
||||
1071
Cargo.lock
generated
1071
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -50,3 +50,5 @@ opt-level = 3
|
||||
opt-level = 3
|
||||
[profile.dev.package.roaring]
|
||||
opt-level = 3
|
||||
[profile.dev.package.gemm-f16]
|
||||
opt-level = 3
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
[build.env]
|
||||
passthrough = [
|
||||
"RUST_BACKTRACE",
|
||||
"CARGO_TERM_COLOR",
|
||||
"RUSTFLAGS",
|
||||
"JEMALLOC_SYS_WITH_LG_PAGE"
|
||||
]
|
||||
@@ -8,16 +8,14 @@ WORKDIR /
|
||||
ARG COMMIT_SHA
|
||||
ARG COMMIT_DATE
|
||||
ARG GIT_TAG
|
||||
ARG EXTRA_ARGS
|
||||
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_DESCRIBE=${GIT_TAG}
|
||||
ENV RUSTFLAGS="-C target-feature=-crt-static"
|
||||
|
||||
COPY . .
|
||||
RUN set -eux; \
|
||||
apkArch="$(apk --print-arch)"; \
|
||||
if [ "$apkArch" = "aarch64" ]; then \
|
||||
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
|
||||
fi && \
|
||||
cargo build --release -p meilisearch -p meilitool
|
||||
cargo build --release -p meilisearch -p meilitool ${EXTRA_ARGS}
|
||||
|
||||
# Run
|
||||
FROM alpine:3.22
|
||||
|
||||
@@ -11,27 +11,27 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.98"
|
||||
bumpalo = "3.18.1"
|
||||
csv = "1.3.1"
|
||||
memmap2 = "0.9.7"
|
||||
anyhow = "1.0.100"
|
||||
bumpalo = "3.19.0"
|
||||
csv = "1.4.0"
|
||||
memmap2 = "0.9.9"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tempfile = "3.20.0"
|
||||
mimalloc = { version = "0.1.48", default-features = false }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tempfile = "3.23.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.6.0", features = ["html_reports"] }
|
||||
criterion = { version = "0.7.0", features = ["html_reports"] }
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
roaring = "0.10.12"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.98"
|
||||
bytes = "1.10.1"
|
||||
convert_case = "0.8.0"
|
||||
flate2 = "1.1.2"
|
||||
reqwest = { version = "0.12.20", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
anyhow = "1.0.100"
|
||||
bytes = "1.11.0"
|
||||
convert_case = "0.9.0"
|
||||
flate2 = "1.1.5"
|
||||
reqwest = { version = "0.12.24", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
|
||||
[features]
|
||||
default = ["milli/all-tokenizations"]
|
||||
|
||||
@@ -11,8 +11,8 @@ license.workspace = true
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
time = { version = "0.3.41", features = ["parsing"] }
|
||||
time = { version = "0.3.44", features = ["parsing"] }
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.98"
|
||||
anyhow = "1.0.100"
|
||||
vergen-git2 = "1.0.7"
|
||||
|
||||
@@ -11,24 +11,27 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.98"
|
||||
flate2 = "1.1.2"
|
||||
anyhow = "1.0.100"
|
||||
flate2 = "1.1.5"
|
||||
http = "1.3.1"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
once_cell = "1.21.3"
|
||||
regex = "1.11.1"
|
||||
regex = "1.12.2"
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
|
||||
[features]
|
||||
enterprise = ["meilisearch-types/enterprise"]
|
||||
@@ -262,13 +262,13 @@ pub(crate) mod test {
|
||||
use big_s::S;
|
||||
use maplit::{btreemap, btreeset};
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
|
||||
use meilisearch_types::enterprise_edition::network::{Network, Remote};
|
||||
use meilisearch_types::facet_values_sort::FacetValuesSort;
|
||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::keys::{Action, Key};
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::{self, FilterableAttributesRule};
|
||||
use meilisearch_types::network::{Network, Remote};
|
||||
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
|
||||
use meilisearch_types::task_view::DetailsView;
|
||||
use meilisearch_types::tasks::{BatchStopReason, Details, Kind, Status};
|
||||
@@ -317,7 +317,6 @@ pub(crate) mod test {
|
||||
FilterableAttributesRule::Field(S("race")),
|
||||
FilterableAttributesRule::Field(S("age")),
|
||||
]),
|
||||
foreign_keys: Setting::NotSet,
|
||||
sortable_attributes: Setting::Set(btreeset! { S("age") }),
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
|
||||
@@ -349,7 +349,6 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
v5::settings::Setting::Reset => v6::Setting::Reset,
|
||||
v5::settings::Setting::NotSet => v6::Setting::NotSet,
|
||||
},
|
||||
foreign_keys: v6::Setting::NotSet,
|
||||
sortable_attributes: settings.sortable_attributes.into(),
|
||||
ranking_rules: {
|
||||
match settings.ranking_rules {
|
||||
|
||||
@@ -24,7 +24,7 @@ pub type Batch = meilisearch_types::batches::Batch;
|
||||
pub type Key = meilisearch_types::keys::Key;
|
||||
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
|
||||
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
pub type Network = meilisearch_types::enterprise_edition::network::Network;
|
||||
pub type Network = meilisearch_types::network::Network;
|
||||
pub type Webhooks = meilisearch_types::webhooks::WebhooksDumpView;
|
||||
|
||||
// ===== Other types to clarify the code of the compat module
|
||||
|
||||
@@ -5,9 +5,9 @@ use std::path::PathBuf;
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::enterprise_edition::network::Network;
|
||||
use meilisearch_types::features::{ChatCompletionSettings, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::keys::Key;
|
||||
use meilisearch_types::network::Network;
|
||||
use meilisearch_types::settings::{Checked, Settings};
|
||||
use meilisearch_types::webhooks::WebhooksDumpView;
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
@@ -11,7 +11,7 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
tracing = "0.1.41"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
@@ -16,7 +16,7 @@ license.workspace = true
|
||||
serde_json = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.6.0", features = ["html_reports"] }
|
||||
criterion = { version = "0.7.0", features = ["html_reports"] }
|
||||
|
||||
[[bench]]
|
||||
name = "benchmarks"
|
||||
|
||||
@@ -11,12 +11,12 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.4.1", features = ["derive"] }
|
||||
bumpalo = "3.18.1"
|
||||
clap = { version = "4.5.40", features = ["derive"] }
|
||||
arbitrary = { version = "1.4.2", features = ["derive"] }
|
||||
bumpalo = "3.19.0"
|
||||
clap = { version = "4.5.52", features = ["derive"] }
|
||||
either = "1.15.0"
|
||||
fastrand = "2.3.0"
|
||||
milli = { path = "../milli" }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tempfile = "3.20.0"
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tempfile = "3.23.0"
|
||||
|
||||
@@ -11,33 +11,33 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.98"
|
||||
anyhow = "1.0.100"
|
||||
bincode = "1.3.3"
|
||||
byte-unit = "5.1.6"
|
||||
bytes = "1.10.1"
|
||||
bumpalo = "3.18.1"
|
||||
bytes = "1.11.0"
|
||||
bumpalo = "3.19.0"
|
||||
bumparaw-collections = "0.1.4"
|
||||
convert_case = "0.8.0"
|
||||
csv = "1.3.1"
|
||||
convert_case = "0.9.0"
|
||||
csv = "1.4.0"
|
||||
derive_builder = "0.20.2"
|
||||
dump = { path = "../dump" }
|
||||
enum-iterator = "2.1.0"
|
||||
enum-iterator = "2.3.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.1.2"
|
||||
indexmap = "2.9.0"
|
||||
flate2 = "1.1.5"
|
||||
indexmap = "2.12.0"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.7"
|
||||
memmap2 = "0.9.9"
|
||||
page_size = "0.6.0"
|
||||
rayon = "1.10.0"
|
||||
rayon = "1.11.0"
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
@@ -45,11 +45,11 @@ time = { version = "0.3.41", features = [
|
||||
] }
|
||||
tracing = "0.1.41"
|
||||
ureq = "2.12.1"
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
backoff = "0.4.0"
|
||||
reqwest = { version = "0.12.23", features = ["rustls-tls", "http2"], default-features = false }
|
||||
reqwest = { version = "0.12.24", features = ["rustls-tls", "http2"], default-features = false }
|
||||
rusty-s3 = "0.8.1"
|
||||
tokio = { version = "1.47.1", features = ["full"] }
|
||||
tokio = { version = "1.48.0", features = ["full"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use meilisearch_types::enterprise_edition::network::Network;
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
|
||||
use meilisearch_types::network::Network;
|
||||
|
||||
use crate::error::FeatureNotEnabledError;
|
||||
use crate::Result;
|
||||
@@ -171,19 +171,6 @@ impl RoFeatures {
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_foreign_keys_setting(&self, disabled_action: &'static str) -> Result<()> {
|
||||
if self.runtime.foreign_keys {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(FeatureNotEnabledError {
|
||||
disabled_action,
|
||||
feature: "foreign_keys",
|
||||
issue_link: "https://github.com/orgs/meilisearch/discussions/873",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FeatureData {
|
||||
|
||||
@@ -54,7 +54,6 @@ pub use features::RoFeatures;
|
||||
use flate2::bufread::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::enterprise_edition::network::Network;
|
||||
use meilisearch_types::features::{
|
||||
ChatCompletionSettings, InstanceTogglableFeatures, RuntimeTogglableFeatures,
|
||||
};
|
||||
@@ -67,6 +66,7 @@ use meilisearch_types::milli::vector::{
|
||||
Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
|
||||
};
|
||||
use meilisearch_types::milli::{self, Index};
|
||||
use meilisearch_types::network::Network;
|
||||
use meilisearch_types::task_view::TaskView;
|
||||
use meilisearch_types::tasks::{KindWithContent, Task, TaskNetwork};
|
||||
use meilisearch_types::webhooks::{Webhook, WebhooksDumpView, WebhooksView};
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, foreign_keys: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, vector_store: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, foreign_keys: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, vector_store: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, vector_store: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, vector_store: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, foreign_keys: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, vector_store: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, foreign_keys: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, vector_store: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, vector_store: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), indexing_fragments: NotSet, search_fragments: NotSet, request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, chat: NotSet, vector_store: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
||||
@@ -15,7 +15,7 @@ license.workspace = true
|
||||
serde_json = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.6.0"
|
||||
criterion = "0.7.0"
|
||||
|
||||
[[bench]]
|
||||
name = "depth"
|
||||
|
||||
@@ -13,7 +13,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = { version = "=1.39.0", features = ["json", "redactions"] }
|
||||
md5 = "0.7.0"
|
||||
md5 = "0.8.0"
|
||||
once_cell = "1.21"
|
||||
regex-lite = "0.1.6"
|
||||
uuid = { version = "1.17.0", features = ["v4"] }
|
||||
regex-lite = "0.1.8"
|
||||
uuid = { version = "1.18.1", features = ["v4"] }
|
||||
|
||||
@@ -12,15 +12,15 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
base64 = "0.22.1"
|
||||
enum-iterator = "2.1.0"
|
||||
enum-iterator = "2.3.0"
|
||||
hmac = "0.12.1"
|
||||
maplit = "1.0.2"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
rand = "0.8.5"
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
sha2 = "0.10.9"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
@@ -11,38 +11,38 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
actix-web = { version = "4.11.0", default-features = false }
|
||||
anyhow = "1.0.98"
|
||||
bumpalo = "3.18.1"
|
||||
actix-web = { version = "4.12.0", default-features = false }
|
||||
anyhow = "1.0.100"
|
||||
bumpalo = "3.19.0"
|
||||
bumparaw-collections = "0.1.4"
|
||||
byte-unit = { version = "5.1.6", features = ["serde"] }
|
||||
convert_case = "0.8.0"
|
||||
csv = "1.3.1"
|
||||
deserr = { version = "0.6.3", features = ["actix-web"] }
|
||||
convert_case = "0.9.0"
|
||||
csv = "1.4.0"
|
||||
deserr = { version = "0.6.4", features = ["actix-web"] }
|
||||
either = { version = "1.15.0", features = ["serde"] }
|
||||
enum-iterator = "2.1.0"
|
||||
enum-iterator = "2.3.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.1.2"
|
||||
flate2 = "1.1.5"
|
||||
fst = "0.4.7"
|
||||
memmap2 = "0.9.7"
|
||||
memmap2 = "0.9.9"
|
||||
milli = { path = "../milli" }
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
rustc-hash = "2.1.1"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde-cs = "0.2.4"
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tokio = "1.45"
|
||||
tokio = "1.48"
|
||||
utoipa = { version = "5.4.0", features = ["macros"] }
|
||||
uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
# fixed version due to format breakages in v1.40
|
||||
@@ -56,6 +56,9 @@ all-tokenizations = ["milli/all-tokenizations"]
|
||||
# chinese specialized tokenization
|
||||
chinese = ["milli/chinese"]
|
||||
chinese-pinyin = ["milli/chinese-pinyin"]
|
||||
|
||||
enterprise = ["milli/enterprise"]
|
||||
|
||||
# hebrew specialized tokenization
|
||||
hebrew = ["milli/hebrew"]
|
||||
# japanese specialized tokenization
|
||||
|
||||
16
crates/meilisearch-types/src/community_edition.rs
Normal file
16
crates/meilisearch-types/src/community_edition.rs
Normal file
@@ -0,0 +1,16 @@
|
||||
pub mod network {
|
||||
use milli::update::new::indexer::current_edition::sharding::Shards;
|
||||
|
||||
use crate::network::Network;
|
||||
|
||||
impl Network {
|
||||
pub fn shards(&self) -> Option<Shards> {
|
||||
None
|
||||
}
|
||||
|
||||
pub fn sharding(&self) -> bool {
|
||||
// always false in CE
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,21 +3,9 @@
|
||||
// Use of this source code is governed by the Business Source License 1.1,
|
||||
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use milli::update::new::indexer::enterprise_edition::sharding::Shards;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Network {
|
||||
#[serde(default, rename = "self")]
|
||||
pub local: Option<String>,
|
||||
#[serde(default)]
|
||||
pub remotes: BTreeMap<String, Remote>,
|
||||
#[serde(default)]
|
||||
pub sharding: bool,
|
||||
}
|
||||
use crate::network::Network;
|
||||
|
||||
impl Network {
|
||||
pub fn shards(&self) -> Option<Shards> {
|
||||
@@ -34,14 +22,8 @@ impl Network {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Remote {
|
||||
pub url: String,
|
||||
#[serde(default)]
|
||||
pub search_api_key: Option<String>,
|
||||
#[serde(default)]
|
||||
pub write_api_key: Option<String>,
|
||||
pub fn sharding(&self) -> bool {
|
||||
self.sharding
|
||||
}
|
||||
}
|
||||
|
||||
@@ -327,7 +327,6 @@ InvalidSettingsFacetSearch , InvalidRequest , BAD_REQU
|
||||
InvalidSettingsPrefixSearch , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsForeignKeys , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -434,6 +433,7 @@ InvalidChatCompletionSearchQueryParamPrompt , InvalidRequest , BAD_REQU
|
||||
InvalidChatCompletionSearchFilterParamPrompt , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidChatCompletionSearchIndexUidParamPrompt , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidChatCompletionPreQueryPrompt , InvalidRequest , BAD_REQUEST ;
|
||||
RequiresEnterpriseEdition , InvalidRequest , UNAVAILABLE_FOR_LEGAL_REASONS ;
|
||||
// Webhooks
|
||||
InvalidWebhooks , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidWebhookUrl , InvalidRequest , BAD_REQUEST ;
|
||||
|
||||
@@ -22,7 +22,6 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub chat_completions: bool,
|
||||
pub multimodal: bool,
|
||||
pub vector_store_setting: bool,
|
||||
pub foreign_keys: bool,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Copy)]
|
||||
|
||||
@@ -2,10 +2,17 @@
|
||||
|
||||
pub mod batch_view;
|
||||
pub mod batches;
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
pub mod community_edition;
|
||||
pub mod compression;
|
||||
pub mod deserr;
|
||||
pub mod document_formats;
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub mod enterprise_edition;
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
pub use community_edition as current_edition;
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub use enterprise_edition as current_edition;
|
||||
pub mod error;
|
||||
pub mod facet_values_sort;
|
||||
pub mod features;
|
||||
@@ -13,6 +20,7 @@ pub mod index_uid;
|
||||
pub mod index_uid_pattern;
|
||||
pub mod keys;
|
||||
pub mod locales;
|
||||
pub mod network;
|
||||
pub mod settings;
|
||||
pub mod star_or;
|
||||
pub mod task_view;
|
||||
|
||||
23
crates/meilisearch-types/src/network.rs
Normal file
23
crates/meilisearch-types/src/network.rs
Normal file
@@ -0,0 +1,23 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Network {
|
||||
#[serde(default, rename = "self")]
|
||||
pub local: Option<String>,
|
||||
#[serde(default)]
|
||||
pub remotes: BTreeMap<String, Remote>,
|
||||
#[serde(default)]
|
||||
pub sharding: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Remote {
|
||||
pub url: String,
|
||||
#[serde(default)]
|
||||
pub search_api_key: Option<String>,
|
||||
#[serde(default)]
|
||||
pub write_api_key: Option<String>,
|
||||
}
|
||||
@@ -15,10 +15,7 @@ pub use milli::update::ChatSettings;
|
||||
use milli::update::Setting;
|
||||
use milli::vector::db::IndexEmbeddingConfig;
|
||||
use milli::vector::VectorStoreBackend;
|
||||
use milli::{
|
||||
Criterion, CriterionError, FilterableAttributesRule, ForeignKey, Index,
|
||||
DEFAULT_VALUES_PER_FACET,
|
||||
};
|
||||
use milli::{Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET};
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use utoipa::ToSchema;
|
||||
|
||||
@@ -224,12 +221,6 @@ pub struct Settings<T> {
|
||||
#[schema(value_type = Option<Vec<String>>, example = json!(["release_date"]))]
|
||||
pub sortable_attributes: Setting<BTreeSet<String>>,
|
||||
|
||||
/// Foreign keys to use for cross-index filtering search.
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsForeignKeys>)]
|
||||
#[schema(value_type = Option<Vec<ForeignKey>>, example = json!([{"foreignIndexUid": "products", "fieldName": "productId"}]))]
|
||||
pub foreign_keys: Setting<Vec<ForeignKey>>,
|
||||
|
||||
/// List of ranking rules sorted by order of importance. The order is customizable.
|
||||
/// [A list of ordered built-in ranking rules](https://www.meilisearch.com/docs/learn/relevancy/relevancy).
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
@@ -385,7 +376,6 @@ impl Settings<Checked> {
|
||||
displayed_attributes: Setting::Reset.into(),
|
||||
searchable_attributes: Setting::Reset.into(),
|
||||
filterable_attributes: Setting::Reset,
|
||||
foreign_keys: Setting::Reset,
|
||||
sortable_attributes: Setting::Reset,
|
||||
ranking_rules: Setting::Reset,
|
||||
stop_words: Setting::Reset,
|
||||
@@ -414,7 +404,6 @@ impl Settings<Checked> {
|
||||
displayed_attributes,
|
||||
searchable_attributes,
|
||||
filterable_attributes,
|
||||
foreign_keys,
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
@@ -442,7 +431,6 @@ impl Settings<Checked> {
|
||||
searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
foreign_keys,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
non_separator_tokens,
|
||||
@@ -494,7 +482,6 @@ impl Settings<Unchecked> {
|
||||
displayed_attributes: displayed_attributes.into(),
|
||||
searchable_attributes: searchable_attributes.into(),
|
||||
filterable_attributes: self.filterable_attributes,
|
||||
foreign_keys: self.foreign_keys,
|
||||
sortable_attributes: self.sortable_attributes,
|
||||
ranking_rules: self.ranking_rules,
|
||||
stop_words: self.stop_words,
|
||||
@@ -556,7 +543,6 @@ impl Settings<Unchecked> {
|
||||
.sortable_attributes
|
||||
.clone()
|
||||
.or(self.sortable_attributes.clone()),
|
||||
foreign_keys: other.foreign_keys.clone().or(self.foreign_keys.clone()),
|
||||
ranking_rules: other.ranking_rules.clone().or(self.ranking_rules.clone()),
|
||||
stop_words: other.stop_words.clone().or(self.stop_words.clone()),
|
||||
non_separator_tokens: other
|
||||
@@ -618,7 +604,6 @@ pub fn apply_settings_to_builder(
|
||||
searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
foreign_keys,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
non_separator_tokens,
|
||||
@@ -666,12 +651,6 @@ pub fn apply_settings_to_builder(
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match foreign_keys {
|
||||
Setting::Set(ref keys) => builder.set_foreign_keys(keys.clone().into_iter().collect()),
|
||||
Setting::Reset => builder.reset_foreign_keys(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match ranking_rules {
|
||||
Setting::Set(ref criteria) => {
|
||||
builder.set_criteria(criteria.iter().map(|c| c.clone().into()).collect())
|
||||
@@ -889,8 +868,6 @@ pub fn settings(
|
||||
|
||||
let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
|
||||
|
||||
let foreign_keys = index.foreign_keys(rtxn)?.into_iter().collect();
|
||||
|
||||
let criteria = index.criteria(rtxn)?;
|
||||
|
||||
let stop_words = index
|
||||
@@ -988,7 +965,6 @@ pub fn settings(
|
||||
.into(),
|
||||
filterable_attributes: Setting::Set(filterable_attributes),
|
||||
sortable_attributes: Setting::Set(sortable_attributes),
|
||||
foreign_keys: Setting::Set(foreign_keys),
|
||||
ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
|
||||
stop_words: Setting::Set(stop_words),
|
||||
non_separator_tokens: Setting::Set(non_separator_tokens),
|
||||
@@ -1231,7 +1207,6 @@ pub(crate) mod test {
|
||||
searchable_attributes: Setting::Set(vec![String::from("hello")]).into(),
|
||||
filterable_attributes: Setting::NotSet,
|
||||
sortable_attributes: Setting::NotSet,
|
||||
foreign_keys: Setting::NotSet,
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
non_separator_tokens: Setting::NotSet,
|
||||
@@ -1265,7 +1240,6 @@ pub(crate) mod test {
|
||||
.into(),
|
||||
filterable_attributes: Setting::NotSet,
|
||||
sortable_attributes: Setting::NotSet,
|
||||
foreign_keys: Setting::NotSet,
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
non_separator_tokens: Setting::NotSet,
|
||||
|
||||
@@ -14,91 +14,91 @@ default-run = "meilisearch"
|
||||
|
||||
[dependencies]
|
||||
actix-cors = "0.7.1"
|
||||
actix-http = { version = "3.11.0", default-features = false, features = [
|
||||
actix-http = { version = "3.11.2", default-features = false, features = [
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"rustls-0_23",
|
||||
] }
|
||||
actix-utils = "3.0.1"
|
||||
actix-web = { version = "4.11.0", default-features = false, features = [
|
||||
actix-web = { version = "4.12.0", default-features = false, features = [
|
||||
"macros",
|
||||
"compress-brotli",
|
||||
"compress-gzip",
|
||||
"cookies",
|
||||
"rustls-0_23",
|
||||
] }
|
||||
anyhow = { version = "1.0.98", features = ["backtrace"] }
|
||||
bstr = "1.12.0"
|
||||
anyhow = { version = "1.0.100", features = ["backtrace"] }
|
||||
bstr = "1.12.1"
|
||||
byte-unit = { version = "5.1.6", features = ["serde"] }
|
||||
bytes = "1.10.1"
|
||||
bumpalo = "3.18.1"
|
||||
clap = { version = "4.5.40", features = ["derive", "env"] }
|
||||
bytes = "1.11.0"
|
||||
bumpalo = "3.19.0"
|
||||
clap = { version = "4.5.52", features = ["derive", "env"] }
|
||||
crossbeam-channel = "0.5.15"
|
||||
deserr = { version = "0.6.3", features = ["actix-web"] }
|
||||
deserr = { version = "0.6.4", features = ["actix-web"] }
|
||||
dump = { path = "../dump" }
|
||||
either = "1.15.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.1.2"
|
||||
flate2 = "1.1.5"
|
||||
fst = "0.4.7"
|
||||
futures = "0.3.31"
|
||||
futures-util = "0.3.31"
|
||||
index-scheduler = { path = "../index-scheduler" }
|
||||
indexmap = { version = "2.9.0", features = ["serde"] }
|
||||
is-terminal = "0.4.16"
|
||||
indexmap = { version = "2.12.0", features = ["serde"] }
|
||||
is-terminal = "0.4.17"
|
||||
itertools = "0.14.0"
|
||||
jsonwebtoken = "9.3.1"
|
||||
lazy_static = "1.5.0"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.7"
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
memmap2 = "0.9.9"
|
||||
mimalloc = { version = "0.1.48", default-features = false }
|
||||
mime = "0.3.17"
|
||||
num_cpus = "1.17.0"
|
||||
obkv = "0.3.0"
|
||||
once_cell = "1.21.3"
|
||||
ordered-float = "5.0.0"
|
||||
parking_lot = "0.12.4"
|
||||
ordered-float = "5.1.0"
|
||||
parking_lot = "0.12.5"
|
||||
permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||
pin-project-lite = "0.2.16"
|
||||
platform-dirs = "0.3.0"
|
||||
prometheus = { version = "0.14.0", features = ["process"] }
|
||||
rand = "0.8.5"
|
||||
rayon = "1.10.0"
|
||||
regex = "1.11.1"
|
||||
reqwest = { version = "0.12.20", features = [
|
||||
rayon = "1.11.0"
|
||||
regex = "1.12.2"
|
||||
reqwest = { version = "0.12.24", features = [
|
||||
"rustls-tls",
|
||||
"json",
|
||||
], default-features = false }
|
||||
rustls = { version = "0.23.28", features = ["ring"], default-features = false }
|
||||
rustls-pki-types = { version = "1.12.0", features = ["alloc"] }
|
||||
rustls = { version = "0.23.35", features = ["ring"], default-features = false }
|
||||
rustls-pki-types = { version = "1.13.0", features = ["alloc"] }
|
||||
rustls-pemfile = "2.2.0"
|
||||
segment = { version = "0.2.6" }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
sha2 = "0.10.9"
|
||||
siphasher = "1.0.1"
|
||||
slice-group-by = "0.3.1"
|
||||
static-files = { version = "0.2.5", optional = true }
|
||||
sysinfo = "0.35.2"
|
||||
static-files = { version = "0.3.1", optional = true }
|
||||
sysinfo = "0.37.2"
|
||||
tar = "0.4.44"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
tokio = { version = "1.45.1", features = ["full"] }
|
||||
toml = "0.8.23"
|
||||
uuid = { version = "1.18.0", features = ["serde", "v4", "v7"] }
|
||||
tokio = { version = "1.48.0", features = ["full"] }
|
||||
toml = "0.9.8"
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4", "v7"] }
|
||||
serde_urlencoded = "0.7.1"
|
||||
termcolor = "1.4.1"
|
||||
url = { version = "2.5.4", features = ["serde"] }
|
||||
url = { version = "2.5.7", features = ["serde"] }
|
||||
tracing = "0.1.41"
|
||||
tracing-subscriber = { version = "0.3.20", features = ["json"] }
|
||||
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
|
||||
tracing-actix-web = "0.7.18"
|
||||
tracing-actix-web = "0.7.19"
|
||||
build-info = { version = "1.7.0", path = "../build-info" }
|
||||
roaring = "0.10.12"
|
||||
mopa-maintained = "0.2.3"
|
||||
@@ -114,35 +114,35 @@ utoipa = { version = "5.4.0", features = [
|
||||
utoipa-scalar = { version = "0.3.0", optional = true, features = ["actix-web"] }
|
||||
async-openai = { git = "https://github.com/meilisearch/async-openai", branch = "better-error-handling" }
|
||||
secrecy = "0.10.3"
|
||||
actix-web-lab = { version = "0.24.1", default-features = false }
|
||||
actix-web-lab = { version = "0.24.3", default-features = false }
|
||||
urlencoding = "2.1.3"
|
||||
backoff = { version = "0.4.0", features = ["tokio"] }
|
||||
|
||||
humantime = { version = "2.3.0", default-features = false }
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.10.0"
|
||||
brotli = "8.0.1"
|
||||
actix-rt = "2.11.0"
|
||||
brotli = "8.0.2"
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = { version = "=1.39.0", features = ["redactions"] }
|
||||
manifest-dir-macros = "0.1.18"
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
temp-env = "0.3.6"
|
||||
wiremock = "0.6.3"
|
||||
wiremock = "0.6.5"
|
||||
yaup = "0.3.1"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = { version = "1.0.98", optional = true }
|
||||
cargo_toml = { version = "0.22.1", optional = true }
|
||||
anyhow = { version = "1.0.100", optional = true }
|
||||
cargo_toml = { version = "0.22.3", optional = true }
|
||||
hex = { version = "0.4.3", optional = true }
|
||||
reqwest = { version = "0.12.20", features = [
|
||||
reqwest = { version = "0.12.24", features = [
|
||||
"blocking",
|
||||
"rustls-tls",
|
||||
], default-features = false, optional = true }
|
||||
sha-1 = { version = "0.10.1", optional = true }
|
||||
static-files = { version = "0.2.5", optional = true }
|
||||
tempfile = { version = "3.20.0", optional = true }
|
||||
zip = { version = "4.1.0", optional = true }
|
||||
static-files = { version = "0.3.1", optional = true }
|
||||
tempfile = { version = "3.23.0", optional = true }
|
||||
zip = { version = "6.0.0", optional = true }
|
||||
|
||||
[features]
|
||||
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||
@@ -160,6 +160,7 @@ mini-dashboard = [
|
||||
]
|
||||
chinese = ["meilisearch-types/chinese"]
|
||||
chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
|
||||
enterprise = ["meilisearch-types/enterprise"]
|
||||
hebrew = ["meilisearch-types/hebrew"]
|
||||
japanese = ["meilisearch-types/japanese"]
|
||||
korean = ["meilisearch-types/korean"]
|
||||
|
||||
@@ -208,7 +208,6 @@ struct Infos {
|
||||
experimental_no_edition_2024_for_prefix_post_processing: bool,
|
||||
experimental_no_edition_2024_for_facet_post_processing: bool,
|
||||
experimental_vector_store_setting: bool,
|
||||
experimental_foreign_keys: bool,
|
||||
experimental_personalization: bool,
|
||||
gpu_enabled: bool,
|
||||
db_path: bool,
|
||||
@@ -318,7 +317,6 @@ impl Infos {
|
||||
chat_completions,
|
||||
multimodal,
|
||||
vector_store_setting,
|
||||
foreign_keys,
|
||||
} = features;
|
||||
|
||||
// We're going to override every sensible information.
|
||||
@@ -345,7 +343,6 @@ impl Infos {
|
||||
experimental_no_snapshot_compaction,
|
||||
experimental_no_edition_2024_for_dumps,
|
||||
experimental_vector_store_setting: vector_store_setting,
|
||||
experimental_foreign_keys: foreign_keys,
|
||||
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
|
||||
db_path: db_path != PathBuf::from("./data.ms"),
|
||||
import_dump: import_dump.is_some(),
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use lazy_static::lazy_static;
|
||||
use prometheus::{
|
||||
opts, register_gauge, register_histogram_vec, register_int_counter_vec, register_int_gauge,
|
||||
register_int_gauge_vec, Gauge, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
|
||||
opts, register_gauge, register_gauge_vec, register_histogram_vec, register_int_counter_vec,
|
||||
register_int_gauge, register_int_gauge_vec, Gauge, GaugeVec, HistogramVec, IntCounterVec,
|
||||
IntGauge, IntGaugeVec,
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
@@ -73,6 +74,20 @@ lazy_static! {
|
||||
&["kind", "value"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_BATCH_RUNNING_PROGRESS_TRACE: GaugeVec = register_gauge_vec!(
|
||||
opts!("meilisearch_batch_running_progress_trace", "The currently running progress trace"),
|
||||
&["batch_uid", "step_name"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_LAST_FINISHED_BATCHES_PROGRESS_TRACE_MS: IntGaugeVec =
|
||||
register_int_gauge_vec!(
|
||||
opts!(
|
||||
"meilisearch_last_finished_batches_progress_trace_ms",
|
||||
"The last few batches progress trace in milliseconds"
|
||||
),
|
||||
&["batch_uid", "step_name"]
|
||||
)
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_LAST_UPDATE: IntGauge =
|
||||
register_int_gauge!(opts!("meilisearch_last_update", "Meilisearch Last Update"))
|
||||
.expect("Can't create a metric");
|
||||
|
||||
@@ -56,7 +56,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
chat_completions: Some(false),
|
||||
multimodal: Some(false),
|
||||
vector_store_setting: Some(false),
|
||||
foreign_keys: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
@@ -107,8 +106,6 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub multimodal: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub vector_store_setting: Option<bool>,
|
||||
#[deserr(default)]
|
||||
pub foreign_keys: Option<bool>,
|
||||
}
|
||||
|
||||
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
|
||||
@@ -124,7 +121,6 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
||||
chat_completions,
|
||||
multimodal,
|
||||
vector_store_setting,
|
||||
foreign_keys,
|
||||
} = value;
|
||||
|
||||
Self {
|
||||
@@ -138,7 +134,6 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
|
||||
chat_completions: Some(chat_completions),
|
||||
multimodal: Some(multimodal),
|
||||
vector_store_setting: Some(vector_store_setting),
|
||||
foreign_keys: Some(foreign_keys),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -155,7 +150,6 @@ pub struct PatchExperimentalFeatureAnalytics {
|
||||
chat_completions: bool,
|
||||
multimodal: bool,
|
||||
vector_store_setting: bool,
|
||||
foreign_keys: bool,
|
||||
}
|
||||
|
||||
impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
@@ -175,7 +169,6 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
chat_completions: new.chat_completions,
|
||||
multimodal: new.multimodal,
|
||||
vector_store_setting: new.vector_store_setting,
|
||||
foreign_keys: new.foreign_keys,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -204,7 +197,6 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
|
||||
chat_completions: Some(false),
|
||||
multimodal: Some(false),
|
||||
vector_store_setting: Some(false),
|
||||
foreign_keys: Some(false),
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
{
|
||||
@@ -252,7 +244,6 @@ async fn patch_features(
|
||||
.0
|
||||
.vector_store_setting
|
||||
.unwrap_or(old_features.vector_store_setting),
|
||||
foreign_keys: new_features.0.foreign_keys.unwrap_or(old_features.foreign_keys),
|
||||
};
|
||||
|
||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||
@@ -269,7 +260,6 @@ async fn patch_features(
|
||||
chat_completions,
|
||||
multimodal,
|
||||
vector_store_setting,
|
||||
foreign_keys,
|
||||
} = new_features;
|
||||
|
||||
analytics.publish(
|
||||
@@ -284,7 +274,6 @@ async fn patch_features(
|
||||
chat_completions,
|
||||
multimodal,
|
||||
vector_store_setting,
|
||||
foreign_keys,
|
||||
},
|
||||
&req,
|
||||
);
|
||||
|
||||
39
crates/meilisearch/src/routes/indexes/community_edition.rs
Normal file
39
crates/meilisearch/src/routes/indexes/community_edition.rs
Normal file
@@ -0,0 +1,39 @@
|
||||
pub mod proxy {
|
||||
|
||||
use std::fs::File;
|
||||
|
||||
use actix_web::HttpRequest;
|
||||
use index_scheduler::IndexScheduler;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
|
||||
pub enum Body<T: serde::Serialize> {
|
||||
NdJsonPayload,
|
||||
Inline(T),
|
||||
None,
|
||||
}
|
||||
|
||||
impl Body<()> {
|
||||
pub fn with_ndjson_payload(_file: File) -> Self {
|
||||
Self::NdJsonPayload
|
||||
}
|
||||
|
||||
pub fn none() -> Self {
|
||||
Self::None
|
||||
}
|
||||
}
|
||||
|
||||
pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "Meili-Proxy-Origin-Remote";
|
||||
pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "Meili-Proxy-Origin-TaskUid";
|
||||
|
||||
pub async fn proxy<T: serde::Serialize>(
|
||||
_index_scheduler: &IndexScheduler,
|
||||
_index_uid: &str,
|
||||
_req: &HttpRequest,
|
||||
_network: meilisearch_types::network::Network,
|
||||
_body: Body<T>,
|
||||
_task: &meilisearch_types::tasks::Task,
|
||||
) -> Result<(), MeilisearchHttpError> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -45,7 +45,7 @@ use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::payload::Payload;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::indexes::enterprise_edition::proxy::{proxy, Body};
|
||||
use crate::routes::indexes::current_edition::proxy::{proxy, Body};
|
||||
use crate::routes::indexes::search::fix_sort_query_parameters;
|
||||
use crate::routes::{
|
||||
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
|
||||
@@ -367,7 +367,7 @@ pub async fn delete_document(
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
if network.sharding() && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
|
||||
}
|
||||
|
||||
@@ -1098,7 +1098,7 @@ async fn document_addition(
|
||||
}
|
||||
};
|
||||
|
||||
if network.sharding {
|
||||
if network.sharding() {
|
||||
if let Some(file) = file {
|
||||
proxy(
|
||||
&index_scheduler,
|
||||
@@ -1222,7 +1222,7 @@ pub async fn delete_documents_batch(
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
if network.sharding() && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?;
|
||||
}
|
||||
|
||||
@@ -1320,7 +1320,7 @@ pub async fn delete_documents_by_filter(
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
if network.sharding() && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(filter), &task).await?;
|
||||
}
|
||||
|
||||
@@ -1475,7 +1475,7 @@ pub async fn edit_documents_by_function(
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
if network.sharding() && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?;
|
||||
}
|
||||
|
||||
@@ -1549,7 +1549,7 @@ pub async fn clear_all_documents(
|
||||
.await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
if network.sharding() && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
|
||||
}
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ pub async fn proxy<T: serde::Serialize>(
|
||||
index_scheduler: &IndexScheduler,
|
||||
index_uid: &str,
|
||||
req: &HttpRequest,
|
||||
network: meilisearch_types::enterprise_edition::network::Network,
|
||||
network: meilisearch_types::network::Network,
|
||||
body: Body<T>,
|
||||
task: &meilisearch_types::tasks::Task,
|
||||
) -> Result<(), MeilisearchHttpError> {
|
||||
|
||||
@@ -30,7 +30,16 @@ use crate::Opt;
|
||||
|
||||
pub mod compact;
|
||||
pub mod documents;
|
||||
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
mod community_edition;
|
||||
#[cfg(feature = "enterprise")]
|
||||
mod enterprise_edition;
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
use community_edition as current_edition;
|
||||
#[cfg(feature = "enterprise")]
|
||||
use enterprise_edition as current_edition;
|
||||
|
||||
pub mod facet_search;
|
||||
pub mod search;
|
||||
mod search_analytics;
|
||||
@@ -41,7 +50,7 @@ mod settings_analytics;
|
||||
pub mod similar;
|
||||
mod similar_analytics;
|
||||
|
||||
pub use enterprise_edition::proxy::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
|
||||
pub use current_edition::proxy::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
|
||||
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
|
||||
@@ -531,17 +531,6 @@ make_setting_routes!(
|
||||
camelcase_attr: "vectorStore",
|
||||
analytics: VectorStoreAnalytics
|
||||
},
|
||||
{
|
||||
route: "/foreign-keys",
|
||||
update_verb: put,
|
||||
value_type: Vec<meilisearch_types::milli::ForeignKey>,
|
||||
err_type: meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsForeignKeys,
|
||||
>,
|
||||
attr: foreign_keys,
|
||||
camelcase_attr: "foreignKeys",
|
||||
analytics: ForeignKeysAnalytics
|
||||
},
|
||||
);
|
||||
|
||||
#[utoipa::path(
|
||||
@@ -606,7 +595,6 @@ pub async fn update_all(
|
||||
filterable_attributes: FilterableAttributesAnalytics::new(
|
||||
new_settings.filterable_attributes.as_ref().set(),
|
||||
),
|
||||
foreign_keys: ForeignKeysAnalytics::new(new_settings.foreign_keys.as_ref().set()),
|
||||
distinct_attribute: DistinctAttributeAnalytics::new(
|
||||
new_settings.distinct_attribute.as_ref().set(),
|
||||
),
|
||||
@@ -700,10 +688,6 @@ pub async fn get_all(
|
||||
new_settings.vector_store = Setting::NotSet;
|
||||
}
|
||||
|
||||
if features.check_foreign_keys_setting("showing index `foreignKeys` settings").is_err() {
|
||||
new_settings.foreign_keys = Setting::NotSet;
|
||||
}
|
||||
|
||||
debug!(returns = ?new_settings, "Get all settings");
|
||||
Ok(HttpResponse::Ok().json(new_settings))
|
||||
}
|
||||
@@ -809,9 +793,5 @@ fn validate_settings(
|
||||
features.check_vector_store_setting("setting `vectorStore` in the index settings")?;
|
||||
}
|
||||
|
||||
if let Setting::Set(_) = &settings.foreign_keys {
|
||||
features.check_foreign_keys_setting("setting `foreignKeys` in the index settings")?;
|
||||
}
|
||||
|
||||
Ok(settings.validate()?)
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ use meilisearch_types::facet_values_sort::FacetValuesSort;
|
||||
use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView};
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::vector::VectorStoreBackend;
|
||||
use meilisearch_types::milli::{FilterableAttributesRule, ForeignKey};
|
||||
use meilisearch_types::milli::FilterableAttributesRule;
|
||||
use meilisearch_types::settings::{
|
||||
ChatSettings, FacetingSettings, PaginationSettings, PrefixSearchSettings,
|
||||
ProximityPrecisionView, RankingRuleView, SettingEmbeddingSettings, TypoSettings,
|
||||
@@ -25,7 +25,6 @@ pub struct SettingsAnalytics {
|
||||
pub displayed_attributes: DisplayedAttributesAnalytics,
|
||||
pub sortable_attributes: SortableAttributesAnalytics,
|
||||
pub filterable_attributes: FilterableAttributesAnalytics,
|
||||
pub foreign_keys: ForeignKeysAnalytics,
|
||||
pub distinct_attribute: DistinctAttributeAnalytics,
|
||||
pub proximity_precision: ProximityPrecisionAnalytics,
|
||||
pub typo_tolerance: TypoToleranceAnalytics,
|
||||
@@ -99,10 +98,6 @@ impl Aggregate for SettingsAnalytics {
|
||||
.has_patterns
|
||||
.or(self.filterable_attributes.has_patterns),
|
||||
},
|
||||
foreign_keys: ForeignKeysAnalytics {
|
||||
set: new.foreign_keys.set | self.foreign_keys.set,
|
||||
total: new.foreign_keys.total.or(self.foreign_keys.total),
|
||||
},
|
||||
distinct_attribute: DistinctAttributeAnalytics {
|
||||
set: self.distinct_attribute.set | new.distinct_attribute.set,
|
||||
},
|
||||
@@ -367,22 +362,6 @@ impl FilterableAttributesAnalytics {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Default)]
|
||||
pub struct ForeignKeysAnalytics {
|
||||
pub set: bool,
|
||||
pub total: Option<usize>,
|
||||
}
|
||||
|
||||
impl ForeignKeysAnalytics {
|
||||
pub fn new(settings: Option<&Vec<ForeignKey>>) -> Self {
|
||||
Self { set: settings.is_some(), total: settings.as_ref().map(|s| s.len()) }
|
||||
}
|
||||
|
||||
pub fn into_settings(self) -> SettingsAnalytics {
|
||||
SettingsAnalytics { foreign_keys: self, ..Default::default() }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Default)]
|
||||
pub struct DistinctAttributeAnalytics {
|
||||
pub set: bool,
|
||||
|
||||
@@ -4,6 +4,7 @@ use index_scheduler::{IndexScheduler, Query};
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::milli::progress::ProgressStepView;
|
||||
use meilisearch_types::tasks::Status;
|
||||
use prometheus::{Encoder, TextEncoder};
|
||||
use time::OffsetDateTime;
|
||||
@@ -38,6 +39,12 @@ pub fn configure(config: &mut web::ServiceConfig) {
|
||||
# HELP meilisearch_db_size_bytes Meilisearch DB Size In Bytes
|
||||
# TYPE meilisearch_db_size_bytes gauge
|
||||
meilisearch_db_size_bytes 1130496
|
||||
# HELP meilisearch_batch_running_progress_trace The currently running progress trace
|
||||
# TYPE meilisearch_batch_running_progress_trace gauge
|
||||
meilisearch_batch_running_progress_trace{batch_uid="0",step_name="document"} 0.710618582519409
|
||||
meilisearch_batch_running_progress_trace{batch_uid="0",step_name="extracting word proximity"} 0.2222222222222222
|
||||
meilisearch_batch_running_progress_trace{batch_uid="0",step_name="indexing"} 0.6666666666666666
|
||||
meilisearch_batch_running_progress_trace{batch_uid="0",step_name="processing tasks"} 0
|
||||
# HELP meilisearch_http_requests_total Meilisearch HTTP requests total
|
||||
# TYPE meilisearch_http_requests_total counter
|
||||
meilisearch_http_requests_total{method="GET",path="/metrics",status="400"} 1
|
||||
@@ -61,6 +68,13 @@ meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="1
|
||||
meilisearch_http_response_time_seconds_bucket{method="GET",path="/metrics",le="+Inf"} 0
|
||||
meilisearch_http_response_time_seconds_sum{method="GET",path="/metrics"} 0
|
||||
meilisearch_http_response_time_seconds_count{method="GET",path="/metrics"} 0
|
||||
# HELP meilisearch_last_finished_batches_progress_trace_ms The last few batches progress trace in milliseconds
|
||||
# TYPE meilisearch_last_finished_batches_progress_trace_ms gauge
|
||||
meilisearch_last_finished_batches_progress_trace_ms{batch_uid="0",step_name="processing tasks"} 19360
|
||||
meilisearch_last_finished_batches_progress_trace_ms{batch_uid="0",step_name="processing tasks > computing document changes"} 368
|
||||
meilisearch_last_finished_batches_progress_trace_ms{batch_uid="0",step_name="processing tasks > computing document changes > preparing payloads"} 367
|
||||
meilisearch_last_finished_batches_progress_trace_ms{batch_uid="0",step_name="processing tasks > computing document changes > preparing payloads > payload"} 367
|
||||
meilisearch_last_finished_batches_progress_trace_ms{batch_uid="0",step_name="processing tasks > indexing"} 18970
|
||||
# HELP meilisearch_index_count Meilisearch Index Count
|
||||
# TYPE meilisearch_index_count gauge
|
||||
meilisearch_index_count 1
|
||||
@@ -148,6 +162,46 @@ pub async fn get_metrics(
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch and expose the current progressing step
|
||||
crate::metrics::MEILISEARCH_BATCH_RUNNING_PROGRESS_TRACE.reset();
|
||||
let (batches, _total) = index_scheduler.get_batches_from_authorized_indexes(
|
||||
&Query { statuses: Some(vec![Status::Processing]), ..Query::default() },
|
||||
auth_filters,
|
||||
)?;
|
||||
if let Some(batch) = batches.into_iter().next() {
|
||||
let batch_uid = batch.uid.to_string();
|
||||
if let Some(progress) = batch.progress {
|
||||
for ProgressStepView { current_step, finished, total } in progress.steps {
|
||||
crate::metrics::MEILISEARCH_BATCH_RUNNING_PROGRESS_TRACE
|
||||
.with_label_values(&[batch_uid.as_str(), current_step.as_ref()])
|
||||
// We return the completion ratio of the current step
|
||||
.set(finished as f64 / total as f64);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
crate::metrics::MEILISEARCH_LAST_FINISHED_BATCHES_PROGRESS_TRACE_MS.reset();
|
||||
let (batches, _total) = index_scheduler.get_batches_from_authorized_indexes(
|
||||
// Fetch the finished batches...
|
||||
&Query { statuses: Some(vec![Status::Succeeded, Status::Failed]), ..Query::default() },
|
||||
auth_filters,
|
||||
)?;
|
||||
// ...and get the last batch only.
|
||||
if let Some(batch) = batches.into_iter().next() {
|
||||
let batch_uid = batch.uid.to_string();
|
||||
for (step_name, duration_str) in batch.stats.progress_trace {
|
||||
let Some(duration_str) = duration_str.as_str() else { continue };
|
||||
match humantime::parse_duration(duration_str) {
|
||||
Ok(duration) => {
|
||||
crate::metrics::MEILISEARCH_LAST_FINISHED_BATCHES_PROGRESS_TRACE_MS
|
||||
.with_label_values(&[&batch_uid, &step_name])
|
||||
.set(duration.as_millis() as i64);
|
||||
}
|
||||
Err(e) => tracing::error!("Failed to parse duration: {e}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(last_update) = response.last_update {
|
||||
crate::metrics::MEILISEARCH_LAST_UPDATE.set(last_update.unix_timestamp());
|
||||
}
|
||||
|
||||
@@ -7,7 +7,6 @@ use deserr::Deserr;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use itertools::{EitherOrBoth, Itertools};
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::enterprise_edition::network::{Network as DbNetwork, Remote as DbRemote};
|
||||
use meilisearch_types::error::deserr_codes::{
|
||||
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkSharding,
|
||||
InvalidNetworkUrl, InvalidNetworkWriteApiKey,
|
||||
@@ -15,6 +14,7 @@ use meilisearch_types::error::deserr_codes::{
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::network::{Network as DbNetwork, Remote as DbRemote};
|
||||
use serde::Serialize;
|
||||
use tracing::debug;
|
||||
use utoipa::{OpenApi, ToSchema};
|
||||
@@ -211,6 +211,16 @@ async fn patch_network(
|
||||
let old_network = index_scheduler.network();
|
||||
debug!(parameters = ?new_network, "Patch network");
|
||||
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
if new_network.sharding.set().is_some() {
|
||||
use meilisearch_types::error::Code;
|
||||
|
||||
return Err(ResponseError::from_msg(
|
||||
"Meilisearch Enterprise Edition is required to set `network.sharding`".into(),
|
||||
Code::RequiresEnterpriseEdition,
|
||||
));
|
||||
}
|
||||
|
||||
let merged_self = match new_network.local {
|
||||
Setting::Set(new_self) => Some(new_self),
|
||||
Setting::Reset => None,
|
||||
@@ -312,6 +322,7 @@ async fn patch_network(
|
||||
|
||||
let merged_network =
|
||||
DbNetwork { local: merged_self, remotes: merged_remotes, sharding: merged_sharding };
|
||||
|
||||
index_scheduler.put_network(merged_network.clone())?;
|
||||
debug!(returns = ?merged_network, "Patch network");
|
||||
Ok(HttpResponse::Ok().json(merged_network))
|
||||
|
||||
@@ -9,12 +9,12 @@ use std::vec::{IntoIter, Vec};
|
||||
use actix_http::StatusCode;
|
||||
use index_scheduler::{IndexScheduler, RoFeatures};
|
||||
use itertools::Itertools;
|
||||
use meilisearch_types::enterprise_edition::network::{Network, Remote};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::milli::order_by_map::OrderByMap;
|
||||
use meilisearch_types::milli::score_details::{ScoreDetails, WeightedScoreValue};
|
||||
use meilisearch_types::milli::vector::Embedding;
|
||||
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget, DEFAULT_VALUES_PER_FACET};
|
||||
use meilisearch_types::network::{Network, Remote};
|
||||
use roaring::RoaringBitmap;
|
||||
use tokio::task::JoinHandle;
|
||||
use uuid::Uuid;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
pub use error::ProxySearchError;
|
||||
use error::ReqwestErrorWithoutUrl;
|
||||
use meilisearch_types::enterprise_edition::network::Remote;
|
||||
use meilisearch_types::network::Remote;
|
||||
use rand::Rng as _;
|
||||
use reqwest::{Client, Response, StatusCode};
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
@@ -237,7 +237,6 @@ async fn import_dump_v1_movie_with_settings() {
|
||||
"sortableAttributes": [
|
||||
"genres"
|
||||
],
|
||||
"foreignKeys": [],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
@@ -412,7 +411,6 @@ async fn import_dump_v1_rubygems_with_settings() {
|
||||
"sortableAttributes": [
|
||||
"version"
|
||||
],
|
||||
"foreignKeys": [],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
@@ -742,7 +740,6 @@ async fn import_dump_v2_movie_with_settings() {
|
||||
"genres"
|
||||
],
|
||||
"sortableAttributes": [],
|
||||
"foreignKeys": [],
|
||||
"rankingRules": [
|
||||
"words",
|
||||
"typo",
|
||||
@@ -914,7 +911,6 @@ async fn import_dump_v2_rubygems_with_settings() {
|
||||
"version"
|
||||
],
|
||||
"sortableAttributes": [],
|
||||
"foreignKeys": [],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
@@ -1244,7 +1240,6 @@ async fn import_dump_v3_movie_with_settings() {
|
||||
"genres"
|
||||
],
|
||||
"sortableAttributes": [],
|
||||
"foreignKeys": [],
|
||||
"rankingRules": [
|
||||
"words",
|
||||
"typo",
|
||||
@@ -1416,7 +1411,6 @@ async fn import_dump_v3_rubygems_with_settings() {
|
||||
"version"
|
||||
],
|
||||
"sortableAttributes": [],
|
||||
"foreignKeys": [],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
@@ -1746,7 +1740,6 @@ async fn import_dump_v4_movie_with_settings() {
|
||||
"genres"
|
||||
],
|
||||
"sortableAttributes": [],
|
||||
"foreignKeys": [],
|
||||
"rankingRules": [
|
||||
"words",
|
||||
"typo",
|
||||
@@ -1918,7 +1911,6 @@ async fn import_dump_v4_rubygems_with_settings() {
|
||||
"version"
|
||||
],
|
||||
"sortableAttributes": [],
|
||||
"foreignKeys": [],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
@@ -2198,8 +2190,7 @@ async fn import_dump_v6_containing_experimental_features() {
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false,
|
||||
"multimodal": false,
|
||||
"vectorStoreSetting": false,
|
||||
"foreignKeys": false
|
||||
"vectorStoreSetting": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
||||
@@ -27,8 +27,7 @@ async fn experimental_features() {
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false,
|
||||
"multimodal": false,
|
||||
"vectorStoreSetting": false,
|
||||
"foreignKeys": false
|
||||
"vectorStoreSetting": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -46,8 +45,7 @@ async fn experimental_features() {
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false,
|
||||
"multimodal": false,
|
||||
"vectorStoreSetting": false,
|
||||
"foreignKeys": false
|
||||
"vectorStoreSetting": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -65,8 +63,7 @@ async fn experimental_features() {
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false,
|
||||
"multimodal": false,
|
||||
"vectorStoreSetting": false,
|
||||
"foreignKeys": false
|
||||
"vectorStoreSetting": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -85,8 +82,7 @@ async fn experimental_features() {
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false,
|
||||
"multimodal": false,
|
||||
"vectorStoreSetting": false,
|
||||
"foreignKeys": false
|
||||
"vectorStoreSetting": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -105,8 +101,7 @@ async fn experimental_features() {
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false,
|
||||
"multimodal": false,
|
||||
"vectorStoreSetting": false,
|
||||
"foreignKeys": false
|
||||
"vectorStoreSetting": false
|
||||
}
|
||||
"###);
|
||||
}
|
||||
@@ -132,8 +127,7 @@ async fn experimental_feature_metrics() {
|
||||
"compositeEmbedders": false,
|
||||
"chatCompletions": false,
|
||||
"multimodal": false,
|
||||
"vectorStoreSetting": false,
|
||||
"foreignKeys": false
|
||||
"vectorStoreSetting": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -180,7 +174,7 @@ async fn errors() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`, `compositeEmbedders`, `chatCompletions`, `multimodal`, `vectorStoreSetting`, `foreignKeys`",
|
||||
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`, `compositeEmbedders`, `chatCompletions`, `multimodal`, `vectorStoreSetting`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
|
||||
@@ -3142,6 +3142,7 @@ fn fail(override_response_body: Option<&str>) -> ResponseTemplate {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
#[actix_rt::test]
|
||||
async fn remote_auto_sharding() {
|
||||
let ms0 = Server::new().await;
|
||||
@@ -3161,7 +3162,6 @@ async fn remote_auto_sharding() {
|
||||
snapshot!(json_string!(response["network"]), @"true");
|
||||
|
||||
// set self & sharding
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0", "sharding": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
@@ -3462,6 +3462,30 @@ async fn remote_auto_sharding() {
|
||||
"###);
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
#[actix_rt::test]
|
||||
async fn sharding_not_enterprise() {
|
||||
let ms0 = Server::new().await;
|
||||
|
||||
// enable feature
|
||||
|
||||
let (response, code) = ms0.set_features(json!({"network": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["network"]), @"true");
|
||||
|
||||
let (response, code) = ms0.set_network(json!({"self": "ms0", "sharding": true})).await;
|
||||
snapshot!(code, @"451 Unavailable For Legal Reasons");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Meilisearch Enterprise Edition is required to set `network.sharding`",
|
||||
"code": "requires_enterprise_edition",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#requires_enterprise_edition"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
#[actix_rt::test]
|
||||
async fn remote_auto_sharding_with_custom_metadata() {
|
||||
let ms0 = Server::new().await;
|
||||
|
||||
@@ -318,7 +318,6 @@ async fn secrets_are_hidden_in_settings() {
|
||||
],
|
||||
"filterableAttributes": [],
|
||||
"sortableAttributes": [],
|
||||
"foreignKeys": [],
|
||||
"rankingRules": [
|
||||
"words",
|
||||
"typo",
|
||||
|
||||
@@ -2,6 +2,7 @@ mod chat;
|
||||
mod distinct;
|
||||
mod errors;
|
||||
mod get_settings;
|
||||
mod parent_seachable_fields;
|
||||
mod prefix_search_settings;
|
||||
mod proximity_settings;
|
||||
mod tokenizer_customization;
|
||||
|
||||
114
crates/meilisearch/tests/settings/parent_seachable_fields.rs
Normal file
114
crates/meilisearch/tests/settings/parent_seachable_fields.rs
Normal file
@@ -0,0 +1,114 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
use crate::common::Server;
|
||||
use crate::json;
|
||||
|
||||
static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{
|
||||
"id": 1,
|
||||
"meta": {
|
||||
"title": "Soup of the day",
|
||||
"description": "many the fish",
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"meta": {
|
||||
"title": "Soup of day",
|
||||
"description": "many the lazy fish",
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"meta": {
|
||||
"title": "the Soup of day",
|
||||
"description": "many the fish",
|
||||
}
|
||||
},
|
||||
])
|
||||
});
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn nested_field_becomes_searchable() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let (task, _status_code) = index.add_documents(DOCUMENTS.clone(), None).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"searchableAttributes": ["meta.title"]
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{response:?}");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
// We expect no documents when searching for
|
||||
// a nested non-searchable field
|
||||
index
|
||||
.search(json!({"q": "many fish"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"[]"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"searchableAttributes": ["meta.title", "meta.description"]
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{response:?}");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
// We expect all the documents when the nested field becomes searchable
|
||||
index
|
||||
.search(json!({"q": "many fish"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"meta": {
|
||||
"title": "Soup of the day",
|
||||
"description": "many the fish"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"meta": {
|
||||
"title": "the Soup of day",
|
||||
"description": "many the fish"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"meta": {
|
||||
"title": "Soup of day",
|
||||
"description": "many the lazy fish"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"searchableAttributes": ["meta.title"]
|
||||
}))
|
||||
.await;
|
||||
assert_eq!("202", code.as_str(), "{response:?}");
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
// We expect no documents when searching for
|
||||
// a nested non-searchable field
|
||||
index
|
||||
.search(json!({"q": "many fish"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"[]"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
@@ -9,15 +9,15 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.98"
|
||||
clap = { version = "4.5.40", features = ["derive"] }
|
||||
anyhow = "1.0.100"
|
||||
clap = { version = "4.5.52", features = ["derive"] }
|
||||
dump = { path = "../dump" }
|
||||
file-store = { path = "../file-store" }
|
||||
indexmap = { version = "2.9.0", features = ["serde"] }
|
||||
indexmap = { version = "2.12.0", features = ["serde"] }
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
tempfile = "3.20.0"
|
||||
time = { version = "0.3.41", features = ["formatting", "parsing", "alloc"] }
|
||||
uuid = { version = "1.17.0", features = ["v4"], default-features = false }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order"] }
|
||||
tempfile = "3.23.0"
|
||||
time = { version = "0.3.44", features = ["formatting", "parsing", "alloc"] }
|
||||
uuid = { version = "1.18.1", features = ["v4"], default-features = false }
|
||||
|
||||
@@ -15,15 +15,15 @@ license.workspace = true
|
||||
big_s = "1.0.2"
|
||||
bimap = { version = "0.6.3", features = ["serde"] }
|
||||
bincode = "1.3.3"
|
||||
bstr = "1.12.0"
|
||||
bytemuck = { version = "1.23.1", features = ["extern_crate_alloc"] }
|
||||
bstr = "1.12.1"
|
||||
bytemuck = { version = "1.24.0", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.5.0"
|
||||
charabia = { version = "0.9.8", default-features = false }
|
||||
charabia = { version = "0.9.9", default-features = false }
|
||||
cellulite = "0.3.1-nested-rtxns-2"
|
||||
concat-arrays = "0.1.2"
|
||||
convert_case = "0.8.0"
|
||||
convert_case = "0.9.0"
|
||||
crossbeam-channel = "0.5.15"
|
||||
deserr = "0.6.3"
|
||||
deserr = "0.6.4"
|
||||
either = { version = "1.15.0", features = ["serde"] }
|
||||
flatten-serde-json = { path = "../flatten-serde-json" }
|
||||
fst = "0.4.7"
|
||||
@@ -38,39 +38,39 @@ heed = { version = "0.22.1-nested-rtxns-6", default-features = false, features =
|
||||
"serde-json",
|
||||
"serde-bincode",
|
||||
] }
|
||||
indexmap = { version = "2.9.0", features = ["serde"] }
|
||||
indexmap = { version = "2.12.0", features = ["serde"] }
|
||||
json-depth-checker = { path = "../json-depth-checker" }
|
||||
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||
memchr = "2.7.5"
|
||||
memmap2 = "0.9.7"
|
||||
memchr = "2.7.6"
|
||||
memmap2 = "0.9.9"
|
||||
obkv = "0.3.0"
|
||||
once_cell = "1.21.3"
|
||||
ordered-float = "5.0.0"
|
||||
rayon = "1.10.0"
|
||||
ordered-float = "5.1.0"
|
||||
rayon = "1.11.0"
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
rstar = { version = "0.12.2", features = ["serde"] }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order", "raw_value"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = { version = "1.0.145", features = ["preserve_order", "raw_value"] }
|
||||
slice-group-by = "0.3.1"
|
||||
smallstr = { version = "0.3.0", features = ["serde"] }
|
||||
smallstr = { version = "0.3.1", features = ["serde"] }
|
||||
smallvec = "1.15.1"
|
||||
smartstring = "1.0.1"
|
||||
tempfile = "3.20.0"
|
||||
thiserror = "2.0.12"
|
||||
time = { version = "0.3.41", features = [
|
||||
tempfile = "3.23.0"
|
||||
thiserror = "2.0.17"
|
||||
time = { version = "0.3.44", features = [
|
||||
"serde-well-known",
|
||||
"formatting",
|
||||
"parsing",
|
||||
"macros",
|
||||
] }
|
||||
uuid = { version = "1.17.0", features = ["v4"] }
|
||||
uuid = { version = "1.18.1", features = ["v4"] }
|
||||
|
||||
filter-parser = { path = "../filter-parser" }
|
||||
|
||||
# documents words self-join
|
||||
itertools = "0.14.0"
|
||||
|
||||
csv = "1.3.1"
|
||||
csv = "1.4.0"
|
||||
candle-core = { version = "0.9.1" }
|
||||
candle-transformers = { version = "0.9.1" }
|
||||
candle-nn = { version = "0.9.1" }
|
||||
@@ -81,9 +81,9 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
|
||||
"online",
|
||||
] }
|
||||
safetensors = "0.6.2"
|
||||
tiktoken-rs = "0.7.0"
|
||||
tiktoken-rs = "0.9.1"
|
||||
liquid = "0.26.11"
|
||||
rhai = { version = "1.22.2", features = [
|
||||
rhai = { version = "1.23.6", features = [
|
||||
"serde",
|
||||
"no_module",
|
||||
"no_custom_syntax",
|
||||
@@ -95,14 +95,14 @@ hannoy = { version = "0.0.9-nested-rtxns-2", features = ["arroy"] }
|
||||
rand = "0.8.5"
|
||||
tracing = "0.1.41"
|
||||
ureq = { version = "2.12.1", features = ["json"] }
|
||||
url = "2.5.4"
|
||||
hashbrown = "0.15.4"
|
||||
bumpalo = "3.18.1"
|
||||
url = "2.5.7"
|
||||
hashbrown = "0.15.5"
|
||||
bumpalo = "3.19.0"
|
||||
bumparaw-collections = "0.1.4"
|
||||
steppe = { version = "0.4", default-features = false }
|
||||
thread_local = "1.1.9"
|
||||
rustc-hash = "2.1.1"
|
||||
enum-iterator = "2.1.0"
|
||||
enum-iterator = "2.3.0"
|
||||
bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
|
||||
flume = { version = "0.11.1", default-features = false }
|
||||
utoipa = { version = "5.4.0", features = [
|
||||
@@ -112,21 +112,21 @@ utoipa = { version = "5.4.0", features = [
|
||||
"time",
|
||||
"openapi_extensions",
|
||||
] }
|
||||
lru = "0.14.0"
|
||||
twox-hash = { version = "2.1.1", default-features = false, features = [
|
||||
lru = "0.16.2"
|
||||
twox-hash = { version = "2.1.2", default-features = false, features = [
|
||||
"std",
|
||||
"xxhash3_64",
|
||||
"xxhash64",
|
||||
] }
|
||||
geo-types = "0.7.16"
|
||||
geo-types = "0.7.17"
|
||||
zerometry = "0.3.0"
|
||||
|
||||
[dev-dependencies]
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
mimalloc = { version = "0.1.48", default-features = false }
|
||||
# fixed version due to format breakages in v1.40
|
||||
insta = "=1.39.0"
|
||||
maplit = "1.0.2"
|
||||
md5 = "0.7.0"
|
||||
md5 = "0.8.0"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
rand = { version = "0.8.5", features = ["small_rng"] }
|
||||
|
||||
@@ -141,6 +141,8 @@ lmdb-posix-sem = ["heed/posix-sem"]
|
||||
chinese = ["charabia/chinese"]
|
||||
chinese-pinyin = ["chinese", "charabia/chinese-normalization-pinyin"]
|
||||
|
||||
enterprise = []
|
||||
|
||||
# allow hebrew specialized tokenization
|
||||
hebrew = ["charabia/hebrew"]
|
||||
|
||||
|
||||
@@ -18,6 +18,8 @@ use crate::{
|
||||
pub struct Metadata {
|
||||
/// The weight as defined in the FieldidsWeightsMap of the searchable attribute if it is searchable.
|
||||
pub searchable: Option<Weight>,
|
||||
/// The field is part of the exact attributes.
|
||||
pub exact: bool,
|
||||
/// The field is part of the sortable attributes.
|
||||
pub sortable: bool,
|
||||
/// The field is defined as the distinct attribute.
|
||||
@@ -209,6 +211,7 @@ impl Metadata {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MetadataBuilder {
|
||||
searchable_attributes: Option<Vec<String>>,
|
||||
exact_searchable_attributes: Vec<String>,
|
||||
filterable_attributes: Vec<FilterableAttributesRule>,
|
||||
sortable_attributes: HashSet<String>,
|
||||
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
|
||||
@@ -220,15 +223,18 @@ impl MetadataBuilder {
|
||||
pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result<Self> {
|
||||
let searchable_attributes = index
|
||||
.user_defined_searchable_fields(rtxn)?
|
||||
.map(|fields| fields.into_iter().map(|s| s.to_string()).collect());
|
||||
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||
let exact_searchable_attributes =
|
||||
index.exact_attributes(rtxn)?.into_iter().map(String::from).collect();
|
||||
let filterable_attributes = index.filterable_attributes_rules(rtxn)?;
|
||||
let sortable_attributes = index.sortable_fields(rtxn)?;
|
||||
let localized_attributes = index.localized_attributes_rules(rtxn)?;
|
||||
let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string());
|
||||
let distinct_attribute = index.distinct_field(rtxn)?.map(String::from);
|
||||
let asc_desc_attributes = index.asc_desc_fields(rtxn)?;
|
||||
|
||||
Ok(Self::new(
|
||||
searchable_attributes,
|
||||
exact_searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
localized_attributes,
|
||||
@@ -242,6 +248,7 @@ impl MetadataBuilder {
|
||||
/// This is used for testing, prefer using `MetadataBuilder::from_index` instead.
|
||||
pub fn new(
|
||||
searchable_attributes: Option<Vec<String>>,
|
||||
exact_searchable_attributes: Vec<String>,
|
||||
filterable_attributes: Vec<FilterableAttributesRule>,
|
||||
sortable_attributes: HashSet<String>,
|
||||
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
|
||||
@@ -256,6 +263,7 @@ impl MetadataBuilder {
|
||||
|
||||
Self {
|
||||
searchable_attributes,
|
||||
exact_searchable_attributes,
|
||||
filterable_attributes,
|
||||
sortable_attributes,
|
||||
localized_attributes,
|
||||
@@ -269,6 +277,7 @@ impl MetadataBuilder {
|
||||
// Vectors fields are not searchable, filterable, distinct or asc_desc
|
||||
return Metadata {
|
||||
searchable: None,
|
||||
exact: false,
|
||||
sortable: false,
|
||||
distinct: false,
|
||||
asc_desc: false,
|
||||
@@ -296,6 +305,7 @@ impl MetadataBuilder {
|
||||
// Geo fields are not searchable, distinct or asc_desc
|
||||
return Metadata {
|
||||
searchable: None,
|
||||
exact: false,
|
||||
sortable,
|
||||
distinct: false,
|
||||
asc_desc: false,
|
||||
@@ -309,6 +319,7 @@ impl MetadataBuilder {
|
||||
debug_assert!(!sortable, "geojson fields should not be sortable");
|
||||
return Metadata {
|
||||
searchable: None,
|
||||
exact: false,
|
||||
sortable,
|
||||
distinct: false,
|
||||
asc_desc: false,
|
||||
@@ -329,6 +340,8 @@ impl MetadataBuilder {
|
||||
None => Some(0),
|
||||
};
|
||||
|
||||
let exact = self.exact_searchable_attributes.iter().any(|attr| is_faceted_by(field, attr));
|
||||
|
||||
let distinct =
|
||||
self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field);
|
||||
let asc_desc = self.asc_desc_attributes.contains(field);
|
||||
@@ -343,6 +356,7 @@ impl MetadataBuilder {
|
||||
|
||||
Metadata {
|
||||
searchable,
|
||||
exact,
|
||||
sortable,
|
||||
distinct,
|
||||
asc_desc,
|
||||
|
||||
@@ -53,7 +53,6 @@ pub mod main_key {
|
||||
pub const HIDDEN_FACETED_FIELDS_KEY: &str = "hidden-faceted-fields";
|
||||
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
|
||||
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
|
||||
pub const FOREIGN_KEYS_KEY: &str = "foreign-keys";
|
||||
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||
pub const FIELDIDS_WEIGHTS_MAP_KEY: &str = "fieldids-weights-map";
|
||||
|
||||
@@ -19,7 +19,6 @@ mod external_documents_ids;
|
||||
pub mod facet;
|
||||
mod fields_ids_map;
|
||||
mod filterable_attributes_rules;
|
||||
mod foreign_key;
|
||||
pub mod heed_codec;
|
||||
pub mod index;
|
||||
mod localized_attributes_rules;
|
||||
@@ -72,7 +71,6 @@ pub use self::filterable_attributes_rules::{
|
||||
FilterFeatures, FilterableAttributesFeatures, FilterableAttributesPatterns,
|
||||
FilterableAttributesRule,
|
||||
};
|
||||
pub use self::foreign_key::ForeignKey;
|
||||
pub use self::heed_codec::{
|
||||
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
|
||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
|
||||
|
||||
@@ -8,17 +8,26 @@ use bumpalo::Bump;
|
||||
|
||||
use super::match_searchable_field;
|
||||
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
||||
use crate::fields_ids_map::metadata::Metadata;
|
||||
use crate::update::new::document::DocumentContext;
|
||||
use crate::update::new::extract::cache::BalancedCaches;
|
||||
use crate::update::new::extract::perm_json_p::contained_in;
|
||||
use crate::update::new::extract::searchable::has_searchable_children;
|
||||
use crate::update::new::indexer::document_changes::{
|
||||
extract, DocumentChanges, Extractor, IndexingContext,
|
||||
};
|
||||
use crate::update::new::indexer::settings_changes::{
|
||||
settings_change_extract, DocumentsIndentifiers, SettingsChangeExtractor,
|
||||
};
|
||||
use crate::update::new::ref_cell_ext::RefCellExt as _;
|
||||
use crate::update::new::steps::IndexingStep;
|
||||
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
|
||||
use crate::update::new::DocumentChange;
|
||||
use crate::{bucketed_position, DocumentId, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE};
|
||||
use crate::update::new::{DocumentChange, DocumentIdentifiers};
|
||||
use crate::update::settings::SettingsDelta;
|
||||
use crate::{
|
||||
bucketed_position, DocumentId, FieldId, PatternMatch, Result, UserError,
|
||||
MAX_POSITION_PER_ATTRIBUTE,
|
||||
};
|
||||
|
||||
const MAX_COUNTED_WORDS: usize = 30;
|
||||
|
||||
@@ -34,6 +43,15 @@ pub struct WordDocidsBalancedCaches<'extractor> {
|
||||
|
||||
unsafe impl MostlySend for WordDocidsBalancedCaches<'_> {}
|
||||
|
||||
/// Whether to extract or skip fields during word extraction.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
enum FieldDbExtraction {
|
||||
/// Extract the word and put it in to the fid-based databases.
|
||||
Extract,
|
||||
/// Do not store the word in the fid-based databases.
|
||||
Skip,
|
||||
}
|
||||
|
||||
impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
||||
pub fn new_in(buckets: usize, max_memory: Option<usize>, alloc: &'extractor Bump) -> Self {
|
||||
Self {
|
||||
@@ -47,12 +65,14 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn insert_add_u32(
|
||||
&mut self,
|
||||
field_id: FieldId,
|
||||
position: u16,
|
||||
word: &str,
|
||||
exact: bool,
|
||||
field_db_extraction: FieldDbExtraction,
|
||||
docid: u32,
|
||||
bump: &Bump,
|
||||
) -> Result<()> {
|
||||
@@ -66,11 +86,13 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
||||
let buffer_size = word_bytes.len() + 1 + size_of::<FieldId>();
|
||||
let mut buffer = BumpVec::with_capacity_in(buffer_size, bump);
|
||||
|
||||
buffer.clear();
|
||||
buffer.extend_from_slice(word_bytes);
|
||||
buffer.push(0);
|
||||
buffer.extend_from_slice(&field_id.to_be_bytes());
|
||||
self.word_fid_docids.insert_add_u32(&buffer, docid)?;
|
||||
if field_db_extraction == FieldDbExtraction::Extract {
|
||||
buffer.clear();
|
||||
buffer.extend_from_slice(word_bytes);
|
||||
buffer.push(0);
|
||||
buffer.extend_from_slice(&field_id.to_be_bytes());
|
||||
self.word_fid_docids.insert_add_u32(&buffer, docid)?;
|
||||
}
|
||||
|
||||
let position = bucketed_position(position);
|
||||
buffer.clear();
|
||||
@@ -83,21 +105,26 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
||||
self.flush_fid_word_count(&mut buffer)?;
|
||||
}
|
||||
|
||||
self.fid_word_count
|
||||
.entry(field_id)
|
||||
.and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
|
||||
.or_insert((None, Some(1)));
|
||||
if field_db_extraction == FieldDbExtraction::Extract {
|
||||
self.fid_word_count
|
||||
.entry(field_id)
|
||||
.and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
|
||||
.or_insert((None, Some(1)));
|
||||
}
|
||||
|
||||
self.current_docid = Some(docid);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn insert_del_u32(
|
||||
&mut self,
|
||||
field_id: FieldId,
|
||||
position: u16,
|
||||
word: &str,
|
||||
exact: bool,
|
||||
field_db_extraction: FieldDbExtraction,
|
||||
docid: u32,
|
||||
bump: &Bump,
|
||||
) -> Result<()> {
|
||||
@@ -111,11 +138,13 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
||||
let buffer_size = word_bytes.len() + 1 + size_of::<FieldId>();
|
||||
let mut buffer = BumpVec::with_capacity_in(buffer_size, bump);
|
||||
|
||||
buffer.clear();
|
||||
buffer.extend_from_slice(word_bytes);
|
||||
buffer.push(0);
|
||||
buffer.extend_from_slice(&field_id.to_be_bytes());
|
||||
self.word_fid_docids.insert_del_u32(&buffer, docid)?;
|
||||
if field_db_extraction == FieldDbExtraction::Extract {
|
||||
buffer.clear();
|
||||
buffer.extend_from_slice(word_bytes);
|
||||
buffer.push(0);
|
||||
buffer.extend_from_slice(&field_id.to_be_bytes());
|
||||
self.word_fid_docids.insert_del_u32(&buffer, docid)?;
|
||||
}
|
||||
|
||||
let position = bucketed_position(position);
|
||||
buffer.clear();
|
||||
@@ -128,10 +157,12 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
|
||||
self.flush_fid_word_count(&mut buffer)?;
|
||||
}
|
||||
|
||||
self.fid_word_count
|
||||
.entry(field_id)
|
||||
.and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
|
||||
.or_insert((Some(1), None));
|
||||
if field_db_extraction == FieldDbExtraction::Extract {
|
||||
self.fid_word_count
|
||||
.entry(field_id)
|
||||
.and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
|
||||
.or_insert((Some(1), None));
|
||||
}
|
||||
|
||||
self.current_docid = Some(docid);
|
||||
|
||||
@@ -325,6 +356,24 @@ impl WordDocidsExtractors {
|
||||
exact_attributes.iter().any(|attr| contained_in(fname, attr))
|
||||
|| disabled_typos_terms.is_exact(word)
|
||||
};
|
||||
|
||||
let mut should_tokenize = |field_name: &str| {
|
||||
let Some((field_id, meta)) = new_fields_ids_map.id_with_metadata_or_insert(field_name)
|
||||
else {
|
||||
return Err(UserError::AttributeLimitReached.into());
|
||||
};
|
||||
|
||||
let pattern_match = if meta.is_searchable() {
|
||||
PatternMatch::Match
|
||||
} else {
|
||||
// TODO: should be a match on the field_name using `match_field_legacy` function,
|
||||
// but for legacy reasons we iterate over all the fields to fill the field_id_map.
|
||||
PatternMatch::Parent
|
||||
};
|
||||
|
||||
Ok((field_id, pattern_match))
|
||||
};
|
||||
|
||||
match document_change {
|
||||
DocumentChange::Deletion(inner) => {
|
||||
let mut token_fn = |fname: &str, fid, pos, word: &str| {
|
||||
@@ -333,13 +382,14 @@ impl WordDocidsExtractors {
|
||||
pos,
|
||||
word,
|
||||
is_exact(fname, word),
|
||||
FieldDbExtraction::Extract,
|
||||
inner.docid(),
|
||||
doc_alloc,
|
||||
)
|
||||
};
|
||||
document_tokenizer.tokenize_document(
|
||||
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
||||
new_fields_ids_map,
|
||||
&mut should_tokenize,
|
||||
&mut token_fn,
|
||||
)?;
|
||||
}
|
||||
@@ -361,13 +411,14 @@ impl WordDocidsExtractors {
|
||||
pos,
|
||||
word,
|
||||
is_exact(fname, word),
|
||||
FieldDbExtraction::Extract,
|
||||
inner.docid(),
|
||||
doc_alloc,
|
||||
)
|
||||
};
|
||||
document_tokenizer.tokenize_document(
|
||||
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
||||
new_fields_ids_map,
|
||||
&mut should_tokenize,
|
||||
&mut token_fn,
|
||||
)?;
|
||||
|
||||
@@ -377,13 +428,14 @@ impl WordDocidsExtractors {
|
||||
pos,
|
||||
word,
|
||||
is_exact(fname, word),
|
||||
FieldDbExtraction::Extract,
|
||||
inner.docid(),
|
||||
doc_alloc,
|
||||
)
|
||||
};
|
||||
document_tokenizer.tokenize_document(
|
||||
inner.merged(rtxn, index, context.db_fields_ids_map)?,
|
||||
new_fields_ids_map,
|
||||
&mut should_tokenize,
|
||||
&mut token_fn,
|
||||
)?;
|
||||
}
|
||||
@@ -394,13 +446,14 @@ impl WordDocidsExtractors {
|
||||
pos,
|
||||
word,
|
||||
is_exact(fname, word),
|
||||
FieldDbExtraction::Extract,
|
||||
inner.docid(),
|
||||
doc_alloc,
|
||||
)
|
||||
};
|
||||
document_tokenizer.tokenize_document(
|
||||
inner.inserted(),
|
||||
new_fields_ids_map,
|
||||
&mut should_tokenize,
|
||||
&mut token_fn,
|
||||
)?;
|
||||
}
|
||||
@@ -411,3 +464,292 @@ impl WordDocidsExtractors {
|
||||
cached_sorter.flush_fid_word_count(&mut buffer)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct WordDocidsSettingsExtractorsData<'a, SD> {
|
||||
tokenizer: DocumentTokenizer<'a>,
|
||||
max_memory_by_thread: Option<usize>,
|
||||
buckets: usize,
|
||||
settings_delta: &'a SD,
|
||||
}
|
||||
|
||||
impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
|
||||
for WordDocidsSettingsExtractorsData<'_, SD>
|
||||
{
|
||||
type Data = RefCell<Option<WordDocidsBalancedCaches<'extractor>>>;
|
||||
|
||||
fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
|
||||
Ok(RefCell::new(Some(WordDocidsBalancedCaches::new_in(
|
||||
self.buckets,
|
||||
self.max_memory_by_thread,
|
||||
extractor_alloc,
|
||||
))))
|
||||
}
|
||||
|
||||
fn process<'doc>(
|
||||
&'doc self,
|
||||
documents: impl Iterator<Item = crate::Result<DocumentIdentifiers<'doc>>>,
|
||||
context: &'doc DocumentContext<Self::Data>,
|
||||
) -> crate::Result<()> {
|
||||
for document in documents {
|
||||
let document = document?;
|
||||
SettingsChangeWordDocidsExtractors::extract_document_from_settings_change(
|
||||
document,
|
||||
context,
|
||||
&self.tokenizer,
|
||||
self.settings_delta,
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SettingsChangeWordDocidsExtractors;
|
||||
|
||||
impl SettingsChangeWordDocidsExtractors {
|
||||
pub fn run_extraction<'fid, 'indexer, 'index, 'extractor, SD, MSP>(
|
||||
settings_delta: &SD,
|
||||
documents: &'indexer DocumentsIndentifiers<'indexer>,
|
||||
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
|
||||
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||
step: IndexingStep,
|
||||
) -> Result<WordDocidsCaches<'extractor>>
|
||||
where
|
||||
SD: SettingsDelta + Sync,
|
||||
MSP: Fn() -> bool + Sync,
|
||||
{
|
||||
// Warning: this is duplicated code from extract_word_pair_proximity_docids.rs
|
||||
// TODO we need to read the new AND old settings to support changing global parameters
|
||||
let rtxn = indexing_context.index.read_txn()?;
|
||||
let stop_words = indexing_context.index.stop_words(&rtxn)?;
|
||||
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
|
||||
let allowed_separators: Option<Vec<_>> =
|
||||
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
|
||||
let dictionary = indexing_context.index.dictionary(&rtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
|
||||
let mut builder = tokenizer_builder(
|
||||
stop_words.as_ref(),
|
||||
allowed_separators.as_deref(),
|
||||
dictionary.as_deref(),
|
||||
);
|
||||
let tokenizer = builder.build();
|
||||
let localized_attributes_rules =
|
||||
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
|
||||
let document_tokenizer = DocumentTokenizer {
|
||||
tokenizer: &tokenizer,
|
||||
localized_attributes_rules: &localized_attributes_rules,
|
||||
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
|
||||
};
|
||||
let extractor_data = WordDocidsSettingsExtractorsData {
|
||||
tokenizer: document_tokenizer,
|
||||
max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
|
||||
buckets: rayon::current_num_threads(),
|
||||
settings_delta,
|
||||
};
|
||||
let datastore = ThreadLocal::new();
|
||||
{
|
||||
let span = tracing::debug_span!(target: "indexing::documents::extract", "vectors");
|
||||
let _entered = span.enter();
|
||||
|
||||
settings_change_extract(
|
||||
documents,
|
||||
&extractor_data,
|
||||
indexing_context,
|
||||
extractor_allocs,
|
||||
&datastore,
|
||||
step,
|
||||
)?;
|
||||
}
|
||||
|
||||
let mut merger = WordDocidsCaches::new();
|
||||
for cache in datastore.into_iter().flat_map(RefCell::into_inner) {
|
||||
merger.push(cache)?;
|
||||
}
|
||||
|
||||
Ok(merger)
|
||||
}
|
||||
|
||||
/// Extracts document words from a settings change.
|
||||
fn extract_document_from_settings_change<SD: SettingsDelta>(
|
||||
document: DocumentIdentifiers<'_>,
|
||||
context: &DocumentContext<RefCell<Option<WordDocidsBalancedCaches>>>,
|
||||
document_tokenizer: &DocumentTokenizer,
|
||||
settings_delta: &SD,
|
||||
) -> Result<()> {
|
||||
let mut cached_sorter_ref = context.data.borrow_mut_or_yield();
|
||||
let cached_sorter = cached_sorter_ref.as_mut().unwrap();
|
||||
let doc_alloc = &context.doc_alloc;
|
||||
|
||||
let new_fields_ids_map = settings_delta.new_fields_ids_map();
|
||||
let old_fields_ids_map = context.index.fields_ids_map_with_metadata(&context.rtxn)?;
|
||||
let old_searchable = settings_delta.old_searchable_attributes().as_ref();
|
||||
let new_searchable = settings_delta.new_searchable_attributes().as_ref();
|
||||
|
||||
let current_document = document.current(
|
||||
&context.rtxn,
|
||||
context.index,
|
||||
old_fields_ids_map.as_fields_ids_map(),
|
||||
)?;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
enum ActionToOperate {
|
||||
ReindexAllFields,
|
||||
// TODO improve by listing field prefixes
|
||||
IndexAddedFields,
|
||||
SkipDocument,
|
||||
}
|
||||
|
||||
let mut action = ActionToOperate::SkipDocument;
|
||||
// Here we do a preliminary check to determine the action to take.
|
||||
// This check doesn't trigger the tokenizer as we never return
|
||||
// PatternMatch::Match.
|
||||
document_tokenizer.tokenize_document(
|
||||
current_document,
|
||||
&mut |field_name| {
|
||||
let fid = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
|
||||
|
||||
// If the document must be reindexed, early return NoMatch to stop the scanning process.
|
||||
if action == ActionToOperate::ReindexAllFields {
|
||||
return Ok((fid, PatternMatch::NoMatch));
|
||||
}
|
||||
|
||||
let old_field_metadata = old_fields_ids_map.metadata(fid).unwrap();
|
||||
let new_field_metadata = new_fields_ids_map.metadata(fid).unwrap();
|
||||
|
||||
action = match (old_field_metadata, new_field_metadata) {
|
||||
// At least one field is added or removed from the exact fields => ReindexAllFields
|
||||
(Metadata { exact: old_exact, .. }, Metadata { exact: new_exact, .. })
|
||||
if old_exact != new_exact =>
|
||||
{
|
||||
ActionToOperate::ReindexAllFields
|
||||
}
|
||||
// At least one field is removed from the searchable fields => ReindexAllFields
|
||||
(Metadata { searchable: Some(_), .. }, Metadata { searchable: None, .. }) => {
|
||||
ActionToOperate::ReindexAllFields
|
||||
}
|
||||
// At least one field is added in the searchable fields => IndexAddedFields
|
||||
(Metadata { searchable: None, .. }, Metadata { searchable: Some(_), .. }) => {
|
||||
// We can safely overwrite the action, because we early return when action is ReindexAllFields.
|
||||
ActionToOperate::IndexAddedFields
|
||||
}
|
||||
_ => action,
|
||||
};
|
||||
|
||||
Ok((fid, PatternMatch::Parent))
|
||||
},
|
||||
&mut |_, _, _, _| Ok(()),
|
||||
)?;
|
||||
|
||||
// Early return when we don't need to index the document
|
||||
if action == ActionToOperate::SkipDocument {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut should_tokenize = |field_name: &str| {
|
||||
let field_id = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
|
||||
let old_field_metadata = old_fields_ids_map.metadata(field_id).unwrap();
|
||||
let new_field_metadata = new_fields_ids_map.metadata(field_id).unwrap();
|
||||
|
||||
let pattern_match = match action {
|
||||
ActionToOperate::ReindexAllFields => {
|
||||
if old_field_metadata.is_searchable() || new_field_metadata.is_searchable() {
|
||||
PatternMatch::Match
|
||||
// If any old or new field is searchable then we need to iterate over all fields
|
||||
// else if any field matches we need to iterate over all fields
|
||||
} else if has_searchable_children(
|
||||
field_name,
|
||||
old_searchable.zip(new_searchable).map(|(old, new)| old.iter().chain(new)),
|
||||
) {
|
||||
PatternMatch::Parent
|
||||
} else {
|
||||
PatternMatch::NoMatch
|
||||
}
|
||||
}
|
||||
ActionToOperate::IndexAddedFields => {
|
||||
// Was not searchable but now is
|
||||
if !old_field_metadata.is_searchable() && new_field_metadata.is_searchable() {
|
||||
PatternMatch::Match
|
||||
// If the field is now a parent of a searchable field
|
||||
} else if has_searchable_children(field_name, new_searchable) {
|
||||
PatternMatch::Parent
|
||||
} else {
|
||||
PatternMatch::NoMatch
|
||||
}
|
||||
}
|
||||
ActionToOperate::SkipDocument => unreachable!(),
|
||||
};
|
||||
|
||||
Ok((field_id, pattern_match))
|
||||
};
|
||||
|
||||
let old_disabled_typos_terms = settings_delta.old_disabled_typos_terms();
|
||||
let new_disabled_typos_terms = settings_delta.new_disabled_typos_terms();
|
||||
let mut token_fn = |_field_name: &str, field_id, pos, word: &str| {
|
||||
let old_field_metadata = old_fields_ids_map.metadata(field_id).unwrap();
|
||||
let new_field_metadata = new_fields_ids_map.metadata(field_id).unwrap();
|
||||
|
||||
match (old_field_metadata, new_field_metadata) {
|
||||
(
|
||||
Metadata { searchable: Some(_), exact: old_exact, .. },
|
||||
Metadata { searchable: None, .. },
|
||||
) => cached_sorter.insert_del_u32(
|
||||
field_id,
|
||||
pos,
|
||||
word,
|
||||
old_exact || old_disabled_typos_terms.is_exact(word),
|
||||
// We deleted the field globally
|
||||
FieldDbExtraction::Skip,
|
||||
document.docid(),
|
||||
doc_alloc,
|
||||
),
|
||||
(
|
||||
Metadata { searchable: None, .. },
|
||||
Metadata { searchable: Some(_), exact: new_exact, .. },
|
||||
) => cached_sorter.insert_add_u32(
|
||||
field_id,
|
||||
pos,
|
||||
word,
|
||||
new_exact || new_disabled_typos_terms.is_exact(word),
|
||||
FieldDbExtraction::Extract,
|
||||
document.docid(),
|
||||
doc_alloc,
|
||||
),
|
||||
(Metadata { searchable: None, .. }, Metadata { searchable: None, .. }) => {
|
||||
unreachable!()
|
||||
}
|
||||
(Metadata { exact: old_exact, .. }, Metadata { exact: new_exact, .. }) => {
|
||||
cached_sorter.insert_del_u32(
|
||||
field_id,
|
||||
pos,
|
||||
word,
|
||||
old_exact || old_disabled_typos_terms.is_exact(word),
|
||||
// The field has already been extracted
|
||||
FieldDbExtraction::Skip,
|
||||
document.docid(),
|
||||
doc_alloc,
|
||||
)?;
|
||||
cached_sorter.insert_add_u32(
|
||||
field_id,
|
||||
pos,
|
||||
word,
|
||||
new_exact || new_disabled_typos_terms.is_exact(word),
|
||||
// The field has already been extracted
|
||||
FieldDbExtraction::Skip,
|
||||
document.docid(),
|
||||
doc_alloc,
|
||||
)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// TODO we must tokenize twice when we change global parameters like stop words,
|
||||
// the language settings, dictionary, separators, non-separators...
|
||||
document_tokenizer.tokenize_document(
|
||||
current_document,
|
||||
&mut should_tokenize,
|
||||
&mut token_fn,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,17 +6,24 @@ use bumpalo::Bump;
|
||||
|
||||
use super::match_searchable_field;
|
||||
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
||||
use crate::fields_ids_map::metadata::Metadata;
|
||||
use crate::proximity::ProximityPrecision::*;
|
||||
use crate::proximity::{index_proximity, MAX_DISTANCE};
|
||||
use crate::update::new::document::{Document, DocumentContext};
|
||||
use crate::update::new::extract::cache::BalancedCaches;
|
||||
use crate::update::new::indexer::document_changes::{
|
||||
extract, DocumentChanges, Extractor, IndexingContext,
|
||||
};
|
||||
use crate::update::new::indexer::settings_change_extract;
|
||||
use crate::update::new::indexer::settings_changes::{
|
||||
DocumentsIndentifiers, SettingsChangeExtractor,
|
||||
};
|
||||
use crate::update::new::ref_cell_ext::RefCellExt as _;
|
||||
use crate::update::new::steps::IndexingStep;
|
||||
use crate::update::new::thread_local::{FullySend, ThreadLocal};
|
||||
use crate::update::new::DocumentChange;
|
||||
use crate::{FieldId, GlobalFieldsIdsMap, Result, MAX_POSITION_PER_ATTRIBUTE};
|
||||
use crate::update::new::{DocumentChange, DocumentIdentifiers};
|
||||
use crate::update::settings::SettingsDelta;
|
||||
use crate::{FieldId, PatternMatch, Result, UserError, MAX_POSITION_PER_ATTRIBUTE};
|
||||
|
||||
pub struct WordPairProximityDocidsExtractorData<'a> {
|
||||
tokenizer: DocumentTokenizer<'a>,
|
||||
@@ -116,7 +123,7 @@ impl WordPairProximityDocidsExtractor {
|
||||
// and to store the docids of the documents that have a number of words in a given field
|
||||
// equal to or under than MAX_COUNTED_WORDS.
|
||||
fn extract_document_change(
|
||||
context: &DocumentContext<RefCell<BalancedCaches>>,
|
||||
context: &DocumentContext<RefCell<BalancedCaches<'_>>>,
|
||||
document_tokenizer: &DocumentTokenizer,
|
||||
searchable_attributes: Option<&[&str]>,
|
||||
document_change: DocumentChange,
|
||||
@@ -147,8 +154,12 @@ impl WordPairProximityDocidsExtractor {
|
||||
process_document_tokens(
|
||||
document,
|
||||
document_tokenizer,
|
||||
new_fields_ids_map,
|
||||
&mut word_positions,
|
||||
&mut |field_name| {
|
||||
new_fields_ids_map
|
||||
.id_with_metadata_or_insert(field_name)
|
||||
.ok_or(UserError::AttributeLimitReached.into())
|
||||
},
|
||||
&mut |(w1, w2), prox| {
|
||||
del_word_pair_proximity.push(((w1, w2), prox));
|
||||
},
|
||||
@@ -170,8 +181,12 @@ impl WordPairProximityDocidsExtractor {
|
||||
process_document_tokens(
|
||||
document,
|
||||
document_tokenizer,
|
||||
new_fields_ids_map,
|
||||
&mut word_positions,
|
||||
&mut |field_name| {
|
||||
new_fields_ids_map
|
||||
.id_with_metadata_or_insert(field_name)
|
||||
.ok_or(UserError::AttributeLimitReached.into())
|
||||
},
|
||||
&mut |(w1, w2), prox| {
|
||||
del_word_pair_proximity.push(((w1, w2), prox));
|
||||
},
|
||||
@@ -180,8 +195,12 @@ impl WordPairProximityDocidsExtractor {
|
||||
process_document_tokens(
|
||||
document,
|
||||
document_tokenizer,
|
||||
new_fields_ids_map,
|
||||
&mut word_positions,
|
||||
&mut |field_name| {
|
||||
new_fields_ids_map
|
||||
.id_with_metadata_or_insert(field_name)
|
||||
.ok_or(UserError::AttributeLimitReached.into())
|
||||
},
|
||||
&mut |(w1, w2), prox| {
|
||||
add_word_pair_proximity.push(((w1, w2), prox));
|
||||
},
|
||||
@@ -192,8 +211,12 @@ impl WordPairProximityDocidsExtractor {
|
||||
process_document_tokens(
|
||||
document,
|
||||
document_tokenizer,
|
||||
new_fields_ids_map,
|
||||
&mut word_positions,
|
||||
&mut |field_name| {
|
||||
new_fields_ids_map
|
||||
.id_with_metadata_or_insert(field_name)
|
||||
.ok_or(UserError::AttributeLimitReached.into())
|
||||
},
|
||||
&mut |(w1, w2), prox| {
|
||||
add_word_pair_proximity.push(((w1, w2), prox));
|
||||
},
|
||||
@@ -257,8 +280,8 @@ fn drain_word_positions(
|
||||
fn process_document_tokens<'doc>(
|
||||
document: impl Document<'doc>,
|
||||
document_tokenizer: &DocumentTokenizer,
|
||||
fields_ids_map: &mut GlobalFieldsIdsMap,
|
||||
word_positions: &mut VecDeque<(Rc<str>, u16)>,
|
||||
field_id_and_metadata: &mut impl FnMut(&str) -> Result<(FieldId, Metadata)>,
|
||||
word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8),
|
||||
) -> Result<()> {
|
||||
let mut field_id = None;
|
||||
@@ -279,8 +302,248 @@ fn process_document_tokens<'doc>(
|
||||
word_positions.push_back((Rc::from(word), pos));
|
||||
Ok(())
|
||||
};
|
||||
document_tokenizer.tokenize_document(document, fields_ids_map, &mut token_fn)?;
|
||||
|
||||
let mut should_tokenize = |field_name: &str| {
|
||||
let (field_id, meta) = field_id_and_metadata(field_name)?;
|
||||
|
||||
let pattern_match = if meta.is_searchable() {
|
||||
PatternMatch::Match
|
||||
} else {
|
||||
// TODO: should be a match on the field_name using `match_field_legacy` function,
|
||||
// but for legacy reasons we iterate over all the fields to fill the field_id_map.
|
||||
PatternMatch::Parent
|
||||
};
|
||||
|
||||
Ok((field_id, pattern_match))
|
||||
};
|
||||
|
||||
document_tokenizer.tokenize_document(document, &mut should_tokenize, &mut token_fn)?;
|
||||
|
||||
drain_word_positions(word_positions, word_pair_proximity);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub struct WordPairProximityDocidsSettingsExtractorsData<'a, SD> {
|
||||
tokenizer: DocumentTokenizer<'a>,
|
||||
max_memory_by_thread: Option<usize>,
|
||||
buckets: usize,
|
||||
settings_delta: &'a SD,
|
||||
}
|
||||
|
||||
impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
|
||||
for WordPairProximityDocidsSettingsExtractorsData<'_, SD>
|
||||
{
|
||||
type Data = RefCell<BalancedCaches<'extractor>>;
|
||||
|
||||
fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
|
||||
Ok(RefCell::new(BalancedCaches::new_in(
|
||||
self.buckets,
|
||||
self.max_memory_by_thread,
|
||||
extractor_alloc,
|
||||
)))
|
||||
}
|
||||
|
||||
fn process<'doc>(
|
||||
&'doc self,
|
||||
documents: impl Iterator<Item = crate::Result<DocumentIdentifiers<'doc>>>,
|
||||
context: &'doc DocumentContext<Self::Data>,
|
||||
) -> crate::Result<()> {
|
||||
for document in documents {
|
||||
let document = document?;
|
||||
SettingsChangeWordPairProximityDocidsExtractors::extract_document_from_settings_change(
|
||||
document,
|
||||
context,
|
||||
&self.tokenizer,
|
||||
self.settings_delta,
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SettingsChangeWordPairProximityDocidsExtractors;
|
||||
|
||||
impl SettingsChangeWordPairProximityDocidsExtractors {
|
||||
pub fn run_extraction<'fid, 'indexer, 'index, 'extractor, SD, MSP>(
|
||||
settings_delta: &SD,
|
||||
documents: &'indexer DocumentsIndentifiers<'indexer>,
|
||||
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
|
||||
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||
step: IndexingStep,
|
||||
) -> Result<Vec<BalancedCaches<'extractor>>>
|
||||
where
|
||||
SD: SettingsDelta + Sync,
|
||||
MSP: Fn() -> bool + Sync,
|
||||
{
|
||||
// Warning: this is duplicated code from extract_word_docids.rs
|
||||
let rtxn = indexing_context.index.read_txn()?;
|
||||
let stop_words = indexing_context.index.stop_words(&rtxn)?;
|
||||
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
|
||||
let allowed_separators: Option<Vec<_>> =
|
||||
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
|
||||
let dictionary = indexing_context.index.dictionary(&rtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
|
||||
let mut builder = tokenizer_builder(
|
||||
stop_words.as_ref(),
|
||||
allowed_separators.as_deref(),
|
||||
dictionary.as_deref(),
|
||||
);
|
||||
let tokenizer = builder.build();
|
||||
let localized_attributes_rules =
|
||||
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
|
||||
let document_tokenizer = DocumentTokenizer {
|
||||
tokenizer: &tokenizer,
|
||||
localized_attributes_rules: &localized_attributes_rules,
|
||||
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
|
||||
};
|
||||
let extractor_data = WordPairProximityDocidsSettingsExtractorsData {
|
||||
tokenizer: document_tokenizer,
|
||||
max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
|
||||
buckets: rayon::current_num_threads(),
|
||||
settings_delta,
|
||||
};
|
||||
let datastore = ThreadLocal::new();
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids_extraction");
|
||||
let _entered = span.enter();
|
||||
|
||||
settings_change_extract(
|
||||
documents,
|
||||
&extractor_data,
|
||||
indexing_context,
|
||||
extractor_allocs,
|
||||
&datastore,
|
||||
step,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(datastore.into_iter().map(RefCell::into_inner).collect())
|
||||
}
|
||||
|
||||
/// Extracts document words from a settings change.
|
||||
fn extract_document_from_settings_change<SD: SettingsDelta>(
|
||||
document: DocumentIdentifiers<'_>,
|
||||
context: &DocumentContext<RefCell<BalancedCaches<'_>>>,
|
||||
document_tokenizer: &DocumentTokenizer,
|
||||
settings_delta: &SD,
|
||||
) -> Result<()> {
|
||||
let mut cached_sorter = context.data.borrow_mut_or_yield();
|
||||
let doc_alloc = &context.doc_alloc;
|
||||
|
||||
let new_fields_ids_map = settings_delta.new_fields_ids_map();
|
||||
let old_fields_ids_map = settings_delta.old_fields_ids_map();
|
||||
let old_proximity_precision = *settings_delta.old_proximity_precision();
|
||||
let new_proximity_precision = *settings_delta.new_proximity_precision();
|
||||
|
||||
let current_document = document.current(
|
||||
&context.rtxn,
|
||||
context.index,
|
||||
old_fields_ids_map.as_fields_ids_map(),
|
||||
)?;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
enum ActionToOperate {
|
||||
ReindexAllFields,
|
||||
SkipDocument,
|
||||
}
|
||||
|
||||
// TODO prefix_fid delete_old_fid_based_databases
|
||||
let mut action = match (old_proximity_precision, new_proximity_precision) {
|
||||
(ByAttribute, ByWord) => ActionToOperate::ReindexAllFields,
|
||||
(_, _) => ActionToOperate::SkipDocument,
|
||||
};
|
||||
|
||||
// Here we do a preliminary check to determine the action to take.
|
||||
// This check doesn't trigger the tokenizer as we never return
|
||||
// PatternMatch::Match.
|
||||
if action != ActionToOperate::ReindexAllFields {
|
||||
document_tokenizer.tokenize_document(
|
||||
current_document,
|
||||
&mut |field_name| {
|
||||
let fid = new_fields_ids_map.id(field_name).expect("All fields IDs must exist");
|
||||
|
||||
// If the document must be reindexed, early return NoMatch to stop the scanning process.
|
||||
if action == ActionToOperate::ReindexAllFields {
|
||||
return Ok((fid, PatternMatch::NoMatch));
|
||||
}
|
||||
|
||||
let old_field_metadata = old_fields_ids_map.metadata(fid).unwrap();
|
||||
let new_field_metadata = new_fields_ids_map.metadata(fid).unwrap();
|
||||
|
||||
action = match (old_field_metadata, new_field_metadata) {
|
||||
// At least one field is removed or added from the searchable fields
|
||||
(
|
||||
Metadata { searchable: Some(_), .. },
|
||||
Metadata { searchable: None, .. },
|
||||
)
|
||||
| (
|
||||
Metadata { searchable: None, .. },
|
||||
Metadata { searchable: Some(_), .. },
|
||||
) => ActionToOperate::ReindexAllFields,
|
||||
_ => action,
|
||||
};
|
||||
|
||||
Ok((fid, PatternMatch::Parent))
|
||||
},
|
||||
&mut |_, _, _, _| Ok(()),
|
||||
)?;
|
||||
}
|
||||
|
||||
// Early return when we don't need to index the document
|
||||
if action == ActionToOperate::SkipDocument {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut del_word_pair_proximity = bumpalo::collections::Vec::new_in(doc_alloc);
|
||||
let mut add_word_pair_proximity = bumpalo::collections::Vec::new_in(doc_alloc);
|
||||
|
||||
// is a vecdequeue, and will be smol, so can stay on the heap for now
|
||||
let mut word_positions: VecDeque<(Rc<str>, u16)> =
|
||||
VecDeque::with_capacity(MAX_DISTANCE as usize);
|
||||
|
||||
process_document_tokens(
|
||||
current_document,
|
||||
// TODO Tokenize must be based on old settings
|
||||
document_tokenizer,
|
||||
&mut word_positions,
|
||||
&mut |field_name| {
|
||||
Ok(old_fields_ids_map.id_with_metadata(field_name).expect("All fields must exist"))
|
||||
},
|
||||
&mut |(w1, w2), prox| {
|
||||
del_word_pair_proximity.push(((w1, w2), prox));
|
||||
},
|
||||
)?;
|
||||
|
||||
process_document_tokens(
|
||||
current_document,
|
||||
// TODO Tokenize must be based on new settings
|
||||
document_tokenizer,
|
||||
&mut word_positions,
|
||||
&mut |field_name| {
|
||||
Ok(new_fields_ids_map.id_with_metadata(field_name).expect("All fields must exist"))
|
||||
},
|
||||
&mut |(w1, w2), prox| {
|
||||
add_word_pair_proximity.push(((w1, w2), prox));
|
||||
},
|
||||
)?;
|
||||
|
||||
let mut key_buffer = bumpalo::collections::Vec::new_in(doc_alloc);
|
||||
|
||||
del_word_pair_proximity.sort_unstable();
|
||||
del_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
|
||||
for ((w1, w2), prox) in del_word_pair_proximity.iter() {
|
||||
let key = build_key(*prox, w1, w2, &mut key_buffer);
|
||||
cached_sorter.insert_del_u32(key, document.docid())?;
|
||||
}
|
||||
|
||||
add_word_pair_proximity.sort_unstable();
|
||||
add_word_pair_proximity.dedup_by(|(k1, _), (k2, _)| k1 == k2);
|
||||
for ((w1, w2), prox) in add_word_pair_proximity.iter() {
|
||||
let key = build_key(*prox, w1, w2, &mut key_buffer);
|
||||
cached_sorter.insert_add_u32(key, document.docid())?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,8 +2,12 @@ mod extract_word_docids;
|
||||
mod extract_word_pair_proximity_docids;
|
||||
mod tokenize_document;
|
||||
|
||||
pub use extract_word_docids::{WordDocidsCaches, WordDocidsExtractors};
|
||||
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
|
||||
pub use extract_word_docids::{
|
||||
SettingsChangeWordDocidsExtractors, WordDocidsCaches, WordDocidsExtractors,
|
||||
};
|
||||
pub use extract_word_pair_proximity_docids::{
|
||||
SettingsChangeWordPairProximityDocidsExtractors, WordPairProximityDocidsExtractor,
|
||||
};
|
||||
|
||||
use crate::attribute_patterns::{match_field_legacy, PatternMatch};
|
||||
|
||||
@@ -27,3 +31,17 @@ pub fn match_searchable_field(
|
||||
|
||||
selection
|
||||
}
|
||||
|
||||
/// return `true` if the provided `field_name` is a parent of at least one of the fields contained in `searchable`,
|
||||
/// or if `searchable` is `None`.
|
||||
fn has_searchable_children<I, A>(field_name: &str, searchable: Option<I>) -> bool
|
||||
where
|
||||
I: IntoIterator<Item = A>,
|
||||
A: AsRef<str>,
|
||||
{
|
||||
searchable.is_none_or(|fields| {
|
||||
fields
|
||||
.into_iter()
|
||||
.any(|attr| match_field_legacy(attr.as_ref(), field_name) == PatternMatch::Parent)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -8,10 +8,7 @@ use crate::update::new::document::Document;
|
||||
use crate::update::new::extract::perm_json_p::{
|
||||
seek_leaf_values_in_array, seek_leaf_values_in_object, Depth,
|
||||
};
|
||||
use crate::{
|
||||
FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
|
||||
MAX_WORD_LENGTH,
|
||||
};
|
||||
use crate::{FieldId, InternalError, LocalizedAttributesRule, Result, MAX_WORD_LENGTH};
|
||||
|
||||
// todo: should be crate::proximity::MAX_DISTANCE but it has been forgotten
|
||||
const MAX_DISTANCE: u32 = 8;
|
||||
@@ -26,26 +23,25 @@ impl DocumentTokenizer<'_> {
|
||||
pub fn tokenize_document<'doc>(
|
||||
&self,
|
||||
document: impl Document<'doc>,
|
||||
field_id_map: &mut GlobalFieldsIdsMap,
|
||||
should_tokenize: &mut impl FnMut(&str) -> Result<(FieldId, PatternMatch)>,
|
||||
token_fn: &mut impl FnMut(&str, FieldId, u16, &str) -> Result<()>,
|
||||
) -> Result<()> {
|
||||
let mut field_position = HashMap::new();
|
||||
let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
|
||||
let Some((field_id, meta)) = field_id_map.id_with_metadata_or_insert(field_name) else {
|
||||
return Err(UserError::AttributeLimitReached.into());
|
||||
};
|
||||
|
||||
if meta.is_searchable() {
|
||||
self.tokenize_field(field_id, field_name, value, token_fn, &mut field_position)?;
|
||||
}
|
||||
|
||||
// todo: should be a match on the field_name using `match_field_legacy` function,
|
||||
// but for legacy reasons we iterate over all the fields to fill the field_id_map.
|
||||
Ok(PatternMatch::Match)
|
||||
};
|
||||
|
||||
for entry in document.iter_top_level_fields() {
|
||||
let (field_name, value) = entry?;
|
||||
|
||||
if let (_, PatternMatch::NoMatch) = should_tokenize(field_name)? {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
|
||||
let (fid, pattern_match) = should_tokenize(field_name)?;
|
||||
if pattern_match == PatternMatch::Match {
|
||||
self.tokenize_field(fid, field_name, value, token_fn, &mut field_position)?;
|
||||
}
|
||||
Ok(pattern_match)
|
||||
};
|
||||
|
||||
// parse json.
|
||||
match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
|
||||
Value::Object(object) => seek_leaf_values_in_object(
|
||||
@@ -192,7 +188,7 @@ mod test {
|
||||
use super::*;
|
||||
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
|
||||
use crate::update::new::document::{DocumentFromVersions, Versions};
|
||||
use crate::FieldsIdsMap;
|
||||
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, UserError};
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_document() {
|
||||
@@ -231,6 +227,7 @@ mod test {
|
||||
Default::default(),
|
||||
Default::default(),
|
||||
Default::default(),
|
||||
Default::default(),
|
||||
None,
|
||||
None,
|
||||
Default::default(),
|
||||
@@ -251,15 +248,19 @@ mod test {
|
||||
let document = Versions::single(document);
|
||||
let document = DocumentFromVersions::new(&document);
|
||||
|
||||
let mut should_tokenize = |field_name: &str| {
|
||||
let Some(field_id) = global_fields_ids_map.id_or_insert(field_name) else {
|
||||
return Err(UserError::AttributeLimitReached.into());
|
||||
};
|
||||
|
||||
Ok((field_id, PatternMatch::Match))
|
||||
};
|
||||
|
||||
document_tokenizer
|
||||
.tokenize_document(
|
||||
document,
|
||||
&mut global_fields_ids_map,
|
||||
&mut |_fname, fid, pos, word| {
|
||||
words.insert([fid, pos], word.to_string());
|
||||
Ok(())
|
||||
},
|
||||
)
|
||||
.tokenize_document(document, &mut should_tokenize, &mut |_fname, fid, pos, word| {
|
||||
words.insert([fid, pos], word.to_string());
|
||||
Ok(())
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
snapshot!(format!("{:#?}", words), @r###"
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::cell::RefCell;
|
||||
use std::fmt::Debug;
|
||||
use std::sync::RwLock;
|
||||
|
||||
use bumpalo::collections::Vec as BVec;
|
||||
use bumpalo::Bump;
|
||||
@@ -27,7 +28,10 @@ use crate::vector::extractor::{
|
||||
use crate::vector::session::{EmbedSession, Input, Metadata, OnEmbed};
|
||||
use crate::vector::settings::ReindexAction;
|
||||
use crate::vector::{Embedding, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment};
|
||||
use crate::{DocumentId, FieldDistribution, InternalError, Result, ThreadPoolNoAbort, UserError};
|
||||
use crate::{
|
||||
DocumentId, FieldDistribution, GlobalFieldsIdsMap, InternalError, Result, ThreadPoolNoAbort,
|
||||
UserError,
|
||||
};
|
||||
|
||||
pub struct EmbeddingExtractor<'a, 'b> {
|
||||
embedders: &'a RuntimeEmbedders,
|
||||
@@ -321,6 +325,15 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
|
||||
let old_embedders = self.settings_delta.old_embedders();
|
||||
let unused_vectors_distribution = UnusedVectorsDistributionBump::new_in(&context.doc_alloc);
|
||||
|
||||
// We get a reference to the new and old fields ids maps but
|
||||
// note that those are local versions where updates to them
|
||||
// will not be reflected in the database. It's not an issue
|
||||
// because new settings do not generate new fields.
|
||||
let new_fields_ids_map = RwLock::new(self.settings_delta.new_fields_ids_map().clone());
|
||||
let new_fields_ids_map = RefCell::new(GlobalFieldsIdsMap::new(&new_fields_ids_map));
|
||||
let old_fields_ids_map = RwLock::new(self.settings_delta.old_fields_ids_map().clone());
|
||||
let old_fields_ids_map = RefCell::new(GlobalFieldsIdsMap::new(&old_fields_ids_map));
|
||||
|
||||
let mut all_chunks = BVec::with_capacity_in(embedders.len(), &context.doc_alloc);
|
||||
let embedder_configs = context.index.embedding_configs();
|
||||
for (embedder_name, action) in self.settings_delta.embedder_actions().iter() {
|
||||
@@ -396,6 +409,7 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
|
||||
if !must_regenerate {
|
||||
continue;
|
||||
}
|
||||
|
||||
// we need to regenerate the prompts for the document
|
||||
chunks.settings_change_autogenerated(
|
||||
document.docid(),
|
||||
@@ -406,7 +420,8 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
|
||||
context.db_fields_ids_map,
|
||||
)?,
|
||||
self.settings_delta,
|
||||
context.new_fields_ids_map,
|
||||
&old_fields_ids_map,
|
||||
&new_fields_ids_map,
|
||||
&unused_vectors_distribution,
|
||||
old_is_user_provided,
|
||||
fragments_changed,
|
||||
@@ -442,7 +457,8 @@ impl<'extractor, SD: SettingsDelta + Sync> SettingsChangeExtractor<'extractor>
|
||||
context.db_fields_ids_map,
|
||||
)?,
|
||||
self.settings_delta,
|
||||
context.new_fields_ids_map,
|
||||
&old_fields_ids_map,
|
||||
&new_fields_ids_map,
|
||||
&unused_vectors_distribution,
|
||||
old_is_user_provided,
|
||||
true,
|
||||
@@ -638,7 +654,8 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
external_docid: &'a str,
|
||||
document: D,
|
||||
settings_delta: &SD,
|
||||
fields_ids_map: &'a RefCell<crate::GlobalFieldsIdsMap>,
|
||||
old_fields_ids_map: &'a RefCell<GlobalFieldsIdsMap<'a>>,
|
||||
new_fields_ids_map: &'a RefCell<GlobalFieldsIdsMap<'a>>,
|
||||
unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
|
||||
old_is_user_provided: bool,
|
||||
full_reindex: bool,
|
||||
@@ -733,10 +750,17 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
old_embedder.as_ref().map(|old_embedder| &old_embedder.document_template)
|
||||
};
|
||||
|
||||
let extractor =
|
||||
DocumentTemplateExtractor::new(document_template, doc_alloc, fields_ids_map);
|
||||
let extractor = DocumentTemplateExtractor::new(
|
||||
document_template,
|
||||
doc_alloc,
|
||||
new_fields_ids_map,
|
||||
);
|
||||
let old_extractor = old_document_template.map(|old_document_template| {
|
||||
DocumentTemplateExtractor::new(old_document_template, doc_alloc, fields_ids_map)
|
||||
DocumentTemplateExtractor::new(
|
||||
old_document_template,
|
||||
doc_alloc,
|
||||
old_fields_ids_map,
|
||||
)
|
||||
});
|
||||
let metadata =
|
||||
Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };
|
||||
|
||||
9
crates/milli/src/update/new/indexer/community_edition.rs
Normal file
9
crates/milli/src/update/new/indexer/community_edition.rs
Normal file
@@ -0,0 +1,9 @@
|
||||
pub mod sharding {
|
||||
pub struct Shards;
|
||||
|
||||
impl Shards {
|
||||
pub fn must_process(&self, _docid: &str) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -17,7 +17,7 @@ use super::guess_primary_key::retrieve_or_guess_primary_key;
|
||||
use crate::documents::PrimaryKey;
|
||||
use crate::progress::{AtomicPayloadStep, Progress};
|
||||
use crate::update::new::document::{DocumentContext, Versions};
|
||||
use crate::update::new::indexer::enterprise_edition::sharding::Shards;
|
||||
use crate::update::new::indexer::current_edition::sharding::Shards;
|
||||
use crate::update::new::steps::IndexingStep;
|
||||
use crate::update::new::thread_local::MostlySend;
|
||||
use crate::update::new::{DocumentIdentifiers, Insertion, Update};
|
||||
|
||||
@@ -372,11 +372,10 @@ where
|
||||
SD: SettingsDelta + Sync,
|
||||
{
|
||||
// Create the list of document ids to extract
|
||||
let rtxn = indexing_context.index.read_txn()?;
|
||||
let all_document_ids =
|
||||
indexing_context.index.documents_ids(&rtxn)?.into_iter().collect::<Vec<_>>();
|
||||
let primary_key =
|
||||
primary_key_from_db(indexing_context.index, &rtxn, &indexing_context.db_fields_ids_map)?;
|
||||
let index = indexing_context.index;
|
||||
let rtxn = index.read_txn()?;
|
||||
let all_document_ids = index.documents_ids(&rtxn)?.into_iter().collect::<Vec<_>>();
|
||||
let primary_key = primary_key_from_db(index, &rtxn, &indexing_context.db_fields_ids_map)?;
|
||||
let documents = DocumentsIndentifiers::new(&all_document_ids, primary_key);
|
||||
|
||||
let span =
|
||||
@@ -391,6 +390,133 @@ where
|
||||
extractor_allocs,
|
||||
)?;
|
||||
|
||||
{
|
||||
let WordDocidsCaches {
|
||||
word_docids,
|
||||
word_fid_docids,
|
||||
exact_word_docids,
|
||||
word_position_docids,
|
||||
fid_word_count_docids,
|
||||
} = {
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
|
||||
let _entered = span.enter();
|
||||
SettingsChangeWordDocidsExtractors::run_extraction(
|
||||
settings_delta,
|
||||
&documents,
|
||||
indexing_context,
|
||||
extractor_allocs,
|
||||
IndexingStep::ExtractingWords,
|
||||
)?
|
||||
};
|
||||
|
||||
indexing_context.progress.update_progress(IndexingStep::MergingWordCaches);
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
|
||||
let _entered = span.enter();
|
||||
indexing_context.progress.update_progress(MergingWordCache::WordDocids);
|
||||
|
||||
merge_and_send_docids(
|
||||
word_docids,
|
||||
index.word_docids.remap_types(),
|
||||
index,
|
||||
extractor_sender.docids::<WordDocids>(),
|
||||
&indexing_context.must_stop_processing,
|
||||
)?;
|
||||
}
|
||||
|
||||
{
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
|
||||
let _entered = span.enter();
|
||||
indexing_context.progress.update_progress(MergingWordCache::WordFieldIdDocids);
|
||||
|
||||
merge_and_send_docids(
|
||||
word_fid_docids,
|
||||
index.word_fid_docids.remap_types(),
|
||||
index,
|
||||
extractor_sender.docids::<WordFidDocids>(),
|
||||
&indexing_context.must_stop_processing,
|
||||
)?;
|
||||
}
|
||||
|
||||
{
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
|
||||
let _entered = span.enter();
|
||||
indexing_context.progress.update_progress(MergingWordCache::ExactWordDocids);
|
||||
|
||||
merge_and_send_docids(
|
||||
exact_word_docids,
|
||||
index.exact_word_docids.remap_types(),
|
||||
index,
|
||||
extractor_sender.docids::<ExactWordDocids>(),
|
||||
&indexing_context.must_stop_processing,
|
||||
)?;
|
||||
}
|
||||
|
||||
{
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
|
||||
let _entered = span.enter();
|
||||
indexing_context.progress.update_progress(MergingWordCache::WordPositionDocids);
|
||||
|
||||
merge_and_send_docids(
|
||||
word_position_docids,
|
||||
index.word_position_docids.remap_types(),
|
||||
index,
|
||||
extractor_sender.docids::<WordPositionDocids>(),
|
||||
&indexing_context.must_stop_processing,
|
||||
)?;
|
||||
}
|
||||
|
||||
{
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
|
||||
let _entered = span.enter();
|
||||
indexing_context.progress.update_progress(MergingWordCache::FieldIdWordCountDocids);
|
||||
|
||||
merge_and_send_docids(
|
||||
fid_word_count_docids,
|
||||
index.field_id_word_count_docids.remap_types(),
|
||||
index,
|
||||
extractor_sender.docids::<FidWordCountDocids>(),
|
||||
&indexing_context.must_stop_processing,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Run the proximity extraction only if the precision is ByWord.
|
||||
let new_proximity_precision = settings_delta.new_proximity_precision();
|
||||
if *new_proximity_precision == ProximityPrecision::ByWord {
|
||||
let caches = {
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
|
||||
let _entered = span.enter();
|
||||
|
||||
SettingsChangeWordPairProximityDocidsExtractors::run_extraction(
|
||||
settings_delta,
|
||||
&documents,
|
||||
indexing_context,
|
||||
extractor_allocs,
|
||||
IndexingStep::ExtractingWordProximity,
|
||||
)?
|
||||
};
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
|
||||
let _entered = span.enter();
|
||||
indexing_context.progress.update_progress(IndexingStep::MergingWordProximity);
|
||||
|
||||
merge_and_send_docids(
|
||||
caches,
|
||||
index.word_pair_proximity_docids.remap_types(),
|
||||
index,
|
||||
extractor_sender.docids::<WordPairProximityDocids>(),
|
||||
&indexing_context.must_stop_processing,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
'vectors: {
|
||||
if settings_delta.embedder_actions().is_empty() {
|
||||
break 'vectors;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::{Arc, Once, RwLock};
|
||||
use std::thread::{self, Builder};
|
||||
@@ -8,9 +8,11 @@ use document_changes::{DocumentChanges, IndexingContext};
|
||||
pub use document_deletion::DocumentDeletion;
|
||||
pub use document_operation::{DocumentOperation, PayloadStats};
|
||||
use hashbrown::HashMap;
|
||||
use heed::{RoTxn, RwTxn};
|
||||
use heed::types::DecodeIgnore;
|
||||
use heed::{BytesDecode, Database, RoTxn, RwTxn};
|
||||
pub use partial_dump::PartialDump;
|
||||
pub use post_processing::recompute_word_fst_from_word_docids_database;
|
||||
pub use settings_changes::settings_change_extract;
|
||||
pub use update_by_function::UpdateByFunction;
|
||||
pub use write::ChannelCongestion;
|
||||
use write::{build_vectors, update_index, write_to_db};
|
||||
@@ -20,18 +22,31 @@ use super::steps::IndexingStep;
|
||||
use super::thread_local::ThreadLocal;
|
||||
use crate::documents::PrimaryKey;
|
||||
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
|
||||
use crate::heed_codec::StrBEU16Codec;
|
||||
use crate::progress::{EmbedderStats, Progress};
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::update::new::steps::SettingsIndexerStep;
|
||||
use crate::update::new::FacetFieldIdsDelta;
|
||||
use crate::update::settings::SettingsDelta;
|
||||
use crate::update::GrenadParameters;
|
||||
use crate::vector::settings::{EmbedderAction, RemoveFragments, WriteBackToDocuments};
|
||||
use crate::vector::{Embedder, RuntimeEmbedders, VectorStore};
|
||||
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort};
|
||||
use crate::{
|
||||
Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort,
|
||||
};
|
||||
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
pub mod community_edition;
|
||||
pub(crate) mod de;
|
||||
pub mod document_changes;
|
||||
mod document_deletion;
|
||||
mod document_operation;
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub mod enterprise_edition;
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
pub use community_edition as current_edition;
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub use enterprise_edition as current_edition;
|
||||
mod extract;
|
||||
mod guess_primary_key;
|
||||
mod partial_dump;
|
||||
@@ -235,6 +250,20 @@ where
|
||||
SD: SettingsDelta + Sync,
|
||||
{
|
||||
delete_old_embedders_and_fragments(wtxn, index, settings_delta)?;
|
||||
delete_old_fid_based_databases(wtxn, index, settings_delta, must_stop_processing, progress)?;
|
||||
|
||||
// Clear word_pair_proximity if byWord to byAttribute
|
||||
let old_proximity_precision = settings_delta.old_proximity_precision();
|
||||
let new_proximity_precision = settings_delta.new_proximity_precision();
|
||||
if *old_proximity_precision == ProximityPrecision::ByWord
|
||||
&& *new_proximity_precision == ProximityPrecision::ByAttribute
|
||||
{
|
||||
index.word_pair_proximity_docids.clear(wtxn)?;
|
||||
}
|
||||
|
||||
// TODO delete useless searchable databases
|
||||
// - Clear fid_prefix_* in the post processing
|
||||
// - clear the prefix + fid_prefix if setting `PrefixSearch` is enabled
|
||||
|
||||
let mut bbbuffers = Vec::new();
|
||||
let finished_extraction = AtomicBool::new(false);
|
||||
@@ -293,6 +322,8 @@ where
|
||||
.unwrap()
|
||||
})?;
|
||||
|
||||
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
|
||||
|
||||
let new_embedders = settings_delta.new_embedders();
|
||||
let embedder_actions = settings_delta.embedder_actions();
|
||||
let index_embedder_category_ids = settings_delta.new_embedder_category_id();
|
||||
@@ -327,6 +358,18 @@ where
|
||||
})
|
||||
.unwrap()?;
|
||||
|
||||
pool.install(|| {
|
||||
// WARN When implementing the facets don't forget this
|
||||
let facet_field_ids_delta = FacetFieldIdsDelta::new(0, 0);
|
||||
post_processing::post_process(
|
||||
indexing_context,
|
||||
wtxn,
|
||||
global_fields_ids_map,
|
||||
facet_field_ids_delta,
|
||||
)
|
||||
})
|
||||
.unwrap()?;
|
||||
|
||||
indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
|
||||
index.cellulite.build(
|
||||
wtxn,
|
||||
@@ -456,6 +499,106 @@ where
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Deletes entries refering the provided
|
||||
/// fids from the fid-based databases.
|
||||
fn delete_old_fid_based_databases<SD, MSP>(
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
index: &Index,
|
||||
settings_delta: &SD,
|
||||
must_stop_processing: &MSP,
|
||||
progress: &Progress,
|
||||
) -> Result<()>
|
||||
where
|
||||
SD: SettingsDelta + Sync,
|
||||
MSP: Fn() -> bool + Sync,
|
||||
{
|
||||
let fids_to_delete: Option<BTreeSet<_>> = {
|
||||
let rtxn = index.read_txn()?;
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let old_searchable_attributes = settings_delta.old_searchable_attributes().as_ref();
|
||||
let new_searchable_attributes = settings_delta.new_searchable_attributes().as_ref();
|
||||
old_searchable_attributes.zip(new_searchable_attributes).map(|(old, new)| {
|
||||
old.iter()
|
||||
// Ignore the field if it is not searchable anymore
|
||||
// or if it was never referenced in any document
|
||||
.filter_map(|name| if new.contains(name) { None } else { fields_ids_map.id(name) })
|
||||
.collect()
|
||||
})
|
||||
};
|
||||
|
||||
let Some(fids_to_delete) = fids_to_delete else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
progress.update_progress(SettingsIndexerStep::DeletingOldWordFidDocids);
|
||||
delete_old_word_fid_docids(wtxn, index.word_fid_docids, must_stop_processing, &fids_to_delete)?;
|
||||
|
||||
progress.update_progress(SettingsIndexerStep::DeletingOldFidWordCountDocids);
|
||||
delete_old_fid_word_count_docids(wtxn, index, must_stop_processing, &fids_to_delete)?;
|
||||
|
||||
progress.update_progress(SettingsIndexerStep::DeletingOldWordPrefixFidDocids);
|
||||
delete_old_word_fid_docids(
|
||||
wtxn,
|
||||
index.word_prefix_fid_docids,
|
||||
must_stop_processing,
|
||||
&fids_to_delete,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn delete_old_word_fid_docids<'txn, MSP, DC>(
|
||||
wtxn: &mut RwTxn<'txn>,
|
||||
database: Database<StrBEU16Codec, DC>,
|
||||
must_stop_processing: &MSP,
|
||||
fids_to_delete: &BTreeSet<u16>,
|
||||
) -> Result<(), Error>
|
||||
where
|
||||
MSP: Fn() -> bool + Sync,
|
||||
DC: BytesDecode<'txn>,
|
||||
{
|
||||
let mut iter = database.iter_mut(wtxn)?.remap_data_type::<DecodeIgnore>();
|
||||
while let Some(((_word, fid), ())) = iter.next().transpose()? {
|
||||
// TODO should I call it that often?
|
||||
if must_stop_processing() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
if fids_to_delete.contains(&fid) {
|
||||
// safety: We don't keep any references to the data.
|
||||
unsafe { iter.del_current()? };
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn delete_old_fid_word_count_docids<MSP>(
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
index: &Index,
|
||||
must_stop_processing: &MSP,
|
||||
fids_to_delete: &BTreeSet<u16>,
|
||||
) -> Result<(), Error>
|
||||
where
|
||||
MSP: Fn() -> bool + Sync,
|
||||
{
|
||||
let db = index.field_id_word_count_docids.remap_data_type::<DecodeIgnore>();
|
||||
for &fid_to_delete in fids_to_delete {
|
||||
if must_stop_processing() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
let mut iter = db.prefix_iter_mut(wtxn, &(fid_to_delete, 0))?;
|
||||
while let Some(((fid, _word_count), ())) = iter.next().transpose()? {
|
||||
debug_assert_eq!(fid, fid_to_delete);
|
||||
// safety: We don't keep any references to the data.
|
||||
unsafe { iter.del_current()? };
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn indexer_memory_settings(
|
||||
current_num_threads: usize,
|
||||
grenad_parameters: GrenadParameters,
|
||||
|
||||
@@ -28,6 +28,9 @@ make_enum_progress! {
|
||||
ChangingVectorStore,
|
||||
UsingStableIndexer,
|
||||
UsingExperimentalIndexer,
|
||||
DeletingOldWordFidDocids,
|
||||
DeletingOldFidWordCountDocids,
|
||||
DeletingOldWordPrefixFidDocids,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -45,8 +45,7 @@ use crate::vector::{
|
||||
VectorStoreBackend,
|
||||
};
|
||||
use crate::{
|
||||
ChannelCongestion, FieldId, FilterableAttributesRule, ForeignKey, Index,
|
||||
LocalizedAttributesRule, Result,
|
||||
ChannelCongestion, FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
|
||||
@@ -177,7 +176,6 @@ pub struct Settings<'a, 't, 'i> {
|
||||
displayed_fields: Setting<Vec<String>>,
|
||||
filterable_fields: Setting<Vec<FilterableAttributesRule>>,
|
||||
sortable_fields: Setting<HashSet<String>>,
|
||||
foreign_keys: Setting<Vec<ForeignKey>>,
|
||||
criteria: Setting<Vec<Criterion>>,
|
||||
stop_words: Setting<BTreeSet<String>>,
|
||||
non_separator_tokens: Setting<BTreeSet<String>>,
|
||||
@@ -219,7 +217,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
displayed_fields: Setting::NotSet,
|
||||
filterable_fields: Setting::NotSet,
|
||||
sortable_fields: Setting::NotSet,
|
||||
foreign_keys: Setting::NotSet,
|
||||
criteria: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
non_separator_tokens: Setting::NotSet,
|
||||
@@ -281,14 +278,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
self.sortable_fields = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_foreign_keys(&mut self, keys: Vec<ForeignKey>) {
|
||||
self.foreign_keys = Setting::Set(keys);
|
||||
}
|
||||
|
||||
pub fn reset_foreign_keys(&mut self) {
|
||||
self.foreign_keys = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn reset_criteria(&mut self) {
|
||||
self.criteria = Setting::Reset;
|
||||
}
|
||||
@@ -833,19 +822,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn update_foreign_keys(&mut self) -> Result<()> {
|
||||
match self.foreign_keys {
|
||||
Setting::Set(ref keys) => {
|
||||
self.index.put_foreign_keys(self.wtxn, keys)?;
|
||||
}
|
||||
Setting::Reset => {
|
||||
self.index.delete_foreign_keys(self.wtxn)?;
|
||||
}
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn update_criteria(&mut self) -> Result<()> {
|
||||
match &self.criteria {
|
||||
Setting::Set(criteria) => {
|
||||
@@ -1479,7 +1455,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
self.update_sort_facet_values_by()?;
|
||||
self.update_pagination_max_total_hits()?;
|
||||
self.update_search_cutoff()?;
|
||||
self.update_foreign_keys()?;
|
||||
|
||||
// could trigger re-indexing
|
||||
self.update_filterable()?;
|
||||
@@ -1614,34 +1589,33 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
|
||||
// only use the new indexer when only the embedder possibly changed
|
||||
if let Self {
|
||||
searchable_fields: Setting::NotSet,
|
||||
searchable_fields: _,
|
||||
displayed_fields: Setting::NotSet,
|
||||
filterable_fields: Setting::NotSet,
|
||||
sortable_fields: Setting::NotSet,
|
||||
foreign_keys: Setting::NotSet,
|
||||
criteria: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
non_separator_tokens: Setting::NotSet,
|
||||
separator_tokens: Setting::NotSet,
|
||||
dictionary: Setting::NotSet,
|
||||
stop_words: Setting::NotSet, // TODO (require force reindexing of searchables)
|
||||
non_separator_tokens: Setting::NotSet, // TODO (require force reindexing of searchables)
|
||||
separator_tokens: Setting::NotSet, // TODO (require force reindexing of searchables)
|
||||
dictionary: Setting::NotSet, // TODO (require force reindexing of searchables)
|
||||
distinct_field: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
primary_key: Setting::NotSet,
|
||||
authorize_typos: Setting::NotSet,
|
||||
min_word_len_two_typos: Setting::NotSet,
|
||||
min_word_len_one_typo: Setting::NotSet,
|
||||
exact_words: Setting::NotSet,
|
||||
exact_attributes: Setting::NotSet,
|
||||
exact_words: Setting::NotSet, // TODO (require force reindexing of searchables)
|
||||
exact_attributes: _,
|
||||
max_values_per_facet: Setting::NotSet,
|
||||
sort_facet_values_by: Setting::NotSet,
|
||||
pagination_max_total_hits: Setting::NotSet,
|
||||
proximity_precision: Setting::NotSet,
|
||||
proximity_precision: _,
|
||||
embedder_settings: _,
|
||||
search_cutoff: Setting::NotSet,
|
||||
localized_attributes_rules: Setting::NotSet,
|
||||
prefix_search: Setting::NotSet,
|
||||
localized_attributes_rules: Setting::NotSet, // TODO to start with
|
||||
prefix_search: Setting::NotSet, // TODO continue with this
|
||||
facet_search: Setting::NotSet,
|
||||
disable_on_numbers: Setting::NotSet,
|
||||
disable_on_numbers: Setting::NotSet, // TODO (require force reindexing of searchables)
|
||||
chat: Setting::NotSet,
|
||||
vector_store: Setting::NotSet,
|
||||
wtxn: _,
|
||||
@@ -1658,10 +1632,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
// Update index settings
|
||||
let embedding_config_updates = self.update_embedding_configs()?;
|
||||
self.update_user_defined_searchable_attributes()?;
|
||||
self.update_exact_attributes()?;
|
||||
self.update_proximity_precision()?;
|
||||
|
||||
let mut new_inner_settings =
|
||||
InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
|
||||
new_inner_settings.recompute_searchables(self.wtxn, self.index)?;
|
||||
// Note that we don't need to update the searchables here,
|
||||
// as it will be done after the settings update.
|
||||
let new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
|
||||
|
||||
let primary_key_id = self
|
||||
.index
|
||||
@@ -2088,9 +2064,12 @@ impl InnerIndexSettings {
|
||||
let sortable_fields = index.sortable_fields(rtxn)?;
|
||||
let asc_desc_fields = index.asc_desc_fields(rtxn)?;
|
||||
let distinct_field = index.distinct_field(rtxn)?.map(|f| f.to_string());
|
||||
let user_defined_searchable_attributes = index
|
||||
.user_defined_searchable_fields(rtxn)?
|
||||
.map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
|
||||
let user_defined_searchable_attributes = match index.user_defined_searchable_fields(rtxn)? {
|
||||
Some(fields) if fields.contains(&"*") => None,
|
||||
Some(fields) => Some(fields.into_iter().map(|f| f.to_string()).collect()),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let builder = MetadataBuilder::from_index(index, rtxn)?;
|
||||
let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
|
||||
let disabled_typos_terms = index.disabled_typos_terms(rtxn)?;
|
||||
@@ -2604,8 +2583,20 @@ fn deserialize_sub_embedder(
|
||||
/// Implement this trait for the settings delta type.
|
||||
/// This is used in the new settings update flow and will allow to easily replace the old settings delta type: `InnerIndexSettingsDiff`.
|
||||
pub trait SettingsDelta {
|
||||
fn new_embedders(&self) -> &RuntimeEmbedders;
|
||||
fn old_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
|
||||
fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
|
||||
|
||||
fn old_searchable_attributes(&self) -> &Option<Vec<String>>;
|
||||
fn new_searchable_attributes(&self) -> &Option<Vec<String>>;
|
||||
|
||||
fn old_disabled_typos_terms(&self) -> &DisabledTyposTerms;
|
||||
fn new_disabled_typos_terms(&self) -> &DisabledTyposTerms;
|
||||
|
||||
fn old_proximity_precision(&self) -> &ProximityPrecision;
|
||||
fn new_proximity_precision(&self) -> &ProximityPrecision;
|
||||
|
||||
fn old_embedders(&self) -> &RuntimeEmbedders;
|
||||
fn new_embedders(&self) -> &RuntimeEmbedders;
|
||||
fn new_embedder_category_id(&self) -> &HashMap<String, u8>;
|
||||
fn embedder_actions(&self) -> &BTreeMap<String, EmbedderAction>;
|
||||
fn try_for_each_fragment_diff<F, E>(
|
||||
@@ -2615,7 +2606,6 @@ pub trait SettingsDelta {
|
||||
) -> std::result::Result<(), E>
|
||||
where
|
||||
F: FnMut(FragmentDiff) -> std::result::Result<(), E>;
|
||||
fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata;
|
||||
}
|
||||
|
||||
pub struct FragmentDiff<'a> {
|
||||
@@ -2624,26 +2614,47 @@ pub struct FragmentDiff<'a> {
|
||||
}
|
||||
|
||||
impl SettingsDelta for InnerIndexSettingsDiff {
|
||||
fn new_embedders(&self) -> &RuntimeEmbedders {
|
||||
&self.new.runtime_embedders
|
||||
fn old_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
|
||||
&self.old.fields_ids_map
|
||||
}
|
||||
fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
|
||||
&self.new.fields_ids_map
|
||||
}
|
||||
|
||||
fn old_searchable_attributes(&self) -> &Option<Vec<String>> {
|
||||
&self.old.user_defined_searchable_attributes
|
||||
}
|
||||
fn new_searchable_attributes(&self) -> &Option<Vec<String>> {
|
||||
&self.new.user_defined_searchable_attributes
|
||||
}
|
||||
|
||||
fn old_disabled_typos_terms(&self) -> &DisabledTyposTerms {
|
||||
&self.old.disabled_typos_terms
|
||||
}
|
||||
fn new_disabled_typos_terms(&self) -> &DisabledTyposTerms {
|
||||
&self.new.disabled_typos_terms
|
||||
}
|
||||
|
||||
fn old_proximity_precision(&self) -> &ProximityPrecision {
|
||||
&self.old.proximity_precision
|
||||
}
|
||||
fn new_proximity_precision(&self) -> &ProximityPrecision {
|
||||
&self.new.proximity_precision
|
||||
}
|
||||
|
||||
fn old_embedders(&self) -> &RuntimeEmbedders {
|
||||
&self.old.runtime_embedders
|
||||
}
|
||||
fn new_embedders(&self) -> &RuntimeEmbedders {
|
||||
&self.new.runtime_embedders
|
||||
}
|
||||
|
||||
fn new_embedder_category_id(&self) -> &HashMap<String, u8> {
|
||||
&self.new.embedder_category_id
|
||||
}
|
||||
|
||||
fn embedder_actions(&self) -> &BTreeMap<String, EmbedderAction> {
|
||||
&self.embedding_config_updates
|
||||
}
|
||||
|
||||
fn new_fields_ids_map(&self) -> &FieldIdMapWithMetadata {
|
||||
&self.new.fields_ids_map
|
||||
}
|
||||
|
||||
fn try_for_each_fragment_diff<F, E>(
|
||||
&self,
|
||||
embedder_name: &str,
|
||||
|
||||
@@ -14,28 +14,21 @@ fn set_and_reset_searchable_fields() {
|
||||
let index = TempIndex::new();
|
||||
|
||||
// First we send 3 documents with ids from 1 to 3.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
index
|
||||
.add_documents_using_wtxn(
|
||||
&mut wtxn,
|
||||
documents!([
|
||||
{ "id": 1, "name": "kevin", "age": 23 },
|
||||
{ "id": 2, "name": "kevina", "age": 21},
|
||||
{ "id": 3, "name": "benoit", "age": 34 }
|
||||
]),
|
||||
)
|
||||
.add_documents(documents!([
|
||||
{ "id": 1, "name": "kevin", "age": 23 },
|
||||
{ "id": 2, "name": "kevina", "age": 21},
|
||||
{ "id": 3, "name": "benoit", "age": 34 }
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
// We change the searchable fields to be the "name" field only.
|
||||
index
|
||||
.update_settings_using_wtxn(&mut wtxn, |settings| {
|
||||
.update_settings(|settings| {
|
||||
settings.set_searchable_fields(vec!["name".into()]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 id |
|
||||
1 name |
|
||||
@@ -874,7 +867,6 @@ fn test_correct_settings_init() {
|
||||
displayed_fields,
|
||||
filterable_fields,
|
||||
sortable_fields,
|
||||
foreign_keys,
|
||||
criteria,
|
||||
stop_words,
|
||||
non_separator_tokens,
|
||||
@@ -905,7 +897,6 @@ fn test_correct_settings_init() {
|
||||
assert!(matches!(displayed_fields, Setting::NotSet));
|
||||
assert!(matches!(filterable_fields, Setting::NotSet));
|
||||
assert!(matches!(sortable_fields, Setting::NotSet));
|
||||
assert!(matches!(foreign_keys, Setting::NotSet));
|
||||
assert!(matches!(criteria, Setting::NotSet));
|
||||
assert!(matches!(stop_words, Setting::NotSet));
|
||||
assert!(matches!(non_separator_tokens, Setting::NotSet));
|
||||
|
||||
@@ -7,6 +7,6 @@ publish = false
|
||||
[dependencies]
|
||||
meilisearch = { path = "../meilisearch" , default-features = false}
|
||||
serde_json = "1.0"
|
||||
clap = { version = "4.5.40", features = ["derive"] }
|
||||
anyhow = "1.0.98"
|
||||
clap = { version = "4.5.52", features = ["derive"] }
|
||||
anyhow = "1.0.100"
|
||||
utoipa = "5.4.0"
|
||||
|
||||
@@ -8,8 +8,8 @@ edition = "2021"
|
||||
[dependencies]
|
||||
color-spantrace = "0.3.0"
|
||||
fxprof-processed-profile = "0.7.0"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = "1.0.140"
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = "1.0.145"
|
||||
tracing = "0.1.41"
|
||||
tracing-error = "0.2.1"
|
||||
tracing-subscriber = "0.3.20"
|
||||
@@ -18,7 +18,7 @@ byte-unit = { version = "5.1.6", default-features = false, features = [
|
||||
"byte",
|
||||
"serde",
|
||||
] }
|
||||
tokio = { version = "1.45.1", features = ["sync"] }
|
||||
tokio = { version = "1.48.0", features = ["sync"] }
|
||||
|
||||
[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies]
|
||||
libproc = "0.14.10"
|
||||
libproc = "0.14.11"
|
||||
|
||||
@@ -11,27 +11,27 @@ license.workspace = true
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.98"
|
||||
anyhow = "1.0.100"
|
||||
build-info = { version = "1.7.0", path = "../build-info" }
|
||||
cargo_metadata = "0.20.0"
|
||||
clap = { version = "4.5.40", features = ["derive"] }
|
||||
cargo_metadata = "0.23.1"
|
||||
clap = { version = "4.5.52", features = ["derive"] }
|
||||
futures-core = "0.3.31"
|
||||
futures-util = "0.3.31"
|
||||
reqwest = { version = "0.12.20", features = [
|
||||
reqwest = { version = "0.12.24", features = [
|
||||
"stream",
|
||||
"json",
|
||||
"rustls-tls",
|
||||
], default-features = false }
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
serde_json = "1.0.140"
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = "1.0.145"
|
||||
sha2 = "0.10.9"
|
||||
sysinfo = "0.35.2"
|
||||
time = { version = "0.3.41", features = [
|
||||
sysinfo = "0.37.2"
|
||||
time = { version = "0.3.44", features = [
|
||||
"serde",
|
||||
"serde-human-readable",
|
||||
"macros",
|
||||
] }
|
||||
tokio = { version = "1.45.1", features = [
|
||||
tokio = { version = "1.48.0", features = [
|
||||
"rt",
|
||||
"net",
|
||||
"time",
|
||||
@@ -41,4 +41,4 @@ tokio = { version = "1.45.1", features = [
|
||||
tracing = "0.1.41"
|
||||
tracing-subscriber = "0.3.20"
|
||||
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
|
||||
uuid = { version = "1.17.0", features = ["v7", "serde"] }
|
||||
uuid = { version = "1.18.1", features = ["v7", "serde"] }
|
||||
|
||||
Reference in New Issue
Block a user