get rid of the index crate + the document_types crate

This commit is contained in:
Tamo
2022-10-11 17:42:43 +02:00
committed by Clément Renault
parent 9a74ea0943
commit 667c282e19
30 changed files with 324 additions and 2145 deletions

View File

@@ -1,26 +1,24 @@
use std::io::Cursor;
use actix_web::error::PayloadError;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::{Bytes, Data};
use actix_web::web::Data;
use actix_web::HttpMessage;
use actix_web::{web, HttpRequest, HttpResponse};
use bstr::ByteSlice;
use document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use futures::{Stream, StreamExt};
use index::{retrieve_document, retrieve_documents};
use index_scheduler::milli::update::IndexDocumentsMethod;
use index_scheduler::IndexScheduler;
use index_scheduler::{KindWithContent, TaskView};
use futures::StreamExt;
use index_scheduler::{IndexScheduler, KindWithContent, TaskView};
use log::debug;
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::star_or::StarOr;
use meilisearch_types::{milli, Document, Index};
use mime::Mime;
use once_cell::sync::Lazy;
use serde::Deserialize;
use serde_cs::vec::CS;
use serde_json::Value;
use tokio::sync::mpsc;
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
@@ -37,17 +35,6 @@ static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
]
});
/// Re-exposes `payload` as a stream fed through a channel.
///
/// This is required because `Payload` is not `Sync` nor `Send`: the payload
/// itself is only polled inside a task spawned with `spawn_local`, and each
/// item it yields is forwarded through a channel of capacity 1. The receiving
/// half of that channel is what gets returned to the caller.
fn payload_to_stream(mut payload: Payload) -> impl Stream<Item = Result<Bytes, PayloadError>> {
    let (sender, receiver) = mpsc::channel(1);

    let forward_chunks = async move {
        while let Some(chunk) = payload.next().await {
            // The outcome of the send is discarded; the loop keeps reading
            // the payload until it is exhausted.
            let _ = sender.send(chunk).await;
        }
    };
    tokio::task::spawn_local(forward_chunks);

    tokio_stream::wrappers::ReceiverStream::new(receiver)
}
/// Extracts the mime type from the content type and return
/// a meilisearch error if anything bad happen.
fn extract_mime_type(req: &HttpRequest) -> Result<Option<Mime>, MeilisearchHttpError> {
@@ -344,3 +331,76 @@ pub async fn clear_all_documents(
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
/// Returns an iterator over every document stored in `index`, each one
/// converted to its JSON representation.
///
/// The fields map is read once up front; every field id it contains is
/// requested when converting a document, so no field is filtered out here.
///
/// # Errors
///
/// Fails immediately if the fields map or the document iterator cannot be
/// obtained. Failures hit while reading or converting an individual document
/// are yielded as `Err` items of the returned iterator.
fn all_documents<'a>(
    index: &Index,
    rtxn: &'a RoTxn,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
    let fields_ids_map = index.fields_ids_map(rtxn)?;
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

    let raw_documents = index.all_documents(rtxn)?;
    let json_documents = raw_documents.map(move |entry| -> Result<Document, ResponseError> {
        // The key of the entry is not needed, only the stored document.
        let (_key, obkv) = entry?;
        let json = milli::obkv_to_json(&all_fields, &fields_ids_map, obkv)?;
        Ok(json)
    });

    Ok(json_documents)
}
/// Fetches one page of documents from `index`.
///
/// Skips the first `offset` documents, then collects at most `limit` of the
/// following ones. When `attributes_to_retrieve` is `Some`, each document is
/// reduced to the selected attributes through
/// `permissive_json_pointer::select_values`; otherwise it is returned whole.
///
/// Returns the number of documents reported by the index together with the
/// collected page.
///
/// # Errors
///
/// Stops at, and returns, the first error encountered while opening the read
/// transaction, iterating over the page, or counting the documents.
fn retrieve_documents<S: AsRef<str>>(
    index: &Index,
    offset: usize,
    limit: usize,
    attributes_to_retrieve: Option<Vec<S>>,
) -> Result<(u64, Vec<Document>), ResponseError> {
    let rtxn = index.read_txn()?;

    let documents = all_documents(index, &rtxn)?
        .skip(offset)
        .take(limit)
        .map(|entry| -> Result<Document, ResponseError> {
            let full_document = entry?;
            Ok(match attributes_to_retrieve.as_ref() {
                Some(attributes) => permissive_json_pointer::select_values(
                    &full_document,
                    attributes.iter().map(|attribute| attribute.as_ref()),
                ),
                None => full_document,
            })
        })
        .collect::<Result<Vec<_>, _>>()?;

    // Counted only once the page has been collected successfully.
    let number_of_documents = index.number_of_documents(&rtxn)?;

    Ok((number_of_documents, documents))
}
/// Fetches the document whose external id is `doc_id` from `index`.
///
/// The external id is first resolved to an internal id, the matching stored
/// document is then loaded and converted to JSON using every field of the
/// fields map. When `attributes_to_retrieve` is `Some`, the result is reduced
/// to the selected attributes through `permissive_json_pointer::select_values`.
///
/// # Errors
///
/// Returns `MeilisearchHttpError::DocumentNotFound` when the external id is
/// unknown or when no document is stored under the resolved internal id, and
/// forwards any error raised by the index reads or the JSON conversion.
fn retrieve_document<S: AsRef<str>>(
    index: &Index,
    doc_id: &str,
    attributes_to_retrieve: Option<Vec<S>>,
) -> Result<Document, ResponseError> {
    let rtxn = index.read_txn()?;

    let fields_ids_map = index.fields_ids_map(&rtxn)?;
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

    // Both lookups below report a miss with the same error.
    let not_found = || MeilisearchHttpError::DocumentNotFound(doc_id.to_string());

    let external_ids = index.external_documents_ids(&rtxn)?;
    let internal_id = external_ids.get(doc_id.as_bytes()).ok_or_else(not_found)?;

    let (_, obkv) = index
        .documents(&rtxn, std::iter::once(internal_id))?
        .into_iter()
        .next()
        .ok_or_else(not_found)?;

    let full_document =
        meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, obkv)?;

    Ok(match attributes_to_retrieve.as_ref() {
        Some(attributes) => permissive_json_pointer::select_values(
            &full_document,
            attributes.iter().map(|attribute| attribute.as_ref()),
        ),
        None => full_document,
    })
}

View File

@@ -1,9 +1,9 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::milli::{FieldDistribution, Index};
use index_scheduler::{IndexScheduler, KindWithContent, Query, Status};
use log::debug;
use meilisearch_types::error::ResponseError;
use meilisearch_types::milli::{self, FieldDistribution, Index};
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
@@ -51,7 +51,7 @@ pub struct IndexView {
}
impl IndexView {
fn new(uid: String, index: &Index) -> Result<IndexView, index::error::IndexError> {
fn new(uid: String, index: &Index) -> Result<IndexView, milli::Error> {
let rtxn = index.read_txn()?;
Ok(IndexView {
uid,

View File

@@ -1,10 +1,5 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index::{
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET,
};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::IndexSearchRules;
@@ -16,6 +11,11 @@ use serde_json::Value;
use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET
};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(

View File

@@ -1,14 +1,26 @@
use std::collections::BTreeSet;
use std::marker::PhantomData;
use actix_web::web::Data;
use fst::IntoStreamer;
use log::debug;
use actix_web::{web, HttpRequest, HttpResponse};
use index::{Settings, Unchecked};
use index_scheduler::{IndexScheduler, KindWithContent};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self, DEFAULT_VALUES_PER_FACET};
use meilisearch_types::settings::{
Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings,
Unchecked,
};
use meilisearch_types::Index;
use serde_json::json;
use crate::analytics::Analytics;
use crate::extractors::authentication::{policies::*, GuardedData};
use crate::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
#[macro_export]
macro_rules! make_setting_route {
@@ -18,14 +30,15 @@ macro_rules! make_setting_route {
use actix_web::{web, HttpRequest, HttpResponse, Resource};
use log::debug;
use index::Settings;
use index_scheduler::milli::update::Setting;
use index_scheduler::{IndexScheduler, KindWithContent};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::Settings;
use meilisearch_types::error::ResponseError;
use $crate::analytics::Analytics;
use $crate::extractors::authentication::{policies::*, GuardedData};
use $crate::extractors::sequential_extractor::SeqHandler;
use $crate::routes::indexes::settings::settings;
pub async fn delete(
index_scheduler: GuardedData<
@@ -98,7 +111,7 @@ macro_rules! make_setting_route {
) -> std::result::Result<HttpResponse, ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let settings = index::settings(&index, &rtxn)?;
let settings = settings(&index, &rtxn)?;
debug!("returns: {:?}", settings);
let mut json = serde_json::json!(&settings);
@@ -185,11 +198,11 @@ make_setting_route!(
make_setting_route!(
"/typo-tolerance",
patch,
index::updates::TypoSettings,
meilisearch_types::settings::TypoSettings,
typo_tolerance,
"typoTolerance",
analytics,
|setting: &Option<index::updates::TypoSettings>, req: &HttpRequest| {
|setting: &Option<meilisearch_types::settings::TypoSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
@@ -295,11 +308,11 @@ make_setting_route!(
make_setting_route!(
"/faceting",
patch,
index::updates::FacetingSettings,
meilisearch_types::settings::FacetingSettings,
faceting,
"faceting",
analytics,
|setting: &Option<index::updates::FacetingSettings>, req: &HttpRequest| {
|setting: &Option<meilisearch_types::settings::FacetingSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
@@ -317,11 +330,11 @@ make_setting_route!(
make_setting_route!(
"/pagination",
patch,
index::updates::PaginationSettings,
meilisearch_types::settings::PaginationSettings,
pagination,
"pagination",
analytics,
|setting: &Option<index::updates::PaginationSettings>, req: &HttpRequest| {
|setting: &Option<meilisearch_types::settings::PaginationSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
@@ -456,7 +469,7 @@ pub async fn get_all(
) -> Result<HttpResponse, ResponseError> {
let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
let new_settings = index::settings(&index, &rtxn)?;
let new_settings = settings(&index, &rtxn)?;
debug!("returns: {:?}", new_settings);
Ok(HttpResponse::Ok().json(new_settings))
}
@@ -479,3 +492,108 @@ pub async fn delete_all(
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub fn settings(index: &Index, rtxn: &RoTxn) -> Result<Settings<Checked>, milli::Error> {
let displayed_attributes = index
.displayed_fields(rtxn)?
.map(|fields| fields.into_iter().map(String::from).collect());
let searchable_attributes = index
.user_defined_searchable_fields(rtxn)?
.map(|fields| fields.into_iter().map(String::from).collect());
let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
let criteria = index
.criteria(rtxn)?
.into_iter()
.map(|c| c.to_string())
.collect();
let stop_words = index
.stop_words(rtxn)?
.map(|stop_words| -> Result<BTreeSet<_>, milli::Error> {
Ok(stop_words.stream().into_strs()?.into_iter().collect())
})
.transpose()?
.unwrap_or_default();
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
// in milli each word in the synonyms map were split on their separator. Since we lost
// this information we are going to put space between words.
let synonyms = index
.synonyms(rtxn)?
.iter()
.map(|(key, values)| {
(
key.join(" "),
values.iter().map(|value| value.join(" ")).collect(),
)
})
.collect();
let min_typo_word_len = MinWordSizeTyposSetting {
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
};
let disabled_words = match index.exact_words(rtxn)? {
Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
None => BTreeSet::new(),
};
let disabled_attributes = index
.exact_attributes(rtxn)?
.into_iter()
.map(String::from)
.collect();
let typo_tolerance = TypoSettings {
enabled: Setting::Set(index.authorize_typos(rtxn)?),
min_word_size_for_typos: Setting::Set(min_typo_word_len),
disable_on_words: Setting::Set(disabled_words),
disable_on_attributes: Setting::Set(disabled_attributes),
};
let faceting = FacetingSettings {
max_values_per_facet: Setting::Set(
index
.max_values_per_facet(rtxn)?
.unwrap_or(DEFAULT_VALUES_PER_FACET),
),
};
let pagination = PaginationSettings {
max_total_hits: Setting::Set(
index
.pagination_max_total_hits(rtxn)?
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
),
};
Ok(Settings {
displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
},
searchable_attributes: match searchable_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
},
filterable_attributes: Setting::Set(filterable_attributes),
sortable_attributes: Setting::Set(sortable_attributes),
ranking_rules: Setting::Set(criteria),
stop_words: Setting::Set(stop_words),
distinct_attribute: match distinct_field {
Some(field) => Setting::Set(field),
None => Setting::Reset,
},
synonyms: Setting::Set(synonyms),
typo_tolerance: Setting::Set(typo_tolerance),
faceting: Setting::Set(faceting),
pagination: Setting::Set(pagination),
_kind: PhantomData,
})
}