Make sure that meilisearch-http works without index wrapper

This commit is contained in:
Kerollmops
2022-10-04 11:06:48 +02:00
committed by Clément Renault
parent c70f375669
commit cf6084151b
9 changed files with 230 additions and 43 deletions

View File

@@ -104,12 +104,14 @@ impl IndexMapper {
Ok(index) Ok(index)
} }
pub fn indexes(&self, rtxn: &RoTxn) -> Result<Vec<Index>> { pub fn indexes(&self, rtxn: &RoTxn) -> Result<Vec<(String, Index)>> {
self.index_mapping self.index_mapping
.iter(rtxn)? .iter(rtxn)?
.map(|ret| { .map(|ret| {
ret.map_err(Error::from) ret.map_err(Error::from).and_then(|(name, _)| {
.and_then(|(name, _)| self.index(rtxn, name)) self.index(rtxn, name)
.map(|index| (name.to_string(), index))
})
}) })
.collect() .collect()
} }

View File

@@ -231,7 +231,7 @@ impl IndexScheduler {
} }
/// Return and open all the indexes. /// Return and open all the indexes.
pub fn indexes(&self) -> Result<Vec<Index>> { pub fn indexes(&self) -> Result<Vec<(String, Index)>> {
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
self.index_mapper.indexes(&rtxn) self.index_mapper.indexes(&rtxn)
} }

View File

@@ -1,4 +1,5 @@
pub use search::{ pub use search::{
all_documents, perform_search, retrieve_document, retrieve_documents, settings,
MatchingStrategy, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, MatchingStrategy, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
}; };

View File

@@ -1,19 +1,25 @@
use std::cmp::min; use std::cmp::min;
use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::marker::PhantomData;
use std::str::FromStr; use std::str::FromStr;
use std::time::Instant; use std::time::Instant;
use either::Either; use either::Either;
use fst::IntoStreamer;
use milli::heed::RoTxn;
use milli::tokenizer::TokenizerBuilder; use milli::tokenizer::TokenizerBuilder;
use milli::update::Setting;
use milli::{ use milli::{
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder, obkv_to_json, AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds,
SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
}; };
use regex::Regex; use regex::Regex;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::{json, Value}; use serde_json::{json, Value};
use crate::error::FacetError; use crate::error::FacetError;
use crate::updates::{FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, TypoSettings};
use crate::{Checked, Settings};
use super::error::{IndexError, Result}; use super::error::{IndexError, Result};
@@ -282,6 +288,184 @@ pub fn perform_search(index: &Index, query: SearchQuery) -> Result<SearchResult>
Ok(result) Ok(result)
} }
/// Returns an iterator over every document stored in `index`, each one converted to JSON.
///
/// All field ids known to the index's fields-ids map are included in the conversion.
/// The iterator borrows `rtxn` for its whole lifetime.
///
/// # Errors
///
/// Fails immediately if the fields-ids map or the underlying document iterator cannot be
/// obtained. Afterwards, each yielded item is an `Err` if reading that entry or converting
/// it to JSON fails.
pub fn all_documents<'a>(
    index: &Index,
    rtxn: &'a RoTxn,
) -> Result<impl Iterator<Item = Result<Document>> + 'a> {
    let fields_ids_map = index.fields_ids_map(rtxn)?;
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

    let raw_documents = index.all_documents(rtxn)?;

    // The closure owns the fields map and the field list so the iterator can outlive
    // this function call.
    Ok(raw_documents.map(move |entry| -> Result<Document> {
        let (_docid, obkv) = entry.map_err(IndexError::from)?;
        Ok(obkv_to_json(&all_fields, &fields_ids_map, obkv)?)
    }))
}
/// Fetches one page of documents from `index`.
///
/// Skips the first `offset` documents, then returns at most `limit` of them. When
/// `attributes_to_retrieve` is `Some`, each document is reduced with
/// `permissive_json_pointer::select_values` to the listed attributes; otherwise the
/// document is returned whole.
///
/// Returns the total number of documents reported by the index together with the page.
///
/// # Errors
///
/// Stops at the first document that fails to load and returns that error.
pub fn retrieve_documents<S: AsRef<str>>(
    index: &Index,
    offset: usize,
    limit: usize,
    attributes_to_retrieve: Option<Vec<S>>,
) -> Result<(u64, Vec<Document>)> {
    let rtxn = index.read_txn()?;

    let documents = all_documents(index, &rtxn)?
        .skip(offset)
        .take(limit)
        .map(|loaded| -> Result<Document> {
            let full_document = loaded?;
            Ok(match &attributes_to_retrieve {
                Some(wanted) => permissive_json_pointer::select_values(
                    &full_document,
                    wanted.iter().map(|attribute| attribute.as_ref()),
                ),
                None => full_document,
            })
        })
        .collect::<Result<Vec<_>>>()?;

    // Counted after the page is built, within the same read transaction.
    let total = index.number_of_documents(&rtxn)?;

    Ok((total, documents))
}
/// Fetches the single document whose external id is `doc_id` and converts it to JSON.
///
/// When `attributes_to_retrieve` is `Some`, the document is reduced with
/// `permissive_json_pointer::select_values` to the listed attributes; otherwise it is
/// returned whole.
///
/// # Errors
///
/// Returns `IndexError::DocumentNotFound` (carrying `doc_id`) when the external id is
/// unknown or when the index yields no document for the resolved internal id. Other
/// failures from the index are propagated.
pub fn retrieve_document<S: AsRef<str>>(
    index: &Index,
    doc_id: &str,
    attributes_to_retrieve: Option<Vec<S>>,
) -> Result<Document> {
    // Both lookups below report a missing document the same way.
    let not_found = || IndexError::DocumentNotFound(doc_id.to_string());

    let rtxn = index.read_txn()?;

    let fields_ids_map = index.fields_ids_map(&rtxn)?;
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

    let external_ids = index.external_documents_ids(&rtxn)?;
    let internal_id = external_ids.get(doc_id.as_bytes()).ok_or_else(not_found)?;

    let found = index.documents(&rtxn, std::iter::once(internal_id))?;
    let (_, obkv) = found.into_iter().next().ok_or_else(not_found)?;

    let full_document = obkv_to_json(&all_fields, &fields_ids_map, obkv)?;

    Ok(match &attributes_to_retrieve {
        Some(wanted) => permissive_json_pointer::select_values(
            &full_document,
            wanted.iter().map(|attribute| attribute.as_ref()),
        ),
        None => full_document,
    })
}
pub fn settings(index: &Index, rtxn: &RoTxn) -> Result<Settings<Checked>> {
let displayed_attributes = index
.displayed_fields(rtxn)?
.map(|fields| fields.into_iter().map(String::from).collect());
let searchable_attributes = index
.user_defined_searchable_fields(rtxn)?
.map(|fields| fields.into_iter().map(String::from).collect());
let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
let criteria = index
.criteria(rtxn)?
.into_iter()
.map(|c| c.to_string())
.collect();
let stop_words = index
.stop_words(rtxn)?
.map(|stop_words| -> Result<BTreeSet<_>> {
Ok(stop_words.stream().into_strs()?.into_iter().collect())
})
.transpose()?
.unwrap_or_default();
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
// in milli each word in the synonyms map were split on their separator. Since we lost
// this information we are going to put space between words.
let synonyms = index
.synonyms(rtxn)?
.iter()
.map(|(key, values)| {
(
key.join(" "),
values.iter().map(|value| value.join(" ")).collect(),
)
})
.collect();
let min_typo_word_len = MinWordSizeTyposSetting {
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
};
let disabled_words = match index.exact_words(rtxn)? {
Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
None => BTreeSet::new(),
};
let disabled_attributes = index
.exact_attributes(rtxn)?
.into_iter()
.map(String::from)
.collect();
let typo_tolerance = TypoSettings {
enabled: Setting::Set(index.authorize_typos(rtxn)?),
min_word_size_for_typos: Setting::Set(min_typo_word_len),
disable_on_words: Setting::Set(disabled_words),
disable_on_attributes: Setting::Set(disabled_attributes),
};
let faceting = FacetingSettings {
max_values_per_facet: Setting::Set(
index
.max_values_per_facet(rtxn)?
.unwrap_or(DEFAULT_VALUES_PER_FACET),
),
};
let pagination = PaginationSettings {
max_total_hits: Setting::Set(
index
.pagination_max_total_hits(rtxn)?
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
),
};
Ok(Settings {
displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
},
searchable_attributes: match searchable_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
},
filterable_attributes: Setting::Set(filterable_attributes),
sortable_attributes: Setting::Set(sortable_attributes),
ranking_rules: Setting::Set(criteria),
stop_words: Setting::Set(stop_words),
distinct_attribute: match distinct_field {
Some(field) => Setting::Set(field),
None => Setting::Reset,
},
synonyms: Setting::Set(synonyms),
typo_tolerance: Setting::Set(typo_tolerance),
faceting: Setting::Set(faceting),
pagination: Setting::Set(pagination),
_kind: PhantomData,
})
}
fn insert_geo_distance(sorts: &[String], document: &mut Document) { fn insert_geo_distance(sorts: &[String], document: &mut Document) {
lazy_static::lazy_static! { lazy_static::lazy_static! {
static ref GEO_REGEX: Regex = static ref GEO_REGEX: Regex =

View File

@@ -8,6 +8,7 @@ use actix_web::{web, HttpRequest, HttpResponse};
use bstr::ByteSlice; use bstr::ByteSlice;
use document_formats::{read_csv, read_json, read_ndjson, PayloadType}; use document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use futures::{Stream, StreamExt}; use futures::{Stream, StreamExt};
use index::{retrieve_document, retrieve_documents};
use index_scheduler::milli::update::IndexDocumentsMethod; use index_scheduler::milli::update::IndexDocumentsMethod;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use index_scheduler::{KindWithContent, TaskView}; use index_scheduler::{KindWithContent, TaskView};
@@ -103,7 +104,7 @@ pub async fn get_document(
let attributes_to_retrieve = fields.and_then(fold_star_or); let attributes_to_retrieve = fields.and_then(fold_star_or);
let index = index_scheduler.index(&path.index_uid)?; let index = index_scheduler.index(&path.index_uid)?;
let document = index.retrieve_document(&path.document_id, attributes_to_retrieve)?; let document = retrieve_document(&index, &path.document_id, attributes_to_retrieve)?;
debug!("returns: {:?}", document); debug!("returns: {:?}", document);
Ok(HttpResponse::Ok().json(document)) Ok(HttpResponse::Ok().json(document))
} }
@@ -149,7 +150,7 @@ pub async fn get_all_documents(
let attributes_to_retrieve = fields.and_then(fold_star_or); let attributes_to_retrieve = fields.and_then(fold_star_or);
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let (total, documents) = index.retrieve_documents(offset, limit, attributes_to_retrieve)?; let (total, documents) = retrieve_documents(&index, offset, limit, attributes_to_retrieve)?;
let ret = PaginationView::new(offset, limit, total as usize, documents); let ret = PaginationView::new(offset, limit, total as usize, documents);

View File

@@ -1,6 +1,6 @@
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::milli::FieldDistribution; use index_scheduler::milli::{FieldDistribution, Index};
use index_scheduler::{IndexScheduler, KindWithContent, Query, Status}; use index_scheduler::{IndexScheduler, KindWithContent, Query, Status};
use log::debug; use log::debug;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
@@ -11,7 +11,6 @@ use time::OffsetDateTime;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::extractors::authentication::{policies::*, AuthenticationError, GuardedData}; use crate::extractors::authentication::{policies::*, AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler; use crate::extractors::sequential_extractor::SeqHandler;
use index_scheduler::task::TaskView;
use super::Pagination; use super::Pagination;
@@ -51,15 +50,14 @@ pub struct IndexView {
pub primary_key: Option<String>, pub primary_key: Option<String>,
} }
impl TryFrom<&Index> for IndexView { impl IndexView {
type Error = index::error::IndexError; fn new(uid: String, index: &Index) -> Result<IndexView, index::error::IndexError> {
let rtxn = index.read_txn()?;
fn try_from(index: &Index) -> Result<IndexView, Self::Error> {
Ok(IndexView { Ok(IndexView {
uid: index.name.clone(), uid,
created_at: index.created_at()?, created_at: index.created_at(&rtxn)?,
updated_at: index.updated_at()?, updated_at: index.updated_at(&rtxn)?,
primary_key: index.primary_key()?, primary_key: index.primary_key(&rtxn)?.map(String::from),
}) })
} }
} }
@@ -71,9 +69,9 @@ pub async fn list_indexes(
let search_rules = &index_scheduler.filters().search_rules; let search_rules = &index_scheduler.filters().search_rules;
let indexes: Vec<_> = index_scheduler.indexes()?; let indexes: Vec<_> = index_scheduler.indexes()?;
let indexes = indexes let indexes = indexes
.iter() .into_iter()
.filter(|index| search_rules.is_index_authorized(&index.name)) .filter(|(name, _)| search_rules.is_index_authorized(name))
.map(IndexView::try_from) .map(|(name, index)| IndexView::new(name, &index))
.collect::<Result<Vec<_>, _>>()?; .collect::<Result<Vec<_>, _>>()?;
let ret = paginate.auto_paginate_sized(indexes.into_iter()); let ret = paginate.auto_paginate_sized(indexes.into_iter());
@@ -130,7 +128,7 @@ pub async fn get_index(
index_uid: web::Path<String>, index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let index_view: IndexView = (&index).try_into()?; let index_view = IndexView::new(index_uid.into_inner(), &index)?;
debug!("returns: {:?}", index_view); debug!("returns: {:?}", index_view);
@@ -216,10 +214,11 @@ impl IndexStats {
let is_processing = !processing_task.is_empty(); let is_processing = !processing_task.is_empty();
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?;
Ok(IndexStats { Ok(IndexStats {
number_of_documents: index.number_of_documents()?, number_of_documents: index.number_of_documents(&rtxn)?,
is_indexing: is_processing, is_indexing: is_processing,
field_distribution: index.field_distribution()?, field_distribution: index.field_distribution(&rtxn)?,
}) })
} }
} }

View File

@@ -1,7 +1,7 @@
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use index::{ use index::{
MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET, DEFAULT_SEARCH_OFFSET,
}; };
@@ -158,7 +158,7 @@ pub async fn search_with_url_query(
let mut aggregate = SearchAggregator::from_query(&query, &req); let mut aggregate = SearchAggregator::from_query(&query, &req);
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let search_result = index.perform_search(query); let search_result = perform_search(&index, query);
if let Ok(ref search_result) = search_result { if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result); aggregate.succeed(search_result);
} }
@@ -192,7 +192,7 @@ pub async fn search_with_post(
let mut aggregate = SearchAggregator::from_query(&query, &req); let mut aggregate = SearchAggregator::from_query(&query, &req);
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let search_result = index.perform_search(query); let search_result = perform_search(&index, query);
if let Ok(ref search_result) = search_result { if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result); aggregate.succeed(search_result);
} }

View File

@@ -97,7 +97,8 @@ macro_rules! make_setting_route {
index_uid: actix_web::web::Path<String>, index_uid: actix_web::web::Path<String>,
) -> std::result::Result<HttpResponse, ResponseError> { ) -> std::result::Result<HttpResponse, ResponseError> {
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let settings = index.settings()?; let rtxn = index.read_txn()?;
let settings = index::settings(&index, &rtxn)?;
debug!("returns: {:?}", settings); debug!("returns: {:?}", settings);
let mut json = serde_json::json!(&settings); let mut json = serde_json::json!(&settings);
@@ -454,7 +455,8 @@ pub async fn get_all(
index_uid: web::Path<String>, index_uid: web::Path<String>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let new_settings = index.settings()?; let rtxn = index.read_txn()?;
let new_settings = index::settings(&index, &rtxn)?;
debug!("returns: {:?}", new_settings); debug!("returns: {:?}", new_settings);
Ok(HttpResponse::Ok().json(new_settings)) Ok(HttpResponse::Ok().json(new_settings))
} }

View File

@@ -5,14 +5,11 @@ use actix_web::{web, HttpRequest, HttpResponse};
use index::{Settings, Unchecked}; use index::{Settings, Unchecked};
use index_scheduler::{IndexScheduler, Query, Status}; use index_scheduler::{IndexScheduler, Query, Status};
use log::debug; use log::debug;
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
use index::{Settings, Unchecked};
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::star_or::StarOr; use meilisearch_types::star_or::StarOr;
use serde::{Deserialize, Serialize};
use serde_json::json;
use time::OffsetDateTime;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::extractors::authentication::{policies::*, GuardedData}; use crate::extractors::authentication::{policies::*, GuardedData};
@@ -270,25 +267,26 @@ async fn get_stats(
.first() .first()
.and_then(|task| task.index_uid.clone()); .and_then(|task| task.index_uid.clone());
for index in index_scheduler.indexes()? { for (name, index) in index_scheduler.indexes()? {
if !search_rules.is_index_authorized(&index.name) { if !search_rules.is_index_authorized(&name) {
continue; continue;
} }
database_size += index.size()?; database_size += index.on_disk_size()?;
let rtxn = index.read_txn()?;
let stats = IndexStats { let stats = IndexStats {
number_of_documents: index.number_of_documents()?, number_of_documents: index.number_of_documents(&rtxn)?,
is_indexing: processing_index is_indexing: processing_index
.as_deref() .as_deref()
.map_or(false, |index_name| index.name == index_name), .map_or(false, |index_name| name == index_name),
field_distribution: index.field_distribution()?, field_distribution: index.field_distribution(&rtxn)?,
}; };
let updated_at = index.updated_at()?; let updated_at = index.updated_at(&rtxn)?;
last_task = last_task.map_or(Some(updated_at), |last| Some(last.max(updated_at))); last_task = last_task.map_or(Some(updated_at), |last| Some(last.max(updated_at)));
indexes.insert(index.name.clone(), stats); indexes.insert(name, stats);
} }
let stats = Stats { let stats = Stats {