Switch to 2TiB default index size, updates documentation

This commit is contained in:
Louis Dureuil
2023-02-22 09:04:52 +01:00
parent a529bf160c
commit c63294f331
2 changed files with 16 additions and 14 deletions

View File

@@ -45,18 +45,23 @@ use option::ScheduleSnapshot;
use crate::error::MeilisearchHttpError; use crate::error::MeilisearchHttpError;
/// Default number of simultaneously opened indexes, /// Default number of simultaneously opened indexes.
/// lower for Windows that dedicates a smaller virtual address space to processes. ///
/// This value is used when the dynamic computation of how many indexes can be opened at once is skipped (e.g., in tests).
///
/// Lower for Windows that dedicates a smaller virtual address space to processes.
/// ///
/// The value was chosen this way: /// The value was chosen this way:
/// ///
/// - Windows provides a small virtual address space of about 10TiB to processes. /// - Windows provides a small virtual address space of about 10TiB to processes.
/// - The chosen value allows for indexes to reach a safe size of 1TiB. /// - The chosen value allows for indexes to use the default map size of 2TiB safely.
/// - This can accommodate an unlimited number of indexes as long as they stay below 1TiB in size.
#[cfg(windows)] #[cfg(windows)]
const DEFAULT_INDEX_COUNT: usize = 10; const DEFAULT_INDEX_COUNT: usize = 4;
/// Default number of simultaneously opened indexes. /// Default number of simultaneously opened indexes.
/// ///
/// This value is used when the dynamic computation of how many indexes can be opened at once is skipped (e.g., in tests).
///
/// The higher, the better for avoiding reopening indexes. /// The higher, the better for avoiding reopening indexes.
/// ///
/// The value was chosen this way: /// The value was chosen this way:
@@ -64,13 +69,9 @@ const DEFAULT_INDEX_COUNT: usize = 10;
/// - Opening an index consumes a file descriptor. /// - Opening an index consumes a file descriptor.
/// - The default on many unices is about 256 file descriptors for a process. /// - The default on many unices is about 256 file descriptors for a process.
/// - 100 is a little bit less than half this value. /// - 100 is a little bit less than half this value.
/// /// - The chosen value allows for indexes to use the default map size of 2TiB safely.
/// In the future, this value could be computed from the dynamic number of allowed file descriptors for the current process.
///
/// On Unices, this value is largely irrelevant to virtual address space: thanks to index resizing, indexes should take up virtual memory in the same ballpark
/// as their disk size, and it is unlikely for a user to have indexes weighing a total of 128TB on a single Meilisearch node.
#[cfg(not(windows))] #[cfg(not(windows))]
const DEFAULT_INDEX_COUNT: usize = 100; const DEFAULT_INDEX_COUNT: usize = 20;
/// Check if a db is empty. It does not provide any information on the /// Check if a db is empty. It does not provide any information on the
/// validity of the data in it. /// validity of the data in it.

View File

@@ -65,9 +65,10 @@ const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
const DEFAULT_LOG_EVERY_N: usize = 100_000; const DEFAULT_LOG_EVERY_N: usize = 100_000;
// Each environment (index and task-db) is taking space in the virtual address space. // Each environment (index and task-db) is taking space in the virtual address space.
// When creating a new environment, it starts its life with 10GiB of virtual address space. // Ideally, indexes can occupy 2TiB each to avoid having to manually resize them.
// It is then later resized if needs be. // The actual size of the virtual address space is computed at startup to determine how many 2TiB indexes can be
pub const INDEX_SIZE: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB // opened simultaneously.
pub const INDEX_SIZE: u64 = 2 * 1024 * 1024 * 1024 * 1024; // 2 TiB
pub const TASK_DB_SIZE: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB pub const TASK_DB_SIZE: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] #[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]