mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-08-02 03:40:00 +00:00
let the follower join the leader at any time
This commit is contained in:
@ -22,3 +22,4 @@ crossbeam = "0.8.2"
|
||||
bus = "2.3.0"
|
||||
time = "0.3.20"
|
||||
uuid = { version = "1.3.0", features = ["v4"] }
|
||||
synchronoise = "1.0.1"
|
||||
|
@ -7,44 +7,62 @@ use crossbeam::channel::{unbounded, Receiver, Sender};
|
||||
use ductile::{ChannelReceiver, ChannelSender, ChannelServer};
|
||||
use log::info;
|
||||
use meilisearch_types::tasks::Task;
|
||||
use synchronoise::SignalEvent;
|
||||
|
||||
use crate::batch::Batch;
|
||||
use crate::{Consistency, FollowerMsg, LeaderMsg};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone)]
|
||||
pub struct Leader {
|
||||
task_ready_to_commit: Receiver<u32>,
|
||||
broadcast_to_follower: Sender<LeaderMsg>,
|
||||
|
||||
cluster_size: Arc<AtomicUsize>,
|
||||
pub wake_up: Arc<SignalEvent>,
|
||||
|
||||
new_followers: Arc<AtomicUsize>,
|
||||
active_followers: Arc<AtomicUsize>,
|
||||
|
||||
batch_id: Arc<RwLock<u32>>,
|
||||
}
|
||||
|
||||
impl Leader {
|
||||
pub fn new(listen_on: impl ToSocketAddrs + Send + 'static) -> Leader {
|
||||
let cluster_size = Arc::new(AtomicUsize::new(1));
|
||||
let (process_batch_sender, process_batch_receiver) = unbounded();
|
||||
let new_followers = Arc::new(AtomicUsize::new(0));
|
||||
let active_followers = Arc::new(AtomicUsize::new(1));
|
||||
let wake_up = Arc::new(SignalEvent::auto(true));
|
||||
let (broadcast_to_follower, process_batch_receiver) = unbounded();
|
||||
let (task_finished_sender, task_finished_receiver) = unbounded();
|
||||
|
||||
let cs = cluster_size.clone();
|
||||
let nf = new_followers.clone();
|
||||
let af = active_followers.clone();
|
||||
let wu = wake_up.clone();
|
||||
std::thread::spawn(move || {
|
||||
Self::listener(listen_on, cs, process_batch_receiver, task_finished_sender)
|
||||
Self::listener(listen_on, nf, af, wu, process_batch_receiver, task_finished_sender)
|
||||
});
|
||||
|
||||
Leader {
|
||||
task_ready_to_commit: task_finished_receiver,
|
||||
broadcast_to_follower: process_batch_sender,
|
||||
cluster_size,
|
||||
broadcast_to_follower,
|
||||
|
||||
wake_up,
|
||||
|
||||
new_followers,
|
||||
active_followers,
|
||||
batch_id: Arc::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has_new_followers(&self) -> bool {
|
||||
self.new_followers.load(Ordering::Relaxed) != 0
|
||||
}
|
||||
|
||||
/// Takes all the necessary channels to chat with the scheduler and give them
|
||||
/// to each new followers
|
||||
fn listener(
|
||||
listen_on: impl ToSocketAddrs,
|
||||
cluster_size: Arc<AtomicUsize>,
|
||||
new_followers: Arc<AtomicUsize>,
|
||||
active_followers: Arc<AtomicUsize>,
|
||||
wake_up: Arc<SignalEvent>,
|
||||
broadcast_to_follower: Receiver<LeaderMsg>,
|
||||
task_finished: Sender<u32>,
|
||||
) {
|
||||
@ -63,12 +81,14 @@ impl Leader {
|
||||
|
||||
for (sender, receiver, _addr) in listener {
|
||||
let task_finished = task_finished.clone();
|
||||
let cs = cluster_size.clone();
|
||||
let nf = new_followers.clone();
|
||||
let af = active_followers.clone();
|
||||
let wu = wake_up.clone();
|
||||
|
||||
let process_batch = bus.lock().unwrap().add_rx();
|
||||
|
||||
std::thread::spawn(move || {
|
||||
Self::follower(sender, receiver, cs, process_batch, task_finished)
|
||||
Self::follower(sender, receiver, nf, af, wu, process_batch, task_finished)
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -77,20 +97,43 @@ impl Leader {
|
||||
fn follower(
|
||||
sender: ChannelSender<LeaderMsg>,
|
||||
receiver: ChannelReceiver<FollowerMsg>,
|
||||
cluster_size: Arc<AtomicUsize>,
|
||||
new_followers: Arc<AtomicUsize>,
|
||||
active_followers: Arc<AtomicUsize>,
|
||||
wake_up: Arc<SignalEvent>,
|
||||
mut broadcast_to_follower: BusReader<LeaderMsg>,
|
||||
task_finished: Sender<u32>,
|
||||
) {
|
||||
let size = cluster_size.fetch_add(1, atomic::Ordering::Relaxed) + 1;
|
||||
let size = new_followers.fetch_add(1, Ordering::Relaxed) + 1;
|
||||
wake_up.signal();
|
||||
info!("A new follower joined the cluster. {} members.", size);
|
||||
|
||||
loop {
|
||||
if let msg @ LeaderMsg::JoinFromDump(_) =
|
||||
broadcast_to_follower.recv().expect("Main thread died")
|
||||
{
|
||||
// we exit the new_follower state and become an active follower even though
|
||||
// the dump will takes some time to index
|
||||
new_followers.fetch_sub(1, Ordering::Relaxed);
|
||||
let size = active_followers.fetch_add(1, Ordering::Relaxed) + 1;
|
||||
info!("A new follower became active. {} active members.", size);
|
||||
|
||||
sender.send(msg).unwrap();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// send messages to the follower
|
||||
std::thread::spawn(move || loop {
|
||||
let msg = broadcast_to_follower.recv().expect("Main thread died");
|
||||
if sender.send(msg).is_err() {
|
||||
// the follower died, the logging and cluster size update should be done
|
||||
// in the other thread
|
||||
break;
|
||||
match msg {
|
||||
LeaderMsg::JoinFromDump(_) => (),
|
||||
msg => {
|
||||
if sender.send(msg).is_err() {
|
||||
// the follower died, the logging and cluster size update should be done
|
||||
// in the other thread
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@ -109,10 +152,20 @@ impl Leader {
|
||||
|
||||
// if we exited from the previous loop it means the follower is down and should
|
||||
// be removed from the cluster
|
||||
let size = cluster_size.fetch_sub(1, atomic::Ordering::Relaxed) - 1;
|
||||
let size = active_followers.fetch_sub(1, atomic::Ordering::Relaxed) - 1;
|
||||
info!("A follower left the cluster. {} members.", size);
|
||||
}
|
||||
|
||||
pub fn wake_up(&self) {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub fn join_me(&self, dump: Vec<u8>) {
|
||||
self.broadcast_to_follower
|
||||
.send(LeaderMsg::JoinFromDump(dump))
|
||||
.expect("Lost the link with the followers");
|
||||
}
|
||||
|
||||
pub fn starts_batch(&self, batch: Batch) {
|
||||
let mut batch_id = self.batch_id.write().unwrap();
|
||||
|
||||
@ -127,7 +180,7 @@ impl Leader {
|
||||
pub fn commit(&self, consistency_level: Consistency) {
|
||||
info!("Wait until enough followers are ready to commit a batch");
|
||||
|
||||
let mut batch_id = self.batch_id.write().unwrap();
|
||||
let batch_id = self.batch_id.write().unwrap();
|
||||
|
||||
// if zero nodes needs to be sync we can commit right away and early exit
|
||||
if consistency_level != Consistency::One {
|
||||
@ -144,13 +197,13 @@ impl Leader {
|
||||
// TODO: if the last node dies we're stuck on the iterator
|
||||
|
||||
// we need to reload the cluster size everytime in case a node dies
|
||||
let cluster_size = self.cluster_size.load(atomic::Ordering::Relaxed);
|
||||
let size = self.active_followers.load(atomic::Ordering::Relaxed);
|
||||
|
||||
info!("{ready_to_commit} nodes are ready to commit for a cluster size of {cluster_size}");
|
||||
info!("{ready_to_commit} nodes are ready to commit for a cluster size of {size}");
|
||||
match consistency_level {
|
||||
Consistency::Two if ready_to_commit >= 1 => break,
|
||||
Consistency::Quorum if ready_to_commit >= (cluster_size / 2) => break,
|
||||
Consistency::All if ready_to_commit == cluster_size => break,
|
||||
Consistency::Quorum if ready_to_commit >= (size / 2) => break,
|
||||
Consistency::All if ready_to_commit == size => break,
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
@ -23,6 +23,8 @@ pub enum Error {
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum LeaderMsg {
|
||||
// A dump to join the cluster
|
||||
JoinFromDump(Vec<u8>),
|
||||
// Starts a new batch
|
||||
StartBatch { id: u32, batch: Batch },
|
||||
// Tell the follower to commit the update asap
|
||||
@ -58,11 +60,19 @@ pub struct Follower {
|
||||
}
|
||||
|
||||
impl Follower {
|
||||
pub fn join(leader: impl ToSocketAddrs) -> Follower {
|
||||
pub fn join(leader: impl ToSocketAddrs) -> (Follower, Vec<u8>) {
|
||||
let (sender, receiver) = connect_channel(leader).unwrap();
|
||||
|
||||
info!("Connection to the leader established");
|
||||
|
||||
info!("Waiting for the leader to contact us");
|
||||
let state = receiver.recv().unwrap();
|
||||
|
||||
let dump = match state {
|
||||
LeaderMsg::JoinFromDump(dump) => dump,
|
||||
msg => panic!("Received unexpected message {msg:?}"),
|
||||
};
|
||||
|
||||
let (get_batch_sender, get_batch_receiver) = unbounded();
|
||||
let (must_commit_sender, must_commit_receiver) = unbounded();
|
||||
let (register_task_sender, register_task_receiver) = unbounded();
|
||||
@ -71,13 +81,16 @@ impl Follower {
|
||||
Self::router(receiver, get_batch_sender, must_commit_sender, register_task_sender);
|
||||
});
|
||||
|
||||
Follower {
|
||||
sender,
|
||||
get_batch: get_batch_receiver,
|
||||
must_commit: must_commit_receiver,
|
||||
register_new_task: register_task_receiver,
|
||||
batch_id: Arc::default(),
|
||||
}
|
||||
(
|
||||
Follower {
|
||||
sender,
|
||||
get_batch: get_batch_receiver,
|
||||
must_commit: must_commit_receiver,
|
||||
register_new_task: register_task_receiver,
|
||||
batch_id: Arc::default(),
|
||||
},
|
||||
dump,
|
||||
)
|
||||
}
|
||||
|
||||
fn router(
|
||||
@ -88,6 +101,9 @@ impl Follower {
|
||||
) {
|
||||
loop {
|
||||
match receiver.recv().expect("Lost connection to the leader") {
|
||||
LeaderMsg::JoinFromDump(_) => {
|
||||
panic!("Received a join from dump msg but I’m already running")
|
||||
}
|
||||
LeaderMsg::StartBatch { id, batch } => {
|
||||
info!("Starting to process a new batch");
|
||||
get_batch.send((id, batch)).expect("Lost connection to the main thread")
|
||||
|
Reference in New Issue
Block a user