let the follower join the leader at any time

This commit is contained in:
Tamo
2023-03-21 17:56:18 +01:00
parent 498b59ac84
commit 5ecfa3570f
7 changed files with 299 additions and 166 deletions

View File

@ -22,3 +22,4 @@ crossbeam = "0.8.2"
bus = "2.3.0"
time = "0.3.20"
uuid = { version = "1.3.0", features = ["v4"] }
synchronoise = "1.0.1"

View File

@ -7,44 +7,62 @@ use crossbeam::channel::{unbounded, Receiver, Sender};
use ductile::{ChannelReceiver, ChannelSender, ChannelServer};
use log::info;
use meilisearch_types::tasks::Task;
use synchronoise::SignalEvent;
use crate::batch::Batch;
use crate::{Consistency, FollowerMsg, LeaderMsg};
#[derive(Clone, Debug)]
#[derive(Clone)]
pub struct Leader {
task_ready_to_commit: Receiver<u32>,
broadcast_to_follower: Sender<LeaderMsg>,
cluster_size: Arc<AtomicUsize>,
pub wake_up: Arc<SignalEvent>,
new_followers: Arc<AtomicUsize>,
active_followers: Arc<AtomicUsize>,
batch_id: Arc<RwLock<u32>>,
}
impl Leader {
pub fn new(listen_on: impl ToSocketAddrs + Send + 'static) -> Leader {
let cluster_size = Arc::new(AtomicUsize::new(1));
let (process_batch_sender, process_batch_receiver) = unbounded();
let new_followers = Arc::new(AtomicUsize::new(0));
let active_followers = Arc::new(AtomicUsize::new(1));
let wake_up = Arc::new(SignalEvent::auto(true));
let (broadcast_to_follower, process_batch_receiver) = unbounded();
let (task_finished_sender, task_finished_receiver) = unbounded();
let cs = cluster_size.clone();
let nf = new_followers.clone();
let af = active_followers.clone();
let wu = wake_up.clone();
std::thread::spawn(move || {
Self::listener(listen_on, cs, process_batch_receiver, task_finished_sender)
Self::listener(listen_on, nf, af, wu, process_batch_receiver, task_finished_sender)
});
Leader {
task_ready_to_commit: task_finished_receiver,
broadcast_to_follower: process_batch_sender,
cluster_size,
broadcast_to_follower,
wake_up,
new_followers,
active_followers,
batch_id: Arc::default(),
}
}
/// Reports whether the `new_followers` counter is currently non-zero.
///
/// The counter is bumped when a follower connection starts being served and
/// lowered again once that follower has been handed a `JoinFromDump` message.
pub fn has_new_followers(&self) -> bool {
    let pending = self.new_followers.load(Ordering::Relaxed);
    pending > 0
}
/// Takes all the necessary channels to chat with the scheduler and gives them
/// to each new follower
fn listener(
listen_on: impl ToSocketAddrs,
cluster_size: Arc<AtomicUsize>,
new_followers: Arc<AtomicUsize>,
active_followers: Arc<AtomicUsize>,
wake_up: Arc<SignalEvent>,
broadcast_to_follower: Receiver<LeaderMsg>,
task_finished: Sender<u32>,
) {
@ -63,12 +81,14 @@ impl Leader {
for (sender, receiver, _addr) in listener {
let task_finished = task_finished.clone();
let cs = cluster_size.clone();
let nf = new_followers.clone();
let af = active_followers.clone();
let wu = wake_up.clone();
let process_batch = bus.lock().unwrap().add_rx();
std::thread::spawn(move || {
Self::follower(sender, receiver, cs, process_batch, task_finished)
Self::follower(sender, receiver, nf, af, wu, process_batch, task_finished)
});
}
}
@ -77,20 +97,43 @@ impl Leader {
fn follower(
sender: ChannelSender<LeaderMsg>,
receiver: ChannelReceiver<FollowerMsg>,
cluster_size: Arc<AtomicUsize>,
new_followers: Arc<AtomicUsize>,
active_followers: Arc<AtomicUsize>,
wake_up: Arc<SignalEvent>,
mut broadcast_to_follower: BusReader<LeaderMsg>,
task_finished: Sender<u32>,
) {
let size = cluster_size.fetch_add(1, atomic::Ordering::Relaxed) + 1;
let size = new_followers.fetch_add(1, Ordering::Relaxed) + 1;
wake_up.signal();
info!("A new follower joined the cluster. {} members.", size);
loop {
if let msg @ LeaderMsg::JoinFromDump(_) =
broadcast_to_follower.recv().expect("Main thread died")
{
// we exit the new_follower state and become an active follower even though
// the dump will take some time to index
new_followers.fetch_sub(1, Ordering::Relaxed);
let size = active_followers.fetch_add(1, Ordering::Relaxed) + 1;
info!("A new follower became active. {} active members.", size);
sender.send(msg).unwrap();
break;
}
}
// send messages to the follower
std::thread::spawn(move || loop {
let msg = broadcast_to_follower.recv().expect("Main thread died");
if sender.send(msg).is_err() {
// the follower died, the logging and cluster size update should be done
// in the other thread
break;
match msg {
LeaderMsg::JoinFromDump(_) => (),
msg => {
if sender.send(msg).is_err() {
// the follower died, the logging and cluster size update should be done
// in the other thread
break;
}
}
}
});
@ -109,10 +152,20 @@ impl Leader {
// if we exited from the previous loop it means the follower is down and should
// be removed from the cluster
let size = cluster_size.fetch_sub(1, atomic::Ordering::Relaxed) - 1;
let size = active_followers.fetch_sub(1, atomic::Ordering::Relaxed) - 1;
info!("A follower left the cluster. {} members.", size);
}
/// Placeholder that is not implemented yet: the body is `todo!()`, so calling
/// this method panics.
// NOTE(review): presumably meant to signal the `wake_up` event held by the
// leader — confirm the intended behaviour before implementing.
pub fn wake_up(&self) {
todo!()
}
/// Wraps `dump` in a `LeaderMsg::JoinFromDump` message and sends it on the
/// `broadcast_to_follower` channel.
///
/// # Panics
///
/// Panics with "Lost the link with the followers" if the send fails.
pub fn join_me(&self, dump: Vec<u8>) {
    let join_msg = LeaderMsg::JoinFromDump(dump);
    self.broadcast_to_follower.send(join_msg).expect("Lost the link with the followers");
}
pub fn starts_batch(&self, batch: Batch) {
let mut batch_id = self.batch_id.write().unwrap();
@ -127,7 +180,7 @@ impl Leader {
pub fn commit(&self, consistency_level: Consistency) {
info!("Wait until enough followers are ready to commit a batch");
let mut batch_id = self.batch_id.write().unwrap();
let batch_id = self.batch_id.write().unwrap();
// if zero nodes need to be synced we can commit right away and early exit
if consistency_level != Consistency::One {
@ -144,13 +197,13 @@ impl Leader {
// TODO: if the last node dies we're stuck on the iterator
// we need to reload the cluster size every time in case a node dies
let cluster_size = self.cluster_size.load(atomic::Ordering::Relaxed);
let size = self.active_followers.load(atomic::Ordering::Relaxed);
info!("{ready_to_commit} nodes are ready to commit for a cluster size of {cluster_size}");
info!("{ready_to_commit} nodes are ready to commit for a cluster size of {size}");
match consistency_level {
Consistency::Two if ready_to_commit >= 1 => break,
Consistency::Quorum if ready_to_commit >= (cluster_size / 2) => break,
Consistency::All if ready_to_commit == cluster_size => break,
Consistency::Quorum if ready_to_commit >= (size / 2) => break,
Consistency::All if ready_to_commit == size => break,
_ => (),
}
}

View File

@ -23,6 +23,8 @@ pub enum Error {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum LeaderMsg {
// A dump to join the cluster
JoinFromDump(Vec<u8>),
// Starts a new batch
StartBatch { id: u32, batch: Batch },
// Tell the follower to commit the update asap
@ -58,11 +60,19 @@ pub struct Follower {
}
impl Follower {
pub fn join(leader: impl ToSocketAddrs) -> Follower {
pub fn join(leader: impl ToSocketAddrs) -> (Follower, Vec<u8>) {
let (sender, receiver) = connect_channel(leader).unwrap();
info!("Connection to the leader established");
info!("Waiting for the leader to contact us");
let state = receiver.recv().unwrap();
let dump = match state {
LeaderMsg::JoinFromDump(dump) => dump,
msg => panic!("Received unexpected message {msg:?}"),
};
let (get_batch_sender, get_batch_receiver) = unbounded();
let (must_commit_sender, must_commit_receiver) = unbounded();
let (register_task_sender, register_task_receiver) = unbounded();
@ -71,13 +81,16 @@ impl Follower {
Self::router(receiver, get_batch_sender, must_commit_sender, register_task_sender);
});
Follower {
sender,
get_batch: get_batch_receiver,
must_commit: must_commit_receiver,
register_new_task: register_task_receiver,
batch_id: Arc::default(),
}
(
Follower {
sender,
get_batch: get_batch_receiver,
must_commit: must_commit_receiver,
register_new_task: register_task_receiver,
batch_id: Arc::default(),
},
dump,
)
}
fn router(
@ -88,6 +101,9 @@ impl Follower {
) {
loop {
match receiver.recv().expect("Lost connection to the leader") {
LeaderMsg::JoinFromDump(_) => {
panic!("Received a join from dump msg but Im already running")
}
LeaderMsg::StartBatch { id, batch } => {
info!("Starting to process a new batch");
get_batch.send((id, batch)).expect("Lost connection to the main thread")