diff --git a/Cargo.toml b/Cargo.toml index 877fbde79eb027..3387af244f7309 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,13 +16,13 @@ license = "Apache-2.0" name = "solana-historian-demo" path = "src/bin/historian-demo.rs" -[[bin]] -name = "solana-client-demo" -path = "src/bin/client-demo.rs" +#[[bin]] +#name = "solana-client-demo" +#path = "src/bin/client-demo.rs" -[[bin]] -name = "solana-testnode" -path = "src/bin/testnode.rs" +#[[bin]] +#name = "solana-testnode" +#path = "src/bin/testnode.rs" [[bin]] name = "solana-genesis" diff --git a/libcuda_verify_ed25519.a b/libcuda_verify_ed25519.a deleted file mode 100644 index 00184b42bf4553..00000000000000 --- a/libcuda_verify_ed25519.a +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f300c876abe10eea017bd9b56d75ca936eb18da202929b857e69db27b0d3b563 -size 9907668 diff --git a/src/accountant_skel.rs b/src/accountant_skel.rs index 9712c6c031cab5..7b71233ebce456 100644 --- a/src/accountant_skel.rs +++ b/src/accountant_skel.rs @@ -4,6 +4,7 @@ use accountant::Accountant; use bincode::{deserialize, serialize}; +use serde::Serializer; use ecdsa; use entry::Entry; use event::Event; @@ -18,7 +19,7 @@ use serde_json; use signature::PublicKey; use std::cmp::max; use std::collections::VecDeque; -use std::io::Write; +use std::io::{Cursor, Write}; use std::net::{SocketAddr, UdpSocket}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::mpsc::{channel, Receiver, Sender}; @@ -27,14 +28,11 @@ use std::thread::{spawn, JoinHandle}; use std::time::Duration; use streamer; use transaction::Transaction; +use crdt::{ReplicatedData, Crdt}; -use subscribers; - -pub struct AccountantSkel { +pub struct AccountantSkel { acc: Accountant, last_id: Hash, - writer: W, - historian: Historian, entry_info_subscribers: Vec, } @@ -76,13 +74,12 @@ pub enum Response { LastId { id: Hash }, } -impl AccountantSkel { +impl AccountantSkel { /// Create a new AccountantSkel that wraps the given Accountant. - pub fn new(acc: Accountant, last_id: Hash, writer: W, historian: Historian) -> Self { + pub fn new(acc: Accountant, last_id: Hash, historian: Historian) -> Self { AccountantSkel { acc, last_id, - writer, historian, entry_info_subscribers: vec![], } @@ -104,16 +101,64 @@ impl AccountantSkel { } /// Process any Entry items that have been published by the Historian. 
-    pub fn sync(&mut self) -> Hash {
-        while let Ok(entry) = self.historian.receiver.try_recv() {
+    /// Continuously broadcast blobs of entries out
+    fn run_sync(
+        obj: Arc<RwLock<AccountantSkel>>,
+        broadcast: &streamer::BlobSender,
+        blob_recycler: &streamer::BlobRecycler,
+        writer: W,
+        historian: Receiver<Entry>,
+    ) -> Result<()> {
+        //TODO clean this mess up
+        let entry = historian.recv(Duration::new(1, 0))?;
+        let mut b = blob_recycler.allocate();
+        let mut out = Cursor::new(b.data_mut());
+        let mut ser = bincode::Serializer::new(out);
+        let mut seq = ser.serialize_seq(None);
+        seq.serialize(entry).expect("serialize failed on first entry!");
+        obj.write().unwrap().notify_entry_info_subscribers(&entry);
+
+        while let Ok(entry) = historian.try_recv() {
+            //UNLOCK skel in this scope
+            let mut robj = obj.write().unwrap();
+            if let Err(e) = seq.serialize(entry) {
+                seq.end();
+                b.set_size(out.len());
+                broadcast.send(b)?;
+
+                //NEW SEQUENCE
+                b = blob_recycler.allocate();
+                out = Cursor::new(b.data_mut());
+                seq = ser.serialize_seq(None);
+                seq.serialize(entry).expect("serialize failed on first entry!");
+            }
             self.last_id = entry.id;
             self.acc.register_entry_id(&self.last_id);
-            writeln!(self.writer, "{}", serde_json::to_string(&entry).unwrap()).unwrap();
+            writeln!(writer, "{}", serde_json::to_string(&entry).unwrap()).unwrap();
             self.notify_entry_info_subscribers(&entry);
         }
-        self.last_id
+        seq.end();
+        b.set_size(out.len());
+        broadcast.send(b)?;
+        Ok(())
     }
+    pub fn sync_service(
+        obj: Arc<RwLock<AccountantSkel>>,
+        exit: AtomicBool,
+        broadcast: &streamer::BlobSender,
+        blob_recycler: &streamer::BlobRecycler,
+        writer: W,
+        historian: Receiver<Entry>,
+    ) -> JoinHandle<()> {
+        spawn(move || loop {
+            let e = Self::run_sync(&obj, &broadcast, &blob_recycler, writer, &historian);
+            if e.is_err() && exit.load(Ordering::Relaxed) {
+                break;
+            }
+        })
+    }
+
     /// Process Request items sent by clients.
     pub fn process_request(
         &mut self,
@@ -261,9 +306,10 @@ impl AccountantSkel {
     }
 
     fn process(
-        obj: &Arc>>,
+        obj: &Arc<RwLock<AccountantSkel>>,
         verified_receiver: &Receiver<Vec<(SharedPackets, Vec<u8>)>>,
-        blob_sender: &streamer::BlobSender,
+        broadcast_sender: &streamer::BlobSender,
+        responder_sender: &streamer::BlobSender,
         packet_recycler: &packet::PacketRecycler,
         blob_recycler: &packet::BlobRecycler,
     ) -> Result<()> {
@@ -280,7 +326,7 @@ impl AccountantSkel {
         let blobs = Self::serialize_responses(rsps, blob_recycler)?;
         if !blobs.is_empty() {
             //don't wake up the other side if there is nothing
-            blob_sender.send(blobs)?;
+            responder_sender.send(blobs)?;
         }
         packet_recycler.recycle(msgs);
 
@@ -293,7 +339,7 @@ impl AccountantSkel {
     /// Process verified blobs, already in order
     /// Respond with a signed hash of the state
     fn replicate_state(
-        obj: &Arc>>,
+        obj: &Arc<RwLock<AccountantSkel>>,
         verified_receiver: &streamer::BlobReceiver,
         blob_recycler: &packet::BlobRecycler,
     ) -> Result<()> {
@@ -322,11 +368,14 @@ impl AccountantSkel {
     /// This service is the network leader
    /// Set `exit` to shutdown its threads.
pub fn serve( - obj: &Arc>>, - addr: &str, + obj: &Arc>, + me: ReplicatedData exit: Arc, ) -> Result>> { - let read = UdpSocket::bind(addr)?; + let gossip = UdpSocket::bind(me.gossip_addr)?; + let read = UdpSocket::bind(me.serve_addr)?; + let crdt = Arc::new(RwLock::new(Crdt::new(me))); + // make sure we are on the same interface let mut local = read.local_addr()?; local.set_port(0); @@ -337,9 +386,13 @@ impl AccountantSkel { let (packet_sender, packet_receiver) = channel(); let t_receiver = streamer::receiver(read, exit.clone(), packet_recycler.clone(), packet_sender)?; - let (blob_sender, blob_receiver) = channel(); - let t_responder = - streamer::responder(write, exit.clone(), blob_recycler.clone(), blob_receiver); + let (responder_sender, responder_receiver) = channel(); + let t_responder = streamer::responder( + write, + exit.clone(), + blob_recycler.clone(), + responder_receiver + ); let (verified_sender, verified_receiver) = channel(); let exit_ = exit.clone(); @@ -350,12 +403,23 @@ impl AccountantSkel { } }); + let (broadcast_sender, broadcast_receiver) = channel(); + + let t_broadcast = streamer::broadcaster( + write, + exit.clone(), + crdt.clone(), + blob_recycler.clone(), + broadcast_receiver, + ); + let skel = obj.clone(); let t_server = spawn(move || loop { let e = Self::process( &skel, &verified_receiver, - &blob_sender, + &broadcast_sender, + &responder_sender, &packet_recycler, &blob_recycler, ); @@ -375,7 +439,8 @@ impl AccountantSkel { /// on the accountant state. /// # Arguments /// * `obj` - The accountant state. - /// * `rsubs` - The subscribers. + /// * `me` - my configuration + /// * `leader` - leader configuration /// * `exit` - The exit signal. /// # Remarks /// The pipeline is constructed as follows: @@ -389,11 +454,19 @@ impl AccountantSkel { /// 4. process the transaction state machine /// 5. 
respond with the hash of the state back to the leader pub fn replicate( - obj: &Arc>>, - rsubs: subscribers::Subscribers, + obj: &Arc>, + me: ReplicatedData, + leader: ReplicatedData, exit: Arc, ) -> Result>> { - let read = UdpSocket::bind(rsubs.me.addr)?; + let gossip = UdpSocket::bind(me.gossip_addr)?; + let read = UdpSocket::bind(me.replicate_addr)?; + + let crdt = Arc::new(RwLock::new(Crdt::new(me))); + crdt.write().unwrap().insert(&leader); + let t_gossip = Crdt::gossip(crdt.clone(), exit.clone()); + let t_listen = Crdt::listen(crdt.clone(), exit.clone()); + // make sure we are on the same interface let mut local = read.local_addr()?; local.set_port(0); @@ -410,20 +483,20 @@ impl AccountantSkel { let (window_sender, window_receiver) = channel(); let (retransmit_sender, retransmit_receiver) = channel(); - let subs = Arc::new(RwLock::new(rsubs)); let t_retransmit = streamer::retransmitter( write, exit.clone(), - subs.clone(), + crdt.clone(), blob_recycler.clone(), retransmit_receiver, ); - //TODO + + //TODO //the packets coming out of blob_receiver need to be sent to the GPU and verified //then sent to the window, which does the erasure coding reconstruction let t_window = streamer::window( exit.clone(), - subs, + crdt, blob_recycler.clone(), blob_receiver, window_sender, @@ -437,7 +510,7 @@ impl AccountantSkel { break; } }); - Ok(vec![t_blob_receiver, t_retransmit, t_window, t_server]) + Ok(vec![t_blob_receiver, t_retransmit, t_window, t_server, t_gossip, t_listen]) } } @@ -486,8 +559,7 @@ mod tests { use std::thread::sleep; use std::time::Duration; use transaction::Transaction; - - use subscribers::{Node, Subscribers}; + use crdt::Crdt; use streamer; use std::sync::mpsc::channel; use std::collections::VecDeque; @@ -623,10 +695,10 @@ mod tests { let source_peer_sock = UdpSocket::bind("127.0.0.1:0").expect("bind"); let exit = Arc::new(AtomicBool::new(false)); - let node_me = Node::new([0, 0, 0, 0, 0, 0, 0, 1], 10, me_addr); - let node_subs = vec![Node::new([0, 0, 0, 0, 0, 0, 0, 2], 8, target_peer_addr); 1]; - let node_leader = Node::new([0, 0, 0, 0, 0, 0, 0, 3], 20, leader_addr); - let subs = Subscribers::new(node_me, node_leader, &node_subs); + let node_me = ReplicateData::new(KeyPair::new().pubkey(), me_addr); + let node_target = ReplicateData::new(KeyPair::new().pubkey(), target_peer_addr); + let node_leader = ReplicateData::new(KeyPair::new().pubkey(), leader_addr); + let crdt_me = Crdt::new(node_me, node_leader, &node_subs); // setup some blob services to send blobs into the socket // to simulate the source peer and get blobs out of the socket to diff --git a/src/crdt.rs b/src/crdt.rs index b7742d5cd309b2..c90df5a728e9ab 100644 --- a/src/crdt.rs +++ b/src/crdt.rs @@ -1,15 +1,24 @@ //! The `crdt` module defines a data structure that is shared by all the nodes in the network over -//! a gossip control plane. The goal is to share small bits of of-chain information and detect and +//! a gossip control plane. The goal is to share small bits of off-chain information and detect and //! repair partitions. //! //! This CRDT only supports a very limited set of types. A map of PublicKey -> Versioned Struct. //! The last version is always picked durring an update. +//! +//! The network is arranged in layers: +//! +//! * layer 0 - Leader. +//! * layer 1 - As many nodes as we can fit +//! * layer 2 - Everyone else, if layer 1 is `2^10`, layer 2 should be able to fit `2^20` number of nodes. +//! +//! 
Accountant needs to provide an interface for us to query the stake weight
 
 use bincode::{deserialize, serialize};
 use byteorder::{LittleEndian, ReadBytesExt};
 use hash::Hash;
-use result::Result;
+use result::{Error, Result};
 use ring::rand::{SecureRandom, SystemRandom};
+use rayon::prelude::*;
 use signature::{PublicKey, Signature};
 use std::collections::HashMap;
 use std::io::Cursor;
@@ -18,11 +27,12 @@ use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, RwLock};
 use std::thread::{sleep, spawn, JoinHandle};
 use std::time::Duration;
+use packet::SharedBlob;
 
 /// Structure to be replicated by the network
 #[derive(Serialize, Deserialize, Clone)]
 pub struct ReplicatedData {
-    id: PublicKey,
+    pub id: PublicKey,
     sig: Signature,
     /// should always be increasing
     version: u64,
@@ -31,7 +41,7 @@ pub struct ReplicatedData {
     /// address to connect to for replication
     replicate_addr: SocketAddr,
     /// address to connect to when this node is leader
-    lead_addr: SocketAddr,
+    serve_addr: SocketAddr,
     /// current leader identity
     current_leader_id: PublicKey,
     /// last verified hash that was submitted to the leader
@@ -41,15 +51,18 @@
 }
 
 impl ReplicatedData {
-    pub fn new(id: PublicKey, gossip_addr: SocketAddr) -> ReplicatedData {
-        let daddr = "0.0.0.0:0".parse().unwrap();
+    pub fn new(id: PublicKey,
+               gossip_addr: SocketAddr,
+               replicate_addr: SocketAddr,
+               serve_addr: SocketAddr) -> ReplicatedData {
+        let daddr: SocketAddr = "0.0.0.0:0".parse().unwrap();
         ReplicatedData {
             id,
             sig: Signature::default(),
             version: 0,
             gossip_addr,
-            replicate_addr: daddr,
-            lead_addr: daddr,
+            replicate_addr,
+            serve_addr,
             current_leader_id: PublicKey::default(),
             last_verified_hash: Hash::default(),
             last_verified_count: 0,
@@ -109,15 +122,19 @@ impl Crdt {
         g.table.insert(me.id, me);
         g
     }
-    pub fn import(&mut self, v: &ReplicatedData) {
-        // TODO check that last_verified types are always increasing
-        // TODO probably an error or attack
-        if self.me != v.id {
-            self.insert(v);
-        }
+    pub fn my_data(&self) -> &ReplicatedData {
+        &self.table[&self.me]
+    }
+    pub fn leader_data(&self) -> &ReplicatedData {
+        &self.table[&self.table[&self.me].current_leader_id]
     }
     pub fn insert(&mut self, v: &ReplicatedData) {
+        // TODO check that last_verified types are always increasing
        if self.table.get(&v.id).is_none() || (v.version > self.table[&v.id].version) {
+            //somehow we signed a message for our own identity with a higher version than
+            // we have stored ourselves
+            println!("me: {:?} v.id: {:?}", self.me, v.id);
+            //assert!(self.me != v.id);
            trace!("insert! 
{}", v.version);
             self.update_index += 1;
             let _ = self.table.insert(v.id, v.clone());
@@ -126,6 +143,86 @@ impl Crdt {
             trace!("INSERT FAILED {}", v.version);
         }
     }
+
+    /// Broadcast messages from the leader to layer 1 nodes
+    /// # Remarks
+    /// We need to avoid having obj locked while doing any IO, such as the `send_to`
+    pub fn broadcast(
+        obj: &Arc<RwLock<Crdt>>,
+        blobs: &Vec<SharedBlob>,
+        s: &UdpSocket,
+        transmit_index: &mut u64
+    ) -> Result<()> {
+        let (me, table): (ReplicatedData, Vec<ReplicatedData>) = {
+            // copy to avoid locking during IO
+            let robj = obj.read().unwrap();
+            let cloned_table: Vec<ReplicatedData> = robj.table.values().cloned().collect();
+            (robj.table[&robj.me].clone(), cloned_table)
+        };
+        let errs: Vec<_> = table.iter()
+            .enumerate()
+            .cycle()
+            .zip(blobs.iter())
+            .map(|((i, v), b)| {
+                if me.id == v.id {
+                    return Ok(0);
+                }
+                // only leader should be broadcasting
+                assert!(me.current_leader_id != v.id);
+                let mut blob = b.write().unwrap();
+                blob.set_index(*transmit_index + i as u64);
+                s.send_to(&blob.data[..blob.meta.size], &v.replicate_addr)
+            })
+            .collect();
+        for e in errs {
+            trace!("retransmit result {:?}", e);
+            match e {
+                Err(e) => return Err(Error::IO(e)),
+                _ => (),
+            }
+            *transmit_index += 1;
+        }
+        Ok(())
+    }
+
+    /// Retransmit messages from the leader to layer 1 nodes
+    /// # Remarks
+    /// We need to avoid having obj locked while doing any IO, such as the `send_to`
+    pub fn retransmit(
+        obj: &Arc<RwLock<Crdt>>,
+        blob: &SharedBlob,
+        s: &UdpSocket
+    ) -> Result<()> {
+        let (me, table): (ReplicatedData, Vec<ReplicatedData>) = {
+            // copy to avoid locking during IO
+            let s = obj.read().unwrap();
+            (s.table[&s.me].clone(), s.table.values().cloned().collect())
+        };
+        let rblob = blob.read().unwrap();
+        let errs: Vec<_> = table
+            .par_iter()
+            .map(|v| {
+                if me.id == v.id {
+                    return Ok(0);
+                }
+                if me.current_leader_id == v.id {
+                    trace!("skip retransmit to leader {:?}", v.id);
+                    return Ok(0);
+                }
+                trace!("retransmit blob to {}", v.replicate_addr);
+                s.send_to(&rblob.data[..rblob.meta.size], &v.replicate_addr)
+            })
+            .collect();
+        for e in errs {
+            trace!("retransmit result {:?}", e);
+            match e {
+                Err(e) => return Err(Error::IO(e)),
+                _ => (),
+            }
+        }
+        Ok(())
+    }
+
     fn random() -> u64 {
         let rnd = SystemRandom::new();
         let mut buf = [0u8; 8];
@@ -186,7 +283,7 @@ impl Crdt {
                 // TODO we need to punish/spam resist here
                 // sig verify the whole update and slash anyone who sends a bad update
                 for v in data {
-                    self.import(&v);
+                    self.insert(&v);
                 }
                 *self.remote.entry(from).or_insert(update_index) = update_index;
             }
@@ -222,7 +319,7 @@ impl Crdt {
                     let rsp = serialize(&Protocol::ReceiveUpdates(from, ups, data))?;
                     trace!("send_to {}", addr);
                     //TODO verify reqdata belongs to sender
-                    obj.write().unwrap().insert(&reqdata);
+                    obj.write().unwrap().insert(&reqdata);
                     sock.send_to(&rsp, addr).unwrap();
                     trace!("send_to done!");
                 }
@@ -259,6 +356,12 @@ mod test {
     use std::thread::{sleep, JoinHandle};
     use std::time::Duration;
 
+    use rayon::iter::*;
+    use streamer::{blob_receiver, retransmitter};
+    use std::sync::mpsc::channel;
+    use packet::{Blob, BlobRecycler};
+    use std::collections::VecDeque;
+
     /// Test that the network converges.
     /// Run until every node in the network has a full ReplicatedData set.
    /// Check that nodes stop sending updates after all the ReplicatedData has been shared.
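Not part of the patch: a minimal, self-contained sketch of the round-robin fan-out idea behind `Crdt::broadcast` above (peers cycled and zipped against the outgoing blobs, each transmitted blob stamped with the next transmit index). `Peer` and `Blob` are hypothetical stand-ins for `ReplicatedData` and `SharedBlob`; the real function also clones the table under a short read lock before doing any socket IO, asserts it never broadcasts to the current leader, and returns `Error::IO` on a failed `send_to`.

```rust
// Hypothetical stand-ins for ReplicatedData and SharedBlob; not the types from this patch.
#[derive(Clone, Debug, PartialEq)]
struct Peer {
    id: u64,
}

#[derive(Debug)]
struct Blob {
    index: u64,
    payload: Vec<u8>,
}

/// Pair each outgoing blob with a peer, round-robin, and stamp it with the next
/// transmit index. Blobs that land on `me` are skipped, mirroring the
/// `me.id == v.id` arm in `Crdt::broadcast`.
fn plan_broadcast(
    me: &Peer,
    peers: &[Peer],
    blobs: Vec<Blob>,
    transmit_index: &mut u64,
) -> Vec<(Peer, Blob)> {
    let mut out = Vec::new();
    // `cycle()` repeats the peer list; `zip` stops once the blobs run out.
    for (peer, mut blob) in peers.iter().cycle().zip(blobs) {
        if peer == me {
            continue;
        }
        blob.index = *transmit_index;
        *transmit_index += 1;
        out.push((peer.clone(), blob));
    }
    out
}

fn main() {
    let me = Peer { id: 0 };
    let peers = vec![me.clone(), Peer { id: 1 }, Peer { id: 2 }];
    let blobs: Vec<Blob> = (0..4)
        .map(|_| Blob { index: 0, payload: vec![0u8; 8] })
        .collect();
    let mut transmit_index = 0;
    let plan = plan_broadcast(&me, &peers, blobs, &mut transmit_index);
    // Two blobs fell on `me` and were dropped; the other two got indices 0 and 1.
    println!("{:?} next index = {}", plan, transmit_index);
}
```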
@@ -271,12 +374,18 @@ mod test { let exit = Arc::new(AtomicBool::new(false)); let listen: Vec<_> = (0..num) .map(|_| { - let listener = UdpSocket::bind("0.0.0.0:0").unwrap(); + let gossip = UdpSocket::bind("0.0.0.0:0").unwrap(); + let replicate = UdpSocket::bind("0.0.0.0:0").unwrap(); + let serve = UdpSocket::bind("0.0.0.0:0").unwrap(); let pubkey = KeyPair::new().pubkey(); - let d = ReplicatedData::new(pubkey, listener.local_addr().unwrap()); + let d = ReplicatedData::new(pubkey, + gossip.local_addr().unwrap(), + replicate.local_addr().unwrap(), + serve.local_addr().unwrap(), + ); let crdt = Crdt::new(d); let c = Arc::new(RwLock::new(crdt)); - let l = Crdt::listen(c.clone(), listener, exit.clone()); + let l = Crdt::listen(c.clone(), gossip, exit.clone()); (c, l) }) .collect(); @@ -357,7 +466,11 @@ mod test { /// Test that insert drops messages that are older #[test] fn insert_test() { - let mut d = ReplicatedData::new(KeyPair::new().pubkey(), "127.0.0.1:1234".parse().unwrap()); + let mut d = ReplicatedData::new(KeyPair::new().pubkey(), + "127.0.0.1:1234".parse().unwrap(), + "127.0.0.1:1235".parse().unwrap(), + "127.0.0.1:1236".parse().unwrap(), + ); assert_eq!(d.version, 0); let mut crdt = Crdt::new(d.clone()); assert_eq!(crdt.table[&d.id].version, 0); @@ -369,4 +482,40 @@ mod test { assert_eq!(crdt.table[&d.id].version, 2); } + #[test] + pub fn test_crdt_retransmit() { + let read = UdpSocket::bind("127.0.0.1:0").expect("bind"); + let send = UdpSocket::bind("127.0.0.1:0").expect("bind"); + let serve = UdpSocket::bind("127.0.0.1:0").expect("bind"); + //let n1 = Node::new([0; 8], 0, s1.local_addr().unwrap()); + //let n2 = Node::new([0; 8], 0, s2.local_addr().unwrap()); + //let mut s = Subscribers::new(n1.clone(), n2.clone(), &[]); + //let n3 = Node::new([0; 8], 0, s3.local_addr().unwrap()); + let pubkey_me = KeyPair::new().pubkey(); + + let rep_data = ReplicatedData::new(pubkey_me, + read.local_addr().unwrap(), + send.local_addr().unwrap(), + serve.local_addr().unwrap()); + let n4 = rep_data.clone(); + let subs = Arc::new(RwLock::new(Crdt::new(rep_data))); + + //s.insert(&[n3]); + let mut b = Arc::new(RwLock::new(Blob::default())); + b.write().unwrap().meta.size = 10; + let s4 = UdpSocket::bind("127.0.0.1:0").expect("bind"); + Crdt::retransmit(&subs, &mut b, &s4).unwrap(); + + let res: Vec<_> = [read, send, serve] + .into_par_iter() + .map(|s| { + let mut b = Blob::default(); + s.set_read_timeout(Some(Duration::new(1, 0))).unwrap(); + s.recv_from(&mut b.data).is_err() + }) + .collect(); + assert_eq!(res, [true, true, false]); + subs.write().unwrap().insert(&n4); + assert!(Crdt::retransmit(&subs, &mut b, &s4).is_err()); + } } diff --git a/src/erasure.rs b/src/erasure.rs index b8480a73d7529a..a2486758c3dcc4 100644 --- a/src/erasure.rs +++ b/src/erasure.rs @@ -153,7 +153,7 @@ pub fn decode_blocks(data: &mut [&mut [u8]], coding: &[&[u8]], erasures: &[i32]) // Generate coding blocks in window from consumed to consumed+NUM_DATA pub fn generate_coding( re: &BlobRecycler, - window: &mut Vec>, + window: &mut Vec, consumed: usize, ) -> Result<()> { let mut data_blobs = Vec::new(); @@ -179,7 +179,7 @@ pub fn generate_coding( let coding_end = consumed + NUM_CODED; for i in coding_start..coding_end { let n = i % window.len(); - window[n] = Some(re.allocate()); + window[n] = re.allocate(); coding_blobs.push(window[n].clone().unwrap()); } for b in &coding_blobs { diff --git a/src/lib.rs b/src/lib.rs index 7a316f9ade8c64..8e4dda71c44906 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,9 @@ 
#![cfg_attr(feature = "unstable", feature(test))] pub mod accountant; -pub mod accountant_skel; -pub mod accountant_stub; +//pub mod accountant_skel; +//pub mod accountant_stub; pub mod crdt; -pub mod ecdsa; +//pub mod ecdsa; pub mod entry; #[cfg(feature = "erasure")] pub mod erasure; @@ -18,7 +18,6 @@ pub mod recorder; pub mod result; pub mod signature; pub mod streamer; -pub mod subscribers; pub mod transaction; extern crate bincode; extern crate byteorder; diff --git a/src/packet.rs b/src/packet.rs index d97b261e9f0fc8..cefe44cd0bf741 100644 --- a/src/packet.rs +++ b/src/packet.rs @@ -4,9 +4,11 @@ use result::{Error, Result}; use std::collections::VecDeque; use std::fmt; use std::io; +use bincode::{deserialize, serialize}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, UdpSocket}; use std::sync::{Arc, Mutex, RwLock}; use std::mem::size_of; +use signature::PublicKey; pub type SharedPackets = Arc>; pub type SharedBlob = Arc>; @@ -211,28 +213,40 @@ impl Packets { } } -const BLOB_INDEX_SIZE: usize = size_of::(); +const BLOB_INDEX_END: usize = size_of::(); +const BLOB_ID_END: usize = BLOB_INDEX_END + size_of::(); impl Blob { pub fn get_index(&self) -> Result { - let mut rdr = io::Cursor::new(&self.data[0..BLOB_INDEX_SIZE]); + let mut rdr = io::Cursor::new(&self.data[0..BLOB_INDEX_END]); let r = rdr.read_u64::()?; Ok(r) } pub fn set_index(&mut self, ix: u64) -> Result<()> { let mut wtr = vec![]; wtr.write_u64::(ix)?; - self.data[..BLOB_INDEX_SIZE].clone_from_slice(&wtr); + self.data[..BLOB_INDEX_END].clone_from_slice(&wtr); Ok(()) } + + pub fn get_id(&self) -> Result { + let e = deserialize(&self.data[BLOB_INDEX_END..BLOB_ID_END])?; + Ok(e) + } + pub fn set_id(&mut self, id: PublicKey) -> Result<()> { + let wtr = serialize(&id)?; + self.data[BLOB_INDEX_END..BLOB_ID_END].clone_from_slice(&wtr); + Ok(()) + } + pub fn data(&self) -> &[u8] { - &self.data[BLOB_INDEX_SIZE..] + &self.data[BLOB_ID_END..] } pub fn data_mut(&mut self) -> &mut [u8] { - &mut self.data[BLOB_INDEX_SIZE..] + &mut self.data[BLOB_ID_END..] 
} pub fn set_size(&mut self, size: usize) { - self.meta.size = size + BLOB_INDEX_SIZE; + self.meta.size = size + BLOB_ID_END; } pub fn recv_from(re: &BlobRecycler, socket: &UdpSocket) -> Result> { let mut v = VecDeque::new(); diff --git a/src/streamer.rs b/src/streamer.rs index 43e6f2ac353216..91db39b449ee86 100644 --- a/src/streamer.rs +++ b/src/streamer.rs @@ -8,7 +8,9 @@ use std::sync::mpsc; use std::sync::{Arc, RwLock}; use std::thread::{spawn, JoinHandle}; use std::time::Duration; -use subscribers::Subscribers; +use crdt::Crdt; +#[cfg(feature = "erasure")] +use erasure; pub type PacketReceiver = mpsc::Receiver; pub type PacketSender = mpsc::Sender; @@ -109,7 +111,7 @@ pub fn blob_receiver( fn recv_window( window: &mut Vec>, - subs: &Arc>, + crdt: &Arc>, recycler: &BlobRecycler, consumed: &mut usize, r: &BlobReceiver, @@ -118,24 +120,25 @@ fn recv_window( ) -> Result<()> { let timer = Duration::new(1, 0); let mut dq = r.recv_timeout(timer)?; + let leader_id = crdt.read().unwrap().leader_data().id; while let Ok(mut nq) = r.try_recv() { dq.append(&mut nq) } { //retransmit all leader blocks let mut retransmitq = VecDeque::new(); - let rsubs = subs.read().unwrap(); for b in &dq { let p = b.read().unwrap(); //TODO this check isn't safe against adverserial packets //we need to maintain a sequence window trace!( - "idx: {} addr: {:?} leader: {:?}", + "idx: {} addr: {:?} id: {:?} leader: {:?}", p.get_index().unwrap(), + p.get_id().unwrap(), p.meta.addr(), - rsubs.leader.addr + leader_id ); - if p.meta.addr() == rsubs.leader.addr { + if p.get_id().unwrap() == leader_id { //TODO //need to copy the retransmited blob //otherwise we get into races with which thread @@ -195,7 +198,7 @@ fn recv_window( pub fn window( exit: Arc, - subs: Arc>, + crdt: Arc>, recycler: BlobRecycler, r: BlobReceiver, s: BlobSender, @@ -210,7 +213,7 @@ pub fn window( } let _ = recv_window( &mut window, - &subs, + &crdt, &recycler, &mut consumed, &r, @@ -221,8 +224,57 @@ pub fn window( }) } +fn broadcast( + crdt: &Arc>, + recycler: &BlobRecycler, + r: &BlobReceiver, + sock: &UdpSocket, + transmit_index: &mut u64 +) -> Result<()> { + let timer = Duration::new(1, 0); + let mut dq = r.recv_timeout(timer)?; + while let Ok(mut nq) = r.try_recv() { + dq.append(&mut nq); + } + let mut blobs = dq.into_iter().collect(); + /// appends codes to the list of blobs allowing us to reconstruct the stream + #[cfg(feature = "erasure")] + erasure::generate_codes(blobs); + Crdt::broadcast(crdt, &blobs, &sock, transmit_index)?; + while let Some(b) = blobs.pop() { + recycler.recycle(b); + } + Ok(()) +} + +/// Service to broadcast messages from the leader to layer 1 nodes. +/// See `crdt` for network layer definitions. +/// # Arguments +/// * `sock` - Socket to send from. +/// * `exit` - Boolean to signal system exit. +/// * `crdt` - CRDT structure +/// * `recycler` - Blob recycler. +/// * `r` - Receive channel for blobs to be retransmitted to all the layer 1 nodes. 
+pub fn broadcaster( + sock: UdpSocket, + exit: Arc, + crdt: Arc>, + recycler: BlobRecycler, + r: BlobReceiver, +) -> JoinHandle<()> { + spawn(move || { + let mut transmit_index = 0; + loop { + if exit.load(Ordering::Relaxed) { + break; + } + let _ = broadcast(&crdt, &recycler, &r, &sock, &mut transmit_index); + } + }) +} + fn retransmit( - subs: &Arc>, + crdt: &Arc>, recycler: &BlobRecycler, r: &BlobReceiver, sock: &UdpSocket, @@ -233,10 +285,8 @@ fn retransmit( dq.append(&mut nq); } { - let wsubs = subs.read().unwrap(); for b in &dq { - let mut mb = b.write().unwrap(); - wsubs.retransmit(&mut mb, sock)?; + Crdt::retransmit(&crdt, b, sock)?; } } while let Some(b) = dq.pop_front() { @@ -246,18 +296,17 @@ fn retransmit( } /// Service to retransmit messages from the leader to layer 1 nodes. -/// See `subscribers` for network layer definitions. +/// See `crdt` for network layer definitions. /// # Arguments /// * `sock` - Socket to read from. Read timeout is set to 1. /// * `exit` - Boolean to signal system exit. -/// * `subs` - Shared Subscriber structure. This structure needs to be updated and popualted by -/// the accountant. +/// * `crdt` - This structure needs to be updated and populated by the accountant and via gossip. /// * `recycler` - Blob recycler. /// * `r` - Receive channel for blobs to be retransmitted to all the layer 1 nodes. pub fn retransmitter( sock: UdpSocket, exit: Arc, - subs: Arc>, + crdt: Arc>, recycler: BlobRecycler, r: BlobReceiver, ) -> JoinHandle<()> { @@ -265,7 +314,7 @@ pub fn retransmitter( if exit.load(Ordering::Relaxed) { break; } - let _ = retransmit(&subs, &recycler, &r, &sock); + let _ = retransmit(&crdt, &recycler, &r, &sock); }) } @@ -384,7 +433,10 @@ mod test { use std::time::Duration; use streamer::{blob_receiver, receiver, responder, retransmitter, window, BlobReceiver, PacketReceiver}; - use subscribers::{Node, Subscribers}; + + use crdt::{Crdt, ReplicatedData}; + use signature::KeyPair; + use signature::KeyPairUtil; fn get_msgs(r: PacketReceiver, num: &mut usize) { for _t in 0..5 { @@ -455,15 +507,23 @@ mod test { #[test] pub fn window_send_test() { + let pubkey_me = KeyPair::new().pubkey(); let read = UdpSocket::bind("127.0.0.1:0").expect("bind"); let addr = read.local_addr().unwrap(); let send = UdpSocket::bind("127.0.0.1:0").expect("bind"); + let serve = UdpSocket::bind("127.0.0.1:0").expect("bind"); let exit = Arc::new(AtomicBool::new(false)); - let subs = Arc::new(RwLock::new(Subscribers::new( + + let rep_data = ReplicatedData::new(pubkey_me, + read.local_addr().unwrap(), + send.local_addr().unwrap(), + serve.local_addr().unwrap()); + let subs = Arc::new(RwLock::new(Crdt::new(rep_data))); + /* Node::default(), Node::new([0; 8], 0, send.local_addr().unwrap()), - &[], - ))); + &[],*/ + let resp_recycler = BlobRecycler::default(); let (s_reader, r_reader) = channel(); let t_receiver = @@ -509,14 +569,18 @@ mod test { #[test] pub fn retransmit() { + let pubkey_me = KeyPair::new().pubkey(); let read = UdpSocket::bind("127.0.0.1:0").expect("bind"); let send = UdpSocket::bind("127.0.0.1:0").expect("bind"); + let serve = UdpSocket::bind("127.0.0.1:0").expect("bind"); let exit = Arc::new(AtomicBool::new(false)); - let subs = Arc::new(RwLock::new(Subscribers::new( - Node::default(), - Node::default(), - &[Node::new([0; 8], 1, read.local_addr().unwrap())], - ))); + + let rep_data = ReplicatedData::new(pubkey_me, + read.local_addr().unwrap(), + send.local_addr().unwrap(), + serve.local_addr().unwrap()); + let subs = 
Arc::new(RwLock::new(Crdt::new(rep_data))); + let (s_retransmit, r_retransmit) = channel(); let blob_recycler = BlobRecycler::default(); let saddr = send.local_addr().unwrap(); diff --git a/src/subscribers.rs b/src/subscribers.rs deleted file mode 100644 index f0b271c43960b5..00000000000000 --- a/src/subscribers.rs +++ /dev/null @@ -1,149 +0,0 @@ -//! The `subscribers` module defines data structures to keep track of nodes on the network. -//! The network is arranged in layers: -//! -//! * layer 0 - Leader. -//! * layer 1 - As many nodes as we can fit to quickly get reliable `2/3+1` finality -//! * layer 2 - Everyone else, if layer 1 is `2^10`, layer 2 should be able to fit `2^20` number of nodes. -//! -//! It's up to the external state machine to keep this updated. -use packet::Blob; -use rayon::prelude::*; -use result::{Error, Result}; -use std::net::{SocketAddr, UdpSocket}; - -use std::fmt; - -#[derive(Clone, PartialEq)] -pub struct Node { - pub id: [u64; 8], - pub weight: u64, - pub addr: SocketAddr, -} - -//sockaddr doesn't implement default -impl Default for Node { - fn default() -> Node { - Node { - id: [0; 8], - weight: 0, - addr: "0.0.0.0:0".parse().unwrap(), - } - } -} - -impl Node { - pub fn new(id: [u64; 8], weight: u64, addr: SocketAddr) -> Node { - Node { id, weight, addr } - } - fn key(&self) -> i64 { - (self.weight as i64).checked_neg().unwrap() - } -} - -impl fmt::Debug for Node { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Node {{ weight: {} addr: {} }}", self.weight, self.addr) - } -} - -pub struct Subscribers { - data: Vec, - pub me: Node, - pub leader: Node, -} - -impl Subscribers { - pub fn new(me: Node, leader: Node, network: &[Node]) -> Subscribers { - let mut h = Subscribers { - data: vec![], - me: me.clone(), - leader: leader.clone(), - }; - h.insert(&[me, leader]); - h.insert(network); - h - } - - /// retransmit messages from the leader to layer 1 nodes - pub fn retransmit(&self, blob: &mut Blob, s: &UdpSocket) -> Result<()> { - let errs: Vec<_> = self.data - .par_iter() - .map(|i| { - if self.me == *i { - return Ok(0); - } - if self.leader == *i { - return Ok(0); - } - trace!("retransmit blob to {}", i.addr); - s.send_to(&blob.data[..blob.meta.size], &i.addr) - }) - .collect(); - for e in errs { - trace!("retransmit result {:?}", e); - match e { - Err(e) => return Err(Error::IO(e)), - _ => (), - } - } - Ok(()) - } - pub fn insert(&mut self, ns: &[Node]) { - self.data.extend_from_slice(ns); - self.data.sort_by_key(Node::key); - } -} - -#[cfg(test)] -mod test { - use packet::Blob; - use rayon::prelude::*; - use std::net::UdpSocket; - use std::time::Duration; - use subscribers::{Node, Subscribers}; - - #[test] - pub fn subscriber() { - let mut me = Node::default(); - me.weight = 10; - let mut leader = Node::default(); - leader.weight = 11; - let mut s = Subscribers::new(me, leader, &[]); - assert_eq!(s.data.len(), 2); - assert_eq!(s.data[0].weight, 11); - assert_eq!(s.data[1].weight, 10); - let mut n = Node::default(); - n.weight = 12; - s.insert(&[n]); - assert_eq!(s.data.len(), 3); - assert_eq!(s.data[0].weight, 12); - } - #[test] - pub fn retransmit() { - let s1 = UdpSocket::bind("127.0.0.1:0").expect("bind"); - let s2 = UdpSocket::bind("127.0.0.1:0").expect("bind"); - let s3 = UdpSocket::bind("127.0.0.1:0").expect("bind"); - let n1 = Node::new([0; 8], 0, s1.local_addr().unwrap()); - let n2 = Node::new([0; 8], 0, s2.local_addr().unwrap()); - let mut s = Subscribers::new(n1.clone(), n2.clone(), &[]); - let n3 = Node::new([0; 8], 0, 
s3.local_addr().unwrap()); - s.insert(&[n3]); - let mut b = Blob::default(); - b.meta.size = 10; - let s4 = UdpSocket::bind("127.0.0.1:0").expect("bind"); - s.retransmit(&mut b, &s4).unwrap(); - let res: Vec<_> = [s1, s2, s3] - .into_par_iter() - .map(|s| { - let mut b = Blob::default(); - s.set_read_timeout(Some(Duration::new(1, 0))).unwrap(); - s.recv_from(&mut b.data).is_err() - }) - .collect(); - assert_eq!(res, [true, true, false]); - let mut n4 = Node::default(); - n4.addr = "255.255.255.255:1".parse().unwrap(); - s.insert(&[n4]); - assert!(s.retransmit(&mut b, &s4).is_err()); - } -}
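For reference, a self-contained sketch of the blob header layout that the src/packet.rs changes introduce: a little-endian u64 broadcast index at the front, followed by the sender's public key, with the payload starting at `BLOB_ID_END`. The 32-byte `ID_LEN` is an assumption standing in for the serialized size of `PublicKey`, and the 1024-byte buffer is arbitrary; the real code derives both offsets with `size_of` and goes through bincode/byteorder rather than the manual byte copies shown here. This widened header is what lets `recv_window` in src/streamer.rs recognize leader blobs by the id stamped in the blob instead of by the sender's socket address.

```rust
use std::mem::size_of;

// Header offsets, as in the patched src/packet.rs.
const BLOB_INDEX_END: usize = size_of::<u64>();
// Assumption: PublicKey serializes to 32 bytes; the real code uses size_of::<PublicKey>().
const ID_LEN: usize = 32;
const BLOB_ID_END: usize = BLOB_INDEX_END + ID_LEN;

struct Blob {
    // 1024 is an arbitrary buffer size for this sketch; the real Blob is larger.
    data: [u8; 1024],
}

impl Blob {
    fn set_index(&mut self, ix: u64) {
        // The patch writes this with byteorder's LittleEndian; to_le_bytes is equivalent here.
        self.data[..BLOB_INDEX_END].copy_from_slice(&ix.to_le_bytes());
    }
    fn get_index(&self) -> u64 {
        let mut buf = [0u8; 8];
        buf.copy_from_slice(&self.data[..BLOB_INDEX_END]);
        u64::from_le_bytes(buf)
    }
    fn set_id(&mut self, id: &[u8; ID_LEN]) {
        self.data[BLOB_INDEX_END..BLOB_ID_END].copy_from_slice(id);
    }
    fn get_id(&self) -> [u8; ID_LEN] {
        let mut id = [0u8; ID_LEN];
        id.copy_from_slice(&self.data[BLOB_INDEX_END..BLOB_ID_END]);
        id
    }
    /// Payload starts after the (index, id) header, like the patched `Blob::data()`.
    fn payload(&self) -> &[u8] {
        &self.data[BLOB_ID_END..]
    }
}

fn main() {
    let mut b = Blob { data: [0u8; 1024] };
    b.set_index(7);
    b.set_id(&[0xAA; ID_LEN]);
    assert_eq!(b.get_index(), 7);
    assert_eq!(b.get_id(), [0xAA; ID_LEN]);
    // recv_window can now match a blob against the leader's id rather than the
    // sender's socket address.
    assert_eq!(b.payload().len(), 1024 - BLOB_ID_END);
}
```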