Skip to content

Commit

Permalink
Add linear time implementation of append.
Browse files Browse the repository at this point in the history
  • Loading branch information
jooert committed Jul 20, 2015
1 parent 9b903bf commit 5f1a116
Show file tree
Hide file tree
Showing 3 changed files with 293 additions and 21 deletions.
89 changes: 86 additions & 3 deletions src/libcollections/btree/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,13 @@ pub struct OccupiedEntry<'a, K:'a, V:'a> {
stack: stack::SearchStack<'a, K, V, node::handle::KV, node::handle::LeafOrInternal>,
}

/// Merges two streams of key/value pairs into a single stream.
///
/// One element of look-ahead is buffered per side so that the two front keys can be compared
/// before deciding which underlying iterator to advance.
struct MergeIter<K, V, I>
    where I: Iterator<Item=(K, V)>
{
    /// Source of the pairs that lose when both sides carry an equal key.
    left: I,
    /// Source of the pairs that win when both sides carry an equal key.
    right: I,
    /// Buffered front element of `left`; `None` once `left` has run dry.
    left_cur: Option<(K, V)>,
    /// Buffered front element of `right`; `None` once `right` has run dry.
    right_cur: Option<(K, V)>,
}

impl<K: Ord, V> BTreeMap<K, V> {
/// Makes a new empty BTreeMap with a reasonable choice for B.
#[stable(feature = "rust1", since = "1.0.0")]
Expand Down Expand Up @@ -496,10 +503,36 @@ impl<K: Ord, V> BTreeMap<K, V> {
#[unstable(feature = "append",
reason = "recently added as part of collections reform 2")]
pub fn append(&mut self, other: &mut Self) {
let b = other.b;
for (key, value) in mem::replace(other, BTreeMap::with_b(b)) {
self.insert(key, value);
// Do we have to append anything at all?
if other.len() == 0 {
return;
}

// If the values of `b` of `self` and `other` are equal, we can just swap them if `self` is
// empty.
if self.len() == 0 && self.b == other.b {
mem::swap(self, other);
}

// First, we merge `self` and `other` into a sorted sequence in linear time.
let self_b = self.b;
let other_b = other.b;
let mut self_iter = mem::replace(self, BTreeMap::with_b(self_b)).into_iter();
let mut other_iter = mem::replace(other, BTreeMap::with_b(other_b)).into_iter();
let self_cur = self_iter.next();
let other_cur = other_iter.next();

// Second, we build a tree from the sorted sequence in linear time.
let (length, depth, root) = Node::from_sorted_iter(MergeIter {
left: self_iter,
right: other_iter,
left_cur: self_cur,
right_cur: other_cur,
}, self_b);

self.length = length;
self.depth = depth;
self.root = root.unwrap(); // `unwrap` won't panic because length can't be zero.
}

/// Splits the map into two at the given key,
Expand Down Expand Up @@ -644,6 +677,56 @@ impl<'a, K, V> IntoIterator for &'a mut BTreeMap<K, V> {
}
}

// Helper enum for MergeIter: the outcome of comparing the two buffered front elements, i.e.
// which side(s) the current `next` call has to consume.
enum MergeResult {
// Emit the left element: its key is smaller, or the right side is exhausted.
Left,
// Emit the right element: its key is smaller, or the left side is exhausted.
Right,
// Both keys are equal: both sides are advanced and only the right element is emitted.
Both,
// Both sides are exhausted; iteration is over.
None,
}

impl<K: Ord, V, I: Iterator<Item=(K, V)>> Iterator for MergeIter<K, V, I> {
type Item = (K, V);

fn next(&mut self) -> Option<(K, V)> {
let res = match (&self.left_cur, &self.right_cur) {
(&Some((ref left_key, _)), &Some((ref right_key, _))) => {
match left_key.cmp(right_key) {
Ordering::Less => MergeResult::Left,
Ordering::Equal => MergeResult::Both,
Ordering::Greater => MergeResult::Right,
}
},
(&Some(_), &None) => MergeResult::Left,
(&None, &Some(_)) => MergeResult::Right,
(&None, &None) => MergeResult::None,
};

// Check which elements comes first and only advance the corresponding iterator.
// If two keys are equal, take the value from `right`.
match res {
MergeResult::Left => {
let ret = self.left_cur.take();
self.left_cur = self.left.next();
ret
},
MergeResult::Right => {
let ret = self.right_cur.take();
self.right_cur = self.right.next();
ret
},
MergeResult::Both => {
let ret = self.right_cur.take();
self.left_cur = self.left.next();
self.right_cur = self.right.next();
ret
},
MergeResult::None => None,
}
}
}


/// A helper enum useful for deciding whether to continue a loop since we can't
/// return from a closure
enum Continuation<A, B> {
Expand Down
169 changes: 169 additions & 0 deletions src/libcollections/btree/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use core::ptr::Unique;
use core::{slice, mem, ptr, cmp, raw};
use alloc::heap::{self, EMPTY};

use vec::Vec;
use borrow::Borrow;

/// Represents the result of an Insertion: either the item fit, or the node had to split
Expand Down Expand Up @@ -904,6 +905,17 @@ impl<'a, K: 'a, V: 'a, NodeType> Handle<&'a mut Node<K, V>, handle::KV, NodeType
marker: PhantomData,
}
}

/// Consumes this key/value handle and returns a handle to the edge directly to its right,
/// i.e. the edge of the same node at `index + 1`. Prefer this over `right_edge` when the
/// resulting reference has to live for the full `'a` rather than for a shorter borrow of
/// the handle.
pub fn into_right_edge(self) -> Handle<&'a mut Node<K, V>, handle::Edge, NodeType> {
    let edge_index = self.index + 1;
    let node = &mut *self.node;
    Handle {
        node: node,
        index: edge_index,
        marker: PhantomData,
    }
}
}

impl<'a, K: 'a, V: 'a, NodeRef: Deref<Target=Node<K, V>> + 'a, NodeType> Handle<NodeRef, handle::KV,
Expand Down Expand Up @@ -1230,6 +1242,163 @@ impl<K, V> Node<K, V> {
}
}

impl<K, V> Node<K, V> {
/// Builds a tree bottom-up from the pairs yielded by `iter`, using `b` to derive the node
/// capacity, and returns `(length, levels, root)`.
///
/// * `length` is the number of pairs consumed from `iter`.
/// * `root` is the top-most node, or `None` if `iter` yielded nothing.
/// * The middle component is `levels.len()` read *after* the root has been popped, i.e. the
///   number of levels below the root.
///   NOTE(review): confirm this matches what callers store as the map's depth.
///
/// Keys are never compared here (there is no `Ord` bound), so the resulting tree is only
/// correctly ordered if `iter` already yields the pairs in key order.
pub fn from_sorted_iter<I>(iter: I, b: usize) -> (usize, usize, Option<Node<K, V>>)
where I: Iterator<Item=(K, V)> {
let capacity = capacity_from_b(b);
// Nodes on the right edge shorter than this are topped up by stealing in the final pass.
let minimum = min_load_from_capacity(capacity);

// Holds the current level (0 is the leaf level).
let mut num_level = 0;
// Needed to count the number of key-value pairs in `iter`.
let mut length = 0;
// `levels` contains the current node on every level, going from the leaves level to the
// root level. An entry is `None` while no node is under construction on that level.
let mut levels: Vec<Option<Node<K, V>>> = Vec::new();

// Iterate through all key-value pairs, pushing them into nodes of appropriate size at the
// right level.
for (key, value) in iter {
// Always go down to a leaf after inserting an element into an internal node.
if num_level > 0 {
num_level = 0;
}

// Climb from the leaf level until a node with room for the pair is found.
loop {
// If we are in an internal node, extract node from the level below to insert it as
// edge on the level above; `unsafe` is needed for unchecked access.
// The index is in bounds: we only get here with `num_level > 0` after the previous
// iteration made sure `levels` has an entry for `num_level - 1`.
let new_edge = unsafe {
if num_level > 0 {
levels.get_unchecked_mut(num_level - 1).take()
} else {
None
}
};

// Get current node on current level.
// If we are past the top-most level, insert a new level.
if num_level == levels.len() {
levels.push(None);
}
// If there is no node on this level, create a new node. `unsafe`
// is needed for unchecked access. The push above guarantees
// `num_level < levels.len()`.
let level = unsafe { levels.get_unchecked_mut(num_level) };
if level.is_none() {
*level = if num_level == 0 {
Some(Node::new_leaf(capacity))
} else {
// `unsafe` is needed for `new_internal`.
unsafe {
Some(Node::new_internal(capacity))
}
};
}
let node = level.as_mut().unwrap();

// Insert edge from the level below; `unsafe` is needed for `push_edge`.
// The edge is pushed before the fullness check, so a node that turns out to be full
// still receives the child that precedes the key stored one level further up.
if let Some(edge) = new_edge {
unsafe {
node.push_edge(edge);
}
}

// If node is already full, we have to go up one level before we can insert the
// key-value pair.
if !node.is_full() {
// Insert key-value pair into node; `unsafe` is needed for `push_kv`.
unsafe {
node.push_kv(key, value);
}
break;
}
num_level += 1;
}

length += 1;
}

// Fix "right edge" of the tree: the nodes still sitting in `levels` are not linked to each
// other yet and may be underfull. With a single level the lone leaf is the root and needs
// no fixing.
if levels.len() > 1 {

// Pass 1: link every pending node into the pending node one level above it.
num_level = 0;
while num_level < levels.len() - 1 {
// Extract node from this level or create a new one if there isn't any. `unsafe` is
// needed for unchecked access and `new_internal`. The loop condition keeps
// `num_level` in bounds.
// NOTE(review): the `None` arm appears reachable only for `num_level == 0`, because
// every higher level is filled in by the previous iteration before it is taken here.
let edge = unsafe {
match levels.get_unchecked_mut(num_level).take() {
Some(n) => n,
None => {
if num_level == 0 {
Node::new_leaf(capacity)
} else {
Node::new_internal(capacity)
}
},
}
};

// Go to the level above.
num_level += 1;

// Get node on this level, create one if there isn't any; `unsafe` is needed for
// unchecked access. `num_level <= levels.len() - 1` still holds after the increment.
let level = unsafe { levels.get_unchecked_mut(num_level) };
if level.is_none() {
// `unsafe` is needed for `new_internal`.
unsafe {
*level = Some(Node::new_internal(capacity));
}
}
let mut node = level.as_mut().unwrap();

// Insert `edge` as new edge in `node`; `unsafe` is needed for `push_edge`.
unsafe {
node.push_edge(edge);
}
}

// Pass 2: start at the root and steal to fix underfull nodes on the "right edge" of the
// tree. `root_index` is in bounds because `levels.len() > 1`.
let root_index = levels.len() - 1;
let mut node = unsafe { levels.get_unchecked_mut(root_index).as_mut().unwrap() };

loop {
// Move the reference into a fresh binding so `node` can be reassigned further down.
let mut temp_node = node;
// Last key/value pair of this node.
// NOTE(review): `len() - 1` would underflow for an empty node; this relies on every
// node reached here holding at least one pair — confirm.
let index = temp_node.len() - 1;
let mut handle = match temp_node.kv_handle(index).force() {
ForceResult::Internal(h) => h,
// Reached the leaf level: nothing further down to fix.
ForceResult::Leaf(_) => break,
};

// Check if we need to steal, i.e. is the length of the right edge less than
// `minimum`?
let right_len = handle.right_edge().node().len();
if right_len < minimum {
// Steal once per missing pair until the right child holds `minimum` pairs.
// NOTE(review): presumably each `steal_rightward` moves one pair from the left
// sibling through this key/value into the right child — verify against its
// definition.
let num_steals = minimum - right_len;
for _ in 0..num_steals {
// `unsafe` is needed for stealing.
unsafe {
handle.steal_rightward();
}
}
}

// Go down the right edge.
node = handle.into_right_edge().into_edge_mut();
}
}

// Get root node from `levels`. `pop` yields `None` for an empty `levels`, and the popped
// entry itself may be `None`; both cases end up as `None`.
let root = match levels.pop() {
Some(option) => option,
_ => None,
};

// Return (length, depth, root_node). `levels.len()` is evaluated after the `pop` above, so
// it no longer counts the root level.
(length, levels.len(), root)
}
}

// Private implementation details
impl<K, V> Node<K, V> {
/// Node is full, so split it into two nodes, and yield the middle-most key-value pair
Expand Down
56 changes: 38 additions & 18 deletions src/libcollectionstest/btree/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,30 +294,50 @@ fn test_extend_ref() {
assert_eq!(a[&3], "three");
}

// Generates a `#[test]` function called `$name` that appends one map to another and checks the
// result.
//
// `a` holds the keys `0..8` mapped to themselves and `b` holds the keys `5..$len` mapped to
// twice the key, so the keys `5..8` are present in both maps. After `a.append(&mut b)`:
//   * `b` must be empty and `a` must hold exactly the keys `0..$len`,
//   * keys below 5 keep the value from `a`, every other key carries the value from `b`
//     (on a duplicate key the appended map wins).
//
// `$len` must be at least 8, otherwise the length assertion cannot hold.
macro_rules! create_append_test {
    ($name:ident, $len:expr) => {
        #[test]
        fn $name() {
            let mut a = BTreeMap::with_b(6);
            for i in 0..8 {
                a.insert(i, i);
            }

            let mut b = BTreeMap::with_b(6);
            for i in 5..$len {
                b.insert(i, 2*i);
            }

            a.append(&mut b);

            assert_eq!(a.len(), $len);
            assert_eq!(b.len(), 0);

            for i in 0..$len {
                if i < 5 {
                    assert_eq!(a[&i], i);
                } else {
                    assert_eq!(a[&i], 2*i);
                }
            }
        }
    };
}

// These are mostly for testing the algorithm that "fixes" the right edge after insertion.
// Single node.
create_append_test!(test_append_9, 9);
// Two leaves that don't need fixing.
create_append_test!(test_append_17, 17);
// Two leaves where the second one ends up underfull and needs stealing at the end.
create_append_test!(test_append_14, 14);
// Two leaves where the first one isn't full; finish insertion at root.
create_append_test!(test_append_12, 12);
// Three levels; finish insertion at root.
create_append_test!(test_append_144, 144);
// Three levels; finish insertion at leaf without a node on the second level.
create_append_test!(test_append_145, 145);

#[test]
fn test_split_off() {
// Split empty map
Expand Down

0 comments on commit 5f1a116

Please sign in to comment.