Skip to content

Commit

Permalink
Merge #175 #176
Browse files Browse the repository at this point in the history
175: Split `Frame::new` into two. r=ptersilie a=ltratt

This function is one of the most frequently called in yksom, so the fact that it serves two different masters (methods and blocks) is less than ideal (at the very least it introduces a branch). This commit splits the function into two, one for methods and one for blocks.

To some extent this is a bit pointless in that the major overhead of `Frame::new` is the malloc of a `Vec`, but one day that cost will be much less, so this change seems like a useful one, if perhaps a bit early!

This can be reviewed but won't be mergeable until softdevteam/libgc#19 is merged.

176: Lazily store the number of Unicode characters a string contains. r=ptersilie a=ltratt

Calculating the number of Unicode characters in a string is an O(n) operation. Previously everytime `String_::length` was called we iterated over the underlying string to discover how many Unicode characters it contains.

This commit adds a field to SOM strings that stores its number of Unicode characters. Since not all strings will be queried for this property, we lazily cache this property the first time that `length` is called on a `String_`. We use `usize::max` as our marker as that means the marker is "safe" in all cases.

On the JSON benchmark, with GC off, this speeds things up by about 80%.

Needs softdevteam/libgc#19 to be merged first.

Co-authored-by: Laurence Tratt <[email protected]>
  • Loading branch information
bors[bot] and ltratt authored Aug 10, 2020
3 parents 6df91b7 + 0df4f31 + 918d735 commit 0f58312
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 36 deletions.
66 changes: 34 additions & 32 deletions src/lib/vm/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ impl VM {
for a in args {
self.stack.push(a);
}
let frame = Frame::new(self, None, rcv, None, num_vars, meth.num_params());
let frame = Frame::new_method(self, rcv, num_vars, meth.num_params());
self.frames.push(frame);
let r = self.exec_user(rcv, meth, bytecode_off);
self.frame_pop();
Expand All @@ -365,7 +365,7 @@ impl VM {
if self.stack.remaining_capacity() < max_stack {
panic!("Not enough stack space to execute method.");
}
let nframe = Frame::new(self, None, rcv, None, num_vars, method.num_params());
let nframe = Frame::new_method(self, rcv, num_vars, method.num_params());
self.frames.push(nframe);
let r = self.exec_user(rcv, method, bytecode_off);
self.frame_pop();
Expand Down Expand Up @@ -1061,14 +1061,8 @@ impl VM {
if self.stack.remaining_capacity() < max_stack {
panic!("Not enough stack space to execute block.");
}
let frame = Frame::new(
self,
Some(rcv),
rcv,
Some(rcv_blk.parent_closure),
num_vars,
nargs as usize,
);
let frame =
Frame::new_block(self, rcv, rcv_blk.parent_closure, num_vars, nargs as usize);
self.frames.push(frame);
let r = self.exec_user(rcv_blk.inst, rcv_blk.method, bytecode_off);
self.frame_pop();
Expand Down Expand Up @@ -1331,38 +1325,46 @@ pub struct Frame {
}

impl Frame {
fn new(
fn new_block(
vm: &mut VM,
block: Option<Val>,
self_val: Val,
parent_closure: Option<Gc<Closure>>,
block: Val,
parent_closure: Gc<Closure>,
num_vars: usize,
num_args: usize,
) -> Self {
let mut vars = Vec::with_capacity(num_vars);
vars.resize_with(num_vars, Val::illegal);

if block.is_none() {
vars[0] = self_val;
for i in 0..num_args {
vars[num_args - i] = vm.stack.pop();
}
for v in vars.iter_mut().skip(num_args + 1).take(num_vars) {
*v = vm.nil;
}
} else {
for i in 0..num_args {
vars[num_args - i - 1] = vm.stack.pop();
}
for v in vars.iter_mut().skip(num_args).take(num_vars) {
*v = vm.nil;
}
for i in 0..num_args {
vars[num_args - i - 1] = vm.stack.pop();
}
for v in vars.iter_mut().skip(num_args).take(num_vars) {
*v = vm.nil;
}

Frame {
sp: 0,
block: Some(block),
closure: Gc::new(Closure::new(Some(parent_closure), vars)),
}
}

fn new_method(vm: &mut VM, self_val: Val, num_vars: usize, num_args: usize) -> Self {
let mut vars = Vec::with_capacity(num_vars);
vars.resize_with(num_vars, Val::illegal);

vars[0] = self_val;
for i in 0..num_args {
vars[num_args - i] = vm.stack.pop();
}
for v in vars.iter_mut().skip(num_args + 1).take(num_vars) {
*v = vm.nil;
}

Frame {
sp: 0,
block,
closure: Gc::new(Closure::new(parent_closure, vars)),
block: None,
closure: Gc::new(Closure::new(None, vars)),
}
}

Expand Down Expand Up @@ -1483,7 +1485,7 @@ mod tests {
vm.stack.push(v);
let v = Val::from_isize(&mut vm, 44);
vm.stack.push(v);
let f = Frame::new(&mut vm, None, selfv, None, 3, 2);
let f = Frame::new_method(&mut vm, selfv, 3, 2);
assert_eq!(f.var_lookup(0, 0).as_isize(&mut vm).unwrap(), 42);
assert_eq!(f.var_lookup(0, 1).as_isize(&mut vm).unwrap(), 43);
assert_eq!(f.var_lookup(0, 2).as_isize(&mut vm).unwrap(), 44);
Expand Down
36 changes: 32 additions & 4 deletions src/lib/vm/objects/string_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ use crate::vm::{
#[derive(Debug)]
pub struct String_ {
cls: Cell<Val>,
/// The number of Unicode characters in this string. This is initialised lazily: usize::MAX is
/// the marker that means both `this string contains usize::MAX characters` and `we don't know
/// how many characters this string has`. The former condition is rather unlikely, so we accept
/// that if anyone ever manages to make a string of usize::MAX characters, we won't cache its
/// length correctly, but will recalculate it each time.
chars_len: Cell<usize>,
s: SmartString,
}

Expand All @@ -42,6 +48,7 @@ impl Obj for String_ {
vm,
String_ {
cls: self.cls.clone(),
chars_len: Cell::new(usize::MAX),
s: self.s.clone(),
},
))
Expand All @@ -54,7 +61,12 @@ impl Obj for String_ {
}

fn length(self: Gc<Self>) -> usize {
self.s.chars().count()
if self.chars_len.get() < usize::MAX {
return self.chars_len.get();
}
let chars_len = self.s.chars().count();
self.chars_len.set(chars_len);
chars_len
}

fn ref_equals(self: Gc<Self>, vm: &mut VM, other: Val) -> Result<Val, Box<VMError>> {
Expand All @@ -80,10 +92,17 @@ impl StaticObjType for String_ {

impl String_ {
pub fn new_str(vm: &mut VM, s: SmartString) -> Val {
String_::new_str_chars_len(vm, s, usize::MAX)
}

/// Create a new `String_` whose number of Unicode characters is already known. Note that it is
/// safe to pass `usize::MAX` for `chars_len`.
fn new_str_chars_len(vm: &mut VM, s: SmartString, chars_len: usize) -> Val {
Val::from_obj(
vm,
String_ {
cls: Cell::new(vm.str_cls),
chars_len: Cell::new(chars_len),
s,
},
)
Expand All @@ -94,6 +113,7 @@ impl String_ {
vm,
String_ {
cls: Cell::new(vm.sym_cls),
chars_len: Cell::new(usize::MAX),
s,
},
)
Expand Down Expand Up @@ -136,7 +156,11 @@ impl String_ {
let mut new = String::new();
new.push_str(&self.s);
new.push_str(&other_str.s);
Ok(String_::new_str(vm, new.into()))
let chars_len = self
.chars_len
.get()
.saturating_add(other_str.chars_len.get());
Ok(String_::new_str_chars_len(vm, new.into(), chars_len))
}

pub fn substring(&self, vm: &mut VM, start: usize, end: usize) -> Result<Val, Box<VMError>> {
Expand All @@ -156,11 +180,15 @@ impl String_ {
.take(end - start + 1)
.collect::<String>()
.into();
Ok(String_::new_str(vm, substr))
Ok(String_::new_str_chars_len(vm, substr, end - start))
}

pub fn to_string_(&self, vm: &mut VM) -> Result<Val, Box<VMError>> {
Ok(String_::new_str(vm, self.s.clone()))
Ok(String_::new_str_chars_len(
vm,
self.s.clone(),
self.chars_len.get(),
))
}

pub fn to_symbol(&self, vm: &mut VM) -> Result<Val, Box<VMError>> {
Expand Down

0 comments on commit 0f58312

Please sign in to comment.