Patch summary (text before the first "diff --git" line is ignored by patch tools)

  * Cargo.toml: version 0.1.2 -> 0.2.0, smallvec 0.3 -> 0.6, and the
    `[features]` table (including `as-mut`) is removed.
  * src/lib.rs:
      - `SmallString` loses its default type parameter (`= [u8; 8]`).
      - New inherent API: new, with_capacity, from_string, inline_size, len,
        is_empty, capacity, spilled, push, pop, push_str, reserve,
        reserve_exact, shrink_to_fit, truncate, clear.
      - New trait impls: DerefMut, AsMut<str> (no longer feature-gated),
        Extend<char>, Extend<&str>, FromIterator<&str>.
      - Existing impls (AsRef, Borrow, FromIterator<char>, From<String>,
        From<SmallString> for String) become generic over the backing array.
      - Removed: the inherent `from_str` constructor and the private
        `Utf8Iterator` adaptor.

NOTE(review): this copy was rebuilt from a whitespace-collapsed extraction in
which every `<...>` span had been stripped. The generic parameter lists below
are inferred from the leftover `>` characters and from how each type is used;
`Item=u8` (old side) versus `Item = u8` (new side) is an assumption based on
otherwise-identical -/+ line pairs. The author e-mail in Cargo.toml and exact
continuation-line indentation could not be recovered. Verify against the
upstream commit before applying.

NOTE(review), on the added code (not changed here):
  - `impl<'a, B: ...> SmallString<B>` declares a lifetime `'a` that none of
    the visible methods use.
  - `pop` uses `self.chars().rev().next()`; `next_back()` is the direct form.
  - `from_string` uses `string.into()` while `From<String>` uses
    `s.into_bytes()`; the two could share one spelling.
  - `Borrow::borrow` returns `&self` and relies on auto-deref; plain `self`
    would do.

diff --git a/Cargo.toml b/Cargo.toml
index 50ee7f1..5ddaad7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,14 +1,10 @@
 [package]
 name = "smallstring"
-version = "0.1.2"
+version = "0.2.0"
 authors = ["Jack Fransham "]
 repository = "https://github.com/jFransham/smallstring"
 description = "'Small string' optimization: store small strings on the stack using smallvec"
 license = "MIT"
 
-[features]
-default = []
-as-mut = []
-
 [dependencies]
-smallvec = "0.3"
\ No newline at end of file
+smallvec = "0.6"
diff --git a/src/lib.rs b/src/lib.rs
index 4edb7dd..203a4f8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,190 +1,289 @@
-#![cfg_attr(feature="as-mut", feature(str_mut_extras))]
-
 extern crate smallvec;
 
 use std::str;
 use std::ffi::OsStr;
-use std::ops::Deref;
+use std::ops::{Deref, DerefMut};
 use std::borrow::Borrow;
 use std::iter::{FromIterator, IntoIterator};
 use smallvec::{Array, SmallVec};
 
-// TODO: FromIterator without having to allocate a String
 #[derive(Clone, Default)]
-pub struct SmallString<B: Array<Item=u8> = [u8; 8]> {
+pub struct SmallString<B: Array<Item = u8>> {
     buffer: SmallVec<B>,
 }
 
-impl<B: Array<Item=u8>> std::hash::Hash for SmallString<B> {
+impl<'a, B: Array<Item = u8>> SmallString<B> {
+    /// Construct an empty string.
+    pub fn new() -> Self {
+        SmallString {
+            buffer: SmallVec::new(),
+        }
+    }
+
+    /// Constructs an empty string with enough capacity pre-allocated to store
+    /// at least `n` bytes worth of characters.
+    ///
+    /// Will create a heap allocation if and only if `n` is larger than the
+    /// inline capacity.
+    pub fn with_capacity(n: usize) -> Self {
+        SmallString {
+            buffer: SmallVec::with_capacity(n),
+        }
+    }
+
+    /// Constructs a new `SmallString` from a `String` without copying elements.
+    pub fn from_string(string: String) -> Self {
+        SmallString {
+            buffer: SmallVec::from_vec(string.into()),
+        }
+    }
+
+    /// The maximum number of bytes this string can hold inline.
+    pub fn inline_size(&self) -> usize {
+        self.buffer.inline_size()
+    }
+
+    /// The length of this string in bytes.
+    pub fn len(&self) -> usize {
+        self.buffer.len()
+    }
+
+    /// Returns `true` if the string is empty.
+    pub fn is_empty(&self) -> bool {
+        self.buffer.is_empty()
+    }
+
+    /// The maximum number of bytes this string can hold without reallocating.
+    pub fn capacity(&self) -> usize {
+        self.buffer.capacity()
+    }
+
+    /// Returns `true` if the string has spilled into a heap-allocated buffer.
+    pub fn spilled(&self) -> bool {
+        self.buffer.spilled()
+    }
+
+    /// Appends the given `char` to the end of this string.
+    pub fn push(&mut self, ch: char) {
+        match ch.len_utf8() {
+            1 => self.buffer.push(ch as u8),
+            _ => self.buffer
+                .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
+        }
+    }
+
+    /// Removes the last character from the string buffer and returns it.
+    ///
+    /// Returns `None` if this string is empty.
+    pub fn pop(&mut self) -> Option<char> {
+        // copied from String::pop implementation.
+        let ch = match self.chars().rev().next() {
+            Some(ch) => ch,
+            None => return None,
+        };
+
+        let new_len = self.len() - ch.len_utf8();
+
+        // self.buffer.set_len might be more efficient, but this *should*
+        // compile down to the same thing, and it is more safe in case
+        // SmallVec::set_len's implementation changes.
+        self.buffer.truncate(new_len);
+
+        Some(ch)
+    }
+
+    /// Appends a given string slice onto the end of this string.
+    pub fn push_str(&mut self, string: &str) {
+        self.buffer.extend_from_slice(string.as_bytes())
+    }
+
+    /// Reserve capacity for `additional` bytes to be inserted.
+    ///
+    /// May reserve more space to avoid frequent reallocations.
+    ///
+    /// If the new capacity would overflow `usize` then it will be set to
+    /// `usize::max_value()` instead. (This means that inserting additional new
+    /// elements is not guaranteed to be possible after calling this function.)
+    pub fn reserve(&mut self, additional: usize) {
+        self.buffer.reserve(additional)
+    }
+
+    /// Reserve the minimum capacity for `additional` more bytes to be inserted.
+    ///
+    /// Panics if new capacity overflows `usize`.
+    pub fn reserve_exact(&mut self, additional: usize) {
+        self.buffer.reserve_exact(additional)
+    }
+
+    /// Shrink the capacity of this `String` to match its length.
+    ///
+    /// When possible, this will move data from an external heap buffer to the
+    /// string's inline storage.
+    pub fn shrink_to_fit(&mut self) {
+        self.buffer.shrink_to_fit()
+    }
+
+    /// Shortens this `String` to the specified length.
+    ///
+    /// If `new_len > len()`, this has no effect.
+    ///
+    /// Note that this method has no effect on the allocated capacity of the string
+    ///
+    /// # Panics
+    ///
+    /// Panics if `new_len` does not lie on a `char` boundary.
+    pub fn truncate(&mut self, new_len: usize) {
+        if new_len < self.len() {
+            assert!(self.is_char_boundary(new_len));
+            self.buffer.truncate(new_len);
+        }
+    }
+
+    /// Removes all text from the string.
+    pub fn clear(&mut self) {
+        self.buffer.clear()
+    }
+}
+
+impl<B: Array<Item = u8>> std::hash::Hash for SmallString<B> {
     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
         let s: &str = self;
         s.hash(state)
     }
 }
 
-impl<B: Array<Item=u8>> std::cmp::PartialEq for SmallString<B> {
+impl<B: Array<Item = u8>> std::cmp::PartialEq for SmallString<B> {
     fn eq(&self, other: &Self) -> bool {
         let (s1, s2): (&str, &str) = (self, other);
         s1 == s2
     }
 }
 
-impl<B: Array<Item=u8>> std::cmp::Eq for SmallString<B> {}
+impl<B: Array<Item = u8>> std::cmp::Eq for SmallString<B> {}
 
-impl<'a, B: Array<Item=u8>> PartialEq<SmallString<B>> for &'a str {
+impl<'a, B: Array<Item = u8>> PartialEq<SmallString<B>> for &'a str {
     fn eq(&self, other: &SmallString<B>) -> bool {
         *self == (other as &str)
     }
 }
 
-impl<B: Array<Item=u8>> std::fmt::Display for SmallString<B> {
+impl<B: Array<Item = u8>> std::fmt::Display for SmallString<B> {
     fn fmt(&self, fm: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
         let s: &str = SmallString::deref(self);
         s.fmt(fm)
     }
 }
 
-impl<B: Array<Item=u8>> std::fmt::Debug for SmallString<B> {
+impl<B: Array<Item = u8>> std::fmt::Debug for SmallString<B> {
     fn fmt(&self, fm: &mut std::fmt::Formatter) -> std::fmt::Result {
         let s: &str = SmallString::deref(self);
         s.fmt(fm)
     }
 }
 
-impl<B: Array<Item=u8>> SmallString<B> {
-    pub fn from_str(s: &str) -> Self {
-        SmallString {
-            buffer: s.as_bytes().into_iter()
-                .cloned()
-                .collect(),
-        }
-    }
-}
-
-impl<'a, B: Array<Item=u8>> From<&'a str> for SmallString<B> {
-    fn from(s: &str) -> Self {
-        Self::from_str(s)
-    }
-}
-
-impl<B: Array<Item=u8>> Deref for SmallString<B> {
+impl<B: Array<Item = u8>> Deref for SmallString<B> {
     type Target = str;
 
     fn deref(&self) -> &str {
         // We only allow `buffer` to be created from an existing valid string,
         // so this is safe.
-        unsafe {
-            str::from_utf8_unchecked(self.buffer.as_ref())
-        }
+        unsafe { str::from_utf8_unchecked(self.buffer.as_ref()) }
     }
 }
 
-impl AsRef<str> for SmallString {
-    fn as_ref(&self) -> &str {
+impl<B: Array<Item = u8>> DerefMut for SmallString<B> {
+    fn deref_mut(&mut self) -> &mut str {
         // We only allow `buffer` to be created from an existing valid string,
         // so this is safe.
         unsafe {
-            str::from_utf8_unchecked(self.buffer.as_ref())
+            // we would use this method, but it's Rust 1.20+ only.
+            // str::from_utf8_unchecked_mut(self.buffer.as_mut())
+            // Instead, let's do what String::deref_mut() did before
+            // this method existed:
+            // https://doc.rust-lang.org/1.3.0/src/collections/string.rs.html#1023-1027
+            std::mem::transmute::<&mut [u8], &mut str>(&mut self.buffer[..])
         }
     }
 }
 
-struct Utf8Iterator<I>(I, Option<smallvec::IntoIter<[u8; 4]>>);
-
-impl<I: Iterator<Item=char>> Utf8Iterator<I> {
-    pub fn new<In: IntoIterator<Item=char, IntoIter=I>>(into: In) -> Self {
-        Utf8Iterator(into.into_iter(), None)
+impl<B: Array<Item = u8>> AsRef<str> for SmallString<B> {
+    fn as_ref(&self) -> &str {
+        self // forward to Deref
     }
 }
 
-impl<I: Iterator<Item=char>> Iterator for Utf8Iterator<I> {
-    type Item = u8;
+impl<B: Array<Item = u8>> AsMut<str> for SmallString<B> {
+    fn as_mut(&mut self) -> &mut str {
+        self // forward to DerefMut
+    }
+}
 
-    fn next(&mut self) -> Option<u8> {
-        if let Some(mut into) = self.1.take() {
-            if let Some(n) = into.next() {
-                self.1 = Some(into);
-                return Some(n);
-            }
+impl<B: Array<Item = u8>> Extend<char> for SmallString<B> {
+    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
+        let iterator = iter.into_iter();
+        let (lower_bound, _) = iterator.size_hint();
+        self.reserve(lower_bound);
+        for ch in iterator {
+            self.push(ch);
         }
-
-        let out = self.0.next();
-
-        out.and_then(|chr| {
-            let mut dest = [0u8; 4];
-            let outstr = chr.encode_utf8(&mut dest);
-
-            self.1 = Some(
-                outstr.as_bytes()
-                    .into_iter()
-                    .cloned()
-                    .collect::<SmallVec<[u8; 4]>>()
-                    .into_iter()
-            );
-
-            self.1.as_mut().and_then(|i| i.next())
-        })
     }
+}
 
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        let hint = self.0.size_hint();
-
-        (hint.0, hint.1.map(|x| x * 4))
+impl<'a, B: Array<Item = u8>> Extend<&'a str> for SmallString<B> {
+    fn extend<I: IntoIterator<Item = &'a str>>(&mut self, iter: I) {
+        for s in iter {
+            self.push_str(s);
+        }
     }
 }
 
-impl FromIterator<char> for SmallString {
-    fn from_iter<T: IntoIterator<Item=char>>(into_iter: T) -> Self {
-        // We're a shell so we mostly work with ASCII data - optimise for this
-        // case since we have to optimise for _some_ fixed size of char.
-        let utf8 = Utf8Iterator::new(into_iter);
-
-        SmallString {
-            buffer: utf8.collect(),
-        }
+impl<B: Array<Item = u8>> FromIterator<char> for SmallString<B> {
+    fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
+        let mut buf = SmallString::new();
+        buf.extend(iter);
+        buf
     }
 }
 
-#[cfg(feature="as-mut")]
-impl AsMut<str> for SmallString {
-    fn as_mut(&mut self) -> &mut str {
-        // We only allow `buffer` to be created from an existing valid string,
-        // so this is safe.
-        unsafe {
-            str::from_utf8_unchecked_mut(self.buffer.as_mut())
-        }
+impl<'a, B: Array<Item = u8>> FromIterator<&'a str> for SmallString<B> {
+    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
+        let mut buf = SmallString::new();
+        buf.extend(iter);
+        buf
     }
 }
 
-impl AsRef<OsStr> for SmallString {
+impl<B: Array<Item = u8>> AsRef<OsStr> for SmallString<B> {
     fn as_ref(&self) -> &OsStr {
         let s: &str = self.as_ref();
         s.as_ref()
     }
 }
 
-impl Borrow<str> for SmallString {
+impl<B: Array<Item = u8>> Borrow<str> for SmallString<B> {
     fn borrow(&self) -> &str {
-        // We only allow `buffer` to be created from an existing valid string,
-        // so this is safe.
-        unsafe {
-            str::from_utf8_unchecked(self.buffer.as_ref())
+        &self
+    }
+}
+
+impl<'a, B: Array<Item = u8>> From<&'a str> for SmallString<B> {
+    fn from(s: &str) -> Self {
+        SmallString {
+            buffer: SmallVec::from_slice(s.as_bytes()),
         }
     }
 }
 
-impl From<String> for SmallString {
-    fn from(s: String) -> SmallString {
+impl<B: Array<Item = u8>> From<String> for SmallString<B> {
+    fn from(s: String) -> Self {
         SmallString {
             buffer: SmallVec::from_vec(s.into_bytes()),
         }
     }
 }
 
-impl From<SmallString> for String {
-    fn from(s: SmallString) -> String {
-        unsafe {
-            String::from_utf8_unchecked(s.buffer.into_vec())
-        }
+impl<B: Array<Item = u8>> From<SmallString<B>> for String {
+    fn from(s: SmallString<B>) -> String {
+        unsafe { String::from_utf8_unchecked(s.buffer.into_vec()) }
     }
 }