diff --git a/src/de.rs b/src/de.rs index 19b2a70f..bfa3e6d6 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1,24 +1,23 @@ use std::borrow::Cow; -use std::char::{decode_utf16, REPLACEMENT_CHARACTER}; use std::error::Error as StdError; use std::fmt; -use std::str::FromStr; +use std::str::Utf8Error; +use std::string::FromUtf8Error; + +use parse::Bytes; -use pom::{DataInput, Input}; -use pom::char_class; -use pom::parser::*; use serde::de::{self, Deserializer as Deserializer_, DeserializeSeed, Visitor}; -type Result = ::std::result::Result; +pub type Result = ::std::result::Result; #[derive(Clone, Debug, PartialEq)] pub enum Error { Eof, Syntax, ExpectedArray, - ExpectedArrayComma, ExpectedArrayEnd, ExpectedBoolean, + ExpectedComma, ExpectedEnum, ExpectedChar, ExpectedFloat, @@ -27,7 +26,6 @@ pub enum Error { ExpectedOptionEnd, ExpectedMap, ExpectedMapColon, - ExpectedMapComma, ExpectedMapEnd, ExpectedStruct, ExpectedStructEnd, @@ -40,6 +38,7 @@ pub enum Error { /// A custom error emitted by the deserializer. 
Message(String), + Utf8Error(Utf8Error), TrailingCharacters, } @@ -58,6 +57,18 @@ impl de::Error for Error { } } +impl From for Error { + fn from(e: Utf8Error) -> Self { + Error::Utf8Error(e) + } +} + +impl From for Error { + fn from(e: FromUtf8Error) -> Self { + Error::Utf8Error(e.utf8_error()) + } +} + impl StdError for Error { fn description(&self) -> &str { match *self { @@ -68,17 +79,24 @@ impl StdError for Error { } pub struct Deserializer<'de> { - input: DataInput<'de, u8>, + bytes: Bytes<'de>, } impl<'de> Deserializer<'de> { pub fn from_str(input: &'de str) -> Self { Deserializer { - input: DataInput::new(input.as_bytes()), + bytes: Bytes::new(input.as_bytes()), + } + } + + pub fn from_bytes(input: &'de [u8]) -> Self { + Deserializer { + bytes: Bytes::new(input), } } + pub fn remainder(&self) -> Cow { - String::from_utf8_lossy(&self.input.data[self.input.position..]) + String::from_utf8_lossy(&self.bytes.bytes()) } } @@ -87,65 +105,26 @@ pub fn from_str<'a, T>(s: &'a str) -> Result { let mut deserializer = Deserializer::from_str(s); let t = T::deserialize(&mut deserializer)?; - if deserializer.input.position == deserializer.input.data.len() { - Ok(t) - } else { - Err(Error::TrailingCharacters) - } -} - -impl<'de> Deserializer<'de> { - fn parse_unsigned(&mut self) -> Result - where T: 'static + FromStr, T::Err: fmt::Debug - { - let parser = one_of(b"0123456789").repeat(1..); - parser.convert(|bytes| String::from_utf8(bytes)) - .convert(|string| FromStr::from_str(&string)) - .parse(&mut self.input) - .map_err(|_| Error::ExpectedInteger) - } - fn parse_signed(&mut self) -> Result - where T: 'static + FromStr, T::Err: fmt::Debug - { - let parser = one_of(b"+-").opt() + - one_of(b"0123456789").repeat(1..); - parser.collect() - .convert(|bytes| String::from_utf8(bytes)) - .convert(|string| FromStr::from_str(&string)) - .parse(&mut self.input) - .map_err(|_| Error::ExpectedInteger) - } + deserializer.end()?; - fn parse_float(&mut self) -> Result - where T: 'static 
+ FromStr, T::Err: fmt::Debug - { - let integer = one_of(b"123456789") - one_of(b"0123456789").repeat(0..) | sym(b'0'); - let frac = sym(b'.') + one_of(b"0123456789").repeat(1..); - let exp = one_of(b"eE") + one_of(b"+-").opt() + one_of(b"0123456789").repeat(1..); - let parser = sym(b'-').opt() + integer + frac.opt() + exp.opt(); + Ok(t) +} - parser.collect() - .convert(|bytes| String::from_utf8(bytes)) - .convert(|string| FromStr::from_str(&string)) - .parse(&mut self.input) - .map_err(|_| Error::ExpectedFloat) - } +impl<'de> Deserializer<'de> { + /// Check if the remaining bytes are whitespace only, + /// otherwise return an error. + pub fn end(&mut self) -> Result<()> { + self.bytes.skip_ws(); - fn consume(&mut self, what: &'static str) -> Result<()> { - let parser = seq(what.as_bytes()).discard(); - parser.parse(&mut self.input) - .map_err(|_| Error::Syntax) + if self.bytes.bytes().is_empty() { + Ok(()) + } else { + Err(Error::TrailingCharacters) + } } } -fn space<'a>() -> Parser<'a, u8, ()> { - one_of(b" \t\r\n").repeat(0..).discard() -} -fn comma<'a>() -> Parser<'a, u8, u8> { - space() * sym(b',') - space() -} - impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { type Error = Error; @@ -158,115 +137,83 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { fn deserialize_bool(self, visitor: V) -> Result where V: Visitor<'de> { - match seq(b"true").parse(&mut self.input) { - Ok(_) => visitor.visit_bool(true), - Err(_) => match seq(b"false").parse(&mut self.input) { - Ok(_) => visitor.visit_bool(false), - Err(_) => Err(Error::ExpectedBoolean) - } - } + visitor.visit_bool(self.bytes.bool()?) } fn deserialize_i8(self, visitor: V) -> Result where V: Visitor<'de> { - visitor.visit_i8(self.parse_signed()?) + visitor.visit_i8(self.bytes.signed_integer()?) } fn deserialize_i16(self, visitor: V) -> Result where V: Visitor<'de> { - visitor.visit_i16(self.parse_signed()?) + visitor.visit_i16(self.bytes.signed_integer()?)
} fn deserialize_i32(self, visitor: V) -> Result where V: Visitor<'de> { - visitor.visit_i32(self.parse_signed()?) + visitor.visit_i32(self.bytes.signed_integer()?) } fn deserialize_i64(self, visitor: V) -> Result where V: Visitor<'de> { - visitor.visit_i64(self.parse_signed()?) + visitor.visit_i64(self.bytes.signed_integer()?) } fn deserialize_u8(self, visitor: V) -> Result where V: Visitor<'de> { - visitor.visit_u8(self.parse_unsigned()?) + visitor.visit_u8(self.bytes.unsigned_integer()?) } fn deserialize_u16(self, visitor: V) -> Result where V: Visitor<'de> { - visitor.visit_u16(self.parse_unsigned()?) + visitor.visit_u16(self.bytes.unsigned_integer()?) } fn deserialize_u32(self, visitor: V) -> Result where V: Visitor<'de> { - visitor.visit_u32(self.parse_unsigned()?) + visitor.visit_u32(self.bytes.unsigned_integer()?) } fn deserialize_u64(self, visitor: V) -> Result where V: Visitor<'de> { - visitor.visit_u64(self.parse_unsigned()?) + visitor.visit_u64(self.bytes.unsigned_integer()?) } fn deserialize_f32(self, visitor: V) -> Result where V: Visitor<'de> { - visitor.visit_f32(self.parse_float()?) + visitor.visit_f32(self.bytes.float()?) } fn deserialize_f64(self, visitor: V) -> Result where V: Visitor<'de> { - visitor.visit_f64(self.parse_float()?) + visitor.visit_f64(self.bytes.float()?) } fn deserialize_char(self, visitor: V) -> Result where V: Visitor<'de> { - let parser = sym(b'\'') * take(1); - match parser.parse(&mut self.input) { - Ok(c) => { - let rv = if c[0] == b'\\' { - match take(1).parse(&mut self.input) { - Ok(ref c) if c[0] == b'\'' => visitor.visit_char('\''), - Ok(ref c) if c[0] == b'\\' => visitor.visit_char('\\'), - Ok(_) => Err(Error::InvalidEscape), - Err(_) => Err(Error::InvalidEscape), - } - } else { - visitor.visit_char(c[0] as char) - }; - - sym(b'\'').parse(&mut self.input).map_err(|_| Error::ExpectedChar)?; - - rv - }, - Err(_) => Err(Error::ExpectedChar) - } + visitor.visit_char(self.bytes.char()?) 
} fn deserialize_str(self, visitor: V) -> Result where V: Visitor<'de> { - let special_char = sym(b'\\') | sym(b'/') | sym(b'"') - | sym(b'b').map(|_|b'\x08') | sym(b'f').map(|_|b'\x0C') - | sym(b'n').map(|_|b'\n') | sym(b'r').map(|_|b'\r') | sym(b't').map(|_|b'\t'); - let escape_sequence = sym(b'\\') * special_char; - let char_string = (none_of(b"\\\"") | escape_sequence).repeat(0..).convert(String::from_utf8); - let utf16_char = seq(b"\\u") * is_a(char_class::hex_digit).repeat(4).convert(String::from_utf8).convert(|digits|u16::from_str_radix(&digits, 16)); - let utf16_string = utf16_char.repeat(0..).map(|chars| decode_utf16(chars).map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)).collect::()); - let parser = sym(b'"') * (char_string | utf16_string) - sym(b'"'); - - match parser.parse(&mut self.input) { - Ok(string) => visitor.visit_string(string), - Err(_) => Err(Error::ExpectedString) + use parse::ParsedStr; + + match self.bytes.string()? { + ParsedStr::Allocated(s) => visitor.visit_string(s), + ParsedStr::Slice(s) => visitor.visit_str(s), } } @@ -276,34 +223,34 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { self.deserialize_str(visitor) } - // The `Serializer` implementation on the previous page serialized byte - // arrays as JSON arrays of bytes. Handle that representation here. 
- fn deserialize_bytes(self, _visitor: V) -> Result + fn deserialize_bytes(self, visitor: V) -> Result where V: Visitor<'de> { - unimplemented!() + self.deserialize_seq(visitor) } - fn deserialize_byte_buf(self, _visitor: V) -> Result + fn deserialize_byte_buf(self, visitor: V) -> Result where V: Visitor<'de> { - unimplemented!() + self.deserialize_seq(visitor) } fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de> { - match seq(b"None").discard().parse(&mut self.input) { - Ok(_) => visitor.visit_none(), - Err(_) => match (seq(b"Some(") - space()).discard().parse(&mut self.input) { - Ok(_) => { - let value = visitor.visit_some(&mut *self)?; - self.consume(")") - .map(|_| value) - .map_err(|_| Error::ExpectedOptionEnd) - }, - Err(_) => Err(Error::ExpectedOption), + if self.bytes.consume("Some(") { + let v = visitor.visit_some(&mut *self)?; + + if self.bytes.consume(")") { + Ok(v) + } else { + Err(Error::ExpectedOptionEnd) } + + } else if self.bytes.consume("None") { + visitor.visit_none() + } else { + Err(Error::ExpectedOption) } } @@ -311,9 +258,10 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de> { - match self.consume("()") { - Ok(_) => visitor.visit_unit(), - Err(_) => Err(Error::ExpectedUnit), + if self.bytes.consume("()") { + visitor.visit_unit() + } else { + Err(Error::ExpectedUnit) } } @@ -324,7 +272,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { ) -> Result where V: Visitor<'de> { - if self.consume(name).is_ok() { + if self.bytes.consume(name) { visitor.visit_unit() } else { self.deserialize_unit(visitor) @@ -338,31 +286,36 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { ) -> Result where V: Visitor<'de> { - let _ = self.consume(name); - match self.consume("(") { - Ok(_) => { - let value = visitor.visit_newtype_struct(&mut *self)?; - let _ = comma().parse(&mut self.input); - self.consume(")") - 
.map(|_| value) - .map_err(|_| Error::ExpectedStructEnd) - }, - Err(_) => Err(Error::ExpectedStruct), + self.bytes.consume(name); + + if self.bytes.consume("(") { + let value = visitor.visit_newtype_struct(&mut *self)?; + self.bytes.comma(); + + if self.bytes.consume(")") { + Ok(value) + } else { + Err(Error::ExpectedStructEnd) + } + } else { + Err(Error::ExpectedStruct) } } fn deserialize_seq(mut self, visitor: V) -> Result where V: Visitor<'de> { - match self.consume("[") { - Ok(_) => { - let value = visitor.visit_seq(CommaSeparated::new(b']', &mut self))?; - let _ = comma().parse(&mut self.input); - self.consume("]") - .map(|_| value) - .map_err(|_| Error::ExpectedArrayEnd) - }, - Err(_) => Err(Error::ExpectedArray) + if self.bytes.consume("[") { + let value = visitor.visit_seq(CommaSeparated::new(b']', &mut self))?; + self.bytes.comma(); + + if self.bytes.consume("]") { + Ok(value) + } else { + Err(Error::ExpectedArrayEnd) + } + } else { + Err(Error::ExpectedArray) } } @@ -379,15 +332,17 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { ) -> Result where V: Visitor<'de> { - match self.consume("(") { - Ok(_) => { - let value = visitor.visit_seq(CommaSeparated::new(b')', &mut self))?; - let _ = comma().parse(&mut self.input); - self.consume(")") - .map(|_| value) - .map_err(|_| Error::ExpectedArrayEnd) - }, - Err(_) => Err(Error::ExpectedArray) + if self.bytes.consume("(") { + let value = visitor.visit_seq(CommaSeparated::new(b')', &mut self))?; + self.bytes.comma(); + + if self.bytes.consume(")") { + Ok(value) + } else { + Err(Error::ExpectedArrayEnd) + } + } else { + Err(Error::ExpectedArray) } } @@ -399,22 +354,24 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { ) -> Result where V: Visitor<'de> { - let _ = self.consume(name); + self.bytes.consume(name); self.deserialize_tuple(len, visitor) } fn deserialize_map(mut self, visitor: V) -> Result where V: Visitor<'de> { - match self.consume("{") { - Ok(_) => { - let value 
= visitor.visit_map(CommaSeparated::new(b'}', &mut self))?; - let _ = comma().parse(&mut self.input); - self.consume("}") - .map(|_| value) - .map_err(|_| Error::ExpectedMapEnd) - }, - Err(_) => Err(Error::ExpectedMap) + if self.bytes.consume("{") { + let value = visitor.visit_map(CommaSeparated::new(b'}', &mut self))?; + self.bytes.comma(); + + if self.bytes.consume("}") { + Ok(value) + } else { + Err(Error::ExpectedMapEnd) + } + } else { + Err(Error::ExpectedMap) } } @@ -426,17 +383,19 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { ) -> Result where V: Visitor<'de> { - let _ = self.consume(name); - - match self.consume("(") { - Ok(_) => { - let value = visitor.visit_map(CommaSeparated::new(b')', &mut self))?; - let _ = comma().parse(&mut self.input); - self.consume(")") - .map(|_| value) - .map_err(|_| Error::ExpectedStructEnd) - }, - Err(_) => Err(Error::ExpectedStruct) + self.bytes.consume(name); + + if self.bytes.consume("(") { + let value = visitor.visit_map(CommaSeparated::new(b')', &mut self))?; + self.bytes.comma(); + + if self.bytes.consume(")") { + Ok(value) + } else { + Err(Error::ExpectedStructEnd) + } + } else { + Err(Error::ExpectedStruct) } } @@ -457,13 +416,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { ) -> Result where V: Visitor<'de> { - let first = is_a(|b| char_class::alpha(b) || b == b'_'); - let other = is_a(|b| char_class::alpha(b) || char_class::alphanum(b) || b == b'_'); - let parser = space() * (first + other.repeat(0..)) - space(); - match parser.collect().parse(&mut self.input) { - Ok(bytes) => visitor.visit_bytes(&bytes), - Err(_) => Err(Error::ExpectedIdentifier), - } + visitor.visit_bytes(self.bytes.identifier()?) 
} fn deserialize_ignored_any( @@ -486,6 +439,25 @@ impl<'a, 'de> CommaSeparated<'a, 'de> { fn new(terminator: u8, de: &'a mut Deserializer<'de>) -> Self { CommaSeparated { de, terminator, first: true } } + + fn has_element(&mut self) -> Result { + if self.first { + self.de.bytes.skip_ws(); + self.first = false; + + Ok(self.de.bytes.peek().ok_or(Error::Eof)? != self.terminator) + } else { + let comma = self.de.bytes.comma(); + + if self.de.bytes.peek().ok_or(Error::Eof)? == self.terminator { + Ok(false) + } else if comma { + Ok(true) + } else { + Err(Error::ExpectedComma) + } + } + } } impl<'de, 'a> de::SeqAccess<'de> for CommaSeparated<'a, 'de> { @@ -494,23 +466,11 @@ impl<'de, 'a> de::SeqAccess<'de> for CommaSeparated<'a, 'de> { fn next_element_seed(&mut self, seed: T) -> Result> where T: DeserializeSeed<'de> { - // Check if there are no more elements. - if self.de.input.current() == Some(self.terminator) { - return Ok(None) - } - // Comma is required before every element except the first. - if !self.first { - if comma().parse(&mut self.de.input).is_err() { - return Err(Error::ExpectedArrayComma); - } - if self.de.input.current() == Some(self.terminator) { - return Ok(None) - } + if self.has_element()? { + seed.deserialize(&mut *self.de).map(Some) + } else { + Ok(None) } - self.first = false; - let _ = space().parse(&mut self.de.input); - // Deserialize an array element. - seed.deserialize(&mut *self.de).map(Some) } } @@ -520,32 +480,22 @@ impl<'de, 'a> de::MapAccess<'de> for CommaSeparated<'a, 'de> { fn next_key_seed(&mut self, seed: K) -> Result> where K: DeserializeSeed<'de> { - // Check if there are no more elements. - if self.de.input.current() == Some(self.terminator) { - return Ok(None) - } - // Comma is required before every element except the first. 
- if !self.first { - if comma().parse(&mut self.de.input).is_err() { - return Err(Error::ExpectedMapComma); - } - if self.de.input.current() == Some(self.terminator) { - return Ok(None) - } + if self.has_element()? { + seed.deserialize(&mut *self.de).map(Some) + } else { + Ok(None) } - self.first = false; - let _ = space().parse(&mut self.de.input); - // Deserialize a map key. - seed.deserialize(&mut *self.de).map(Some) } fn next_value_seed(&mut self, seed: V) -> Result where V: DeserializeSeed<'de> { - let parser = space() * sym(b':') - space(); - match parser.parse(&mut self.de.input) { - Ok(_) => seed.deserialize(&mut *self.de), - Err(_) => Err(Error::ExpectedMapColon), + if self.de.bytes.consume(":") { + self.de.bytes.skip_ws(); + + seed.deserialize(&mut *self.de) + } else { + Err(Error::ExpectedMapColon) } } } @@ -556,7 +506,7 @@ struct Enum<'a, 'de: 'a> { impl<'a, 'de> Enum<'a, 'de> { fn new(de: &'a mut Deserializer<'de>) -> Self { - Enum { de: de } + Enum { de } } } @@ -568,6 +518,7 @@ impl<'de, 'a> de::EnumAccess<'de> for Enum<'a, 'de> { where V: DeserializeSeed<'de> { let value = seed.deserialize(&mut *self.de)?; + Ok((value, self)) } } @@ -582,15 +533,18 @@ impl<'de, 'a> de::VariantAccess<'de> for Enum<'a, 'de> { fn newtype_variant_seed(self, seed: T) -> Result where T: DeserializeSeed<'de> { - match self.de.consume("(") { - Ok(_) => { - let value = seed.deserialize(&mut *self.de)?; - let _ = comma().parse(&mut self.de.input); - self.de.consume(")") - .map(|_| value) - .map_err(|_| Error::ExpectedStructEnd) - }, - Err(_) => Err(Error::ExpectedStruct) + if self.de.bytes.consume("(") { + let val = seed.deserialize(&mut *self.de)?; + + self.de.bytes.comma(); + + if self.de.bytes.consume(")") { + Ok(val) + } else { + Err(Error::ExpectedStructEnd) + } + } else { + Err(Error::ExpectedStruct) } } diff --git a/src/lib.rs b/src/lib.rs index b1887381..014b1814 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,3 +6,5 @@ extern crate serde_derive; pub mod de; pub mod 
ser; + +mod parse; diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 00000000..a5f42e9c --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,334 @@ +use std::ops::Neg; +use std::str::{FromStr, from_utf8, from_utf8_unchecked}; + +use de::{Error, Result}; + +const DIGITS: &[u8] = b"0123456789"; +const FLOAT_CHARS: &[u8] = b"0123456789.+-eE"; +const IDENT_FIRST: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"; +const IDENT_CHAR: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_0123456789"; +const WHITE_SPACE: &[u8] = b"\n\t\r "; + +#[derive(Clone, Copy, Debug)] +pub struct Bytes<'a> { + bytes: &'a [u8], + column: usize, + line: usize, +} + +impl<'a> Bytes<'a> { + pub fn new(bytes: &'a [u8]) -> Self { + Bytes { + bytes, + column: 1, + line: 1, + } + } + + pub fn advance(&mut self, bytes: usize) -> Result<()> { + for _ in 0..bytes { + self.advance_single()?; + } + + Ok(()) + } + + pub fn advance_single(&mut self) -> Result<()> { + if self.peek().ok_or(Error::Eof)? 
== b'\n' { + self.line += 1; + self.column = 1; + } else { + self.column += 1; + } + + self.bytes = &self.bytes[1..]; + + Ok(()) + } + + pub fn bool(&mut self) -> Result { + if self.consume("true") { + Ok(true) + } else if self.consume("false") { + Ok(false) + } else { + Err(Error::ExpectedBoolean) + } + } + + pub fn bytes(&self) -> &[u8] { + &self.bytes + } + + pub fn char(&mut self) -> Result { + if !self.consume("'") { + return Err(Error::ExpectedChar); + } + + let c = self.eat_byte()?; + + let c = if c == b'\\' { + let c = self.eat_byte()?; + + if c != b'\\' && c != b'\'' { + return Err(Error::InvalidEscape); + } + + c + } else { + c + }; + + if !self.consume("'") { + return Err(Error::ExpectedChar); + } + + Ok(c as char) + } + + pub fn comma(&mut self) -> bool { + if self.consume(",") { + self.skip_ws(); + + true + } else { + false + } + } + + pub fn consume(&mut self, s: &str) -> bool { + if s.bytes().enumerate().all(|(i, b)| self.bytes.get(i).map(|t| *t == b).unwrap_or(false)) { + let _ = self.advance(s.len()); + + true + } else { + false + } + } + + pub fn eat_byte(&mut self) -> Result { + if let Some(peek) = self.peek() { + let _ = self.advance_single(); + + Ok(peek) + } else { + Err(Error::Eof) + } + } + + pub fn float(&mut self) -> Result + where T: FromStr + { + let num_bytes = self.next_bytes_contained_in(FLOAT_CHARS); + + let s = unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) }; + let res = FromStr::from_str(s).map_err(|_| Error::ExpectedFloat); + + let _ = self.advance(num_bytes); + + res + } + + pub fn identifier(&mut self) -> Result<&[u8]> { + if IDENT_FIRST.contains(&self.peek().ok_or(Error::Eof)?) 
{ + let bytes = self.next_bytes_contained_in(IDENT_CHAR); + + let ident = &self.bytes[..bytes]; + let _ = self.advance(bytes); + + Ok(ident) + } else { + Err(Error::ExpectedIdentifier) + } + } + + pub fn next_bytes_contained_in(&self, allowed: &[u8]) -> usize { + (0..self.bytes.len()) + .flat_map(|i| self.bytes.get(i)) + .take_while(|b| allowed.contains(b)) + .fold(0, |acc, _| acc + 1) + } + + pub fn skip_ws(&mut self) { + while self.peek().map(|c| WHITE_SPACE.contains(&c)).unwrap_or(false) { + let _ = self.advance_single(); + } + } + + pub fn peek(&self) -> Option { + self.bytes.get(0).map(|b| *b) + } + + pub fn signed_integer(&mut self) -> Result where T: FromStr + Neg { + match self.peek() { + Some(b'+') => { + let _ = self.advance_single(); + + self.unsigned_integer() + } + Some(b'-') => { + let _ = self.advance_single(); + + self.unsigned_integer::().map(Neg::neg) + } + Some(_) => self.unsigned_integer(), + None => Err(Error::Eof), + } + } + + pub fn string(&mut self) -> Result { + if !self.consume("\"") { + return Err(Error::ExpectedString); + } + + let (i, end_or_escape) = (0..) + .flat_map(|i| self.bytes.get(i)) + .enumerate() + .find(|&(_, &b)| b == b'\\' || b == b'"') + .ok_or(Error::Eof)?; + + if *end_or_escape == b'"' { + let s = from_utf8(&self.bytes[..i])?; + + // Advance by the number of bytes of the string + // + 1 for the `"`. + let _ = self.advance(i + 1); + + Ok(ParsedStr::Slice(s)) + } else { + let mut i = i; + let mut s: Vec<_> = self.bytes[..i].to_vec(); + + loop { + let _ = self.advance(i + 1); + self.parse_str_escape(&mut s)?; + + let (new_i, end_or_escape) = (0..) 
+ .flat_map(|i| self.bytes.get(i)) + .enumerate() + .find(|&(_, &b)| b == b'\\' || b == b'"') + .ok_or(Error::Eof)?; + + i = new_i; + s.extend_from_slice(&self.bytes[..i]); + + if *end_or_escape == b'"' { + let _ = self.advance(i + 1); + + break Ok(ParsedStr::Allocated(String::from_utf8(s)?)); + } + } + } + } + + pub fn unsigned_integer(&mut self) -> Result where T: FromStr { + let num_bytes = self.next_bytes_contained_in(DIGITS); + + if num_bytes == 0 { + return Err(Error::Eof); + } + + let res = FromStr::from_str(unsafe { from_utf8_unchecked(&self.bytes[0..num_bytes]) }) + .map_err(|_| Error::ExpectedInteger); + + let _ = self.advance(num_bytes); + + res + } + + fn decode_hex_escape(&mut self) -> Result { + let mut n = 0; + for _ in 0..4 { + n = match self.eat_byte()? { + c @ b'0' ... b'9' => n * 16_u16 + ((c as u16) - (b'0' as u16)), + b'a' | b'A' => n * 16_u16 + 10_u16, + b'b' | b'B' => n * 16_u16 + 11_u16, + b'c' | b'C' => n * 16_u16 + 12_u16, + b'd' | b'D' => n * 16_u16 + 13_u16, + b'e' | b'E' => n * 16_u16 + 14_u16, + b'f' | b'F' => n * 16_u16 + 15_u16, + _ => { + return Err(Error::InvalidEscape); + } + }; + } + + Ok(n) + } + + fn parse_str_escape(&mut self, store: &mut Vec) -> Result<()> { + use std::iter::repeat; + + match self.eat_byte()? { + b'"' => store.push(b'"'), + b'\\' => store.push(b'\\'), + b'b' => store.push(b'\x08'), + b'f' => store.push(b'\x0c'), + b'n' => store.push(b'\n'), + b'r' => store.push(b'\r'), + b't' => store.push(b'\t'), + b'u' => { + let c: char = match self.decode_hex_escape()? { + 0xDC00 ... 0xDFFF => { + return Err(Error::InvalidEscape); + } + + n1 @ 0xD800 ... 0xDBFF => { + if self.eat_byte()? != b'\\' { + return Err(Error::InvalidEscape); + } + + if self.eat_byte()? 
!= b'u' { + return Err(Error::InvalidEscape); + } + + let n2 = self.decode_hex_escape()?; + + if n2 < 0xDC00 || n2 > 0xDFFF { + return Err(Error::InvalidEscape); + } + + let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000; + + match ::std::char::from_u32(n as u32) { + Some(c) => c, + None => { + return Err(Error::InvalidEscape); + } + } + } + + n => { + match ::std::char::from_u32(n as u32) { + Some(c) => c, + None => { + return Err(Error::InvalidEscape); + } + } + } + }; + + let char_start = store.len(); + store.extend(repeat(0).take(c.len_utf8())); + c.encode_utf8(&mut store[char_start..]); + } + _ => { + return Err(Error::InvalidEscape); + } + } + + Ok(()) + } +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Position { + pub col: usize, + pub line: usize, +} + +#[derive(Clone, Debug)] +pub enum ParsedStr<'a> { + Allocated(String), + Slice(&'a str), +}