Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Zero alloc lexer #322

Merged
merged 7 commits into from
Jul 5, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 89 additions & 23 deletions crates/apollo-parser/src/lexer/cursor.rs
Original file line number Diff line number Diff line change
@@ -1,63 +1,129 @@
use std::str::Chars;
use std::str::CharIndices;

use crate::Error;

/// Peekable iterator over a char sequence.
///
/// NOTE(review): `chars` is declared twice below, once as `Chars` and once as
/// `CharIndices`. This looks like residue of a diff whose +/- markers were
/// stripped. The methods that destructure `self.chars.next()` into
/// `(pos, char)` need the `CharIndices` form; confirm and drop the stale line.
#[derive(Debug, Clone)]
pub(crate) struct Cursor<'a> {
chars: Chars<'a>,
/// Byte offset into `source` at which the slices returned by `prev_str`,
/// `current_str` and `drain` begin.
index: usize,
/// Byte position of the character most recently taken from `chars`
/// (updated by `bump`, `eatc` and `current_str`).
offset: usize,
/// Value `offset` held immediately before the last advance in `bump`/`eatc`.
prev: usize,
/// The complete input text; every returned `&'a str` borrows from it.
source: &'a str,
chars: CharIndices<'a>,
/// A character held back so that the next `bump` call returns it first.
pending: Option<char>,
/// Most recently recorded error, if any (set through `add_err`).
pub(crate) err: Option<Error>,
}

impl<'a> Cursor<'a> {
/// Creates a cursor over `input` with every position set to zero, no pending
/// character and no recorded error.
///
/// NOTE(review): `chars` is initialised twice in the literal below
/// (`input.chars()` and `input.char_indices()`). This looks like diff residue;
/// confirm which initialiser matches the struct's field type and drop the other.
pub(crate) fn new(input: &'a str) -> Cursor<'a> {
Cursor {
chars: input.chars(),
index: 0,
offset: 0,
prev: 0,
pending: None,
source: input,
chars: input.char_indices(),
err: None,
}
}
}

/// NUL character used as an end-of-input sentinel: `nth_char` falls back to it
/// when there is no character at the requested position.
pub(crate) const EOF_CHAR: char = '\0';

impl<'a> Cursor<'a> {
/// Returns nth character relative to the current cursor position.
fn nth_char(&self, n: usize) -> char {
self.chars().nth(n).unwrap_or(EOF_CHAR)
pub(crate) fn index(&self) -> usize {
self.index
}

/// Peeks the next char in input without consuming.
pub(crate) fn first(&self) -> char {
self.nth_char(0)
fn eof(&self) -> bool {
self.offset == self.source.len()
}

/// Peeks the second char in input without consuming.
pub(crate) fn second(&self) -> char {
self.nth_char(1)
pub(crate) fn pending(&self) -> bool {
self.pending.is_some()
}

/// Checks if there are chars to consume.
pub(crate) fn is_eof(&self) -> bool {
self.chars.as_str().is_empty()
/// Returns the text from `index` up to, but not including, the character at
/// the current `offset`.
///
/// Afterwards `index` is moved to `offset`, and the character that starts at
/// `offset` (if any) becomes the pending character.
pub(crate) fn prev_str(&mut self) -> &'a str {
    let (start, end) = (self.index, self.offset);
    let lexeme = &self.source[start..end];

    // The following slice starts where this one stopped.
    self.index = end;
    self.pending = match self.source.get(end..) {
        Some(rest) => rest.chars().next(),
        None => None,
    };

    lexeme
}

/// Returns the text from `index` through the character at the current
/// `offset`, then steps to the following character and holds it as pending.
///
/// Any previously pending character is discarded. When `eof()` reports the end
/// of input, the remainder of the source starting at `index` is returned and
/// no further state is changed.
///
/// # Panics
///
/// Panics if `index` and `offset` do not describe a valid range of `source`.
pub(crate) fn current_str(&mut self) -> &'a str {
    self.pending = None;
    if self.eof() {
        return self.source.get(self.index..).unwrap();
    }

    // `offset` is the byte position where the current character *starts*, so
    // the slice must end after that character's full UTF-8 width. The former
    // `index..=offset` range ended exactly one byte in, which is not a char
    // boundary for multi-byte characters: `get` returned `None` and the
    // `unwrap` panicked. For single-byte characters the range is unchanged.
    let width = self
        .source
        .get(self.offset..)
        .and_then(|rest| rest.chars().next())
        .map_or(1, char::len_utf8);
    let slice = self.source.get(self.index..self.offset + width).unwrap();

    // If the iterator is exhausted the cursor simply rests on `offset`.
    self.index = self.offset;
    if let Some((pos, next)) = self.chars.next() {
        self.index = pos;
        self.offset = pos;
        self.pending = Some(next);
    }

    slice
}

/// Moves to the next character.
///
/// A pending character, if present, is returned first without touching the
/// underlying iterator. Otherwise the next character is pulled from `chars`,
/// `prev` receives the old `offset`, and `offset` becomes that character's
/// byte position. Returns `None` once the input is exhausted.
pub(crate) fn bump(&mut self) -> Option<char> {
    // The pending character must be handed out before the iterator is
    // advanced. Pulling from `chars` first (as a stale leading
    // `self.chars.next()?` did) silently drops one character per call and
    // loses the pending character when the iterator is already empty.
    if let Some(held) = self.pending.take() {
        return Some(held);
    }

    if self.offset == self.source.len() {
        return None;
    }

    let (pos, c) = self.chars.next()?;
    self.prev = self.offset;
    self.offset = pos;

    Some(c)
}

/// Consumes the next character and reports whether it equals `c`.
///
/// On a mismatch the consumed character is stored as pending so the next
/// `bump` returns it. Returns `false` when the input is exhausted.
///
/// # Panics
///
/// Panics if a character is already pending.
pub(crate) fn eatc(&mut self, c: char) -> bool {
    if self.pending.is_some() {
        panic!("dont call eatc when a character is pending");
    }

    match self.chars.next() {
        Some((pos, found)) => {
            self.prev = self.offset;
            self.offset = pos;

            let matched = found == c;
            if !matched {
                // Keep the mismatching character for the next `bump`.
                self.pending = Some(found);
            }
            matched
        }
        None => false,
    }
}

/// Returns a clone of the error currently stored in the cursor, if any.
/// The stored error is left in place.
pub(crate) fn err(&mut self) -> Option<Error> {
    let current = &self.err;
    current.clone()
}

/// Returns everything from `index` to the end of the source.
///
/// Discards any pending character and leaves `index` on the last byte of the
/// source (or at 0 when the source is empty).
///
/// # Panics
///
/// Panics if `index` is not a valid starting position within `source`.
pub(crate) fn drain(&mut self) -> &'a str {
    self.pending = None;
    let start = self.index;
    // `len() - 1` underflows for an empty source; saturating keeps `index` at
    // 0 in that case and is identical for every non-empty source.
    self.index = self.source.len().saturating_sub(1);

    // For a non-empty source `start..=len - 1` and `start..` select the same
    // bytes; the open-ended form also yields "" for an empty source instead of
    // failing.
    self.source.get(start..).unwrap()
}

/// Stores `err` in the cursor, replacing any error recorded earlier.
pub(crate) fn add_err(&mut self, err: Error) {
    let recorded = Some(err);
    self.err = recorded;
}

/// Returns a `Chars` iterator over the remaining characters.
///
/// The result is a clone of the stored iterator, so advancing it does not
/// move the cursor itself.
///
/// NOTE(review): the struct declares `chars` both as `Chars` and as
/// `CharIndices`. This signature only type-checks against the `Chars` field;
/// if `CharIndices` is the surviving declaration this helper is stale.
/// Confirm against the struct.
fn chars(&self) -> Chars<'_> {
self.chars.clone()
}
}
Loading