From d383eb10249b9296dafd5652408856982b835a89 Mon Sep 17 00:00:00 2001 From: Matthew Howard Date: Mon, 7 Aug 2023 14:42:58 +0100 Subject: [PATCH 1/5] add comments --- src/compiler/sexp.rs | 156 ++++++++++++++++++++++++------------------- 1 file changed, 87 insertions(+), 69 deletions(-) diff --git a/src/compiler/sexp.rs b/src/compiler/sexp.rs index 599eea3fe..0811d1026 100644 --- a/src/compiler/sexp.rs +++ b/src/compiler/sexp.rs @@ -203,9 +203,9 @@ enum TermListCommentState { } #[derive(Debug)] -enum SExpParseState { +enum SExpParseState { // The types of state that the Rust pre-forms can take Empty, - CommentText(Srcloc, Vec), + CommentText(Srcloc, Vec), //srcloc contains the file, line, column and length for the captured form Bareword(Srcloc, Vec), QuotedText(Srcloc, u8, Vec), QuotedEscaped(Srcloc, u8, Vec), @@ -221,7 +221,7 @@ enum SExpParseState { } #[derive(Debug)] -enum SExpParseResult { +enum SExpParseResult { // the result of a call to parse an SExp Resume(SExpParseState), Emit(Rc, SExpParseState), Error(Srcloc, String), @@ -318,16 +318,18 @@ pub fn enlist(l: Srcloc, v: Vec>) -> SExp { result } -fn emit(a: Rc, p: SExpParseState) -> SExpParseResult { - SExpParseResult::Emit(a, p) +// this function takes a ParseState and returns an Emit ParseResult which contains the ParseState +fn emit(a: Rc, current_state: SExpParseState) -> SExpParseResult { + SExpParseResult::Emit(a, current_state) } fn error(l: Srcloc, t: &str) -> SExpParseResult { SExpParseResult::Error(l, t.to_string()) } -fn resume(p: SExpParseState) -> SExpParseResult { - SExpParseResult::Resume(p) +// this function takes a ParseState and returns a Resume ParseResult which contains the ParseState +fn resume(current_state: SExpParseState) -> SExpParseResult { + SExpParseResult::Resume(current_state) } fn escape_quote(q: u8, s: &[u8]) -> String { @@ -538,127 +540,132 @@ impl SExp { } } -fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseResult { - match p { - SExpParseState::Empty => match this_char as char { - '(' => resume(SExpParseState::OpenList(loc)), - '\n' => resume(SExpParseState::Empty), +fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -> SExpParseResult { + // switch on our state + match current_state { + SExpParseState::Empty => match this_char as char { // we are not currently in a list + '(' => resume(SExpParseState::OpenList(loc)), // move to OpenList state + '\n' => resume(SExpParseState::Empty), // new line, same state ';' => resume(SExpParseState::CommentText(loc, Vec::new())), ')' => error(loc, "Too many close parens"), - '"' => resume(SExpParseState::QuotedText(loc, b'"', Vec::new())), - '\'' => resume(SExpParseState::QuotedText(loc, b'\'', Vec::new())), + '"' => resume(SExpParseState::QuotedText(loc, b'"', Vec::new())), // match on " + '\'' => resume(SExpParseState::QuotedText(loc, b'\'', Vec::new())), // match on ' ch => { if char::is_whitespace(ch) { resume(SExpParseState::Empty) } else { - resume(SExpParseState::Bareword(loc, vec![this_char])) + resume(SExpParseState::Bareword(loc, vec![this_char])) // start of a word - could be an atom or a keyword - the compiler will decide } } }, - SExpParseState::CommentText(pl, t) => match this_char as char { - '\r' => resume(SExpParseState::CommentText(pl.clone(), t.to_vec())), + // t is a Vec of the previous characters in this comment string + SExpParseState::CommentText(srcloc, t) => match this_char as char { + '\r' => resume(SExpParseState::CommentText(srcloc.clone(), t.to_vec())), '\n' => resume(SExpParseState::Empty), _ => { let mut tcopy = t.to_vec(); tcopy.push(this_char); - resume(SExpParseState::CommentText(pl.ext(&loc), tcopy)) + resume(SExpParseState::CommentText(srcloc.ext(&loc), tcopy)) } }, - SExpParseState::Bareword(pl, a) => { - if char::is_whitespace(this_char as char) { + // we currently processing a new word + SExpParseState::Bareword(srcloc, word_so_far) => { + if char::is_whitespace(this_char as char) { // we've found a space, so it's the end of a word emit( - Rc::new(make_atom(pl.clone(), a.to_vec())), + Rc::new(make_atom(srcloc.clone(), word_so_far.to_vec())), SExpParseState::Empty, ) - } else { - let mut acopy = a.to_vec(); - acopy.push(this_char); - resume(SExpParseState::Bareword(pl.ext(&loc), acopy)) + } else { // otherwise add letter to word + let mut word_copy = word_so_far.to_vec(); + word_copy.push(this_char); + resume(SExpParseState::Bareword(srcloc.ext(&loc), word_copy)) } } - SExpParseState::QuotedText(pl, term, t) => { - if this_char == b'\\' { - resume(SExpParseState::QuotedEscaped(pl.clone(), *term, t.to_vec())) - } else if this_char == *term { + SExpParseState::QuotedText(srcloc, term, t) => { + if this_char == b'\\' { // if we have a character escape then copy the character directly + resume(SExpParseState::QuotedEscaped(srcloc.clone(), *term, t.to_vec())) + } else if this_char == *term { // otherwise check if it's the terminating character (either ' or ") emit( - Rc::new(SExp::QuotedString(pl.ext(&loc), *term, t.to_vec())), + Rc::new(SExp::QuotedString(srcloc.ext(&loc), *term, t.to_vec())), // add quoted string to parent list SExpParseState::Empty, ) - } else { + } else { // otherwise copy the character let mut tcopy = t.to_vec(); tcopy.push(this_char); - resume(SExpParseState::QuotedText(pl.clone(), *term, tcopy)) + resume(SExpParseState::QuotedText(srcloc.clone(), *term, tcopy)) } } - SExpParseState::QuotedEscaped(pl, term, t) => { + // copy the character the quoted text because we have put the escape character first + SExpParseState::QuotedEscaped(srcloc, term, t) => { let mut tcopy = t.to_vec(); tcopy.push(this_char); - resume(SExpParseState::QuotedText(pl.clone(), *term, tcopy)) + resume(SExpParseState::QuotedText(srcloc.clone(), *term, tcopy)) } - SExpParseState::OpenList(pl) => match this_char as char { - ')' => emit(Rc::new(SExp::Nil(pl.ext(&loc))), SExpParseState::Empty), + SExpParseState::OpenList(srcloc) => match this_char as char { // we are beginning a new list + ')' => emit(Rc::new(SExp::Nil(srcloc.ext(&loc))), SExpParseState::Empty), // create a Nil object '.' => error(loc, "Dot can't appear directly after begin paren"), - _ => match parse_sexp_step(loc.clone(), &SExpParseState::Empty, this_char) { - SExpParseResult::Emit(o, p) => resume(SExpParseState::ParsingList( - pl.ext(&loc), - Rc::new(p), + _ => match parse_sexp_step(loc.clone(), &SExpParseState::Empty, this_char) { // fetch result of parsing as if we were in empty state + SExpParseResult::Emit(o, current_state) => resume(SExpParseState::ParsingList( // we found an object, resume processing + srcloc.ext(&loc), + Rc::new(current_state), // captured state from our pretend empty state vec![o], )), - SExpParseResult::Resume(p) => resume(SExpParseState::ParsingList( - pl.ext(&loc), - Rc::new(p), + SExpParseResult::Resume(current_state) => resume(SExpParseState::ParsingList( // we're still reading the object, resume processing + srcloc.ext(&loc), + Rc::new(current_state), // captured state from our pretend empty state Vec::new(), )), - SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), + SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error }, }, - SExpParseState::ParsingList(pl, pp, list_content) => { + // We are in the middle of a list currently + SExpParseState::ParsingList(srcloc, pp, list_content) => { // pp is the captured inside-list state we received from OpenList match (this_char as char, pp.borrow()) { - ('.', SExpParseState::Empty) => resume(SExpParseState::TermList( - pl.ext(&loc), + ('.', SExpParseState::Empty) => resume(SExpParseState::TermList( // dot notation showing cons cell + srcloc.ext(&loc), TermListCommentState::Empty, None, Rc::new(SExpParseState::Empty), list_content.to_vec(), )), - (')', SExpParseState::Empty) => emit( - Rc::new(enlist(pl.ext(&loc), list_content.to_vec())), + (')', SExpParseState::Empty) => emit( // close list and emit it upwards as a complete entity + Rc::new(enlist(srcloc.ext(&loc), list_content.to_vec())), SExpParseState::Empty, ), - (')', SExpParseState::Bareword(l, t)) => { + (')', SExpParseState::Bareword(l, t)) => { // you've reached the end of the word AND the end of the list, close list and emit upwards let parsed_atom = make_atom(l.clone(), t.to_vec()); let mut updated_list = list_content.to_vec(); updated_list.push(Rc::new(parsed_atom)); emit( - Rc::new(enlist(pl.ext(&loc), updated_list)), + Rc::new(enlist(srcloc.ext(&loc), updated_list)), SExpParseState::Empty, ) } - (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { - SExpParseResult::Emit(o, p) => { + (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { // + SExpParseResult::Emit(o, current_state) => { // add result of nested call to our list let mut list_copy = list_content.clone(); list_copy.push(o); let result = - SExpParseState::ParsingList(pl.ext(&loc), Rc::new(p), list_copy); + SExpParseState::ParsingList(srcloc.ext(&loc), Rc::new(current_state), list_copy); resume(result) } - SExpParseResult::Resume(rp) => resume(SExpParseState::ParsingList( - pl.ext(&loc), + SExpParseResult::Resume(rp) => resume(SExpParseState::ParsingList( // + srcloc.ext(&loc), Rc::new(rp), list_content.to_vec(), )), - SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), + SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error upwards }, } } - SExpParseState::TermList(pl, TermListCommentState::InComment, parsed, pp, list_content) => { + SExpParseState::TermList(srcloc, TermListCommentState::InComment, parsed, pp, list_content) => { // pp is the captured inside-list state we received from OpenList let end_comment = if this_char as char == '\n' || this_char as char == '\r' { TermListCommentState::Empty } else { TermListCommentState::InComment }; resume(SExpParseState::TermList( - pl.clone(), + srcloc.clone(), end_comment, parsed.clone(), pp.clone(), @@ -666,7 +673,7 @@ fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseR )) } SExpParseState::TermList( - pl, + srcloc, TermListCommentState::Empty, Some(parsed), pp, @@ -674,7 +681,7 @@ fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseR ) => { if this_char.is_ascii_whitespace() { resume(SExpParseState::TermList( - pl.ext(&loc), + srcloc.ext(&loc), TermListCommentState::Empty, Some(parsed.clone()), pp.clone(), @@ -699,7 +706,7 @@ fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseR } } else if this_char == b';' { resume(SExpParseState::TermList( - pl.clone(), + srcloc.clone(), TermListCommentState::InComment, Some(parsed.clone()), pp.clone(), @@ -707,12 +714,12 @@ fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseR )) } else { error( - pl.clone(), + srcloc.clone(), &format!("unexpected character {}", this_char as char), ) } } - SExpParseState::TermList(pl, TermListCommentState::Empty, None, pp, list_content) => { + SExpParseState::TermList(srcloc, TermListCommentState::Empty, None, pp, list_content) => { match (this_char as char, pp.borrow()) { ('.', SExpParseState::Empty) => { error(loc, "Multiple dots in list notation are illegal") @@ -722,7 +729,7 @@ fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseR emit(list_content[0].clone(), SExpParseState::Empty) } else { emit( - Rc::new(enlist(pl.ext(&loc), list_content.to_vec())), + Rc::new(enlist(srcloc.ext(&loc), list_content.to_vec())), SExpParseState::Empty, ) } @@ -747,18 +754,18 @@ fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseR } } (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { - SExpParseResult::Emit(o, _p) => resume(SExpParseState::TermList( + SExpParseResult::Emit(o, _current_state) => resume(SExpParseState::TermList( loc, TermListCommentState::Empty, Some(o), pp.clone(), list_content.clone(), )), - SExpParseResult::Resume(p) => resume(SExpParseState::TermList( - pl.ext(&loc), + SExpParseResult::Resume(current_state) => resume(SExpParseState::TermList( + srcloc.ext(&loc), TermListCommentState::Empty, None, - Rc::new(p), + Rc::new(current_state), list_content.to_vec(), )), SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), @@ -776,19 +783,28 @@ fn parse_sexp_inner( where I: Iterator, { + // we support compiling multiple things at once, keep these in a Vec + // at the moment this will almost certainly only return 1 thing let mut res = Vec::new(); + // Loop through all the characters for this_char in s { + let next_location = start.clone().advance(this_char); + // call parse_sexp_step for current character + // it will return a ParseResult which contains the new ParseState match parse_sexp_step(start.clone(), parse_state.borrow(), this_char) { + // catch error and propagate it upwards SExpParseResult::Error(l, e) => { return Err((l, e)); } + // Keep parsing SExpParseResult::Resume(new_parse_state) => { start = next_location; parse_state = new_parse_state; } + // End of list (top level compile object), but not necessarily end of file SExpParseResult::Emit(o, new_parse_state) => { start = next_location; parse_state = new_parse_state; @@ -797,6 +813,7 @@ where } } + // depending on the state when we finished return Ok or Err enums match parse_state { SExpParseState::Empty => Ok(res), SExpParseState::Bareword(l, t) => Ok(vec![Rc::new(make_atom(l, t))]), @@ -813,6 +830,7 @@ where /// /// Entrypoint for parsing chialisp input. +/// Called from compiler.rs /// /// This produces Rc, where SExp is described above. /// From 8c2eaea8245e7e8fdcf5e79f4cf51fb55edac452 Mon Sep 17 00:00:00 2001 From: Matthew Howard Date: Mon, 7 Aug 2023 15:36:03 +0100 Subject: [PATCH 2/5] fix merge --- src/compiler/sexp.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/compiler/sexp.rs b/src/compiler/sexp.rs index 9cb96ad23..5b5c4fd6c 100644 --- a/src/compiler/sexp.rs +++ b/src/compiler/sexp.rs @@ -636,6 +636,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - updated_list.push(Rc::new(parsed_atom)); emit( Rc::new(enlist(srcloc.clone(), &updated_list)), + SExpParseState::Empty, ) } (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { // From 804947f84da4be555387b298c784d0e87f15521f Mon Sep 17 00:00:00 2001 From: Matthew Howard Date: Tue, 8 Aug 2023 17:04:08 +0100 Subject: [PATCH 3/5] more comments --- src/compiler/sexp.rs | 46 +++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/src/compiler/sexp.rs b/src/compiler/sexp.rs index 5b5c4fd6c..1b1598560 100644 --- a/src/compiler/sexp.rs +++ b/src/compiler/sexp.rs @@ -213,7 +213,7 @@ enum SExpParseState { // The types of state that the Rust pre-forms can take Srcloc, TermListCommentState, Option>, - Rc, + Rc, // used for inner parsing Vec>, ), } @@ -621,9 +621,9 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - match (this_char as char, pp.borrow()) { ('.', SExpParseState::Empty) => resume(SExpParseState::TermList( // dot notation showing cons cell srcloc.ext(&loc), - TermListCommentState::Empty, + TermListCommentState::Empty, // we are not inside a comment None, - Rc::new(SExpParseState::Empty), + Rc::new(SExpParseState::Empty), // nested state is empty list_content.to_vec(), )), (')', SExpParseState::Empty) => emit( // close list and emit it upwards as a complete entity @@ -639,24 +639,26 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - SExpParseState::Empty, ) } + // analyze this character using the mock "inner state" stored in pp (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { // - SExpParseResult::Emit(o, current_state) => { // add result of nested call to our list + SExpParseResult::Emit(o, current_state) => { // add result of parse_sexp_step to our list let mut list_copy = list_content.clone(); list_copy.push(o); let result = SExpParseState::ParsingList(srcloc.ext(&loc), Rc::new(current_state), list_copy); resume(result) } - SExpParseResult::Resume(rp) => resume(SExpParseState::ParsingList( // + SExpParseResult::Resume(rp) => resume(SExpParseState::ParsingList( // we aren't finished reading in our nested state srcloc.ext(&loc), - Rc::new(rp), + Rc::new(rp), // store the returned state from parse_sexp_step in pp list_content.to_vec(), )), SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error upwards }, } } - SExpParseState::TermList(srcloc, TermListCommentState::InComment, parsed, pp, list_content) => { // pp is the captured inside-list state we received from OpenList + // if we're in a comment then just check for newline or carriage return otherwise stay in InComment state + SExpParseState::TermList(srcloc, TermListCommentState::InComment, parsed, pp, list_content) => { let end_comment = if this_char as char == '\n' || this_char as char == '\r' { TermListCommentState::Empty } else { @@ -664,12 +666,13 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - }; resume(SExpParseState::TermList( srcloc.clone(), - end_comment, + end_comment, // store the new commentstate parsed.clone(), pp.clone(), list_content.clone(), )) } + // if we're not in a comment and have already found a parsed second word for this dot expression SExpParseState::TermList( srcloc, TermListCommentState::Empty, @@ -677,7 +680,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - pp, list_content, ) => { - if this_char.is_ascii_whitespace() { + if this_char.is_ascii_whitespace() { // ignore whitespace after second word resume(SExpParseState::TermList( srcloc.ext(&loc), TermListCommentState::Empty, @@ -685,7 +688,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - pp.clone(), list_content.to_vec(), )) - } else if this_char == b')' { + } else if this_char == b')' { // if we see a `)` then we're ready to close this list let mut list_copy = list_content.to_vec(); match list_copy.pop() { Some(v) => { @@ -697,12 +700,12 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - for item in list_copy.iter().rev() { result_list = make_cons(item.clone(), Rc::new(result_list)); } - emit(Rc::new(result_list), SExpParseState::Empty) + emit(Rc::new(result_list), SExpParseState::Empty) // emit the resultant list } } None => error(loc, "Dot as first element of list?"), } - } else if this_char == b';' { + } else if this_char == b';' { // entering a comment resume(SExpParseState::TermList( srcloc.clone(), TermListCommentState::InComment, @@ -710,19 +713,20 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - pp.clone(), list_content.clone(), )) - } else { + } else { // we don't want to see any more characters after we've concluded a dot expression error( srcloc.clone(), &format!("unexpected character {}", this_char as char), ) } } - SExpParseState::TermList(srcloc, TermListCommentState::Empty, None, pp, list_content) => { - match (this_char as char, pp.borrow()) { - ('.', SExpParseState::Empty) => { + // we are passing a dot-expression (x . y) and not in a comment and don't have an object already discovered + SExpParseState::TermList(srcloc, TermListCommentState::Empty, None, pp, list_content) => { // pp is the inner parsestate inside the dot-expressions + match (this_char as char, pp.borrow()) { //match based on current character and inner state + ('.', SExpParseState::Empty) => { // if we aren't in a word and we see another dot that's illegal error(loc, "Multiple dots in list notation are illegal") } - (')', SExpParseState::Empty) => { + (')', SExpParseState::Empty) => { // attempt to close the list if list_content.len() == 1 { emit(list_content[0].clone(), SExpParseState::Empty) } else { @@ -751,19 +755,21 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - None => error(loc, "Dot as first element of list?"), } } + // if we see anything other than ')' or '.' parse it as if we were in empty state (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { - SExpParseResult::Emit(o, _current_state) => resume(SExpParseState::TermList( + SExpParseResult::Emit(parsed_object, _current_state) => resume(SExpParseState::TermList( loc, TermListCommentState::Empty, - Some(o), + Some(parsed_object), // assert parsed_object is not None and then store it in parsed_list pp.clone(), list_content.clone(), )), + // resume means it didn't finish parsing yet, so store inner state and keep going SExpParseResult::Resume(current_state) => resume(SExpParseState::TermList( srcloc.ext(&loc), TermListCommentState::Empty, None, - Rc::new(current_state), + Rc::new(current_state), // store our partial inner parsestate in pp list_content.to_vec(), )), SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), From 8bb994877b31101acb54ad5331fb59b2e1618cbe Mon Sep 17 00:00:00 2001 From: Matthew Howard Date: Tue, 8 Aug 2023 17:07:03 +0100 Subject: [PATCH 4/5] explain an enum param --- src/compiler/sexp.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/sexp.rs b/src/compiler/sexp.rs index 1b1598560..e89dbb6e4 100644 --- a/src/compiler/sexp.rs +++ b/src/compiler/sexp.rs @@ -212,7 +212,7 @@ enum SExpParseState { // The types of state that the Rust pre-forms can take TermList( Srcloc, TermListCommentState, - Option>, + Option>, // this is the second value in the dot expression Rc, // used for inner parsing Vec>, ), From 59641cc810b569644c3f4cb20b0daed1da46af77 Mon Sep 17 00:00:00 2001 From: Matthew Howard Date: Tue, 8 Aug 2023 17:47:22 +0100 Subject: [PATCH 5/5] cargo fmt --- src/compiler/sexp.rs | 151 +++++++++++++++++++++++++++---------------- 1 file changed, 96 insertions(+), 55 deletions(-) diff --git a/src/compiler/sexp.rs b/src/compiler/sexp.rs index e89dbb6e4..9759552ae 100644 --- a/src/compiler/sexp.rs +++ b/src/compiler/sexp.rs @@ -201,9 +201,10 @@ enum TermListCommentState { } #[derive(Debug)] -enum SExpParseState { // The types of state that the Rust pre-forms can take +enum SExpParseState { + // The types of state that the Rust pre-forms can take Empty, - CommentText(Srcloc, Vec), //srcloc contains the file, line, column and length for the captured form + CommentText(Srcloc, Vec), //srcloc contains the file, line, column and length for the captured form Bareword(Srcloc, Vec), QuotedText(Srcloc, u8, Vec), QuotedEscaped(Srcloc, u8, Vec), @@ -212,14 +213,15 @@ enum SExpParseState { // The types of state that the Rust pre-forms can take TermList( Srcloc, TermListCommentState, - Option>, // this is the second value in the dot expression - Rc, // used for inner parsing + Option>, // this is the second value in the dot expression + Rc, // used for inner parsing Vec>, ), } #[derive(Debug)] -enum SExpParseResult { // the result of a call to parse an SExp +enum SExpParseResult { + // the result of a call to parse an SExp Resume(SExpParseState), Emit(Rc, SExpParseState), Error(Srcloc, String), @@ -541,18 +543,19 @@ impl SExp { fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -> SExpParseResult { // switch on our state match current_state { - SExpParseState::Empty => match this_char as char { // we are not currently in a list - '(' => resume(SExpParseState::OpenList(loc)), // move to OpenList state - '\n' => resume(SExpParseState::Empty), // new line, same state + SExpParseState::Empty => match this_char as char { + // we are not currently in a list + '(' => resume(SExpParseState::OpenList(loc)), // move to OpenList state + '\n' => resume(SExpParseState::Empty), // new line, same state ';' => resume(SExpParseState::CommentText(loc, Vec::new())), ')' => error(loc, "Too many close parens"), '"' => resume(SExpParseState::QuotedText(loc, b'"', Vec::new())), // match on " - '\'' => resume(SExpParseState::QuotedText(loc, b'\'', Vec::new())), // match on ' + '\'' => resume(SExpParseState::QuotedText(loc, b'\'', Vec::new())), // match on ' ch => { if char::is_whitespace(ch) { resume(SExpParseState::Empty) } else { - resume(SExpParseState::Bareword(loc, vec![this_char])) // start of a word - could be an atom or a keyword - the compiler will decide + resume(SExpParseState::Bareword(loc, vec![this_char])) // start of a word - could be an atom or a keyword - the compiler will decide } } }, @@ -568,26 +571,35 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - }, // we currently processing a new word SExpParseState::Bareword(srcloc, word_so_far) => { - if char::is_whitespace(this_char as char) { // we've found a space, so it's the end of a word + if char::is_whitespace(this_char as char) { + // we've found a space, so it's the end of a word emit( Rc::new(make_atom(srcloc.clone(), word_so_far.to_vec())), SExpParseState::Empty, ) - } else { // otherwise add letter to word + } else { + // otherwise add letter to word let mut word_copy = word_so_far.to_vec(); word_copy.push(this_char); resume(SExpParseState::Bareword(srcloc.ext(&loc), word_copy)) } } SExpParseState::QuotedText(srcloc, term, t) => { - if this_char == b'\\' { // if we have a character escape then copy the character directly - resume(SExpParseState::QuotedEscaped(srcloc.clone(), *term, t.to_vec())) - } else if this_char == *term { // otherwise check if it's the terminating character (either ' or ") + if this_char == b'\\' { + // if we have a character escape then copy the character directly + resume(SExpParseState::QuotedEscaped( + srcloc.clone(), + *term, + t.to_vec(), + )) + } else if this_char == *term { + // otherwise check if it's the terminating character (either ' or ") emit( Rc::new(SExp::QuotedString(srcloc.ext(&loc), *term, t.to_vec())), // add quoted string to parent list SExpParseState::Empty, ) - } else { // otherwise copy the character + } else { + // otherwise copy the character let mut tcopy = t.to_vec(); tcopy.push(this_char); resume(SExpParseState::QuotedText(srcloc.clone(), *term, tcopy)) @@ -599,38 +611,46 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - tcopy.push(this_char); resume(SExpParseState::QuotedText(srcloc.clone(), *term, tcopy)) } - SExpParseState::OpenList(srcloc) => match this_char as char { // we are beginning a new list - ')' => emit(Rc::new(SExp::Nil(srcloc.ext(&loc))), SExpParseState::Empty), // create a Nil object + SExpParseState::OpenList(srcloc) => match this_char as char { + // we are beginning a new list + ')' => emit(Rc::new(SExp::Nil(srcloc.ext(&loc))), SExpParseState::Empty), // create a Nil object '.' => error(loc, "Dot can't appear directly after begin paren"), - _ => match parse_sexp_step(loc.clone(), &SExpParseState::Empty, this_char) { // fetch result of parsing as if we were in empty state - SExpParseResult::Emit(o, current_state) => resume(SExpParseState::ParsingList( // we found an object, resume processing + _ => match parse_sexp_step(loc.clone(), &SExpParseState::Empty, this_char) { + // fetch result of parsing as if we were in empty state + SExpParseResult::Emit(o, current_state) => resume(SExpParseState::ParsingList( + // we found an object, resume processing srcloc.ext(&loc), - Rc::new(current_state), // captured state from our pretend empty state + Rc::new(current_state), // captured state from our pretend empty state vec![o], )), - SExpParseResult::Resume(current_state) => resume(SExpParseState::ParsingList( // we're still reading the object, resume processing + SExpParseResult::Resume(current_state) => resume(SExpParseState::ParsingList( + // we're still reading the object, resume processing srcloc.ext(&loc), - Rc::new(current_state), // captured state from our pretend empty state + Rc::new(current_state), // captured state from our pretend empty state Vec::new(), )), SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error }, }, // We are in the middle of a list currently - SExpParseState::ParsingList(srcloc, pp, list_content) => { // pp is the captured inside-list state we received from OpenList + SExpParseState::ParsingList(srcloc, pp, list_content) => { + // pp is the captured inside-list state we received from OpenList match (this_char as char, pp.borrow()) { - ('.', SExpParseState::Empty) => resume(SExpParseState::TermList( // dot notation showing cons cell + ('.', SExpParseState::Empty) => resume(SExpParseState::TermList( + // dot notation showing cons cell srcloc.ext(&loc), - TermListCommentState::Empty, // we are not inside a comment + TermListCommentState::Empty, // we are not inside a comment None, - Rc::new(SExpParseState::Empty), // nested state is empty + Rc::new(SExpParseState::Empty), // nested state is empty list_content.to_vec(), )), - (')', SExpParseState::Empty) => emit( // close list and emit it upwards as a complete entity + (')', SExpParseState::Empty) => emit( + // close list and emit it upwards as a complete entity Rc::new(enlist(srcloc.clone(), list_content)), SExpParseState::Empty, ), - (')', SExpParseState::Bareword(l, t)) => { // you've reached the end of the word AND the end of the list, close list and emit upwards + (')', SExpParseState::Bareword(l, t)) => { + // you've reached the end of the word AND the end of the list, close list and emit upwards let parsed_atom = make_atom(l.clone(), t.to_vec()); let mut updated_list = list_content.to_vec(); updated_list.push(Rc::new(parsed_atom)); @@ -640,25 +660,37 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - ) } // analyze this character using the mock "inner state" stored in pp - (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { // - SExpParseResult::Emit(o, current_state) => { // add result of parse_sexp_step to our list + (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { + // + SExpParseResult::Emit(o, current_state) => { + // add result of parse_sexp_step to our list let mut list_copy = list_content.clone(); list_copy.push(o); - let result = - SExpParseState::ParsingList(srcloc.ext(&loc), Rc::new(current_state), list_copy); + let result = SExpParseState::ParsingList( + srcloc.ext(&loc), + Rc::new(current_state), + list_copy, + ); resume(result) } - SExpParseResult::Resume(rp) => resume(SExpParseState::ParsingList( // we aren't finished reading in our nested state + SExpParseResult::Resume(rp) => resume(SExpParseState::ParsingList( + // we aren't finished reading in our nested state srcloc.ext(&loc), - Rc::new(rp), // store the returned state from parse_sexp_step in pp + Rc::new(rp), // store the returned state from parse_sexp_step in pp list_content.to_vec(), )), - SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error upwards + SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error upwards }, } } // if we're in a comment then just check for newline or carriage return otherwise stay in InComment state - SExpParseState::TermList(srcloc, TermListCommentState::InComment, parsed, pp, list_content) => { + SExpParseState::TermList( + srcloc, + TermListCommentState::InComment, + parsed, + pp, + list_content, + ) => { let end_comment = if this_char as char == '\n' || this_char as char == '\r' { TermListCommentState::Empty } else { @@ -666,7 +698,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - }; resume(SExpParseState::TermList( srcloc.clone(), - end_comment, // store the new commentstate + end_comment, // store the new commentstate parsed.clone(), pp.clone(), list_content.clone(), @@ -680,7 +712,8 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - pp, list_content, ) => { - if this_char.is_ascii_whitespace() { // ignore whitespace after second word + if this_char.is_ascii_whitespace() { + // ignore whitespace after second word resume(SExpParseState::TermList( srcloc.ext(&loc), TermListCommentState::Empty, @@ -688,7 +721,8 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - pp.clone(), list_content.to_vec(), )) - } else if this_char == b')' { // if we see a `)` then we're ready to close this list + } else if this_char == b')' { + // if we see a `)` then we're ready to close this list let mut list_copy = list_content.to_vec(); match list_copy.pop() { Some(v) => { @@ -700,12 +734,13 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - for item in list_copy.iter().rev() { result_list = make_cons(item.clone(), Rc::new(result_list)); } - emit(Rc::new(result_list), SExpParseState::Empty) // emit the resultant list + emit(Rc::new(result_list), SExpParseState::Empty) // emit the resultant list } } None => error(loc, "Dot as first element of list?"), } - } else if this_char == b';' { // entering a comment + } else if this_char == b';' { + // entering a comment resume(SExpParseState::TermList( srcloc.clone(), TermListCommentState::InComment, @@ -713,7 +748,8 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - pp.clone(), list_content.clone(), )) - } else { // we don't want to see any more characters after we've concluded a dot expression + } else { + // we don't want to see any more characters after we've concluded a dot expression error( srcloc.clone(), &format!("unexpected character {}", this_char as char), @@ -721,12 +757,16 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - } } // we are passing a dot-expression (x . y) and not in a comment and don't have an object already discovered - SExpParseState::TermList(srcloc, TermListCommentState::Empty, None, pp, list_content) => { // pp is the inner parsestate inside the dot-expressions - match (this_char as char, pp.borrow()) { //match based on current character and inner state - ('.', SExpParseState::Empty) => { // if we aren't in a word and we see another dot that's illegal + SExpParseState::TermList(srcloc, TermListCommentState::Empty, None, pp, list_content) => { + // pp is the inner parsestate inside the dot-expressions + match (this_char as char, pp.borrow()) { + //match based on current character and inner state + ('.', SExpParseState::Empty) => { + // if we aren't in a word and we see another dot that's illegal error(loc, "Multiple dots in list notation are illegal") } - (')', SExpParseState::Empty) => { // attempt to close the list + (')', SExpParseState::Empty) => { + // attempt to close the list if list_content.len() == 1 { emit(list_content[0].clone(), SExpParseState::Empty) } else { @@ -757,13 +797,15 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - } // if we see anything other than ')' or '.' parse it as if we were in empty state (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { - SExpParseResult::Emit(parsed_object, _current_state) => resume(SExpParseState::TermList( - loc, - TermListCommentState::Empty, - Some(parsed_object), // assert parsed_object is not None and then store it in parsed_list - pp.clone(), - list_content.clone(), - )), + SExpParseResult::Emit(parsed_object, _current_state) => { + resume(SExpParseState::TermList( + loc, + TermListCommentState::Empty, + Some(parsed_object), // assert parsed_object is not None and then store it in parsed_list + pp.clone(), + list_content.clone(), + )) + } // resume means it didn't finish parsing yet, so store inner state and keep going SExpParseResult::Resume(current_state) => resume(SExpParseState::TermList( srcloc.ext(&loc), @@ -793,7 +835,6 @@ where // Loop through all the characters for this_char in s { - let next_location = start.clone().advance(this_char); // call parse_sexp_step for current character