From d383eb10249b9296dafd5652408856982b835a89 Mon Sep 17 00:00:00 2001 From: Matthew Howard Date: Mon, 7 Aug 2023 14:42:58 +0100 Subject: [PATCH 1/9] add comments --- src/compiler/sexp.rs | 156 ++++++++++++++++++++++++------------------- 1 file changed, 87 insertions(+), 69 deletions(-) diff --git a/src/compiler/sexp.rs b/src/compiler/sexp.rs index 599eea3fe..0811d1026 100644 --- a/src/compiler/sexp.rs +++ b/src/compiler/sexp.rs @@ -203,9 +203,9 @@ enum TermListCommentState { } #[derive(Debug)] -enum SExpParseState { +enum SExpParseState { // The types of state that the Rust pre-forms can take Empty, - CommentText(Srcloc, Vec), + CommentText(Srcloc, Vec), //srcloc contains the file, line, column and length for the captured form Bareword(Srcloc, Vec), QuotedText(Srcloc, u8, Vec), QuotedEscaped(Srcloc, u8, Vec), @@ -221,7 +221,7 @@ enum SExpParseState { } #[derive(Debug)] -enum SExpParseResult { +enum SExpParseResult { // the result of a call to parse an SExp Resume(SExpParseState), Emit(Rc, SExpParseState), Error(Srcloc, String), @@ -318,16 +318,18 @@ pub fn enlist(l: Srcloc, v: Vec>) -> SExp { result } -fn emit(a: Rc, p: SExpParseState) -> SExpParseResult { - SExpParseResult::Emit(a, p) +// this function takes a ParseState and returns an Emit ParseResult which contains the ParseState +fn emit(a: Rc, current_state: SExpParseState) -> SExpParseResult { + SExpParseResult::Emit(a, current_state) } fn error(l: Srcloc, t: &str) -> SExpParseResult { SExpParseResult::Error(l, t.to_string()) } -fn resume(p: SExpParseState) -> SExpParseResult { - SExpParseResult::Resume(p) +// this function takes a ParseState and returns a Resume ParseResult which contains the ParseState +fn resume(current_state: SExpParseState) -> SExpParseResult { + SExpParseResult::Resume(current_state) } fn escape_quote(q: u8, s: &[u8]) -> String { @@ -538,127 +540,132 @@ impl SExp { } } -fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseResult { - match p { - SExpParseState::Empty => match this_char as char { - '(' => resume(SExpParseState::OpenList(loc)), - '\n' => resume(SExpParseState::Empty), +fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -> SExpParseResult { + // switch on our state + match current_state { + SExpParseState::Empty => match this_char as char { // we are not currently in a list + '(' => resume(SExpParseState::OpenList(loc)), // move to OpenList state + '\n' => resume(SExpParseState::Empty), // new line, same state ';' => resume(SExpParseState::CommentText(loc, Vec::new())), ')' => error(loc, "Too many close parens"), - '"' => resume(SExpParseState::QuotedText(loc, b'"', Vec::new())), - '\'' => resume(SExpParseState::QuotedText(loc, b'\'', Vec::new())), + '"' => resume(SExpParseState::QuotedText(loc, b'"', Vec::new())), // match on " + '\'' => resume(SExpParseState::QuotedText(loc, b'\'', Vec::new())), // match on ' ch => { if char::is_whitespace(ch) { resume(SExpParseState::Empty) } else { - resume(SExpParseState::Bareword(loc, vec![this_char])) + resume(SExpParseState::Bareword(loc, vec![this_char])) // start of a word - could be an atom or a keyword - the compiler will decide } } }, - SExpParseState::CommentText(pl, t) => match this_char as char { - '\r' => resume(SExpParseState::CommentText(pl.clone(), t.to_vec())), + // t is a Vec of the previous characters in this comment string + SExpParseState::CommentText(srcloc, t) => match this_char as char { + '\r' => resume(SExpParseState::CommentText(srcloc.clone(), t.to_vec())), '\n' => resume(SExpParseState::Empty), _ => { let mut tcopy = t.to_vec(); tcopy.push(this_char); - resume(SExpParseState::CommentText(pl.ext(&loc), tcopy)) + resume(SExpParseState::CommentText(srcloc.ext(&loc), tcopy)) } }, - SExpParseState::Bareword(pl, a) => { - if char::is_whitespace(this_char as char) { + // we currently processing a new word + SExpParseState::Bareword(srcloc, word_so_far) => { + if char::is_whitespace(this_char as char) { // we've found a space, so it's the end of a word emit( - Rc::new(make_atom(pl.clone(), a.to_vec())), + Rc::new(make_atom(srcloc.clone(), word_so_far.to_vec())), SExpParseState::Empty, ) - } else { - let mut acopy = a.to_vec(); - acopy.push(this_char); - resume(SExpParseState::Bareword(pl.ext(&loc), acopy)) + } else { // otherwise add letter to word + let mut word_copy = word_so_far.to_vec(); + word_copy.push(this_char); + resume(SExpParseState::Bareword(srcloc.ext(&loc), word_copy)) } } - SExpParseState::QuotedText(pl, term, t) => { - if this_char == b'\\' { - resume(SExpParseState::QuotedEscaped(pl.clone(), *term, t.to_vec())) - } else if this_char == *term { + SExpParseState::QuotedText(srcloc, term, t) => { + if this_char == b'\\' { // if we have a character escape then copy the character directly + resume(SExpParseState::QuotedEscaped(srcloc.clone(), *term, t.to_vec())) + } else if this_char == *term { // otherwise check if it's the terminating character (either ' or ") emit( - Rc::new(SExp::QuotedString(pl.ext(&loc), *term, t.to_vec())), + Rc::new(SExp::QuotedString(srcloc.ext(&loc), *term, t.to_vec())), // add quoted string to parent list SExpParseState::Empty, ) - } else { + } else { // otherwise copy the character let mut tcopy = t.to_vec(); tcopy.push(this_char); - resume(SExpParseState::QuotedText(pl.clone(), *term, tcopy)) + resume(SExpParseState::QuotedText(srcloc.clone(), *term, tcopy)) } } - SExpParseState::QuotedEscaped(pl, term, t) => { + // copy the character the quoted text because we have put the escape character first + SExpParseState::QuotedEscaped(srcloc, term, t) => { let mut tcopy = t.to_vec(); tcopy.push(this_char); - resume(SExpParseState::QuotedText(pl.clone(), *term, tcopy)) + resume(SExpParseState::QuotedText(srcloc.clone(), *term, tcopy)) } - SExpParseState::OpenList(pl) => match this_char as char { - ')' => emit(Rc::new(SExp::Nil(pl.ext(&loc))), SExpParseState::Empty), + SExpParseState::OpenList(srcloc) => match this_char as char { // we are beginning a new list + ')' => emit(Rc::new(SExp::Nil(srcloc.ext(&loc))), SExpParseState::Empty), // create a Nil object '.' => error(loc, "Dot can't appear directly after begin paren"), - _ => match parse_sexp_step(loc.clone(), &SExpParseState::Empty, this_char) { - SExpParseResult::Emit(o, p) => resume(SExpParseState::ParsingList( - pl.ext(&loc), - Rc::new(p), + _ => match parse_sexp_step(loc.clone(), &SExpParseState::Empty, this_char) { // fetch result of parsing as if we were in empty state + SExpParseResult::Emit(o, current_state) => resume(SExpParseState::ParsingList( // we found an object, resume processing + srcloc.ext(&loc), + Rc::new(current_state), // captured state from our pretend empty state vec![o], )), - SExpParseResult::Resume(p) => resume(SExpParseState::ParsingList( - pl.ext(&loc), - Rc::new(p), + SExpParseResult::Resume(current_state) => resume(SExpParseState::ParsingList( // we're still reading the object, resume processing + srcloc.ext(&loc), + Rc::new(current_state), // captured state from our pretend empty state Vec::new(), )), - SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), + SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error }, }, - SExpParseState::ParsingList(pl, pp, list_content) => { + // We are in the middle of a list currently + SExpParseState::ParsingList(srcloc, pp, list_content) => { // pp is the captured inside-list state we received from OpenList match (this_char as char, pp.borrow()) { - ('.', SExpParseState::Empty) => resume(SExpParseState::TermList( - pl.ext(&loc), + ('.', SExpParseState::Empty) => resume(SExpParseState::TermList( // dot notation showing cons cell + srcloc.ext(&loc), TermListCommentState::Empty, None, Rc::new(SExpParseState::Empty), list_content.to_vec(), )), - (')', SExpParseState::Empty) => emit( - Rc::new(enlist(pl.ext(&loc), list_content.to_vec())), + (')', SExpParseState::Empty) => emit( // close list and emit it upwards as a complete entity + Rc::new(enlist(srcloc.ext(&loc), list_content.to_vec())), SExpParseState::Empty, ), - (')', SExpParseState::Bareword(l, t)) => { + (')', SExpParseState::Bareword(l, t)) => { // you've reached the end of the word AND the end of the list, close list and emit upwards let parsed_atom = make_atom(l.clone(), t.to_vec()); let mut updated_list = list_content.to_vec(); updated_list.push(Rc::new(parsed_atom)); emit( - Rc::new(enlist(pl.ext(&loc), updated_list)), + Rc::new(enlist(srcloc.ext(&loc), updated_list)), SExpParseState::Empty, ) } - (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { - SExpParseResult::Emit(o, p) => { + (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { // + SExpParseResult::Emit(o, current_state) => { // add result of nested call to our list let mut list_copy = list_content.clone(); list_copy.push(o); let result = - SExpParseState::ParsingList(pl.ext(&loc), Rc::new(p), list_copy); + SExpParseState::ParsingList(srcloc.ext(&loc), Rc::new(current_state), list_copy); resume(result) } - SExpParseResult::Resume(rp) => resume(SExpParseState::ParsingList( - pl.ext(&loc), + SExpParseResult::Resume(rp) => resume(SExpParseState::ParsingList( // + srcloc.ext(&loc), Rc::new(rp), list_content.to_vec(), )), - SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), + SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error upwards }, } } - SExpParseState::TermList(pl, TermListCommentState::InComment, parsed, pp, list_content) => { + SExpParseState::TermList(srcloc, TermListCommentState::InComment, parsed, pp, list_content) => { // pp is the captured inside-list state we received from OpenList let end_comment = if this_char as char == '\n' || this_char as char == '\r' { TermListCommentState::Empty } else { TermListCommentState::InComment }; resume(SExpParseState::TermList( - pl.clone(), + srcloc.clone(), end_comment, parsed.clone(), pp.clone(), @@ -666,7 +673,7 @@ fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseR )) } SExpParseState::TermList( - pl, + srcloc, TermListCommentState::Empty, Some(parsed), pp, @@ -674,7 +681,7 @@ fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseR ) => { if this_char.is_ascii_whitespace() { resume(SExpParseState::TermList( - pl.ext(&loc), + srcloc.ext(&loc), TermListCommentState::Empty, Some(parsed.clone()), pp.clone(), @@ -699,7 +706,7 @@ fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseR } } else if this_char == b';' { resume(SExpParseState::TermList( - pl.clone(), + srcloc.clone(), TermListCommentState::InComment, Some(parsed.clone()), pp.clone(), @@ -707,12 +714,12 @@ fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseR )) } else { error( - pl.clone(), + srcloc.clone(), &format!("unexpected character {}", this_char as char), ) } } - SExpParseState::TermList(pl, TermListCommentState::Empty, None, pp, list_content) => { + SExpParseState::TermList(srcloc, TermListCommentState::Empty, None, pp, list_content) => { match (this_char as char, pp.borrow()) { ('.', SExpParseState::Empty) => { error(loc, "Multiple dots in list notation are illegal") @@ -722,7 +729,7 @@ fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseR emit(list_content[0].clone(), SExpParseState::Empty) } else { emit( - Rc::new(enlist(pl.ext(&loc), list_content.to_vec())), + Rc::new(enlist(srcloc.ext(&loc), list_content.to_vec())), SExpParseState::Empty, ) } @@ -747,18 +754,18 @@ fn parse_sexp_step(loc: Srcloc, p: &SExpParseState, this_char: u8) -> SExpParseR } } (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { - SExpParseResult::Emit(o, _p) => resume(SExpParseState::TermList( + SExpParseResult::Emit(o, _current_state) => resume(SExpParseState::TermList( loc, TermListCommentState::Empty, Some(o), pp.clone(), list_content.clone(), )), - SExpParseResult::Resume(p) => resume(SExpParseState::TermList( - pl.ext(&loc), + SExpParseResult::Resume(current_state) => resume(SExpParseState::TermList( + srcloc.ext(&loc), TermListCommentState::Empty, None, - Rc::new(p), + Rc::new(current_state), list_content.to_vec(), )), SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), @@ -776,19 +783,28 @@ fn parse_sexp_inner( where I: Iterator, { + // we support compiling multiple things at once, keep these in a Vec + // at the moment this will almost certainly only return 1 thing let mut res = Vec::new(); + // Loop through all the characters for this_char in s { + let next_location = start.clone().advance(this_char); + // call parse_sexp_step for current character + // it will return a ParseResult which contains the new ParseState match parse_sexp_step(start.clone(), parse_state.borrow(), this_char) { + // catch error and propagate it upwards SExpParseResult::Error(l, e) => { return Err((l, e)); } + // Keep parsing SExpParseResult::Resume(new_parse_state) => { start = next_location; parse_state = new_parse_state; } + // End of list (top level compile object), but not necessarily end of file SExpParseResult::Emit(o, new_parse_state) => { start = next_location; parse_state = new_parse_state; @@ -797,6 +813,7 @@ where } } + // depending on the state when we finished return Ok or Err enums match parse_state { SExpParseState::Empty => Ok(res), SExpParseState::Bareword(l, t) => Ok(vec![Rc::new(make_atom(l, t))]), @@ -813,6 +830,7 @@ where /// /// Entrypoint for parsing chialisp input. +/// Called from compiler.rs /// /// This produces Rc, where SExp is described above. /// From 8c2eaea8245e7e8fdcf5e79f4cf51fb55edac452 Mon Sep 17 00:00:00 2001 From: Matthew Howard Date: Mon, 7 Aug 2023 15:36:03 +0100 Subject: [PATCH 2/9] fix merge --- src/compiler/sexp.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/compiler/sexp.rs b/src/compiler/sexp.rs index 9cb96ad23..5b5c4fd6c 100644 --- a/src/compiler/sexp.rs +++ b/src/compiler/sexp.rs @@ -636,6 +636,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - updated_list.push(Rc::new(parsed_atom)); emit( Rc::new(enlist(srcloc.clone(), &updated_list)), + SExpParseState::Empty, ) } (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { // From a81957ee95df74523799b7d4e679e38bd78d8d41 Mon Sep 17 00:00:00 2001 From: arty Date: Mon, 7 Aug 2023 17:26:13 -0700 Subject: [PATCH 3/9] Move dialect stuff to src/compiler/dialect.rs --- src/classic/clvm_tools/clvmc.rs | 78 +--------------------- src/classic/clvm_tools/cmds.rs | 3 +- src/compiler/compiler.rs | 3 +- src/compiler/comptypes.rs | 15 +---- src/compiler/dialect.rs | 113 ++++++++++++++++++++++++++++++++ src/compiler/mod.rs | 3 +- src/tests/classic/stage_2.rs | 3 +- 7 files changed, 125 insertions(+), 93 deletions(-) create mode 100644 src/compiler/dialect.rs diff --git a/src/classic/clvm_tools/clvmc.rs b/src/classic/clvm_tools/clvmc.rs index 3c20864aa..4a1beb076 100644 --- a/src/classic/clvm_tools/clvmc.rs +++ b/src/classic/clvm_tools/clvmc.rs @@ -6,12 +6,11 @@ use std::rc::Rc; use tempfile::NamedTempFile; -use clvm_rs::allocator::{Allocator, NodePtr, SExp}; +use clvm_rs::allocator::{Allocator, NodePtr}; use clvm_rs::reduction::EvalErr; use crate::classic::clvm::__type_compatibility__::Stream; use crate::classic::clvm::serialize::sexp_to_stream; -use crate::classic::clvm::sexp::proper_list; use crate::classic::clvm_tools::binutils::{assemble_from_ir, disassemble}; use crate::classic::clvm_tools::ir::reader::read_ir; use crate::classic::clvm_tools::stages::run; @@ -23,31 +22,10 @@ use crate::classic::platform::distutils::dep_util::newer; use crate::compiler::clvm::convert_to_clvm_rs; use crate::compiler::compiler::compile_file; use crate::compiler::compiler::{run_optimizer, DefaultCompilerOpts}; -use crate::compiler::comptypes::{AcceptedDialect, CompileErr, CompilerOpts}; +use crate::compiler::comptypes::{CompileErr, CompilerOpts}; +use crate::compiler::dialect::detect_modern; use crate::compiler::runtypes::RunFailure; -fn include_dialect( - allocator: &Allocator, - dialects: &HashMap, i32>, - e: &[NodePtr], -) -> Option { - // Propogated names from let capture to labeled nodes. - let include_keyword_node = e[0]; - let name_node = e[1]; - if let (SExp::Atom(), SExp::Atom()) = ( - allocator.sexp(include_keyword_node), - allocator.sexp(name_node), - ) { - if allocator.atom(include_keyword_node) == "include".as_bytes().to_vec() { - if let Some(dialect) = dialects.get(allocator.atom(name_node)) { - return Some(*dialect); - } - } - } - - None -} - pub fn write_sym_output( compiled_lookup: &HashMap, path: &str, @@ -60,56 +38,6 @@ pub fn write_sym_output( .map(|_| ()) } -// Now return more parameters about the "modern" dialect, including in the future, -// strictness. This will allow us to support the transition to modern macros which -// in turn allow us to turn on strictness in variable naming. Often multiple moves -// are needed to get from one point to another and there's a tension between -// unitary changes and smaller PRs which do fewer things by themselves. This is -// part of a broader narrative, which many requested that sets us on the path of -// being able to include more information in the dialect result. -pub fn detect_modern(allocator: &mut Allocator, sexp: NodePtr) -> AcceptedDialect { - let mut dialects = HashMap::new(); - dialects.insert("*standard-cl-21*".as_bytes().to_vec(), 21); - dialects.insert("*standard-cl-22*".as_bytes().to_vec(), 22); - - // Start with an empty definition of the dialect (classic). - let mut result = AcceptedDialect::default(); - - // For each form in the source file, try to find a sigil at the top level of - // the list it forms to find a sigil. - if let Some(l) = proper_list(allocator, sexp, true) { - for elt in l.iter() { - let detect_modern_result = detect_modern(allocator, *elt); - if detect_modern_result.stepping.is_some() { - // We found a dialect directive. - result = detect_modern_result; - break; - } - - match proper_list(allocator, *elt, true) { - None => { - continue; - } - - Some(e) => { - if e.len() != 2 { - continue; - } - - if let Some(dialect) = include_dialect(allocator, &dialects, &e) { - // We found a sigil. - result.stepping = Some(dialect); - break; - } - } - } - } - } - - // Return whatever we found or the default. - result -} - pub fn compile_clvm_text( allocator: &mut Allocator, opts: Rc, diff --git a/src/classic/clvm_tools/cmds.rs b/src/classic/clvm_tools/cmds.rs index d97ae8c6d..52d953d40 100644 --- a/src/classic/clvm_tools/cmds.rs +++ b/src/classic/clvm_tools/cmds.rs @@ -28,7 +28,7 @@ use crate::classic::clvm::serialize::{sexp_from_stream, sexp_to_stream, SimpleCr use crate::classic::clvm::sexp::{enlist, proper_list, sexp_as_bin}; use crate::classic::clvm::OPERATORS_LATEST_VERSION; use crate::classic::clvm_tools::binutils::{assemble_from_ir, disassemble, disassemble_with_kw}; -use crate::classic::clvm_tools::clvmc::{detect_modern, write_sym_output}; +use crate::classic::clvm_tools::clvmc::write_sym_output; use crate::classic::clvm_tools::debug::check_unused; use crate::classic::clvm_tools::debug::{ program_hash_from_program_env_cons, start_log_after, trace_pre_eval, trace_to_table, @@ -42,6 +42,7 @@ use crate::classic::clvm_tools::stages::stage_0::{ }; use crate::classic::clvm_tools::stages::stage_2::operators::run_program_for_search_paths; use crate::classic::platform::PathJoin; +use crate::compiler::dialect::detect_modern; use crate::classic::platform::argparse::{ Argument, ArgumentParser, ArgumentValue, ArgumentValueConv, IntConversion, NArgsSpec, diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 3232a7ad3..91f564f91 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -14,8 +14,9 @@ use crate::classic::clvm_tools::stages::stage_2::optimize::optimize_sexp; use crate::compiler::clvm::{convert_from_clvm_rs, convert_to_clvm_rs, sha256tree}; use crate::compiler::codegen::{codegen, hoist_body_let_binding, process_helper_let_bindings}; use crate::compiler::comptypes::{ - AcceptedDialect, CompileErr, CompileForm, CompilerOpts, DefunData, HelperForm, PrimaryCodegen, + CompileErr, CompileForm, CompilerOpts, DefunData, HelperForm, PrimaryCodegen, }; +use crate::compiler::dialect::AcceptedDialect; use crate::compiler::evaluate::{build_reflex_captures, Evaluator, EVAL_STACK_LIMIT}; use crate::compiler::frontend::frontend; use crate::compiler::prims; diff --git a/src/compiler/comptypes.rs b/src/compiler/comptypes.rs index b02013598..f38ff8321 100644 --- a/src/compiler/comptypes.rs +++ b/src/compiler/comptypes.rs @@ -10,6 +10,7 @@ use crate::classic::clvm::__type_compatibility__::{Bytes, BytesFromType}; use crate::classic::clvm_tools::stages::stage_0::TRunProgram; use crate::compiler::clvm::sha256tree; +use crate::compiler::dialect::AcceptedDialect; use crate::compiler::sexp::{decode_string, SExp}; use crate::compiler::srcloc::Srcloc; @@ -30,20 +31,6 @@ impl From<(Srcloc, String)> for CompileErr { #[derive(Clone, Debug)] pub struct CompiledCode(pub Srcloc, pub Rc); -/// Specifying how the language is spoken. -/// -/// This object will eventually contain more information about the specifics of -/// the requested dialect. Initially, this includes a 'strict' setting in the -/// modern macros PR which allows us to begin with the *strict-cl-21* sigil to -/// include a more modern macro system and the ability to turn on strict variable -/// name use. This is a feature that's been widely requested and a first step -/// toward it is to make the object that specifies how chialisp is compiled be -/// able to carry more information. -#[derive(Clone, Debug, Default)] -pub struct AcceptedDialect { - pub stepping: Option, -} - /// A description of an inlined function for use during inline expansion. /// This is used only by PrimaryCodegen. #[derive(Clone, Debug)] diff --git a/src/compiler/dialect.rs b/src/compiler/dialect.rs new file mode 100644 index 000000000..4e1093da9 --- /dev/null +++ b/src/compiler/dialect.rs @@ -0,0 +1,113 @@ +use std::collections::HashMap; + +use clvmr::allocator::{Allocator, NodePtr, SExp}; + +use crate::classic::clvm::sexp::proper_list; + +use crate::compiler::sexp::decode_string; + +/// Specifying how the language is spoken. +#[derive(Clone, Debug, Default)] +pub struct AcceptedDialect { + pub stepping: Option, +} + +/// A package containing the content we should insert when a dialect include is +/// used, plus the compilation flags. +#[derive(Clone, Debug)] +pub struct DialectDescription { + pub accepted: AcceptedDialect, + pub content: String, +} + +lazy_static! { + pub static ref KNOWN_DIALECTS: HashMap = { + let mut dialects: HashMap = HashMap::new(); + let dialect_list = [ + ( + "*standard-cl-21*", + DialectDescription { + accepted: AcceptedDialect { + stepping: Some(21), + ..Default::default() + }, + content: indoc! {"( + (defconstant *chialisp-version* 21) + )"} + .to_string(), + }, + ), + ( + "*standard-cl-22*", + DialectDescription { + accepted: AcceptedDialect { stepping: Some(22) }, + content: indoc! {"( + (defconstant *chialisp-version* 22) + )"} + .to_string(), + }, + ), + ]; + for (n, v) in dialect_list.iter() { + dialects.insert(n.to_string(), v.clone()); + } + dialects + }; +} + +fn include_dialect(allocator: &Allocator, e: &[NodePtr]) -> Option { + let include_keyword_sexp = e[0]; + let name_sexp = e[1]; + if let (SExp::Atom(), SExp::Atom()) = ( + allocator.sexp(include_keyword_sexp), + allocator.sexp(name_sexp), + ) { + if allocator.atom(include_keyword_sexp) == "include".as_bytes().to_vec() { + if let Some(dialect) = KNOWN_DIALECTS.get(&decode_string(allocator.atom(name_sexp))) { + return Some(dialect.accepted.clone()); + } + } + } + + None +} + +// Now return more parameters about the "modern" dialect, including in the future, +// strictness. This will allow us to support the transition to modern macros which +// in turn allow us to turn on strictness in variable naming. Often multiple moves +// are needed to get from one point to another and there's a tension between +// unitary changes and smaller PRs which do fewer things by themselves. This is +// part of a broader narrative, which many requested that sets us on the path of +// being able to include more information in the dialect result. +pub fn detect_modern(allocator: &mut Allocator, sexp: NodePtr) -> AcceptedDialect { + let mut result = AcceptedDialect::default(); + + if let Some(l) = proper_list(allocator, sexp, true) { + for elt in l.iter() { + let detect_modern_result = detect_modern(allocator, *elt); + if detect_modern_result.stepping.is_some() { + result = detect_modern_result; + break; + } + + match proper_list(allocator, *elt, true) { + None => { + continue; + } + + Some(e) => { + if e.len() != 2 { + continue; + } + + if let Some(dialect) = include_dialect(allocator, &e) { + result = dialect; + break; + } + } + } + } + } + + result +} diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs index 32e12ad78..af2c6ff15 100644 --- a/src/compiler/mod.rs +++ b/src/compiler/mod.rs @@ -13,8 +13,9 @@ pub mod compiler; /// - CompileForm - The type of finished (mod ) forms before code generation. /// - HelperForm - The type of declarations like macros, constants and functions. pub mod comptypes; -/// pub mod debug; +/// Utilities for chialisp dialect choice +pub mod dialect; pub mod evaluate; pub mod frontend; pub mod gensym; diff --git a/src/tests/classic/stage_2.rs b/src/tests/classic/stage_2.rs index 3ad0b61b0..8bf43064f 100644 --- a/src/tests/classic/stage_2.rs +++ b/src/tests/classic/stage_2.rs @@ -17,7 +17,8 @@ use crate::classic::clvm_tools::stages::stage_2::helpers::{brun, evaluate, quote use crate::classic::clvm_tools::stages::stage_2::operators::run_program_for_search_paths; use crate::classic::clvm_tools::stages::stage_2::reader::{process_embed_file, read_file}; -use crate::compiler::comptypes::{AcceptedDialect, CompileErr, CompilerOpts, PrimaryCodegen}; +use crate::compiler::comptypes::{CompileErr, CompilerOpts, PrimaryCodegen}; +use crate::compiler::dialect::AcceptedDialect; use crate::compiler::sexp::{decode_string, SExp}; use crate::compiler::srcloc::Srcloc; From ea68090c16257821a743dcd42862a94c89a1a66a Mon Sep 17 00:00:00 2001 From: arty Date: Mon, 7 Aug 2023 17:31:59 -0700 Subject: [PATCH 4/9] Default -> AcceptedDialect --- src/compiler/dialect.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/dialect.rs b/src/compiler/dialect.rs index 4e1093da9..26f0a3629 100644 --- a/src/compiler/dialect.rs +++ b/src/compiler/dialect.rs @@ -29,7 +29,7 @@ lazy_static! { DialectDescription { accepted: AcceptedDialect { stepping: Some(21), - ..Default::default() + ..AcceptedDialect::default() }, content: indoc! {"( (defconstant *chialisp-version* 21) From 296cd2a7ba3c39ba4c14be243346b35f8ebdddaf Mon Sep 17 00:00:00 2001 From: arty Date: Mon, 7 Aug 2023 17:34:00 -0700 Subject: [PATCH 5/9] Remove default --- src/compiler/dialect.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/compiler/dialect.rs b/src/compiler/dialect.rs index 26f0a3629..a01ac133d 100644 --- a/src/compiler/dialect.rs +++ b/src/compiler/dialect.rs @@ -29,7 +29,6 @@ lazy_static! { DialectDescription { accepted: AcceptedDialect { stepping: Some(21), - ..AcceptedDialect::default() }, content: indoc! {"( (defconstant *chialisp-version* 21) From f34cafd230ed34439c03c4e9e283779082831dc9 Mon Sep 17 00:00:00 2001 From: arty Date: Mon, 7 Aug 2023 17:40:47 -0700 Subject: [PATCH 6/9] fmt --- src/compiler/dialect.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/compiler/dialect.rs b/src/compiler/dialect.rs index a01ac133d..58b561993 100644 --- a/src/compiler/dialect.rs +++ b/src/compiler/dialect.rs @@ -27,9 +27,7 @@ lazy_static! { ( "*standard-cl-21*", DialectDescription { - accepted: AcceptedDialect { - stepping: Some(21), - }, + accepted: AcceptedDialect { stepping: Some(21) }, content: indoc! {"( (defconstant *chialisp-version* 21) )"} From 804947f84da4be555387b298c784d0e87f15521f Mon Sep 17 00:00:00 2001 From: Matthew Howard Date: Tue, 8 Aug 2023 17:04:08 +0100 Subject: [PATCH 7/9] more comments --- src/compiler/sexp.rs | 46 +++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/src/compiler/sexp.rs b/src/compiler/sexp.rs index 5b5c4fd6c..1b1598560 100644 --- a/src/compiler/sexp.rs +++ b/src/compiler/sexp.rs @@ -213,7 +213,7 @@ enum SExpParseState { // The types of state that the Rust pre-forms can take Srcloc, TermListCommentState, Option>, - Rc, + Rc, // used for inner parsing Vec>, ), } @@ -621,9 +621,9 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - match (this_char as char, pp.borrow()) { ('.', SExpParseState::Empty) => resume(SExpParseState::TermList( // dot notation showing cons cell srcloc.ext(&loc), - TermListCommentState::Empty, + TermListCommentState::Empty, // we are not inside a comment None, - Rc::new(SExpParseState::Empty), + Rc::new(SExpParseState::Empty), // nested state is empty list_content.to_vec(), )), (')', SExpParseState::Empty) => emit( // close list and emit it upwards as a complete entity @@ -639,24 +639,26 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - SExpParseState::Empty, ) } + // analyze this character using the mock "inner state" stored in pp (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { // - SExpParseResult::Emit(o, current_state) => { // add result of nested call to our list + SExpParseResult::Emit(o, current_state) => { // add result of parse_sexp_step to our list let mut list_copy = list_content.clone(); list_copy.push(o); let result = SExpParseState::ParsingList(srcloc.ext(&loc), Rc::new(current_state), list_copy); resume(result) } - SExpParseResult::Resume(rp) => resume(SExpParseState::ParsingList( // + SExpParseResult::Resume(rp) => resume(SExpParseState::ParsingList( // we aren't finished reading in our nested state srcloc.ext(&loc), - Rc::new(rp), + Rc::new(rp), // store the returned state from parse_sexp_step in pp list_content.to_vec(), )), SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error upwards }, } } - SExpParseState::TermList(srcloc, TermListCommentState::InComment, parsed, pp, list_content) => { // pp is the captured inside-list state we received from OpenList + // if we're in a comment then just check for newline or carriage return otherwise stay in InComment state + SExpParseState::TermList(srcloc, TermListCommentState::InComment, parsed, pp, list_content) => { let end_comment = if this_char as char == '\n' || this_char as char == '\r' { TermListCommentState::Empty } else { @@ -664,12 +666,13 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - }; resume(SExpParseState::TermList( srcloc.clone(), - end_comment, + end_comment, // store the new commentstate parsed.clone(), pp.clone(), list_content.clone(), )) } + // if we're not in a comment and have already found a parsed second word for this dot expression SExpParseState::TermList( srcloc, TermListCommentState::Empty, @@ -677,7 +680,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - pp, list_content, ) => { - if this_char.is_ascii_whitespace() { + if this_char.is_ascii_whitespace() { // ignore whitespace after second word resume(SExpParseState::TermList( srcloc.ext(&loc), TermListCommentState::Empty, @@ -685,7 +688,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - pp.clone(), list_content.to_vec(), )) - } else if this_char == b')' { + } else if this_char == b')' { // if we see a `)` then we're ready to close this list let mut list_copy = list_content.to_vec(); match list_copy.pop() { Some(v) => { @@ -697,12 +700,12 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - for item in list_copy.iter().rev() { result_list = make_cons(item.clone(), Rc::new(result_list)); } - emit(Rc::new(result_list), SExpParseState::Empty) + emit(Rc::new(result_list), SExpParseState::Empty) // emit the resultant list } } None => error(loc, "Dot as first element of list?"), } - } else if this_char == b';' { + } else if this_char == b';' { // entering a comment resume(SExpParseState::TermList( srcloc.clone(), TermListCommentState::InComment, @@ -710,19 +713,20 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - pp.clone(), list_content.clone(), )) - } else { + } else { // we don't want to see any more characters after we've concluded a dot expression error( srcloc.clone(), &format!("unexpected character {}", this_char as char), ) } } - SExpParseState::TermList(srcloc, TermListCommentState::Empty, None, pp, list_content) => { - match (this_char as char, pp.borrow()) { - ('.', SExpParseState::Empty) => { + // we are passing a dot-expression (x . y) and not in a comment and don't have an object already discovered + SExpParseState::TermList(srcloc, TermListCommentState::Empty, None, pp, list_content) => { // pp is the inner parsestate inside the dot-expressions + match (this_char as char, pp.borrow()) { //match based on current character and inner state + ('.', SExpParseState::Empty) => { // if we aren't in a word and we see another dot that's illegal error(loc, "Multiple dots in list notation are illegal") } - (')', SExpParseState::Empty) => { + (')', SExpParseState::Empty) => { // attempt to close the list if list_content.len() == 1 { emit(list_content[0].clone(), SExpParseState::Empty) } else { @@ -751,19 +755,21 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - None => error(loc, "Dot as first element of list?"), } } + // if we see anything other than ')' or '.' parse it as if we were in empty state (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { - SExpParseResult::Emit(o, _current_state) => resume(SExpParseState::TermList( + SExpParseResult::Emit(parsed_object, _current_state) => resume(SExpParseState::TermList( loc, TermListCommentState::Empty, - Some(o), + Some(parsed_object), // assert parsed_object is not None and then store it in parsed_list pp.clone(), list_content.clone(), )), + // resume means it didn't finish parsing yet, so store inner state and keep going SExpParseResult::Resume(current_state) => resume(SExpParseState::TermList( srcloc.ext(&loc), TermListCommentState::Empty, None, - Rc::new(current_state), + Rc::new(current_state), // store our partial inner parsestate in pp list_content.to_vec(), )), SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), From 8bb994877b31101acb54ad5331fb59b2e1618cbe Mon Sep 17 00:00:00 2001 From: Matthew Howard Date: Tue, 8 Aug 2023 17:07:03 +0100 Subject: [PATCH 8/9] explain an enum param --- src/compiler/sexp.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/sexp.rs b/src/compiler/sexp.rs index 1b1598560..e89dbb6e4 100644 --- a/src/compiler/sexp.rs +++ b/src/compiler/sexp.rs @@ -212,7 +212,7 @@ enum SExpParseState { // The types of state that the Rust pre-forms can take TermList( Srcloc, TermListCommentState, - Option>, + Option>, // this is the second value in the dot expression Rc, // used for inner parsing Vec>, ), From 59641cc810b569644c3f4cb20b0daed1da46af77 Mon Sep 17 00:00:00 2001 From: Matthew Howard Date: Tue, 8 Aug 2023 17:47:22 +0100 Subject: [PATCH 9/9] cargo fmt --- src/compiler/sexp.rs | 151 +++++++++++++++++++++++++++---------------- 1 file changed, 96 insertions(+), 55 deletions(-) diff --git a/src/compiler/sexp.rs b/src/compiler/sexp.rs index e89dbb6e4..9759552ae 100644 --- a/src/compiler/sexp.rs +++ b/src/compiler/sexp.rs @@ -201,9 +201,10 @@ enum TermListCommentState { } #[derive(Debug)] -enum SExpParseState { // The types of state that the Rust pre-forms can take +enum SExpParseState { + // The types of state that the Rust pre-forms can take Empty, - CommentText(Srcloc, Vec), //srcloc contains the file, line, column and length for the captured form + CommentText(Srcloc, Vec), //srcloc contains the file, line, column and length for the captured form Bareword(Srcloc, Vec), QuotedText(Srcloc, u8, Vec), QuotedEscaped(Srcloc, u8, Vec), @@ -212,14 +213,15 @@ enum SExpParseState { // The types of state that the Rust pre-forms can take TermList( Srcloc, TermListCommentState, - Option>, // this is the second value in the dot expression - Rc, // used for inner parsing + Option>, // this is the second value in the dot expression + Rc, // used for inner parsing Vec>, ), } #[derive(Debug)] -enum SExpParseResult { // the result of a call to parse an SExp +enum SExpParseResult { + // the result of a call to parse an SExp Resume(SExpParseState), Emit(Rc, SExpParseState), Error(Srcloc, String), @@ -541,18 +543,19 @@ impl SExp { fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -> SExpParseResult { // switch on our state match current_state { - SExpParseState::Empty => match this_char as char { // we are not currently in a list - '(' => resume(SExpParseState::OpenList(loc)), // move to OpenList state - '\n' => resume(SExpParseState::Empty), // new line, same state + SExpParseState::Empty => match this_char as char { + // we are not currently in a list + '(' => resume(SExpParseState::OpenList(loc)), // move to OpenList state + '\n' => resume(SExpParseState::Empty), // new line, same state ';' => resume(SExpParseState::CommentText(loc, Vec::new())), ')' => error(loc, "Too many close parens"), '"' => resume(SExpParseState::QuotedText(loc, b'"', Vec::new())), // match on " - '\'' => resume(SExpParseState::QuotedText(loc, b'\'', Vec::new())), // match on ' + '\'' => resume(SExpParseState::QuotedText(loc, b'\'', Vec::new())), // match on ' ch => { if char::is_whitespace(ch) { resume(SExpParseState::Empty) } else { - resume(SExpParseState::Bareword(loc, vec![this_char])) // start of a word - could be an atom or a keyword - the compiler will decide + resume(SExpParseState::Bareword(loc, vec![this_char])) // start of a word - could be an atom or a keyword - the compiler will decide } } }, @@ -568,26 +571,35 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - }, // we currently processing a new word SExpParseState::Bareword(srcloc, word_so_far) => { - if char::is_whitespace(this_char as char) { // we've found a space, so it's the end of a word + if char::is_whitespace(this_char as char) { + // we've found a space, so it's the end of a word emit( Rc::new(make_atom(srcloc.clone(), word_so_far.to_vec())), SExpParseState::Empty, ) - } else { // otherwise add letter to word + } else { + // otherwise add letter to word let mut word_copy = word_so_far.to_vec(); word_copy.push(this_char); resume(SExpParseState::Bareword(srcloc.ext(&loc), word_copy)) } } SExpParseState::QuotedText(srcloc, term, t) => { - if this_char == b'\\' { // if we have a character escape then copy the character directly - resume(SExpParseState::QuotedEscaped(srcloc.clone(), *term, t.to_vec())) - } else if this_char == *term { // otherwise check if it's the terminating character (either ' or ") + if this_char == b'\\' { + // if we have a character escape then copy the character directly + resume(SExpParseState::QuotedEscaped( + srcloc.clone(), + *term, + t.to_vec(), + )) + } else if this_char == *term { + // otherwise check if it's the terminating character (either ' or ") emit( Rc::new(SExp::QuotedString(srcloc.ext(&loc), *term, t.to_vec())), // add quoted string to parent list SExpParseState::Empty, ) - } else { // otherwise copy the character + } else { + // otherwise copy the character let mut tcopy = t.to_vec(); tcopy.push(this_char); resume(SExpParseState::QuotedText(srcloc.clone(), *term, tcopy)) @@ -599,38 +611,46 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - tcopy.push(this_char); resume(SExpParseState::QuotedText(srcloc.clone(), *term, tcopy)) } - SExpParseState::OpenList(srcloc) => match this_char as char { // we are beginning a new list - ')' => emit(Rc::new(SExp::Nil(srcloc.ext(&loc))), SExpParseState::Empty), // create a Nil object + SExpParseState::OpenList(srcloc) => match this_char as char { + // we are beginning a new list + ')' => emit(Rc::new(SExp::Nil(srcloc.ext(&loc))), SExpParseState::Empty), // create a Nil object '.' => error(loc, "Dot can't appear directly after begin paren"), - _ => match parse_sexp_step(loc.clone(), &SExpParseState::Empty, this_char) { // fetch result of parsing as if we were in empty state - SExpParseResult::Emit(o, current_state) => resume(SExpParseState::ParsingList( // we found an object, resume processing + _ => match parse_sexp_step(loc.clone(), &SExpParseState::Empty, this_char) { + // fetch result of parsing as if we were in empty state + SExpParseResult::Emit(o, current_state) => resume(SExpParseState::ParsingList( + // we found an object, resume processing srcloc.ext(&loc), - Rc::new(current_state), // captured state from our pretend empty state + Rc::new(current_state), // captured state from our pretend empty state vec![o], )), - SExpParseResult::Resume(current_state) => resume(SExpParseState::ParsingList( // we're still reading the object, resume processing + SExpParseResult::Resume(current_state) => resume(SExpParseState::ParsingList( + // we're still reading the object, resume processing srcloc.ext(&loc), - Rc::new(current_state), // captured state from our pretend empty state + Rc::new(current_state), // captured state from our pretend empty state Vec::new(), )), SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error }, }, // We are in the middle of a list currently - SExpParseState::ParsingList(srcloc, pp, list_content) => { // pp is the captured inside-list state we received from OpenList + SExpParseState::ParsingList(srcloc, pp, list_content) => { + // pp is the captured inside-list state we received from OpenList match (this_char as char, pp.borrow()) { - ('.', SExpParseState::Empty) => resume(SExpParseState::TermList( // dot notation showing cons cell + ('.', SExpParseState::Empty) => resume(SExpParseState::TermList( + // dot notation showing cons cell srcloc.ext(&loc), - TermListCommentState::Empty, // we are not inside a comment + TermListCommentState::Empty, // we are not inside a comment None, - Rc::new(SExpParseState::Empty), // nested state is empty + Rc::new(SExpParseState::Empty), // nested state is empty list_content.to_vec(), )), - (')', SExpParseState::Empty) => emit( // close list and emit it upwards as a complete entity + (')', SExpParseState::Empty) => emit( + // close list and emit it upwards as a complete entity Rc::new(enlist(srcloc.clone(), list_content)), SExpParseState::Empty, ), - (')', SExpParseState::Bareword(l, t)) => { // you've reached the end of the word AND the end of the list, close list and emit upwards + (')', SExpParseState::Bareword(l, t)) => { + // you've reached the end of the word AND the end of the list, close list and emit upwards let parsed_atom = make_atom(l.clone(), t.to_vec()); let mut updated_list = list_content.to_vec(); updated_list.push(Rc::new(parsed_atom)); @@ -640,25 +660,37 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - ) } // analyze this character using the mock "inner state" stored in pp - (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { // - SExpParseResult::Emit(o, current_state) => { // add result of parse_sexp_step to our list + (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { + // + SExpParseResult::Emit(o, current_state) => { + // add result of parse_sexp_step to our list let mut list_copy = list_content.clone(); list_copy.push(o); - let result = - SExpParseState::ParsingList(srcloc.ext(&loc), Rc::new(current_state), list_copy); + let result = SExpParseState::ParsingList( + srcloc.ext(&loc), + Rc::new(current_state), + list_copy, + ); resume(result) } - SExpParseResult::Resume(rp) => resume(SExpParseState::ParsingList( // we aren't finished reading in our nested state + SExpParseResult::Resume(rp) => resume(SExpParseState::ParsingList( + // we aren't finished reading in our nested state srcloc.ext(&loc), - Rc::new(rp), // store the returned state from parse_sexp_step in pp + Rc::new(rp), // store the returned state from parse_sexp_step in pp list_content.to_vec(), )), - SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error upwards + SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error upwards }, } } // if we're in a comment then just check for newline or carriage return otherwise stay in InComment state - SExpParseState::TermList(srcloc, TermListCommentState::InComment, parsed, pp, list_content) => { + SExpParseState::TermList( + srcloc, + TermListCommentState::InComment, + parsed, + pp, + list_content, + ) => { let end_comment = if this_char as char == '\n' || this_char as char == '\r' { TermListCommentState::Empty } else { @@ -666,7 +698,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - }; resume(SExpParseState::TermList( srcloc.clone(), - end_comment, // store the new commentstate + end_comment, // store the new commentstate parsed.clone(), pp.clone(), list_content.clone(), @@ -680,7 +712,8 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - pp, list_content, ) => { - if this_char.is_ascii_whitespace() { // ignore whitespace after second word + if this_char.is_ascii_whitespace() { + // ignore whitespace after second word resume(SExpParseState::TermList( srcloc.ext(&loc), TermListCommentState::Empty, @@ -688,7 +721,8 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - pp.clone(), list_content.to_vec(), )) - } else if this_char == b')' { // if we see a `)` then we're ready to close this list + } else if this_char == b')' { + // if we see a `)` then we're ready to close this list let mut list_copy = list_content.to_vec(); match list_copy.pop() { Some(v) => { @@ -700,12 +734,13 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - for item in list_copy.iter().rev() { result_list = make_cons(item.clone(), Rc::new(result_list)); } - emit(Rc::new(result_list), SExpParseState::Empty) // emit the resultant list + emit(Rc::new(result_list), SExpParseState::Empty) // emit the resultant list } } None => error(loc, "Dot as first element of list?"), } - } else if this_char == b';' { // entering a comment + } else if this_char == b';' { + // entering a comment resume(SExpParseState::TermList( srcloc.clone(), TermListCommentState::InComment, @@ -713,7 +748,8 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - pp.clone(), list_content.clone(), )) - } else { // we don't want to see any more characters after we've concluded a dot expression + } else { + // we don't want to see any more characters after we've concluded a dot expression error( srcloc.clone(), &format!("unexpected character {}", this_char as char), @@ -721,12 +757,16 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - } } // we are passing a dot-expression (x . y) and not in a comment and don't have an object already discovered - SExpParseState::TermList(srcloc, TermListCommentState::Empty, None, pp, list_content) => { // pp is the inner parsestate inside the dot-expressions - match (this_char as char, pp.borrow()) { //match based on current character and inner state - ('.', SExpParseState::Empty) => { // if we aren't in a word and we see another dot that's illegal + SExpParseState::TermList(srcloc, TermListCommentState::Empty, None, pp, list_content) => { + // pp is the inner parsestate inside the dot-expressions + match (this_char as char, pp.borrow()) { + //match based on current character and inner state + ('.', SExpParseState::Empty) => { + // if we aren't in a word and we see another dot that's illegal error(loc, "Multiple dots in list notation are illegal") } - (')', SExpParseState::Empty) => { // attempt to close the list + (')', SExpParseState::Empty) => { + // attempt to close the list if list_content.len() == 1 { emit(list_content[0].clone(), SExpParseState::Empty) } else { @@ -757,13 +797,15 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) - } // if we see anything other than ')' or '.' parse it as if we were in empty state (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) { - SExpParseResult::Emit(parsed_object, _current_state) => resume(SExpParseState::TermList( - loc, - TermListCommentState::Empty, - Some(parsed_object), // assert parsed_object is not None and then store it in parsed_list - pp.clone(), - list_content.clone(), - )), + SExpParseResult::Emit(parsed_object, _current_state) => { + resume(SExpParseState::TermList( + loc, + TermListCommentState::Empty, + Some(parsed_object), // assert parsed_object is not None and then store it in parsed_list + pp.clone(), + list_content.clone(), + )) + } // resume means it didn't finish parsing yet, so store inner state and keep going SExpParseResult::Resume(current_state) => resume(SExpParseState::TermList( srcloc.ext(&loc), @@ -793,7 +835,6 @@ where // Loop through all the characters for this_char in s { - let next_location = start.clone().advance(this_char); // call parse_sexp_step for current character