diff --git a/src/compiling/compiler.rs b/src/compiling/compiler.rs
index 43c434e..f531156 100644
--- a/src/compiling/compiler.rs
+++ b/src/compiling/compiler.rs
@@ -2,11 +2,12 @@ use capitalize::Capitalize;
 use std::fs::File;
 use std::io::prelude::*;
 use crate::compiling_rules::Rules;
-use crate::compiling::{Token, Lexer, LexerError, LexerErrorType, Metadata, SyntaxModule};
+use crate::compiling::{Token, LexerError, LexerErrorType, Metadata, SyntaxModule};
 use crate::compiling::failing::message::Message;
 use crate::compiling::failing::failure::Failure;
 use crate::error_pos;
+use super::lexer::Lexer;
 
 /// How do you want to separate expressions?
 ///
@@ -64,18 +65,14 @@ pub enum ScopingMode {
 pub struct Compiler {
     /// Name of your language
     pub name: String,
-    /// Rules that describe your language
-    pub rules: Rules,
     /// Source code in a form of string
     pub code: Option<String>,
     /// Path to the compiled file if exists
     pub path: Option<String>,
-    /// Separator mode for this compiler
-    pub separator_mode: SeparatorMode,
-    /// Scoping mode for this compiler
-    pub scoping_mode: ScopingMode,
     // Check if user wants to debug parser
-    debug: bool
+    debug: bool,
+    /// Lexer to tokenize the code
+    lexer: Lexer
 }
 
 impl Compiler {
@@ -83,18 +80,21 @@
     pub fn new<T: AsRef<str>>(name: T, rules: Rules) -> Self {
         Compiler {
             name: String::from(name.as_ref()),
-            rules,
             code: None,
             path: None,
-            separator_mode: SeparatorMode::Manual,
-            scoping_mode: ScopingMode::Block,
-            debug: false
+            debug: false,
+            lexer: Lexer::new(rules)
         }
     }
 
     /// Set the language to use indentations
     pub fn use_indents(&mut self) {
-        self.scoping_mode = ScopingMode::Indent
+        self.lexer.scoping_mode = ScopingMode::Indent
+    }
+
+    /// Set the language separator mode
+    pub fn set_separator(&mut self, mode: SeparatorMode) {
+        self.lexer.separator_mode = mode
     }
 
     /// Load file from path
@@ -119,9 +119,7 @@
 
     /// Run just lexer
     pub fn tokenize(&self) -> Result<Vec<Token>, LexerError> {
-        let mut lexer = Lexer::new(self);
-        lexer.run()?;
-        Ok(lexer.lexem)
+        self.lexer.tokenize(&self.code.clone().unwrap())
     }
 
     /// Parser will display information about the call stack
diff --git a/src/compiling/lexing/lexer.rs b/src/compiling/lexing/lexer.rs
index e22da83..14ffea0 100644
--- a/src/compiling/lexing/lexer.rs
+++ b/src/compiling/lexing/lexer.rs
@@ -1,193 +1,199 @@
-use crate::compiling::{ Compiler, Token, SeparatorMode, ScopingMode };
-use super::compound_handler::{CompoundHandler, CompoundReaction};
-use super::region_handler::{ RegionHandler, RegionReaction };
-use super::reader::Reader;
-use crate::compiling::failing::position_info::PositionInfo;
-
-// This is just an estimation of token amount
-// inside of a typical 200-lined file.
-const AVG_TOKEN_AMOUNT: usize = 1024;
-
-/// Lexer's error type
-#[derive(Debug)]
-pub enum LexerErrorType {
-    /// Unspillable region has been spilled
-    Singleline,
-    /// Given region left unclosed
-    Unclosed
+//! Lexer
+//!
+//! This module contains the lexer that is used to tokenize the source code
+
+use crate::{
+    compiling_rules::Rules,
+    prelude::{PositionInfo, ScopingMode, SeparatorMode, Token},
+};
+
+use super::{
+    compound_handler::{CompoundHandler, CompoundReaction},
+    reader::Reader,
+    region_handler::{RegionHandler, RegionReaction},
+    LexerError, LexerErrorType,
+};
+
+/// Lexer
+#[derive(Debug, Clone, PartialEq)]
+pub struct Lexer {
+    rules: Rules,
+    /// Path to the lexed file
+    pub path: Option<String>,
+    /// Separator mode for this lexer
+    pub separator_mode: SeparatorMode,
+    /// Escape symbol for this lexer. Default is '\\'
+    pub escape_symbol: char,
+    /// Scoping mode for this lexer
+    pub scoping_mode: ScopingMode,
 }
 
-/// Type containing full error of lexer
-pub type LexerError = (LexerErrorType, PositionInfo);
-
-/// The Lexer
-///
-/// Lexer takes source code in a form of a string and translates it to a list of tokens.
-/// This particular implementation requires additional metadata such as like regions or symbols.
-/// These can be supplied by the `Compiler` in a one cohesive package. Hence the API requires to
-/// pass a reference to the `Compiler`.
-pub struct Lexer<'a> {
-    symbols: Vec<char>,
-    escape_symbol: char,
-    compound: CompoundHandler,
-    region: RegionHandler,
-    reader: Reader<'a>,
-    path: Option<String>,
-    /// This attribute stores parsed tokens by the lexer
-    pub lexem: Vec<Token>,
-    separator_mode: SeparatorMode,
-    scoping_mode: ScopingMode,
+struct LexState<'a> {
+    word: String,
+    is_indenting: bool,
     is_escaped: bool,
+    token_start_index: usize,
     position: (usize, usize),
-    index: usize,
-    token_start_index: usize
+    reader: Reader<'a>,
+    lexem: Vec<Token>,
+    region_handler: RegionHandler,
+    compound_handler: CompoundHandler,
 }
 
-impl<'a> Lexer<'a> {
+impl Lexer {
     /// Create a new Lexer based on the compiler metadata
-    pub fn new(cc: &'a Compiler) -> Self {
-        let code: &'a String = cc.code.as_ref().unwrap();
+    pub fn new(rules: Rules) -> Self {
         Lexer {
-            symbols: cc.rules.symbols.clone(),
-            escape_symbol: cc.rules.escape_symbol,
-            compound: CompoundHandler::new(&cc.rules),
-            region: RegionHandler::new(&cc.rules),
-            reader: Reader::new(code),
-            path: cc.path.clone(),
-            lexem: Vec::with_capacity(AVG_TOKEN_AMOUNT),
-            separator_mode: cc.separator_mode.clone(),
-            scoping_mode: cc.scoping_mode.clone(),
-            is_escaped: false,
-            position: (0, 0),
-            index: 0,
-            token_start_index: 0
+            rules,
+            path: None,
+            separator_mode: SeparatorMode::Manual,
+            escape_symbol: '\\',
+            scoping_mode: ScopingMode::Block,
         }
     }
 
     /// Add indentation to the lexem
     #[inline]
-    fn add_indent(&mut self, word: String) -> String {
-        if !word.is_empty() {
-            // Getting position by word here would attempt to
-            // substract with overflow since the new line character
-            // technically belongs to the previous line
-            let (row, _col) = self.reader.get_position();
-            self.lexem.push(Token {
-                word,
-                pos: (row, 1),
-                start: self.token_start_index,
-            });
-            self.position = (0, 0);
-            String::new()
-        } else { word }
+    fn add_indent(&self, lex_state: &mut LexState) {
+        if lex_state.word.is_empty() {
+            return;
+        }
+
+        // Getting position by word here would attempt to
+        // subtract with overflow since the new line character
+        // technically belongs to the previous line
+        let (row, _col) = lex_state.reader.get_position();
+        lex_state.lexem.push(Token {
+            word: lex_state.word.clone(),
+            pos: (row, 1),
+            start: lex_state.token_start_index,
+        });
+        lex_state.position = (0, 0);
+        lex_state.word = String::new();
     }
 
     /// Add word that has been completed in previous iteration to the lexem
     #[inline]
-    fn add_word(&mut self, word: String) -> String {
-        if !word.is_empty() {
-            self.lexem.push(Token {
-                word,
-                pos: self.position,
-                start: self.token_start_index
-            });
-            self.position = (0, 0);
-            String::new()
+    fn add_word(&self, lex_state: &mut LexState) {
+        if lex_state.word.is_empty() {
+            return;
         }
-        else { word }
+
+        lex_state.lexem.push(Token {
+            word: lex_state.word.clone(),
+            pos: lex_state.position,
+            start: lex_state.token_start_index,
+        });
+        lex_state.position = (0, 0);
+        lex_state.word = String::new();
     }
 
     /// Add word that has been completed in current iteration to the lexem
     #[inline]
-    fn add_word_inclusively(&mut self, word: String) -> String {
-        if !word.is_empty() {
-            self.lexem.push(Token {
-                word,
-                pos: self.position,
-                start: self.token_start_index
-            });
-            self.position = (0, 0);
-            String::new()
+    fn add_word_inclusively(&self, lex_state: &mut LexState) {
+        if lex_state.word.is_empty() {
+            return;
         }
-        else { word }
+
+        lex_state.lexem.push(Token {
+            word: lex_state.word.clone(),
+            pos: lex_state.position,
+            start: lex_state.token_start_index,
+        });
+        lex_state.position = (0, 0);
+        lex_state.word = String::new()
     }
 
     /// Checks whether this is a nontokenizable region
     #[inline]
-    pub fn is_tokenized_region(&self, reaction: &RegionReaction) -> bool {
-        if let Some(region) = self.region.get_region().as_ref() {
+    fn is_tokenized_region(&self, reaction: &RegionReaction, lex_state: &mut LexState) -> bool {
+        if let Some(region) = lex_state.region_handler.get_region() {
             region.tokenize && *reaction == RegionReaction::Pass
+        } else {
+            false
         }
-        else { false }
     }
 
     /// Pattern code for adding a symbol
     /// **[*]**
     #[inline]
-    fn pattern_add_symbol(&mut self, mut word: String, letter: char) -> String {
-        word = self.add_word(word);
-        if word.is_empty() {
-            self.token_start_index = self.index;
+    fn pattern_add_symbol(&self, lex_state: &mut LexState, letter: char) {
+        self.add_word(lex_state);
+
+        if lex_state.word.is_empty() {
+            lex_state.token_start_index = lex_state.reader.get_index();
         }
-        self.word_push(&mut word, letter);
-        self.position = self.reader.get_position();
-        self.add_word_inclusively(word)
+        self.word_push(lex_state, letter);
+        lex_state.position = lex_state.reader.get_position();
+
+        self.add_word_inclusively(lex_state);
     }
 
     /// Pattern code for beginning a new region
     /// **[**
     #[inline]
-    fn pattern_begin(&mut self, mut word: String, letter: char) -> String {
-        word = self.add_word(word);
-        self.word_push(&mut word, letter);
-        word
+    fn pattern_begin(&self, lex_state: &mut LexState, letter: char) {
+        self.add_word(lex_state);
+        self.word_push(lex_state, letter);
     }
 
     /// Pattern code for ending current region
     /// **]**
     #[inline]
-    fn pattern_end(&mut self, mut word: String, letter: char) -> String {
-        self.word_push(&mut word, letter);
-        self.add_word_inclusively(word)
+    fn pattern_end(&self, lex_state: &mut LexState, letter: char) {
+        self.word_push(lex_state, letter);
+        self.add_word_inclusively(lex_state);
    }
 
     /// Push letter to the word and set token start index
-    fn word_push(&mut self, word: &mut String, letter: char) {
-        if word.is_empty() {
-            self.token_start_index = self.index;
+    fn word_push(&self, lex_state: &mut LexState, letter: char) {
+        if lex_state.word.is_empty() {
+            lex_state.token_start_index = lex_state.reader.get_index();
         }
-        word.push(letter);
+        lex_state.word.push(letter);
     }
 
     /// Tokenize source code
     ///
     /// Run lexer and tokenize code. The result is stored in the lexem attribute
-    pub fn run(&mut self) -> Result<(), LexerError> {
-        let mut word = String::new();
-        let mut is_indenting = false;
-        while let Some(letter) = self.reader.next() {
-            self.index = self.reader.get_index();
+    pub fn tokenize(&self, input: &str) -> Result<Vec<Token>, LexerError> {
+        let code = input.to_string();
+        let mut lex_state = LexState {
+            word: String::new(),
+            is_indenting: false,
+            is_escaped: false,
+            token_start_index: 0,
+            position: (0, 0),
+            lexem: Vec::new(),
+            reader: Reader::new(&code),
+            region_handler: RegionHandler::new(&self.rules),
+            compound_handler: CompoundHandler::new(&self.rules),
+        };
+
+        while let Some(letter) = lex_state.reader.next() {
             /****************/
             /* Set Position */
             /****************/
 
             // If the new position hasn't been set yet, set it
-            if self.position == (0, 0) {
+            if lex_state.position == (0, 0) {
                 // If separator mode is set to Manual and the letter is a separator,
                 // then skip finding a new position
                 if SeparatorMode::Manual != self.separator_mode || letter != '\n' {
-                    let region = self.region.get_region().unwrap();
+                    let region = lex_state.region_handler.get_region().unwrap();
                     // If the region is tokenized, then check if the letter is a separator
                     if !region.tokenize || !vec![' ', '\t'].contains(&letter) {
-                        self.position = self.reader.get_position();
+                        lex_state.position = lex_state.reader.get_position();
                     }
                 }
             }
 
             // Reaction stores the reaction of the region handler
             // Have we just opened or closed some region?
-            let reaction = self.region.handle_region(&self.reader, self.is_escaped);
+            let reaction = lex_state
+                .region_handler
+                .handle_region(&lex_state.reader, lex_state.is_escaped);
             match reaction {
                 // If the region has been opened
                 // Finish the part that we have been parsing
@@ -195,7 +201,7 @@ impl<'a> Lexer<'a> {
                     // Also if the new region is an interpolation that tokenizes
                     // the inner content - separate the region from the content
                     if tokenize {
-                        word = self.pattern_add_symbol(word, letter);
+                        self.pattern_add_symbol(&mut lex_state, letter);
                     }
                     // Regular region case
                     else {
@@ -203,57 +209,62 @@ impl<'a> Lexer<'a> {
                         // character if region rule opens with newline
                         if letter == '\n' {
                             // This additionally creates a new token
-                            word = self.pattern_add_symbol(word, letter);
+                            self.pattern_add_symbol(&mut lex_state, letter);
                        }
                         // Normally start a new region
-                        word = self.pattern_begin(word, letter);
+                        self.pattern_begin(&mut lex_state, letter);
                     }
-                },
+                }
                 // If the region has been closed
                 // Add the closing region and finish the word
                 RegionReaction::End(tokenize) => {
                     // Also if the new region is an interpolation that tokenizes
                     // the inner content - separate the region from the content
                     if tokenize {
-                        word = self.pattern_add_symbol(word, letter);
+                        self.pattern_add_symbol(&mut lex_state, letter);
                     }
                     // Regular region case
                     else {
                         // Normally close the region
-                        word = self.pattern_end(word, letter);
+                        self.pattern_end(&mut lex_state, letter);
                         // This is supposed to prevent overshadowing new line
                         // character if region rule closes with newline
                         if letter == '\n' {
                             // This additionally creates a new token
-                            word = self.pattern_add_symbol(word, letter);
+                            self.pattern_add_symbol(&mut lex_state, letter);
                         }
                     }
                 }
                 RegionReaction::Pass => {
-                    match self.compound.handle_compound(letter, &self.reader, self.is_tokenized_region(&reaction)) {
-                        CompoundReaction::Begin => word = self.pattern_begin(word, letter),
-                        CompoundReaction::Keep => self.word_push(&mut word, letter),
-                        CompoundReaction::End => word = self.pattern_end(word, letter),
+                    let is_tokenized_region = self.is_tokenized_region(&reaction, &mut lex_state);
+                    match lex_state.compound_handler.handle_compound(
+                        letter,
+                        &lex_state.reader,
+                        is_tokenized_region,
+                    ) {
+                        CompoundReaction::Begin => self.pattern_begin(&mut lex_state, letter),
+                        CompoundReaction::Keep => self.word_push(&mut lex_state, letter),
+                        CompoundReaction::End => self.pattern_end(&mut lex_state, letter),
                         CompoundReaction::Pass => {
                             // Handle region scope
-                            if !self.is_tokenized_region(&reaction) {
-                                let region = self.region.get_region().unwrap();
+                            if !self.is_tokenized_region(&reaction, &mut lex_state) {
+                                let region = lex_state.region_handler.get_region().unwrap();
                                 // Flip escaped key
-                                self.is_escaped = (!self.is_escaped && letter == self.escape_symbol)
-                                    .then(|| !self.is_escaped)
+                                lex_state.is_escaped = (!lex_state.is_escaped
+                                    && letter == self.escape_symbol)
+                                    .then(|| !lex_state.is_escaped)
                                     .unwrap_or(false);
                                 // Handle singleline attribute
                                 if letter == '\n' && region.singleline {
-                                    let pos = self.reader.get_position();
+                                    let pos = lex_state.reader.get_position();
                                     return Err((
                                         LexerErrorType::Singleline,
-                                        PositionInfo::at_pos(self.path.clone(), pos, 0).data(region.name.clone())
-                                    ))
+                                        PositionInfo::at_pos(self.path.clone(), pos, 0)
+                                            .data(region.name.clone()),
+                                    ));
                                 }
-                                self.word_push(&mut word, letter);
-                            }
-                            else {
-
+                                self.word_push(&mut lex_state, letter);
+                            } else {
                                 /******************/
                                 /* Mode modifiers */
                                 /******************/
@@ -261,31 +272,31 @@ impl<'a> Lexer<'a> {
                                 // Create indent regions: '\n   '
                                 if let ScopingMode::Indent = self.scoping_mode {
                                     // If we are still in the indent region - proceed
-                                    if is_indenting && vec![' ', '\t'].contains(&letter) {
-                                        self.word_push(&mut word, letter);
+                                    if lex_state.is_indenting && vec![' ', '\t'].contains(&letter) {
+                                        self.word_push(&mut lex_state, letter);
                                     }
                                     // If it's the new line - start indent region
                                     if letter == '\n' {
-                                        is_indenting = true;
-                                        word = self.pattern_begin(word, letter);
+                                        lex_state.is_indenting = true;
+                                        self.pattern_begin(&mut lex_state, letter);
                                     }
                                     // Check if the current letter
                                     // concludes current indent region
-                                    if is_indenting {
-                                        if let Some(next_char) = self.reader.peek() {
+                                    if lex_state.is_indenting {
+                                        if let Some(next_char) = lex_state.reader.peek() {
                                             if !vec![' ', '\t'].contains(&next_char) {
-                                                word = self.add_indent(word);
-                                                is_indenting = false;
+                                                self.add_indent(&mut lex_state);
+                                                lex_state.is_indenting = false;
                                             }
                                         }
-                                        continue
+                                        continue;
                                     }
                                 }
                                 // Skip newline character if we want to manually insert semicolons
                                 if let SeparatorMode::Manual = self.separator_mode {
                                     if letter == '\n' {
-                                        word = self.add_word(word);
-                                        continue
+                                        self.add_word(&mut lex_state);
+                                        continue;
                                     }
                                 }
@@ -295,15 +306,15 @@ impl<'a> Lexer<'a> {
 
                                 // Skip whitespace
                                 if vec![' ', '\t'].contains(&letter) {
-                                    word = self.add_word(word);
+                                    self.add_word(&mut lex_state);
                                 }
                                 // Handle special symbols
-                                else if self.symbols.contains(&letter) || letter == '\n' {
-                                    word = self.pattern_add_symbol(word, letter);
+                                else if self.rules.symbols.contains(&letter) || letter == '\n' {
+                                    self.pattern_add_symbol(&mut lex_state, letter);
                                 }
                                 // Handle word
                                 else {
-                                    self.word_push(&mut word, letter);
+                                    self.word_push(&mut lex_state, letter);
                                 }
                             }
                         }
@@ -311,38 +322,37 @@ impl<'a> Lexer<'a> {
                 }
             }
         }
-        self.add_word(word);
+        self.add_word(&mut lex_state);
 
         // If some region exists that was not closed
-        if let Err((pos, region)) = self.region.is_region_closed(&self.reader) {
+        if let Err((pos, region)) = lex_state.region_handler.is_region_closed(&lex_state.reader) {
            return Err((
                LexerErrorType::Unclosed,
-                PositionInfo::at_pos(self.path.clone(), pos, 0).data(region.name)
+                PositionInfo::at_pos(self.path.clone(), pos, 0).data(region.name),
             ));
         }
-        Ok(())
+
+        Ok(lex_state.lexem)
     }
 }
 
 #[cfg(test)]
 mod test {
-    use crate::compiling_rules::{ Region, Rules };
+    use crate::compiling::ScopingMode;
+    use crate::compiling_rules::{Region, Rules};
     use crate::reg;
-    use crate::compiling::{ Compiler, ScopingMode };
 
     #[test]
     fn test_lexer_base() {
         let symbols = vec!['(', ')'];
-        let regions = reg![
-            reg!(string as "String literal" => {
-                begin: "'",
-                end: "'"
-            } => [
-                reg!(array as "Array Literal" => {
-                    begin: "[",
-                    end: "]"
-                })
-            ])
-        ];
+        let regions = reg![reg!(string as "String literal" => {
+            begin: "'",
+            end: "'"
+        } => [
+            reg!(array as "Array Literal" => {
+                begin: "[",
+                end: "]"
+            })
+        ])];
         let expected = vec![
             ("let".to_string(), 1, 1),
             ("a".to_string(), 1, 5),
@@ -351,17 +361,15 @@ mod test {
             ("12".to_string(), 1, 10),
             ("+".to_string(), 1, 13),
             ("32".to_string(), 1, 15),
-            (")".to_string(), 1, 17)
+            (")".to_string(), 1, 17),
         ];
         let rules = Rules::new(symbols, vec![], regions);
-        let mut cc: Compiler = Compiler::new("TestScript", rules);
-        cc.load("let a = (12 + 32)");
-        let mut lexer = super::Lexer::new(&cc);
+        let lexer = super::Lexer::new(rules);
         let mut result = vec![];
         // Simulate lexing
-        let res = lexer.run();
+        let res = lexer.tokenize("let a = (12 + 32)");
         assert!(res.is_ok());
-        for lex in lexer.lexem {
+        for lex in res.unwrap() {
             result.push((lex.word, lex.pos.0, lex.pos.1));
         }
         assert_eq!(expected, result);
@@ -370,18 +378,16 @@ mod test {
     #[test]
     fn test_lexer_string_interp() {
         let symbols = vec!['(', ')'];
-        let regions = reg![
-            reg!(string_literal as "String literal" => {
-                begin: "'",
-                end: "'"
-            } => [
-                reg!(string_interp as "String interpolation" => {
-                    begin: "{",
-                    end: "}",
-                    tokenize: true
-                } ref global)
-            ])
-        ];
+        let regions = reg![reg!(string_literal as "String literal" => {
+            begin: "'",
+            end: "'"
+        } => [
+            reg!(string_interp as "String interpolation" => {
+                begin: "{",
+                end: "}",
+                tokenize: true
+            } ref global)
+        ])];
         let expected = vec![
             ("let".to_string(), 1, 1),
             ("a".to_string(), 1, 5),
@@ -394,17 +400,16 @@ mod test {
             ("}".to_string(), 1, 30),
             (" long'".to_string(), 1, 31),
             ("}".to_string(), 1, 37),
-            (" 🎉 text'".to_string(), 1, 38)
+            (" 🎉 text'".to_string(), 1, 38),
         ];
         let rules = Rules::new(symbols, vec![], regions);
-        let mut cc: Compiler = Compiler::new("TestScript", rules);
-        cc.load("let a = 'this {'is {adjective} long'} 🎉 text'");
-        let mut lexer = super::Lexer::new(&cc);
+
+        let lexer = super::Lexer::new(rules);
         let mut result = vec![];
         // Simulate lexing
-        let res = lexer.run();
+        let res = lexer.tokenize("let a = 'this {'is {adjective} long'} 🎉 text'");
         assert!(res.is_ok());
-        for lex in lexer.lexem {
+        for lex in res.unwrap() {
             result.push((lex.word, lex.pos.0, lex.pos.1));
         }
         assert_eq!(expected, result);
@@ -423,22 +428,18 @@ mod test {
             ("subcondition".to_string(), (2, 8), 21),
             (":".to_string(), (2, 20), 33),
             ("\n        ".to_string(), (3, 1), 34),
-            ("pass".to_string(), (3, 9), 43)
+            ("pass".to_string(), (3, 9), 43),
         ];
         let rules = Rules::new(symbols, vec![], regions);
-        let mut cc: Compiler = Compiler::new("Testhon", rules);
-        cc.scoping_mode = ScopingMode::Indent;
-        cc.load(vec![
-            "if condition:",
-            "    if subcondition:",
-            "        pass"
-        ].join("\n"));
-        let mut lexer = super::Lexer::new(&cc);
+
+        let mut lexer = super::Lexer::new(rules);
+        lexer.scoping_mode = ScopingMode::Indent;
         let mut result = vec![];
         // Simulate lexing
-        let res = lexer.run();
+        let res = lexer
+            .tokenize(&vec!["if condition:", "    if subcondition:", "        pass"].join("\n"));
         assert!(res.is_ok());
-        for lex in lexer.lexem {
+        for lex in res.unwrap() {
             result.push((lex.word, (lex.pos.0, lex.pos.1), lex.start));
         }
         assert_eq!(expected, result);
@@ -455,21 +456,15 @@ mod test {
             ("12".to_string(), 1, 11),
             ("+".to_string(), 2, 1),
             ("12".to_string(), 3, 1),
-            (";".to_string(), 3, 3)
+            (";".to_string(), 3, 3),
         ];
         let rules = Rules::new(symbols, vec![], regions);
-        let mut cc: Compiler = Compiler::new("Testhon", rules);
-        cc.load(vec![
-            "let age = 12",
-            "+",
-            "12;"
-        ].join("\n"));
-        let mut lexer = super::Lexer::new(&cc);
+        let lexer = super::Lexer::new(rules);
         let mut result = vec![];
         // Simulate lexing
-        let res = lexer.run();
+        let res = lexer.tokenize(&vec!["let age = 12", "+", "12;"].join("\n"));
         assert!(res.is_ok());
-        for lex in lexer.lexem {
+        for lex in res.unwrap() {
             result.push((lex.word, lex.pos.0, lex.pos.1));
         }
         assert_eq!(expected, result);
@@ -478,30 +473,18 @@ mod test {
     #[test]
     fn test_lexer_multiline_regions() {
         let symbols = vec![';', '+', '='];
-        let regions = reg![
-            reg!(string as "String" => {
-                begin: "'",
-                end: "'"
-            })
-        ];
-        let expected = vec![
-            ("'this\nis\na\nmultiline\nstring'".to_string(), 1, 1)
-        ];
+        let regions = reg![reg!(string as "String" => {
+            begin: "'",
+            end: "'"
+        })];
+        let expected = vec![("'this\nis\na\nmultiline\nstring'".to_string(), 1, 1)];
         let rules = Rules::new(symbols, vec![], regions);
-        let mut cc: Compiler = Compiler::new("Test", rules);
-        cc.load(vec![
-            "'this",
-            "is",
-            "a",
-            "multiline",
-            "string'",
-        ].join("\n"));
-        let mut lexer = super::Lexer::new(&cc);
+        let lexer = super::Lexer::new(rules);
         let mut result = vec![];
         // Simulate lexing
-        let res = lexer.run();
+        let res = lexer.tokenize(&vec!["'this", "is", "a", "multiline", "string'"].join("\n"));
         assert!(res.is_ok());
-        for lex in lexer.lexem {
+        for lex in res.unwrap() {
             result.push((lex.word, lex.pos.0, lex.pos.1));
         }
         assert_eq!(expected, result);
@@ -510,26 +493,18 @@ mod test {
     #[test]
     fn test_lexer_escaped_regions() {
         let symbols = vec![';', '+', '='];
-        let regions = reg![
-            reg!(string as "String" => {
-                begin: "\"",
-                end: "\""
-            })
-        ];
-        let expected = vec![
-            ("\"this is \\\"escaped\\\" string\"".to_string(), 1, 1)
-        ];
+        let regions = reg![reg!(string as "String" => {
+            begin: "\"",
+            end: "\""
+        })];
+        let expected = vec![("\"this is \\\"escaped\\\" string\"".to_string(), 1, 1)];
         let rules = Rules::new(symbols, vec![], regions);
-        let mut cc: Compiler = Compiler::new("Test", rules);
-        cc.load(vec![
-            "\"this is \\\"escaped\\\" string\""
-        ].join("\n"));
-        let mut lexer = super::Lexer::new(&cc);
+        let lexer = super::Lexer::new(rules);
         let mut result = vec![];
         // Simulate lexing
-        let res = lexer.run();
+        let res = lexer.tokenize(&vec!["\"this is \\\"escaped\\\" string\""].join("\n"));
         assert!(res.is_ok());
-        for lex in lexer.lexem {
+        for lex in res.unwrap() {
             result.push((lex.word, lex.pos.0, lex.pos.1));
         }
         assert_eq!(expected, result);
diff --git a/src/compiling/lexing/mod.rs b/src/compiling/lexing/mod.rs
index 7257ebc..704573b 100644
--- a/src/compiling/lexing/mod.rs
+++ b/src/compiling/lexing/mod.rs
@@ -1,9 +1,22 @@
 //! Lexer module
-//! 
+//!
 //! This module holds all the lexer related modules
 
+use crate::prelude::PositionInfo;
+
 mod compound_handler;
-mod region_handler;
+pub mod lexer;
 mod reader;
-mod lexer;
-
-pub use lexer::*;
\ No newline at end of file
+mod region_handler;
+
+/// Lexer's error type
+#[derive(Debug)]
+pub enum LexerErrorType {
+    /// Unspillable region has been spilled
+    Singleline,
+    /// Given region left unclosed
+    Unclosed,
+}
+
+/// Type containing full error of lexer
+pub type LexerError = (LexerErrorType, PositionInfo);
diff --git a/src/compiling/mod.rs b/src/compiling/mod.rs
index a305a2f..52624ab 100644
--- a/src/compiling/mod.rs
+++ b/src/compiling/mod.rs
@@ -4,6 +4,7 @@
 //! that helps you tokenize your code or even parse it entirely.
 
 mod lexing;
+
 mod compiler;
 mod token;
 mod parser;
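
Usage sketch: the refactor inverts the old dependency, so a `Lexer` can now be driven standalone while `Compiler` merely owns one. The sketch below uses only what the patch itself shows — `Lexer::new(rules)`, the public `separator_mode`/`scoping_mode` fields, and `tokenize(&str) -> Result<Vec<Token>, LexerError>`. The `use` paths are assumptions about what the crate re-exports; this diff does not confirm them.

```rust
// Assumed imports: `Rules`, `SeparatorMode`, `Token`, the `reg!` macro, and
// the new `Lexer` are taken to be reachable through the crate's re-exports.
use heraclitus_compiler::prelude::*;
use heraclitus_compiler::reg;

fn main() {
    // Rules with the same shape as `test_lexer_base` above.
    let symbols = vec!['(', ')'];
    let regions = reg![
        reg!(string as "String literal" => {
            begin: "'",
            end: "'"
        })
    ];
    let rules = Rules::new(symbols, vec![], regions);

    // The lexer now owns its rules; its modes are plain public fields.
    let mut lexer = Lexer::new(rules);
    lexer.separator_mode = SeparatorMode::Manual;

    // `tokenize` takes the source directly and returns the tokens,
    // where the old `run()` stored them in the `lexem` attribute.
    match lexer.tokenize("let a = (12 + 32)") {
        Ok(tokens) => println!("lexed {} tokens", tokens.len()),
        Err((kind, _pos)) => eprintln!("lexing failed: {:?}", kind),
    }
}
```

Because the per-run mutable state moved into the private `LexState`, `tokenize` takes `&self`: a single configured `Lexer` can be reused across many inputs, and the new `Clone` and `PartialEq` derives make it cheap to duplicate and compare in tests.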