From bac96523970f7fe6bfd151099ebf24c89f7653bd Mon Sep 17 00:00:00 2001 From: yui-knk Date: Sat, 23 Dec 2023 18:14:30 +0900 Subject: [PATCH] Take care of multi line user codes --- lib/lrama/grammar/rule_builder.rb | 11 +++++-- lib/lrama/lexer/location.rb | 42 +++++++++++++++++++++++-- sig/lrama/lexer/location.rbs | 2 ++ spec/fixtures/lexer/location.y | 7 ++++- spec/lrama/grammar/rule_builder_spec.rb | 24 ++++++-------- spec/lrama/lexer/location_spec.rb | 9 ++++++ spec/lrama/parser_spec.rb | 12 ++++--- 7 files changed, 83 insertions(+), 24 deletions(-) diff --git a/lib/lrama/grammar/rule_builder.rb b/lib/lrama/grammar/rule_builder.rb index 31e7db45..80da6368 100644 --- a/lib/lrama/grammar/rule_builder.rb +++ b/lib/lrama/grammar/rule_builder.rb @@ -151,8 +151,15 @@ def numberize_references else candidates = rhs.each_with_index.select {|token, i| token.referred_by?(ref_name) } - raise token.location.generate_error_message("Referring symbol `#{ref_name}` is duplicated.") if candidates.size >= 2 - raise token.location.generate_error_message("Referring symbol `#{ref_name}` is not found.") unless referring_symbol = candidates.first + if candidates.size >= 2 + location = token.location.partial_location(ref.first_column, ref.last_column) + raise location.generate_error_message("Referring symbol `#{ref_name}` is duplicated.") + end + + unless referring_symbol = candidates.first + location = token.location.partial_location(ref.first_column, ref.last_column) + raise location.generate_error_message("Referring symbol `#{ref_name}` is not found.") + end ref.index = referring_symbol[1] + 1 end diff --git a/lib/lrama/lexer/location.rb b/lib/lrama/lexer/location.rb index 6bdce53e..b69dbe9a 100644 --- a/lib/lrama/lexer/location.rb +++ b/lib/lrama/lexer/location.rb @@ -20,6 +20,36 @@ def ==(other) self.last_column == other.last_column end + def partial_location(left, right) + offset = -first_column + new_first_line = -1 + new_first_column = -1 + new_last_line = -1 + new_last_column = -1 + + _text.each.with_index do |line, index| + new_offset = offset + line.length + 1 + + if offset <= left && left <= new_offset + new_first_line = first_line + index + new_first_column = left - offset + end + + if offset <= right && right <= new_offset + new_last_line = first_line + index + new_last_column = right - offset + end + + offset = new_offset + end + + Location.new( + grammar_file_path: grammar_file_path, + first_line: new_first_line, first_column: new_first_column, + last_line: new_last_line, last_column: new_last_column + ) + end + def to_s "#{grammar_file_path} (#{first_line},#{first_column})-(#{last_line},#{last_column})" end @@ -49,10 +79,16 @@ def carrets end def text - return @text if @text + _text.join("\n") + end + + def _text + return @_text if @_text + + offset = 0 - @text = File.read(grammar_file_path).split("\n")[first_line - 1] - @text + @_text = File.read(grammar_file_path).split("\n")[(first_line - 1)...last_line] + @_text end end end diff --git a/sig/lrama/lexer/location.rbs b/sig/lrama/lexer/location.rbs index 903051f6..1a6b5445 100644 --- a/sig/lrama/lexer/location.rbs +++ b/sig/lrama/lexer/location.rbs @@ -10,6 +10,7 @@ module Lrama def initialize: (grammar_file_path: String, first_line: Integer, first_column: Integer, last_line: Integer, last_column: Integer) -> void def ==: (Location other) -> bool + def partial_location: (Integer, Integer) -> Location def generate_error_message: (String) -> String def line_with_carrets: () -> String @@ -18,6 +19,7 @@ module Lrama def blanks: () -> String def carrets: () -> String def text: () -> String + def _text: () -> Array[String] end end end diff --git a/spec/fixtures/lexer/location.y b/spec/fixtures/lexer/location.y index 2c9c45ae..f5541aa0 100644 --- a/spec/fixtures/lexer/location.y +++ b/spec/fixtures/lexer/location.y @@ -34,7 +34,12 @@ expr : NUM | expr '-' expr { $$ = $1 - $3; } | expr '*' expr { $$ = $1 * $3; } | expr '/' expr { $$ = $1 / $3; } - | '(' expr ')' { $$ = $2; } + | '(' expr ')' + { + printf("debug %d\n", $1); + $$ = $2; + printf("debug %d\n", $3); + } ; %% diff --git a/spec/lrama/grammar/rule_builder_spec.rb b/spec/lrama/grammar/rule_builder_spec.rb index fc6e4589..1d452b6a 100644 --- a/spec/lrama/grammar/rule_builder_spec.rb +++ b/spec/lrama/grammar/rule_builder_spec.rb @@ -263,18 +263,16 @@ let(:location_2) { Lrama::Lexer::Location.new(grammar_file_path: grammar_file_path, first_line: 1, first_column: 8, last_line: 1, last_column: 21) } let(:location_3) { Lrama::Lexer::Location.new(grammar_file_path: grammar_file_path, first_line: 1, first_column: 22, last_line: 1, last_column: 29) } let(:location_4) { Lrama::Lexer::Location.new(grammar_file_path: grammar_file_path, first_line: 1, first_column: 30, last_line: 1, last_column: 41) } - let(:location_5) { Lrama::Lexer::Location.new(grammar_file_path: grammar_file_path, first_line: 1, first_column: 42, last_line: 1, last_column: 60) } + let(:location_5) { Lrama::Lexer::Location.new(grammar_file_path: grammar_file_path, first_line: 1, first_column: 43, last_line: 1, last_column: 59) } let(:token_1) { Lrama::Lexer::Token::Ident.new(s_value: "class", location: location_1) } let(:token_2) { Lrama::Lexer::Token::Ident.new(s_value: "keyword_class", location: location_2) } let(:token_3) { Lrama::Lexer::Token::Ident.new(s_value: "tSTRING", location: location_3) } let(:token_4) { Lrama::Lexer::Token::Ident.new(s_value: "keyword_end", location: location_4) } - let(:token_5) { Lrama::Lexer::Token::UserCode.new(s_value: "$classes = $1;", location: location_5) } + let(:token_5) { Lrama::Lexer::Token::UserCode.new(s_value: " $classes = $1; ", location: location_5) } it "raises error" do # class : keyword_class tSTRING keyword_end { $classes = $1; } - [location_1, location_2, location_3, location_4, location_5].each do |location| - allow(location).to receive(:text).and_return(text) - end + allow_any_instance_of(Lrama::Lexer::Location).to receive(:_text).and_return([text]) rule_builder.lhs = token_1 rule_builder.add_rhs(token_2) @@ -284,9 +282,9 @@ rule_builder.complete_input expected = <<-TEXT -parse.y:1:42: Referring symbol `classes` is not found. +parse.y:1:44: Referring symbol `classes` is not found. class : keyword_class tSTRING keyword_end { $classes = $1; } - ^^^^^^^^^^^^^^^^^^ + ^^^^^^^^ TEXT expect { rule_builder.send(:preprocess_references) }.to raise_error(expected) @@ -300,19 +298,17 @@ class : keyword_class tSTRING keyword_end { $classes = $1; } let(:location_3) { Lrama::Lexer::Location.new(grammar_file_path: grammar_file_path, first_line: 1, first_column: 22, last_line: 1, last_column: 29) } let(:location_4) { Lrama::Lexer::Location.new(grammar_file_path: grammar_file_path, first_line: 1, first_column: 30, last_line: 1, last_column: 37) } let(:location_5) { Lrama::Lexer::Location.new(grammar_file_path: grammar_file_path, first_line: 1, first_column: 38, last_line: 1, last_column: 49) } - let(:location_6) { Lrama::Lexer::Location.new(grammar_file_path: grammar_file_path, first_line: 1, first_column: 50, last_line: 1, last_column: 72) } + let(:location_6) { Lrama::Lexer::Location.new(grammar_file_path: grammar_file_path, first_line: 1, first_column: 51, last_line: 1, last_column: 71) } let(:token_1) { Lrama::Lexer::Token::Ident.new(s_value: "class", location: location_1) } let(:token_2) { Lrama::Lexer::Token::Ident.new(s_value: "keyword_class", location: location_2) } let(:token_3) { Lrama::Lexer::Token::Ident.new(s_value: "tSTRING", location: location_3) } let(:token_4) { Lrama::Lexer::Token::Ident.new(s_value: "tSTRING", location: location_4) } let(:token_5) { Lrama::Lexer::Token::Ident.new(s_value: "keyword_end", location: location_5) } - let(:token_6) { Lrama::Lexer::Token::UserCode.new(s_value: "$class = $tSTRING;", location: location_6) } + let(:token_6) { Lrama::Lexer::Token::UserCode.new(s_value: " $class = $tSTRING; ", location: location_6) } it "raises error" do # class : keyword_class tSTRING tSTRING keyword_end { $class = $tSTRING; } - [location_1, location_2, location_3, location_4, location_5, location_6].each do |location| - allow(location).to receive(:text).and_return(text) - end + allow_any_instance_of(Lrama::Lexer::Location).to receive(:_text).and_return([text]) rule_builder.lhs = token_1 rule_builder.add_rhs(token_2) @@ -323,9 +319,9 @@ class : keyword_class tSTRING keyword_end { $classes = $1; } rule_builder.complete_input expected = <<-TEXT -parse.y:1:50: Referring symbol `tSTRING` is duplicated. +parse.y:1:61: Referring symbol `tSTRING` is duplicated. class : keyword_class tSTRING tSTRING keyword_end { $class = $tSTRING; } - ^^^^^^^^^^^^^^^^^^^^^^ + ^^^^^^^^ TEXT expect { rule_builder.send(:preprocess_references) }.to raise_error(expected) diff --git a/spec/lrama/lexer/location_spec.rb b/spec/lrama/lexer/location_spec.rb index 2a943471..bdc4c045 100644 --- a/spec/lrama/lexer/location_spec.rb +++ b/spec/lrama/lexer/location_spec.rb @@ -6,6 +6,15 @@ end end + describe "#partial_location" do + it "creates new partial location" do + path = fixture_path("lexer/location.y") + location = Lrama::Lexer::Location.new(grammar_file_path: path, first_line: 38, first_column: 10, last_line: 42, last_column: 9) + + expect(location.partial_location(49, 57)).to eq Lrama::Lexer::Location.new(grammar_file_path: path, first_line: 40, first_column: 11, last_line: 40, last_column: 19) + end + end + describe "#generate_error_message" do it "returns decorated error message" do path = fixture_path("lexer/location.y") diff --git a/spec/lrama/parser_spec.rb b/spec/lrama/parser_spec.rb index ad94cb17..8f5d0cc5 100644 --- a/spec/lrama/parser_spec.rb +++ b/spec/lrama/parser_spec.rb @@ -2383,7 +2383,11 @@ class : keyword_class tSTRING keyword_end { code 1 } expr[result]: NUM | expr[left] expr[right] '+' - { $results = $left + $right; } + { + // comment + $results = $left + $right; + // comment + } | expr expr '-' { $$ = $1 - $2; } ; @@ -2391,9 +2395,9 @@ class : keyword_class tSTRING keyword_end { code 1 } create_grammar_file("parse.y", y) do |file, content| expected = <<-ERROR -#{file.path}:25:17: Referring symbol `results` is not found. - { $results = $left + $right; } - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#{file.path}:27:18: Referring symbol `results` is not found. + $results = $left + $right; + ^^^^^^^^ ERROR expect { Lrama::Parser.new(content, file.path).parse }.to raise_error(expected)