From 94d7f48ff7213396709e53e7326a323fbc7b93f5 Mon Sep 17 00:00:00 2001 From: yui-knk Date: Thu, 14 Dec 2023 17:24:05 +0800 Subject: [PATCH] wip --- lib/lrama/grammar/reference.rb | 7 ++-- lib/lrama/grammar/rule_builder.rb | 49 ++++++++++++++++++++++-- lib/lrama/lexer/token/user_code.rb | 4 +- sig/lrama/grammar/reference.rbs | 3 +- sig/lrama/grammar/rule_builder.rbs | 3 ++ spec/fixtures/integration/parser_state.y | 8 ++-- spec/lrama/grammar/rule_builder_spec.rb | 2 +- 7 files changed, 62 insertions(+), 14 deletions(-) diff --git a/lib/lrama/grammar/reference.rb b/lib/lrama/grammar/reference.rb index 24c98129..d60edacb 100644 --- a/lib/lrama/grammar/reference.rb +++ b/lib/lrama/grammar/reference.rb @@ -2,11 +2,12 @@ module Lrama class Grammar # type: :dollar or :at # name: String (e.g. $$, $foo, $expr.right) - # index: Integer (e.g. $1) + # number: Integer (e.g. $1) + # index: # ex_tag: "$1" (Optional) - class Reference < Struct.new(:type, :name, :index, :ex_tag, :first_column, :last_column, keyword_init: true) + class Reference < Struct.new(:type, :name, :number, :index, :ex_tag, :first_column, :last_column, keyword_init: true) def value - name || index + name || number end end end diff --git a/lib/lrama/grammar/rule_builder.rb b/lib/lrama/grammar/rule_builder.rb index 42a1e7ba..e255ddfa 100644 --- a/lib/lrama/grammar/rule_builder.rb +++ b/lib/lrama/grammar/rule_builder.rb @@ -164,23 +164,33 @@ def numberize_references next unless token.is_a?(Lrama::Lexer::Token::UserCode) token.references.each do |ref| + # Derive number reference index from named reference ref_name = ref.name if ref_name && ref_name != '$' if lhs.referred_by?(ref_name) ref.name = '$' else - candidates = rhs.each_with_index.select {|token, i| token.referred_by?(ref_name) } + candidates = referable_tokens.each_with_index.select {|token, i| token.referred_by?(ref_name) } raise "Referring symbol `#{ref_name}` is duplicated. #{token}" if candidates.size >= 2 raise "Referring symbol `#{ref_name}` is not found. #{token}" unless referring_symbol = candidates.first - ref.index = referring_symbol[1] + 1 + ref.number = referring_symbol[1] + 1 + end + end + + if ref.number + # Remapping number reference index to include non referable tokens + # TODO: Is it better to separate "number" of reference from actual "index" (Grammar::Reference)? + ref.index = number_to_index[ref.number] + + if !ref.index + raise "Can not refer to not exist component. $#{ref.number}" end end # TODO: Need to check index of @ too? next if ref.type == :at - if ref.index # TODO: Prohibit $0 even so Bison allows it? # See: https://www.gnu.org/software/bison/manual/html_node/Actions.html @@ -191,6 +201,39 @@ def numberize_references end end + def referable_token?(token) + case token + when Lrama::Lexer::Token::ParserStatePop + false + when Lrama::Lexer::Token::ParserStatePush + false + when Lrama::Lexer::Token::ParserStateSet + false + else + true + end + end + + def referable_tokens + rhs.select do |token| + referable_token?(token) + end + end + + def number_to_index + return @number_to_index if @number_to_index + + @number_to_index = [0] + + rhs.each.with_index(1) do |token, i| + if referable_token?(token) + @number_to_index << i + end + end + + @number_to_index + end + def flush_user_code if c = @user_code @rhs << c diff --git a/lib/lrama/lexer/token/user_code.rb b/lib/lrama/lexer/token/user_code.rb index abe6fba2..5f06b9b7 100644 --- a/lib/lrama/lexer/token/user_code.rb +++ b/lib/lrama/lexer/token/user_code.rb @@ -38,7 +38,7 @@ def scan_reference(scanner) return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1) when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $1 tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil - return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1) + return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[2]), index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1) when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $program (named reference without brackets) tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1) @@ -51,7 +51,7 @@ def scan_reference(scanner) when scanner.scan(/@\$/) # @$ return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos - 1) when scanner.scan(/@(\d+)/) # @1 - return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1) + return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[1]), index: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1) when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets) return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1) when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets) diff --git a/sig/lrama/grammar/reference.rbs b/sig/lrama/grammar/reference.rbs index e1b781d7..a27cbac9 100644 --- a/sig/lrama/grammar/reference.rbs +++ b/sig/lrama/grammar/reference.rbs @@ -3,6 +3,7 @@ module Lrama class Reference attr_accessor type: ::Symbol attr_accessor name: String + attr_accessor number: Integer attr_accessor index: Integer attr_accessor ex_tag: Lexer::Token? attr_accessor first_column: Integer @@ -10,7 +11,7 @@ module Lrama attr_accessor position_in_rhs: Integer? def initialize: ( - type: ::Symbol, ?name: String, ?index: Integer, ?ex_tag: Lexer::Token?, + type: ::Symbol, ?name: String, ?number: Integer, ?index: Integer, ?ex_tag: Lexer::Token?, first_column: Integer, last_column: Integer, ?position_in_rhs: Integer? ) -> void diff --git a/sig/lrama/grammar/rule_builder.rbs b/sig/lrama/grammar/rule_builder.rbs index 232b0876..aa79ba71 100644 --- a/sig/lrama/grammar/rule_builder.rbs +++ b/sig/lrama/grammar/rule_builder.rbs @@ -37,6 +37,9 @@ module Lrama def process_rhs: (Grammar::ParameterizingRuleResolver parameterizing_resolver) -> void def process_parser_state_token: (Lexer::Token, String, String, Integer, Grammar::ParameterizingRuleResolver) -> void def numberize_references: () -> void + def referable_token?: (Lexer::Token) -> bool + def referable_tokens: () -> Array[Lexer::Token] + def number_to_index: () -> Array[Integer] def flush_user_code: () -> void end end diff --git a/spec/fixtures/integration/parser_state.y b/spec/fixtures/integration/parser_state.y index 35d36939..59760c02 100644 --- a/spec/fixtures/integration/parser_state.y +++ b/spec/fixtures/integration/parser_state.y @@ -86,28 +86,28 @@ primary : NUM { printf("NUM => %d\n", $1); } %parser-state-push(in_class, in_class) cname { - printf("1. cname => %s. in_def: %s, in_class: %s.\n", $4, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME); + printf("1. cname => %s. in_def: %s, in_class: %s.\n", $cname, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME); } compstmt keyword_end %parser-state-pop(in_def) %parser-state-pop(in_class) { - printf("2. cname => %s. in_def: %s, in_class: %s.\n", $4, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME); + printf("2. cname => %s. in_def: %s, in_class: %s.\n", $2, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME); } | keyword_def %parser-state-push(in_def, in_def) %parser-state-push(in_class, not_in_class) fname { - printf("1. fname => %s. in_def: %s, in_class: %s.\n", $4, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME); + printf("1. fname => %s. in_def: %s, in_class: %s.\n", $2, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME); } compstmt keyword_end %parser-state-pop(in_def) %parser-state-pop(in_class) { - printf("2. fname => %s. in_def: %s, in_class: %s.\n", $4, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME); + printf("2. fname => %s. in_def: %s, in_class: %s.\n", $fname, YY_CURRENT_STATE_IN_DEF_NAME, YY_CURRENT_STATE_IN_CLASS_NAME); } ; diff --git a/spec/lrama/grammar/rule_builder_spec.rb b/spec/lrama/grammar/rule_builder_spec.rb index 2e48ac75..1b9c1c30 100644 --- a/spec/lrama/grammar/rule_builder_spec.rb +++ b/spec/lrama/grammar/rule_builder_spec.rb @@ -229,7 +229,7 @@ rule_builder.user_code = token_5 rule_builder.complete_input - expect { rule_builder.send(:preprocess_references) }.to raise_error(/Can not refer following component\. 10 >= 4\./) + expect { rule_builder.send(:preprocess_references) }.to raise_error(/Can not refer to not exist component\. \$10/) end end