Remove Symbols::Base, Symbols::Nterms, and Symbols::Terms to deleg…

…ate processing to the resolver * Move all Symbol generation code, including #fill_symbol_number and so on, into Resolver * Remove Symbols::Base, Symbols::Nterms and Symbols::Terms * Keep Grammar#terms and Grammar#nterms; both of them will be delegated to symbols_resolver
ruby · Feb 5, 2024 · 047c8ea · 047c8ea
1 parent d36fbce
commit 047c8ea
Show file tree

Hide file tree

Showing 8 changed files with 145 additions and 194 deletions.
diff --git a/lib/lrama/context.rb b/lib/lrama/context.rb
@@ -23,7 +23,7 @@ def initialize(states)
 
     # enum yytokentype
     def yytokentype
-      @states.terms.symbols.reject do |term|
+      @states.terms.reject do |term|
         0 < term.token_id && term.token_id < 128
       end.map do |term|
         [term.id.s_value, term.token_id, term.display_name]
@@ -68,7 +68,7 @@ def yynstates
 
     # Last token number
     def yymaxutok
-      @states.terms.symbols.map(&:token_id).max
+      @states.terms.map(&:token_id).max
     end
 
     # YYTRANSLATE
@@ -78,7 +78,7 @@ def yytranslate
       # 2 is YYSYMBOL_YYUNDEF
       a = Array.new(yymaxutok, 2)
 
-      @states.terms.symbols.each do |term|
+      @states.terms.each do |term|
         a[term.token_id] = term.number
       end
 
@@ -88,7 +88,7 @@ def yytranslate
     def yytranslate_inverted
       a = Array.new(@states.symbols.count, @states.undef_symbol.token_id)
 
-      @states.terms.symbols.each do |term|
+      @states.terms.each do |term|
         a[term.number] = term.token_id
       end
 
@@ -300,7 +300,7 @@ def compute_yydefgoto
         end
       end
 
-      @states.nterms.symbols.each do |nterm|
+      @states.nterms.each do |nterm|
         if !(states = nterm_to_next_states[nterm])
           default_goto = 0
           not_default_gotos = []

diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb
@@ -176,7 +176,7 @@ def compute_nullable
 
       while true do
         rs  = @rules.select {|e| e.nullable.nil? }
-        nts = nterms.unset_nullable
+        nts = nterms.select {|e| e.nullable.nil? }
         rule_count_1  = rs.count
         nterm_count_1 = nts.count
 
@@ -195,7 +195,7 @@ def compute_nullable
         end
 
         rule_count_2  = @rules.count {|e| e.nullable.nil? }
-        nterm_count_2 = nterms.unset_nullable.count
+        nterm_count_2 = nterms.select {|e| e.nullable.nil? }.count
 
         if (rule_count_1 == rule_count_2) && (nterm_count_1 == nterm_count_2)
           break
@@ -206,18 +206,18 @@ def compute_nullable
         rule.nullable = false
       end
 
-      nterms.unset_nullable.each do |nterm|
+      nterms.select {|e| e.nullable.nil? }.each do |nterm|
         nterm.nullable = false
       end
     end
 
     def compute_first_set
-      terms.symbols.each do |term|
+      terms.each do |term|
         term.first_set = Set.new([term]).freeze
         term.first_set_bitmap = Lrama::Bitmap.from_array([term.number])
       end
 
-      nterms.symbols.each do |nterm|
+      nterms.each do |nterm|
         nterm.first_set = Set.new([]).freeze
         nterm.first_set_bitmap = Lrama::Bitmap.from_array([])
       end
@@ -239,7 +239,7 @@ def compute_first_set
         break unless changed
       end
 
-      nterms.symbols.each do |nterm|
+      nterms.each do |nterm|
         nterm.first_set = Lrama::Bitmap.to_array(nterm.first_set_bitmap).map do |number|
           find_symbol_by_number!(number)
         end.to_set

diff --git a/lib/lrama/grammar/symbols/base.rb b/lib/lrama/grammar/symbols/base.rb
diff --git a/lib/lrama/grammar/symbols/nterms.rb b/lib/lrama/grammar/symbols/nterms.rb
diff --git a/lib/lrama/grammar/symbols/resolver.rb b/lib/lrama/grammar/symbols/resolver.rb
@@ -5,12 +5,12 @@ class Resolver
         attr_reader :terms, :nterms
 
         def initialize
-          @terms = Terms.new
-          @nterms = Nterms.new
+          @terms = []
+          @nterms = []
         end
 
         def symbols
-          @symbols ||= (@terms.symbols + @nterms.symbols)
+          @symbols ||= (@terms + @nterms)
         end
 
         def sort_by_number!
@@ -33,14 +33,24 @@ def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
           end
 
           @symbols = nil
-          @terms.add(id: id, alias_name: alias_name, tag: tag, token_id: token_id).last
+          term = Symbol.new(
+            id: id, alias_name: alias_name, number: nil, tag: tag,
+            term: true, token_id: token_id, nullable: false
+          )
+          @terms << term
+          term
         end
 
         def add_nterm(id:, alias_name: nil, tag: nil)
           return if find_symbol_by_id(id)
 
           @symbols = nil
-          @nterms.add(id: id, alias_name: alias_name, tag: tag).last
+          nterm = Symbol.new(
+            id: id, alias_name: alias_name, number: nil, tag: tag,
+            term: false, token_id: nil, nullable: nil,
+          )
+          @nterms << nterm
+          nterm
         end
 
         def find_symbol_by_s_value(s_value)
@@ -75,12 +85,20 @@ def find_symbol_by_number!(number)
         end
 
         def fill_symbol_number
-          @terms.fill_symbol_number(used_numbers)
-          @nterms.fill_symbol_number(used_numbers)
+          # YYEMPTY = -2
+          # YYEOF   =  0
+          # YYerror =  1
+          # YYUNDEF =  2
+          @number = 3
+          fill_terms_number
+          fill_nterms_number
         end
 
         def fill_nterm_type(types)
-          @nterms.fill_type(types)
+          types.each do |type|
+            nterm = find_nterm_by_id!(type.id)
+            nterm.tag = type.tag
+          end
         end
 
         def fill_printer(printers)
@@ -133,6 +151,88 @@ def validate!
 
         private
 
+        def find_nterm_by_id!(id)
+          @nterms.find do |s|
+            s.id == id
+          end || (raise "Symbol not found: #{id}")
+        end
+
+        def fill_terms_number
+          # Character literal in grammar file has
+          # token id corresponding to ASCII code by default,
+          # so start token_id from 256.
+          token_id = 256
+
+          @terms.each do |sym|
+            while used_numbers[@number] do
+              @number += 1
+            end
+
+            if sym.number.nil?
+              sym.number = @number
+              used_numbers[@number] = true
+              @number += 1
+            end
+
+            # If id is Token::Char, it uses ASCII code
+            if sym.token_id.nil?
+              if sym.id.is_a?(Lrama::Lexer::Token::Char)
+                # Ignore ' on the both sides
+                case sym.id.s_value[1..-2]
+                when "\\b"
+                  sym.token_id = 8
+                when "\\f"
+                  sym.token_id = 12
+                when "\\n"
+                  sym.token_id = 10
+                when "\\r"
+                  sym.token_id = 13
+                when "\\t"
+                  sym.token_id = 9
+                when "\\v"
+                  sym.token_id = 11
+                when "\""
+                  sym.token_id = 34
+                when "'"
+                  sym.token_id = 39
+                when "\\\\"
+                  sym.token_id = 92
+                when /\A\\(\d+)\z/
+                  sym.token_id = Integer($1, 8)
+                when /\A(.)\z/
+                  sym.token_id = $1.bytes.first
+                else
+                  raise "Unknown Char s_value #{sym}"
+                end
+              else
+                sym.token_id = token_id
+                token_id += 1
+              end
+            end
+          end
+        end
+
+        def fill_nterms_number
+          token_id = 0
+
+          @nterms.each do |sym|
+            while used_numbers[@number] do
+              @number += 1
+            end
+
+            if sym.number.nil?
+              sym.number = @number
+              used_numbers[@number] = true
+              @number += 1
+            end
+
+            if sym.token_id.nil?
+              sym.token_id = token_id
+              token_id += 1
+            end
+          end
+        end
+
         def used_numbers
           return @used_numbers if defined?(@used_numbers)