From 20e0b9d090fdb83418bd376715685f1bb5bc5fce Mon Sep 17 00:00:00 2001 From: yui-knk Date: Sun, 6 Aug 2023 23:47:18 +0900 Subject: [PATCH] Implement Counterexamples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement Nonunifying Counterexamples. See https://www.cs.cornell.edu/andru/papers/cupex/cupex.pdf These examples are rendered for conflicts. ``` Shift Conflict: 0: stmt "end of file" 3: expr '?' stmt stmt 6: expr '+' expr 6: expr '+' expr 6: expr • '+' expr Reduce Conflict: 0: stmt "end of file" 3: expr '?' stmt stmt 6: expr '+' expr 6: expr '+' expr • ``` --- doc/TODO.md | 4 + lib/lrama.rb | 1 + lib/lrama/command.rb | 2 +- lib/lrama/counterexamples.rb | 285 ++++++++++++ lib/lrama/counterexamples/derivation.rb | 63 +++ lib/lrama/counterexamples/example.rb | 124 ++++++ lib/lrama/counterexamples/path.rb | 69 +++ lib/lrama/counterexamples/state_item.rb | 6 + lib/lrama/counterexamples/triple.rb | 21 + lib/lrama/grammar.rb | 35 ++ lib/lrama/grammar/rule.rb | 6 + lib/lrama/grammar/symbol.rb | 1 + lib/lrama/parser.rb | 1 + lib/lrama/state.rb | 11 +- lib/lrama/state/conflict.rb | 6 - lib/lrama/state/reduce_reduce_conflict.rb | 9 + lib/lrama/state/shift_reduce_conflict.rb | 9 + lib/lrama/states.rb | 21 +- lib/lrama/states/item.rb | 40 +- lib/lrama/states_reporter.rb | 31 +- spec/lrama/command_spec.rb | 2 +- spec/lrama/counterexamples_spec.rb | 516 ++++++++++++++++++++++ 22 files changed, 1241 insertions(+), 22 deletions(-) create mode 100644 lib/lrama/counterexamples.rb create mode 100644 lib/lrama/counterexamples/derivation.rb create mode 100644 lib/lrama/counterexamples/example.rb create mode 100644 lib/lrama/counterexamples/path.rb create mode 100644 lib/lrama/counterexamples/state_item.rb create mode 100644 lib/lrama/counterexamples/triple.rb delete mode 100644 lib/lrama/state/conflict.rb create mode 100644 lib/lrama/state/reduce_reduce_conflict.rb create mode 100644 lib/lrama/state/shift_reduce_conflict.rb create mode 100644 spec/lrama/counterexamples_spec.rb diff --git a/doc/TODO.md b/doc/TODO.md index f5fb1581..ac7419f4 100644 --- a/doc/TODO.md +++ b/doc/TODO.md @@ -44,6 +44,10 @@ * Reporting * [ ] Bison style * [ ] Wrap not selected reduce with "[]". See basic.output file generated by Bison. +* Counterexamples + * [x] Nonunifying Counterexamples + * [ ] Unifying Counterexamples + * [ ] Performance improvement using reverse_transitions and reverse_productions * Error Tolerance * [x] Corchuelo et al. algorithm with N = 1 (this means the next token when error is raised) * [x] Add new decl for error token semantic value initialization (%error-token) diff --git a/lib/lrama.rb b/lib/lrama.rb index 66631fc8..12e635d8 100644 --- a/lib/lrama.rb +++ b/lib/lrama.rb @@ -1,6 +1,7 @@ require "lrama/bitmap" require "lrama/command" require "lrama/context" +require "lrama/counterexamples" require "lrama/digraph" require "lrama/grammar" require "lrama/lexer" diff --git a/lib/lrama/command.rb b/lib/lrama/command.rb index 141523fc..9fceb55f 100644 --- a/lib/lrama/command.rb +++ b/lib/lrama/command.rb @@ -67,7 +67,7 @@ def validate_report(report) bison_list = %w[states itemsets lookaheads solved counterexamples cex all none] others = %w[verbose] list = bison_list + others - not_supported = %w[counterexamples cex none] + not_supported = %w[cex none] h = { grammar: true } report.each do |r| diff --git a/lib/lrama/counterexamples.rb b/lib/lrama/counterexamples.rb new file mode 100644 index 00000000..a5d62a0c --- /dev/null +++ b/lib/lrama/counterexamples.rb @@ -0,0 +1,285 @@ +require "set" + +require "lrama/counterexamples/derivation" +require "lrama/counterexamples/example" +require "lrama/counterexamples/path" +require "lrama/counterexamples/state_item" +require "lrama/counterexamples/triple" + +module Lrama + # See: https://www.cs.cornell.edu/andru/papers/cupex/cupex.pdf + # 4. Constructing Nonunifying Counterexamples + class Counterexamples + attr_reader :transitions, :productions + + def initialize(states) + @states = states + setup_transitions + setup_productions + end + + def to_s + "#" + end + alias :inspect :to_s + + def compute(conflict_state) + conflict_state.conflicts.flat_map do |conflict| + case conflict.type + when :shift_reduce + shift_reduce_example(conflict_state, conflict) + when :reduce_reduce + reduce_reduce_examples(conflict_state, conflict) + end + end.compact + end + + private + + def setup_transitions + # Hash [StateItem, Symbol] => StateItem + @transitions = {} + # Hash [StateItem, Symbol] => Set(StateItem) + @reverse_transitions = {} + + @states.states.each do |src_state| + trans = {} + + src_state.transitions.each do |shift, next_state| + trans[shift.next_sym] = next_state + end + + src_state.items.each do |src_item| + next if src_item.end_of_rule? + sym = src_item.next_sym + dest_state = trans[sym] + + dest_state.kernels.each do |dest_item| + next unless (src_item.rule == dest_item.rule) && (src_item.position + 1 == dest_item.position) + src_state_item = StateItem.new(src_state, src_item) + dest_state_item = StateItem.new(dest_state, dest_item) + + @transitions[[src_state_item, sym]] = dest_state_item + + key = [dest_state_item, sym] + @reverse_transitions[key] ||= Set.new + @reverse_transitions[key] << src_state_item + end + end + end + end + + def setup_productions + # Hash [StateItem] => Set(Item) + @productions = {} + # Hash [State, Symbol] => Set(Item). Symbol is nterm + @reverse_productions = {} + + @states.states.each do |state| + # LHS => Set(Item) + h = {} + + state.closure.each do |item| + sym = item.lhs + + h[sym] ||= Set.new + h[sym] << item + end + + state.items.each do |item| + next if item.end_of_rule? + next if item.next_sym.term? + + sym = item.next_sym + state_item = StateItem.new(state, item) + key = [state, sym] + + @productions[state_item] = h[sym] + + @reverse_productions[key] ||= Set.new + @reverse_productions[key] << item + end + end + end + + def shift_reduce_example(conflict_state, conflict) + conflict_symbol = conflict.symbols.first + shift_conflict_item = conflict_state.items.find { |item| item.next_sym == conflict_symbol } + path2 = shortest_path(conflict_state, conflict.reduce.item, conflict_symbol) + path1 = find_shift_conflict_shortest_path(path2, conflict_state, shift_conflict_item) + + Example.new(path1, path2, conflict, conflict_symbol, self) + end + + def reduce_reduce_examples(conflict_state, conflict) + conflict_symbol = conflict.symbols.first + path1 = shortest_path(conflict_state, conflict.reduce1.item, conflict_symbol) + path2 = shortest_path(conflict_state, conflict.reduce2.item, conflict_symbol) + + Example.new(path1, path2, conflict, conflict_symbol, self) + end + + def find_shift_conflict_shortest_path(reduce_path, conflict_state, conflict_item) + state_items = find_shift_conflict_shortest_state_items(reduce_path, conflict_state, conflict_item) + build_paths_from_state_items(state_items) + end + + def find_shift_conflict_shortest_state_items(reduce_path, conflict_state, conflict_item) + target_state_item = StateItem.new(conflict_state, conflict_item) + result = [target_state_item] + reversed_reduce_path = reduce_path.to_a.reverse + # Index for state_item + i = 0 + + while (path = reversed_reduce_path[i]) + # Index for prev_state_item + j = i + 1 + _j = j + + while (prev_path = reversed_reduce_path[j]) + if prev_path.production? + j += 1 + else + break + end + end + + state_item = path.to + prev_state_item = prev_path&.to + + if target_state_item == state_item || target_state_item.item.start_item? + result.concat(reversed_reduce_path[_j..-1].map(&:to)) + break + end + + if target_state_item.item.beginning_of_rule? + queue = [] + queue << [target_state_item] + + # Find reverse production + while (sis = queue.shift) + si = sis.last + + # Reach to start state + if si.item.start_item? + sis.shift + result.concat(sis) + target_state_item = si + break + end + + if !si.item.beginning_of_rule? + key = [si, si.item.previous_sym] + @reverse_transitions[key].each do |prev_target_state_item| + next if prev_target_state_item.state != prev_state_item.state + sis.shift + result.concat(sis) + result << prev_target_state_item + target_state_item = prev_target_state_item + i = j + queue.clear + break + end + else + key = [si.state, si.item.lhs] + @reverse_productions[key].each do |item| + state_item = StateItem.new(si.state, item) + queue << (sis + [state_item]) + end + end + end + else + # Find reverse transition + key = [target_state_item, target_state_item.item.previous_sym] + @reverse_transitions[key].each do |prev_target_state_item| + next if prev_target_state_item.state != prev_state_item.state + result << prev_target_state_item + target_state_item = prev_target_state_item + i = j + break + end + end + end + + result.reverse + end + + def build_paths_from_state_items(state_items) + paths = state_items.zip([nil] + state_items).map do |si, prev_si| + case + when prev_si.nil? + StartPath.new(si) + when si.item.beginning_of_rule? + ProductionPath.new(prev_si, si) + else + TransitionPath.new(prev_si, si) + end + end + + paths + end + + def shortest_path(conflict_state, conflict_reduce_item, conflict_term) + # queue: is an array of [Triple, [Path]] + queue = [] + visited = {} + start_state = @states.states.first + raise "BUG: Start state should be just one kernel." if start_state.kernels.count != 1 + + start = Triple.new(start_state, start_state.kernels.first, Set.new([@states.eof_symbol])) + + queue << [start, [StartPath.new(start.state_item)]] + + while true + triple, paths = queue.shift + + next if visited[triple] + visited[triple] = true + + # Found + if triple.state == conflict_state && triple.item == conflict_reduce_item && triple.l.include?(conflict_term) + return paths + end + + # transition + triple.state.transitions.each do |shift, next_state| + next unless triple.item.next_sym && triple.item.next_sym == shift.next_sym + next_state.kernels.each do |kernel| + next if kernel.rule != triple.item.rule + t = Triple.new(next_state, kernel, triple.l) + queue << [t, paths + [TransitionPath.new(triple.state_item, t.state_item)]] + end + end + + # production step + triple.state.closure.each do |item| + next unless triple.item.next_sym && triple.item.next_sym == item.lhs + l = follow_l(triple.item, triple.l) + t = Triple.new(triple.state, item, l) + queue << [t, paths + [ProductionPath.new(triple.state_item, t.state_item)]] + end + + break if queue.empty? + end + + return nil + end + + def follow_l(item, current_l) + # 1. follow_L (A -> X1 ... Xn-1 • Xn) = L + # 2. follow_L (A -> X1 ... Xk • Xk+1 Xk+2 ... Xn) = {Xk+2} if Xk+2 is a terminal + # 3. follow_L (A -> X1 ... Xk • Xk+1 Xk+2 ... Xn) = FIRST(Xk+2) if Xk+2 is a nonnullable nonterminal + # 4. follow_L (A -> X1 ... Xk • Xk+1 Xk+2 ... Xn) = FIRST(Xk+2) + follow_L (A -> X1 ... Xk+1 • Xk+2 ... Xn) if Xk+2 is a nullable nonterminal + case + when item.number_of_rest_symbols == 1 + current_l + when item.next_next_sym.term? + Set.new([item.next_next_sym]) + when !item.next_next_sym.nullable + item.next_next_sym.first_set + else + item.next_next_sym.first_set + follow_l(item.new_by_next_position, current_l) + end + end + end +end diff --git a/lib/lrama/counterexamples/derivation.rb b/lib/lrama/counterexamples/derivation.rb new file mode 100644 index 00000000..691e9353 --- /dev/null +++ b/lib/lrama/counterexamples/derivation.rb @@ -0,0 +1,63 @@ +module Lrama + class Counterexamples + class Derivation + attr_reader :item, :left, :right + attr_writer :right + + def initialize(item, left, right = nil) + @item = item + @left = left + @right = right + end + + def to_s + "#" + end + alias :inspect :to_s + + def render_strings_for_report + result = [] + _render_for_report(self, 0, result, 0) + result.map(&:rstrip) + end + + def render_for_report + render_strings_for_report.join("\n") + end + + private + + def _render_for_report(derivation, offset, strings, index) + item = derivation.item + if strings[index] + strings[index] << " " * (offset - strings[index].length) + else + strings[index] = " " * offset + end + str = strings[index] + str << "#{item.rule_id}: #{item.symbols_before_dot.map(&:display_name).join(" ")} " + + if derivation.left + len = str.length + str << "#{item.next_sym.display_name}" + length = _render_for_report(derivation.left, len, strings, index + 1) + # I want String#ljust! + str << " " * (length - str.length) + else + str << " • #{item.symbols_after_dot.map(&:display_name).join(" ")} " + return str.length + end + + if derivation.right&.left + length = _render_for_report(derivation.right.left, str.length, strings, index + 1) + str << "#{item.symbols_after_dot[1..-1].map(&:display_name).join(" ")} " + str << " " * (length - str.length) if length > str.length + elsif item.next_next_sym + str << "#{item.symbols_after_dot[1..-1].map(&:display_name).join(" ")} " + end + + return str.length + end + end + end +end diff --git a/lib/lrama/counterexamples/example.rb b/lib/lrama/counterexamples/example.rb new file mode 100644 index 00000000..8f02d71f --- /dev/null +++ b/lib/lrama/counterexamples/example.rb @@ -0,0 +1,124 @@ +module Lrama + class Counterexamples + class Example + attr_reader :path1, :path2, :conflict, :conflict_symbol + + # path1 is shift conflict when S/R conflict + # path2 is always reduce conflict + def initialize(path1, path2, conflict, conflict_symbol, counterexamples) + @path1 = path1 + @path2 = path2 + @conflict = conflict + @conflict_symbol = conflict_symbol + @counterexamples = counterexamples + end + + def type + @conflict.type + end + + def path1_item + @path1.last.to.item + end + + def path2_item + @path2.last.to.item + end + + def derivations1 + @derivations1 ||= _derivations(path1) + end + + def derivations2 + @derivations2 ||= _derivations(path2) + end + + private + + def _derivations(paths) + derivation = nil + current = :production + lookahead_sym = paths.last.to.item.end_of_rule? ? @conflict_symbol : nil + + paths.reverse.each do |path| + item = path.to.item + + case current + when :production + case path + when StartPath + derivation = Derivation.new(item, derivation) + current = :start + when TransitionPath + derivation = Derivation.new(item, derivation) + current = :transition + when ProductionPath + derivation = Derivation.new(item, derivation) + current = :production + end + + if lookahead_sym && item.next_next_sym && item.next_next_sym.first_set.include?(lookahead_sym) + state_item = @counterexamples.transitions[[path.to, item.next_sym]] + derivation2 = find_derivation_for_symbol(state_item, lookahead_sym) + derivation.right = derivation2 + lookahead_sym = nil + end + + when :transition + case path + when StartPath + derivation = Derivation.new(item, derivation) + current = :start + when TransitionPath + # ignore + current = :transition + when ProductionPath + # ignore + current = :production + end + else + raise "BUG: Unknown #{current}" + end + + break if current == :start + end + + derivation + end + + def find_derivation_for_symbol(state_item, sym) + queue = [] + queue << [state_item] + + while (sis = queue.shift) + si = sis.last + next_sym = si.item.next_sym + + if next_sym == sym + derivation = nil + + sis.reverse.each do |si| + derivation = Derivation.new(si.item, derivation) + end + + return derivation + end + + if next_sym.nterm? && next_sym.first_set.include?(sym) + @counterexamples.productions[si].each do |next_item| + next if next_item.empty_rule? + next_si = StateItem.new(si.state, next_item) + next if sis.include?(next_si) + queue << (sis + [next_si]) + end + + if next_sym.nullable + next_si = @counterexamples.transitions[[si, next_sym]] + queue << (sis + [next_si]) + end + end + end + end + end + end +end diff --git a/lib/lrama/counterexamples/path.rb b/lib/lrama/counterexamples/path.rb new file mode 100644 index 00000000..83135286 --- /dev/null +++ b/lib/lrama/counterexamples/path.rb @@ -0,0 +1,69 @@ +module Lrama + class Counterexamples + class Path + def initialize(from_state_item, to_state_item) + @from_state_item = from_state_item + @to_state_item = to_state_item + end + + def from + @from_state_item + end + + def to + @to_state_item + end + + def to_s + "#" + end + alias :inspect :to_s + end + + class StartPath < Path + def initialize(to_state_item) + super nil, to_state_item + end + + def type + :start + end + + def transition? + false + end + + def production? + false + end + end + + class TransitionPath < Path + def type + :transition + end + + def transition? + true + end + + def production? + false + end + end + + class ProductionPath < Path + def type + :production + end + + def transition? + false + end + + def production? + true + end + end + end +end diff --git a/lib/lrama/counterexamples/state_item.rb b/lib/lrama/counterexamples/state_item.rb new file mode 100644 index 00000000..930ff4a5 --- /dev/null +++ b/lib/lrama/counterexamples/state_item.rb @@ -0,0 +1,6 @@ +module Lrama + class Counterexamples + class StateItem < Struct.new(:state, :item) + end + end +end diff --git a/lib/lrama/counterexamples/triple.rb b/lib/lrama/counterexamples/triple.rb new file mode 100644 index 00000000..e802becc --- /dev/null +++ b/lib/lrama/counterexamples/triple.rb @@ -0,0 +1,21 @@ +module Lrama + class Counterexamples + # s: state + # itm: item within s + # l: precise lookahead set + class Triple < Struct.new(:s, :itm, :l) + alias :state :s + alias :item :itm + alias :precise_lookahead_set :l + + def state_item + StateItem.new(state, item) + end + + def inspect + "#{state.inspect}. #{item.display_name}. #{l.map(&:id).map(&:s_value)}" + end + alias :to_s :inspect + end + end +end diff --git a/lib/lrama/grammar.rb b/lib/lrama/grammar.rb index c5eed6f3..bfd490cc 100644 --- a/lib/lrama/grammar.rb +++ b/lib/lrama/grammar.rb @@ -213,6 +213,41 @@ def compute_nullable end end + def compute_first_set + terms.each do |term| + term.first_set = Set.new([term]).freeze + term.first_set_bitmap = Lrama::Bitmap.from_array([term.number]) + end + + nterms.each do |nterm| + nterm.first_set = Set.new([]).freeze + nterm.first_set_bitmap = Lrama::Bitmap.from_array([]) + end + + while true do + changed = false + + @rules.each do |rule| + rule.rhs.each do |r| + if rule.lhs.first_set_bitmap | r.first_set_bitmap != rule.lhs.first_set_bitmap + changed = true + rule.lhs.first_set_bitmap = rule.lhs.first_set_bitmap | r.first_set_bitmap + end + + break unless r.nullable + end + end + + break unless changed + end + + nterms.each do |nterm| + nterm.first_set = Lrama::Bitmap.to_array(nterm.first_set_bitmap).map do |number| + find_symbol_by_number!(number) + end.to_set + end + end + def find_symbol_by_s_value(s_value) @symbols.find do |sym| sym.id.s_value == s_value diff --git a/lib/lrama/grammar/rule.rb b/lib/lrama/grammar/rule.rb index 7ed5b331..c559388b 100644 --- a/lib/lrama/grammar/rule.rb +++ b/lib/lrama/grammar/rule.rb @@ -17,6 +17,12 @@ def as_comment "#{l}: #{r}" end + # opt_nl: ε <-- empty_rule + # | '\n' <-- not empty_rule + def empty_rule? + rhs.empty? + end + def precedence precedence_sym&.precedence end diff --git a/lib/lrama/grammar/symbol.rb b/lib/lrama/grammar/symbol.rb index 28061bb4..9df1c2f6 100644 --- a/lib/lrama/grammar/symbol.rb +++ b/lib/lrama/grammar/symbol.rb @@ -7,6 +7,7 @@ module Lrama class Grammar class Symbol < Struct.new(:id, :alias_name, :number, :tag, :term, :token_id, :nullable, :precedence, :printer, :error_token, keyword_init: true) + attr_accessor :first_set, :first_set_bitmap attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol def term? diff --git a/lib/lrama/parser.rb b/lib/lrama/parser.rb index 4faa402d..a12c8bbb 100644 --- a/lib/lrama/parser.rb +++ b/lib/lrama/parser.rb @@ -22,6 +22,7 @@ def parse process_epilogue(grammar, lexer) grammar.prepare grammar.compute_nullable + grammar.compute_first_set grammar.validate! grammar diff --git a/lib/lrama/state.rb b/lib/lrama/state.rb index 808d39ea..6632bafe 100644 --- a/lib/lrama/state.rb +++ b/lib/lrama/state.rb @@ -1,7 +1,8 @@ -require "lrama/state/conflict" require "lrama/state/reduce" +require "lrama/state/reduce_reduce_conflict" require "lrama/state/resolved_conflict" require "lrama/state/shift" +require "lrama/state/shift_reduce_conflict" module Lrama class State @@ -100,6 +101,10 @@ def term_transitions @term_transitions end + def transitions + term_transitions + nterm_transitions + end + def selected_term_transitions term_transitions.select do |shift, next_state| !shift.not_selected @@ -143,6 +148,10 @@ def default_reduction_rule=(default_reduction_rule) end end + def has_conflicts? + !@conflicts.empty? + end + def sr_conflicts @conflicts.select do |conflict| conflict.type == :shift_reduce diff --git a/lib/lrama/state/conflict.rb b/lib/lrama/state/conflict.rb deleted file mode 100644 index 6ee80411..00000000 --- a/lib/lrama/state/conflict.rb +++ /dev/null @@ -1,6 +0,0 @@ -module Lrama - class State - class Conflict < Struct.new(:symbols, :reduce, :type, keyword_init: true) - end - end -end diff --git a/lib/lrama/state/reduce_reduce_conflict.rb b/lib/lrama/state/reduce_reduce_conflict.rb new file mode 100644 index 00000000..0a0e4dc2 --- /dev/null +++ b/lib/lrama/state/reduce_reduce_conflict.rb @@ -0,0 +1,9 @@ +module Lrama + class State + class ReduceReduceConflict < Struct.new(:symbols, :reduce1, :reduce2, keyword_init: true) + def type + :reduce_reduce + end + end + end +end diff --git a/lib/lrama/state/shift_reduce_conflict.rb b/lib/lrama/state/shift_reduce_conflict.rb new file mode 100644 index 00000000..f80bd5f3 --- /dev/null +++ b/lib/lrama/state/shift_reduce_conflict.rb @@ -0,0 +1,9 @@ +module Lrama + class State + class ShiftReduceConflict < Struct.new(:symbols, :shift, :reduce, keyword_init: true) + def type + :shift_reduce + end + end + end +end diff --git a/lib/lrama/states.rb b/lib/lrama/states.rb index 3d8d0890..d27c5411 100644 --- a/lib/lrama/states.rb +++ b/lib/lrama/states.rb @@ -436,7 +436,7 @@ def compute_shift_reduce_conflicts # Can resolve only when both have prec unless shift_prec && reduce_prec - state.conflicts << State::Conflict.new(symbols: [sym], reduce: reduce, type: :shift_reduce) + state.conflicts << State::ShiftReduceConflict.new(symbols: [sym], shift: shift, reduce: reduce) next end @@ -485,16 +485,21 @@ def compute_shift_reduce_conflicts def compute_reduce_reduce_conflicts states.each do |state| - a = [] + count = state.reduces.count - state.reduces.each do |reduce| - next if reduce.look_ahead.nil? + for i in 0...count do + reduce1 = state.reduces[i] + next if reduce1.look_ahead.nil? - intersection = a & reduce.look_ahead - a += reduce.look_ahead + for j in (i+1)...count do + reduce2 = state.reduces[j] + next if reduce2.look_ahead.nil? - if !intersection.empty? - state.conflicts << State::Conflict.new(symbols: intersection.dup, reduce: reduce, type: :reduce_reduce) + intersection = reduce1.look_ahead & reduce2.look_ahead + + if !intersection.empty? + state.conflicts << State::ReduceReduceConflict.new(symbols: intersection, reduce1: reduce1, reduce2: reduce2) + end end end end diff --git a/lib/lrama/states/item.rb b/lib/lrama/states/item.rb index 5c3696cc..823ccc72 100644 --- a/lib/lrama/states/item.rb +++ b/lib/lrama/states/item.rb @@ -12,20 +12,56 @@ def rule_id rule.id end + def empty_rule? + rule.empty_rule? + end + + def number_of_rest_symbols + rule.rhs.count - position + end + + def lhs + rule.lhs + end + def next_sym rule.rhs[position] end + def next_next_sym + rule.rhs[position + 1] + end + + def previous_sym + rule.rhs[position - 1] + end + def end_of_rule? rule.rhs.count == position end + def beginning_of_rule? + position == 0 + end + + def start_item? + rule.id == 0 && position == 0 + end + def new_by_next_position Item.new(rule: rule, position: position + 1) end - def previous_sym - rule.rhs[position - 1] + def symbols_before_dot + rule.rhs[0...position] + end + + def symbols_after_dot + rule.rhs[position..-1] + end + + def to_s + "#{lhs.id.s_value}: #{display_name}" end def display_name diff --git a/lib/lrama/states_reporter.rb b/lib/lrama/states_reporter.rb index d3dbe6b1..0d805829 100644 --- a/lib/lrama/states_reporter.rb +++ b/lib/lrama/states_reporter.rb @@ -14,13 +14,13 @@ def report(io, **options) private - def _report(io, grammar: false, states: false, itemsets: false, lookaheads: false, solved: false, verbose: false) + def _report(io, grammar: false, states: false, itemsets: false, lookaheads: false, solved: false, counterexamples: false, verbose: false) # TODO: Unused terms # TODO: Unused rules report_conflicts(io) report_grammar(io) if grammar - report_states(io, itemsets, lookaheads, solved, verbose) + report_states(io, itemsets, lookaheads, solved, counterexamples, verbose) end def report_conflicts(io) @@ -71,7 +71,11 @@ def report_grammar(io) io << "\n\n" end - def report_states(io, itemsets, lookaheads, solved, verbose) + def report_states(io, itemsets, lookaheads, solved, counterexamples, verbose) + if counterexamples + cex = Counterexamples.new(@states) + end + @states.states.each do |state| # Report State io << "State #{state.id}\n\n" @@ -194,6 +198,27 @@ def report_states(io, itemsets, lookaheads, solved, verbose) io << "\n" if !state.resolved_conflicts.empty? end + if counterexamples && state.has_conflicts? + # Report counterexamples + examples = cex.compute(state) + examples.each do |example| + label0 = example.type == :shift_reduce ? "shift/reduce" : "reduce/reduce" + label1 = example.type == :shift_reduce ? "Shift derivation" : "First Reduce derivation" + label2 = example.type == :shift_reduce ? "Reduce derivation" : "Second Reduce derivation" + + io << " #{label0} conflict on token #{example.conflict_symbol.id.s_value}:\n" + io << " #{example.path1_item.to_s}\n" + io << " #{example.path2_item.to_s}\n" + io << " #{label1}\n" + example.derivations1.render_strings_for_report.each do |str| + io << " #{str}\n" + end + io << " #{label2}\n" + example.derivations2.render_strings_for_report.each do |str| + io << " #{str}\n" + end + end + end if verbose # Report direct_read_sets diff --git a/spec/lrama/command_spec.rb b/spec/lrama/command_spec.rb index 70f64abb..3a7dd067 100644 --- a/spec/lrama/command_spec.rb +++ b/spec/lrama/command_spec.rb @@ -56,7 +56,7 @@ opts = command.send(:validate_report, ["all"]) expect(opts).to eq({ grammar: true, states: true, itemsets: true, - lookaheads: true, solved: true, + lookaheads: true, solved: true, counterexamples: true, }) end end diff --git a/spec/lrama/counterexamples_spec.rb b/spec/lrama/counterexamples_spec.rb new file mode 100644 index 00000000..ec4b4f02 --- /dev/null +++ b/spec/lrama/counterexamples_spec.rb @@ -0,0 +1,516 @@ +RSpec.describe Lrama::Counterexamples do + let(:out) { "" } + let(:warning) { Lrama::Warning.new(out) } + + describe "#compute" do + # Example comes from https://www.cs.cornell.edu/andru/papers/cupex/cupex.pdf + # "4. Constructing Nonunifying Counterexamples" + describe "Example of 'Finding Counterexamples from Parsing Conflicts'" do + let(:y) do + <<~STR +%{ +// Prologue +%} + +%union { + int i; +} + +%token keyword_if +%token keyword_then +%token keyword_else +%token arr +%token tEQ ":=" +%token digit + +%type stmt +%type expr +%type num + +%% + +stmt : keyword_if expr keyword_then stmt keyword_else stmt + | keyword_if expr keyword_then stmt + | expr '?' stmt stmt + | arr '[' expr ']' tEQ expr + ; + +expr : num + | expr '+' expr + ; + +num : digit + | num digit + ; + + +%% + + STR + end + + it "build counterexamples of S/R conflicts" do + grammar = Lrama::Parser.new(y).parse + states = Lrama::States.new(grammar, warning) + states.compute + counterexamples = Lrama::Counterexamples.new(states) + + # State 6 + # + # 5 expr: num • ["end of file", keyword_if, keyword_then, keyword_else, arr, digit, '?', ']', '+'] + # 8 num: num • digit + # + # digit shift, and go to state 12 + # digit reduce using rule 5 (expr) + state_6 = states.states[6] + examples = counterexamples.compute(state_6) + expect(examples.count).to eq 1 + example = examples[0] + + expect(example.type).to eq :shift_reduce + # Shift Conflict + expect(example.path1.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • expr '?' stmt stmt (rule 3)", + "stmt: expr • '?' stmt stmt (rule 3)", + "stmt: expr '?' • stmt stmt (rule 3)", + "stmt: • arr '[' expr ']' \":=\" expr (rule 4)", + "stmt: arr • '[' expr ']' \":=\" expr (rule 4)", + "stmt: arr '[' • expr ']' \":=\" expr (rule 4)", + "stmt: arr '[' expr • ']' \":=\" expr (rule 4)", + "stmt: arr '[' expr ']' • \":=\" expr (rule 4)", + "stmt: arr '[' expr ']' \":=\" • expr (rule 4)", + "expr: • num (rule 5)", + "num: • num digit (rule 8)", + "num: num • digit (rule 8)" + ]) + expect(example.derivations1.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 3: expr '?' stmt stmt + 4: arr '[' expr ']' ":=" expr + 5: num + 8: num • digit + STR + # Reduce Conflict + expect(example.path2.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • expr '?' stmt stmt (rule 3)", + "stmt: expr • '?' stmt stmt (rule 3)", + "stmt: expr '?' • stmt stmt (rule 3)", + "stmt: • arr '[' expr ']' \":=\" expr (rule 4)", + "stmt: arr • '[' expr ']' \":=\" expr (rule 4)", + "stmt: arr '[' • expr ']' \":=\" expr (rule 4)", + "stmt: arr '[' expr • ']' \":=\" expr (rule 4)", + "stmt: arr '[' expr ']' • \":=\" expr (rule 4)", + "stmt: arr '[' expr ']' \":=\" • expr (rule 4)", + "expr: • num (rule 5)", + "expr: num • (rule 5)" + ]) + expect(example.derivations2.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 3: expr '?' stmt stmt + 4: arr '[' expr ']' ":=" expr 3: expr '?' stmt stmt + 5: num • 5: num + 7: • digit + STR + + + # State 16 + # + # 6 expr: expr • '+' expr + # 6 | expr '+' expr • ["end of file", keyword_if, keyword_then, keyword_else, arr, digit, '?', ']', '+'] + # + # '+' shift, and go to state 11 + # '+' reduce using rule 6 (expr) + state_16 = states.states[16] + examples = counterexamples.compute(state_16) + expect(examples.count).to eq 1 + example = examples[0] + + expect(example.type).to eq :shift_reduce + # Shift Conflict + expect(example.path1.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • expr '?' stmt stmt (rule 3)", + "expr: • expr '+' expr (rule 6)", + "expr: • expr '+' expr (rule 6)", + "expr: expr • '+' expr (rule 6)", + "expr: expr '+' • expr (rule 6)", + "expr: • expr '+' expr (rule 6)", + "expr: expr • '+' expr (rule 6)" + ]) + expect(example.derivations1.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 3: expr '?' stmt stmt + 6: expr '+' expr + 6: expr '+' expr + 6: expr • '+' expr + STR + # Reduce Conflict + expect(example.path2.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • expr '?' stmt stmt (rule 3)", + "expr: • expr '+' expr (rule 6)", + "expr: • expr '+' expr (rule 6)", + "expr: expr • '+' expr (rule 6)", + "expr: expr '+' • expr (rule 6)", + "expr: expr '+' expr • (rule 6)" + ]) + expect(example.derivations2.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 3: expr '?' stmt stmt + 6: expr '+' expr + 6: expr '+' expr • + STR + + + # State 17 + # + # 1 stmt: keyword_if expr keyword_then stmt • keyword_else stmt + # 2 | keyword_if expr keyword_then stmt • ["end of file", keyword_if, keyword_else, arr, digit] + # + # keyword_else shift, and go to state 20 + # keyword_else reduce using rule 2 (stmt) + state_17 = states.states[17] + examples = counterexamples.compute(state_17) + expect(examples.count).to eq 1 + example = examples[0] + + expect(example.type).to eq :shift_reduce + # Shift Conflict + expect(example.path1.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • keyword_if expr keyword_then stmt keyword_else stmt (rule 1)", + "stmt: keyword_if • expr keyword_then stmt keyword_else stmt (rule 1)", + "stmt: keyword_if expr • keyword_then stmt keyword_else stmt (rule 1)", + "stmt: keyword_if expr keyword_then • stmt keyword_else stmt (rule 1)", + "stmt: • keyword_if expr keyword_then stmt keyword_else stmt (rule 1)", + "stmt: keyword_if • expr keyword_then stmt keyword_else stmt (rule 1)", + "stmt: keyword_if expr • keyword_then stmt keyword_else stmt (rule 1)", + "stmt: keyword_if expr keyword_then • stmt keyword_else stmt (rule 1)", + "stmt: keyword_if expr keyword_then stmt • keyword_else stmt (rule 1)" + ]) + expect(example.derivations1.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 1: keyword_if expr keyword_then stmt keyword_else stmt + 1: keyword_if expr keyword_then stmt • keyword_else stmt + STR + # Reduce Conflict + expect(example.path2.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • keyword_if expr keyword_then stmt keyword_else stmt (rule 1)", + "stmt: keyword_if • expr keyword_then stmt keyword_else stmt (rule 1)", + "stmt: keyword_if expr • keyword_then stmt keyword_else stmt (rule 1)", + "stmt: keyword_if expr keyword_then • stmt keyword_else stmt (rule 1)", + "stmt: • keyword_if expr keyword_then stmt (rule 2)", + "stmt: keyword_if • expr keyword_then stmt (rule 2)", + "stmt: keyword_if expr • keyword_then stmt (rule 2)", + "stmt: keyword_if expr keyword_then • stmt (rule 2)", + "stmt: keyword_if expr keyword_then stmt • (rule 2)" + ]) + expect(example.derivations2.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 1: keyword_if expr keyword_then stmt keyword_else stmt + 2: keyword_if expr keyword_then stmt • + STR + end + end + + describe "R/R conflicts" do + let(:y) do + <<~STR +%{ +// Prologue +%} + +%union { + int i; +} + +%token digit + +%type stmt +%type expr1 +%type expr2 + +%% + +stmt : expr1 + | expr2 + ; + +expr1 : digit '+' digit + ; + +expr2 : digit '+' digit + ; + +%% + + STR + end + + it "build counterexamples of R/R conflicts" do + grammar = Lrama::Parser.new(y).parse + states = Lrama::States.new(grammar, warning) + states.compute + counterexamples = Lrama::Counterexamples.new(states) + + # State 7 + # + # 3 expr1: digit '+' digit • ["end of file"] + # 4 expr2: digit '+' digit • ["end of file"] + # + # "end of file" reduce using rule 3 (expr1) + # "end of file" reduce using rule 4 (expr2) + state_7 = states.states[7] + examples = counterexamples.compute(state_7) + expect(examples.count).to eq 1 + example = examples[0] + + expect(example.type).to eq :reduce_reduce + # Reduce Conflict + expect(example.path1.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • expr1 (rule 1)", + "expr1: • digit '+' digit (rule 3)", + "expr1: digit • '+' digit (rule 3)", + "expr1: digit '+' • digit (rule 3)", + "expr1: digit '+' digit • (rule 3)" + ]) + expect(example.derivations1.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 1: expr1 + 3: digit '+' digit • + STR + # Reduce Conflict + expect(example.path2.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • expr2 (rule 2)", + "expr2: • digit '+' digit (rule 4)", + "expr2: digit • '+' digit (rule 4)", + "expr2: digit '+' • digit (rule 4)", + "expr2: digit '+' digit • (rule 4)" + ]) + expect(example.derivations2.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 2: expr2 + 4: digit '+' digit • + STR + end + end + + describe "target state item will be start item when finding shift conflict shortest state items" do + let(:y) do + <<~STR +%{ +// Prologue +%} + +%union { + int i; +} + +%token digit + +%% + +stmt : expr '+' + ; + +expr : digit '+' digit + | digit + ; + +%% + + STR + end + + it "build counterexamples of S/R conflicts" do + grammar = Lrama::Parser.new(y).parse + states = Lrama::States.new(grammar, warning) + states.compute + counterexamples = Lrama::Counterexamples.new(states) + + # State 1 + # + # 2 expr: digit • '+' digit + # 3 | digit • ['+'] + # + # '+' shift, and go to state 4 + # '+' reduce using rule 3 (expr) + state_1 = states.states[1] + examples = counterexamples.compute(state_1) + expect(examples.count).to eq 1 + example = examples[0] + + expect(example.type).to eq :shift_reduce + # Shift Conflict + expect(example.path1.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • expr '+' (rule 1)", + "expr: • digit '+' digit (rule 2)", + "expr: digit • '+' digit (rule 2)" + ]) + expect(example.derivations1.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 1: expr '+' + 2: digit • '+' digit + STR + # Reduce Conflict + expect(example.path2.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • expr '+' (rule 1)", + "expr: • digit (rule 3)", + "expr: digit • (rule 3)" + ]) + expect(example.derivations2.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 1: expr '+' + 3: digit • + STR + end + end + + describe "derivation's next symbol is nullable" do + let(:y) do + <<~STR +%{ +// Prologue +%} + +%union { + int i; +} + +%token digit + +%% + +stmt : expr + ; + +expr : num + | expr opt_nl '+' expr + ; + +num : digit + | num digit + ; + +opt_nl : /* none */ + | '\\n' + ; + +%% + + STR + end + + it "build counterexamples of S/R and R/R conflicts" do + grammar = Lrama::Parser.new(y).parse + states = Lrama::States.new(grammar, warning) + states.compute + counterexamples = Lrama::Counterexamples.new(states) + + # State 10 + # + # 3 expr: expr • opt_nl '+' expr + # 3 | expr opt_nl '+' expr • ["end of file", '+', '\n'] + # 6 opt_nl: ε • ['+'] + # 7 | • '\n' + # + # '\n' shift, and go to state 6 + # '\n' reduce using rule 3 (expr) + # + # '+' reduce using rule 3 (expr) + # '+' reduce using rule 6 (opt_nl) + state_10 = states.states[10] + examples = counterexamples.compute(state_10) + expect(examples.count).to eq 2 + example = examples[0] + + expect(example.type).to eq :shift_reduce + expect(example.conflict_symbol.id.s_value).to eq "'\\n'" + # Shift Conflict + expect(example.path1.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • expr (rule 1)", + "expr: • expr opt_nl '+' expr (rule 3)", + "expr: • expr opt_nl '+' expr (rule 3)", + "expr: expr • opt_nl '+' expr (rule 3)", + "expr: expr opt_nl • '+' expr (rule 3)", + "expr: expr opt_nl '+' • expr (rule 3)", + "expr: • expr opt_nl '+' expr (rule 3)", + "expr: expr • opt_nl '+' expr (rule 3)", + "opt_nl: • '\\n' (rule 7)" + ]) + expect(example.derivations1.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 1: expr + 3: expr opt_nl '+' expr + 3: expr opt_nl '+' expr + 3: expr opt_nl '+' expr + 7: • '\\n' + STR + # Reduce Conflict + expect(example.path2.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • expr (rule 1)", + "expr: • expr opt_nl '+' expr (rule 3)", + "expr: • expr opt_nl '+' expr (rule 3)", + "expr: expr • opt_nl '+' expr (rule 3)", + "expr: expr opt_nl • '+' expr (rule 3)", + "expr: expr opt_nl '+' • expr (rule 3)", + "expr: expr opt_nl '+' expr • (rule 3)" + ]) + expect(example.derivations2.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 1: expr + 3: expr opt_nl '+' expr + 3: expr opt_nl '+' expr • 7: • '\\n' + STR + + example = examples[1] + + expect(example.type).to eq :reduce_reduce + expect(example.conflict_symbol.id.s_value).to eq "'+'" + # Reduce Conflict + expect(example.path1.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • expr (rule 1)", + "expr: • expr opt_nl '+' expr (rule 3)", + "expr: • expr opt_nl '+' expr (rule 3)", + "expr: expr • opt_nl '+' expr (rule 3)", + "expr: expr opt_nl • '+' expr (rule 3)", + "expr: expr opt_nl '+' • expr (rule 3)", + "expr: expr opt_nl '+' expr • (rule 3)" + ]) + expect(example.derivations1.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 1: expr + 3: expr opt_nl '+' expr + 3: expr opt_nl '+' expr • + STR + # Reduce Conflict + expect(example.path2.map(&:to).map(&:item).map(&:to_s)).to eq([ + "$accept: • stmt \"end of file\" (rule 0)", + "stmt: • expr (rule 1)", + "expr: • expr opt_nl '+' expr (rule 3)", + "expr: expr • opt_nl '+' expr (rule 3)", + "expr: expr opt_nl • '+' expr (rule 3)", + "expr: expr opt_nl '+' • expr (rule 3)", + "expr: expr • opt_nl '+' expr (rule 3)", + "opt_nl: • (rule 6)" + ]) + expect(example.derivations2.render_for_report).to eq(<<~STR.chomp) + 0: stmt "end of file" + 1: expr + 3: expr opt_nl '+' expr + 6: • + STR + end + end + end +end