From 25aad540dbc609e72dce67ab94825e7bb2442bda Mon Sep 17 00:00:00 2001
From: Sebastian Graf <sgraf1337@gmail.com>
Date: Fri, 1 Nov 2024 13:43:36 +0100
Subject: [PATCH] Auto-resolve shift/reduce conflicts involving the catch token

When the current token is the catch token, we never reduce; we only ever
shift. Hence it does not make sense to report shift/reduce conflicts
involving the catch token: Error resumption mode will only ever try to
shift it.

The solution implemented in this patch is not to generate conflicting
LR'Reduce actions in the first place by deleting the catch token from
the lookahead sets of LR1 items.
---
 happy.cabal                            |  4 +--
 lib/grammar/src/Happy/Grammar.lhs      |  4 +++
 lib/happy-lib.cabal                    |  2 +-
 lib/tabular/src/Happy/Tabular/LALR.lhs | 14 ++++----
 tests/catch-shift-reduce.y             | 50 ++++++++++++++++++++++++++
 5 files changed, 64 insertions(+), 10 deletions(-)
 create mode 100644 tests/catch-shift-reduce.y

diff --git a/happy.cabal b/happy.cabal
index ec587ba1..e750a7b4 100644
--- a/happy.cabal
+++ b/happy.cabal
@@ -1,5 +1,5 @@
 name: happy
-version: 2.1.2
+version: 2.1.3
 license: BSD2
 license-file: LICENSE
 copyright: (c) Andy Gill, Simon Marlow
@@ -139,7 +139,7 @@ executable happy
                  array,
                  containers >= 0.4.2,
                  mtl >= 2.2.1,
-                 happy-lib == 2.1.2
+                 happy-lib == 2.1.3
 
   default-language: Haskell98
   default-extensions: CPP, MagicHash, FlexibleContexts, NamedFieldPuns
diff --git a/lib/grammar/src/Happy/Grammar.lhs b/lib/grammar/src/Happy/Grammar.lhs
index b4376720..fed280a3 100644
--- a/lib/grammar/src/Happy/Grammar.lhs
+++ b/lib/grammar/src/Happy/Grammar.lhs
@@ -175,6 +175,10 @@ For array-based parsers, see the note in Tabular/LALR.lhs.
 > catchName = "catch"
 > dummyName = "%dummy"  -- shouldn't occur in the grammar anywhere
 
+TODO: Should rename firstStartTok to firstStartName!
+It denotes the *Name* of the first start non-terminal and semantically has
+nothing to do with Tokens at all.
+
 > firstStartTok, dummyTok, errorTok, catchTok, epsilonTok :: Name
 > firstStartTok   = MkName 4
 > dummyTok        = MkName 3
diff --git a/lib/happy-lib.cabal b/lib/happy-lib.cabal
index 3a3c358d..987e6d9f 100644
--- a/lib/happy-lib.cabal
+++ b/lib/happy-lib.cabal
@@ -1,6 +1,6 @@
 cabal-version: 3.0
 name: happy-lib
-version: 2.1.2
+version: 2.1.3
 license: BSD-2-Clause
 copyright: (c) Andy Gill, Simon Marlow
 author: Andy Gill and Simon Marlow
diff --git a/lib/tabular/src/Happy/Tabular/LALR.lhs b/lib/tabular/src/Happy/Tabular/LALR.lhs
index 6cf8194d..2c109ca0 100644
--- a/lib/tabular/src/Happy/Tabular/LALR.lhs
+++ b/lib/tabular/src/Happy/Tabular/LALR.lhs
@@ -125,12 +125,12 @@ using a memo table so that no work is repeated.
 > closure0 :: Grammar e -> (Name -> RuleList) -> Set Lr0Item -> Set Lr0Item
 > closure0 g closureOfNT set = Set.foldr addRules Set.empty set
 >    where
->       fst_term = first_term g
+>       last_nonterm = MkName $ getName (first_term g) - 1
 >       addRules rule set' = Set.union (Set.fromList (rule : closureOfRule rule)) set'
 >
 >       closureOfRule (Lr0 rule dot) =
 >           case findRule g rule dot of
->               (Just nt) | nt >= firstStartTok && nt < fst_term
+>               (Just nt) | nt >= firstStartTok && nt <= last_nonterm
 >                  -> closureOfNT nt
 >               _  -> []
 
@@ -141,7 +141,7 @@ Generating the closure of a set of LR(1) items
 > closure1 g first set
 >       = fst (mkClosure (\(_,new) _ -> null new) addItems ([],set))
 >       where
->       fst_term = first_term g
+>       last_nonterm = MkName $ getName (first_term g) - 1
 
 >       addItems :: ([Lr1Item],[Lr1Item]) -> ([Lr1Item],[Lr1Item])
 >       addItems (old_items, new_items) = (new_old_items, new_new_items)
@@ -153,11 +153,11 @@ Generating the closure of a set of LR(1) items
 
 >               fn :: Lr1Item -> [Lr1Item]
 >               fn (Lr1 rule dot as) = case drop dot lhs of
->                       (b:beta) | b >= firstStartTok && b < fst_term ->
->                           let terms = unionNameMap
->                                               (\a -> first (beta ++ [a])) as
+>                       (nt:beta) | nt >= firstStartTok && nt <= last_nonterm ->
+>                           let terms = NameSet.delete catchTok $ -- the catch token is always shifted and never reduced (see pop_items)
+>                                       unionNameMap (\a -> first (beta ++ [a])) as
 >                           in
->                           [ (Lr1 rule' 0 terms) | rule' <- lookupProdsOfName g b ]
+>                           [ (Lr1 rule' 0 terms) | rule' <- lookupProdsOfName g nt ]
 >                       _ -> []
 >                   where Production _name lhs _ _ = lookupProdNo g rule
 
diff --git a/tests/catch-shift-reduce.y b/tests/catch-shift-reduce.y
new file mode 100644
index 00000000..e011a2a1
--- /dev/null
+++ b/tests/catch-shift-reduce.y
@@ -0,0 +1,50 @@
+{
+module Main where
+
+import Data.Char
+}
+
+%name parseExp Exp
+%tokentype { Token }
+%error { abort } { reportError }
+
+%monad { ParseM } { (>>=) } { return }
+
+%token
+  '1' { TOne }
+  '+' { TPlus }
+  '(' { TOpen }
+  ')' { TClose }
+
+%right '+'
+%expect 0     -- The point of this test: The List productions should expose a shift/reduce conflict because of catch
+
+%%
+
+Close :: { String }
+Close : ')'   { ")" }
+      | catch { "catch" }
+
+Exp :: { String }
+Exp : catch          { "catch" }
+    | '1'            { "1"}
+    | '(' List Close { "(" ++ $2 ++ $3 }
+
+List :: { String }
+     : Exp '+' { $1 ++ "+" }
+     | Exp '+' Exp { $1 ++ "+" ++ $3 }
+
+{
+data Token = TOne | TPlus | TComma | TOpen | TClose
+
+type ParseM = Maybe
+
+abort :: [Token] -> ParseM a
+abort = undefined
+
+reportError :: [Token] -> ([Token] -> ParseM a) -> ParseM a
+reportError = undefined
+
+main :: IO ()
+main = return ()
+}