Improve the abstract syntax for tokens
Instead of deferring the handling of `$$` to the backends, properly
parse the `$$` syntax up front, and store the result in the AST.

Note that the GLR backend was improperly substituting the `$$` twice.
Now that we have better types, this surfaced as a type error, and the duplicate
substitution was removed.
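
For example, with a hypothetical lexer constructor `TokenInt`, a `%token`
line such as

    int { TokenInt $$ }

is now parsed by the frontend into roughly
`TokenWithValue (ExpressionWithHole "TokenInt " "")`, whereas a spec without
`$$`, e.g. `'+' { TokenPlus }`, becomes `TokenFixed "TokenPlus"`.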

Fixes #295
Ericson2314 authored and sgraf812 committed Sep 20, 2024
1 parent 43e192a commit 17d3c93
Showing 9 changed files with 98 additions and 64 deletions.
24 changes: 11 additions & 13 deletions lib/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs
@@ -19,6 +19,7 @@ This module is designed as an extension to the Haskell parser generator Happy.

> import Paths_happy_lib ( version )
> import Happy.Grammar
> import Happy.Grammar.ExpressionWithHole ( substExpressionWithHole )
> import Happy.Tabular.LALR
> import Data.Array ( Array, (!), array, assocs )
> import Data.Char ( isSpace, isAlphaNum )
@@ -273,9 +274,9 @@ that will be used for them in the GLR parser.
> | (i,tok) <- token_specs g ] -- Tokens (terminals)
> ++ [(eof_term g,"HappyEOF")] -- EOF symbol (internal terminal)
> where
> mkMatch tok = case mapDollarDollar tok of
> Nothing -> tok
> Just fn -> fn "_"
> mkMatch tok = case tok of
> TokenFixed t -> t
> TokenWithValue e -> substExpressionWithHole e "_"

> toGSym :: [(Int, String)] -> Int -> String
> toGSym gsMap i
@@ -314,10 +315,7 @@ It also shares identical reduction values as CAFs
> where
> startLine
> = unwords [ name , show_st exts state, "(" , getTok , ") =" ]
> getTok = let tok = toGSym gsMap symInt
> in case mapDollarDollar tok of
> Nothing -> tok
> Just f -> f "_"
> getTok = toGSym gsMap symInt
> mkAct act
> = case act of
> LR'Shift newSt _ -> "Shift " ++ show newSt ++ " []"
@@ -421,7 +419,7 @@ Creating a type for storing semantic rules
use in later stages.

> type SemInfo
> = [(String, String, [Int], [((Int,Int), ([(Int,String)],String), [Int])])]
> = [(String, String, [Int], [((Int, Int), ([(Int, TokenSpec)], String), [Int])])]

> mkGSemType :: Options -> Grammar String -> Pragmas -> (ShowS, SemInfo)
> mkGSemType (TreeDecode,_,_) g pragmas
@@ -566,17 +564,17 @@ Creates the appropriate semantic values.
> nodes UseFiltering = "(" ++ foldr (\l -> mkHappyVar (l+1) . showChar ':') "[])" mask
> ]

> mk_lambda :: [(Int, String)] -> Int -> String -> String
> mk_lambda :: [(Int, TokenSpec)] -> Int -> String -> String
> mk_lambda pats v
> = (\s -> "\\" ++ s ++ " -> ") . mk_binder id pats v

> mk_binder :: (String -> String) -> [(Int, String)] -> Int -> String -> String
> mk_binder :: (String -> String) -> [(Int, TokenSpec)] -> Int -> String -> String
> mk_binder wrap pats v
> = case lookup v pats of
> Nothing -> mkHappyVar v
> Just p -> case mapDollarDollar p of
> Nothing -> wrap . mkHappyVar v . showChar '@' . brack p
> Just fn -> wrap . brack' (fn . mkHappyVar v)
> Just p -> case p of
> TokenFixed p' -> wrap . mkHappyVar v . showChar '@' . brack p'
> TokenWithValue e -> wrap . brack' (substExpressionWithHole e . mkHappyVar v)


---
15 changes: 7 additions & 8 deletions lib/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs
@@ -9,6 +9,7 @@ The code generator.
> import Paths_happy_lib ( version )
> import Data.Version ( showVersion )
> import Happy.Grammar
> import Happy.Grammar.ExpressionWithHole ( substExpressionWithHole )
> import Happy.Tabular.LALR

> import Data.Maybe ( isNothing, fromMaybe )
@@ -417,19 +418,17 @@ The token conversion function.
Use a variable rather than '_' to replace '$$', so we can use it on
the left hand side of '@'.

> removeDollarDollar xs = case mapDollarDollar xs of
> Nothing -> xs
> Just fn -> fn "happy_dollar_dollar"
> removeDollarDollar tok = case tok of
> TokenFixed t -> t
> TokenWithValue e -> substExpressionWithHole e "happy_dollar_dollar"
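
For illustration (a sketch, not generated output), with a hypothetical
`TokenInt` spec the two cases above behave like:

    removeDollarDollar (TokenFixed "TokenPlus")
      == "TokenPlus"
    removeDollarDollar (TokenWithValue (ExpressionWithHole "TokenInt " ""))
      == "TokenInt happy_dollar_dollar"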

> mkHappyTerminalVar :: Int -> Int -> String -> String
> mkHappyTerminalVar i t =
> case tok_str_fn of
> case lookup t token_rep of
> Nothing -> pat
> Just fn -> brack (fn (pat []))
> Just (TokenFixed _) -> pat
> Just (TokenWithValue e) -> brack $ substExpressionWithHole e $ pat []
> where
> tok_str_fn = case lookup t token_rep of
> Nothing -> Nothing
> Just str' -> mapDollarDollar str'
> pat = mkHappyVar i

> tokIndex i = i - n_nonterminals - n_starts - 2
6 changes: 3 additions & 3 deletions lib/frontend/boot-src/Parser.ly
@@ -132,12 +132,12 @@ The parser.
> : id { Just $1 }
> | {- nothing -} { Nothing }

> tokenSpecs :: { [(String,String)] }
> tokenSpecs :: { [(String, TokenSpec)] }
> : tokenSpec tokenSpecs { $1:$2 }
> | tokenSpec { [$1] }

> tokenSpec :: { (String,String) }
> : id code { ($1,$2) }
> tokenSpec :: { (String, TokenSpec) }
> : id code { ($1, parseTokenSpec $2) }

> ids :: { [String] }
> : id ids { $1 : $2 }
38 changes: 34 additions & 4 deletions lib/frontend/src/Happy/Frontend/AbsSyn.lhs
@@ -13,10 +13,18 @@ Here is the abstract syntax of the language we parse.
> getImportedIdentity, getMonad, getError,
> getPrios, getPrioNames, getExpect, getErrorHandlerType,
> getAttributes, getAttributetype, getAttributeGrammarExtras,
> Rule(..), Prod(..), Term(..), Prec(..)
> parseTokenSpec,
> Rule(..), Prod(..), Term(..), Prec(..),
> TokenSpec(..) -- reexport
> ) where

> import Happy.Grammar (ErrorHandlerType(..), AttributeGrammarExtras(..))
> import Data.Char (isAlphaNum)
> import Happy.Grammar
> ( ErrorHandlerType(..)
> , TokenSpec(..)
> , AttributeGrammarExtras(..)
> )
> import Happy.Grammar.ExpressionWithHole

> data BookendedAbsSyn
> = BookendedAbsSyn
@@ -63,7 +71,7 @@ generate some error messages.
>
> data Directive a
> = TokenType String -- %tokentype
> | TokenSpec [(a,String)] -- %token
> | TokenSpec [(a, TokenSpec)] -- %token
> | TokenName String (Maybe String) Bool -- %name/%partial (True <=> %partial)
> | TokenLexer String String -- %lexer
> | TokenErrorHandlerType String -- %errorhandlertype
@@ -109,7 +117,7 @@ generate some error messages.
> [] -> (False,"()","HappyIdentity","Prelude.>>=","Prelude.return")
> _ -> error "multiple monad directives"

> getTokenSpec :: [Directive t] -> [(t, String)]
> getTokenSpec :: [Directive t] -> [(t, TokenSpec)]
> getTokenSpec ds = concat [ t | (TokenSpec t) <- ds ]

> getPrios :: [Directive t] -> [Directive t]
@@ -170,3 +178,25 @@ generate some error messages.
> attributetype = at
> }
> (_ : _, Nothing) -> error "attributes found without attribute type directive"

> -- | Parse a token spec.
> --
> -- The first occurrence of '$$' marks an expression in which the '$$'
> -- will be replaced by the actual lexed token. A '$$' inside string or char
> -- literals ('".."' and '\'.\''), however, does not count.
> parseTokenSpec :: String -> TokenSpec
> parseTokenSpec code0 = go code0 ""
> where go code acc =
> case code of
> [] -> TokenFixed code0
>
> '"' :r -> case reads code :: [(String,String)] of
> [] -> go r ('"':acc)
> (s,r'):_ -> go r' (reverse (show s) ++ acc)
> a:'\'' :r | isAlphaNum a -> go r ('\'':a:acc)
> '\'' :r -> case reads code :: [(Char,String)] of
> [] -> go r ('\'':acc)
> (c,r'):_ -> go r' (reverse (show c) ++ acc)
> '\\':'$':r -> go r ('$':acc)
> '$':'$':r -> TokenWithValue $ ExpressionWithHole (reverse acc) r
> c:r -> go r (c:acc)
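
A minimal, self-contained sketch of `parseTokenSpec` (illustration only, with
hypothetical lexer constructors `TokenSym` and `TokenEOF`):

    import Happy.Frontend.AbsSyn (parseTokenSpec)
    import Happy.Grammar (TokenSpec (..))
    import Happy.Grammar.ExpressionWithHole (ExpressionWithHole (..))

    -- The '$$' inside the string literal is skipped; only the trailing
    -- '$$' becomes the hole.
    literalSkip :: Bool
    literalSkip =
      parseTokenSpec "TokenSym \"$$\" $$"
        == TokenWithValue (ExpressionWithHole "TokenSym \"$$\" " "")

    -- No '$$' at all: the whole spec is kept verbatim.
    fixedSpec :: Bool
    fixedSpec = parseTokenSpec "TokenEOF" == TokenFixed "TokenEOF"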
10 changes: 5 additions & 5 deletions lib/frontend/src/Happy/Frontend/Parser.hs
@@ -135,15 +135,15 @@ happyIn19 x = Happy_GHC_Exts.unsafeCoerce# (HappyWrap19 x)
happyOut19 :: (HappyAbsSyn ) -> HappyWrap19
happyOut19 x = Happy_GHC_Exts.unsafeCoerce# x
{-# INLINE happyOut19 #-}
newtype HappyWrap20 = HappyWrap20 ([(String,String)])
happyIn20 :: ([(String,String)]) -> (HappyAbsSyn )
newtype HappyWrap20 = HappyWrap20 ([(String, TokenSpec)])
happyIn20 :: ([(String, TokenSpec)]) -> (HappyAbsSyn )
happyIn20 x = Happy_GHC_Exts.unsafeCoerce# (HappyWrap20 x)
{-# INLINE happyIn20 #-}
happyOut20 :: (HappyAbsSyn ) -> HappyWrap20
happyOut20 x = Happy_GHC_Exts.unsafeCoerce# x
{-# INLINE happyOut20 #-}
newtype HappyWrap21 = HappyWrap21 ((String,String))
happyIn21 :: ((String,String)) -> (HappyAbsSyn )
newtype HappyWrap21 = HappyWrap21 ((String, TokenSpec))
happyIn21 :: ((String, TokenSpec)) -> (HappyAbsSyn )
happyIn21 x = Happy_GHC_Exts.unsafeCoerce# (HappyWrap21 x)
{-# INLINE happyIn21 #-}
happyOut21 :: (HappyAbsSyn ) -> HappyWrap21
@@ -768,7 +768,7 @@ happyReduction_51 happy_x_2
= case happyOutTok happy_x_1 of { (TokenInfo happy_var_1 TokId) ->
case happyOutTok happy_x_2 of { (TokenInfo happy_var_2 TokCodeQuote) ->
happyIn21
((happy_var_1,happy_var_2)
((happy_var_1, parseTokenSpec happy_var_2)
)}}

happyReduce_52 :: () => Happy_GHC_Exts.Int# -> Token -> Happy_GHC_Exts.Int# -> Happy_IntList -> HappyStk (HappyAbsSyn ) -> P (HappyAbsSyn )
45 changes: 16 additions & 29 deletions lib/grammar/src/Happy/Grammar.lhs
@@ -8,32 +8,43 @@ The Grammar data type.
> module Happy.Grammar (
> Name,
>
> Production(..), Grammar(..),
> Production(..),
> TokenSpec(..),
> Grammar(..),
> AttributeGrammarExtras(..),
> Priority(..),
> Assoc(..),
> Pragmas(..), ErrorHandlerType(..),
>
> errorName, errorTok, startName, dummyName, firstStartTok, dummyTok,
> eofName, epsilonTok,
>
> mapDollarDollar
> ) where

> import Data.Array
> import Data.Char (isAlphaNum)
> import Happy.Grammar.ExpressionWithHole (ExpressionWithHole)

> type Name = Int

> data Production eliminator
> = Production Name [Name] (eliminator,[Int]) Priority
> deriving Show

> data TokenSpec
>
> -- | The token is just a fixed value
> = TokenFixed String
>
> -- | The token is an expression involving the value of the lexed token.
> | TokenWithValue ExpressionWithHole
>
> deriving (Eq, Show)

> data Grammar eliminator
> = Grammar {
> productions :: [Production eliminator],
> lookupProdNo :: Int -> Production eliminator,
> lookupProdsOfName :: Name -> [Int],
> token_specs :: [(Name,String)],
> token_specs :: [(Name, TokenSpec)],
> terminals :: [Name],
> non_terminals :: [Name],
> starts :: [(String,Name,Name,Bool)],
@@ -145,27 +156,3 @@ For array-based parsers, see the note in Tabular/LALR.lhs.
> dummyTok = 2
> errorTok = 1
> epsilonTok = 0

-----------------------------------------------------------------------------
Replace $$ with an arbitrary string, being careful to avoid ".." and '.'.
> mapDollarDollar :: String -> Maybe (String -> String)
> mapDollarDollar = fmap (\(l, r) repr -> l ++ repr ++ r) . mapDollarDollar'
>
> mapDollarDollar' :: String -> Maybe (String, String)
> mapDollarDollar' code0 = go code0 ""
> where go code acc =
> case code of
> [] -> Nothing
>
> '"' :r -> case reads code :: [(String,String)] of
> [] -> go r ('"':acc)
> (s,r'):_ -> go r' (reverse (show s) ++ acc)
> a:'\'' :r | isAlphaNum a -> go r ('\'':a:acc)
> '\'' :r -> case reads code :: [(Char,String)] of
> [] -> go r ('\'':acc)
> (c,r'):_ -> go r' (reverse (show c) ++ acc)
> '\\':'$':r -> go r ('$':acc)
> '$':'$':r -> Just (reverse acc, r)
> c:r -> go r (c:acc)
13 changes: 13 additions & 0 deletions lib/grammar/src/Happy/Grammar/ExpressionWithHole.hs
@@ -0,0 +1,13 @@
module Happy.Grammar.ExpressionWithHole where

-- | The overall expression is
-- 'exprLeft ++ substitutedForHole ++ exprRight'.
data ExpressionWithHole
= ExpressionWithHole {
exprLeft :: String,
exprRight :: String
}
deriving (Eq, Show)

substExpressionWithHole :: ExpressionWithHole -> String -> String
substExpressionWithHole (ExpressionWithHole l r) = \repr -> l ++ repr ++ r
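
A small usage sketch (values assumed for illustration): the result is always
`exprLeft ++ repr ++ exprRight`.

    import Happy.Grammar.ExpressionWithHole

    -- A hole as parsed from a hypothetical spec "TokenStr $$ loc":
    example :: String
    example = substExpressionWithHole (ExpressionWithHole "TokenStr " " loc") "s"
    -- == "TokenStr s loc"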
5 changes: 4 additions & 1 deletion lib/happy-lib.cabal
@@ -75,7 +75,9 @@ common common-stanza
library grammar
import: common-stanza
hs-source-dirs: grammar/src
exposed-modules: Happy.Grammar
exposed-modules:
Happy.Grammar
Happy.Grammar.ExpressionWithHole
build-depends: base < 5, array

library frontend
@@ -129,6 +131,7 @@ library backend-glr
library
import: common-stanza
reexported-modules: Happy.Grammar,
Happy.Grammar.ExpressionWithHole,
Happy.Frontend,
Happy.Frontend.AbsSyn,
Happy.Frontend.Mangler,
6 changes: 5 additions & 1 deletion lib/tabular/src/Happy/Tabular/Info.lhs
@@ -9,6 +9,7 @@ Generating info files.
> import Data.Set ( Set )
> import qualified Data.Set as Set hiding ( Set )
> import Happy.Grammar
> import Happy.Grammar.ExpressionWithHole ( substExpressionWithHole )
> import Happy.Tabular.LALR ( Lr0Item(..), LRAction(..), Goto(..), GotoTable, ActionTable )

> import Data.Array
@@ -182,7 +183,10 @@ Produce a file of parser information, useful for debugging the parser.
> showTerminal (t,s)
> = str "\t"
> . showJName 15 t
> . str "{ " . str s . str " }"
> . str "{ " . showToken s . str " }"

> showToken (TokenFixed s) = str s
> showToken (TokenWithValue e) = str $ substExpressionWithHole e "$$"

> showNonTerminals
> = banner "Non-terminals"
