Autosemi after comments #134

Open · wants to merge 5 commits into base: new-ast
90 changes: 57 additions & 33 deletions src/Language/JavaScript/Parser/Lexer.x
@@ -14,7 +14,8 @@ module Language.JavaScript.Parser.Lexer
, lexCont
, alexError
, runAlex
, alexTestTokeniser
, happyTestTokenizer
, alexTestTokenizer
, setInTemplate
) where

@@ -411,8 +412,8 @@ lexToken = do
return tok

-- For testing.
alexTestTokeniser :: String -> Either String [Token]
alexTestTokeniser input =
alexTestTokenizer :: String -> Either String [Token]
alexTestTokenizer input =
runAlex input $ loop []
where
loop acc = do
@@ -425,39 +426,61 @@ alexTestTokeniser input =
xs -> reverse xs
_ -> loop (tok:acc)

-- Test variant of alexTestTokenizer that tokenizes using the same
-- rules as those used by the Happy parser.
happyTestTokenizer :: String -> Either String [Token]
happyTestTokenizer input = runAlex input $ loop []
where
loop :: [Token] -> Alex [Token]
loop acc = genericLexStep (loop . (:acc)) (loop acc) (\_ ->
return $ case acc of
[] -> []
(TailToken{}:xs) -> reverse xs
xs -> reverse xs)

-- This is called by the Happy parser.
lexCont :: (Token -> Alex a) -> Alex a
lexCont cont =
lexLoop
lexCont cont = lexLoop
where
lexLoop = do
tok <- lexToken
case tok of
CommentToken {} -> do
addComment tok
lexLoop
WsToken {} -> do
addComment tok
ltok <- getLastToken
case ltok of
BreakToken {} -> maybeAutoSemi tok
ContinueToken {} -> maybeAutoSemi tok
ReturnToken {} -> maybeAutoSemi tok
_otherwise -> lexLoop
_other -> do
cs <- getComment
let tok' = tok{ tokenComment=(toCommentAnnotation cs) }
setComment []
cont tok'

-- If the token is a WsToken and it contains a newline, convert it to an
-- AutoSemiToken and call the continuation, otherwise, just lexLoop.
maybeAutoSemi (WsToken sp tl cmt) =
if any (== '\n') tl
then cont $ AutoSemiToken sp tl cmt
else lexLoop
maybeAutoSemi _ = lexLoop

lexLoop = genericLexStep cont lexLoop (addCommentAnnotation cont)

genericLexStep :: (Token -> Alex a) -> Alex a -> (Token -> Alex a) -> Alex a
genericLexStep cont lexLoop eof = do
tok <- lexToken
ltok <- getLastToken
case tok of
CommentToken {} -> do
addComment tok
case ltok of
BreakToken {} -> maybeAutoSemi tok
ContinueToken {} -> maybeAutoSemi tok
ReturnToken {} -> maybeAutoSemi tok
_otherwise -> lexLoop
WsToken {} -> do
addComment tok
case ltok of
BreakToken {} -> maybeAutoSemi tok
ContinueToken {} -> maybeAutoSemi tok
ReturnToken {} -> maybeAutoSemi tok
_otherwise -> lexLoop
EOFToken {} -> eof tok
_other -> addCommentAnnotation cont tok
where
-- If the token is a WsToken or CommentToken and it contains a newline, convert it to an
-- AutoSemiToken and call the continuation, otherwise, just lexLoop.
maybeAutoSemi (WsToken sp tl cmt) | hasNewline tl = cont $ AutoSemiToken sp tl cmt
maybeAutoSemi (CommentToken sp tl cmt) | hasNewline tl = cont $ AutoSemiToken sp tl cmt
maybeAutoSemi _ = lexLoop

hasNewline :: String -> Bool
hasNewline = any (== '\n')

addCommentAnnotation :: (Token -> Alex a) -> Token -> Alex a
addCommentAnnotation cont tok = do
cs <- getComment
let tok' = tok{ tokenComment=(toCommentAnnotation cs) }
setComment []
cont tok'

toCommentAnnotation :: [Token] -> [CommentAnnotation]
toCommentAnnotation [] = []
Expand All @@ -483,6 +506,7 @@ getLastToken = Alex $ \s@AlexState{alex_ust=ust} -> Right (s, previousToken ust)

setLastToken :: Token -> Alex ()
setLastToken (WsToken {}) = Alex $ \s -> Right (s, ())
setLastToken (CommentToken {}) = Alex $ \s -> Right (s, ())
setLastToken tok = Alex $ \s -> Right (s{alex_ust=(alex_ust s){previousToken=tok}}, ())

getComment :: Alex [Token]
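
The net effect of the Lexer.x change: in the token stream handed to the Happy parser, whitespace and comments are folded away, and a comment or whitespace run containing a newline after return, break, or continue now yields an AutoSemiToken. A minimal usage sketch contrasting the two exported tokenizers (not part of this PR; it assumes Token's derived Show instance, and the expected results are copied from the tests below):

import Language.JavaScript.Parser.Lexer (alexTestTokenizer, happyTestTokenizer)

main :: IO ()
main = do
  -- Raw Alex rules report whitespace and comments verbatim:
  --   Right [ReturnToken .., WsToken .., CommentToken .., WsToken .., DecimalToken ..]
  print (alexTestTokenizer "return //hello\n 1")
  -- Happy-facing rules drop them and, because the skipped text after the
  -- return contains a newline, insert an AutoSemiToken:
  --   Right [ReturnToken .., AutoSemiToken .., DecimalToken ..]
  print (happyTestTokenizer "return //hello\n 1")
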
229 changes: 164 additions & 65 deletions test/Test/Language/Javascript/Lexer.hs
@@ -11,71 +11,170 @@ import Language.JavaScript.Parser.Lexer

testLexer :: Spec
testLexer = describe "Lexer:" $ do
it "comments" $ do
testLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[CommentToken]"
testLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[CommentToken]"

it "numbers" $ do
testLex "123" `shouldBe` "[DecimalToken 123]"
testLex "037" `shouldBe` "[OctalToken 037]"
testLex "0xab" `shouldBe` "[HexIntegerToken 0xab]"
testLex "0xCD" `shouldBe` "[HexIntegerToken 0xCD]"

it "invalid numbers" $ do
testLex "089" `shouldBe` "[DecimalToken 0,DecimalToken 89]"
testLex "0xGh" `shouldBe` "[DecimalToken 0,IdentifierToken 'xGx']"

it "string" $ do
testLex "'cat'" `shouldBe` "[StringToken 'cat']"
testLex "\"dog\"" `shouldBe` "[StringToken \"dog\"]"

it "strings with escape chars" $ do
testLex "'\t'" `shouldBe` "[StringToken '\t']"
testLex "'\\n'" `shouldBe` "[StringToken '\\n']"
testLex "'\\\\n'" `shouldBe` "[StringToken '\\\\n']"
testLex "'\\\\'" `shouldBe` "[StringToken '\\\\']"
testLex "'\\0'" `shouldBe` "[StringToken '\\0']"
testLex "'\\12'" `shouldBe` "[StringToken '\\12']"
testLex "'\\s'" `shouldBe` "[StringToken '\\s']"
testLex "'\\-'" `shouldBe` "[StringToken '\\-']"

it "strings with non-escaped chars" $
testLex "'\\/'" `shouldBe` "[StringToken '\\/']"

it "strings with escaped quotes" $ do
testLex "'\"'" `shouldBe` "[StringToken '\"']"
testLex "\"\\\"\"" `shouldBe` "[StringToken \"\\\\\"\"]"
testLex "'\\\''" `shouldBe` "[StringToken '\\\\'']"
testLex "'\"'" `shouldBe` "[StringToken '\"']"
testLex "\"\\'\"" `shouldBe` "[StringToken \"\\'\"]"

it "spread token" $ do
testLex "...a" `shouldBe` "[SpreadToken,IdentifierToken 'a']"

it "assignment" $ do
testLex "x=1" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]"
testLex "x=1\ny=2" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1,WsToken,IdentifierToken 'y',SimpleAssignToken,DecimalToken 2]"

it "break/continue/return" $ do
testLex "break\nx=1" `shouldBe` "[BreakToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]"
testLex "continue\nx=1" `shouldBe` "[ContinueToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]"
testLex "return\nx=1" `shouldBe` "[ReturnToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]"

it "var/let" $ do
testLex "var\n" `shouldBe` "[VarToken,WsToken]"
testLex "let\n" `shouldBe` "[LetToken,WsToken]"

it "in/of" $ do
testLex "in\n" `shouldBe` "[InToken,WsToken]"
testLex "of\n" `shouldBe` "[OfToken,WsToken]"

it "function" $ do
testLex "async function\n" `shouldBe` "[AsyncToken,WsToken,FunctionToken,WsToken]"


testLex :: String -> String
testLex str =
either id stringify $ alexTestTokeniser str
describe "with Alex rules" $ do
it "comments" $ do
alexTestLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[CommentToken]"
alexTestLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[CommentToken]"

it "return mixed with comments" $ do
alexTestLex "return 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]"
alexTestLex "return \n 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]"
alexTestLex "return //hello" `shouldBe` "[ReturnToken,WsToken,CommentToken]"
alexTestLex "return /*hello*/" `shouldBe` "[ReturnToken,WsToken,CommentToken]"
alexTestLex "return //hello\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]"
alexTestLex "return /*hello*/\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]"
alexTestLex "return /*hello 1*/\n" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken]"

it "numbers" $ do
alexTestLex "123" `shouldBe` "[DecimalToken 123]"
alexTestLex "037" `shouldBe` "[OctalToken 037]"
alexTestLex "0xab" `shouldBe` "[HexIntegerToken 0xab]"
alexTestLex "0xCD" `shouldBe` "[HexIntegerToken 0xCD]"

it "invalid numbers" $ do
alexTestLex "089" `shouldBe` "[DecimalToken 0,DecimalToken 89]"
alexTestLex "0xGh" `shouldBe` "[DecimalToken 0,IdentifierToken 'xGx']"

it "string" $ do
alexTestLex "'cat'" `shouldBe` "[StringToken 'cat']"
alexTestLex "\"dog\"" `shouldBe` "[StringToken \"dog\"]"

it "strings with escape chars" $ do
alexTestLex "'\t'" `shouldBe` "[StringToken '\t']"
alexTestLex "'\\n'" `shouldBe` "[StringToken '\\n']"
alexTestLex "'\\\\n'" `shouldBe` "[StringToken '\\\\n']"
alexTestLex "'\\\\'" `shouldBe` "[StringToken '\\\\']"
alexTestLex "'\\0'" `shouldBe` "[StringToken '\\0']"
alexTestLex "'\\12'" `shouldBe` "[StringToken '\\12']"
alexTestLex "'\\s'" `shouldBe` "[StringToken '\\s']"
alexTestLex "'\\-'" `shouldBe` "[StringToken '\\-']"

it "strings with non-escaped chars" $
alexTestLex "'\\/'" `shouldBe` "[StringToken '\\/']"

it "strings with escaped quotes" $ do
alexTestLex "'\"'" `shouldBe` "[StringToken '\"']"
alexTestLex "\"\\\"\"" `shouldBe` "[StringToken \"\\\\\"\"]"
alexTestLex "'\\\''" `shouldBe` "[StringToken '\\\\'']"
alexTestLex "'\"'" `shouldBe` "[StringToken '\"']"
alexTestLex "\"\\'\"" `shouldBe` "[StringToken \"\\'\"]"

it "spread token" $ do
alexTestLex "...a" `shouldBe` "[SpreadToken,IdentifierToken 'a']"

it "assignment" $ do
alexTestLex "x=1" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]"
alexTestLex "x=1\ny=2" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1,WsToken,IdentifierToken 'y',SimpleAssignToken,DecimalToken 2]"

it "break/continue/return" $ do
alexTestLex "break\nx=1" `shouldBe` "[BreakToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]"
alexTestLex "continue\nx=1" `shouldBe` "[ContinueToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]"
alexTestLex "return\nx=1" `shouldBe` "[ReturnToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]"

it "var/let" $ do
alexTestLex "var\n" `shouldBe` "[VarToken,WsToken]"
alexTestLex "let\n" `shouldBe` "[LetToken,WsToken]"

it "in/of" $ do
alexTestLex "in\n" `shouldBe` "[InToken,WsToken]"
alexTestLex "of\n" `shouldBe` "[OfToken,WsToken]"

it "function" $ do
alexTestLex "async function\n" `shouldBe` "[AsyncToken,WsToken,FunctionToken,WsToken]"


describe "with Happy rules" $ do
it "comments" $ do
happyTestLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[]"
happyTestLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[]"
happyTestLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */ // foo" `shouldBe` "[]"

it "return that doesn't produce autosemi" $ do
happyTestLex "return 1" `shouldBe` "[ReturnToken,DecimalToken 1]"
happyTestLex "return //hello" `shouldBe` "[ReturnToken]"
happyTestLex "return/*hello*/1" `shouldBe` "[ReturnToken,DecimalToken 1]"

it "return mixed with newlines that produce autosemi" $ do
happyTestLex "return \n 1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]"

it "return mixed with comments that produce autosemi and trailing expressions" $ do
happyTestLex "return /*hello \n */" `shouldBe` "[ReturnToken,AutoSemiToken]"
happyTestLex "return /*hello \n 1*/" `shouldBe` "[ReturnToken,AutoSemiToken]"
happyTestLex "return //hello\n" `shouldBe` "[ReturnToken,AutoSemiToken]"

it "return mixed with comments that produce autosemi but no trailing expressions" $ do
happyTestLex "return //hello\n 1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]"
happyTestLex "return /*hello*/\n 1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]"
happyTestLex "return//hello\n1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]"

it "numbers" $ do
happyTestLex "123" `shouldBe` "[DecimalToken 123]"
happyTestLex "037" `shouldBe` "[OctalToken 037]"
happyTestLex "0xab" `shouldBe` "[HexIntegerToken 0xab]"
happyTestLex "0xCD" `shouldBe` "[HexIntegerToken 0xCD]"

it "invalid numbers" $ do
happyTestLex "089" `shouldBe` "[DecimalToken 0,DecimalToken 89]"
happyTestLex "0xGh" `shouldBe` "[DecimalToken 0,IdentifierToken 'xGx']"

it "string" $ do
happyTestLex "'cat'" `shouldBe` "[StringToken 'cat']"
happyTestLex "\"dog\"" `shouldBe` "[StringToken \"dog\"]"

it "strings with escape chars" $ do
happyTestLex "'\t'" `shouldBe` "[StringToken '\t']"
happyTestLex "'\\n'" `shouldBe` "[StringToken '\\n']"
happyTestLex "'\\\\n'" `shouldBe` "[StringToken '\\\\n']"
happyTestLex "'\\\\'" `shouldBe` "[StringToken '\\\\']"
happyTestLex "'\\0'" `shouldBe` "[StringToken '\\0']"
happyTestLex "'\\12'" `shouldBe` "[StringToken '\\12']"
happyTestLex "'\\s'" `shouldBe` "[StringToken '\\s']"
happyTestLex "'\\-'" `shouldBe` "[StringToken '\\-']"

it "strings with non-escaped chars" $
happyTestLex "'\\/'" `shouldBe` "[StringToken '\\/']"

it "strings with escaped quotes" $ do
happyTestLex "'\"'" `shouldBe` "[StringToken '\"']"
happyTestLex "\"\\\"\"" `shouldBe` "[StringToken \"\\\\\"\"]"
happyTestLex "'\\\''" `shouldBe` "[StringToken '\\\\'']"
happyTestLex "'\"'" `shouldBe` "[StringToken '\"']"
happyTestLex "\"\\'\"" `shouldBe` "[StringToken \"\\'\"]"

it "spread token" $ do
happyTestLex "...a" `shouldBe` "[SpreadToken,IdentifierToken 'a']"

it "assignment" $ do
happyTestLex "x=1" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]"
happyTestLex "x=1\ny=2" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1,IdentifierToken 'y',SimpleAssignToken,DecimalToken 2]"

it "break/continue/return" $ do
happyTestLex "break\nx=1" `shouldBe` "[BreakToken,AutoSemiToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]"
happyTestLex "continue\nx=1" `shouldBe` "[ContinueToken,AutoSemiToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]"
happyTestLex "return\nx=1" `shouldBe` "[ReturnToken,AutoSemiToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]"

it "var/let" $ do
happyTestLex "var\n" `shouldBe` "[VarToken]"
happyTestLex "let\n" `shouldBe` "[LetToken]"

it "in/of" $ do
happyTestLex "in\n" `shouldBe` "[InToken]"
happyTestLex "of\n" `shouldBe` "[OfToken]"

it "function" $ do
happyTestLex "async function\n" `shouldBe` "[AsyncToken,FunctionToken]"



alexTestLex :: String -> String
alexTestLex = genericTestLex alexTestTokenizer

happyTestLex :: String -> String
happyTestLex = genericTestLex happyTestTokenizer

genericTestLex :: (String -> Either String [Token]) -> String -> String
genericTestLex lexer str =
either id stringify $ lexer str
where
stringify xs = "[" ++ intercalate "," (map showToken xs) ++ "]"

19 changes: 18 additions & 1 deletion test/Test/Language/Javascript/ProgramParser.hs
@@ -18,12 +18,30 @@ testProgramParser = describe "Program parser:" $ do
it "function" $ do
testProg "function a(){}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' () (JSBlock [])])"
testProg "function a(b,c){}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [])])"

it "comments" $ do
testProg "//blah\nx=1;//foo\na" `shouldBe` "Right (JSAstProgram [JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1'),JSSemicolon,JSIdentifier 'a'])"
testProg "/*x=1\ny=2\n*/z=2;//foo\na" `shouldBe` "Right (JSAstProgram [JSOpAssign ('=',JSIdentifier 'z',JSDecimal '2'),JSSemicolon,JSIdentifier 'a'])"
testProg "/* */\nfunction f() {\n/* */\n}\n" `shouldBe` "Right (JSAstProgram [JSFunction 'f' () (JSBlock [])])"
testProg "/* **/\nfunction f() {\n/* */\n}\n" `shouldBe` "Right (JSAstProgram [JSFunction 'f' () (JSBlock [])])"

it "function with comments" $ do
testProg "function a(){/* return */}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' () (JSBlock [])])"
testProg "function a(b,c/*d*/){}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [])])"

it "return with comments" $ do
testProg "function a(b,c){ return \n 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])"
testProg "function a(b,c){ return // 4\n }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ])])"
testProg "function a(b,c){ return /* 4*/\n }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ])])"

it "return with comments and trailing expression" $ do
testProg "function a(b,c){ return //\n 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])"
testProg "function a(b,c){ return /*\n*/ 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])"

it "return without spaces and but comments and trailing expression" $ do
testProg "function a(b,c){ return//\n4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])"
testProg "function a(b,c){ return/*\n*/4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])"

it "if" $ do
testProg "if(x);x=1" `shouldBe` "Right (JSAstProgram [JSIf (JSIdentifier 'x') (JSEmptyStatement),JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1')])"
testProg "if(a)x=1;y=2" `shouldBe` "Right (JSAstProgram [JSIf (JSIdentifier 'a') (JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1'),JSSemicolon),JSOpAssign ('=',JSIdentifier 'y',JSDecimal '2')])"
@@ -91,4 +109,3 @@ testProg str = showStrippedMaybe (parseUsing parseProgram str "src")

testFileUtf8 :: FilePath -> IO String
testFileUtf8 fileName = showStripped <$> parseFileUtf8 fileName
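
At the AST level the same behaviour is visible through the package's public entry point; a sketch mirroring the test cases above (assumptions: the usual Language.JavaScript.Parser.parse :: String -> String -> Either String JSAST is unchanged on the new-ast branch, and JSAST has a Show instance):

import Language.JavaScript.Parser (parse)

main :: IO ()
main = do
  -- The newline inside the block comment ends the return statement, so the
  -- function body parses as a bare JSReturn (cf. "return with comments" above):
  print (parse "function a(b,c){ return /* 4*/\n }" "src")
  -- Here the 4 follows the comment's newline, so it becomes a separate
  -- statement after the automatically inserted semicolon:
  print (parse "function a(b,c){ return //\n 4 }" "src")
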
