From 5f72880e7bdc7571d5f9bff160bf91e479a0725e Mon Sep 17 00:00:00 2001 From: Sylvain Henry Date: Mon, 17 Apr 2023 14:08:14 +0200 Subject: [PATCH] Lexer: report indentation warnings at column 1 We use a Latin1 generated parser with Alex, but we also parses Unicode BOM, unbreakable spaces, etc. In recent Alex, the reported column isn't expressed in Unicode chars anymore but in bytes/ASCII chars (probably due to https://github.com/haskell/alex/commit/ae525e34edf017544e8ef4457d7e57cf2081dcf9 but I haven't checked), which broke our tests (see https://github.com/haskell/cabal/pull/8896). To work around this we report indentation warnings at token start position, instead of token end position (i.e. always 1). Otherwise position makes no sense anymore for the user. --- Cabal-syntax/src/Distribution/Fields/Lexer.hs | 26 +++++++++---------- .../src/Distribution/Fields/LexerMonad.hs | 6 +++++ .../ParserTests/regressions/Octree-0.5.format | 2 +- .../regressions/monad-param.format | 2 +- .../regressions/th-lift-instances.format | 2 +- .../Check/PackageFiles/BOM/cabal.out | 2 +- templates/Lexer.x | 26 +++++++++---------- 7 files changed, 36 insertions(+), 30 deletions(-) diff --git a/Cabal-syntax/src/Distribution/Fields/Lexer.hs b/Cabal-syntax/src/Distribution/Fields/Lexer.hs index 926bda89fe9..3d6e97763ec 100644 --- a/Cabal-syntax/src/Distribution/Fields/Lexer.hs +++ b/Cabal-syntax/src/Distribution/Fields/Lexer.hs @@ -195,17 +195,17 @@ toki t pos len input = return $! L pos (t (B.take len input)) tok :: Token -> Position -> Int -> ByteString -> Lex LToken tok t pos _len _input = return $! L pos t -checkLeadingWhitespace :: Int -> ByteString -> Lex Int -checkLeadingWhitespace len bs +checkLeadingWhitespace :: Position -> Int -> ByteString -> Lex Int +checkLeadingWhitespace pos len bs | B.any (== 9) (B.take len bs) = do - addWarning LexWarningTab - checkWhitespace len bs - | otherwise = checkWhitespace len bs + addWarningAt pos LexWarningTab + checkWhitespace pos len bs + | otherwise = checkWhitespace pos len bs -checkWhitespace :: Int -> ByteString -> Lex Int -checkWhitespace len bs +checkWhitespace :: Position -> Int -> ByteString -> Lex Int +checkWhitespace pos len bs | B.any (== 194) (B.take len bs) = do - addWarning LexWarningNBSP + addWarningAt pos LexWarningNBSP return $ len - B.count 194 (B.take len bs) | otherwise = return len @@ -313,12 +313,12 @@ bol_section = 3 in_field_braces = 4 in_field_layout = 5 in_section = 6 -alex_action_0 = \_ len _ -> do - when (len /= 0) $ addWarning LexWarningBOM +alex_action_0 = \pos len _ -> do + when (len /= 0) $ addWarningAt pos LexWarningBOM setStartCode bol_section lexToken -alex_action_1 = \_pos len inp -> checkWhitespace len inp >> adjustPos retPos >> lexToken -alex_action_3 = \pos len inp -> checkLeadingWhitespace len inp >> +alex_action_1 = \pos len inp -> checkWhitespace pos len inp >> adjustPos retPos >> lexToken +alex_action_3 = \pos len inp -> checkLeadingWhitespace pos len inp >> if B.length inp == len then return (L pos EOF) else setStartCode in_section @@ -333,7 +333,7 @@ alex_action_12 = tok Colon alex_action_13 = tok OpenBrace alex_action_14 = tok CloseBrace alex_action_15 = \_ _ _ -> adjustPos retPos >> setStartCode bol_section >> lexToken -alex_action_16 = \pos len inp -> checkLeadingWhitespace len inp >>= \len' -> +alex_action_16 = \pos len inp -> checkLeadingWhitespace pos len inp >>= \len' -> if B.length inp == len then return (L pos EOF) else setStartCode in_field_layout diff --git a/Cabal-syntax/src/Distribution/Fields/LexerMonad.hs b/Cabal-syntax/src/Distribution/Fields/LexerMonad.hs index 3c11fac92a4..ac414c18e31 100644 --- a/Cabal-syntax/src/Distribution/Fields/LexerMonad.hs +++ b/Cabal-syntax/src/Distribution/Fields/LexerMonad.hs @@ -27,6 +27,7 @@ module Distribution.Fields.LexerMonad ( LexWarning(..), LexWarningType(..), addWarning, + addWarningAt, toPWarnings, ) where @@ -153,3 +154,8 @@ setStartCode c = Lex $ \s -> LexResult s{ curCode = c } () addWarning :: LexWarningType -> Lex () addWarning wt = Lex $ \s@LexState{ curPos = pos, warnings = ws } -> LexResult s{ warnings = LexWarning wt pos : ws } () + +-- | Add warning at specific position +addWarningAt :: Position -> LexWarningType -> Lex () +addWarningAt pos wt = Lex $ \s@LexState{ warnings = ws } -> + LexResult s{ warnings = LexWarning wt pos : ws } () diff --git a/Cabal-tests/tests/ParserTests/regressions/Octree-0.5.format b/Cabal-tests/tests/ParserTests/regressions/Octree-0.5.format index 432c3bba9a4..7d72803f0a5 100644 --- a/Cabal-tests/tests/ParserTests/regressions/Octree-0.5.format +++ b/Cabal-tests/tests/ParserTests/regressions/Octree-0.5.format @@ -1,4 +1,4 @@ -Octree-0.5.cabal:39:3: Non breaking spaces at 39:3, 41:3, 43:3 +Octree-0.5.cabal:39:1: Non breaking spaces at 39:1, 41:1, 43:1 cabal-version: >=1.8 name: Octree version: 0.5 diff --git a/Cabal-tests/tests/ParserTests/regressions/monad-param.format b/Cabal-tests/tests/ParserTests/regressions/monad-param.format index 5c0ba1b819c..360a94bc825 100644 --- a/Cabal-tests/tests/ParserTests/regressions/monad-param.format +++ b/Cabal-tests/tests/ParserTests/regressions/monad-param.format @@ -1,4 +1,4 @@ -monad-param.cabal:19:3: Tabs used as indentation at 19:3, 20:3 +monad-param.cabal:19:1: Tabs used as indentation at 19:1, 20:1 name: monad-param version: 0.0.1 license: BSD3 diff --git a/Cabal-tests/tests/ParserTests/regressions/th-lift-instances.format b/Cabal-tests/tests/ParserTests/regressions/th-lift-instances.format index df2f77dd729..8fe7579952d 100644 --- a/Cabal-tests/tests/ParserTests/regressions/th-lift-instances.format +++ b/Cabal-tests/tests/ParserTests/regressions/th-lift-instances.format @@ -1,4 +1,4 @@ -th-lift-instances.cabal:15:9: Tabs used as indentation at 15:9 +th-lift-instances.cabal:15:1: Tabs used as indentation at 15:1 cabal-version: >=1.10 name: th-lift-instances version: 0.1.4 diff --git a/cabal-testsuite/PackageTests/Check/PackageFiles/BOM/cabal.out b/cabal-testsuite/PackageTests/Check/PackageFiles/BOM/cabal.out index 8d2ce00a951..696de1ec938 100644 --- a/cabal-testsuite/PackageTests/Check/PackageFiles/BOM/cabal.out +++ b/cabal-testsuite/PackageTests/Check/PackageFiles/BOM/cabal.out @@ -1,6 +1,6 @@ # cabal check Warning: These warnings may cause trouble when distributing the package: -Warning: pkg.cabal:1:2: Byte-order mark found at the beginning of the file +Warning: pkg.cabal:1:1: Byte-order mark found at the beginning of the file Warning: The following errors will cause portability problems on other environments: Warning: ./pkg.cabal starts with an Unicode byte order mark (BOM). This may cause problems with older cabal versions. Warning: Hackage would reject this package. diff --git a/templates/Lexer.x b/templates/Lexer.x index b9f2d833082..a10045e137e 100644 --- a/templates/Lexer.x +++ b/templates/Lexer.x @@ -83,22 +83,22 @@ $instresc = $printable tokens :- <0> { - @bom? { \_ len _ -> do - when (len /= 0) $ addWarning LexWarningBOM + @bom? { \pos len _ -> do + when (len /= 0) $ addWarningAt pos LexWarningBOM setStartCode bol_section lexToken } } { - @nbspspacetab* @nl { \_pos len inp -> checkWhitespace len inp >> adjustPos retPos >> lexToken } + @nbspspacetab* @nl { \pos len inp -> checkWhitespace pos len inp >> adjustPos retPos >> lexToken } -- no @nl here to allow for comments on last line of the file with no trailing \n $spacetab* "--" $comment* ; -- TODO: check the lack of @nl works here -- including counting line numbers } { - @nbspspacetab* { \pos len inp -> checkLeadingWhitespace len inp >> + @nbspspacetab* { \pos len inp -> checkLeadingWhitespace pos len inp >> if B.length inp == len then return (L pos EOF) else setStartCode in_section @@ -123,7 +123,7 @@ tokens :- } { - @nbspspacetab* { \pos len inp -> checkLeadingWhitespace len inp >>= \len' -> + @nbspspacetab* { \pos len inp -> checkLeadingWhitespace pos len inp >>= \len' -> if B.length inp == len then return (L pos EOF) else setStartCode in_field_layout @@ -172,17 +172,17 @@ toki t pos len input = return $! L pos (t (B.take len input)) tok :: Token -> Position -> Int -> ByteString -> Lex LToken tok t pos _len _input = return $! L pos t -checkLeadingWhitespace :: Int -> ByteString -> Lex Int -checkLeadingWhitespace len bs +checkLeadingWhitespace :: Position -> Int -> ByteString -> Lex Int +checkLeadingWhitespace pos len bs | B.any (== 9) (B.take len bs) = do - addWarning LexWarningTab - checkWhitespace len bs - | otherwise = checkWhitespace len bs + addWarningAt pos LexWarningTab + checkWhitespace pos len bs + | otherwise = checkWhitespace pos len bs -checkWhitespace :: Int -> ByteString -> Lex Int -checkWhitespace len bs +checkWhitespace :: Position -> Int -> ByteString -> Lex Int +checkWhitespace pos len bs | B.any (== 194) (B.take len bs) = do - addWarning LexWarningNBSP + addWarningAt pos LexWarningNBSP return $ len - B.count 194 (B.take len bs) | otherwise = return len