Skip to content

Commit

Permalink
Lexer: report indentation warnings at column 1
Browse files Browse the repository at this point in the history
We use a Latin-1 lexer generated with Alex, but we also parse the Unicode
BOM, non-breaking spaces, etc. In recent Alex, the reported column isn't
expressed in Unicode chars anymore but in bytes/ASCII chars (probably
due to
haskell/alex@ae525e3
but I haven't checked), which broke our tests (see
#8896).

To work around this, we report indentation warnings at the token start
position (i.e. always column 1) instead of the token end position. Otherwise
the reported position no longer makes sense to the user.
  • Loading branch information
hsyl20 committed Apr 26, 2023
1 parent ca7a8e2 commit 5f72880
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 30 deletions.
26 changes: 13 additions & 13 deletions Cabal-syntax/src/Distribution/Fields/Lexer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -195,17 +195,17 @@ toki t pos len input = return $! L pos (t (B.take len input))
tok :: Token -> Position -> Int -> ByteString -> Lex LToken
tok t pos _len _input = return $! L pos t

checkLeadingWhitespace :: Int -> ByteString -> Lex Int
checkLeadingWhitespace len bs
checkLeadingWhitespace :: Position -> Int -> ByteString -> Lex Int
checkLeadingWhitespace pos len bs
| B.any (== 9) (B.take len bs) = do
addWarning LexWarningTab
checkWhitespace len bs
| otherwise = checkWhitespace len bs
addWarningAt pos LexWarningTab
checkWhitespace pos len bs
| otherwise = checkWhitespace pos len bs

checkWhitespace :: Int -> ByteString -> Lex Int
checkWhitespace len bs
checkWhitespace :: Position -> Int -> ByteString -> Lex Int
checkWhitespace pos len bs
| B.any (== 194) (B.take len bs) = do
addWarning LexWarningNBSP
addWarningAt pos LexWarningNBSP
return $ len - B.count 194 (B.take len bs)
| otherwise = return len

Expand Down Expand Up @@ -313,12 +313,12 @@ bol_section = 3
in_field_braces = 4
in_field_layout = 5
in_section = 6
alex_action_0 = \_ len _ -> do
when (len /= 0) $ addWarning LexWarningBOM
alex_action_0 = \pos len _ -> do
when (len /= 0) $ addWarningAt pos LexWarningBOM
setStartCode bol_section
lexToken
alex_action_1 = \_pos len inp -> checkWhitespace len inp >> adjustPos retPos >> lexToken
alex_action_3 = \pos len inp -> checkLeadingWhitespace len inp >>
alex_action_1 = \pos len inp -> checkWhitespace pos len inp >> adjustPos retPos >> lexToken
alex_action_3 = \pos len inp -> checkLeadingWhitespace pos len inp >>
if B.length inp == len
then return (L pos EOF)
else setStartCode in_section
Expand All @@ -333,7 +333,7 @@ alex_action_12 = tok Colon
alex_action_13 = tok OpenBrace
alex_action_14 = tok CloseBrace
alex_action_15 = \_ _ _ -> adjustPos retPos >> setStartCode bol_section >> lexToken
alex_action_16 = \pos len inp -> checkLeadingWhitespace len inp >>= \len' ->
alex_action_16 = \pos len inp -> checkLeadingWhitespace pos len inp >>= \len' ->
if B.length inp == len
then return (L pos EOF)
else setStartCode in_field_layout
Expand Down
6 changes: 6 additions & 0 deletions Cabal-syntax/src/Distribution/Fields/LexerMonad.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ module Distribution.Fields.LexerMonad (
LexWarning(..),
LexWarningType(..),
addWarning,
addWarningAt,
toPWarnings,

) where
Expand Down Expand Up @@ -153,3 +154,8 @@ setStartCode c = Lex $ \s -> LexResult s{ curCode = c } ()
-- | Add a warning of the given type, located at the position currently
-- stored in the lexer state ('curPos'). The warning is prepended to the
-- state's 'warnings' list.
addWarning :: LexWarningType -> Lex ()
addWarning wt = Lex $ \s@LexState{ curPos = pos, warnings = ws } ->
LexResult s{ warnings = LexWarning wt pos : ws } ()

-- | Add a warning of the given type at an explicitly supplied position.
--
-- The position stored in the lexer state is not consulted; the caller's
-- @pos@ is recorded as-is. The warning is prepended to the state's
-- 'warnings' list.
addWarningAt :: Position -> LexWarningType -> Lex ()
addWarningAt pos wt = Lex $ \s@LexState{ warnings = ws } ->
LexResult s{ warnings = LexWarning wt pos : ws } ()
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Octree-0.5.cabal:39:3: Non breaking spaces at 39:3, 41:3, 43:3
Octree-0.5.cabal:39:1: Non breaking spaces at 39:1, 41:1, 43:1
cabal-version: >=1.8
name: Octree
version: 0.5
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
monad-param.cabal:19:3: Tabs used as indentation at 19:3, 20:3
monad-param.cabal:19:1: Tabs used as indentation at 19:1, 20:1
name: monad-param
version: 0.0.1
license: BSD3
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
th-lift-instances.cabal:15:9: Tabs used as indentation at 15:9
th-lift-instances.cabal:15:1: Tabs used as indentation at 15:1
cabal-version: >=1.10
name: th-lift-instances
version: 0.1.4
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# cabal check
Warning: These warnings may cause trouble when distributing the package:
Warning: pkg.cabal:1:2: Byte-order mark found at the beginning of the file
Warning: pkg.cabal:1:1: Byte-order mark found at the beginning of the file
Warning: The following errors will cause portability problems on other environments:
Warning: ./pkg.cabal starts with an Unicode byte order mark (BOM). This may cause problems with older cabal versions.
Warning: Hackage would reject this package.
26 changes: 13 additions & 13 deletions templates/Lexer.x
Original file line number Diff line number Diff line change
Expand Up @@ -83,22 +83,22 @@ $instresc = $printable
tokens :-

<0> {
@bom? { \_ len _ -> do
when (len /= 0) $ addWarning LexWarningBOM
@bom? { \pos len _ -> do
when (len /= 0) $ addWarningAt pos LexWarningBOM
setStartCode bol_section
lexToken
}
}

<bol_section, bol_field_layout, bol_field_braces> {
@nbspspacetab* @nl { \_pos len inp -> checkWhitespace len inp >> adjustPos retPos >> lexToken }
@nbspspacetab* @nl { \pos len inp -> checkWhitespace pos len inp >> adjustPos retPos >> lexToken }
-- no @nl here to allow for comments on last line of the file with no trailing \n
$spacetab* "--" $comment* ; -- TODO: check the lack of @nl works here
-- including counting line numbers
}

<bol_section> {
@nbspspacetab* { \pos len inp -> checkLeadingWhitespace len inp >>
@nbspspacetab* { \pos len inp -> checkLeadingWhitespace pos len inp >>
if B.length inp == len
then return (L pos EOF)
else setStartCode in_section
Expand All @@ -123,7 +123,7 @@ tokens :-
}

<bol_field_layout> {
@nbspspacetab* { \pos len inp -> checkLeadingWhitespace len inp >>= \len' ->
@nbspspacetab* { \pos len inp -> checkLeadingWhitespace pos len inp >>= \len' ->
if B.length inp == len
then return (L pos EOF)
else setStartCode in_field_layout
Expand Down Expand Up @@ -172,17 +172,17 @@ toki t pos len input = return $! L pos (t (B.take len input))
tok :: Token -> Position -> Int -> ByteString -> Lex LToken
tok t pos _len _input = return $! L pos t

checkLeadingWhitespace :: Int -> ByteString -> Lex Int
checkLeadingWhitespace len bs
checkLeadingWhitespace :: Position -> Int -> ByteString -> Lex Int
checkLeadingWhitespace pos len bs
| B.any (== 9) (B.take len bs) = do
addWarning LexWarningTab
checkWhitespace len bs
| otherwise = checkWhitespace len bs
addWarningAt pos LexWarningTab
checkWhitespace pos len bs
| otherwise = checkWhitespace pos len bs

checkWhitespace :: Int -> ByteString -> Lex Int
checkWhitespace len bs
checkWhitespace :: Position -> Int -> ByteString -> Lex Int
checkWhitespace pos len bs
| B.any (== 194) (B.take len bs) = do
addWarning LexWarningNBSP
addWarningAt pos LexWarningNBSP
return $ len - B.count 194 (B.take len bs)
| otherwise = return len

Expand Down

0 comments on commit 5f72880

Please sign in to comment.