diff --git a/hcl/scanner/scanner.go b/hcl/scanner/scanner.go index 5a3792eb..ab0b145d 100644 --- a/hcl/scanner/scanner.go +++ b/hcl/scanner/scanner.go @@ -74,14 +74,6 @@ func (s *Scanner) next() rune { return eof } - if ch == utf8.RuneError && size == 1 { - s.srcPos.Column++ - s.srcPos.Offset += size - s.lastCharLen = size - s.err("illegal UTF-8 encoding") - return ch - } - // remember last position s.prevPos = s.srcPos @@ -89,6 +81,11 @@ func (s *Scanner) next() rune { s.lastCharLen = size s.srcPos.Offset += size + if ch == utf8.RuneError && size == 1 { + s.err("illegal UTF-8 encoding") + return ch + } + if ch == '\n' { s.srcPos.Line++ s.lastLineLen = s.srcPos.Column diff --git a/hcl/scanner/scanner_test.go b/hcl/scanner/scanner_test.go index 073c3f16..018d22c4 100644 --- a/hcl/scanner/scanner_test.go +++ b/hcl/scanner/scanner_test.go @@ -592,3 +592,22 @@ func countNewlines(s string) int { } return n } + +func TestScanHeredocRegexpCompile(t *testing.T) { + cases := []string{ + "0\xe1\n<<ȸ\nhello\nworld\nȸ", + } + + for _, c := range cases { + s := New([]byte(c)) + fmt.Printf("START %q\n", c) + + for { + tok := s.Scan() + if tok.Type == token.EOF { + break + } + t.Logf("s.Scan() = %s", tok) + } + } +}