From 73fde59edb61b0785d6802b9f13141f00cb2f5c3 Mon Sep 17 00:00:00 2001 From: Florian Forster Date: Mon, 19 Mar 2018 12:01:22 +0100 Subject: [PATCH 1/2] scanner: Add unit test triggering a panic in scanHeredoc(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` panic: regexp: Compile("[[:space:]]*<\xc8\\z"): error parsing regexp: invalid UTF-8: `�\z` goroutine 32 [running]: testing.tRunner.func1(0xc4200cae10) /usr/lib/google-golang/src/testing/testing.go:742 +0x29d panic(0x507a00, 0xc420290690) /usr/lib/google-golang/src/runtime/panic.go:505 +0x229 regexp.MustCompile(0xc420289e10, 0x10, 0xc420087680) /usr/lib/google-golang/src/regexp/regexp.go:240 +0x171 github.com/hashicorp/hcl/hcl/scanner.(*Scanner).scanHeredoc(0xc4200878c0) gopath/src/github.com/hashicorp/hcl/hcl/scanner/scanner.go:444 +0x3a9 github.com/hashicorp/hcl/hcl/scanner.(*Scanner).Scan(0xc4200878c0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0) gopath/src/github.com/hashicorp/hcl/hcl/scanner/scanner.go:186 +0x3e5 ``` --- hcl/scanner/scanner_test.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/hcl/scanner/scanner_test.go b/hcl/scanner/scanner_test.go index 4f2c9cbe..926ac000 100644 --- a/hcl/scanner/scanner_test.go +++ b/hcl/scanner/scanner_test.go @@ -589,3 +589,22 @@ func countNewlines(s string) int { } return n } + +func TestScanHeredocRegexpCompile(t *testing.T) { + cases := []string{ + "0\xe1\n<<ȸ\nhello\nworld\nȸ", + } + + for _, c := range cases { + s := New([]byte(c)) + fmt.Printf("START %q\n", c) + + for { + tok := s.Scan() + if tok.Type == token.EOF { + break + } + t.Logf("s.Scan() = %s", tok) + } + } +} From fdaaf22252fe4ee4cfc3a991ade0aca00cc48eb5 Mon Sep 17 00:00:00 2001 From: Florian Forster Date: Mon, 19 Mar 2018 14:00:49 +0100 Subject: [PATCH 2/2] scanner: Update prevPos even when returning utf8.RuneError. The calling code will still call unread(), causing panics. This fixes the TestScanHeredocRegexpCompile() unit test. --- hcl/scanner/scanner.go | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/hcl/scanner/scanner.go b/hcl/scanner/scanner.go index 6601ef76..6bdcdf13 100644 --- a/hcl/scanner/scanner.go +++ b/hcl/scanner/scanner.go @@ -74,14 +74,6 @@ func (s *Scanner) next() rune { return eof } - if ch == utf8.RuneError && size == 1 { - s.srcPos.Column++ - s.srcPos.Offset += size - s.lastCharLen = size - s.err("illegal UTF-8 encoding") - return ch - } - // remember last position s.prevPos = s.srcPos @@ -89,6 +81,11 @@ func (s *Scanner) next() rune { s.lastCharLen = size s.srcPos.Offset += size + if ch == utf8.RuneError && size == 1 { + s.err("illegal UTF-8 encoding") + return ch + } + if ch == '\n' { s.srcPos.Line++ s.lastLineLen = s.srcPos.Column