From f4d0f7d891d68cb7b08cd0d16ed48e60384d6647 Mon Sep 17 00:00:00 2001 From: Milas Bowman Date: Fri, 2 Sep 2022 13:24:44 -0400 Subject: [PATCH] dotenv: fix parse error on files with UTF-8 BOM Some Windows editors tend to add UTF-8 BOM markers to files, which breaks parsing of `.env` files. Now, when the file is read, if it starts with a UTF-8 BOM, we'll skip it. (`.env` files are always processed as UTF-8.) See docker/compose#9799. Signed-off-by: Milas Bowman --- dotenv/fixtures/utf8-bom.env | 9 +++++++++ dotenv/godotenv.go | 7 +++++++ dotenv/godotenv_test.go | 30 +++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 dotenv/fixtures/utf8-bom.env diff --git a/dotenv/fixtures/utf8-bom.env b/dotenv/fixtures/utf8-bom.env new file mode 100644 index 00000000..4380d471 --- /dev/null +++ b/dotenv/fixtures/utf8-bom.env @@ -0,0 +1,9 @@ +OPTION_A=1 +OPTION_B=2 +OPTION_C= 3 +OPTION_D =4 +OPTION_E = 5 +456 = ABC +OPTION_F = +OPTION_G= +OPTION_H = my string # Inline comment diff --git a/dotenv/godotenv.go b/dotenv/godotenv.go index 543df9b1..69ac6ece 100644 --- a/dotenv/godotenv.go +++ b/dotenv/godotenv.go @@ -14,6 +14,7 @@ package dotenv import ( + "bytes" "errors" "fmt" "io" @@ -29,6 +30,8 @@ import ( const doubleQuoteSpecialChars = "\\\n\r\"!$`" +var utf8BOM = []byte("\uFEFF") + // LookupFn represents a lookup function to resolve variables from type LookupFn func(string) (string, bool) @@ -48,6 +51,10 @@ func ParseWithLookup(r io.Reader, lookupFn LookupFn) (map[string]string, error) return nil, err } + // seek past the UTF-8 BOM if it exists (particularly on Windows, some + // editors tend to add it, and it'll cause parsing to fail) + data = bytes.TrimPrefix(data, utf8BOM) + return UnmarshalBytesWithLookup(data, lookupFn) } diff --git a/dotenv/godotenv_test.go b/dotenv/godotenv_test.go index b8ff6aea..c95bcfc5 100644 --- a/dotenv/godotenv_test.go +++ b/dotenv/godotenv_test.go @@ -7,6 +7,8 @@ import ( "reflect" "strings" "testing" + + "github.com/stretchr/testify/require" ) var noopPresets = make(map[string]string) @@ -131,7 +133,7 @@ func TestLoadDoesNotOverride(t *testing.T) { loadEnvAndCompareValues(t, Load, envFileName, expectedValues, presets) } -func TestOveroadDoesOverride(t *testing.T) { +func TestOverloadDoesOverride(t *testing.T) { envFileName := "fixtures/plain.env" // ensure NO overload @@ -525,7 +527,7 @@ func TestRoundtrip(t *testing.T) { } } -func TestInheritedEnvVariablSameSize(t *testing.T) { +func TestInheritedEnvVariableSameSize(t *testing.T) { const envKey = "VAR_TO_BE_LOADED_FROM_OS_ENV" const envVal = "SOME_RANDOM_VALUE" os.Setenv(envKey, envVal) @@ -551,7 +553,7 @@ func TestInheritedEnvVariablSameSize(t *testing.T) { } } -func TestInheritedEnvVariablSingleVar(t *testing.T) { +func TestInheritedEnvVariableSingleVar(t *testing.T) { const envKey = "VAR_TO_BE_LOADED_FROM_OS_ENV" const envVal = "SOME_RANDOM_VALUE" os.Setenv(envKey, envVal) @@ -702,3 +704,25 @@ func TestSubstitutionsWithUnsetVarEnvFileDefaultValuePrecedence(t *testing.T) { } } } + +func TestUTF8BOM(t *testing.T) { + envFileName := "fixtures/utf8-bom.env" + + // sanity check the fixture, since the UTF-8 BOM is invisible, it'd be + // easy for it to get removed by accident, which would invalidate this + // test + envFileData, err := os.ReadFile(envFileName) + require.NoError(t, err) + require.True(t, bytes.HasPrefix(envFileData, []byte("\uFEFF")), + "Test fixture file is missing UTF-8 BOM") + + expectedValues := map[string]string{ + "OPTION_A": "1", + "OPTION_B": "2", + "OPTION_C": "3", + "OPTION_D": "4", + "OPTION_E": "5", + } + + loadEnvAndCompareValues(t, Load, envFileName, expectedValues, noopPresets) +}