Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

env var name CAN be any unicode char, not just ASCII #378

Merged
merged 1 commit into from
Mar 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion dotenv/godotenv.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,13 @@ func Read(filenames ...string) (map[string]string, error) {

// UnmarshalBytesWithLookup converts src to a string and hands it to
// UnmarshalWithLookup together with lookupFn, returning that call's map of
// keys and values and its error unchanged.
func UnmarshalBytesWithLookup(src []byte, lookupFn LookupFn) (map[string]string, error) {
	text := string(src)
	return UnmarshalWithLookup(text, lookupFn)
}

// UnmarshalWithLookup parses env file from string, returning a map of keys and values.
func UnmarshalWithLookup(src string, lookupFn LookupFn) (map[string]string, error) {
out := make(map[string]string)
err := newParser().parseBytes(src, out, lookupFn)
err := newParser().parse(src, out, lookupFn)
return out, err
}

Expand Down
2 changes: 1 addition & 1 deletion dotenv/godotenv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ func TestLinesToIgnore(t *testing.T) {

for n, c := range cases {
t.Run(n, func(t *testing.T) {
got := string(newParser().getStatementStart([]byte(c.input)))
got := string(newParser().getStatementStart(c.input))
if got != c.want {
t.Errorf("Expected:\t %q\nGot:\t %q", c.want, got)
}
Expand Down
60 changes: 29 additions & 31 deletions dotenv/parser.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package dotenv

import (
"bytes"
"errors"
"fmt"
"regexp"
Expand Down Expand Up @@ -31,14 +30,14 @@ func newParser() *parser {
}
}

func (p *parser) parseBytes(src []byte, out map[string]string, lookupFn LookupFn) error {
func (p *parser) parse(src string, out map[string]string, lookupFn LookupFn) error {
cutset := src
if lookupFn == nil {
lookupFn = noLookupFn
}
for {
cutset = p.getStatementStart(cutset)
if cutset == nil {
if cutset == "" {
// reached end of file
break
}
Expand Down Expand Up @@ -75,10 +74,10 @@ func (p *parser) parseBytes(src []byte, out map[string]string, lookupFn LookupFn
// getStatementStart returns src starting at the beginning of the next statement.
//
// It skips leading whitespace and comment lines.
func (p *parser) getStatementStart(src []byte) []byte {
func (p *parser) getStatementStart(src string) string {
pos := p.indexOfNonSpaceChar(src)
if pos == -1 {
return nil
return ""
}

src = src[pos:]
Expand All @@ -87,70 +86,69 @@ func (p *parser) getStatementStart(src []byte) []byte {
}

// skip comment section
pos = bytes.IndexFunc(src, isCharFunc('\n'))
pos = strings.IndexFunc(src, isCharFunc('\n'))
if pos == -1 {
return nil
return ""
}
return p.getStatementStart(src[pos:])
}

// locateKeyName locates and parses key name and returns rest of slice
func (p *parser) locateKeyName(src []byte) (string, []byte, bool, error) {
func (p *parser) locateKeyName(src string) (string, string, bool, error) {
var key string
var inherited bool
// trim "export" and space at beginning
src = bytes.TrimLeftFunc(exportRegex.ReplaceAll(src, nil), isSpace)
src = strings.TrimLeftFunc(exportRegex.ReplaceAllString(src, ""), isSpace)

// locate key name end and validate it in single loop
offset := 0
loop:
for i, char := range src {
rchar := rune(char)
if isSpace(rchar) {
for i, rune := range src {
if isSpace(rune) {
continue
}

switch char {
switch rune {
case '=', ':', '\n':
// library also supports yaml-style value declaration
key = string(src[0:i])
offset = i + 1
inherited = char == '\n'
inherited = rune == '\n'
break loop
case '_', '.', '-', '[', ']':
default:
// variable name may contain any Unicode letter or number, plus _ . - [ ]
if unicode.IsLetter(rchar) || unicode.IsNumber(rchar) {
if unicode.IsLetter(rune) || unicode.IsNumber(rune) {
continue
}

return "", nil, inherited, fmt.Errorf(
return "", "", inherited, fmt.Errorf(
`line %d: unexpected character %q in variable name`,
p.line, string(char))
p.line, string(rune))
}
}

if len(src) == 0 {
return "", nil, inherited, errors.New("zero length string")
if src == "" {
return "", "", inherited, errors.New("zero length string")
}

// trim whitespace
key = strings.TrimRightFunc(key, unicode.IsSpace)
cutset := bytes.TrimLeftFunc(src[offset:], isSpace)
cutset := strings.TrimLeftFunc(src[offset:], isSpace)
return key, cutset, inherited, nil
}

// extractVarValue extracts variable value and returns rest of slice
func (p *parser) extractVarValue(src []byte, envMap map[string]string, lookupFn LookupFn) (string, []byte, error) {
func (p *parser) extractVarValue(src string, envMap map[string]string, lookupFn LookupFn) (string, string, error) {
quote, isQuoted := hasQuotePrefix(src)
if !isQuoted {
// unquoted value - read until new line
value, rest, _ := bytes.Cut(src, []byte("\n"))
value, rest, _ := strings.Cut(src, "\n")
p.line++

// Remove inline comments on unquoted lines
value, _, _ = bytes.Cut(value, []byte(" #"))
value = bytes.TrimRightFunc(value, unicode.IsSpace)
value, _, _ = strings.Cut(value, " #")
value = strings.TrimRightFunc(value, unicode.IsSpace)
retVal, err := expandVariables(string(value), envMap, lookupFn)
return retVal, rest, err
}
Expand All @@ -176,7 +174,7 @@ func (p *parser) extractVarValue(src []byte, envMap map[string]string, lookupFn
// variables on the result
retVal, err := expandVariables(expandEscapes(value), envMap, lookupFn)
if err != nil {
return "", nil, err
return "", "", err
}
value = retVal
}
Expand All @@ -185,12 +183,12 @@ func (p *parser) extractVarValue(src []byte, envMap map[string]string, lookupFn
}

// return formatted error if quoted string is not terminated
valEndIndex := bytes.IndexFunc(src, isCharFunc('\n'))
valEndIndex := strings.IndexFunc(src, isCharFunc('\n'))
if valEndIndex == -1 {
valEndIndex = len(src)
}

return "", nil, fmt.Errorf("line %d: unterminated quoted value %s", p.line, src[:valEndIndex])
return "", "", fmt.Errorf("line %d: unterminated quoted value %s", p.line, src[:valEndIndex])
}

func expandEscapes(str string) string {
Expand Down Expand Up @@ -225,8 +223,8 @@ func expandEscapes(str string) string {
return out
}

func (p *parser) indexOfNonSpaceChar(src []byte) int {
return bytes.IndexFunc(src, func(r rune) bool {
func (p *parser) indexOfNonSpaceChar(src string) int {
return strings.IndexFunc(src, func(r rune) bool {
if r == '\n' {
p.line++
}
Expand All @@ -235,8 +233,8 @@ func (p *parser) indexOfNonSpaceChar(src []byte) int {
}

// hasQuotePrefix reports whether charset starts with single or double quote and returns quote character
func hasQuotePrefix(src []byte) (byte, bool) {
if len(src) == 0 {
func hasQuotePrefix(src string) (byte, bool) {
if src == "" {
return 0, false
}

Expand Down
11 changes: 6 additions & 5 deletions dotenv/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,21 @@ var testInput = `
a=b
a[1]=c
a.propertyKey=d
árvíztűrő-TÜKÖRFÚRÓGÉP=ÁRVÍZTŰRŐ-tükörfúrógép
`

func TestParseBytes(t *testing.T) {
p := newParser()

var inputBytes = []byte(testInput)
expectedOutput := map[string]string{
"a": "b",
"a[1]": "c",
"a.propertyKey": "d",
"a": "b",
"a[1]": "c",
"a.propertyKey": "d",
"árvíztűrő-TÜKÖRFÚRÓGÉP": "ÁRVÍZTŰRŐ-tükörfúrógép",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missed opportunity for an emoji in a test case 🤌

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh boo, we still check to see if it's considered a letter:

failed to read /Users/milas/dev/repro/unicode/.env: line 1: unexpected character "😂" in variable name

Copy link
Member

@milas milas Mar 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but also...

$ docker compose run echo
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
HOSTNAME=5fadb26ba525
TERM=xterm
😂=1
HOME=/root

surprise! we do something different for env vars in YAML 🙈

services:
  echo:
   environment:
     - "😂=1"
   image: alpine
   command: env

}

out := map[string]string{}
err := p.parseBytes([]byte(inputBytes), out, nil)
err := p.parse(testInput, out, nil)

assert.NilError(t, err)
assert.Equal(t, len(expectedOutput), len(out))
Expand Down