Skip to content

Commit

Permalink
[confmap/provider/s3]Allow buckets containing dots in the s3 confmap …
Browse files Browse the repository at this point in the history
…provider (#22054)

**Description:**  Fix: properly handle bucket names containing dots. Also made the regex patter used more restrictive

Previous regex pattern:
```
s3:\/\/(.*)\.s3\.(.*).amazonaws\.com\/(.*)
```
New regex pattern:
```
^s3:\/\/([a-z0-9\.\-]{3,63})\.s3\.([a-z0-9\-]+).amazonaws\.com\/.
```

We are not including the key as part of the regex because the key can contain any unicode character, therefore we leave that for the URL parser to verify that the URI is well formed. (i.e.: all the characters are properly scaped).
---------

Signed-off-by: Raphael Silva <[email protected]>
Co-authored-by: Pablo Baeyens <[email protected]>
  • Loading branch information
rapphil and mx-psi authored May 19, 2023
1 parent cf69bb0 commit 3e5086c
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 82 deletions.
18 changes: 18 additions & 0 deletions .chloggen/rapphil-fix-s3-provider.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: confmap/provider/s3provider

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Properly handle bucket names containing dot characters

# One or more tracking issues related to the change
issues: [22054]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
Properly handle bucket names containing dot characters, according to
https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html
29 changes: 17 additions & 12 deletions confmap/provider/s3provider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@ import (

const (
schemeName = "s3"
// Pattern for a s3 uri
s3Pattern = `^s3:\/\/([a-z0-9\.\-]{3,63})\.s3\.([a-z0-9\-]+).amazonaws\.com\/.`
)

var s3Regexp = regexp.MustCompile(s3Pattern)

type s3Client interface {
GetObject(context.Context, *s3.GetObjectInput, ...func(*s3.Options)) (*s3.GetObjectOutput, error)
}
Expand All @@ -35,7 +39,8 @@ type provider struct {
//
// s3-uri : s3://[BUCKET].s3.[REGION].amazonaws.com/[KEY]
//
// One example for s3-uri be like: s3://DOC-EXAMPLE-BUCKET.s3.us-west-2.amazonaws.com/photos/puppy.jpg
// One example for s3-uri be like: s3://doc-example-bucket.s3.us-west-2.amazonaws.com/photos/puppy.jpg
// References: https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html
//
// Examples:
// `s3://DOC-EXAMPLE-BUCKET.s3.us-west-2.amazonaws.com/photos/puppy.jpg` - (unix, windows)
Expand Down Expand Up @@ -101,27 +106,27 @@ func (*provider) Shutdown(context.Context) error {
// - [KEY] : The key exists in a given bucket, can be used to retrieve a file.
func s3URISplit(uri string) (string, string, string, error) {
// check whether the pattern of s3-uri is correct
matched, err := regexp.MatchString(`s3:\/\/(.*)\.s3\.(.*).amazonaws\.com\/(.*)`, uri)
if !matched || err != nil {
return "", "", "", fmt.Errorf("invalid s3-uri using a wrong pattern")

matched := s3Regexp.MatchString(uri)
if !matched {
return "", "", "", fmt.Errorf("s3 uri does not match the pattern: %q", s3Pattern)
}

captureGroups := s3Regexp.FindStringSubmatch(uri)
bucket, region := captureGroups[1], captureGroups[2]

// parse the uri as [scheme:][//[userinfo@]host][/]path[?query][#fragment], then extract components from
u, err := url.Parse(uri)
if err != nil {
return "", "", "", fmt.Errorf("failed to change the s3-uri to url.URL: %w", err)
return "", "", "", fmt.Errorf("failed to parse s3 uri: %w", err)
}
// extract components
key := strings.TrimPrefix(u.Path, "/")
host := u.Host
hostSplitted := strings.Split(host, ".")
if len(hostSplitted) < 5 {
return "", "", "", fmt.Errorf("invalid host in the s3-uri")
}
bucket := hostSplitted[0]
region := hostSplitted[2]
// check empty fields
if bucket == "" || region == "" || key == "" {
// This error should never happen because of the regexp pattern
return "", "", "", fmt.Errorf("invalid s3-uri with empty fields")
}

return bucket, region, key, nil
}
126 changes: 56 additions & 70 deletions confmap/provider/s3provider/provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ package s3provider
import (
"bytes"
"context"
"fmt"
"io"
"os"
"testing"
Expand All @@ -18,64 +17,40 @@ import (
)

// A s3 client mocking s3provider works in normal cases
type testClient struct{}

// Implement GetObject() for testClient in normal cases
func (client *testClient) GetObject(context.Context, *s3.GetObjectInput, ...func(*s3.Options)) (*s3.GetObjectOutput, error) {
// read local config file and return
f, err := os.ReadFile("./testdata/otel-config.yaml")
if err != nil {
return &s3.GetObjectOutput{}, err
}
return &s3.GetObjectOutput{Body: io.NopCloser(bytes.NewReader(f)), ContentLength: (int64)(len(f))}, nil
type testClient struct {
configFile string
bucket string
region string
key string
}

// Create a provider mocking s3provider works in normal cases
func NewTestProvider() confmap.Provider {
return &provider{client: &testClient{}}
}
// Implement GetObject() for testClient in normal cases
func (client *testClient) GetObject(_ context.Context, request *s3.GetObjectInput, opts ...func(*s3.Options)) (*s3.GetObjectOutput, error) {
s3Opts := s3.Options{}

// A s3 client mocking s3provider works when there is no corresponding config file according to the given s3-uri
type testNonExistClient struct{}
for _, opt := range opts {
opt(&s3Opts)
}

// Create a provider mocking s3provider works when there is no corresponding config file according to the given s3-uri
func NewTestNonExistProvider() confmap.Provider {
return &provider{client: &testNonExistClient{}}
}
client.bucket = *request.Bucket
client.region = s3Opts.Region
client.key = *request.Key

// Implement GetObject() for testClient when there is no corresponding config file according to the given s3-uri
func (client *testNonExistClient) GetObject(context.Context, *s3.GetObjectInput, ...func(*s3.Options)) (*s3.GetObjectOutput, error) {
// read local config file and return
f, err := os.ReadFile("./testdata/nonexist-otel-config.yaml")
f, err := os.ReadFile(client.configFile)
if err != nil {
return &s3.GetObjectOutput{}, err
}
return &s3.GetObjectOutput{Body: io.NopCloser(bytes.NewReader(f)), ContentLength: (int64)(len(f))}, nil
}

// A s3 client mocking s3provider works when the returned config file is invalid
type testInvalidClient struct{}

// Create a provider mocking s3provider works when the returned config file is invalid
func NewTestInvalidProvider() confmap.Provider {
return &provider{client: &testInvalidClient{}}
return &s3.GetObjectOutput{Body: io.NopCloser(bytes.NewReader(f)), ContentLength: (int64)(len(f))}, nil
}

// Implement GetObject() for testClient when the returned config file is invalid
func (client *testInvalidClient) GetObject(context.Context, *s3.GetObjectInput, ...func(*s3.Options)) (*s3.GetObjectOutput, error) {
// read local config file and return
return &s3.GetObjectOutput{}, fmt.Errorf("the downloaded config file")
}

func TestFunctionalityDownloadFileS3(t *testing.T) {
fp := NewTestProvider()
_, err := fp.Retrieve(context.Background(), "s3://bucket.s3.region.amazonaws.com/key", nil)
assert.NoError(t, err)
assert.NoError(t, fp.Shutdown(context.Background()))
// Create a provider mocking the s3 provider
func NewTestProvider(configFile string) confmap.Provider {
return &provider{client: &testClient{configFile: configFile}}
}

func TestFunctionalityS3URISplit(t *testing.T) {
fp := NewTestProvider()
fp := NewTestProvider("./testdata/otel-config.yaml")
bucket, region, key, err := s3URISplit("s3://bucket.s3.region.amazonaws.com/key")
assert.NoError(t, err)
assert.Equal(t, "bucket", bucket)
Expand All @@ -84,38 +59,49 @@ func TestFunctionalityS3URISplit(t *testing.T) {
assert.NoError(t, fp.Shutdown(context.Background()))
}

func TestInvalidS3URISplit(t *testing.T) {
fp := NewTestProvider()
_, err := fp.Retrieve(context.Background(), "s3://bucket.s3.region.amazonaws", nil)
assert.Error(t, err)
_, err = fp.Retrieve(context.Background(), "s3://bucket.s3.region.aws.com/key", nil)
assert.Error(t, err)
require.NoError(t, fp.Shutdown(context.Background()))
func TestURIs(t *testing.T) {

tests := []struct {
name string
uri string
valid bool
bucket string
region string
key string
}{
{"Invalid domain", "s3://bucket.s3.region.aws.com/key", false, "", "", ""},
{"Invalid region", "s3://bucket.s3.region.aws.amazonaws.com/key", false, "", "", ""},
{"Invalid bucket", "s3://b.s3.region.amazonaws.com/key", false, "", "", ""},
{"No key", "s3://bucket.s3.region.amazonaws.com/", false, "", "", ""},
{"No bucket", "s3://s3.region.amazonaws.com/key", false, "", "", ""},
{"No region", "s3://some-bucket.s3..amazonaws.com/key", false, "", "", ""},
{"Test malformed uri", "s3://some-bucket.s3.us-west-2.amazonaws.com/key%", false, "", "", ""},
{"Valid bucket", "s3://bucket.name-here.s3.us-west-2.amazonaws.com/key", true, "bucket.name-here", "us-west-2", "key"},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
fp := NewTestProvider("./testdata/otel-config.yaml")
_, err := fp.Retrieve(context.Background(), tt.uri, nil)
if !tt.valid {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
require.NoError(t, fp.Shutdown(context.Background()))
})
}
}

func TestUnsupportedScheme(t *testing.T) {
fp := NewTestProvider()
fp := NewTestProvider("./testdata/otel-config.yaml")
_, err := fp.Retrieve(context.Background(), "https://google.com", nil)
assert.Error(t, err)
assert.NoError(t, fp.Shutdown(context.Background()))
}

func TestEmptyBucket(t *testing.T) {
fp := NewTestProvider()
_, err := fp.Retrieve(context.Background(), "s3://.s3.region.amazonaws.com/key", nil)
require.Error(t, err)
require.NoError(t, fp.Shutdown(context.Background()))
}

func TestEmptyKey(t *testing.T) {
fp := NewTestProvider()
_, err := fp.Retrieve(context.Background(), "s3://bucket.s3.region.amazonaws.com/", nil)
require.Error(t, err)
require.NoError(t, fp.Shutdown(context.Background()))
}

func TestNonExistent(t *testing.T) {
fp := NewTestNonExistProvider()
fp := NewTestProvider("./testdata/non-existent.yaml")
_, err := fp.Retrieve(context.Background(), "s3://non-exist-bucket.s3.region.amazonaws.com/key", nil)
assert.Error(t, err)
_, err = fp.Retrieve(context.Background(), "s3://bucket.s3.region.amazonaws.com/non-exist-key.yaml", nil)
Expand All @@ -126,14 +112,14 @@ func TestNonExistent(t *testing.T) {
}

func TestInvalidYAML(t *testing.T) {
fp := NewTestInvalidProvider()
fp := NewTestProvider("./testdata/invalid-otel-config.yaml")
_, err := fp.Retrieve(context.Background(), "s3://bucket.s3.region.amazonaws.com/key", nil)
assert.Error(t, err)
require.NoError(t, fp.Shutdown(context.Background()))
}

func TestScheme(t *testing.T) {
fp := NewTestProvider()
fp := NewTestProvider("./testdata/otel-config.yaml")
assert.Equal(t, "s3", fp.Scheme())
require.NoError(t, fp.Shutdown(context.Background()))
}

0 comments on commit 3e5086c

Please sign in to comment.