From 644d803e30fb38718a11afd762a3ffa59e22e5ef Mon Sep 17 00:00:00 2001 From: Fabio Graetz Date: Tue, 10 Dec 2024 20:09:05 +0000 Subject: [PATCH] Fix flytestdlib's stowStore.List for google cloud storage Signed-off-by: Fabio Graetz --- flytestdlib/storage/stow_store.go | 14 ++++++++++++-- flytestdlib/storage/stow_store_test.go | 9 +++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/flytestdlib/storage/stow_store.go b/flytestdlib/storage/stow_store.go index dbaddeee28..809c64a93c 100644 --- a/flytestdlib/storage/stow_store.go +++ b/flytestdlib/storage/stow_store.go @@ -263,7 +263,7 @@ func (s *StowStore) Head(ctx context.Context, reference DataReference) (Metadata } func (s *StowStore) List(ctx context.Context, reference DataReference, maxItems int, cursor Cursor) ([]DataReference, Cursor, error) { - _, containerName, key, err := reference.Split() + protocol, containerName, key, err := reference.Split() if err != nil { s.metrics.BadReference.Inc(ctx) return nil, NewCursorAtEnd(), err @@ -291,7 +291,17 @@ func (s *StowStore) List(ctx context.Context, reference DataReference, maxItems if err == nil { results := make([]DataReference, len(items)) for index, item := range items { - results[index] = DataReference(item.URL().String()) + // We don't use `item.URL()` because e.g. listing a google cloud storage + // bucket `gs://some-bucket/...` will result in items with URLs like + // `google://storage.googleapis.com/download/storage/v1/b/some-bucket/...` + // which subsequently cannot be found by the stow store. + + u := url.URL{ + Scheme: protocol, + Host: containerName, + Path: item.Name(), + } + results[index] = DataReference(u.String()) } if stow.IsCursorEnd(stowCursor) { cursor = NewCursorAtEnd() diff --git a/flytestdlib/storage/stow_store_test.go b/flytestdlib/storage/stow_store_test.go index 719aa43130..40cb610b78 100644 --- a/flytestdlib/storage/stow_store_test.go +++ b/flytestdlib/storage/stow_store_test.go @@ -97,8 +97,8 @@ func (m mockStowContainer) Items(prefix, cursor string, count int) ([]stow.Item, numItems := endIndexExc - startIndex results := make([]stow.Item, numItems) for index, itemKey := range itemKeys[startIndex:endIndexExc] { - url := fmt.Sprintf("s3://%s/%s", m.id, m.items[itemKey].url) - results[index] = mockStowItem{url: url, size: m.items[itemKey].size} + url := fmt.Sprintf("s3://%s/%s", m.id, m.items[itemKey].name) + results[index] = mockStowItem{url: url, size: m.items[itemKey].size, name: m.items[itemKey].name} } if endIndexExc == len(m.items) { @@ -123,7 +123,7 @@ func (m *mockStowContainer) Put(name string, r io.Reader, size int64, metadata m if m.putCB != nil { return m.putCB(name, r, size, metadata) } - item := mockStowItem{url: name, size: size} + item := mockStowItem{url: name, name: name, size: size} m.items[name] = item return item, nil } @@ -137,6 +137,7 @@ func newMockStowContainer(id string) *mockStowContainer { type mockStowItem struct { url string + name string size int64 } @@ -145,7 +146,7 @@ func (m mockStowItem) ID() string { } func (m mockStowItem) Name() string { - return m.url + return m.name } func (m mockStowItem) URL() *url.URL {