This repository has been archived by the owner on Oct 9, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 53
RawDataOutput directory for every task execution #67
Merged
Merged
Changes from all commits
Commits
Show all changes
21 commits
Select commit
Hold shift + click to select a range
6ba859a
Work in progress
43ab990
work in progress
dd6996d
work in progress
e141907
Merge branch 'master' into adding-dataoutput-prefix
4dd43b1
updated tests
5a3cb44
unit testing in progress
f6718bf
Merge branch 'master' into adding-dataoutput-prefix
96170d0
Merge branch 'master' into adding-dataoutput-prefix
52992e5
Unit tests added
f6d8c91
Unit test fixes
cce3a00
lint fixes
c9c9d9a
updated hashing algorithm
833b625
updated output sandbox constructor
ea42807
Renamed Sandbox -> RawOutputPath
d4df89d
Merge branch 'master' into adding-dataoutput-prefix
4e16940
Update go/tasks/pluginmachinery/ioutils/raw_output_path.go
dd03159
Update go/tasks/pluginmachinery/ioutils/raw_output_path.go
234ba97
Merge branch 'master' into adding-dataoutput-prefix
7208977
Merge branch 'adding-dataoutput-prefix' of github.com:lyft/flyteplugi…
d84b749
rename issues
434bd71
rename fix
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
package ioutils | ||
|
||
import "context" | ||
|
||
// ShardSelector chooses a shard prefix for a given key.
type ShardSelector interface {
	// GetShardPrefix returns the shard prefix selected for the bytes in s,
	// or an error when no prefix can be produced.
	GetShardPrefix(ctx context.Context, s []byte) (string, error)
}
73 changes: 73 additions & 0 deletions
73
go/tasks/pluginmachinery/ioutils/precomputed_shardselector.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package ioutils | ||
|
||
import ( | ||
"context" | ||
"hash/fnv" | ||
"strings" | ||
|
||
"github.com/pkg/errors" | ||
) | ||
|
||
// GenerateAlphabet appends the lowercase Latin letters 'a' through 'z', in
// order, to b and returns the extended slice.
func GenerateAlphabet(b []rune) []rune {
	for _, letter := range "abcdefghijklmnopqrstuvwxyz" {
		b = append(b, letter)
	}
	return b
}
|
||
// GenerateArabicNumerals appends the digits '0' through '9', in order, to b
// and returns the extended slice.
func GenerateArabicNumerals(b []rune) []rune {
	for _, digit := range "0123456789" {
		b = append(b, digit)
	}
	return b
}
|
||
func createAlphabetAndNumerals() []rune { | ||
b := make([]rune, 0, 36) | ||
b = GenerateAlphabet(b) | ||
return GenerateArabicNumerals(b) | ||
} | ||
|
||
// PrecomputedShardSelector distributes data into one of a fixed set of
// precomputed shard prefixes. The prefix is chosen deterministically from the
// input, so the same input always maps to the same prefix.
type PrecomputedShardSelector struct {
	// precomputedPrefixes holds the candidate shard prefixes.
	precomputedPrefixes []string
	// buckets is the modulus applied to the hash when picking a prefix.
	buckets uint32
}

// GetShardPrefix returns the shard prefix for s, selected by taking the 32-bit
// FNV-1a hash of s modulo the number of buckets.
//
// It returns an error instead of panicking when the selector has zero buckets
// or when the bucket count is larger than the number of precomputed prefixes.
func (d *PrecomputedShardSelector) GetShardPrefix(_ context.Context, s []byte) (string, error) {
	// A zero modulus would trigger an integer-divide-by-zero panic below.
	if d.buckets == 0 {
		return "", errors.New("cannot select shard prefix: no shards configured")
	}

	h := fnv.New32a()
	// hash.Hash documents that Write never returns an error, so there is
	// nothing to handle here.
	_, _ = h.Write(s)

	idx := h.Sum32() % d.buckets
	// Guard against a selector whose bucket count and prefix list disagree.
	if uint64(idx) >= uint64(len(d.precomputedPrefixes)) {
		return "", errors.New("cannot select shard prefix: bucket count exceeds number of precomputed prefixes")
	}
	return d.precomputedPrefixes[idx], nil
}
|
||
// Creates a PrecomputedShardSelector with 36*36 unique shards. Each shard is of the format {[0-9a-z][0-9a-z]}, i.e. 2 character long. | ||
func NewBase36PrefixShardSelector(ctx context.Context) (ShardSelector, error) { | ||
permittedChars := createAlphabetAndNumerals() | ||
n := len(permittedChars) | ||
precomputedPrefixes := make([]string, 0, n*n) | ||
for _, c1 := range permittedChars { | ||
for _, c2 := range permittedChars { | ||
sb := strings.Builder{} | ||
sb.WriteRune(c1) | ||
sb.WriteRune(c2) | ||
precomputedPrefixes = append(precomputedPrefixes, sb.String()) | ||
} | ||
} | ||
|
||
return NewConstantShardSelector(precomputedPrefixes), nil | ||
} | ||
|
||
// uses the given shards to select a shard | ||
func NewConstantShardSelector(shards []string) ShardSelector { | ||
return &PrecomputedShardSelector{ | ||
precomputedPrefixes: shards, | ||
buckets: uint32(len(shards)), | ||
} | ||
} |
61 changes: 61 additions & 0 deletions
61
go/tasks/pluginmachinery/ioutils/precomputed_shardselector_test.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
package ioutils | ||
|
||
import ( | ||
"context" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestPrecomputedShardSelector_GetShardPrefix(t *testing.T) { | ||
ctx := context.TODO() | ||
t.Run("single-shard", func(t *testing.T) { | ||
ss := PrecomputedShardSelector{precomputedPrefixes: []string{"x"}, buckets: 1} | ||
p, err := ss.GetShardPrefix(ctx, []byte("abc")) | ||
assert.NoError(t, err) | ||
assert.Equal(t, "x", p) | ||
}) | ||
|
||
t.Run("two-shards", func(t *testing.T) { | ||
ss := PrecomputedShardSelector{precomputedPrefixes: []string{"x", "y"}, buckets: 2} | ||
p, err := ss.GetShardPrefix(ctx, []byte("abc")) | ||
assert.NoError(t, err) | ||
assert.Equal(t, "y", p) | ||
p, err = ss.GetShardPrefix(ctx, []byte("xyz")) | ||
assert.NoError(t, err) | ||
assert.Equal(t, "x", p) | ||
}) | ||
} | ||
|
||
func TestGenerateAlphabet(t *testing.T) { | ||
var b []rune | ||
b = GenerateAlphabet(b) | ||
|
||
assert.Equal(t, 26, len(b)) | ||
assert.Equal(t, 'a', b[0]) | ||
assert.Equal(t, 'z', b[25]) | ||
|
||
// Additive | ||
b = GenerateAlphabet(b) | ||
|
||
assert.Equal(t, 52, len(b)) | ||
assert.Equal(t, 'a', b[26]) | ||
assert.Equal(t, 'z', b[51]) | ||
} | ||
|
||
func TestGenerateArabicNumerals(t *testing.T) { | ||
var b []rune | ||
b = GenerateArabicNumerals(b) | ||
|
||
assert.Equal(t, 10, len(b)) | ||
assert.Equal(t, '0', b[0]) | ||
assert.Equal(t, '9', b[9]) | ||
|
||
// Additive | ||
b = GenerateArabicNumerals(b) | ||
assert.Equal(t, 20, len(b)) | ||
assert.Equal(t, '0', b[0]) | ||
assert.Equal(t, '9', b[9]) | ||
assert.Equal(t, '0', b[10]) | ||
assert.Equal(t, '9', b[19]) | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I still prefer to accept interfaces and return specific types.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You cannot; the linter won't let you for non-exported types.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But PrecomputedShardSelector is exported
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, I will unexport it.