From 4d1a3713d1daae3e1bddf9922e92c6ccc9d56f13 Mon Sep 17 00:00:00 2001 From: Eduardo Apolinario <653394+eapolinario@users.noreply.github.com> Date: Thu, 3 Mar 2022 10:10:06 -0800 Subject: [PATCH] Take literal hash into account during cache key calculation (#406) * Point to flyteidl branch Signed-off-by: Eduardo Apolinario * Set tag Signed-off-by: Eduardo Apolinario * Add replaceHashInLiteral function Signed-off-by: Eduardo Apolinario * Changes to propeller-config.yaml to run locally Signed-off-by: Eduardo Apolinario * Pick up latest flyteidl@add-hash-to-literal Signed-off-by: Eduardo Apolinario * Rename variables Signed-off-by: Eduardo Apolinario * Add unit tests Signed-off-by: Eduardo Apolinario * s/replaceHashInLiteral/hashify/g Signed-off-by: Eduardo Apolinario * s/literalMapCopy/hashifiedLiteralMap/g Signed-off-by: Eduardo Apolinario * Bump flyteidl Signed-off-by: Eduardo Apolinario * Use flyteidl 0.23.0 Signed-off-by: Eduardo Apolinario * Revert "Changes to propeller-config.yaml to run locally" This reverts commit 983ee6c580b6b98a2150de8ac1a1f272367659bd. Signed-off-by: Eduardo Apolinario Co-authored-by: Eduardo Apolinario --- go.mod | 2 +- go.sum | 6 + .../task/catalog/datacatalog/transformer.go | 56 +- .../catalog/datacatalog/transformer_test.go | 556 ++++++++++++++++++ 4 files changed, 618 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 9d94544d6b..1bf5324863 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/DiSiqueira/GoTree v1.0.1-0.20180907134536-53a8e837f295 github.com/benlaurie/objecthash v0.0.0-20180202135721-d1e3d6079fc1 github.com/fatih/color v1.10.0 - github.com/flyteorg/flyteidl v0.22.0 + github.com/flyteorg/flyteidl v0.23.0 github.com/flyteorg/flyteplugins v0.10.9 github.com/flyteorg/flytestdlib v0.4.12 github.com/ghodss/yaml v1.0.0 diff --git a/go.sum b/go.sum index 34f3a45b24..1e82c22cd4 100644 --- a/go.sum +++ b/go.sum @@ -239,6 +239,12 @@ github.com/fatih/structtag v1.2.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4 github.com/flyteorg/flyteidl v0.21.23/go.mod h1:576W2ViEyjTpT+kEVHAGbrTP3HARNUZ/eCwrNPmdx9U= github.com/flyteorg/flyteidl v0.22.0 h1:lVAXyacTCIX6Fl0qWoFzyto9Hfx0ADlyPhHPmOMiuIY= github.com/flyteorg/flyteidl v0.22.0/go.mod h1:576W2ViEyjTpT+kEVHAGbrTP3HARNUZ/eCwrNPmdx9U= +github.com/flyteorg/flyteidl v0.22.2-0.20220217205210-d817026005f3 h1:b/mCyBe9+Yw1LTBJRRXtsZO6TH6TqzzKaup8gkhus2U= +github.com/flyteorg/flyteidl v0.22.2-0.20220217205210-d817026005f3/go.mod h1:576W2ViEyjTpT+kEVHAGbrTP3HARNUZ/eCwrNPmdx9U= +github.com/flyteorg/flyteidl v0.22.4-0.20220301015125-8c9756723d27 h1:qrqTtWay6wRaJYYapPVeKOqMonsw+AnkYtkoYEj1k4Y= +github.com/flyteorg/flyteidl v0.22.4-0.20220301015125-8c9756723d27/go.mod h1:576W2ViEyjTpT+kEVHAGbrTP3HARNUZ/eCwrNPmdx9U= +github.com/flyteorg/flyteidl v0.23.0 h1:Pjl9Tq1pJfIK0au5PiqPVpl25xTYosN6BruZl+PgWAk= +github.com/flyteorg/flyteidl v0.23.0/go.mod h1:576W2ViEyjTpT+kEVHAGbrTP3HARNUZ/eCwrNPmdx9U= github.com/flyteorg/flyteplugins v0.10.9 h1:Hj4kBc2pNAJOTUP14+KSd44RzSE+A6fMEnTMQXhE6r8= github.com/flyteorg/flyteplugins v0.10.9/go.mod h1:RXgHGGUGC1akEnAd0yi4cLuYP1BF1rVkxhGjzIrm6VU= github.com/flyteorg/flytestdlib v0.3.13/go.mod h1:Tz8JCECAbX6VWGwFT6cmEQ+RJpZ/6L9pswu3fzWs220= diff --git a/pkg/controller/nodes/task/catalog/datacatalog/transformer.go b/pkg/controller/nodes/task/catalog/datacatalog/transformer.go index e8dbcb4768..abf36f1283 100644 --- a/pkg/controller/nodes/task/catalog/datacatalog/transformer.go +++ b/pkg/controller/nodes/task/catalog/datacatalog/transformer.go @@ -116,13 +116,67 @@ func generateTaskSignatureHash(ctx context.Context, taskInterface core.TypedInte return fmt.Sprintf("%v-%v", inputHashString, outputHashString), nil } +// Hashify a literal, in other words, produce a new literal where the corresponding value is removed in case +// the literal hash is set. +func hashify(literal *core.Literal) *core.Literal { + // Two recursive cases: + // 1. A collection of literals or + // 2. A map of literals + + if literal.GetCollection() != nil { + literals := literal.GetCollection().Literals + literalsHash := make([]*core.Literal, 0) + for _, lit := range literals { + literalsHash = append(literalsHash, hashify(lit)) + } + return &core.Literal{ + Value: &core.Literal_Collection{ + Collection: &core.LiteralCollection{ + Literals: literalsHash, + }, + }, + } + } + if literal.GetMap() != nil { + literalsMap := make(map[string]*core.Literal) + for key, lit := range literal.GetMap().Literals { + literalsMap[key] = hashify(lit) + } + return &core.Literal{ + Value: &core.Literal_Map{ + Map: &core.LiteralMap{ + Literals: literalsMap, + }, + }, + } + } + + // And a base case that consists of a scalar, where the hash might be set + if literal.GetHash() != "" { + return &core.Literal{ + Hash: literal.GetHash(), + } + } + return literal +} + // Generate a tag by hashing the input values func GenerateArtifactTagName(ctx context.Context, inputs *core.LiteralMap) (string, error) { if inputs == nil || len(inputs.Literals) == 0 { inputs = &emptyLiteralMap } - inputsHash, err := pbhash.ComputeHash(ctx, inputs) + // Hashify, i.e. generate a copy of the literal map where each literal value is removed + // in case the corresponding hash is set. + hashifiedLiteralMap := make(map[string]*core.Literal, len(inputs.Literals)) + for name, literal := range inputs.Literals { + hashifiedLiteralMap[name] = hashify(literal) + } + hashifiedInputs := &core.LiteralMap{ + Literals: hashifiedLiteralMap, + } + + inputsHash, err := pbhash.ComputeHash(ctx, hashifiedInputs) if err != nil { return "", err } diff --git a/pkg/controller/nodes/task/catalog/datacatalog/transformer_test.go b/pkg/controller/nodes/task/catalog/datacatalog/transformer_test.go index 4d2485f27d..2ce8c1dff7 100644 --- a/pkg/controller/nodes/task/catalog/datacatalog/transformer_test.go +++ b/pkg/controller/nodes/task/catalog/datacatalog/transformer_test.go @@ -340,3 +340,559 @@ func TestDatasetIDToIdentifier(t *testing.T) { assert.Equal(t, "d", id.Domain) assert.Equal(t, "v", id.Version) } + +func TestGenerateArtifactTagName_LiteralsWithHashSet(t *testing.T) { + tests := []struct { + name string + literal *core.Literal + expectedLiteral *core.Literal + }{ + { + name: "single literal where hash is not set", + literal: coreutils.MustMakeLiteral(42), + expectedLiteral: coreutils.MustMakeLiteral(42), + }, + { + name: "single literal containing hash", + literal: &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + Hash: "abcde", + }, + expectedLiteral: &core.Literal{ + Value: nil, + Hash: "abcde", + }, + }, + { + name: "list of literals containing a single item where literal sets its hash", + literal: &core.Literal{ + Value: &core.Literal_Collection{ + Collection: &core.LiteralCollection{ + Literals: []*core.Literal{ + &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + Hash: "hash1", + }, + }, + }, + }, + }, + expectedLiteral: &core.Literal{ + Value: &core.Literal_Collection{ + Collection: &core.LiteralCollection{ + Literals: []*core.Literal{ + &core.Literal{ + Value: nil, + Hash: "hash1", + }, + }, + }, + }, + }, + }, + { + name: "list of literals containing two items where each literal sets its hash", + literal: &core.Literal{ + Value: &core.Literal_Collection{ + Collection: &core.LiteralCollection{ + Literals: []*core.Literal{ + &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + Hash: "hash1", + }, + &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://another-address", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + Hash: "hash2", + }, + }, + }, + }, + }, + expectedLiteral: &core.Literal{ + Value: &core.Literal_Collection{ + Collection: &core.LiteralCollection{ + Literals: []*core.Literal{ + &core.Literal{ + Value: nil, + Hash: "hash1", + }, + &core.Literal{ + Value: nil, + Hash: "hash2", + }, + }, + }, + }, + }, + }, + { + name: "list of literals containing two items where only one literal has its hash set", + literal: &core.Literal{ + Value: &core.Literal_Collection{ + Collection: &core.LiteralCollection{ + Literals: []*core.Literal{ + &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + Hash: "hash1", + }, + &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://another-address", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + expectedLiteral: &core.Literal{ + Value: &core.Literal_Collection{ + Collection: &core.LiteralCollection{ + Literals: []*core.Literal{ + &core.Literal{ + Value: nil, + Hash: "hash1", + }, + &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://another-address", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "map of literals containing a single item where literal sets its hash", + literal: &core.Literal{ + Value: &core.Literal_Map{ + Map: &core.LiteralMap{ + Literals: map[string]*core.Literal{ + "literal1": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + Hash: "hash-42", + }, + }, + }, + }, + }, + expectedLiteral: &core.Literal{ + Value: &core.Literal_Map{ + Map: &core.LiteralMap{ + Literals: map[string]*core.Literal{ + "literal1": &core.Literal{ + Value: nil, + Hash: "hash-42", + }, + }, + }, + }, + }, + }, + { + name: "map of literals containing a three items where only one literal sets its hash", + literal: &core.Literal{ + Value: &core.Literal_Map{ + Map: &core.LiteralMap{ + Literals: map[string]*core.Literal{ + "literal1": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + }, + "literal2-set-its-hash": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address-for-literal-2", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + Hash: "literal-2-hash", + }, + "literal3": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address-for-literal-3", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + expectedLiteral: &core.Literal{ + Value: &core.Literal_Map{ + Map: &core.LiteralMap{ + Literals: map[string]*core.Literal{ + "literal1": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + }, + "literal2-set-its-hash": &core.Literal{ + Value: nil, + Hash: "literal-2-hash", + }, + "literal3": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address-for-literal-3", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "list of map of literals containing a mixture of literals have their hashes set or not set", + literal: &core.Literal{ + Value: &core.Literal_Collection{ + Collection: &core.LiteralCollection{ + Literals: []*core.Literal{ + &core.Literal{ + Value: &core.Literal_Map{ + Map: &core.LiteralMap{ + Literals: map[string]*core.Literal{ + "literal1": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + }, + "literal2-set-its-hash": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address-for-literal-2", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + Hash: "literal-2-hash", + }, + "literal3": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address-for-literal-3", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + &core.Literal{ + Value: &core.Literal_Map{ + Map: &core.LiteralMap{ + Literals: map[string]*core.Literal{ + "another-literal-1": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address-for-another-literal-1", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + Hash: "another-literal-1-hash", + }, + "another-literal2-set-its-hash": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address-for-literal-2", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + expectedLiteral: &core.Literal{ + Value: &core.Literal_Collection{ + Collection: &core.LiteralCollection{ + Literals: []*core.Literal{ + &core.Literal{ + Value: &core.Literal_Map{ + Map: &core.LiteralMap{ + Literals: map[string]*core.Literal{ + "literal1": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + }, + "literal2-set-its-hash": &core.Literal{ + Value: nil, + Hash: "literal-2-hash", + }, + "literal3": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address-for-literal-3", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + &core.Literal{ + Value: &core.Literal_Map{ + Map: &core.LiteralMap{ + Literals: map[string]*core.Literal{ + "another-literal-1": &core.Literal{ + Value: nil, + Hash: "another-literal-1-hash", + }, + "another-literal2-set-its-hash": &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_StructuredDataset{ + StructuredDataset: &core.StructuredDataset{ + Uri: "my-blob-stora://some-address-for-literal-2", + Metadata: &core.StructuredDatasetMetadata{ + StructuredDatasetType: &core.StructuredDatasetType{ + Format: "my-columnar-data-format", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expectedLiteral, hashify(tt.literal)) + + // Double-check that generating a tag is successful + literalMap := &core.LiteralMap{Literals: map[string]*core.Literal{"o0": tt.literal}} + tag, err := GenerateArtifactTagName(context.TODO(), literalMap) + assert.NoError(t, err) + assert.NotEmpty(t, tag) + }) + } +}