Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hashGlobs query to state service #3484

Merged
merged 16 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
75 changes: 75 additions & 0 deletions cmd/state-svc/internal/hash/file_hasher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package hash

import (
"encoding/base64"
"fmt"
"io"
"os"
"sort"
"time"

"github.com/ActiveState/cli/internal/errs"
"github.com/ActiveState/cli/internal/rtutils"
"github.com/cespare/xxhash"
"github.com/patrickmn/go-cache"
)

// fileCache is the minimal key/value cache that FileHasher needs. It is an
// interface so that NewFileHasher can supply a go-cache instance while tests
// substitute a wrapper that records hits and misses.
type fileCache interface {
// Get returns the value stored under key and whether the key was present.
Get(key string) (interface{}, bool)
// Set stores value under key with the given expiration.
Set(key string, value interface{}, expiration time.Duration)
}

// FileHasher computes a combined hash over a set of files, keeping per-file
// hashes in a cache so unchanged files are not re-read.
type FileHasher struct {
// cache holds per-file hashes, keyed by cacheKey (file name plus mod time).
cache fileCache
}

func NewFileHasher() *FileHasher {
return &FileHasher{
cache: cache.New(cache.NoExpiration, 24*time.Hour),
MDrakos marked this conversation as resolved.
Show resolved Hide resolved
}
}

func (fh *FileHasher) HashFiles(files []string) (hash string, rerr error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
func (fh *FileHasher) HashFiles(files []string) (hash string, rerr error) {
func (fh *FileHasher) HashFiles(files []string) (string, error) {

Doesn't look like naming these is used for anything. And it can actually cause bug so better to avoid.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah I see. In that case I suggest renaming hash to _, so it doesn't clash with your code. You are using a hash var in the function body. I don't think it clashes as currently written, but it feels error-prone.

sort.Strings(files)

hasher := xxhash.New()
for _, f := range files {
file, err := os.Open(f)
if err != nil {
return "", errs.Wrap(err, "Could not open file: %s", file.Name())
}
defer rtutils.Closer(file.Close, &rerr)

fileInfo, err := file.Stat()
if err != nil {
return "", errs.Wrap(err, "Could not stat file: %s", file.Name())
}

var hash string
cachedHash, ok := fh.cache.Get(cacheKey(file.Name(), fileInfo.ModTime()))
if ok {
hash, ok = cachedHash.(string)
if !ok {
return "", errs.New("Could not convert cache value to string")
}
} else {
fileHasher := xxhash.New()
if _, err := io.Copy(fileHasher, file); err != nil {
return "", errs.Wrap(err, "Could not hash file: %s", file.Name())
}

hash = fmt.Sprintf("%x", fileHasher.Sum(nil))
}

fh.cache.Set(cacheKey(file.Name(), fileInfo.ModTime()), hash, cache.NoExpiration)

// Incorporate the individual file hash into the overall hash in hex format
fmt.Fprintf(hasher, "%x", hash)
}

return base64.StdEncoding.EncodeToString(hasher.Sum(nil)), nil
}

// cacheKey builds the cache key for a file: the file name, a dash, and the
// modification time expressed as UTC nanoseconds since the Unix epoch.
func cacheKey(file string, modTime time.Time) string {
	nanos := modTime.UTC().UnixNano()
	return fmt.Sprintf("%s-%d", file, nanos)
}
228 changes: 228 additions & 0 deletions cmd/state-svc/internal/hash/file_hasher_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
package hash

import (
"os"
"testing"
"time"

"github.com/patrickmn/go-cache"
"github.com/stretchr/testify/assert"
)

// testCache wraps a go-cache instance and records which keys were found and
// which were not, so tests can assert on cache hit/miss behaviour.
type testCache struct {
// cache is the underlying store that lookups and writes are delegated to.
cache *cache.Cache
// hits lists, in call order, every key passed to Get that was present.
hits []string
// misses lists, in call order, every key passed to Get that was absent.
misses []string
}

// Get looks key up in the wrapped cache and appends it to hits or misses
// depending on whether it was found, then returns the lookup result unchanged.
func (tc *testCache) Get(key string) (interface{}, bool) {
	value, found := tc.cache.Get(key)
	if !found {
		tc.misses = append(tc.misses, key)
		return value, found
	}

	tc.hits = append(tc.hits, key)
	return value, found
}

// Set stores value under key in the wrapped cache, forwarding the caller's
// expiration instead of discarding it, so the test double behaves like the
// cache it stands in for.
func (tc *testCache) Set(key string, value interface{}, expiration time.Duration) {
	tc.cache.Set(key, value, expiration)
}

// TestFileHasher_HashFiles checks that hashing the same two files twice with a
// hasher from NewFileHasher yields the same digest both times.
func TestFileHasher_HashFiles(t *testing.T) {
	pathA := createTempFile(t, "file1")
	pathB := createTempFile(t, "file2")

	fh := NewFileHasher()

	first, err := fh.HashFiles([]string{pathA, pathB})
	assert.NoError(t, err)

	second, err := fh.HashFiles([]string{pathA, pathB})
	assert.NoError(t, err)

	assert.Equal(t, first, second)
}

// TestFileHasher_CacheHit hashes the same two files twice and expects the
// first pass to miss the cache for both files and the second pass to hit it
// for both.
func TestFileHasher_CacheHit(t *testing.T) {
	pathA := createTempFile(t, "file1")
	pathB := createTempFile(t, "file2")

	recorder := &testCache{cache: cache.New(cache.NoExpiration, cache.NoExpiration)}
	fh := &FileHasher{cache: recorder}

	first, err := fh.HashFiles([]string{pathA, pathB})
	assert.NoError(t, err)

	second, err := fh.HashFiles([]string{pathA, pathB})
	assert.NoError(t, err)

	assert.Equal(t, first, second)
	assert.Len(t, recorder.hits, 2)
	assert.Len(t, recorder.misses, 2)
}

// TestFileHasher_CacheMiss changes only the modification time of one file
// between two hashing passes. The content is untouched, so the overall digest
// must stay the same, but the touched file must miss the cache again because
// the cache key includes the mod time.
func TestFileHasher_CacheMiss(t *testing.T) {
	file1 := createTempFile(t, "file1")
	file2 := createTempFile(t, "file2")

	tc := &testCache{
		cache: cache.New(cache.NoExpiration, cache.NoExpiration),
	}

	hasher := &FileHasher{
		cache: tc,
	}

	hash1, err := hasher.HashFiles([]string{file1, file2})
	assert.NoError(t, err)

	// Use a mod time that is clearly different from the creation time.
	// time.Now() could land on the same timestamp as the file's creation on
	// file systems with coarse mod time resolution, turning the expected miss
	// into a hit.
	touched := time.Now().Add(time.Hour)
	if err := os.Chtimes(file1, touched, touched); err != nil {
		t.Fatal(err)
	}

	file, err := os.Open(file1)
	if err != nil {
		t.Fatal(err)
	}
	assert.NoError(t, file.Sync())
	// Close the handle so it does not leak for the rest of the test run.
	assert.NoError(t, file.Close())

	hash2, err := hasher.HashFiles([]string{file1, file2})
	assert.NoError(t, err)

	assert.Equal(t, hash1, hash2)
	assert.Len(t, tc.hits, 1)
	assert.Len(t, tc.misses, 3)
}

// TestFileHasher_ContentAgnostic uses two files with identical content but
// different names and modification times, and expects each file to be cached
// separately: two misses on the first pass, two hits on the second.
func TestFileHasher_ContentAgnostic(t *testing.T) {
	pathA := createTempFile(t, "file1")

	// Pause briefly so the second file gets a different mod time.
	time.Sleep(1 * time.Millisecond)
	pathB := createTempFile(t, "file1")

	recorder := &testCache{cache: cache.New(cache.NoExpiration, cache.NoExpiration)}
	fh := &FileHasher{cache: recorder}

	first, err := fh.HashFiles([]string{pathA, pathB})
	assert.NoError(t, err)

	second, err := fh.HashFiles([]string{pathA, pathB})
	assert.NoError(t, err)

	assert.Equal(t, first, second)
	assert.Len(t, recorder.hits, 2)
	assert.Len(t, recorder.misses, 2)
}

// TestFileHasher_NotEqualFileAdded expects the digest to change when a third
// file is added to the set, with the two previously hashed files served from
// the cache and only the new file missing it.
func TestFileHasher_NotEqualFileAdded(t *testing.T) {
	pathA := createTempFile(t, "file1")
	pathB := createTempFile(t, "file2")
	pathC := createTempFile(t, "file3")

	recorder := &testCache{cache: cache.New(cache.NoExpiration, cache.NoExpiration)}
	fh := &FileHasher{cache: recorder}

	before, err := fh.HashFiles([]string{pathA, pathB})
	assert.NoError(t, err)

	after, err := fh.HashFiles([]string{pathA, pathB, pathC})
	assert.NoError(t, err)

	assert.NotEqual(t, before, after)
	assert.Len(t, recorder.hits, 2)
	assert.Len(t, recorder.misses, 3)
}

// TestFileHasher_NotEqualFileRemoved expects the digest to change when one of
// three files is dropped from the set: three misses on the first pass, then
// two hits for the files that remain.
func TestFileHasher_NotEqualFileRemoved(t *testing.T) {
	pathA := createTempFile(t, "file1")
	pathB := createTempFile(t, "file2")
	pathC := createTempFile(t, "file3")

	recorder := &testCache{cache: cache.New(cache.NoExpiration, cache.NoExpiration)}
	fh := &FileHasher{cache: recorder}

	before, err := fh.HashFiles([]string{pathA, pathB, pathC})
	assert.NoError(t, err)

	after, err := fh.HashFiles([]string{pathA, pathB})
	assert.NoError(t, err)

	assert.NotEqual(t, before, after)
	assert.Len(t, recorder.hits, 2)
	assert.Len(t, recorder.misses, 3)
}

// TestFileHasher_NotEqualContentChanged hashes two files twice (expecting the
// same digest and two cache hits), then rewrites one file and expects a
// different digest, with the rewritten file missing the cache and the
// untouched file hitting it.
func TestFileHasher_NotEqualContentChanged(t *testing.T) {
	file1 := createTempFile(t, "file1")
	file2 := createTempFile(t, "file2")

	tc := &testCache{
		cache: cache.New(cache.NoExpiration, cache.NoExpiration),
	}

	hasher := &FileHasher{
		cache: tc,
	}

	hash1, err := hasher.HashFiles([]string{file1, file2})
	assert.NoError(t, err)

	hash2, err := hasher.HashFiles([]string{file1, file2})
	assert.NoError(t, err)

	assert.Equal(t, hash1, hash2)

	// Change the content of file1, then set its mod time explicitly. The cache
	// key is derived from the mod time, and depending on the file system's mod
	// time resolution a rewrite shortly after creation may keep the same
	// timestamp; forcing a distinct mod time guarantees a cache miss without
	// having to sleep.
	if err := os.WriteFile(file1, []byte("file1_changed"), 0644); err != nil {
		t.Fatal(err)
	}
	changedAt := time.Now().Add(time.Hour)
	if err := os.Chtimes(file1, changedAt, changedAt); err != nil {
		t.Fatal(err)
	}

	hash2Modified, err := hasher.HashFiles([]string{file1, file2})
	assert.NoError(t, err)

	assert.NotEqual(t, hash1, hash2Modified)
	assert.Len(t, tc.hits, 3)
	assert.Len(t, tc.misses, 3)
}

// createTempFile writes content to a new temporary file and returns its path.
// The file is removed again when the calling test (or subtest) finishes. Any
// failure to create, write or close the file aborts the test.
func createTempFile(t *testing.T, content string) string {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	tmpfile, err := os.CreateTemp("", "testfile")
	if err != nil {
		t.Fatal(err)
	}
	name := tmpfile.Name()
	t.Cleanup(func() {
		// Best-effort removal: a leftover temp file must not fail the test.
		_ = os.Remove(name)
	})

	if _, err := tmpfile.WriteString(content); err != nil {
		t.Fatal(err)
	}
	if err := tmpfile.Close(); err != nil {
		t.Fatal(err)
	}

	return name
}
21 changes: 21 additions & 0 deletions cmd/state-svc/internal/resolver/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ import (
"context"
"encoding/json"
"os"
"path/filepath"
"runtime/debug"
"sort"
"strconv"
"time"

"github.com/ActiveState/cli/cmd/state-svc/internal/hash"
"github.com/ActiveState/cli/cmd/state-svc/internal/messages"
"github.com/ActiveState/cli/cmd/state-svc/internal/rtwatcher"
genserver "github.com/ActiveState/cli/cmd/state-svc/internal/server/generated"
Expand Down Expand Up @@ -36,6 +38,7 @@ type Resolver struct {
updatePoller *poller.Poller
authPoller *poller.Poller
projectIDCache *projectcache.ID
fileHasher *hash.FileHasher
an *sync.Client
anForClient *sync.Client // Use separate client for events sent through service so we don't contaminate one with the other
rtwatch *rtwatcher.Watcher
Expand Down Expand Up @@ -81,6 +84,7 @@ func New(cfg *config.Instance, an *sync.Client, auth *authentication.Auth) (*Res
pollUpdate,
pollAuth,
projectcache.NewID(),
hash.NewFileHasher(),
an,
anForClient,
rtwatcher.New(cfg, anForClient),
Expand Down Expand Up @@ -263,6 +267,8 @@ func (r *Resolver) GetProcessesInUse(ctx context.Context, execDir string) ([]*gr
}

func (r *Resolver) GetJwt(ctx context.Context) (*graph.Jwt, error) {
defer func() { handlePanics(recover(), debug.Stack()) }()

if err := r.auth.MaybeRenew(); err != nil {
return nil, errs.Wrap(err, "Could not renew auth token")
}
Expand Down Expand Up @@ -296,6 +302,21 @@ func (r *Resolver) GetJwt(ctx context.Context) (*graph.Jwt, error) {
return jwt, nil
}

// HashGlobs expands each glob pattern with filepath.Glob, collects all matches
// in pattern order, and returns the resolver's file hasher digest for that
// list. A pattern that cannot be matched yields an error. Panics are passed to
// handlePanics.
func (r *Resolver) HashGlobs(ctx context.Context, globs []string) (string, error) {
	defer func() { handlePanics(recover(), debug.Stack()) }()

	var matched []string
	for i := range globs {
		pattern := globs[i]

		paths, err := filepath.Glob(pattern)
		if err != nil {
			return "", errs.Wrap(err, "Could not match glob: %s", pattern)
		}

		matched = append(matched, paths...)
	}

	return r.fileHasher.HashFiles(matched)
}

func handlePanics(recovered interface{}, stack []byte) {
if recovered != nil {
multilog.Error("Panic: %v", recovered)
Expand Down
Loading
Loading