Skip to content

Commit

Permalink
Don't download if file already present
Browse files Browse the repository at this point in the history
If a file is already in the file system and it validates, we do not need to download it again.
  • Loading branch information
777Denoiser committed Nov 27, 2024
1 parent 7936916 commit d2e3ce3
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 21 deletions.
54 changes: 54 additions & 0 deletions cmd/dowload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,16 @@ import (
"crypto/sha256"
"encoding/base64"
"fmt"
"github.com/cisco-open/grabit/internal"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"

"github.com/cisco-open/grabit/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func getSha256Integrity(content string) string {
Expand Down Expand Up @@ -124,6 +130,54 @@ func TestRunDownloadFailsIntegrityTest(t *testing.T) {
assert.Contains(t, err.Error(), "integrity mismatch")
}

// TestOptimization checks how Download treats a file that is already present
// in the target directory: a valid file must not be fetched again, and a file
// whose content does not match the recorded integrity must be reported as an
// integrity mismatch.
func TestOptimization(t *testing.T) {
	// Every request served by the test server leaves one token in hits, so
	// len(hits) is the number of requests handled so far. A buffered channel
	// is used as the counter because it can be written from the handler
	// goroutine and inspected from the test goroutine without extra imports.
	hits := make(chan struct{}, 64)
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		select {
		case hits <- struct{}{}:
		default:
			// Buffer full: never block the handler just to keep counting.
		}
		// The write error is irrelevant here; a failed response surfaces as
		// a download error in the subtests.
		_, _ = w.Write([]byte("test content"))
	}))
	defer ts.Close()

	t.Run("Valid_File_Not_Redownloaded", func(t *testing.T) {
		tmpDir := test.TmpDir(t)
		testURL := ts.URL + "/valid_test.txt"

		lockPath := test.TmpFile(t, "")
		lock, err := internal.NewLock(lockPath, true)
		require.NoError(t, err)

		err = lock.AddResource([]string{testURL}, internal.RecommendedAlgo, nil, "valid_test.txt")
		require.NoError(t, err)

		// First download populates tmpDir with the resource.
		err = lock.Download(tmpDir, nil, nil, "")
		require.NoError(t, err)
		hitsAfterFirst := len(hits)

		// Second download into the same directory: the file is already
		// there and valid, so the server must not be contacted again.
		err = lock.Download(tmpDir, nil, nil, "")
		require.NoError(t, err)
		assert.Equal(t, hitsAfterFirst, len(hits), "valid local file must not be downloaded again")
	})

	// NOTE: despite its name, this subtest expects Download to fail with an
	// integrity mismatch when a corrupted file is already in place.
	t.Run("Invalid_File_Redownloaded", func(t *testing.T) {
		tmpDir := test.TmpDir(t)
		testURL := ts.URL + "/invalid_test.txt"

		// Create lock file with specific integrity
		lockPath := test.TmpFile(t, "")
		lock, err := internal.NewLock(lockPath, true)
		require.NoError(t, err)

		err = lock.AddResource([]string{testURL}, internal.RecommendedAlgo, nil, "invalid_test.txt")
		require.NoError(t, err)

		// Save lock file
		err = lock.Save()
		require.NoError(t, err)

		// Create a file whose content differs from what the server returns
		invalidPath := filepath.Join(tmpDir, "invalid_test.txt")
		err = os.WriteFile(invalidPath, []byte("corrupted"), 0644)
		require.NoError(t, err)

		// Try downloading - should fail with integrity mismatch
		err = lock.Download(tmpDir, nil, nil, "")
		require.Error(t, err)
		assert.Contains(t, err.Error(), "integrity mismatch")
	})
}
func TestRunDownloadTriesAllUrls(t *testing.T) {
content := `abcdef`
contentIntegrity := getSha256Integrity(content)
Expand Down
98 changes: 77 additions & 21 deletions internal/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,21 @@ func GetUrlToDir(u string, targetDir string, ctx context.Context) (string, error
h := sha256.New()
h.Write([]byte(u))
fileName := filepath.Join(targetDir, fmt.Sprintf(".%s", hex.EncodeToString(h.Sum(nil))))
return getUrl(u, fileName, ctx)

// Create new file with immediate close
file, err := os.Create(fileName)
if err != nil {
return "", fmt.Errorf("failed to create file: %v", err)
}
file.Close()

fileName, err = getUrl(u, fileName, ctx)
if err != nil {
os.Remove(fileName)
return "", err
}

return fileName, nil
}

// GetUrlWithDir downloads the given resource to a temporary file and returns the path to it.
Expand All @@ -90,17 +104,7 @@ func (l *Resource) Download(dir string, mode os.FileMode, ctx context.Context) e
}
var downloadError error = nil
for _, u := range l.Urls {
// Download file in the target directory so that the call to
// os.Rename is atomic.
lpath, err := GetUrlToDir(u, dir, ctx)
if err != nil {
downloadError = err
continue
}
err = checkIntegrityFromFile(lpath, algo, l.Integrity, u)
if err != nil {
return err
}
log.Debug().Str("URL", u).Msg("Downloading")

localName := ""
if l.Filename != "" {
Expand All @@ -109,27 +113,61 @@ func (l *Resource) Download(dir string, mode os.FileMode, ctx context.Context) e
localName = path.Base(u)
}
resPath := filepath.Join(dir, localName)
err = os.Rename(lpath, resPath)

// Check existing file first
if _, err := os.Stat(resPath); err == nil {
// File exists, validate its integrity
if !ValidateLocalFile(resPath, l.Integrity) {
return fmt.Errorf("integrity mismatch for '%s'", resPath)
}
// Set file permissions if needed
if mode != NoFileMode {
if err := os.Chmod(resPath, mode.Perm()); err != nil {
return err
}
}
ok = true
continue
} else if !os.IsNotExist(err) {
// Handle other potential errors from os.Stat
return fmt.Errorf("failed to stat file '%s': %v", resPath, err)
}

// Download new file
lpath, err := GetUrlToDir(u, dir, ctx)
if err != nil {
return err
downloadError = fmt.Errorf("failed to download '%s': %v", u, err)
continue
}

// Validate downloaded file
if err := checkIntegrityFromFile(lpath, algo, l.Integrity, u); err != nil {
os.Remove(lpath)
downloadError = err
continue
}

// Move to final location
if err := os.Rename(lpath, resPath); err != nil {
os.Remove(lpath)
downloadError = err
continue
}

if mode != NoFileMode {
err = os.Chmod(resPath, mode.Perm())
if err != nil {
if err := os.Chmod(resPath, mode.Perm()); err != nil {
return err
}
}
ok = true
break
}

if !ok {
if downloadError != nil {
return downloadError
}
return err
return downloadError
}
return nil
}

func (l *Resource) Contains(url string) bool {
for _, u := range l.Urls {
if u == url {
Expand All @@ -138,3 +176,21 @@ func (l *Resource) Contains(url string) bool {
}
return false
}

// ValidateLocalFile reports whether the file at filePath matches
// expectedIntegrity.
//
// It returns false when os.Stat reports that the file does not exist, when
// getAlgoFromIntegrity cannot derive an algorithm from expectedIntegrity, when
// getIntegrityFromFile fails, or when the computed integrity string differs
// from expectedIntegrity. Errors are not surfaced to the caller; every failure
// collapses into a false result.
func ValidateLocalFile(filePath string, expectedIntegrity string) bool {
	// Only a "does not exist" result short-circuits here; any other Stat
	// outcome falls through to the integrity computation below.
	_, statErr := os.Stat(filePath)
	if os.IsNotExist(statErr) {
		return false
	}

	// The expected integrity string determines which algorithm to use.
	algo, algoErr := getAlgoFromIntegrity(expectedIntegrity)
	if algoErr != nil {
		return false
	}

	actual, hashErr := getIntegrityFromFile(filePath, algo)
	return hashErr == nil && actual == expectedIntegrity
}

0 comments on commit d2e3ce3

Please sign in to comment.