Skip to content

Commit

Permalink
[fix] - resource leak (#3402)
Browse files Browse the repository at this point in the history
* fix resource leak

* add comment

* use errors.Join

* address error wrapping
  • Loading branch information
ahrav authored Oct 15, 2024
1 parent aa17b8e commit bf38b84
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 11 deletions.
9 changes: 2 additions & 7 deletions pkg/handlers/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,14 +285,9 @@ func HandleFile(
}
defer func() {
// Ensure all data is read to prevent broken pipe.
_, copyErr := io.Copy(io.Discard, rdr)
if copyErr != nil {
err = fmt.Errorf("error discarding remaining data: %w", copyErr)
}
closeErr := rdr.Close()
if closeErr != nil {
if closeErr := rdr.Close(); closeErr != nil {
if err != nil {
err = fmt.Errorf("%v; error closing reader: %w", err, closeErr)
err = errors.Join(err, closeErr)
} else {
err = fmt.Errorf("error closing reader: %w", closeErr)
}
Expand Down
59 changes: 55 additions & 4 deletions pkg/sources/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -1213,7 +1213,14 @@ func getSafeRemoteURL(repo *git.Repository, preferred string) string {
return safeURL
}

func (s *Git) handleBinary(ctx context.Context, gitDir string, reporter sources.ChunkReporter, chunkSkel *sources.Chunk, commitHash plumbing.Hash, path string) error {
func (s *Git) handleBinary(
ctx context.Context,
gitDir string,
reporter sources.ChunkReporter,
chunkSkel *sources.Chunk,
commitHash plumbing.Hash,
path string,
) (err error) {
fileCtx := context.WithValues(ctx, "commit", commitHash.String()[:7], "path", path)
fileCtx.Logger().V(5).Info("handling binary file")

Expand All @@ -1227,9 +1234,32 @@ func (s *Git) handleBinary(ctx context.Context, gitDir string, reporter sources.
return nil
}

cmd := exec.Command("git", "-C", gitDir, "cat-file", "blob", commitHash.String()+":"+path)
const (
cmdTimeout = 60 * time.Second
waitDelay = 5 * time.Second
)
// NOTE: This kludge ensures the context timeout for the 'git cat-file' command
// matches the timeout for the HandleFile operation.
// By setting both timeouts to the same value, we can be more confident
// that both operations will run for the same duration.
// The command execution includes a small Wait delay before terminating the process,
// giving HandleFile time to respect the context
// and return before the process is forcibly killed.
// This approach helps prevent premature termination and allows for more complete processing.

// TODO: Develop a more robust mechanism to ensure consistent timeout behavior between the command execution
// and the HandleFile operation. This should prevent premature termination and allow for complete processing.
handlers.SetArchiveMaxTimeout(cmdTimeout)

// Create a timeout context for the 'git cat-file' command to ensure it does not run indefinitely.
// This prevents potential resource exhaustion by terminating the command if it exceeds the specified duration.
catFileCtx, cancel := context.WithTimeoutCause(fileCtx, cmdTimeout, errors.New("git cat-file timeout"))
defer cancel()

cmd := exec.CommandContext(catFileCtx, "git", "-C", gitDir, "cat-file", "blob", commitHash.String()+":"+path)
var stderr bytes.Buffer
cmd.Stderr = &stderr
cmd.WaitDelay = waitDelay // give the command a chance to finish before the timeout :)

stdout, err := cmd.StdoutPipe()
if err != nil {
Expand All @@ -1240,9 +1270,30 @@ func (s *Git) handleBinary(ctx context.Context, gitDir string, reporter sources.
return fmt.Errorf("error starting git cat-file: %w\n%s", err, stderr.Bytes())
}

defer func() { _ = cmd.Wait() }()
// Ensure all data from the reader (stdout) is consumed to prevent broken pipe errors.
// This operation discards any remaining data after HandleFile completion.
// If the reader is fully consumed, the copy is essentially a no-op.
// If an error occurs while discarding, it will be logged and combined with any existing error.
// The command's completion is then awaited and any execution errors are handled.
defer func() {
n, copyErr := io.Copy(io.Discard, stdout)
if copyErr != nil {
ctx.Logger().Error(
copyErr,
"Failed to discard remaining stdout data after HandleFile completion",
)
}

ctx.Logger().V(3).Info(
"HandleFile did not consume all stdout data; excess discarded",
"bytes_discarded", n)

// Wait for the command to finish and handle any errors.
waitErr := cmd.Wait()
err = errors.Join(err, copyErr, waitErr)
}()

return handlers.HandleFile(fileCtx, stdout, chunkSkel, reporter, handlers.WithSkipArchives(s.skipArchives))
return handlers.HandleFile(catFileCtx, stdout, chunkSkel, reporter, handlers.WithSkipArchives(s.skipArchives))
}

func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) error {
Expand Down

0 comments on commit bf38b84

Please sign in to comment.