Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add test that validates fingerprint.StartsWith #318

Merged
merged 2 commits into from
May 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions operator/builtin/input/file/fingerprint.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ const defaultFingerprintSize = 1000 // bytes
const minFingerprintSize = 16 // bytes

// Fingerprint is used to identify a file.
// A file's fingerprint is the first N bytes of the file,
// where N is the fingerprintSize on the file_input operator.
type Fingerprint struct {
// FirstBytes holds the leading bytes of the file that make up
// the fingerprint.
FirstBytes []byte
}
Expand Down Expand Up @@ -42,6 +44,10 @@ func (f Fingerprint) Copy() *Fingerprint {

// StartsWith returns true if the fingerprints are the same
// or if the new fingerprint starts with the old one.
// This is important functionality for tracking new files,
// since their initial size is typically less than that of
// a fingerprint. As the file grows, its fingerprint is updated
// until it reaches a maximum size, as configured on the operator.
func (f Fingerprint) StartsWith(old *Fingerprint) bool {
l0 := len(old.FirstBytes)
if l0 == 0 {
Expand Down
63 changes: 63 additions & 0 deletions operator/builtin/input/file/fingerprint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package file

import (
"fmt"
"io/ioutil"
"math/rand"
"strings"
"testing"

Expand Down Expand Up @@ -200,4 +202,65 @@ func TestFingerprintStartsWith(t *testing.T) {
}
}

// TestFingerprintStartsWith_FromFile generates a file filled with many
// random bytes, then writes the same bytes to a second file, one byte at
// a time. After each byte is written, it validates that fingerprint
// matching would successfully associate the two files.
//
// The static file can be thought of as the present state of the file,
// while each iteration of the growing file represents a possible state
// of the same file at a previous time.
func TestFingerprintStartsWith_FromFile(t *testing.T) {
	// Fixed seed so that any failure is reproducible.
	r := rand.New(rand.NewSource(112358))

	operator, _, tempDir := newTestFileOperator(t, nil, nil)
	operator.fingerprintSize *= 10

	// Make the file considerably longer than one fingerprint so the loop
	// covers both the "still growing" and the "fingerprint full" phases.
	fileLength := 12 * operator.fingerprintSize

	// Make a []byte we can write one at a time.
	// (*rand.Rand).Read always fills the slice and never returns an error.
	content := make([]byte, fileLength)
	r.Read(content)

	// Overwrite some bytes with \n to ensure
	// we are testing a file with multiple lines.
	newlineMask := make([]byte, fileLength)
	r.Read(newlineMask)
	for i, b := range newlineMask {
		if b == 0 && i != 0 { // 1/256 chance, but never first byte
			content[i] = byte('\n')
		}
	}

	fullFile, err := ioutil.TempFile(tempDir, "")
	require.NoError(t, err)
	defer fullFile.Close()

	_, err = fullFile.Write(content)
	require.NoError(t, err)

	fff, err := operator.NewFingerprint(fullFile)
	require.NoError(t, err)

	partialFile, err := ioutil.TempFile(tempDir, "")
	require.NoError(t, err)
	defer partialFile.Close()

	// Write the first byte before comparing, since empty files will never match.
	_, err = partialFile.Write(content[:1])
	require.NoError(t, err)
	content = content[1:]

	// Write one byte at a time and validate that the
	// full fingerprint still starts with the updated partial.
	for i := range content {
		// content[i:i+1] is exactly one byte. The slice content[i:i] would
		// be empty, leaving the partial file stuck at its first byte and
		// the loop re-checking the same 1-byte fingerprint every time.
		_, err = partialFile.Write(content[i : i+1])
		require.NoError(t, err)

		pff, err := operator.NewFingerprint(partialFile)
		require.NoError(t, err)

		require.True(t, fff.StartsWith(pff))
	}
}

// TODO TestConfig (config_test.go) - sets defaults, errors appropriately, etc