Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add test that validates fingerprint.StartsWith #318

Merged
merged 2 commits into from
May 26, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions operator/builtin/input/file/fingerprint.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ const defaultFingerprintSize = 1000 // bytes
const minFingerprintSize = 16 // bytes

// Fingerprint identifies a file by its leading bytes.
// FirstBytes holds the first N bytes of the file, where N is the
// fingerprintSize configured on the file_input operator.
type Fingerprint struct {
	FirstBytes []byte
}
Expand Down Expand Up @@ -42,6 +44,10 @@ func (f Fingerprint) Copy() *Fingerprint {

// StartsWith returns true if the fingerprints are the same
// or if the new fingerprint starts with the old one
// This is important functionality for tracking new files,
// since their initial size is typically less than that of
// a fingerprint. As the file grows, its fingerprint is updated
// until it reaches a maximum size, as configured on the operator
func (f Fingerprint) StartsWith(old *Fingerprint) bool {
l0 := len(old.FirstBytes)
if l0 == 0 {
Expand Down
55 changes: 55 additions & 0 deletions operator/builtin/input/file/fingerprint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package file

import (
"fmt"
"io/ioutil"
"math/rand"
"strings"
"testing"

Expand Down Expand Up @@ -200,4 +202,57 @@ func TestFingerprintStartsWith(t *testing.T) {
}
}

// Generates a file filled with many random bytes, then
// writes the same bytes to a second file, one byte at a time.
// Validates, after each byte is written, that fingerprint
// matching would successfully associate the two files.
// The static file can be thought of as the present state of
// the file, while each iteration of the growing file represents
// a possible state of the same file at a previous time.
func TestFingerprintStartsWith_FromFile(t *testing.T) {
r := rand.New(rand.NewSource(112358))

operator, _, tempDir := newTestFileOperator(t, nil, nil)
operator.fingerprintSize *= 10

fileLength := 12 * operator.fingerprintSize

// Make a []byte we can write one at a time
content := make([]byte, fileLength)
r.Read(content) // Fill slice with random bytes

// Overwrite some bytes with \n to ensure
// we are testing a file with multiple lines
newlineMask := make([]byte, fileLength)
r.Read(newlineMask) // Fill slice with random bytes
for i, b := range newlineMask {
if b == 0 && i != 0 { // 1/256 chance, but never first byte
content[i] = byte('\n')
}
}

fullFile, err := ioutil.TempFile(tempDir, "")
require.NoError(t, err)
_, err = fullFile.Write(content)
require.NoError(t, err)

fff, err := operator.NewFingerprint(fullFile)
require.NoError(t, err)

partialFile, err := ioutil.TempFile(tempDir, "")
require.NoError(t, err)

// Write one byte at a time and validate that updated
// full fingerprint still starts with partial
djaglowski marked this conversation as resolved.
Show resolved Hide resolved
for i := range content {
_, err = partialFile.Write(content[i:i])
require.NoError(t, err)

pff, err := operator.NewFingerprint(fullFile)
require.NoError(t, err)

require.True(t, fff.StartsWith(pff))
}
}

// TODO TestConfig (config_test.go) - sets defaults, errors appropriately, etc