Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix duplicated 8+3 filenames for long file name entries #71

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions filesystem/fat32/directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ import (
// Directory represents a single directory in a FAT32 filesystem
type Directory struct {
directoryEntry
entries []*directoryEntry
sfnCache map[string]bool
entries []*directoryEntry
}

// dirEntriesFromBytes loads the directory entries from the raw bytes
Expand Down Expand Up @@ -41,14 +42,23 @@ func (d *Directory) entriesToBytes(bytesPerCluster int) ([]byte, error) {
func (d *Directory) createEntry(name string, cluster uint32, dir bool) (*directoryEntry, error) {
// is it a long filename or a short filename?
var isLFN bool
// TODO: convertLfnSfn does not check whether the short name conflicts, in which case it should increment the last character
// that should happen here, once we can look in the directory entry
shortName, extension, isLFN, _ := convertLfnSfn(name)

if d.sfnCache == nil {
d.sfnCache = make(map[string]bool)
// load all short filenames
for _, entry := range d.entries {
d.sfnCache[entry.filenameShort+"."+entry.fileExtension] = true
}
}

shortName, extension, isLFN, _ := convertLfnSfn(name, d.sfnCache)
lfn := ""
if isLFN {
lfn = name
}

d.sfnCache[shortName+"."+extension] = true

// allocate a slot for the new filename in the existing directory
entry := directoryEntry{
filenameLong: lfn,
Expand Down
43 changes: 41 additions & 2 deletions filesystem/fat32/directoryentry.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ package fat32
import (
"encoding/binary"
"fmt"
"hash/fnv"
"math/rand"
"regexp"
"strconv"
"strings"
"time"
)
Expand Down Expand Up @@ -382,7 +385,7 @@ func calculateSlots(s string) int {
// returns shortName, extension, isLFN, isTruncated
// isLFN : was there an LFN that had to be converted
// isTruncated : was the shortname longer than 8 chars and had to be converted?
func convertLfnSfn(name string) (string, string, bool, bool) {
func convertLfnSfn(name string, cache map[string]bool) (string, string, bool, bool) {
isLFN, isTruncated := false, false
// get last period in name
lastDot := strings.LastIndex(name, ".")
Expand Down Expand Up @@ -413,11 +416,47 @@ func convertLfnSfn(name string) (string, string, bool, bool) {
isLFN = true
}

truncate := func(name string, n int) string {
return name[0:6] + "~" + strconv.Itoa(n)
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not madly in love with inline anonymous functions here, as they will get declared every time we call convertLfnSfn, and they are harder to understand and test.

Can we pull these three out into named functions, and give them more representative names?


hash := func(shortName string, n int) string {
h := fnv.New32a()
// the input name may have more information than the short name
h.Write([]byte(name))
return shortName[0:2] + fmt.Sprintf("%04X", (h.Sum32()%0x10000)) + "~" + strconv.Itoa(n)
}

random := func() string {
return fmt.Sprintf("%06X", (rand.Int63()%0x1000000)) + "~" + strconv.Itoa(1+rand.Intn(8))
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

random is too generic a name and will probably confuse readers. We should pull this out too, and give it a more specific name.


// convert shortName to 8 chars
if len(shortName) > 8 {
isLFN = true
isTruncated = true
shortName = shortName[:6] + "~" + "1"
if !cache[truncate(shortName, 9)+"."+extension] {
i := 1
for i < 9 && cache[truncate(shortName, i)+"."+extension] {
i++
}
shortName = truncate((shortName), i)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This calls truncate() redundantly several times. Can we tighten it up a bit?

Also, why is this:

			i := 1
 			for i < 9 && cache[truncate(shortName, i)+"."+extension] {
 				i++
 			}

and not the simpler and more idiomatic:

			var found string
			for i := 1; i <= 9; i++ {
				found = truncate(shortName, i)
				if !cache[found+"."+extension] {
					break
				}
			}
			shortName = found

or similar

} else {
i := 0
for i <= 9 && cache[hash(shortName, i)+"."+extension] {
i++
}
if i <= 9 {
shortName = hash(shortName, i)
} else {
rnd := random()
for i < 1000 && cache[rnd+"."+extension] {
i++
rnd = random()
}
shortName = rnd
}
}
}
return shortName, extension, isLFN, isTruncated
}
Expand Down
19 changes: 17 additions & 2 deletions filesystem/fat32/directoryentry_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,7 @@ func TestDirectoryEntryCalculateSlots(t *testing.T) {
}

func TestDirectoryEntryConvertLfnSfn(t *testing.T) {
emptyCache := make(map[string]bool)
tests := []struct {
input string
sfn string
Expand All @@ -481,12 +482,26 @@ func TestDirectoryEntryConvertLfnSfn(t *testing.T) {
{"VeryLongName.ft", "VERYLO~1", "FT", true, true},
}
for _, tt := range tests {
sfn, extension, isLfn, isTruncated := convertLfnSfn(tt.input)
sfn, extension, isLfn, isTruncated := convertLfnSfn(tt.input, emptyCache)
if sfn != tt.sfn || extension != tt.extension || isLfn != tt.isLfn || isTruncated != tt.isTruncated {
t.Errorf("convertLfnSfn(%s) expected %s / %s / %t / %t ; actual %s / %s / %t / %t", tt.input, tt.sfn, tt.extension, tt.isLfn, tt.isTruncated, sfn, extension, isLfn, isTruncated)
}
}

// try filling a cache with 100000 long filenames
// The hashing adds 16 bits of entropy, thus 64k entries at most (with the same 2 letter prefix)
// Overall we can't reach such a directory without using all collision avoidance methods
cache := make(map[string]bool)
for i := 0; i < 100000; i++ {
filename := fmt.Sprintf("FinalVersion%d.doc", i)
sfn, extension, _, _ := convertLfnSfn(filename, cache)
if len(sfn) != 8 || len(extension) != 3 {
t.Errorf("convertLfnSfn(%s) expected 8+3, got %s.%s", filename, sfn, extension)
}
cache[sfn+"."+extension] = true
}
if len(cache) != 100000 {
t.Errorf("created 100000 files, got %d entries", len(cache))
}
}

func TestDirectoryEntryUCaseValid(t *testing.T) {
Expand Down