diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..07953cae39 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,10 @@ +# Treat all files in the Go repo as binary, with no git magic updating +# line endings. Windows users contributing to Go will need to use a +# modern version of git and editors capable of LF line endings. +# +# We'll prevent accidental CRLF line endings from entering the repo +# via the git-review gofmt checks. +# +# See golang.org/issue/9281 + +* -text diff --git a/cmd/cindex/cindex.go b/cmd/cindex/cindex.go index 7ae11f2c2d..0dcb6dd3f6 100644 --- a/cmd/cindex/cindex.go +++ b/cmd/cindex/cindex.go @@ -41,7 +41,7 @@ itself is a useful command to run in a nightly cron job. The -list flag causes cindex to list the paths it has indexed and exit. -By default cindex adds the named paths to the index but preserves +By default cindex adds the named paths to the index but preserves information about other paths that might already be indexed (the ones printed by cindex -list). The -reset flag causes cindex to delete the existing index before indexing the new paths. @@ -84,7 +84,10 @@ func main() { } if *resetFlag && len(args) == 0 { - os.Remove(index.File()) + err := os.Remove(index.File()) + if err != nil { + log.Printf("3 %s: %s", index.File(), err) + } return } if len(args) == 0 { @@ -122,6 +125,7 @@ func main() { } ix := index.Create(file) + defer ix.Close() ix.Verbose = *verboseFlag ix.AddPaths(args) for _, arg := range args { @@ -152,8 +156,14 @@ func main() { if !*resetFlag { log.Printf("merge %s %s", master, file) index.Merge(file+"~", master, file) - os.Remove(file) - os.Rename(file+"~", master) + + ix.Close() + if err := os.Remove(file); err != nil { + log.Printf("%s: %s", file, err) + } + if err := os.Rename(file+"~", master); err != nil { + log.Printf("%s: %s", file, err) + } } log.Printf("done") return diff --git a/cmd/csearch/csearch.go b/cmd/csearch/csearch.go index 4e12d6ca58..7fb10ea6c0 100644 --- a/cmd/csearch/csearch.go +++ b/cmd/csearch/csearch.go @@ -21,7 +21,7 @@ Csearch behaves like grep over all indexed files, searching for regexp, an RE2 (nearly PCRE) regular expression. The -c, -h, -i, -l, and -n flags are as in grep, although note that as per Go's -flag parsing convention, they cannot be combined: the option pair -i -n +flag parsing convention, they cannot be combined: the option pair -i -n cannot be abbreviated to -in. The -f flag restricts the search to files whose names match the RE2 regular diff --git a/index/merge.go b/index/merge.go index f767b9e4a1..4e44cf4136 100644 --- a/index/merge.go +++ b/index/merge.go @@ -26,13 +26,13 @@ package index // Now merge the posting lists (this is why they begin with the trigram). // During the merge, translate the docid numbers to the new C docid space. // Also during the merge, write the posting list index to a temporary file as usual. -// +// // Copy the name index and posting list index into C's index and write the trailer. // Rename C's index onto the new index. import ( "encoding/binary" - "os" + "log" "strings" ) @@ -227,8 +227,18 @@ func Merge(dst, src1, src2 string) { ix3.writeString(trailerMagic) ix3.flush() - os.Remove(nameIndexFile.name) - os.Remove(w.postIndexFile.name) + bufDestroy(nameIndexFile) + bufDestroy(w.postIndexFile) + + if err := ix1.Close(); err != nil { + log.Fatalf("merge: ix1 %s", err) + } + if err := ix2.Close(); err != nil { + log.Fatalf("merge: ix2 %s", err) + } + if err := ix3.file.Close(); err != nil { + log.Fatalf("merge: ix3 %s", err) + } } type postMapReader struct { diff --git a/index/mmap_bsd.go b/index/mmap_bsd.go index ce1de14927..e41552c2d8 100644 --- a/index/mmap_bsd.go +++ b/index/mmap_bsd.go @@ -29,11 +29,18 @@ func mmapFile(f *os.File) mmapData { } n := int(size) if n == 0 { - return mmapData{f, nil} + return mmapData{f, nil, nil} } data, err := syscall.Mmap(int(f.Fd()), 0, (n+4095)&^4095, _PROT_READ, _MAP_SHARED) if err != nil { log.Fatalf("mmap %s: %v", f.Name(), err) } - return mmapData{f, data[:n]} + return mmapData{f, data[:n], data[:]} +} + +func unmmapFile(m *mmapData) error { + if err := syscall.Munmap(m.dall); err != nil { + return err + } + return m.f.Close() } diff --git a/index/mmap_linux.go b/index/mmap_linux.go index a3c961dbb3..c8e7fcb09b 100644 --- a/index/mmap_linux.go +++ b/index/mmap_linux.go @@ -21,11 +21,18 @@ func mmapFile(f *os.File) mmapData { } n := int(size) if n == 0 { - return mmapData{f, nil} + return mmapData{f, nil, nil} } data, err := syscall.Mmap(int(f.Fd()), 0, (n+4095)&^4095, syscall.PROT_READ, syscall.MAP_SHARED) if err != nil { log.Fatalf("mmap %s: %v", f.Name(), err) } - return mmapData{f, data[:n]} + return mmapData{f, data[:n], data[:]} +} + +func unmmapFile(m *mmapData) error { + if err := syscall.Munmap(m.dall); err != nil { + return err + } + return m.f.Close() } diff --git a/index/mmap_test.go b/index/mmap_test.go new file mode 100644 index 0000000000..3ead4f0c2e --- /dev/null +++ b/index/mmap_test.go @@ -0,0 +1,33 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package index + +import ( + "io/ioutil" + "os" + "testing" +) + +// Test correct mmap resource clean up. +func TestMmap(t *testing.T) { + f1, err := ioutil.TempFile("", "index-test") + if err != nil { + t.Errorf("%s: %s", f1.Name(), err) + } + f1.WriteString("123456789") + data := mmapFile(f1) + err = unmmapFile(&data) + if err != nil { + t.Errorf("%s: data %v %s", data.d, f1.Name(), err) + } + err = f1.Close() + if err != nil { + t.Errorf("%s: %s", f1.Name(), err) + } + err = os.Remove(f1.Name()) + if err != nil { + t.Errorf("%s: %s", f1.Name(), err) + } +} diff --git a/index/mmap_windows.go b/index/mmap_windows.go index 38ae27471e..b75d7e8e98 100644 --- a/index/mmap_windows.go +++ b/index/mmap_windows.go @@ -21,17 +21,27 @@ func mmapFile(f *os.File) mmapData { log.Fatalf("%s: too large for mmap", f.Name()) } if size == 0 { - return mmapData{f, nil} + return mmapData{f, nil, nil} } h, err := syscall.CreateFileMapping(syscall.Handle(f.Fd()), nil, syscall.PAGE_READONLY, uint32(size>>32), uint32(size), nil) if err != nil { log.Fatalf("CreateFileMapping %s: %v", f.Name(), err) } + defer syscall.CloseHandle(syscall.Handle(h)) addr, err := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, 0) if err != nil { log.Fatalf("MapViewOfFile %s: %v", f.Name(), err) } - data := (*[1 << 30]byte)(unsafe.Pointer(addr)) - return mmapData{f, data[:size]} + + data := (*[1 << 34]byte)(unsafe.Pointer(addr)) + return mmapData{f, data[:size], nil} // Third parameter is not required on Windows +} + +func unmmapFile(m *mmapData) error { + err := syscall.UnmapViewOfFile(uintptr(unsafe.Pointer(&m.d[0]))) + if err != nil { + return err + } + return m.f.Close() } diff --git a/index/read.go b/index/read.go index 72ee834418..37adb9a783 100644 --- a/index/read.go +++ b/index/read.go @@ -95,7 +95,7 @@ type Index struct { const postEntrySize = 3 + 4 + 4 func Open(file string) *Index { - mm := mmap(file) + mm := mmapFileAndOpen(file) if len(mm.d) < 4*4+len(trailerMagic) || string(mm.d[len(mm.d)-len(trailerMagic):]) != trailerMagic { corrupt() } @@ -129,6 +129,11 @@ func (ix *Index) uint32(off uint32) uint32 { return binary.BigEndian.Uint32(ix.slice(off, 4)) } +// Close closes the data. +func (ix *Index) Close() error { + return unmmapFile(&ix.data) +} + // uvarint returns the varint value at the given offset in the index data. func (ix *Index) uvarint(off uint32) uint32 { v, n := binary.Uvarint(ix.slice(off, -1)) @@ -413,12 +418,13 @@ func corrupt() { // An mmapData is mmap'ed read-only data from a file. type mmapData struct { - f *os.File - d []byte + f *os.File + d []byte // [:file size] + dall []byte // [:] hole mapped data, for Linux and BSD } // mmap maps the given file into memory. -func mmap(file string) mmapData { +func mmapFileAndOpen(file string) mmapData { f, err := os.Open(file) if err != nil { log.Fatal(err) diff --git a/index/write.go b/index/write.go index 994e115c2c..3987077f1d 100644 --- a/index/write.go +++ b/index/write.go @@ -101,6 +101,11 @@ func (ix *IndexWriter) AddPaths(paths []string) { ix.paths = append(ix.paths, paths...) } +// Close closes the main index file. +func (ix *IndexWriter) Close() { + ix.main.file.Close() +} + // AddFile adds the file with the given name (opened using os.Open) // to the index. It logs errors using package log. func (ix *IndexWriter) AddFile(name string) { @@ -217,12 +222,16 @@ func (ix *IndexWriter) Flush() { } ix.main.writeString(trailerMagic) - os.Remove(ix.nameData.name) + closeFile(ix.nameData.file) for _, f := range ix.postFile { - os.Remove(f.Name()) + // postFile is already closed by unmmapFile. + err := os.Remove(f.Name()) + if err != nil { + log.Printf("%s: %s", f.Name(), err) + } } - os.Remove(ix.nameIndex.name) - os.Remove(ix.postIndex.name) + closeFile(ix.nameIndex.file) + closeFile(ix.postIndex.file) log.Printf("%d data bytes, %d index bytes", ix.totalBytes, ix.main.offset()) @@ -237,6 +246,19 @@ func copyFile(dst, src *bufWriter) { } } +func closeFile(f *os.File) { + if f != nil { + err := f.Close() + if err != nil { + log.Printf("%s: %s", f.Name(), err) + } + } + err := os.Remove(f.Name()) + if err != nil { + log.Printf("%s: %s", f.Name(), err) + } +} + // addName adds the file with the given name to the index. // It returns the assigned file ID number. func (ix *IndexWriter) addName(name string) uint32 { @@ -286,7 +308,8 @@ func (ix *IndexWriter) mergePost(out *bufWriter) { log.Printf("merge %d files + mem", len(ix.postFile)) for _, f := range ix.postFile { - h.addFile(f) + data := h.addFile(f) + defer unmmapFile(data) } sortPost(ix.post) h.addMem(ix.post) @@ -338,10 +361,12 @@ type postHeap struct { ch []*postChunk } -func (h *postHeap) addFile(f *os.File) { - data := mmapFile(f).d +func (h *postHeap) addFile(f *os.File) *mmapData { + mmapData := mmapFile(f) + data := mmapData.d m := (*[npost]postEntry)(unsafe.Pointer(&data[0]))[:len(data)/8] h.addMem(m) + return &mmapData } func (h *postHeap) addMem(x []postEntry) { @@ -445,6 +470,7 @@ func (h *postHeap) siftUp(j int) { break } ch[i], ch[j] = ch[j], ch[i] + j = i } } @@ -479,6 +505,17 @@ func bufCreate(name string) *bufWriter { } } +func bufDestroy(bf *bufWriter) { + err := bf.file.Close() + if err != nil { + log.Printf("%s: %s", bf.name, err) + } + err = os.Remove(bf.name) + if err != nil { + log.Printf("%s: %s", bf.name, err) + } +} + func (b *bufWriter) write(x []byte) { n := cap(b.buf) - len(b.buf) if len(x) > n { diff --git a/index/write_test.go b/index/write_test.go index d2653aec61..4b3e2fcdef 100644 --- a/index/write_test.go +++ b/index/write_test.go @@ -163,3 +163,19 @@ func TestTrivialWrite(t *testing.T) { func TestTrivialWriteDisk(t *testing.T) { testTrivialWrite(t, true) } + +func TestHeap(t *testing.T) { + h := &postHeap{} + es := []postEntry{7, 4, 3, 2, 4} + for _, e := range es { + h.addMem([]postEntry{e}) + } + if len(h.ch) != len(es) { + t.Fatalf("wrong heap size: %d, want %d", len(h.ch), len(es)) + } + for a, b := h.next(), h.next(); b.trigram() != (1<<24 - 1); a, b = b, h.next() { + if a > b { + t.Fatalf("%d should <= %d", a, b) + } + } +}