From ae242ec327b95176488336f365b0dc9c08d4e11b Mon Sep 17 00:00:00 2001 From: Robert Findley Date: Thu, 19 Jan 2023 16:41:08 -0500 Subject: [PATCH] gopls: fix windows file corruption Fix the bug that gopls finds the wrong content when formatting an open URI whose spelling does not match the spelling on disk (i.e. because of case insensitivity). Remove the whole View.filesByBase mechanism: it is problematic as we can't generally know whether or not we want to associate two different spellings of the same file: for the purposes of finding packages we may want to treat Foo.go as foo.go, but we don't want to treat a symlink of foo.go in another directory the same. Instead, use robustio.FileID to de-duplicate content in the cache, and otherwise treat URIs as we receive them. This fixes the formatting corruption, but means that we don't find packages for the corresponding file (because go/packages.Load("file=foo.go") fails). A failing test is added for the latter bug. Also: use a seenFiles map in the view to satisfy the concern of tracking relevant files, with a TODO to delete this problematic map. Along the way, refactor somewhat to separate and normalize the implementations of source.FileSource. For golang/go#57081 Change-Id: I02971a1702f057b644fa18a873790e8f0d98a323 Reviewed-on: https://go-review.googlesource.com/c/tools/+/462819 gopls-CI: kokoro Run-TryBot: Robert Findley TryBot-Result: Gopher Robot Reviewed-by: Alan Donovan --- gopls/internal/lsp/cache/cache.go | 122 ++------------ gopls/internal/lsp/cache/fs_memoized.go | 149 +++++++++++++++++ gopls/internal/lsp/cache/fs_overlay.go | 76 +++++++++ gopls/internal/lsp/cache/mod.go | 5 +- gopls/internal/lsp/cache/session.go | 155 +++++------------- gopls/internal/lsp/cache/snapshot.go | 10 +- gopls/internal/lsp/cache/view.go | 96 +++-------- .../regtest/workspace/misspelling_test.go | 80 +++++++++ internal/robustio/robustio.go | 10 +- internal/robustio/robustio_posix.go | 7 +- internal/robustio/robustio_test.go | 29 +++- internal/robustio/robustio_windows.go | 15 +- 12 files changed, 429 insertions(+), 325 deletions(-) create mode 100644 gopls/internal/lsp/cache/fs_memoized.go create mode 100644 gopls/internal/lsp/cache/fs_overlay.go create mode 100644 gopls/internal/regtest/workspace/misspelling_test.go diff --git a/gopls/internal/lsp/cache/cache.go b/gopls/internal/lsp/cache/cache.go index 9da185ccc57..edf1d0eeae6 100644 --- a/gopls/internal/lsp/cache/cache.go +++ b/gopls/internal/lsp/cache/cache.go @@ -11,20 +11,16 @@ import ( "go/token" "go/types" "html/template" - "os" "reflect" "sort" "strconv" - "sync" "sync/atomic" - "time" "golang.org/x/tools/gopls/internal/lsp/source" - "golang.org/x/tools/gopls/internal/span" "golang.org/x/tools/internal/event" - "golang.org/x/tools/internal/event/tag" "golang.org/x/tools/internal/gocommand" "golang.org/x/tools/internal/memoize" + "golang.org/x/tools/internal/robustio" ) // New Creates a new cache for gopls operation results, using the given file @@ -47,124 +43,32 @@ func New(fset *token.FileSet, store *memoize.Store) *Cache { } c := &Cache{ - id: strconv.FormatInt(index, 10), - fset: fset, - store: store, - fileContent: map[span.URI]*DiskFile{}, + id: strconv.FormatInt(index, 10), + fset: fset, + store: store, + memoizedFS: &memoizedFS{filesByID: map[robustio.FileID][]*DiskFile{}}, } return c } +// A Cache holds caching stores that are bundled together for consistency. +// +// TODO(rfindley): once fset and store need not be bundled together, the Cache +// type can be eliminated. type Cache struct { id string fset *token.FileSet store *memoize.Store - fileMu sync.Mutex - fileContent map[span.URI]*DiskFile -} - -// A DiskFile is a file on the filesystem, or a failure to read one. -// It implements the source.FileHandle interface. -type DiskFile struct { - uri span.URI - modTime time.Time - content []byte - hash source.Hash - err error -} - -func (h *DiskFile) URI() span.URI { return h.uri } - -func (h *DiskFile) FileIdentity() source.FileIdentity { - return source.FileIdentity{ - URI: h.uri, - Hash: h.hash, - } -} - -func (h *DiskFile) Saved() bool { return true } -func (h *DiskFile) Version() int32 { return 0 } - -func (h *DiskFile) Read() ([]byte, error) { - return h.content, h.err -} - -// GetFile stats and (maybe) reads the file, updates the cache, and returns it. -func (c *Cache) GetFile(ctx context.Context, uri span.URI) (source.FileHandle, error) { - fi, statErr := os.Stat(uri.Filename()) - if statErr != nil { - return &DiskFile{ - err: statErr, - uri: uri, - }, nil - } - - // We check if the file has changed by comparing modification times. Notably, - // this is an imperfect heuristic as various systems have low resolution - // mtimes (as much as 1s on WSL or s390x builders), so we only cache - // filehandles if mtime is old enough to be reliable, meaning that we don't - // expect a subsequent write to have the same mtime. - // - // The coarsest mtime precision we've seen in practice is 1s, so consider - // mtime to be unreliable if it is less than 2s old. Capture this before - // doing anything else. - recentlyModified := time.Since(fi.ModTime()) < 2*time.Second - - c.fileMu.Lock() - fh, ok := c.fileContent[uri] - c.fileMu.Unlock() - - if ok && fh.modTime.Equal(fi.ModTime()) { - return fh, nil - } - - fh, err := readFile(ctx, uri, fi) // ~25us - if err != nil { - return nil, err - } - c.fileMu.Lock() - if !recentlyModified { - c.fileContent[uri] = fh - } else { - delete(c.fileContent, uri) - } - c.fileMu.Unlock() - return fh, nil -} - -// ioLimit limits the number of parallel file reads per process. -var ioLimit = make(chan struct{}, 128) - -func readFile(ctx context.Context, uri span.URI, fi os.FileInfo) (*DiskFile, error) { - select { - case ioLimit <- struct{}{}: - case <-ctx.Done(): - return nil, ctx.Err() - } - defer func() { <-ioLimit }() - - ctx, done := event.Start(ctx, "cache.readFile", tag.File.Of(uri.Filename())) - _ = ctx - defer done() - - content, err := os.ReadFile(uri.Filename()) // ~20us - if err != nil { - content = nil // just in case - } - return &DiskFile{ - modTime: fi.ModTime(), - uri: uri, - content: content, - hash: source.HashOf(content), - err: err, - }, nil + *memoizedFS // implements source.FileSource } // NewSession creates a new gopls session with the given cache and options overrides. // // The provided optionsOverrides may be nil. +// +// TODO(rfindley): move this to session.go. func NewSession(ctx context.Context, c *Cache, optionsOverrides func(*source.Options)) *Session { index := atomic.AddInt64(&sessionIndex, 1) options := source.DefaultOptions().Clone() @@ -176,7 +80,7 @@ func NewSession(ctx context.Context, c *Cache, optionsOverrides func(*source.Opt cache: c, gocmdRunner: &gocommand.Runner{}, options: options, - overlays: make(map[span.URI]*Overlay), + overlayFS: newOverlayFS(c), } event.Log(ctx, "New session", KeyCreateSession.Of(s)) return s diff --git a/gopls/internal/lsp/cache/fs_memoized.go b/gopls/internal/lsp/cache/fs_memoized.go new file mode 100644 index 00000000000..9acd872762f --- /dev/null +++ b/gopls/internal/lsp/cache/fs_memoized.go @@ -0,0 +1,149 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cache + +import ( + "context" + "os" + "sync" + "time" + + "golang.org/x/tools/gopls/internal/lsp/source" + "golang.org/x/tools/gopls/internal/span" + "golang.org/x/tools/internal/event" + "golang.org/x/tools/internal/event/tag" + "golang.org/x/tools/internal/robustio" +) + +// A memoizedFS is a file source that memoizes reads, to reduce IO. +type memoizedFS struct { + mu sync.Mutex + + // filesByID maps existing file inodes to the result of a read. + // (The read may have failed, e.g. due to EACCES or a delete between stat+read.) + // Each slice is a non-empty list of aliases: different URIs. + filesByID map[robustio.FileID][]*DiskFile +} + +func newMemoizedFS() *memoizedFS { + return &memoizedFS{filesByID: make(map[robustio.FileID][]*DiskFile)} +} + +// A DiskFile is a file on the filesystem, or a failure to read one. +// It implements the source.FileHandle interface. +type DiskFile struct { + uri span.URI + modTime time.Time + content []byte + hash source.Hash + err error +} + +func (h *DiskFile) URI() span.URI { return h.uri } + +func (h *DiskFile) FileIdentity() source.FileIdentity { + return source.FileIdentity{ + URI: h.uri, + Hash: h.hash, + } +} + +func (h *DiskFile) Saved() bool { return true } +func (h *DiskFile) Version() int32 { return 0 } +func (h *DiskFile) Read() ([]byte, error) { return h.content, h.err } + +// GetFile stats and (maybe) reads the file, updates the cache, and returns it. +func (fs *memoizedFS) GetFile(ctx context.Context, uri span.URI) (source.FileHandle, error) { + id, mtime, err := robustio.GetFileID(uri.Filename()) + if err != nil { + // file does not exist + return &DiskFile{ + err: err, + uri: uri, + }, nil + } + + // We check if the file has changed by comparing modification times. Notably, + // this is an imperfect heuristic as various systems have low resolution + // mtimes (as much as 1s on WSL or s390x builders), so we only cache + // filehandles if mtime is old enough to be reliable, meaning that we don't + // expect a subsequent write to have the same mtime. + // + // The coarsest mtime precision we've seen in practice is 1s, so consider + // mtime to be unreliable if it is less than 2s old. Capture this before + // doing anything else. + recentlyModified := time.Since(mtime) < 2*time.Second + + fs.mu.Lock() + fhs, ok := fs.filesByID[id] + if ok && fhs[0].modTime.Equal(mtime) { + var fh *DiskFile + // We have already seen this file and it has not changed. + for _, h := range fhs { + if h.uri == uri { + fh = h + break + } + } + // No file handle for this exact URI. Create an alias, but share content. + if fh == nil { + newFH := *fhs[0] + newFH.uri = uri + fh = &newFH + fhs = append(fhs, fh) + fs.filesByID[id] = fhs + } + fs.mu.Unlock() + return fh, nil + } + fs.mu.Unlock() + + // Unknown file, or file has changed. Read (or re-read) it. + fh, err := readFile(ctx, uri, mtime) // ~25us + if err != nil { + return nil, err // e.g. cancelled (not: read failed) + } + + fs.mu.Lock() + if !recentlyModified { + fs.filesByID[id] = []*DiskFile{fh} + } else { + delete(fs.filesByID, id) + } + fs.mu.Unlock() + return fh, nil +} + +// ioLimit limits the number of parallel file reads per process. +var ioLimit = make(chan struct{}, 128) + +func readFile(ctx context.Context, uri span.URI, mtime time.Time) (*DiskFile, error) { + select { + case ioLimit <- struct{}{}: + case <-ctx.Done(): + return nil, ctx.Err() + } + defer func() { <-ioLimit }() + + ctx, done := event.Start(ctx, "cache.readFile", tag.File.Of(uri.Filename())) + _ = ctx + defer done() + + // It is possible that a race causes us to read a file with different file + // ID, or whose mtime differs from the given mtime. However, in these cases + // we expect the client to notify of a subsequent file change, and the file + // content should be eventually consistent. + content, err := os.ReadFile(uri.Filename()) // ~20us + if err != nil { + content = nil // just in case + } + return &DiskFile{ + modTime: mtime, + uri: uri, + content: content, + hash: source.HashOf(content), + err: err, + }, nil +} diff --git a/gopls/internal/lsp/cache/fs_overlay.go b/gopls/internal/lsp/cache/fs_overlay.go new file mode 100644 index 00000000000..d277bc6aa18 --- /dev/null +++ b/gopls/internal/lsp/cache/fs_overlay.go @@ -0,0 +1,76 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cache + +import ( + "context" + "sync" + + "golang.org/x/tools/gopls/internal/lsp/source" + "golang.org/x/tools/gopls/internal/span" +) + +// An overlayFS is a source.FileSource that keeps track of overlays on top of a +// delegate FileSource. +type overlayFS struct { + delegate source.FileSource + + mu sync.Mutex + overlays map[span.URI]*Overlay +} + +func newOverlayFS(delegate source.FileSource) *overlayFS { + return &overlayFS{ + delegate: delegate, + overlays: make(map[span.URI]*Overlay), + } +} + +// Overlays returns a new unordered array of overlays. +func (fs *overlayFS) Overlays() []*Overlay { + overlays := make([]*Overlay, 0, len(fs.overlays)) + for _, overlay := range fs.overlays { + overlays = append(overlays, overlay) + } + return overlays +} + +func (fs *overlayFS) GetFile(ctx context.Context, uri span.URI) (source.FileHandle, error) { + fs.mu.Lock() + overlay, ok := fs.overlays[uri] + fs.mu.Unlock() + if ok { + return overlay, nil + } + return fs.delegate.GetFile(ctx, uri) +} + +// An Overlay is a file open in the editor. It may have unsaved edits. +// It implements the source.FileHandle interface. +type Overlay struct { + uri span.URI + content []byte + hash source.Hash + version int32 + kind source.FileKind + + // saved is true if a file matches the state on disk, + // and therefore does not need to be part of the overlay sent to go/packages. + saved bool +} + +func (o *Overlay) URI() span.URI { return o.uri } + +func (o *Overlay) FileIdentity() source.FileIdentity { + return source.FileIdentity{ + URI: o.uri, + Hash: o.hash, + } +} + +func (o *Overlay) Read() ([]byte, error) { return o.content, nil } +func (o *Overlay) Version() int32 { return o.version } +func (o *Overlay) Saved() bool { return o.saved } +func (o *Overlay) Kind() source.FileKind { return o.kind } diff --git a/gopls/internal/lsp/cache/mod.go b/gopls/internal/lsp/cache/mod.go index 50f557ddf5e..9244f698757 100644 --- a/gopls/internal/lsp/cache/mod.go +++ b/gopls/internal/lsp/cache/mod.go @@ -184,11 +184,14 @@ func (s *snapshot) goSum(ctx context.Context, modURI span.URI) []byte { // Get the go.sum file, either from the snapshot or directly from the // cache. Avoid (*snapshot).GetFile here, as we don't want to add // nonexistent file handles to the snapshot if the file does not exist. + // + // TODO(rfindley): but that's not right. Changes to sum files should + // invalidate content, even if it's nonexistent content. sumURI := span.URIFromPath(sumFilename(modURI)) var sumFH source.FileHandle = s.FindFile(sumURI) if sumFH == nil { var err error - sumFH, err = s.view.cache.GetFile(ctx, sumURI) + sumFH, err = s.view.fs.GetFile(ctx, sumURI) if err != nil { return nil } diff --git a/gopls/internal/lsp/cache/session.go b/gopls/internal/lsp/cache/session.go index e418a0046fe..5df540e4f61 100644 --- a/gopls/internal/lsp/cache/session.go +++ b/gopls/internal/lsp/cache/session.go @@ -38,38 +38,9 @@ type Session struct { views []*View viewMap map[span.URI]*View // map of URI->best view - overlayMu sync.Mutex - overlays map[span.URI]*Overlay + *overlayFS } -// An Overlay is a file open in the editor. -// It implements the source.FileSource interface. -type Overlay struct { - uri span.URI - text []byte - hash source.Hash - version int32 - kind source.FileKind - - // saved is true if a file matches the state on disk, - // and therefore does not need to be part of the overlay sent to go/packages. - saved bool -} - -func (o *Overlay) URI() span.URI { return o.uri } - -func (o *Overlay) FileIdentity() source.FileIdentity { - return source.FileIdentity{ - URI: o.uri, - Hash: o.hash, - } -} - -func (o *Overlay) Read() ([]byte, error) { return o.text, nil } -func (o *Overlay) Version() int32 { return o.version } -func (o *Overlay) Saved() bool { return o.saved } -func (o *Overlay) Kind() source.FileKind { return o.kind } - // ID returns the unique identifier for this session on this server. func (s *Session) ID() string { return s.id } func (s *Session) String() string { return s.id } @@ -150,7 +121,7 @@ func (s *Session) createView(ctx context.Context, name string, folder span.URI, v := &View{ id: strconv.FormatInt(index, 10), - cache: s.cache, + fset: s.cache.fset, gocmdRunner: s.gocmdRunner, initialWorkspaceLoad: make(chan struct{}), initializationSema: make(chan struct{}, 1), @@ -160,8 +131,7 @@ func (s *Session) createView(ctx context.Context, name string, folder span.URI, folder: folder, moduleUpgrades: map[span.URI]map[string]string{}, vulns: map[span.URI]*govulncheck.Result{}, - filesByURI: make(map[span.URI]span.URI), - filesByBase: make(map[string][]canonicalURI), + fs: s.overlayFS, workspaceInformation: info, } v.importsState = &importsState{ @@ -422,8 +392,7 @@ func (s *Session) DidModifyFiles(ctx context.Context, changes []source.FileModif // spurious diagnostics). However, any such view would immediately be // invalidated here, so it is possible that we could update overlays before // acquiring viewMu. - overlays, err := s.updateOverlays(ctx, changes) - if err != nil { + if err := s.updateOverlays(ctx, changes); err != nil { return nil, nil, err } @@ -517,34 +486,25 @@ func (s *Session) DidModifyFiles(ctx context.Context, changes []source.FileModif // Apply the changes to all affected views. for _, view := range changedViews { - // Make sure that the file is added to the view's knownFiles set. - view.canonicalURI(c.URI, true) // ignore result + // Make sure that the file is added to the view's seenFiles set. + view.markKnown(c.URI) if _, ok := views[view]; !ok { views[view] = make(map[span.URI]*fileChange) } - if fh, ok := overlays[c.URI]; ok { - views[view][c.URI] = &fileChange{ - content: fh.text, - exists: true, - fileHandle: fh, - isUnchanged: isUnchanged, - } - } else { - fsFile, err := s.cache.GetFile(ctx, c.URI) - if err != nil { - return nil, nil, err - } - content, err := fsFile.Read() - if err != nil { - // Ignore the error: the file may be deleted. - content = nil - } - views[view][c.URI] = &fileChange{ - content: content, - exists: err == nil, - fileHandle: fsFile, - isUnchanged: isUnchanged, - } + fh, err := s.GetFile(ctx, c.URI) + if err != nil { + return nil, nil, err + } + content, err := fh.Read() + if err != nil { + // Ignore the error: the file may be deleted. + content = nil + } + views[view][c.URI] = &fileChange{ + content: content, + exists: err == nil, + fileHandle: fh, + isUnchanged: isUnchanged, } } } @@ -658,9 +618,10 @@ func knownFilesInDir(ctx context.Context, snapshots []*snapshot, dir span.URI) m } // Precondition: caller holds s.viewMu lock. -func (s *Session) updateOverlays(ctx context.Context, changes []source.FileModification) (map[span.URI]*Overlay, error) { - s.overlayMu.Lock() - defer s.overlayMu.Unlock() +// TODO(rfindley): move this to fs_overlay.go. +func (fs *overlayFS) updateOverlays(ctx context.Context, changes []source.FileModification) error { + fs.mu.Lock() + defer fs.mu.Unlock() for _, c := range changes { // Don't update overlays for metadata invalidations. @@ -668,7 +629,7 @@ func (s *Session) updateOverlays(ctx context.Context, changes []source.FileModif continue } - o, ok := s.overlays[c.URI] + o, ok := fs.overlays[c.URI] // If the file is not opened in an overlay and the change is on disk, // there's no need to update an overlay. If there is an overlay, we @@ -684,14 +645,14 @@ func (s *Session) updateOverlays(ctx context.Context, changes []source.FileModif kind = source.FileKindForLang(c.LanguageID) default: if !ok { - return nil, fmt.Errorf("updateOverlays: modifying unopened overlay %v", c.URI) + return fmt.Errorf("updateOverlays: modifying unopened overlay %v", c.URI) } kind = o.kind } // Closing a file just deletes its overlay. if c.Action == source.Close { - delete(s.overlays, c.URI) + delete(fs.overlays, c.URI) continue } @@ -701,9 +662,9 @@ func (s *Session) updateOverlays(ctx context.Context, changes []source.FileModif text := c.Text if text == nil && (c.Action == source.Save || c.OnDisk) { if !ok { - return nil, fmt.Errorf("no known content for overlay for %s", c.Action) + return fmt.Errorf("no known content for overlay for %s", c.Action) } - text = o.text + text = o.content } // On-disk changes don't come with versions. version := c.Version @@ -718,16 +679,16 @@ func (s *Session) updateOverlays(ctx context.Context, changes []source.FileModif case source.Save: // Make sure the version and content (if present) is the same. if false && o.version != version { // Client no longer sends the version - return nil, fmt.Errorf("updateOverlays: saving %s at version %v, currently at %v", c.URI, c.Version, o.version) + return fmt.Errorf("updateOverlays: saving %s at version %v, currently at %v", c.URI, c.Version, o.version) } if c.Text != nil && o.hash != hash { - return nil, fmt.Errorf("updateOverlays: overlay %s changed on save", c.URI) + return fmt.Errorf("updateOverlays: overlay %s changed on save", c.URI) } sameContentOnDisk = true default: - fh, err := s.cache.GetFile(ctx, c.URI) + fh, err := fs.delegate.GetFile(ctx, c.URI) if err != nil { - return nil, err + return err } _, readErr := fh.Read() sameContentOnDisk = (readErr == nil && fh.FileIdentity().Hash == hash) @@ -735,59 +696,19 @@ func (s *Session) updateOverlays(ctx context.Context, changes []source.FileModif o = &Overlay{ uri: c.URI, version: version, - text: text, + content: text, kind: kind, hash: hash, saved: sameContentOnDisk, } - // When opening files, ensure that we actually have a well-defined view and file kind. - if c.Action == source.Open { - view, err := s.viewOfLocked(o.uri) - if err != nil { - return nil, fmt.Errorf("updateOverlays: finding view for %s: %v", o.uri, err) - } - if kind := view.FileKind(o); kind == source.UnknownKind { - return nil, fmt.Errorf("updateOverlays: unknown file kind for %s", o.uri) - } - } + // NOTE: previous versions of this code checked here that the overlay had a + // view and file kind (but we don't know why). - s.overlays[c.URI] = o + fs.overlays[c.URI] = o } - // Get the overlays for each change while the session's overlay map is - // locked. - overlays := make(map[span.URI]*Overlay) - for _, c := range changes { - if o, ok := s.overlays[c.URI]; ok { - overlays[c.URI] = o - } - } - return overlays, nil -} - -// GetFile returns a handle for the specified file. -func (s *Session) GetFile(ctx context.Context, uri span.URI) (source.FileHandle, error) { - s.overlayMu.Lock() - overlay, ok := s.overlays[uri] - s.overlayMu.Unlock() - if ok { - return overlay, nil - } - - return s.cache.GetFile(ctx, uri) -} - -// Overlays returns a slice of file overlays for the session. -func (s *Session) Overlays() []*Overlay { - s.overlayMu.Lock() - defer s.overlayMu.Unlock() - - overlays := make([]*Overlay, 0, len(s.overlays)) - for _, overlay := range s.overlays { - overlays = append(overlays, overlay) - } - return overlays + return nil } // FileWatchingGlobPatterns returns glob patterns to watch every directory diff --git a/gopls/internal/lsp/cache/snapshot.go b/gopls/internal/lsp/cache/snapshot.go index 995fba34ca6..398240aa83c 100644 --- a/gopls/internal/lsp/cache/snapshot.go +++ b/gopls/internal/lsp/cache/snapshot.go @@ -262,7 +262,7 @@ func (s *snapshot) BackgroundContext() context.Context { } func (s *snapshot) FileSet() *token.FileSet { - return s.view.cache.fset + return s.view.fset } func (s *snapshot) ModFiles() []span.URI { @@ -620,7 +620,7 @@ func (s *snapshot) buildOverlay() map[string][]byte { return } // TODO(rstambler): Make sure not to send overlays outside of the current view. - overlays[uri.Filename()] = overlay.text + overlays[uri.Filename()] = overlay.content }) return overlays } @@ -1205,7 +1205,7 @@ func (s *snapshot) isWorkspacePackage(id PackageID) bool { } func (s *snapshot) FindFile(uri span.URI) source.FileHandle { - uri, _ = s.view.canonicalURI(uri, true) + s.view.markKnown(uri) s.mu.Lock() defer s.mu.Unlock() @@ -1220,7 +1220,7 @@ func (s *snapshot) FindFile(uri span.URI) source.FileHandle { // GetFile succeeds even if the file does not exist. A non-nil error return // indicates some type of internal error, for example if ctx is cancelled. func (s *snapshot) GetFile(ctx context.Context, uri span.URI) (source.FileHandle, error) { - uri, _ = s.view.canonicalURI(uri, true) + s.view.markKnown(uri) s.mu.Lock() defer s.mu.Unlock() @@ -1229,7 +1229,7 @@ func (s *snapshot) GetFile(ctx context.Context, uri span.URI) (source.FileHandle return fh, nil } - fh, err := s.view.cache.GetFile(ctx, uri) // read the file + fh, err := s.view.fs.GetFile(ctx, uri) // read the file if err != nil { return nil, err } diff --git a/gopls/internal/lsp/cache/view.go b/gopls/internal/lsp/cache/view.go index d8eb82e94ca..51660b3ac87 100644 --- a/gopls/internal/lsp/cache/view.go +++ b/gopls/internal/lsp/cache/view.go @@ -11,6 +11,7 @@ import ( "encoding/json" "errors" "fmt" + "go/token" "io/ioutil" "os" "path" @@ -38,7 +39,7 @@ import ( type View struct { id string - cache *Cache // shared cache + fset *token.FileSet // shared FileSet gocmdRunner *gocommand.Runner // limits go command concurrency // baseCtx is the context handed to NewView. This is the parent of all @@ -68,14 +69,14 @@ type View struct { vulnsMu sync.Mutex vulns map[span.URI]*govulncheck.Result - // filesByURI maps URIs to the canonical URI for the file it denotes. - // We also keep a set of candidates for a given basename - // to reduce the set of pairs that need to be tested for sameness. - // - // TODO(rfindley): move this file tracking to the session. - filesByMu sync.Mutex - filesByURI map[span.URI]span.URI // key is noncanonical URI (alias) - filesByBase map[string][]canonicalURI // key is basename + // fs is the file source used to populate this view. + fs source.FileSource + + // seenFiles tracks files that the view has accessed. + // TODO(golang/go#57558): this notion is fundamentally problematic, and + // should be removed. + knownFilesMu sync.Mutex + knownFiles map[span.URI]bool // initCancelFirstAttempt can be used to terminate the view's first // attempt at initialization. @@ -554,73 +555,20 @@ func (v *View) relevantChange(c source.FileModification) bool { return v.contains(c.URI) } -// knownFile reports whether the specified valid URI (or an alias) is known to the view. -func (v *View) knownFile(uri span.URI) bool { - _, known := v.canonicalURI(uri, false) - return known -} - -// TODO(adonovan): opt: eliminate 'filename' optimization. I doubt the -// cost of allocation is significant relative to the -// stat/open/fstat/close operations that follow on Windows. -type canonicalURI struct { - uri span.URI - filename string // = uri.Filename(), an optimization (on Windows) -} - -// canonicalURI returns the canonical URI that denotes the same file -// as uri, which may differ due to case insensitivity, unclean paths, -// soft or hard links, and so on. If no previous alias was found, or -// the file is missing, insert determines whether to make uri the -// canonical representative of the file or to return false. -// -// The cache grows indefinitely without invalidation: file system -// operations may cause two URIs that used to denote the same file to -// no longer to do so. Also, the basename cache grows without bound. -// TODO(adonovan): fix both bugs. -func (v *View) canonicalURI(uri span.URI, insert bool) (span.URI, bool) { - v.filesByMu.Lock() - defer v.filesByMu.Unlock() - - // Have we seen this exact URI before? - if canonical, ok := v.filesByURI[uri]; ok { - return canonical, true - } - - // Inspect all candidates with the same lowercase basename. - // This heuristic is easily defeated by symbolic links to files. - // Files with some basenames (e.g. doc.go) are very numerous. - // - // The set of candidates grows without bound, and incurs a - // linear sequence of SameFile queries to the file system. - // - // It is tempting to fetch the device/inode pair that - // uniquely identifies a file once, and then compare those - // pairs, but that would cause us to cache stale file system - // state (in addition to the filesByURI staleness). - filename := uri.Filename() - basename := strings.ToLower(filepath.Base(filename)) - if candidates := v.filesByBase[basename]; candidates != nil { - if pathStat, _ := os.Stat(filename); pathStat != nil { - for _, c := range candidates { - if cStat, _ := os.Stat(c.filename); cStat != nil { - // On Windows, SameFile is more expensive as it must - // open the file and use the equivalent of fstat(2). - if os.SameFile(pathStat, cStat) { - v.filesByURI[uri] = c.uri - return c.uri, true - } - } - } - } +func (v *View) markKnown(uri span.URI) { + v.knownFilesMu.Lock() + defer v.knownFilesMu.Unlock() + if v.knownFiles == nil { + v.knownFiles = make(map[span.URI]bool) } + v.knownFiles[uri] = true +} - // No candidates, stat failed, or no candidate matched. - if insert { - v.filesByURI[uri] = uri - v.filesByBase[basename] = append(v.filesByBase[basename], canonicalURI{uri, filename}) - } - return uri, insert +// knownFile reports whether the specified valid URI (or an alias) is known to the view. +func (v *View) knownFile(uri span.URI) bool { + v.knownFilesMu.Lock() + defer v.knownFilesMu.Unlock() + return v.knownFiles[uri] } // shutdown releases resources associated with the view, and waits for ongoing diff --git a/gopls/internal/regtest/workspace/misspelling_test.go b/gopls/internal/regtest/workspace/misspelling_test.go new file mode 100644 index 00000000000..0419a116344 --- /dev/null +++ b/gopls/internal/regtest/workspace/misspelling_test.go @@ -0,0 +1,80 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package workspace + +import ( + "runtime" + "testing" + + . "golang.org/x/tools/gopls/internal/lsp/regtest" + "golang.org/x/tools/gopls/internal/lsp/tests/compare" +) + +// Test for golang/go#57081. +func TestFormattingMisspelledURI(t *testing.T) { + if runtime.GOOS != "windows" && runtime.GOOS != "darwin" { + t.Skip("golang/go#57081 only reproduces on case-insensitive filesystems.") + } + const files = ` +-- go.mod -- +module mod.test + +go 1.19 +-- foo.go -- +package foo + +const C = 2 // extra space is intentional +` + + Run(t, files, func(t *testing.T, env *Env) { + env.OpenFile("Foo.go") + env.FormatBuffer("Foo.go") + want := env.BufferText("Foo.go") + + if want == "" { + t.Fatalf("Foo.go is empty") + } + + // In golang/go#57081, we observed that if overlay cases don't match, gopls + // will find (and format) the on-disk contents rather than the overlay, + // resulting in invalid edits. + // + // Verify that this doesn't happen, by confirming that formatting is + // idempotent. + env.FormatBuffer("Foo.go") + got := env.BufferText("Foo.go") + if diff := compare.Text(want, got); diff != "" { + t.Errorf("invalid content after second formatting:\n%s", diff) + } + }) +} + +// Test that we can find packages for open files with different spelling on +// case-insensitive file systems. +func TestPackageForMisspelledURI(t *testing.T) { + t.Skip("golang/go#57081: this test fails because the Go command does not load Foo.go correctly") + if runtime.GOOS != "windows" && runtime.GOOS != "darwin" { + t.Skip("golang/go#57081 only reproduces on case-insensitive filesystems.") + } + const files = ` +-- go.mod -- +module mod.test + +go 1.19 +-- foo.go -- +package foo + +const C = D +-- bar.go -- +package foo + +const D = 2 +` + + Run(t, files, func(t *testing.T, env *Env) { + env.OpenFile("Foo.go") + env.AfterChange(NoDiagnostics()) + }) +} diff --git a/internal/robustio/robustio.go b/internal/robustio/robustio.go index 3241f1d3a7d..0a559fc9b80 100644 --- a/internal/robustio/robustio.go +++ b/internal/robustio/robustio.go @@ -14,6 +14,8 @@ // but substantially reduce their rate of occurrence in practice. package robustio +import "time" + // Rename is like os.Rename, but on Windows retries errors that may occur if the // file is concurrently read or overwritten. // @@ -54,12 +56,14 @@ func IsEphemeralError(err error) bool { // A FileID uniquely identifies a file in the file system. // -// If GetFileID(name1) == GetFileID(name2), the two file names denote the same file. +// If GetFileID(name1) returns the same ID as GetFileID(name2), the two file +// names denote the same file. // A FileID is comparable, and thus suitable for use as a map key. type FileID struct { device, inode uint64 } -// GetFileID returns the file system's identifier for the file. +// GetFileID returns the file system's identifier for the file, and its +// modification time. // Like os.Stat, it reads through symbolic links. -func GetFileID(filename string) (FileID, error) { return getFileID(filename) } +func GetFileID(filename string) (FileID, time.Time, error) { return getFileID(filename) } diff --git a/internal/robustio/robustio_posix.go b/internal/robustio/robustio_posix.go index 175a6b49ba0..8aa13d02786 100644 --- a/internal/robustio/robustio_posix.go +++ b/internal/robustio/robustio_posix.go @@ -12,16 +12,17 @@ package robustio import ( "os" "syscall" + "time" ) -func getFileID(filename string) (FileID, error) { +func getFileID(filename string) (FileID, time.Time, error) { fi, err := os.Stat(filename) if err != nil { - return FileID{}, err + return FileID{}, time.Time{}, err } stat := fi.Sys().(*syscall.Stat_t) return FileID{ device: uint64(stat.Dev), // (int32 on darwin, uint64 on linux) inode: stat.Ino, - }, nil + }, fi.ModTime(), nil } diff --git a/internal/robustio/robustio_test.go b/internal/robustio/robustio_test.go index af53282200a..10244e21d69 100644 --- a/internal/robustio/robustio_test.go +++ b/internal/robustio/robustio_test.go @@ -9,14 +9,15 @@ import ( "path/filepath" "runtime" "testing" + "time" "golang.org/x/tools/internal/robustio" ) -func TestFileID(t *testing.T) { +func TestFileInfo(t *testing.T) { // A nonexistent file has no ID. nonexistent := filepath.Join(t.TempDir(), "nonexistent") - if _, err := robustio.GetFileID(nonexistent); err == nil { + if _, _, err := robustio.GetFileID(nonexistent); err == nil { t.Fatalf("GetFileID(nonexistent) succeeded unexpectedly") } @@ -25,22 +26,28 @@ func TestFileID(t *testing.T) { if err := os.WriteFile(real, nil, 0644); err != nil { t.Fatalf("can't create regular file: %v", err) } - realID, err := robustio.GetFileID(real) + realID, realMtime, err := robustio.GetFileID(real) if err != nil { t.Fatalf("can't get ID of regular file: %v", err) } + // Sleep so that we get a new mtime for subsequent writes. + time.Sleep(2 * time.Second) + // A second regular file has a different ID. real2 := filepath.Join(t.TempDir(), "real2") if err := os.WriteFile(real2, nil, 0644); err != nil { t.Fatalf("can't create second regular file: %v", err) } - real2ID, err := robustio.GetFileID(real2) + real2ID, real2Mtime, err := robustio.GetFileID(real2) if err != nil { t.Fatalf("can't get ID of second regular file: %v", err) } if realID == real2ID { - t.Errorf("realID %+v != real2ID %+v", realID, real2ID) + t.Errorf("realID %+v == real2ID %+v", realID, real2ID) + } + if realMtime.Equal(real2Mtime) { + t.Errorf("realMtime %v == real2Mtime %v", realMtime, real2Mtime) } // A symbolic link has the same ID as its target. @@ -49,13 +56,16 @@ func TestFileID(t *testing.T) { if err := os.Symlink(real, symlink); err != nil { t.Fatalf("can't create symbolic link: %v", err) } - symlinkID, err := robustio.GetFileID(symlink) + symlinkID, symlinkMtime, err := robustio.GetFileID(symlink) if err != nil { t.Fatalf("can't get ID of symbolic link: %v", err) } if realID != symlinkID { t.Errorf("realID %+v != symlinkID %+v", realID, symlinkID) } + if !realMtime.Equal(symlinkMtime) { + t.Errorf("realMtime %v != symlinkMtime %v", realMtime, symlinkMtime) + } } // Two hard-linked files have the same ID. @@ -64,12 +74,15 @@ func TestFileID(t *testing.T) { if err := os.Link(real, hardlink); err != nil { t.Fatal(err) } - hardlinkID, err := robustio.GetFileID(hardlink) + hardlinkID, hardlinkMtime, err := robustio.GetFileID(hardlink) if err != nil { t.Fatalf("can't get ID of hard link: %v", err) } - if hardlinkID != realID { + if realID != hardlinkID { t.Errorf("realID %+v != hardlinkID %+v", realID, hardlinkID) } + if !realMtime.Equal(hardlinkMtime) { + t.Errorf("realMtime %v != hardlinkMtime %v", realMtime, hardlinkMtime) + } } } diff --git a/internal/robustio/robustio_windows.go b/internal/robustio/robustio_windows.go index 98189560bae..616c32883d6 100644 --- a/internal/robustio/robustio_windows.go +++ b/internal/robustio/robustio_windows.go @@ -7,6 +7,7 @@ package robustio import ( "errors" "syscall" + "time" ) const errFileNotFound = syscall.ERROR_FILE_NOT_FOUND @@ -25,22 +26,26 @@ func isEphemeralError(err error) bool { return false } -func getFileID(filename string) (FileID, error) { +// Note: it may be convenient to have this helper return fs.FileInfo, but +// implementing this is actually quite involved on Windows. Since we only +// currently use mtime, keep it simple. +func getFileID(filename string) (FileID, time.Time, error) { filename16, err := syscall.UTF16PtrFromString(filename) if err != nil { - return FileID{}, err + return FileID{}, time.Time{}, err } h, err := syscall.CreateFile(filename16, 0, 0, nil, syscall.OPEN_EXISTING, uint32(syscall.FILE_FLAG_BACKUP_SEMANTICS), 0) if err != nil { - return FileID{}, err + return FileID{}, time.Time{}, err } defer syscall.CloseHandle(h) var i syscall.ByHandleFileInformation if err := syscall.GetFileInformationByHandle(h, &i); err != nil { - return FileID{}, err + return FileID{}, time.Time{}, err } + mtime := time.Unix(0, i.LastWriteTime.Nanoseconds()) return FileID{ device: uint64(i.VolumeSerialNumber), inode: uint64(i.FileIndexHigh)<<32 | uint64(i.FileIndexLow), - }, nil + }, mtime, nil }