Skip to content

Commit

Permalink
cache/filecache: Add a cache prune func
Browse files Browse the repository at this point in the history
Fixes #5439
  • Loading branch information
bep committed Nov 14, 2018
1 parent 3350266 commit 3c29c5a
Show file tree
Hide file tree
Showing 6 changed files with 232 additions and 95 deletions.
48 changes: 39 additions & 9 deletions cache/filecache/filecache.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"io/ioutil"
"path/filepath"
"strings"
"sync"
"time"

"github.com/gohugoio/hugo/common/hugio"
Expand All @@ -44,7 +45,30 @@ type Cache struct {
// 0 is effectively turning this cache off.
maxAge time.Duration

nlocker *locker.Locker
nlocker *lockTracker
}

// lockTracker is a named locker that also remembers every id it has been
// asked to lock.
type lockTracker struct {
	// Locker provides the per-id locking; Unlock is promoted unchanged.
	*locker.Locker

	// seen is the set of ids passed to Lock so far. Lock accesses it
	// under seenMu.
	seenMu sync.RWMutex
	seen   map[string]struct{}
}

// Lock records id as used and then acquires the named lock for it.
// The recorded ids are what the garbage collection run after a Hugo build
// uses to tell which cache entries were touched.
func (l *lockTracker) Lock(id string) {
	// Look the id up under the read lock first; ids that are already
	// recorded never need the write lock.
	l.seenMu.RLock()
	_, known := l.seen[id]
	l.seenMu.RUnlock()

	if !known {
		l.seenMu.Lock()
		l.seen[id] = struct{}{}
		l.seenMu.Unlock()
	}

	l.Locker.Lock(id)
}

// ItemInfo contains info about a cached file.
Expand All @@ -57,7 +81,7 @@ type ItemInfo struct {
func NewCache(fs afero.Fs, maxAge time.Duration) *Cache {
return &Cache{
Fs: fs,
nlocker: locker.NewLocker(),
nlocker: &lockTracker{Locker: locker.NewLocker(), seen: make(map[string]struct{})},
maxAge: maxAge,
}
}
Expand Down Expand Up @@ -232,7 +256,7 @@ func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser {
return nil
}

if time.Now().Sub(fi.ModTime()) > c.maxAge {
if c.isExpired(fi.ModTime()) {
c.Fs.Remove(id)
return nil
}
Expand All @@ -247,20 +271,26 @@ func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser {
return f
}

// isExpired reports whether an item last modified at modTime is older than
// the cache's maxAge. A negative maxAge means items never expire.
func (c *Cache) isExpired(modTime time.Time) bool {
	if c.maxAge < 0 {
		return false
	}
	return time.Since(modTime) > c.maxAge
}

// For testing
func (c *Cache) getString(id string) string {
id = cleanID(id)

c.nlocker.Lock(id)
defer c.nlocker.Unlock(id)

if r := c.getOrRemove(id); r != nil {
defer r.Close()
b, _ := ioutil.ReadAll(r)
return string(b)
f, err := c.Fs.Open(id)

if err != nil {
return ""
}
defer f.Close()

return ""
b, _ := ioutil.ReadAll(f)
return string(b)

}

Expand Down Expand Up @@ -309,5 +339,5 @@ func NewCachesFromPaths(p *paths.Paths) (Caches, error) {
}

func cleanID(name string) string {
return filepath.Clean(name)
return strings.TrimPrefix(filepath.Clean(name), helpers.FilePathSeparator)
}
80 changes: 80 additions & 0 deletions cache/filecache/filecache_pruner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package filecache

import (
"io"
"os"

"github.com/pkg/errors"
"github.com/spf13/afero"
)

// Prune removes expired and unused items from every cache in c and returns
// the number of files removed. Empty directories are removed as well, but
// are not counted.
//
// Detecting unused items relies on the ids recorded while the cache was in
// use, so it requires a full build first; when nothing has been recorded
// for a cache, only expired items are removed from it.
//
// Note that we operate directly on the filesystem here, so this is not
// thread safe.
//
// On failure the count of files removed so far is returned together with
// the error.
func (c Caches) Prune() (int, error) {
	counter := 0
	for k, cache := range c {
		err := afero.Walk(cache.Fs, "", func(name string, info os.FileInfo, err error) error {
			if info == nil {
				return nil
			}

			name = cleanID(name)

			if info.IsDir() {
				f, err := cache.Fs.Open(name)
				if err != nil {
					// This cache dir may not exist.
					return nil
				}
				_, err = f.Readdirnames(1)
				// Close the handle before any removal: removing a directory
				// that is still open fails on some platforms (e.g. Windows).
				f.Close()
				if err != io.EOF {
					// Not an empty dir; leave it alone.
					return nil
				}

				// Empty dir.
				if rmErr := cache.Fs.Remove(name); rmErr != nil && !os.IsNotExist(rmErr) {
					return rmErr
				}
				return nil
			}

			shouldRemove := cache.isExpired(info.ModTime())

			if !shouldRemove && len(cache.nlocker.seen) > 0 {
				// Remove it if it's not been touched/used in the last build.
				_, seen := cache.nlocker.seen[name]
				shouldRemove = !seen
			}

			if !shouldRemove {
				return nil
			}

			if rmErr := cache.Fs.Remove(name); rmErr != nil {
				if os.IsNotExist(rmErr) {
					// Already gone; nothing left to prune and nothing to count.
					return nil
				}
				return rmErr
			}
			counter++

			return nil
		})

		if err != nil {
			return counter, errors.Wrapf(err, "failed to prune cache %q", k)
		}
	}

	return counter, nil
}
100 changes: 100 additions & 0 deletions cache/filecache/filecache_pruner_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package filecache

import (
"fmt"
"testing"
"time"

"github.com/gohugoio/hugo/config"
"github.com/gohugoio/hugo/hugofs"
"github.com/gohugoio/hugo/hugolib/paths"

"github.com/stretchr/testify/require"
)

// TestPrune checks that Prune first removes expired items and then, on a
// fresh set of caches, every item that was not used before the prune.
func TestPrune(t *testing.T) {
	t.Parallel()

	assert := require.New(t)

	const numItems = 10

	configStr := `
resourceDir = "myresources"
[caches]
[caches.getjson]
maxAge = "200ms"
dir = "/cache/c"
`

	cfg, err := config.FromConfigString(configStr, "toml")
	assert.NoError(err)
	fs := hugofs.NewMem(cfg)
	p, err := paths.New(fs, cfg)
	assert.NoError(err)

	caches, err := NewCachesFromPaths(p)
	assert.NoError(err)
	jsonCache := caches.GetJSONCache()

	itemID := func(i int) string {
		return fmt.Sprintf("i%d", i)
	}
	create := func() ([]byte, error) {
		return []byte("abc"), nil
	}
	// assertItems verifies that exactly the items selected by kept still
	// hold their content. It reads whatever jsonCache currently points to.
	assertItems := func(kept func(i int) bool) {
		for i := 0; i < numItems; i++ {
			id := itemID(i)
			want := ""
			if kept(i) {
				want = "abc"
			}
			assert.Equal(want, jsonCache.getString(id), id)
		}
	}

	for i := 0; i < numItems; i++ {
		jsonCache.GetOrCreateBytes(itemID(i), create)
		if i == 4 {
			// This will expire the first 5
			time.Sleep(201 * time.Millisecond)
		}
	}

	count, err := caches.Prune()
	assert.NoError(err)
	assert.Equal(5, count)

	assertItems(func(i int) bool { return i >= 5 })

	caches, err = NewCachesFromPaths(p)
	assert.NoError(err)
	jsonCache = caches.GetJSONCache()
	// Touch one and then prune.
	jsonCache.GetOrCreateBytes("i5", create)

	count, err = caches.Prune()
	assert.NoError(err)
	assert.Equal(4, count)

	// Now only the i5 should be left.
	assertItems(func(i int) bool { return i == 5 })
}
6 changes: 6 additions & 0 deletions cache/filecache/filecache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,9 @@ dir = "/cache/c"
}
wg.Wait()
}

// TestCleanID checks that cleanID collapses duplicate separators and drops
// a leading separator, yielding the same id for both spellings.
func TestCleanID(t *testing.T) {
	assert := require.New(t)

	expected := filepath.FromSlash("a/b/c.txt")
	for _, input := range []string{"/a/b//c.txt", "a/b//c.txt"} {
		assert.Equal(expected, cleanID(filepath.FromSlash(input)))
	}
}
88 changes: 3 additions & 85 deletions hugolib/prune_resources.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2017-present The Hugo Authors. All rights reserved.
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -13,89 +13,7 @@

package hugolib

import (
"io"
"os"
"strings"

"github.com/gohugoio/hugo/helpers"

"github.com/spf13/afero"
)

// GC requires a build first.
// GC requires a build first and must run on its own. It is not thread safe.
func (h *HugoSites) GC() (int, error) {
s := h.Sites[0]
assetsCacheFs := h.Deps.FileCaches.AssetsCache().Fs
imageCacheFs := h.Deps.FileCaches.ImageCache().Fs

isImageInUse := func(name string) bool {
for _, site := range h.Sites {
if site.ResourceSpec.IsInImageCache(name) {
return true
}
}

return false
}

isAssetInUse := func(name string) bool {
// These assets are stored in tuplets with an added extension to the key.
key := strings.TrimSuffix(name, helpers.Ext(name))
for _, site := range h.Sites {
if site.ResourceSpec.ResourceCache.Contains(key) {
return true
}
}

return false
}

walker := func(fs afero.Fs, dirname string, inUse func(filename string) bool) (int, error) {
counter := 0
err := afero.Walk(fs, dirname, func(path string, info os.FileInfo, err error) error {
if info == nil {
return nil
}

if info.IsDir() {
f, err := fs.Open(path)
if err != nil {
return nil
}
defer f.Close()
_, err = f.Readdirnames(1)
if err == io.EOF {
// Empty dir.
s.Fs.Source.Remove(path)
}

return nil
}

inUse := inUse(path)
if !inUse {
err := fs.Remove(path)
if err != nil && !os.IsNotExist(err) {
s.Log.ERROR.Printf("Failed to remove %q: %s", path, err)
} else {
counter++
}
}
return nil
})

return counter, err
}

imageCounter, err1 := walker(imageCacheFs, "", isImageInUse)
assetsCounter, err2 := walker(assetsCacheFs, "", isAssetInUse)
totalCount := imageCounter + assetsCounter

if err1 != nil {
return totalCount, err1
}

return totalCount, err2

return h.Deps.FileCaches.Prune()
}
5 changes: 4 additions & 1 deletion tpl/data/resources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,10 @@ func newDeps(cfg config.Provider) *deps.Deps {
logger := loggers.NewErrorLogger()
p, _ := paths.New(fs, cfg)

fileCaches, _ := filecache.NewCachesFromPaths(p)
fileCaches, err := filecache.NewCachesFromPaths(p)
if err != nil {
panic(err)
}

return &deps.Deps{
Cfg: cfg,
Expand Down

0 comments on commit 3c29c5a

Please sign in to comment.