From 1bd651fae124faace7e5f810a30ef4ae247bf048 Mon Sep 17 00:00:00 2001
From: Alexander Neumann
Date: Sun, 4 Sep 2016 20:27:53 +0200
Subject: [PATCH] Add cache for tree blobs

---
 src/cmds/restic/global.go           |   8 ++
 src/restic/cache.go                 |   6 ++
 src/restic/cache/cache.go           | 123 ++++++++++++++++++++++++++--
 src/restic/cache/cache_test.go      |  70 +++++++++++++++-
 src/restic/cache/testing.go         |   3 +-
 src/restic/mock/repository.go       |   7 ++
 src/restic/repository.go            |   2 +
 src/restic/repository/repository.go |  48 ++++++++++-
 8 files changed, 259 insertions(+), 8 deletions(-)

diff --git a/src/cmds/restic/global.go b/src/cmds/restic/global.go
index ee4255f7b..afb3bcf5f 100644
--- a/src/cmds/restic/global.go
+++ b/src/cmds/restic/global.go
@@ -13,6 +13,7 @@ import (
 	"restic/backend/rest"
 	"restic/backend/s3"
 	"restic/backend/sftp"
+	"restic/cache"
 	"restic/debug"
 	"restic/location"
 	"restic/repository"
@@ -266,6 +267,13 @@ func (o GlobalOptions) OpenRepository() (*repository.Repository, error) {
 		return nil, errors.Fatalf("unable to open repo: %v", err)
 	}
 
+	blobCache, err := cache.New(o.CacheDir, s.Config().ID)
+	if err != nil {
+		return nil, err
+	}
+
+	s.UseCache(blobCache)
+
 	return s, nil
 }
 
diff --git a/src/restic/cache.go b/src/restic/cache.go
index 8e2ac7dca..34f6dabef 100644
--- a/src/restic/cache.go
+++ b/src/restic/cache.go
@@ -6,4 +6,10 @@ type Cache interface {
 	PutBlob(BlobHandle, []byte) error
 	DeleteBlob(BlobHandle) error
 	HasBlob(BlobHandle) bool
+	UpdateBlobs(idx BlobIndex) error
+}
+
+// BlobIndex returns information about blobs stored in a repo.
+type BlobIndex interface {
+	Has(id ID, t BlobType) bool
 }
diff --git a/src/restic/cache/cache.go b/src/restic/cache/cache.go
index 355c757a6..b9d2b0e43 100644
--- a/src/restic/cache/cache.go
+++ b/src/restic/cache/cache.go
@@ -7,7 +7,9 @@ import (
 	"os"
 	"path/filepath"
 	"restic"
+	"restic/debug"
 	"restic/errors"
+	"restic/fs"
 )
 
 // Cache is a local cache implementation.
@@ -18,14 +20,68 @@ type Cache struct {
 // make sure that Cache implement restic.Cache
 var _ restic.Cache = &Cache{}
 
-// NewCache creates a new cache in the given directory. If it is the empty
+// getXDGCacheDir returns the cache directory according to XDG basedir spec, see
+// http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
+func getXDGCacheDir() (string, error) {
+	xdgcache := os.Getenv("XDG_CACHE_HOME")
+	home := os.Getenv("HOME")
+
+	if xdgcache == "" && home == "" {
+		return "", errors.New("unable to locate cache directory (XDG_CACHE_HOME and HOME unset)")
+	}
+
+	cachedir := ""
+	if xdgcache != "" {
+		cachedir = filepath.Join(xdgcache, "restic")
+	} else if home != "" {
+		cachedir = filepath.Join(home, ".cache", "restic")
+	}
+
+	fi, err := fs.Stat(cachedir)
+	if os.IsNotExist(errors.Cause(err)) {
+		err = fs.MkdirAll(cachedir, 0700)
+		if err != nil {
+			return "", errors.Wrap(err, "MkdirAll")
+		}
+
+		fi, err = fs.Stat(cachedir)
+		debug.Log("getXDGCacheDir", "create cache dir %v", cachedir)
+	}
+
+	if err != nil {
+		return "", errors.Wrap(err, "Stat")
+	}
+
+	if !fi.IsDir() {
+		return "", errors.Errorf("cache dir %v is not a directory", cachedir)
+	}
+
+	return cachedir, nil
+}
+
+// New creates a new cache in the given directory. If it is the empty
 // string, the cache directory for the current user is used instead.
-func NewCache(dir string) restic.Cache {
-	return &Cache{dir: dir}
+func New(dir, repoID string) (cache restic.Cache, err error) {
+	if dir == "" {
+		dir, err = getXDGCacheDir()
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	if repoID == "" {
+		return nil, errors.New("cache: empty repo id")
+	}
+
+	dir = filepath.Join(dir, repoID)
+
+	return &Cache{dir: dir}, nil
 }
 
 func fn(dir string, h restic.BlobHandle) string {
-	return filepath.Join(dir, string(h.Type), h.ID.String())
+	id := h.ID.String()
+	subdir := id[:2]
+	return filepath.Join(dir, h.Type.String(), subdir, id)
 }
 
 // GetBlob returns a blob from the cache. If the blob is not in the cache, ok
@@ -94,7 +150,7 @@ func (c *Cache) PutBlob(h restic.BlobHandle, buf []byte) error {
 		return err
 	}
 
-	return ioutil.WriteFile(filename, buf, 0700)
+	return ioutil.WriteFile(filename, buf, 0600)
 }
 
 // DeleteBlob removes a blob from the cache. If it isn't included in the cache,
@@ -116,3 +172,84 @@ func (c *Cache) HasBlob(h restic.BlobHandle) bool {
 
 	return true
 }
+
+// readdirnames returns the names of all entries in dir. A directory which
+// does not exist is treated as empty.
+func readdirnames(dir string) (names []string, err error) {
+	d, err := os.Open(dir)
+	if err != nil && os.IsNotExist(errors.Cause(err)) {
+		return nil, nil
+	}
+
+	if err != nil {
+		return nil, errors.Wrap(err, "Open")
+	}
+
+	defer func() {
+		e := d.Close()
+		if err == nil {
+			err = errors.Wrap(e, "Close")
+		}
+	}()
+
+	names, err = d.Readdirnames(-1)
+	if err != nil {
+		return nil, errors.Wrap(err, "Readdirnames")
+	}
+
+	return names, nil
+}
+
+// updateBlobs removes all cached blobs of type t which are not contained in
+// idx. Blobs are stored as <dir>/<type>/<first two characters of ID>/<ID>
+// (see fn), so every subdirectory below the type directory is visited.
+func (c *Cache) updateBlobs(idx restic.BlobIndex, t restic.BlobType) error {
+	typeDir := filepath.Join(c.dir, t.String())
+
+	subdirs, err := readdirnames(typeDir)
+	if err != nil {
+		return err
+	}
+
+	for _, subdir := range subdirs {
+		dir := filepath.Join(typeDir, subdir)
+
+		entries, err := readdirnames(dir)
+		if err != nil {
+			return err
+		}
+
+		debug.Log("Cache.UpdateBlobs", "checking %v/%v, %d entries", t, subdir, len(entries))
+		for _, name := range entries {
+			id, err := restic.ParseID(name)
+			if err != nil {
+				debug.Log("Cache.UpdateBlobs", " cache entry %q does not parse as id: %v", name, err)
+				continue
+			}
+
+			if idx.Has(id, t) {
+				continue
+			}
+
+			debug.Log("Cache.UpdateBlobs", " remove %v/%v", t, name)
+
+			err = os.Remove(filepath.Join(dir, name))
+			if err != nil {
+				return errors.Wrap(err, "Remove")
+			}
+		}
+	}
+
+	return nil
+}
+
+// UpdateBlobs takes an index and removes blobs from the local cache which are
+// not in the repo any more.
+func (c *Cache) UpdateBlobs(idx restic.BlobIndex) (err error) {
+	err = c.updateBlobs(idx, restic.TreeBlob)
+	if err != nil {
+		return err
+	}
+
+	return c.updateBlobs(idx, restic.DataBlob)
+}
diff --git a/src/restic/cache/cache_test.go b/src/restic/cache/cache_test.go
index 7753f36f7..393ef7c65 100644
--- a/src/restic/cache/cache_test.go
+++ b/src/restic/cache/cache_test.go
@@ -1,12 +1,13 @@
 package cache
 
 import (
+	"math/rand"
 	"restic"
 	"restic/test"
 	"testing"
 )
 
-func TestCache(t *testing.T) {
+func TestNew(t *testing.T) {
 	c, cleanup := TestNewCache(t)
 	defer cleanup()
 
@@ -73,3 +74,70 @@ func TestCacheBufsize(t *testing.T) {
 		test.Assert(t, err != nil, "error is nil, although buffer size is wrong")
 	}
 }
+
+type blobIndex struct {
+	blobs restic.BlobSet
+}
+
+func (idx blobIndex) Has(id restic.ID, t restic.BlobType) bool {
+	_, ok := idx.blobs[restic.BlobHandle{ID: id, Type: t}]
+	return ok
+}
+
+func TestUpdateBlobs(t *testing.T) {
+	c, cleanup := TestNewCache(t)
+	defer cleanup()
+
+	blobs := restic.NewBlobSet()
+
+	buf := test.Random(23, 15*1024)
+	for i := 0; i < 100; i++ {
+		id := restic.NewRandomID()
+		h := restic.BlobHandle{ID: id, Type: restic.TreeBlob}
+		err := c.PutBlob(h, buf)
+		test.OK(t, err)
+		blobs.Insert(h)
+	}
+
+	// use an index with all blobs, this must not remove anything
+	idx := blobIndex{blobs: blobs}
+	test.OK(t, c.UpdateBlobs(idx))
+
+	for h := range blobs {
+		if !c.HasBlob(h) {
+			t.Errorf("blob %v was removed\n", h)
+		}
+	}
+
+	// next, remove about 20% of the blobs
+	keepBlobs := restic.NewBlobSet()
+	for h := range blobs {
+		if rand.Float32() <= 0.8 {
+			keepBlobs.Insert(h)
+		}
+	}
+	idx = blobIndex{blobs: keepBlobs}
+	test.OK(t, c.UpdateBlobs(idx))
+
+	for h := range blobs {
+		if keepBlobs.Has(h) {
+			if !c.HasBlob(h) {
+				t.Errorf("blob %v was removed\n", h)
+			}
+			continue
+		}
+
+		if c.HasBlob(h) {
+			t.Errorf("blob %v was kept although it should've been removed", h)
+		}
+	}
+
+	// remove the remaining blobs
+	idx = blobIndex{blobs: restic.NewBlobSet()}
+	test.OK(t, c.UpdateBlobs(idx))
+	for h := range blobs {
+		if c.HasBlob(h) {
+			t.Errorf("blob %v was not removed\n", h)
+		}
+	}
+}
diff --git a/src/restic/cache/testing.go b/src/restic/cache/testing.go
index 84d544c1a..548939169 100644
--- a/src/restic/cache/testing.go
+++ b/src/restic/cache/testing.go
@@ -12,7 +12,8 @@ func TestNewCache(t testing.TB) (restic.Cache, func()) {
 	tempdir, cleanup := test.TempDir(t)
 
 	cachedir := filepath.Join(tempdir, "cache")
-	c := NewCache(cachedir)
+	c, err := New(cachedir, "test")
+	test.OK(t, err)
 
 	return c, cleanup
 }
diff --git a/src/restic/mock/repository.go b/src/restic/mock/repository.go
index 3143a8ceb..13fd09a8f 100644
--- a/src/restic/mock/repository.go
+++ b/src/restic/mock/repository.go
@@ -13,6 +13,8 @@ type Repository struct {
 
 	SetIndexFn func(restic.Index)
 
+	UseCacheFn func(restic.Cache)
+
 	IndexFn         func() restic.Index
 	SaveFullIndexFn func() error
 	SaveIndexFn     func() error
@@ -55,6 +57,11 @@ func (repo Repository) SetIndex(idx restic.Index) {
 	repo.SetIndexFn(idx)
 }
 
+// UseCache is a stub method.
+func (repo Repository) UseCache(cache restic.Cache) {
+	repo.UseCacheFn(cache)
+}
+
 // Index is a stub method.
 func (repo Repository) Index() restic.Index {
 	return repo.IndexFn()
diff --git a/src/restic/repository.go b/src/restic/repository.go
index 959c0bd3c..ab429d0e0 100644
--- a/src/restic/repository.go
+++ b/src/restic/repository.go
@@ -11,6 +11,8 @@ type Repository interface {
 
 	Key() *crypto.Key
 
+	UseCache(Cache)
+
 	SetIndex(Index)
 
 	Index() Index
diff --git a/src/restic/repository/repository.go b/src/restic/repository/repository.go
index a7258e090..1bdd4d972 100644
--- a/src/restic/repository/repository.go
+++ b/src/restic/repository/repository.go
@@ -24,6 +24,8 @@ type Repository struct {
 	keyName string
 	idx     *MasterIndex
 
+	cache restic.Cache
+
 	*packerManager
 }
 
@@ -102,11 +104,25 @@ func (r *Repository) loadBlob(id restic.ID, t restic.BlobType, plaintextBuf []by
 
 	// lookup packs
 	blobs, err := r.idx.Lookup(id, t)
-	if err != nil {
+	if err != nil || len(blobs) == 0 {
 		debug.Log("Repo.loadBlob", "id %v not found in index: %v", id.Str(), err)
 		return 0, err
 	}
 
+	// try to get the blob from the cache for tree blobs
+	h := restic.BlobHandle{ID: id, Type: t}
+	if t == restic.TreeBlob && r.cache != nil && r.cache.HasBlob(h) {
+		ok, err := r.cache.GetBlob(h, plaintextBuf)
+		if err != nil {
+			return 0, err
+		}
+
+		if ok {
+			debug.Log("Repo.loadBlob", "loaded blob %v from cache", h)
+			return int(size), nil
+		}
+	}
+
 	var lastError error
 	for _, blob := range blobs {
 		debug.Log("Repo.loadBlob", "id %v found: %v", id.Str(), blob)
@@ -146,6 +162,16 @@ func (r *Repository) loadBlob(id restic.ID, t restic.BlobType, plaintextBuf []by
 			continue
 		}
 
+		// store blob in the cache
+		if t == restic.TreeBlob && r.cache != nil {
+			h := restic.BlobHandle{ID: id, Type: t}
+			err = r.cache.PutBlob(h, plaintextBuf)
+			if err != nil {
+				return 0, err
+			}
+			debug.Log("Repo.loadBlob", "updated blob %v in cache", h)
+		}
+
 		return len(plaintextBuf), nil
 	}
 
@@ -191,6 +217,16 @@ func (r *Repository) SaveAndEncrypt(t restic.BlobType, data []byte, id *restic.I
 		id = &hashedID
 	}
 
+	// store blob in the cache
+	if t == restic.TreeBlob && r.cache != nil {
+		h := restic.BlobHandle{ID: *id, Type: t}
+		err := r.cache.PutBlob(h, data)
+		if err != nil {
+			return restic.ID{}, err
+		}
+		debug.Log("Repo.Save", "updated blob %v in cache", h)
+	}
+
 	debug.Log("Repo.Save", "save id %v (%v, %d bytes)", id.Str(), t, len(data))
 
 	// get buf from the pool
@@ -293,6 +329,11 @@ func (r *Repository) SetIndex(i restic.Index) {
 	r.idx = i.(*MasterIndex)
 }
 
+// UseCache configures the repository to use c as a local cache for tree blobs.
+func (r *Repository) UseCache(c restic.Cache) {
+	r.cache = c
+}
+
 // SaveIndex saves an index in the repository.
 func SaveIndex(repo restic.Repository, index *Index) (restic.ID, error) {
 	buf := bytes.NewBuffer(nil)
@@ -369,6 +410,16 @@ func (r *Repository) LoadIndex() error {
 		return err
 	}
 
+	// remove blobs from the cache which are no longer in the index. The cache
+	// is only an optimisation, so a failed cleanup is logged but does not
+	// prevent the index from being used.
+	if r.cache != nil {
+		err := r.cache.UpdateBlobs(r.idx)
+		if err != nil {
+			debug.Log("Repo.LoadIndex", "updating cache failed: %v", err)
+		}
+	}
+
 	return nil
 }
 