1
0
Fork 0
mirror of https://github.com/restic/restic.git synced 2025-03-16 00:00:05 +01:00

Add cache for tree blobs

This commit is contained in:
Alexander Neumann 2016-09-04 20:27:53 +02:00
parent cdc379d665
commit 1bd651fae1
8 changed files with 259 additions and 8 deletions

View file

@ -13,6 +13,7 @@ import (
"restic/backend/rest"
"restic/backend/s3"
"restic/backend/sftp"
"restic/cache"
"restic/debug"
"restic/location"
"restic/repository"
@ -266,6 +267,13 @@ func (o GlobalOptions) OpenRepository() (*repository.Repository, error) {
return nil, errors.Fatalf("unable to open repo: %v", err)
}
cache, err := cache.New(o.CacheDir, s.Config().ID)
if err != nil {
return nil, err
}
s.UseCache(cache)
return s, nil
}

View file

@ -6,4 +6,10 @@ type Cache interface {
PutBlob(BlobHandle, []byte) error
DeleteBlob(BlobHandle) error
HasBlob(BlobHandle) bool
UpdateBlobs(idx BlobIndex) error
}
// BlobIndex returns information about blobs stored in a repo. It is the
// argument type accepted by Cache.UpdateBlobs.
type BlobIndex interface {
// Has reports whether the index contains the blob with the given ID and
// type.
Has(id ID, t BlobType) bool
}

View file

@ -7,7 +7,9 @@ import (
"os"
"path/filepath"
"restic"
"restic/debug"
"restic/errors"
"restic/fs"
)
// Cache is a local cache implementation.
@ -18,14 +20,68 @@ type Cache struct {
// make sure that Cache implement restic.Cache
var _ restic.Cache = &Cache{}
// NewCache creates a new cache in the given directory. If it is the empty
// getXDGCacheDir determines the per-user cache directory for restic following
// the XDG basedir spec, see
// http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
//
// $XDG_CACHE_HOME/restic is preferred; if XDG_CACHE_HOME is unset,
// $HOME/.cache/restic is used. The directory is created (mode 0700) when it
// does not exist yet. An error is returned if neither variable is set, if the
// directory cannot be created or inspected, or if the path exists but is not
// a directory.
func getXDGCacheDir() (string, error) {
	xdgBase, homeDir := os.Getenv("XDG_CACHE_HOME"), os.Getenv("HOME")

	var target string
	switch {
	case xdgBase != "":
		target = filepath.Join(xdgBase, "restic")
	case homeDir != "":
		target = filepath.Join(homeDir, ".cache", "restic")
	default:
		return "", errors.New("unable to locate cache directory (XDG_CACHE_HOME and HOME unset)")
	}

	info, statErr := fs.Stat(target)
	if os.IsNotExist(errors.Cause(statErr)) {
		// The directory is missing: create it, then stat again so that info
		// describes the freshly created directory.
		if mkErr := fs.MkdirAll(target, 0700); mkErr != nil {
			return "", errors.Wrap(mkErr, "MkdirAll")
		}

		info, statErr = fs.Stat(target)
		debug.Log("getCacheDir", "create cache dir %v", target)
	}

	if statErr != nil {
		return "", errors.Wrap(statErr, "Stat")
	}

	if info.IsDir() {
		return target, nil
	}

	return "", errors.Errorf("cache dir %v is not a directory", target)
}
// New creates a new cache in the given directory. If it is the empty
// string, the cache directory for the current user is used instead.
func NewCache(dir string) restic.Cache {
return &Cache{dir: dir}
// New returns a cache which stores its files below dir, in a sub directory
// named after repoID. When dir is the empty string, the cache directory of
// the current user (as found by getXDGCacheDir) is used instead. An empty
// repoID is rejected with an error.
func New(dir, repoID string) (cache restic.Cache, err error) {
	if dir == "" {
		// fall back to the per-user cache directory
		if dir, err = getXDGCacheDir(); err != nil {
			return nil, err
		}
	}

	if repoID == "" {
		return nil, errors.New("cache: empty repo id")
	}

	return &Cache{dir: filepath.Join(dir, repoID)}, nil
}
// fn returns the file name used to store the blob h in the cache directory
// dir. Blobs are spread over sub directories named after the first two
// characters of the ID's string form, so the resulting layout is
//
//     <dir>/<type>/<first two characters of id>/<id>
func fn(dir string, h restic.BlobHandle) string {
	id := h.ID.String()

	// NOTE(review): assumes ID.String() always yields at least two
	// characters, otherwise this slice expression panics -- confirm.
	subdir := id[:2]

	return filepath.Join(dir, h.Type.String(), subdir, id)
}
// GetBlob returns a blob from the cache. If the blob is not in the cache, ok
@ -94,7 +150,7 @@ func (c *Cache) PutBlob(h restic.BlobHandle, buf []byte) error {
return err
}
return ioutil.WriteFile(filename, buf, 0700)
return ioutil.WriteFile(filename, buf, 0600)
}
// DeleteBlob removes a blob from the cache. If it isn't included in the cache,
@ -116,3 +172,60 @@ func (c *Cache) HasBlob(h restic.BlobHandle) bool {
return true
}
// updateBlobs removes all cached blobs of type t which are not contained in
// idx.
//
// fn() stores blobs as <dir>/<type>/<two character prefix>/<id>, so the whole
// directory for type t is traversed, including all prefix sub directories.
// Only looking at the parent directory of a single blob's file name would
// inspect just one of the prefix directories and leave stale blobs behind in
// all the others.
//
// A missing cache directory is not an error (there is nothing to clean up).
// Files whose names do not parse as an ID are left alone. The first error
// encountered while walking the directory or removing a file is returned.
func (c *Cache) updateBlobs(idx restic.BlobIndex, t restic.BlobType) error {
	root := filepath.Join(c.dir, t.String())
	debug.Log("Cache.UpdateBlobs", "checking cached blobs of type %v in %v", t, root)

	return filepath.Walk(root, func(path string, fi os.FileInfo, err error) error {
		if err != nil {
			// Either the cache directory for this type has not been created
			// yet or the entry vanished while walking, both are fine.
			if os.IsNotExist(errors.Cause(err)) {
				return nil
			}
			return errors.Wrap(err, "Walk")
		}

		// directories only provide structure, blobs are stored in files
		if fi.IsDir() {
			return nil
		}

		name := fi.Name()
		id, err := restic.ParseID(name)
		if err != nil {
			debug.Log("Cache.UpdateBlobs", "  cache entry %q does not parse as id: %v", name, err)
			return nil
		}

		if idx.Has(id, t) {
			return nil
		}

		debug.Log("Cache.UpdateBlobs", "  remove %v/%v", t, name)
		if err = os.Remove(path); err != nil {
			return errors.Wrap(err, "Remove")
		}

		return nil
	})
}
// UpdateBlobs compares the local cache against idx and drops every cached
// blob the index does not contain any more. Tree blobs are processed first,
// then data blobs; processing stops at the first error, which is returned.
func (c *Cache) UpdateBlobs(idx restic.BlobIndex) (err error) {
	for _, blobType := range []restic.BlobType{restic.TreeBlob, restic.DataBlob} {
		if err = c.updateBlobs(idx, blobType); err != nil {
			return err
		}
	}

	return nil
}

View file

@ -1,12 +1,13 @@
package cache
import (
"math/rand"
"restic"
"restic/test"
"testing"
)
func TestCache(t *testing.T) {
func TestNew(t *testing.T) {
c, cleanup := TestNewCache(t)
defer cleanup()
@ -73,3 +74,70 @@ func TestCacheBufsize(t *testing.T) {
test.Assert(t, err != nil, "error is nil, although buffer size is wrong")
}
}
// blobIndex is a minimal index used by the tests in this file, it is backed
// by a set of blob handles.
type blobIndex struct {
// blobs is the set of handles the index reports as present.
blobs restic.BlobSet
}
// Has reports whether the handle built from id and t is a member of the set
// backing the index.
func (idx blobIndex) Has(id restic.ID, t restic.BlobType) bool {
	handle := restic.BlobHandle{ID: id, Type: t}
	if _, found := idx.blobs[handle]; found {
		return true
	}
	return false
}
// TestUpdateBlobs fills a fresh cache with random tree blobs and checks that
// UpdateBlobs removes exactly those blobs which are missing from the index
// passed to it. Three indexes are used in turn: one containing all blobs, one
// containing roughly 80% of them and an empty one.
func TestUpdateBlobs(t *testing.T) {
	c, cleanup := TestNewCache(t)
	defer cleanup()

	const numBlobs = 100

	payload := test.Random(23, 15*1024)
	all := restic.NewBlobSet()
	for n := 0; n < numBlobs; n++ {
		handle := restic.BlobHandle{ID: restic.NewRandomID(), Type: restic.TreeBlob}
		test.OK(t, c.PutBlob(handle, payload))
		all.Insert(handle)
	}

	// step 1: the index knows every blob, so nothing may be removed
	test.OK(t, c.UpdateBlobs(blobIndex{blobs: all}))
	for handle := range all {
		if !c.HasBlob(handle) {
			t.Errorf("blob %v was removed\n", handle)
		}
	}

	// step 2: drop about 20% of the blobs from the index
	survivors := restic.NewBlobSet()
	for handle := range all {
		if rand.Float32() > 0.8 {
			continue
		}
		survivors.Insert(handle)
	}

	test.OK(t, c.UpdateBlobs(blobIndex{blobs: survivors}))
	for handle := range all {
		kept, cached := survivors.Has(handle), c.HasBlob(handle)
		switch {
		case kept && !cached:
			t.Errorf("blob %v was removed\n", handle)
		case !kept && cached:
			t.Errorf("blob %v was kept although it should've been removed", handle)
		}
	}

	// step 3: an empty index must clear the cache completely
	test.OK(t, c.UpdateBlobs(blobIndex{blobs: restic.NewBlobSet()}))
	for handle := range all {
		if c.HasBlob(handle) {
			t.Errorf("blob %v was not removed\n", handle)
		}
	}
}

View file

@ -12,7 +12,8 @@ func TestNewCache(t testing.TB) (restic.Cache, func()) {
tempdir, cleanup := test.TempDir(t)
cachedir := filepath.Join(tempdir, "cache")
c := NewCache(cachedir)
c, err := New(cachedir, "test")
test.OK(t, err)
return c, cleanup
}

View file

@ -13,6 +13,8 @@ type Repository struct {
SetIndexFn func(restic.Index)
UseCacheFn func(restic.Cache)
IndexFn func() restic.Index
SaveFullIndexFn func() error
SaveIndexFn func() error
@ -55,6 +57,11 @@ func (repo Repository) SetIndex(idx restic.Index) {
repo.SetIndexFn(idx)
}
// UseCache is a stub method, it passes cache on to UseCacheFn. UseCacheFn
// must be set before UseCache is called, calling a nil function panics.
func (repo Repository) UseCache(cache restic.Cache) {
repo.UseCacheFn(cache)
}
// Index is a stub method.
func (repo Repository) Index() restic.Index {
return repo.IndexFn()

View file

@ -11,6 +11,8 @@ type Repository interface {
Key() *crypto.Key
UseCache(Cache)
SetIndex(Index)
Index() Index

View file

@ -24,6 +24,8 @@ type Repository struct {
keyName string
idx *MasterIndex
cache restic.Cache
*packerManager
}
@ -102,11 +104,25 @@ func (r *Repository) loadBlob(id restic.ID, t restic.BlobType, plaintextBuf []by
// lookup packs
blobs, err := r.idx.Lookup(id, t)
if err != nil {
if err != nil || len(blobs) == 0 {
debug.Log("Repo.loadBlob", "id %v not found in index: %v", id.Str(), err)
return 0, err
}
// try to get the blob from the cache for tree blobs
h := restic.BlobHandle{ID: id, Type: t}
if t == restic.TreeBlob && r.cache != nil && r.cache.HasBlob(h) {
ok, err := r.cache.GetBlob(h, plaintextBuf)
if err != nil {
return 0, err
}
if ok {
debug.Log("Repo.loadBlob", "loaded blob %v from cache", h)
return int(size), nil
}
}
var lastError error
for _, blob := range blobs {
debug.Log("Repo.loadBlob", "id %v found: %v", id.Str(), blob)
@ -146,6 +162,16 @@ func (r *Repository) loadBlob(id restic.ID, t restic.BlobType, plaintextBuf []by
continue
}
// store blob in the cache
if t == restic.TreeBlob && r.cache != nil {
h := restic.BlobHandle{ID: id, Type: t}
err = r.cache.PutBlob(h, plaintextBuf)
if err != nil {
return 0, err
}
debug.Log("Repo.loadBlob", "updated blob %v in cache", h)
}
return len(plaintextBuf), nil
}
@ -191,6 +217,16 @@ func (r *Repository) SaveAndEncrypt(t restic.BlobType, data []byte, id *restic.I
id = &hashedID
}
// store blob in the cache
if t == restic.TreeBlob && r.cache != nil {
h := restic.BlobHandle{ID: *id, Type: t}
err := r.cache.PutBlob(h, data)
if err != nil {
return restic.ID{}, err
}
debug.Log("Repo.Save", "updated blob %v in cache", h)
}
debug.Log("Repo.Save", "save id %v (%v, %d bytes)", id.Str(), t, len(data))
// get buf from the pool
@ -293,6 +329,11 @@ func (r *Repository) SetIndex(i restic.Index) {
r.idx = i.(*MasterIndex)
}
// UseCache sets c as the cache of the repository, replacing any cache set
// before. The repository code visible here only consults the cache when it
// is not nil.
func (r *Repository) UseCache(c restic.Cache) {
r.cache = c
}
// SaveIndex saves an index in the repository.
func SaveIndex(repo restic.Repository, index *Index) (restic.ID, error) {
buf := bytes.NewBuffer(nil)
@ -369,6 +410,11 @@ func (r *Repository) LoadIndex() error {
return err
}
// update cache
if r.cache != nil {
r.cache.UpdateBlobs(r.idx)
}
return nil
}