From afeca99aef3f015258a7255fe5d7d020e604a1f9 Mon Sep 17 00:00:00 2001
From: Ivan Shapovalov
Date: Sat, 30 Nov 2024 14:35:02 +0400
Subject: [PATCH] restorer: implement same-file tracking and cloning ("reflinks")

Note that "cloning" means either true block cloning via `ioctl(FICLONE)`
or any kind of local copy in general via fallback to `io.Copy()`.

TBD:
- fall back to restoring a file normally if it could not be cloned
- track all potential cloning sources for each file (every copy that was
  restored rather than cloned) and try to clone from each copy in turn
  (Why: imagine a set of duplicate files being restored N:M to a set of
  distinct filesystems or subvolumes or datasets, such that some pairs
  can be used as operands to a block cloning operation and some cannot)
- progress reporting (report how much space we have saved, or not)

Non-goals:
- cloning individual blobs via `ioctl(FICLONERANGE)`
  (Why: this is not going to work very well, if at all, due to blobs not
  being aligned to any kind of fundamental block size, _and_ this will
  impact cloning entire files unless the latter is special-cased, which
  is exactly what is being done here.)
---
 internal/restorer/restorer.go | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go
index acf411f24..bbeef5dd7 100644
--- a/internal/restorer/restorer.go
+++ b/internal/restorer/restorer.go
@@ -316,6 +316,29 @@ func (res *Restorer) restoreHardlinkAt(node *restic.Node, target, path, location
 	return res.restoreNodeMetadataTo(node, path, location)
 }
 
+// restoreReflink replaces path with a clone of the already restored file at
+// target, reports the node as fully restored and then applies the node's own
+// metadata to path. With DryRun set, the remove and clone steps are skipped.
+// NOTE(review): the bool from fs.Clone presumably tells a true block clone
+// from a plain copy fallback; it is only logged until progress reporting uses it.
+func (res *Restorer) restoreReflink(node *restic.Node, target, path, location string) error {
+	if !res.opts.DryRun {
+		// remove whatever occupies path; a missing file is not an error
+		if err := fs.Remove(path); err != nil && !errors.Is(err, os.ErrNotExist) {
+			return errors.Wrap(err, "RemoveNode")
+		}
+		cloned, err := fs.Clone(target, path)
+		if err != nil {
+			return errors.WithStack(err)
+		}
+		debug.Log("reflink (restored): orig=%s, link=%s, cloned=%v", target, path, cloned)
+	}
+
+	res.opts.Progress.AddProgress(location, restoreui.ActionFileRestored, node.Size, node.Size)
+	// reflinked files *do* have separate metadata
+	return res.restoreNodeMetadataTo(node, path, location)
+}
+
 func (res *Restorer) ensureDir(target string) error {
 	if res.opts.DryRun {
 		return nil
@@ -358,6 +381,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) (uint64, error)
 	}
 
 	idx := NewHardlinkIndex[string]()
+	refIdx := NewReflinkIndex()
 	filerestorer := newFileRestorer(dst, res.repo.LoadBlobsFromPack, res.repo.LookupBlob,
 		res.repo.Connections(), res.opts.Sparse, res.opts.Delete, res.repo.StartWarmup, res.opts.Progress)
 	filerestorer.Error = res.Error
@@ -397,6 +421,16 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) (uint64, error)
 				idx.Add(node.Inode, node.DeviceID, location)
 			}
 
+			// do not bother reflinking empty files
+			if res.opts.Reflinks && node.Size > 0 {
+				refOrig, refIsOrig := refIdx.Put(location, node.Content)
+				if !refIsOrig {
+					debug.Log("reflink (deferring): orig=%s, link=%s", refOrig, location)
+					res.opts.Progress.AddFile(node.Size)
+					return nil
+				}
+			}
+
 			buf, err = res.withOverwriteCheck(ctx, node, target, location, false, buf, func(updateMetadataOnly bool, matches *fileState) error {
 				if updateMetadataOnly {
 					res.opts.Progress.AddSkippedFile(location, node.Size)
@@ -453,6 +487,16 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) (uint64, error)
 				return err
 			}
 
+			if res.opts.Reflinks && node.Size > 0 {
+				if orig, hasOrig := refIdx.Get(node.Content); hasOrig && orig != location {
+					debug.Log("reflink (restoring): orig=%s, link=%s", orig, location)
+					_, err := res.withOverwriteCheck(ctx, node, target, location, false, nil, func(_ bool, _ *fileState) error {
+						return res.restoreReflink(node, filerestorer.targetPath(orig), target, location)
+					})
+					return err
+				}
+			}
+
 			if _, ok := res.hasRestoredFile(location); ok {
 				return res.restoreNodeMetadataTo(node, target, location)
 			}