From fcb451cd1f0737322d0525f23ddba72ddbcd1a3b Mon Sep 17 00:00:00 2001 From: Sebastian Hasler Date: Sat, 16 Sep 2023 04:25:48 +0200 Subject: [PATCH] backup: support reading changed files/dirs from a file For `restic backup`, support new flags `--changed-files-from-verbatim` and `--changed-files-from-raw` to read, from a file (or multiple files), the list of files/dirs that have actually changed. Directories that don't (directly or indirectly) contain any changed files/dirs will reuse the corresponding subtree of the parent snapshot. This option is useful for higher-level backup tools which use restic as a backend but have their own mechanism for figuring out which files have changed (e.g., using zfs or btrfs diff tools). We require `--parent` to be passed explicitly as a protection mechanism, in order to make sure the higher-level backup tool and restic agree on the parent snapshot. The caller can, however, circumvent this protection mechanism by passing `--parent latest`. Caveat: since device IDs are unstable (across reboots or across different zfs/btrfs snapshots of the same subvolume), the parent snapshot and current snapshot might have mismatching device IDs. In this case, the feature will still reuse subtrees of the parent snapshot (under the conditions mentioned above), so we end up with a snapshot that contains subtrees with different `device_id` values, even if there was only a single mountpoint in play. For now, we could simply document this caveat and discourage users who rely on correct restoration of hardlinks from using this feature. Once https://github.com/restic/restic/issues/3041 is properly fixed, this caveat will probably go away, too.
The idea for this feature emerged here: https://github.com/restic/restic/issues/1502#issuecomment-1721956623 --- cmd/restic/cmd_backup.go | 103 ++++++++++++++++++++++++++-------- internal/archiver/archiver.go | 83 +++++++++++++++++++++------ internal/archiver/testing.go | 2 +- 3 files changed, 147 insertions(+), 41 deletions(-) diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 6b5706855..e723f2531 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -88,28 +88,30 @@ Exit status is 3 if some source data could not be read (incomplete snapshot crea type BackupOptions struct { excludePatternOptions - Parent string - GroupBy restic.SnapshotGroupByOptions - Force bool - ExcludeOtherFS bool - ExcludeIfPresent []string - ExcludeCaches bool - ExcludeLargerThan string - Stdin bool - StdinFilename string - Tags restic.TagLists - Host string - FilesFrom []string - FilesFromVerbatim []string - FilesFromRaw []string - TimeStamp string - WithAtime bool - IgnoreInode bool - IgnoreCtime bool - UseFsSnapshot bool - DryRun bool - ReadConcurrency uint - NoScan bool + Parent string + GroupBy restic.SnapshotGroupByOptions + Force bool + ExcludeOtherFS bool + ExcludeIfPresent []string + ExcludeCaches bool + ExcludeLargerThan string + Stdin bool + StdinFilename string + Tags restic.TagLists + Host string + FilesFrom []string + FilesFromVerbatim []string + FilesFromRaw []string + ChangedFilesFromVerbatim []string + ChangedFilesFromRaw []string + TimeStamp string + WithAtime bool + IgnoreInode bool + IgnoreCtime bool + UseFsSnapshot bool + DryRun bool + ReadConcurrency uint + NoScan bool } var backupOptions BackupOptions @@ -146,6 +148,8 @@ func init() { f.StringArrayVar(&backupOptions.FilesFrom, "files-from", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)") f.StringArrayVar(&backupOptions.FilesFromVerbatim, "files-from-verbatim", nil, "read the files to backup from `file` (can be combined 
with file args; can be specified multiple times)") f.StringArrayVar(&backupOptions.FilesFromRaw, "files-from-raw", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)") + f.StringArrayVar(&backupOptions.ChangedFilesFromVerbatim, "changed-files-from-verbatim", nil, "read names of changed files/directories from `file` (can be combined with changed-file args; can be specified multiple times)") + f.StringArrayVar(&backupOptions.ChangedFilesFromRaw, "changed-files-from-raw", nil, "read names of changed files/directories from `file` (can be combined with changed-file args; can be specified multiple times)") f.StringVar(&backupOptions.TimeStamp, "time", "", "`time` of the backup (ex. '2012-11-01 22:08:41') (default: now)") f.BoolVar(&backupOptions.WithAtime, "with-atime", false, "store the atime for all files and directories") f.BoolVar(&backupOptions.IgnoreInode, "ignore-inode", false, "ignore inode number changes when checking for modified files") @@ -298,11 +302,27 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error { return errors.Fatal("--stdin and --files-from-raw cannot be used together") } + if len(opts.ChangedFilesFromVerbatim) > 0 { + return errors.Fatal("--stdin and --changed-files-from-verbatim cannot be used together") + } + if len(opts.ChangedFilesFromRaw) > 0 { + return errors.Fatal("--stdin and --changed-files-from-raw cannot be used together") + } + if len(args) > 0 { return errors.Fatal("--stdin was specified and files/dirs were listed as arguments") } } + if opts.Parent == "" { + if len(opts.ChangedFilesFromVerbatim) > 0 { + return errors.Fatal("using --changed-files-from-verbatim requires to also specify --parent") + } + if len(opts.ChangedFilesFromRaw) > 0 { + return errors.Fatal("using --changed-files-from-raw requires to also specify --parent") + } + } + return nil } @@ -431,6 +451,38 @@ func collectTargets(opts BackupOptions, args []string) (targets []string, err er 
return targets, nil } +// collectChangedFiles returns the list of changed files/dirs read from the files given via --changed-files-from-verbatim and --changed-files-from-raw, or nil if neither option is set. +func collectChangedFiles(opts BackupOptions) (changedFiles *[]string, err error) { + if len(opts.ChangedFilesFromVerbatim) == 0 && len(opts.ChangedFilesFromRaw) == 0 { + return nil, nil + } + + changedFiles = &[]string{} + + for _, file := range opts.ChangedFilesFromVerbatim { + fromfile, err := readLines(file) + if err != nil { + return nil, err + } + for _, line := range fromfile { + if line == "" { + continue + } + *changedFiles = append(*changedFiles, line) + } + } + + for _, file := range opts.ChangedFilesFromRaw { + fromfile, err := readFilenamesFromFileRaw(file) + if err != nil { + return nil, err + } + *changedFiles = append(*changedFiles, fromfile...) + } + + return changedFiles, nil +} + // parent returns the ID of the parent snapshot. If there is none, nil is // returned. func findParentSnapshot(ctx context.Context, repo restic.Repository, opts BackupOptions, targets []string, timeStampLimit time.Time) (*restic.Snapshot, error) { @@ -472,6 +524,11 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter return err } + changedFiles, err := collectChangedFiles(opts) + if err != nil { + return err + } + timeStamp := time.Now() if opts.TimeStamp != "" { timeStamp, err = time.ParseInLocation(TimeFormat, opts.TimeStamp, time.Local) @@ -654,7 +711,7 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter if !gopts.JSON { progressPrinter.V("start backup on %v", targets) } - _, id, err := arch.Snapshot(ctx, targets, snapshotOpts) + _, id, err := arch.Snapshot(ctx, targets, changedFiles, snapshotOpts) // cleanly shutdown all running goroutines cancel() diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index 98819d797..0242ffb8d 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -214,7 +214,7 @@ func (arch *Archiver) wrapLoadTreeError(id restic.ID, err 
error) error { // SaveDir stores a directory in the repo and returns the node. snPath is the // path within the current snapshot. -func (arch *Archiver) SaveDir(ctx context.Context, snPath string, dir string, fi os.FileInfo, previous *restic.Tree, complete CompleteFunc) (d FutureNode, err error) { +func (arch *Archiver) SaveDir(ctx context.Context, snPath string, dir string, absdir string, cdtree *Tree, fi os.FileInfo, previous *restic.Node, complete CompleteFunc) (d FutureNode, err error) { debug.Log("%v %v", snPath, dir) treeNode, err := arch.nodeFromFileInfo(snPath, dir, fi) @@ -222,6 +222,25 @@ func (arch *Archiver) SaveDir(ctx context.Context, snPath string, dir string, fi return FutureNode{}, err } + if cdtree != nil && previous != nil && len(cdtree.Nodes) == 0 { + debug.Log("%v doesn't contain any changed files, using existing nodes", dir) + treeNode.Subtree = previous.Subtree + fn := newFutureNodeWithResult(futureNodeResult{ + snPath: snPath, + target: dir, + node: treeNode, + }) + return fn, nil + } + + oldSubtree, err := arch.loadSubtree(ctx, previous) + if err != nil { + err = arch.error(absdir, err) + } + if err != nil { + return FutureNode{}, err + } + names, err := readdirnames(arch.FS, dir, fs.O_NOFOLLOW) if err != nil { return FutureNode{}, err @@ -237,10 +256,16 @@ func (arch *Archiver) SaveDir(ctx context.Context, snPath string, dir string, fi return FutureNode{}, ctx.Err() } + var subcdtree *Tree + if cdtree != nil { + tmp := cdtree.Nodes[name] + subcdtree = &tmp + } + pathname := arch.FS.Join(dir, name) - oldNode := previous.Find(name) + oldNode := oldSubtree.Find(name) snItem := join(snPath, name) - fn, excluded, err := arch.Save(ctx, snItem, pathname, oldNode) + fn, excluded, err := arch.Save(ctx, snItem, pathname, subcdtree, oldNode) // return error early if possible if err != nil { @@ -331,7 +356,7 @@ func (arch *Archiver) allBlobsPresent(previous *restic.Node) bool { // Errors and completion needs to be handled by the caller. 
// // snPath is the path within the current snapshot. -func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous *restic.Node) (fn FutureNode, excluded bool, err error) { +func (arch *Archiver) Save(ctx context.Context, snPath, target string, cdtree *Tree, previous *restic.Node) (fn FutureNode, excluded bool, err error) { start := time.Now() debug.Log("%v target %q, previous %v", snPath, target, previous) @@ -444,15 +469,8 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous debug.Log(" %v dir", target) snItem := snPath + "/" - oldSubtree, err := arch.loadSubtree(ctx, previous) - if err != nil { - err = arch.error(abstarget, err) - } - if err != nil { - return FutureNode{}, false, err - } - fn, err = arch.SaveDir(ctx, snPath, target, fi, oldSubtree, + fn, err = arch.SaveDir(ctx, snPath, target, abstarget, cdtree, fi, previous, func(node *restic.Node, stats ItemStats) { arch.CompleteItem(snItem, previous, node, stats, time.Since(start)) }) @@ -537,7 +555,7 @@ func (arch *Archiver) statDir(dir string) (os.FileInfo, error) { // SaveTree stores a Tree in the repo, returned is the tree. snPath is the path // within the current snapshot. 
-func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree, previous *restic.Tree, complete CompleteFunc) (FutureNode, int, error) { +func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree, cdtree *Tree, previous *restic.Tree, complete CompleteFunc) (FutureNode, int, error) { var node *restic.Node if snPath != "/" { @@ -575,7 +593,21 @@ func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree, // this is a leaf node if subatree.Leaf() { - fn, excluded, err := arch.Save(ctx, join(snPath, name), subatree.Path, previous.Find(name)) + relative_cdtree := cdtree + if relative_cdtree != nil { + abs_path, err := arch.FS.Abs(subatree.Path) + if err != nil { + return FutureNode{}, 0, err + } + pc, _ := pathComponents(arch.FS, abs_path, false) + for _, component := range pc { + tmp := relative_cdtree.Nodes[component] + relative_cdtree = &tmp + } + debug.Log("relative_cdtree for subtree path %v:\n%v", abs_path, relative_cdtree) + } + + fn, excluded, err := arch.Save(ctx, join(snPath, name), subatree.Path, relative_cdtree, previous.Find(name)) if err != nil { err = arch.error(subatree.Path, err) @@ -609,7 +641,7 @@ func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree, } // not a leaf node, archive subtree - fn, _, err := arch.SaveTree(ctx, join(snPath, name), &subatree, oldSubtree, func(n *restic.Node, is ItemStats) { + fn, _, err := arch.SaveTree(ctx, join(snPath, name), &subatree, cdtree, oldSubtree, func(n *restic.Node, is ItemStats) { arch.CompleteItem(snItem, oldNode, n, is, time.Since(start)) }) if err != nil { @@ -728,7 +760,7 @@ func (arch *Archiver) stopWorkers() { } // Snapshot saves several targets and returns a snapshot. 
-func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts SnapshotOptions) (*restic.Snapshot, restic.ID, error) { +func (arch *Archiver) Snapshot(ctx context.Context, targets []string, changedFiles *[]string, opts SnapshotOptions) (*restic.Snapshot, restic.ID, error) { cleanTargets, err := resolveRelativeTargets(arch.FS, targets) if err != nil { return nil, restic.ID{}, err @@ -739,6 +771,23 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps return nil, restic.ID{}, err } + var cdtree *Tree + if changedFiles != nil { + // Make sure paths in changedFiles are absolute + for i := range *changedFiles { + (*changedFiles)[i], err = arch.FS.Abs((*changedFiles)[i]) + if err != nil { + return nil, restic.ID{}, err + } + } + + cdtree, err = NewTree(arch.FS, *changedFiles) + if err != nil { + return nil, restic.ID{}, err + } + debug.Log("cdtree:\n%v", cdtree) + } + var rootTreeID restic.ID wgUp, wgUpCtx := errgroup.WithContext(ctx) @@ -752,7 +801,7 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps arch.runWorkers(wgCtx, wg) debug.Log("starting snapshot") - fn, nodeCount, err := arch.SaveTree(wgCtx, "/", atree, arch.loadParentTree(wgCtx, opts.ParentSnapshot), func(n *restic.Node, is ItemStats) { + fn, nodeCount, err := arch.SaveTree(wgCtx, "/", atree, cdtree, arch.loadParentTree(wgCtx, opts.ParentSnapshot), func(n *restic.Node, is ItemStats) { arch.CompleteItem("/", nil, nil, is, time.Since(start)) }) if err != nil { diff --git a/internal/archiver/testing.go b/internal/archiver/testing.go index c7482d160..ad5d475d7 100644 --- a/internal/archiver/testing.go +++ b/internal/archiver/testing.go @@ -31,7 +31,7 @@ func TestSnapshot(t testing.TB, repo restic.Repository, path string, parent *res } opts.ParentSnapshot = sn } - sn, _, err := arch.Snapshot(context.TODO(), []string{path}, opts) + sn, _, err := arch.Snapshot(context.TODO(), []string{path}, nil, opts) if err != nil { t.Fatal(err) }