1
0
Fork 0
mirror of https://github.com/restic/restic.git synced 2025-03-16 00:00:05 +01:00

backup: support reading changed files/dirs from a file

For `restic backup`, support new flags
`--changed-files-from-verbatim` and `--changed-files-from-raw` to
read the files/dirs that actually have changed from a file (or
multiple files). Directories that don't (directly or indirectly)
contain any changed files/dirs will reuse the corresponding subtree
of the parent snapshot.

This option is useful for higher-level backup tools which use
restic as a backend but have their own mechanism of figuring out
which files have changed (e.g., using zfs or btrfs diff tools).
We require `--parent` to be passed explicitly as a protection
mechanism, to make sure the higher-level backup tool and restic
agree on the parent snapshot. Note that the caller can still
circumvent this protection mechanism by passing `--parent latest`.

Caveat: since device IDs are unstable (across reboots or across
different zfs/btrfs snapshots of the same subvolume), the parent
snapshot and current snapshot might have mismatching device IDs.
In this case, the feature will still reuse subtrees of the parent
snapshot (under the conditions mentioned above), so we end up with
a snapshot that contains subtrees with different `device_id`
values, even if there was only a single mountpoint in play.

For now, we could simply document this caveat and discourage users
who rely on correct restoration of hardlinks from using this
feature. Once https://github.com/restic/restic/issues/3041 is
properly fixed, this caveat will probably go away,
too.

The idea for this feature emerged here:
https://github.com/restic/restic/issues/1502#issuecomment-1721956623
This commit is contained in:
Sebastian Hasler 2023-09-16 04:25:48 +02:00
parent 6e586b64e4
commit fcb451cd1f
3 changed files with 147 additions and 41 deletions
cmd/restic
internal/archiver

View file

@ -102,6 +102,8 @@ type BackupOptions struct {
FilesFrom []string
FilesFromVerbatim []string
FilesFromRaw []string
ChangedFilesFromVerbatim []string
ChangedFilesFromRaw []string
TimeStamp string
WithAtime bool
IgnoreInode bool
@ -146,6 +148,8 @@ func init() {
f.StringArrayVar(&backupOptions.FilesFrom, "files-from", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.FilesFromVerbatim, "files-from-verbatim", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.FilesFromRaw, "files-from-raw", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.ChangedFilesFromVerbatim, "changed-files-from-verbatim", nil, "read names of changed files/directories from `file` (can be combined with changed-file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.ChangedFilesFromRaw, "changed-files-from-raw", nil, "read names of changed files/directories from `file` (can be combined with changed-file args; can be specified multiple times)")
f.StringVar(&backupOptions.TimeStamp, "time", "", "`time` of the backup (ex. '2012-11-01 22:08:41') (default: now)")
f.BoolVar(&backupOptions.WithAtime, "with-atime", false, "store the atime for all files and directories")
f.BoolVar(&backupOptions.IgnoreInode, "ignore-inode", false, "ignore inode number changes when checking for modified files")
@ -298,11 +302,27 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
return errors.Fatal("--stdin and --files-from-raw cannot be used together")
}
if len(opts.ChangedFilesFromVerbatim) > 0 {
return errors.Fatal("--stdin and --changed-files-from-verbatim cannot be used together")
}
if len(opts.ChangedFilesFromRaw) > 0 {
return errors.Fatal("--stdin and --changed-files-from-raw cannot be used together")
}
if len(args) > 0 {
return errors.Fatal("--stdin was specified and files/dirs were listed as arguments")
}
}
if opts.Parent == "" {
if len(opts.ChangedFilesFromVerbatim) > 0 {
return errors.Fatal("using --changed-files-from-verbatim requires to also specify --parent")
}
if len(opts.ChangedFilesFromRaw) > 0 {
return errors.Fatal("using --changed-files-from-raw requires to also specify --parent")
}
}
return nil
}
@ -431,6 +451,38 @@ func collectTargets(opts BackupOptions, args []string) (targets []string, err er
return targets, nil
}
// collectChangedFiles builds the list of changed files/dirs named in the
// files passed via --changed-files-from-verbatim and --changed-files-from-raw.
//
// If neither option was given, it returns a nil pointer so that callers can
// tell "no change list supplied" apart from "change list supplied but empty".
// Otherwise it returns a pointer to a (possibly empty, never nil) slice.
// Entries from verbatim files are taken as they are, except that empty lines
// are skipped; entries from raw files are appended unmodified. The first read
// error aborts the collection and is returned as is.
func collectChangedFiles(opts BackupOptions) (changedFiles *[]string, err error) {
	if len(opts.ChangedFilesFromVerbatim)+len(opts.ChangedFilesFromRaw) == 0 {
		return nil, nil
	}

	// Start from a non-nil empty slice: the result must be distinguishable
	// from the "option not used" case even if all list files are empty.
	paths := []string{}

	for _, listFile := range opts.ChangedFilesFromVerbatim {
		lines, readErr := readLines(listFile)
		if readErr != nil {
			return nil, readErr
		}
		for _, entry := range lines {
			if entry != "" {
				paths = append(paths, entry)
			}
		}
	}

	for _, listFile := range opts.ChangedFilesFromRaw {
		names, readErr := readFilenamesFromFileRaw(listFile)
		if readErr != nil {
			return nil, readErr
		}
		paths = append(paths, names...)
	}

	return &paths, nil
}
// parent returns the ID of the parent snapshot. If there is none, nil is
// returned.
func findParentSnapshot(ctx context.Context, repo restic.Repository, opts BackupOptions, targets []string, timeStampLimit time.Time) (*restic.Snapshot, error) {
@ -472,6 +524,11 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter
return err
}
changedFiles, err := collectChangedFiles(opts)
if err != nil {
return err
}
timeStamp := time.Now()
if opts.TimeStamp != "" {
timeStamp, err = time.ParseInLocation(TimeFormat, opts.TimeStamp, time.Local)
@ -654,7 +711,7 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter
if !gopts.JSON {
progressPrinter.V("start backup on %v", targets)
}
_, id, err := arch.Snapshot(ctx, targets, snapshotOpts)
_, id, err := arch.Snapshot(ctx, targets, changedFiles, snapshotOpts)
// cleanly shutdown all running goroutines
cancel()

View file

@ -214,7 +214,7 @@ func (arch *Archiver) wrapLoadTreeError(id restic.ID, err error) error {
// SaveDir stores a directory in the repo and returns the node. snPath is the
// path within the current snapshot.
func (arch *Archiver) SaveDir(ctx context.Context, snPath string, dir string, fi os.FileInfo, previous *restic.Tree, complete CompleteFunc) (d FutureNode, err error) {
func (arch *Archiver) SaveDir(ctx context.Context, snPath string, dir string, absdir string, cdtree *Tree, fi os.FileInfo, previous *restic.Node, complete CompleteFunc) (d FutureNode, err error) {
debug.Log("%v %v", snPath, dir)
treeNode, err := arch.nodeFromFileInfo(snPath, dir, fi)
@ -222,6 +222,25 @@ func (arch *Archiver) SaveDir(ctx context.Context, snPath string, dir string, fi
return FutureNode{}, err
}
if cdtree != nil && previous != nil && len(cdtree.Nodes) == 0 {
debug.Log("%v doesn't contain any changed files, using existing nodes", dir)
treeNode.Subtree = previous.Subtree
fn := newFutureNodeWithResult(futureNodeResult{
snPath: snPath,
target: dir,
node: treeNode,
})
return fn, nil
}
oldSubtree, err := arch.loadSubtree(ctx, previous)
if err != nil {
err = arch.error(absdir, err)
}
if err != nil {
return FutureNode{}, err
}
names, err := readdirnames(arch.FS, dir, fs.O_NOFOLLOW)
if err != nil {
return FutureNode{}, err
@ -237,10 +256,16 @@ func (arch *Archiver) SaveDir(ctx context.Context, snPath string, dir string, fi
return FutureNode{}, ctx.Err()
}
var subcdtree *Tree
if cdtree != nil {
tmp := cdtree.Nodes[name]
subcdtree = &tmp
}
pathname := arch.FS.Join(dir, name)
oldNode := previous.Find(name)
oldNode := oldSubtree.Find(name)
snItem := join(snPath, name)
fn, excluded, err := arch.Save(ctx, snItem, pathname, oldNode)
fn, excluded, err := arch.Save(ctx, snItem, pathname, subcdtree, oldNode)
// return error early if possible
if err != nil {
@ -331,7 +356,7 @@ func (arch *Archiver) allBlobsPresent(previous *restic.Node) bool {
// Errors and completion needs to be handled by the caller.
//
// snPath is the path within the current snapshot.
func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous *restic.Node) (fn FutureNode, excluded bool, err error) {
func (arch *Archiver) Save(ctx context.Context, snPath, target string, cdtree *Tree, previous *restic.Node) (fn FutureNode, excluded bool, err error) {
start := time.Now()
debug.Log("%v target %q, previous %v", snPath, target, previous)
@ -444,15 +469,8 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
debug.Log(" %v dir", target)
snItem := snPath + "/"
oldSubtree, err := arch.loadSubtree(ctx, previous)
if err != nil {
err = arch.error(abstarget, err)
}
if err != nil {
return FutureNode{}, false, err
}
fn, err = arch.SaveDir(ctx, snPath, target, fi, oldSubtree,
fn, err = arch.SaveDir(ctx, snPath, target, abstarget, cdtree, fi, previous,
func(node *restic.Node, stats ItemStats) {
arch.CompleteItem(snItem, previous, node, stats, time.Since(start))
})
@ -537,7 +555,7 @@ func (arch *Archiver) statDir(dir string) (os.FileInfo, error) {
// SaveTree stores a Tree in the repo, returned is the tree. snPath is the path
// within the current snapshot.
func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree, previous *restic.Tree, complete CompleteFunc) (FutureNode, int, error) {
func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree, cdtree *Tree, previous *restic.Tree, complete CompleteFunc) (FutureNode, int, error) {
var node *restic.Node
if snPath != "/" {
@ -575,7 +593,21 @@ func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree,
// this is a leaf node
if subatree.Leaf() {
fn, excluded, err := arch.Save(ctx, join(snPath, name), subatree.Path, previous.Find(name))
relative_cdtree := cdtree
if relative_cdtree != nil {
abs_path, err := arch.FS.Abs(subatree.Path)
if err != nil {
return FutureNode{}, 0, err
}
pc, _ := pathComponents(arch.FS, abs_path, false)
for _, component := range pc {
tmp := relative_cdtree.Nodes[component]
relative_cdtree = &tmp
}
debug.Log("relative_cdtree for subtree path %v:\n%v", abs_path, relative_cdtree)
}
fn, excluded, err := arch.Save(ctx, join(snPath, name), subatree.Path, relative_cdtree, previous.Find(name))
if err != nil {
err = arch.error(subatree.Path, err)
@ -609,7 +641,7 @@ func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree,
}
// not a leaf node, archive subtree
fn, _, err := arch.SaveTree(ctx, join(snPath, name), &subatree, oldSubtree, func(n *restic.Node, is ItemStats) {
fn, _, err := arch.SaveTree(ctx, join(snPath, name), &subatree, cdtree, oldSubtree, func(n *restic.Node, is ItemStats) {
arch.CompleteItem(snItem, oldNode, n, is, time.Since(start))
})
if err != nil {
@ -728,7 +760,7 @@ func (arch *Archiver) stopWorkers() {
}
// Snapshot saves several targets and returns a snapshot.
func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts SnapshotOptions) (*restic.Snapshot, restic.ID, error) {
func (arch *Archiver) Snapshot(ctx context.Context, targets []string, changedFiles *[]string, opts SnapshotOptions) (*restic.Snapshot, restic.ID, error) {
cleanTargets, err := resolveRelativeTargets(arch.FS, targets)
if err != nil {
return nil, restic.ID{}, err
@ -739,6 +771,23 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps
return nil, restic.ID{}, err
}
var cdtree *Tree
if changedFiles != nil {
// Make sure paths in changedFiles are absolute
for i := range *changedFiles {
(*changedFiles)[i], err = arch.FS.Abs((*changedFiles)[i])
if err != nil {
return nil, restic.ID{}, err
}
}
cdtree, err = NewTree(arch.FS, *changedFiles)
if err != nil {
return nil, restic.ID{}, err
}
debug.Log("cdtree:\n%v", cdtree)
}
var rootTreeID restic.ID
wgUp, wgUpCtx := errgroup.WithContext(ctx)
@ -752,7 +801,7 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps
arch.runWorkers(wgCtx, wg)
debug.Log("starting snapshot")
fn, nodeCount, err := arch.SaveTree(wgCtx, "/", atree, arch.loadParentTree(wgCtx, opts.ParentSnapshot), func(n *restic.Node, is ItemStats) {
fn, nodeCount, err := arch.SaveTree(wgCtx, "/", atree, cdtree, arch.loadParentTree(wgCtx, opts.ParentSnapshot), func(n *restic.Node, is ItemStats) {
arch.CompleteItem("/", nil, nil, is, time.Since(start))
})
if err != nil {

View file

@ -31,7 +31,7 @@ func TestSnapshot(t testing.TB, repo restic.Repository, path string, parent *res
}
opts.ParentSnapshot = sn
}
sn, _, err := arch.Snapshot(context.TODO(), []string{path}, opts)
sn, _, err := arch.Snapshot(context.TODO(), []string{path}, nil, opts)
if err != nil {
t.Fatal(err)
}