Merge c774c53583 into de9a040d27

2025-03-30 00:00:14 +01:00 · 2025-03-07 18:04:22 +00:00 · 2025-03-07 18:04:22 +00:00 · 8c968c32ab
commit 8c968c32ab
parent de9a040d27 c774c53583
6 changed files with 189 additions and 9 deletions
--- a/changelog/unreleased/issue-3326
+++ b/changelog/unreleased/issue-3326
@ -0,0 +1,8 @@
+Enhancement: enable --read-data-subset and --read-data for specified snapshot(s)
+
+Snapshots can now be specified on the command line via the standard snapshot filter
+(`--tag`, `--host`, `--path` or specifying snapshot IDs directly). Only the packfiles
+used by these snapshots are then checked.
+
+https://github.com/restic/restic/issues/3326
+https://github.com/restic/restic/pull/5213
--- a/cmd/restic/cmd_check.go
+++ b/cmd/restic/cmd_check.go
@ -35,6 +35,9 @@ finds. It can also be used to read all data and therefore simulate a restore.
 By default, the "check" command will always load all data directly from the
 repository and not use a local cache.

+The "check" command can now check packfiles for specific snapshots. The snapshots
+are filtered via the standard SnapshotFilter.
+
 EXIT STATUS
 ===========

@ -73,6 +76,7 @@ type CheckOptions struct {
 	ReadDataSubset string
 	CheckUnused    bool
 	WithCache      bool
+	restic.SnapshotFilter
 }

 func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) {
@ -86,6 +90,7 @@ func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) {
 		panic(err)
 	}
 	f.BoolVar(&opts.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository")
+	initMultiSnapshotFilter(f, &opts.SnapshotFilter, true)
 }

 func checkFlags(opts CheckOptions) error {
@ -222,9 +227,6 @@ func prepareCheckCache(opts CheckOptions, gopts *GlobalOptions, printer progress

 func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args []string, term *termstatus.Terminal) (checkSummary, error) {
 	summary := checkSummary{MessageType: "summary"}
-	if len(args) != 0 {
-		return summary, errors.Fatal("the check command expects no arguments, only options - please see `restic help check` for usage and flags")
-	}

 	var printer progress.Printer
 	if !gopts.JSON {
@ -245,6 +247,31 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
 	}
 	defer unlock()

+	// check snapshot filter
+	selectedTrees := []restic.ID{}
+	if len(args) > 0 || !opts.SnapshotFilter.Empty() {
+		snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile)
+		if err != nil {
+			return summary, err
+		}
+
+		err = (&opts.SnapshotFilter).FindAll(ctx, snapshotLister, repo, args, func(_ string, sn *restic.Snapshot, err error) error {
+			if err != nil {
+				return err
+			}
+
+			selectedTrees = append(selectedTrees, *sn.Tree)
+			return nil
+		})
+
+		if err != nil {
+			return summary, err
+		}
+		if len(selectedTrees) == 0 {
+			return summary, errors.New("snapshotfilter active but no snapshot selected")
+		}
+	}
+
 	chkr := checker.New(repo, opts.CheckUnused)
 	err = chkr.LoadSnapshots(ctx)
 	if err != nil {
@ -374,6 +401,15 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
 		}
 	}

+	filterBySnapshot := false
+	if len(selectedTrees) > 0 {
+		err = chkr.CheckWithSnapshots(ctx, selectedTrees)
+		if err != nil {
+			return summary, err
+		}
+		filterBySnapshot = true
+	}
+
 	doReadData := func(packs map[restic.ID]int64) {
 		p := printer.NewCounter("packs")
 		p.SetMax(uint64(len(packs)))
@ -392,9 +428,14 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
 		p.Done()
 	}

+	whichSelection := "data"
+	if filterBySnapshot {
+		whichSelection = "selected data"
+	}
+
 	switch {
 	case opts.ReadData:
-		printer.P("read all data\n")
+		printer.P("read all %s\n", whichSelection)
 		doReadData(selectPacksByBucket(chkr.GetPacks(), 1, 1))
 	case opts.ReadDataSubset != "":
 		var packs map[restic.ID]int64
@ -404,12 +445,13 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
 			totalBuckets := dataSubset[1]
 			packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets)
 			packCount := uint64(len(packs))
-			printer.P("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets)
+			printer.P("read group #%d of %d %s packs (out of total %d packs in %d groups)\n",
+				bucket, packCount, whichSelection, chkr.CountPacks(), totalBuckets)
 		} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
 			percentage, err := parsePercentage(opts.ReadDataSubset)
 			if err == nil {
 				packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
-				printer.P("read %.1f%% of data packs\n", percentage)
+				printer.P("read %.1f%% of %s packs\n", percentage, whichSelection)
 			}
 		} else {
 			repoSize := int64(0)
@ -425,7 +467,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
 				subsetSize = repoSize
 			}
 			packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize)
-			printer.P("read %d bytes of data packs\n", subsetSize)
+			printer.P("read %d bytes of %s packs\n", subsetSize, whichSelection)
 		}
 		if packs == nil {
 			return summary, errors.Fatal("internal error: failed to select packs to check")
--- a/cmd/restic/cmd_check_integration_test.go
+++ b/cmd/restic/cmd_check_integration_test.go
@ -5,6 +5,7 @@ import (
 	"context"
 	"testing"

+	"github.com/restic/restic/internal/restic"
 	rtest "github.com/restic/restic/internal/test"
 	"github.com/restic/restic/internal/ui/termstatus"
 )
@ -37,3 +38,37 @@ func testRunCheckOutput(gopts GlobalOptions, checkUnused bool) (string, error) {
 	})
 	return buf.String(), err
 }
+
+// testRunCheckOutputWithArgs runs runCheck with the given check options and
+// positional arguments. It returns whatever was written to gopts.stdout
+// together with the error reported by runCheck.
+func testRunCheckOutputWithArgs(gopts GlobalOptions, opts CheckOptions, args []string) (string, error) {
+	buf := bytes.NewBuffer(nil)
+	gopts.stdout = buf
+	err := withTermStatus(gopts, func(ctx context.Context, term *termstatus.Terminal) error {
+		// use the context handed in by withTermStatus instead of a detached
+		// context.TODO(), so the callback's context is actually honoured
+		_, err := runCheck(ctx, opts, gopts, args, term)
+		return err
+	})
+	return buf.String(), err
+}
+
+// TestRunCheckWrongArgs1 hands runCheck the positional argument "blubber" and
+// expects it to fail with a non-empty error message.
+func TestRunCheckWrongArgs1(t *testing.T) {
+	env, cleanup := withTestEnvironment(t)
+	defer cleanup()
+	testSetupBackupData(t, env)
+
+	bogusArgs := []string{"blubber"}
+	_, err := testRunCheckOutputWithArgs(env.gopts, CheckOptions{}, bogusArgs)
+
+	// the exact text is not pinned: blubber gets quoted and the error string
+	// looks messy
+	failed := err != nil && err.Error() != ""
+	rtest.Assert(t, failed, "expected specific error message - got %q", err)
+}
+
+// TestRunCheckWrongArgs2 sets a snapshot filter whose only host entry is the
+// empty string, passes no positional arguments, and expects runCheck to
+// report that the filter selected no snapshot.
+func TestRunCheckWrongArgs2(t *testing.T) {
+	env, cleanup := withTestEnvironment(t)
+	defer cleanup()
+	testSetupBackupData(t, env)
+
+	const wantMsg = "snapshotfilter active but no snapshot selected"
+
+	filter := restic.SnapshotFilter{Hosts: []string{""}}
+	opts := CheckOptions{SnapshotFilter: filter}
+
+	_, err := testRunCheckOutputWithArgs(env.gopts, opts, []string{})
+	matched := err != nil && err.Error() == wantMsg
+	rtest.Assert(t, matched, "expected specific error message - got %q", err)
+}
--- a/doc/077_troubleshooting.rst
+++ b/doc/077_troubleshooting.rst
@ -82,6 +82,12 @@ If ``check`` detects damaged pack files, it will show instructions on how to rep
 them using the ``repair pack`` command. Use that command instead of the "Repair the
 index" section in this guide.

+If you want to check the repository via snapshots, you can now use the
+standard snapshot filter by specifying ``--host``, ``--path``, ``--tag`` or,
+alternatively, by naming snapshot ID(s) explicitly. The selected subset of packfiles
+will then be read to disk and checked for consistency
+when either ``--read-data`` or ``--read-data-subset`` is given.
+

 2. Backup the repository
 ************************
--- a/internal/checker/checker.go
+++ b/internal/checker/checker.go
@ -4,6 +4,7 @@ import (
 	"bufio"
 	"context"
 	"fmt"
+	"golang.org/x/sync/errgroup"
 	"runtime"
 	"sync"

@ -15,7 +16,6 @@ import (
 	"github.com/restic/restic/internal/repository/pack"
 	"github.com/restic/restic/internal/restic"
 	"github.com/restic/restic/internal/ui/progress"
-	"golang.org/x/sync/errgroup"
 )

 // Checker runs various checks on a repository. It is advisable to create an
@ -501,7 +501,6 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p
 	for pack := range packs {
 		packSet.Insert(pack)
 	}
-
 	// push packs to ch
 	for pbs := range c.repo.ListPacksFromIndex(ctx, packSet) {
 		size := packs[pbs.PackID]
@ -522,3 +521,33 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p
 		}
 	}
 }
+
+// CheckWithSnapshots narrows the checker's pack list to the packfiles that
+// store blobs referenced by the trees in selectedTrees.
+//
+// It returns an error when selectedTrees is empty, when collecting the used
+// blobs fails, or when no packfile is found for the collected blobs. In all
+// of these cases c.packs is left unchanged; on success c.packs is replaced
+// by the reduced set.
+func (c *Checker) CheckWithSnapshots(ctx context.Context, selectedTrees []restic.ID) error {
+	if len(selectedTrees) == 0 {
+		return errors.New("no IDs given")
+	}
+
+	// collect every blob referenced by the selected trees
+	reachable := restic.NewBlobSet()
+	if err := restic.FindUsedBlobs(ctx, c.repo, selectedTrees, reachable, nil); err != nil {
+		return err
+	}
+
+	// translate blobs into the IDs of the packs holding them, keeping the
+	// value already recorded for each pack in c.packs
+	selected := map[restic.ID]int64{}
+	for blob := range reachable {
+		for _, loc := range c.repo.LookupBlob(blob.Type, blob.ID) {
+			selected[loc.PackID] = c.packs[loc.PackID]
+		}
+	}
+
+	if len(selected) == 0 {
+		return errors.Fatal("no packfiles found for given snapshot trees")
+	}
+	c.packs = selected
+	return nil
+}
--- a/internal/checker/checker_test.go
+++ b/internal/checker/checker_test.go
@ -574,6 +574,66 @@ func TestCheckerBlobTypeConfusion(t *testing.T) {
 	}
 }

+// TestCheckRepoSnapshot verifies that CheckWithSnapshots reduces the
+// checker's pack count to the expected number for several tree selections
+// and rejects an empty selection. restic.SnapshotFilter is assumed to work
+// correctly and is not exercised here: the trees it would yield are looked up
+// manually from fixed snapshot IDs in the fixture.
+func TestCheckRepoSnapshot(t *testing.T) {
+	ctx := context.TODO()
+
+	repo, _, cleanup := repository.TestFromFixture(t, checkerTestData)
+	defer cleanup()
+
+	chkr := checker.New(repo, false)
+	_, errs := chkr.LoadIndex(ctx, nil)
+	test.OKs(t, errs)
+
+	test.OKs(t, checkPacks(chkr))
+	test.OKs(t, checkStruct(chkr))
+
+	// treeOf loads the snapshot with the given ID and returns its tree ID.
+	treeOf := func(id string) restic.ID {
+		sn, err := restic.LoadSnapshot(ctx, repo, restic.TestParseID(id))
+		test.OK(t, err)
+		return *sn.Tree
+	}
+
+	// expectPacks restricts the checker to the given trees, compares the
+	// resulting pack count with want, and reloads the index afterwards.
+	expectPacks := func(want uint64, format string, trees ...restic.ID) {
+		test.OK(t, chkr.CheckWithSnapshots(ctx, trees))
+		got := chkr.CountPacks()
+		test.Assert(t, got == want, format, got)
+
+		// index needs reloading every time
+		_, errs := chkr.LoadIndex(ctx, nil)
+		test.Assert(t, len(errs) == 0, "expected no errors, got %v: %v", len(errs), errs)
+	}
+
+	tree1 := treeOf("f7d83db709977178c9d1a09e4009355e534cde1a135b8186b8b118a3fc4fcd41")
+	expectPacks(1, "expected 1 packfile, got %v", tree1)
+
+	tree2 := treeOf("c2b53c5e6a16db92fbb9aa08bd2794c58b379d8724d661ee30d20898bdfdff22")
+	expectPacks(2, "expected 2 packfiles, got %v", tree2)
+
+	tree3 := treeOf("a13c11e582b77a693dd75ab4e3a3ba96538a056594a4b9076e4cacebe6e06d43")
+	expectPacks(2, "expected 2 packfiles, got %v", tree3)
+
+	// two snapshots selected together
+	expectPacks(3, "expected 3 packfiles, got %v", tree1, tree3)
+
+	// an empty selection must be rejected
+	err := chkr.CheckWithSnapshots(ctx, []restic.ID{})
+	test.Assert(t, err != nil && err.Error() == "no IDs given", "expected specific error, got %v", err)
+}
+
 func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, func()) {
 	repo, _, cleanup := repository.TestFromFixture(t, checkerTestData)