diff --git a/changelog/unreleased/issue-3326 b/changelog/unreleased/issue-3326 new file mode 100644 index 000000000..a661dba3b --- /dev/null +++ b/changelog/unreleased/issue-3326 @@ -0,0 +1,8 @@ +Enhancement: enable --read-data-subset and --read-data for specified snapshot(s) + +Snapshots can now be specified on the command line via the standard snapshot filter +(`--tag`, `--host`, `--path` or by specifying snapshot IDs directly) and will be used +for checking the packfiles used by these snapshots. + +https://github.com/restic/restic/issues/3326 +https://github.com/restic/restic/pull/5213 diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index f87303933..629e25ffe 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -35,6 +35,9 @@ finds. It can also be used to read all data and therefore simulate a restore. By default, the "check" command will always load all data directly from the repository and not use a local cache. +The "check" command can now check packfiles for specific snapshots. The snapshots +are filtered via the standard SnapshotFilter. 
+ EXIT STATUS =========== @@ -73,6 +76,7 @@ type CheckOptions struct { ReadDataSubset string CheckUnused bool WithCache bool + restic.SnapshotFilter } func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) { @@ -86,6 +90,7 @@ func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) { panic(err) } f.BoolVar(&opts.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository") + initMultiSnapshotFilter(f, &opts.SnapshotFilter, true) } func checkFlags(opts CheckOptions) error { @@ -222,9 +227,6 @@ func prepareCheckCache(opts CheckOptions, gopts *GlobalOptions, printer progress func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args []string, term *termstatus.Terminal) (checkSummary, error) { summary := checkSummary{MessageType: "summary"} - if len(args) != 0 { - return summary, errors.Fatal("the check command expects no arguments, only options - please see `restic help check` for usage and flags") - } var printer progress.Printer if !gopts.JSON { @@ -245,6 +247,31 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args } defer unlock() + // check snapshot filter + selectedTrees := []restic.ID{} + if len(args) > 0 || !opts.SnapshotFilter.Empty() { + snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile) + if err != nil { + return summary, err + } + + err = (&opts.SnapshotFilter).FindAll(ctx, snapshotLister, repo, args, func(_ string, sn *restic.Snapshot, err error) error { + if err != nil { + return err + } + + selectedTrees = append(selectedTrees, *sn.Tree) + return nil + }) + + if err != nil { + return summary, err + } + if len(selectedTrees) == 0 { + return summary, errors.New("snapshotfilter active but no snapshot selected") + } + } + chkr := checker.New(repo, opts.CheckUnused) err = chkr.LoadSnapshots(ctx) if err != nil { @@ -374,6 +401,15 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args } } + filterBySnapshot := false + if 
len(selectedTrees) > 0 { + err = chkr.CheckWithSnapshots(ctx, selectedTrees) + if err != nil { + return summary, err + } + filterBySnapshot = true + } + doReadData := func(packs map[restic.ID]int64) { p := printer.NewCounter("packs") p.SetMax(uint64(len(packs))) @@ -392,9 +428,14 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args p.Done() } + whichSelection := "data" + if filterBySnapshot { + whichSelection = "selected data" + } + switch { case opts.ReadData: - printer.P("read all data\n") + printer.P("read all %s\n", whichSelection) doReadData(selectPacksByBucket(chkr.GetPacks(), 1, 1)) case opts.ReadDataSubset != "": var packs map[restic.ID]int64 @@ -404,12 +445,13 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args totalBuckets := dataSubset[1] packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets) packCount := uint64(len(packs)) - printer.P("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets) + printer.P("read group #%d of %d %s packs (out of total %d packs in %d groups)\n", + bucket, packCount, whichSelection, chkr.CountPacks(), totalBuckets) } else if strings.HasSuffix(opts.ReadDataSubset, "%") { percentage, err := parsePercentage(opts.ReadDataSubset) if err == nil { packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage) - printer.P("read %.1f%% of data packs\n", percentage) + printer.P("read %.1f%% of %s packs\n", percentage, whichSelection) } } else { repoSize := int64(0) @@ -425,7 +467,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args subsetSize = repoSize } packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize) - printer.P("read %d bytes of data packs\n", subsetSize) + printer.P("read %d bytes of %s packs\n", subsetSize, whichSelection) } if packs == nil { return summary, errors.Fatal("internal error: failed to select packs to check") diff 
--git a/cmd/restic/cmd_check_integration_test.go b/cmd/restic/cmd_check_integration_test.go index f5a3dc395..1f4104466 100644 --- a/cmd/restic/cmd_check_integration_test.go +++ b/cmd/restic/cmd_check_integration_test.go @@ -5,6 +5,7 @@ import ( "context" "testing" + "github.com/restic/restic/internal/restic" rtest "github.com/restic/restic/internal/test" "github.com/restic/restic/internal/ui/termstatus" ) @@ -37,3 +38,37 @@ func testRunCheckOutput(gopts GlobalOptions, checkUnused bool) (string, error) { }) return buf.String(), err } + +func testRunCheckOutputWithArgs(gopts GlobalOptions, opts CheckOptions, args []string) (string, error) { + buf := bytes.NewBuffer(nil) + gopts.stdout = buf + err := withTermStatus(gopts, func(ctx context.Context, term *termstatus.Terminal) error { + _, err := runCheck(context.TODO(), opts, gopts, args, term) + return err + }) + return buf.String(), err +} + +func TestRunCheckWrongArgs1(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + testSetupBackupData(t, env) + + _, err := testRunCheckOutputWithArgs(env.gopts, CheckOptions{}, []string{"blubber"}) + rtest.Assert(t, err != nil && err.Error() != "", + // blubber gets quoted - the error string looks messy + "expected specific error message - got %q", err) +} + +func TestRunCheckWrongArgs2(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + testSetupBackupData(t, env) + + opts := CheckOptions{ + SnapshotFilter: restic.SnapshotFilter{Hosts: []string{""}}, + } + _, err := testRunCheckOutputWithArgs(env.gopts, opts, []string{}) + rtest.Assert(t, err != nil && err.Error() == "snapshotfilter active but no snapshot selected", + "expected specific error message - got %q", err) +} diff --git a/doc/077_troubleshooting.rst b/doc/077_troubleshooting.rst index 36c9d63ec..7fb012dfa 100644 --- a/doc/077_troubleshooting.rst +++ b/doc/077_troubleshooting.rst @@ -82,6 +82,12 @@ If ``check`` detects damaged pack files, it will show instructions on how 
to repair them using the ``repair pack`` command. Use that command instead of the "Repair the index" section in this guide. +If you are interested in checking the repository via snapshots, you can now +use the standard snapshot filter method specifying ``--host``, ``--path``, ``--tag`` or +alternatively naming snapshot ID(s) explicitly. The selected subset of packfiles +will then be read to disk and checked for consistency +when either ``--read-data`` or ``--read-data-subset`` is given. + 2. Backup the repository ************************ diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 12020891a..fdc1ff637 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -4,6 +4,7 @@ import ( "bufio" "context" "fmt" + "golang.org/x/sync/errgroup" "runtime" "sync" @@ -15,7 +16,6 @@ import ( "github.com/restic/restic/internal/repository/pack" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/ui/progress" - "golang.org/x/sync/errgroup" ) // Checker runs various checks on a repository. It is advisable to create an @@ -501,7 +501,6 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p for pack := range packs { packSet.Insert(pack) } - // push packs to ch for pbs := range c.repo.ListPacksFromIndex(ctx, packSet) { size := packs[pbs.PackID] @@ -522,3 +521,33 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p } } } + +// CheckWithSnapshots collects the blobs used by the trees in 'selectedTrees' and +// narrows the checker's pack list to the packfiles that contain those blobs. 
+func (c *Checker) CheckWithSnapshots(ctx context.Context, selectedTrees []restic.ID) error { + if len(selectedTrees) == 0 { + return errors.New("no IDs given") + } + + // gather used blobs from all trees + usedBlobs := restic.NewBlobSet() + err := restic.FindUsedBlobs(ctx, c.repo, selectedTrees, usedBlobs, nil) + if err != nil { + return err + } + + // convert blobs to packfile IDs + snapPacks := map[restic.ID]int64{} + for blob := range usedBlobs { + for _, res := range c.repo.LookupBlob(blob.Type, blob.ID) { + snapPacks[res.PackID] = c.packs[res.PackID] + } + } + + if len(snapPacks) > 0 { + c.packs = snapPacks + } else { + return errors.Fatal("no packfiles found for given snapshot trees") + } + return nil +} diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index 92bbb1da6..d7567bdea 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -574,6 +574,66 @@ func TestCheckerBlobTypeConfusion(t *testing.T) { } } +// TestCheckRepoSnapshot assumes that restic.SnapshotFilter works correctly; +// the trees that the filter would select are fed into the test manually. +func TestCheckRepoSnapshot(t *testing.T) { + repo, _, cleanup := repository.TestFromFixture(t, checkerTestData) + defer cleanup() + + chkr := checker.New(repo, false) + _, errs := chkr.LoadIndex(context.TODO(), nil) + test.OKs(t, errs) + + test.OKs(t, checkPacks(chkr)) + test.OKs(t, checkStruct(chkr)) + + snID := restic.TestParseID("f7d83db709977178c9d1a09e4009355e534cde1a135b8186b8b118a3fc4fcd41") + sn1, err := restic.LoadSnapshot(context.TODO(), repo, snID) + test.OK(t, err) + selectedTrees := []restic.ID{*sn1.Tree} + test.OK(t, chkr.CheckWithSnapshots(context.TODO(), selectedTrees)) + lenPacks := chkr.CountPacks() + test.Assert(t, lenPacks == uint64(1), "expected 1 packfile, got %v", lenPacks) + + // index needs reloading every time + _, errs = chkr.LoadIndex(context.TODO(), nil) + test.Assert(t, len(errs) == 0, "expected no errors, got %v: 
%v", len(errs), errs) + + snID = restic.TestParseID("c2b53c5e6a16db92fbb9aa08bd2794c58b379d8724d661ee30d20898bdfdff22") + sn2, err := restic.LoadSnapshot(context.TODO(), repo, snID) + test.OK(t, err) + selectedTrees = []restic.ID{*sn2.Tree} + test.OK(t, chkr.CheckWithSnapshots(context.TODO(), selectedTrees)) + lenPacks = chkr.CountPacks() + test.Assert(t, lenPacks == 2, "expected 2 packfiles, got %v", lenPacks) + + _, errs = chkr.LoadIndex(context.TODO(), nil) + test.Assert(t, len(errs) == 0, "expected no errors, got %v: %v", len(errs), errs) + + snID = restic.TestParseID("a13c11e582b77a693dd75ab4e3a3ba96538a056594a4b9076e4cacebe6e06d43") + sn3, err := restic.LoadSnapshot(context.TODO(), repo, snID) + test.OK(t, err) + selectedTrees = []restic.ID{*sn3.Tree} + test.OK(t, chkr.CheckWithSnapshots(context.TODO(), selectedTrees)) + lenPacks = chkr.CountPacks() + test.Assert(t, lenPacks == 2, "expected 2 packfiles, got %v", lenPacks) + + _, errs = chkr.LoadIndex(context.TODO(), nil) + test.Assert(t, len(errs) == 0, "expected no errors, got %v: %v", len(errs), errs) + + selectedTrees = []restic.ID{*sn1.Tree, *sn3.Tree} + test.OK(t, chkr.CheckWithSnapshots(context.TODO(), selectedTrees)) + lenPacks = chkr.CountPacks() + test.Assert(t, lenPacks == 3, "expected 3 packfiles, got %v", lenPacks) + + _, errs = chkr.LoadIndex(context.TODO(), nil) + test.Assert(t, len(errs) == 0, "expected no errors, got %v: %v", len(errs), errs) + + selectedTrees = []restic.ID{} + err = chkr.CheckWithSnapshots(context.TODO(), selectedTrees) + test.Assert(t, err != nil && err.Error() == "no IDs given", "expected specific error, got %v", err) +} + func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, func()) { repo, _, cleanup := repository.TestFromFixture(t, checkerTestData)