From 6f77d4ddf82a8d0c8b3dcf70a2b191a8d683ee17 Mon Sep 17 00:00:00 2001 From: Winfried Plappert Date: Wed, 22 Jan 2025 17:07:56 +0000 Subject: [PATCH 01/10] check: enable --read-data-* for specified snapshots - rebase step 1 Add code to cmd/restic/cmd_check.go to detect snapshots Resolved conflict for cmd/restic/cmd_check.go - runCheck integrated newCheckCommand(...) --- changelog/unreleased/issue-3326 | 8 ++++++ cmd/restic/cmd_check.go | 20 +++++++++++-- internal/checker/checker.go | 51 +++++++++++++++++++++++++++++++-- 3 files changed, 74 insertions(+), 5 deletions(-) create mode 100644 changelog/unreleased/issue-3326 diff --git a/changelog/unreleased/issue-3326 b/changelog/unreleased/issue-3326 new file mode 100644 index 000000000..771c3e370 --- /dev/null +++ b/changelog/unreleased/issue-3326 @@ -0,0 +1,8 @@ +check: enable --read-data-subset and --read-data for specified snapshot(s) + +When snapshots are specified on the command line, the metadata for these +snapshots will be read and a set of packfiles will be created representing the data +parts of these snapshots. + +https://github.com/restic/restic/issues/3326 +https://github.com/restic/restic/pull/5213 diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index f87303933..e8a0ca73a 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -73,6 +73,7 @@ type CheckOptions struct { ReadDataSubset string CheckUnused bool WithCache bool + restic.SnapshotFilter } func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) { @@ -86,6 +87,7 @@ func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) { panic(err) } f.BoolVar(&opts.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository") + initMultiSnapshotFilter(f, &opts.SnapshotFilter, true) } func checkFlags(opts CheckOptions) error { @@ -222,9 +224,6 @@ func prepareCheckCache(opts CheckOptions, gopts *GlobalOptions, printer progress func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args []string, term *termstatus.Terminal) (checkSummary, error) { summary := checkSummary{MessageType: "summary"} - if len(args) != 0 { - return summary, errors.Fatal("the check command expects no arguments, only options - please see `restic help check` for usage and flags") - } var printer progress.Printer if !gopts.JSON { @@ -258,6 +257,21 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args return summary, ctx.Err() } + if len(args) > 0 { + snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile) + if err != nil { + return summary, err + } + + // run down the tree, take note of the data packfiles involved + for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &opts.SnapshotFilter, args) { + err := chkr.FindDataPackfiles(ctx, repo, sn) + if err != nil { + return summary, err + } + } + } + errorsFound := false for _, hint := range hints { switch hint.(type) { diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 12020891a..5f9a774d8 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -15,6 +15,7 @@ import ( "github.com/restic/restic/internal/repository/pack" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/ui/progress" + "github.com/restic/restic/internal/walker" "golang.org/x/sync/errgroup" ) @@ -29,6 +30,7 @@ type Checker struct { sync.Mutex M restic.BlobSet } + packSet restic.IDSet trackUnused bool masterIndex *index.MasterIndex @@ -41,6 +43,7 @@ type Checker struct { func New(repo restic.Repository, trackUnused bool) *Checker { c := &Checker{ packs: make(map[restic.ID]int64), + packSet: restic.NewIDSet(), masterIndex: index.NewMasterIndex(), repo: repo, trackUnused: trackUnused, @@ -431,12 +434,24 @@ func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles, er // CountPacks returns the number of packs in the repository. func (c *Checker) CountPacks() uint64 { - return uint64(len(c.packs)) + if len(c.packSet) == 0 { + return uint64(len(c.packs)) + } else { + return uint64(len(c.packSet)) + } } // GetPacks returns IDSet of packs in the repository func (c *Checker) GetPacks() map[restic.ID]int64 { - return c.packs + if len(c.packSet) == 0 { + return c.packs + } else { + result := map[restic.ID]int64{} + for packID := range c.packSet { + result[packID] = c.packs[packID] + } + return result + } } // ReadData loads all data from the repository and checks the integrity. @@ -522,3 +537,35 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p } } } + +// find data packfiles for checking repository based on snapshots +func (c *Checker) FindDataPackfiles(ctx context.Context, repo *repository.Repository, sn *restic.Snapshot) error { + err := walker.Walk(ctx, repo, *sn.Tree, walker.WalkVisitor{ProcessNode: func(parentTreeID restic.ID, _ string, node *restic.Node, err error) error { + if err != nil { + fmt.Printf("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n", parentTreeID, sn.ID, err) + return walker.ErrSkipNode + } + if node == nil { + return nil + } + + if node.Type == restic.NodeTypeFile { + for _, content := range node.Content { + result := repo.LookupBlob(restic.DataBlob, content) + if len(result) == 0 { + panic("checker.FindDataPackfiles: datablob not mapped!") + } else if len(result) > 1 { + panic("checker.FindDataPackfiles: datablob found several times!") + } + c.packSet.Insert(result[0].PackID) + } + } + return nil + }}) + + if err != nil { + return errors.New(fmt.Sprintf("walker.Walk does not want to walk - reason %v\n", err)) + } + + return nil +} From 46184bd703b152083fe461310bcd8e9e7acfa453 Mon Sep 17 00:00:00 2001 From: Winfried Plappert Date: Sat, 25 Jan 2025 11:01:03 +0000 Subject: [PATCH 02/10] check: run check of packfiles filtered via snapshotfilter - rebase part 2 Added code for selecting multiple snapshots. Added message how many packfiles and their cumulative size were selected. In internal/checker/checker.go replaced the datablob / packfile selection from using walker.Walk to restic.StreamTrees - parallelizing the packfile selection. resolved conflict in cmd_check: allow check for snapshot filter --- cmd/restic/cmd_check.go | 38 ++++++++++++++++++++--- internal/checker/checker.go | 62 ++++++++++++++++++++++--------------- 2 files changed, 71 insertions(+), 29 deletions(-) diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index e8a0ca73a..4edac3f9c 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -388,6 +388,30 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args } } + filterBySnapshot := false + if len(args) > 0 || !opts.SnapshotFilter.Empty() { + snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile) + if err != nil { + return err + } + + visitedTrees := restic.NewIDSet() + for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &opts.SnapshotFilter, args) { + err := chkr.FindDataPackfiles(ctx, repo, sn, visitedTrees) + if err != nil { + return err + } + filterBySnapshot = true + } + + selectedPacksSize := int64(0) + for _, size := range chkr.GetPacks() { + selectedPacksSize += size + } + printer.P("snapshot checking: %d packfiles with size %s selected.\n", + chkr.CountPacks(), ui.FormatBytes(uint64(selectedPacksSize))) + } + doReadData := func(packs map[restic.ID]int64) { p := printer.NewCounter("packs") p.SetMax(uint64(len(packs))) @@ -406,9 +430,14 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args p.Done() } + whichSelection := "data" + if filterBySnapshot { + whichSelection = "selected data" + } + switch { case opts.ReadData: - printer.P("read all data\n") + printer.P("read all %s\n", whichSelection) doReadData(selectPacksByBucket(chkr.GetPacks(), 1, 1)) case opts.ReadDataSubset != "": var packs map[restic.ID]int64 @@ -418,12 +447,13 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args totalBuckets := dataSubset[1] packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets) packCount := uint64(len(packs)) - printer.P("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets) + printer.P("read group #%d of %d %s packs (out of total %d packs in %d groups)\n", + bucket, packCount, whichSelection, chkr.CountPacks(), totalBuckets) } else if strings.HasSuffix(opts.ReadDataSubset, "%") { percentage, err := parsePercentage(opts.ReadDataSubset) if err == nil { packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage) - printer.P("read %.1f%% of data packs\n", percentage) + printer.P("read %.1f%% of %s packs\n", percentage, whichSelection) } } else { repoSize := int64(0) @@ -439,7 +469,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args subsetSize = repoSize } packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize) - printer.P("read %d bytes of data packs\n", subsetSize) + printer.P("read %d bytes of %s packs\n", subsetSize, whichSelection) } if packs == nil { return summary, errors.Fatal("internal error: failed to select packs to check") diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 5f9a774d8..013563790 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -4,6 +4,7 @@ import ( "bufio" "context" "fmt" + "golang.org/x/sync/errgroup" "runtime" "sync" @@ -15,8 +16,6 @@ import ( "github.com/restic/restic/internal/repository/pack" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/ui/progress" - "github.com/restic/restic/internal/walker" - "golang.org/x/sync/errgroup" ) // Checker runs various checks on a repository. It is advisable to create an @@ -538,33 +537,46 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p } } -// find data packfiles for checking repository based on snapshots -func (c *Checker) FindDataPackfiles(ctx context.Context, repo *repository.Repository, sn *restic.Snapshot) error { - err := walker.Walk(ctx, repo, *sn.Tree, walker.WalkVisitor{ProcessNode: func(parentTreeID restic.ID, _ string, node *restic.Node, err error) error { - if err != nil { - fmt.Printf("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n", parentTreeID, sn.ID, err) - return walker.ErrSkipNode - } - if node == nil { - return nil - } +// Find data packfiles for repository checking based on snapshots. +// Use restic.StreamTrees to gather all data blobs and convert them to their +// containing packfile +func (c *Checker) FindDataPackfiles(ctx context.Context, repo *repository.Repository, sn *restic.Snapshot, + visitedTrees restic.IDSet) error { - if node.Type == restic.NodeTypeFile { - for _, content := range node.Content { - result := repo.LookupBlob(restic.DataBlob, content) - if len(result) == 0 { - panic("checker.FindDataPackfiles: datablob not mapped!") - } else if len(result) > 1 { - panic("checker.FindDataPackfiles: datablob found several times!") - } - c.packSet.Insert(result[0].PackID) + var packfileMutex sync.Mutex + wg, wgCtx := errgroup.WithContext(ctx) + treeStream := restic.StreamTrees(wgCtx, wg, repo, restic.IDs{*sn.Tree}, func(tree restic.ID) bool { + visited := visitedTrees.Has(tree) + visitedTrees.Insert(tree) + return visited + }, nil) + + wg.Go(func() error { + for tree := range treeStream { + if tree.Error != nil { + return fmt.Errorf("LoadTree(%v) returned error %v", tree.ID.Str(), tree.Error) } - } - return nil - }}) + packfileMutex.Lock() + for _, node := range tree.Nodes { + // Recursion into directories is handled by StreamTrees + for _, content := range node.Content { + result := repo.LookupBlob(restic.DataBlob, content) + if len(result) == 0 { + return fmt.Errorf("checker.LookupBlob: datablob %s not mapped!", content.Str()) + } + c.packSet.Insert(result[0].PackID) + } + } + packfileMutex.Unlock() + } + + return nil + }) + + err := wg.Wait() if err != nil { - return errors.New(fmt.Sprintf("walker.Walk does not want to walk - reason %v\n", err)) + return err } return nil From c141ed1a17e6dafb5e63cdfcde583cba6d18d5b4 Mon Sep 17 00:00:00 2001 From: Winfried Plappert Date: Tue, 4 Feb 2025 18:45:03 +0000 Subject: [PATCH 03/10] restic check with snapshot list reworked the code using snapshotFilter.FindAll to find all snapshots and restic.FindUsedBlobs to retrieve all used blobs. range repo.LookupBlob (as before) to convert the blobs to their containing packfiles and c.repo.List(ctx, restic.PackFile, ...) to retrieve the sizes of those packfiles. Additional documentation and tests are still outstanding. --- cmd/restic/cmd_check.go | 18 +------ internal/checker/checker.go | 95 +++++++++++++++++-------------------- 2 files changed, 45 insertions(+), 68 deletions(-) diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index 4edac3f9c..b1705348f 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -390,26 +390,10 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args filterBySnapshot := false if len(args) > 0 || !opts.SnapshotFilter.Empty() { - snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile) + filterBySnapshot, err = chkr.CheckWithSnapshots(ctx, repo, args, &opts.SnapshotFilter) if err != nil { return err } - - visitedTrees := restic.NewIDSet() - for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &opts.SnapshotFilter, args) { - err := chkr.FindDataPackfiles(ctx, repo, sn, visitedTrees) - if err != nil { - return err - } - filterBySnapshot = true - } - - selectedPacksSize := int64(0) - for _, size := range chkr.GetPacks() { - selectedPacksSize += size - } - printer.P("snapshot checking: %d packfiles with size %s selected.\n", - chkr.CountPacks(), ui.FormatBytes(uint64(selectedPacksSize))) } doReadData := func(packs map[restic.ID]int64) { diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 013563790..2ac2b3615 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -29,7 +29,6 @@ type Checker struct { sync.Mutex M restic.BlobSet } - packSet restic.IDSet trackUnused bool masterIndex *index.MasterIndex @@ -42,7 +41,6 @@ type Checker struct { func New(repo restic.Repository, trackUnused bool) *Checker { c := &Checker{ packs: make(map[restic.ID]int64), - packSet: restic.NewIDSet(), masterIndex: index.NewMasterIndex(), repo: repo, trackUnused: trackUnused, @@ -433,24 +431,12 @@ func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles, er // CountPacks returns the number of packs in the repository. func (c *Checker) CountPacks() uint64 { - if len(c.packSet) == 0 { - return uint64(len(c.packs)) - } else { - return uint64(len(c.packSet)) - } + return uint64(len(c.packs)) } // GetPacks returns IDSet of packs in the repository func (c *Checker) GetPacks() map[restic.ID]int64 { - if len(c.packSet) == 0 { - return c.packs - } else { - result := map[restic.ID]int64{} - for packID := range c.packSet { - result[packID] = c.packs[packID] - } - return result - } + return c.packs } // ReadData loads all data from the repository and checks the integrity. @@ -515,7 +501,6 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p for pack := range packs { packSet.Insert(pack) } - // push packs to ch for pbs := range c.repo.ListPacksFromIndex(ctx, packSet) { size := packs[pbs.PackID] @@ -537,47 +522,55 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p } } -// Find data packfiles for repository checking based on snapshots. -// Use restic.StreamTrees to gather all data blobs and convert them to their -// containing packfile -func (c *Checker) FindDataPackfiles(ctx context.Context, repo *repository.Repository, sn *restic.Snapshot, - visitedTrees restic.IDSet) error { +// process snapshot IDs from command line +func (c *Checker) CheckWithSnapshots(ctx context.Context, repo *repository.Repository, args []string, snapshotFilter *restic.SnapshotFilter) (bool, error) { - var packfileMutex sync.Mutex - wg, wgCtx := errgroup.WithContext(ctx) - treeStream := restic.StreamTrees(wgCtx, wg, repo, restic.IDs{*sn.Tree}, func(tree restic.ID) bool { - visited := visitedTrees.Has(tree) - visitedTrees.Insert(tree) - return visited - }, nil) - - wg.Go(func() error { - for tree := range treeStream { - if tree.Error != nil { - return fmt.Errorf("LoadTree(%v) returned error %v", tree.ID.Str(), tree.Error) - } - - packfileMutex.Lock() - for _, node := range tree.Nodes { - // Recursion into directories is handled by StreamTrees - for _, content := range node.Content { - result := repo.LookupBlob(restic.DataBlob, content) - if len(result) == 0 { - return fmt.Errorf("checker.LookupBlob: datablob %s not mapped!", content.Str()) - } - c.packSet.Insert(result[0].PackID) - } - } - packfileMutex.Unlock() + selectedTrees := []restic.ID{} + err := snapshotFilter.FindAll(ctx, c.snapshots, repo, args, func(id string, sn *restic.Snapshot, err error) error { + if err != nil { + return err + } else if ctx.Err() != nil { + return ctx.Err() } + selectedTrees = append(selectedTrees, *sn.Tree) return nil }) - err := wg.Wait() if err != nil { - return err + return false, err } - return nil + // gather used blobs from all trees + usedBlobs := restic.NewBlobSet() + err = restic.FindUsedBlobs(ctx, repo, selectedTrees, usedBlobs, nil) + if err != nil { + return false, err + } + + if len(selectedTrees) == 0 { + return false, nil + } + + // convert blobs to packfile IDs + c.packs = map[restic.ID]int64{} + for blob := range usedBlobs { + for _, res := range repo.LookupBlob(blob.Type, blob.ID) { + c.packs[res.PackID] = 0 + } + } + + // gather size for selected packfiles + err = c.repo.List(ctx, restic.PackFile, func(id restic.ID, size int64) error { + if _, ok := c.packs[id]; ok { + c.packs[id] = size + } + return nil + }) + + if err != nil { + return false, err + } + + return len(selectedTrees) > 0, nil } From 5e1114731176a31772ff96a3b2df315cce343204 Mon Sep 17 00:00:00 2001 From: Winfried Plappert Date: Tue, 4 Feb 2025 21:28:53 +0000 Subject: [PATCH 04/10] restic check with snapshots fixed lint errors --- internal/checker/checker.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 2ac2b3615..9a07f6e4a 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -522,11 +522,12 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p } } -// process snapshot IDs from command line +// CheckWithSnapshots will process snapshot IDs from command line and +// madify c.packs so it contains only the selected packfiles via snapshotFilter func (c *Checker) CheckWithSnapshots(ctx context.Context, repo *repository.Repository, args []string, snapshotFilter *restic.SnapshotFilter) (bool, error) { selectedTrees := []restic.ID{} - err := snapshotFilter.FindAll(ctx, c.snapshots, repo, args, func(id string, sn *restic.Snapshot, err error) error { + err := snapshotFilter.FindAll(ctx, c.snapshots, repo, args, func(_ string, sn *restic.Snapshot, err error) error { if err != nil { return err } else if ctx.Err() != nil { From 3b0552933483bc0172fb42d8170755fd6ef03b30 Mon Sep 17 00:00:00 2001 From: Winfried Plappert Date: Wed, 5 Feb 2025 17:35:25 +0000 Subject: [PATCH 05/10] check - rebase part 3 - add documentation and file issue-3326 add documentation for checking via snapshot filter. cmd_check: change 1st line of issue-3326 to recognized format. check with snapshot filter: changed issue description --- changelog/unreleased/issue-3326 | 8 ++++---- doc/077_troubleshooting.rst | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/changelog/unreleased/issue-3326 b/changelog/unreleased/issue-3326 index 771c3e370..a661dba3b 100644 --- a/changelog/unreleased/issue-3326 +++ b/changelog/unreleased/issue-3326 @@ -1,8 +1,8 @@ -check: enable --read-data-subset and --read-data for specified snapshot(s) +Enhancement: enable --read-data-subset and --read-data for specified snapshot(s) -When snapshots are specified on the command line, the metadata for these -snapshots will be read and a set of packfiles will be created representing the data -parts of these snapshots. +Snapshots can now be specified on the command line via the standard snapshot filter, +(`--tag`, `--host`, `--path` or specifying snapshot IDs directly) and will be used +for checking the packfiles used by these snapshots. https://github.com/restic/restic/issues/3326 https://github.com/restic/restic/pull/5213 diff --git a/doc/077_troubleshooting.rst b/doc/077_troubleshooting.rst index 36c9d63ec..7fb012dfa 100644 --- a/doc/077_troubleshooting.rst +++ b/doc/077_troubleshooting.rst @@ -82,6 +82,12 @@ If ``check`` detects damaged pack files, it will show instructions on how to rep them using the ``repair pack`` command. Use that command instead of the "Repair the index" section in this guide. +If you are interested to check the repository via snapshots, you can now +use the standard snapshot filter method specifying ``--host``, ``--path``, ``--tag`` or +alternatively naming snapshot ID(s) explicitely. The selected subset of packfiles +will then be read to disk and checked for consistency +when either ``--read-data`` or ``--read-data-subset`` is given. + 2. Backup the repository ************************ From c0e5a74774f66155e78b8f19e7e709625e2b1662 Mon Sep 17 00:00:00 2001 From: Winfried Plappert Date: Sat, 15 Feb 2025 07:20:32 +0000 Subject: [PATCH 06/10] check with snapshot filter: check early for filter errors - rebase part 4 Hand down filtered tree IDs to CheckWithSnapshots which builds 'usedBlobs'. repo.LookupBlob is used to derive packfiles from 'usedBlobs' and constructs 'snapPacks' and overwrites c.packs. --- cmd/restic/cmd_check.go | 27 ++++++++++++++++++-- internal/checker/checker.go | 49 +++++++------------------------------ 2 files changed, 34 insertions(+), 42 deletions(-) diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index b1705348f..1579693c1 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -244,6 +244,28 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args } defer unlock() + // check snapshot filter + selectedTrees := []restic.ID{} + if len(args) > 0 || !opts.SnapshotFilter.Empty() { + snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile) + if err != nil { + return err + } + + err = (&opts.SnapshotFilter).FindAll(ctx, snapshotLister, repo, args, func(_ string, sn *restic.Snapshot, err error) error { + if err != nil { + return err + } + + selectedTrees = append(selectedTrees, *sn.Tree) + return nil + }) + + if err != nil { + return err + } + } + chkr := checker.New(repo, opts.CheckUnused) err = chkr.LoadSnapshots(ctx) if err != nil { @@ -389,11 +411,12 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args } filterBySnapshot := false - if len(args) > 0 || !opts.SnapshotFilter.Empty() { - filterBySnapshot, err = chkr.CheckWithSnapshots(ctx, repo, args, &opts.SnapshotFilter) + if len(selectedTrees) > 0 { + err = chkr.CheckWithSnapshots(ctx, selectedTrees) if err != nil { return err } + filterBySnapshot = true } doReadData := func(packs map[restic.ID]int64) { diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 9a07f6e4a..32ff2488b 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -523,55 +523,24 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p } // CheckWithSnapshots will process snapshot IDs from command line and -// madify c.packs so it contains only the selected packfiles via snapshotFilter -func (c *Checker) CheckWithSnapshots(ctx context.Context, repo *repository.Repository, args []string, snapshotFilter *restic.SnapshotFilter) (bool, error) { - - selectedTrees := []restic.ID{} - err := snapshotFilter.FindAll(ctx, c.snapshots, repo, args, func(_ string, sn *restic.Snapshot, err error) error { - if err != nil { - return err - } else if ctx.Err() != nil { - return ctx.Err() - } - - selectedTrees = append(selectedTrees, *sn.Tree) - return nil - }) - - if err != nil { - return false, err - } +// add to snapPacks so it contains only the selected packfiles via snapshotFilter +func (c *Checker) CheckWithSnapshots(ctx context.Context, selectedTrees []restic.ID) error { // gather used blobs from all trees usedBlobs := restic.NewBlobSet() - err = restic.FindUsedBlobs(ctx, repo, selectedTrees, usedBlobs, nil) + err := restic.FindUsedBlobs(ctx, c.repo, selectedTrees, usedBlobs, nil) if err != nil { - return false, err - } - - if len(selectedTrees) == 0 { - return false, nil + return err } // convert blobs to packfile IDs - c.packs = map[restic.ID]int64{} + snapPacks := map[restic.ID]int64{} for blob := range usedBlobs { - for _, res := range repo.LookupBlob(blob.Type, blob.ID) { - c.packs[res.PackID] = 0 + for _, res := range c.repo.LookupBlob(blob.Type, blob.ID) { + snapPacks[res.PackID] = c.packs[res.PackID] } } - // gather size for selected packfiles - err = c.repo.List(ctx, restic.PackFile, func(id restic.ID, size int64) error { - if _, ok := c.packs[id]; ok { - c.packs[id] = size - } - return nil - }) - - if err != nil { - return false, err - } - - return len(selectedTrees) > 0, nil + c.packs = snapPacks + return nil } From 7fe83f0db0bbb78867729a52226c8a66bd6d672e Mon Sep 17 00:00:00 2001 From: Winfried Plappert Date: Sat, 15 Feb 2025 11:47:46 +0000 Subject: [PATCH 07/10] check: cmd_check, checker, checker_test: added test - rebase part 5: add checker_test cmd_check, checker: added error return checker_test: testing CheckWithSnapshots checker_test: fixed lint error for empty tree list --- cmd/restic/cmd_check.go | 3 ++ internal/checker/checker.go | 10 ++-- internal/checker/checker_test.go | 90 ++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 3 deletions(-) diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index 1579693c1..5018a05a2 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -264,6 +264,9 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args if err != nil { return err } + if len(selectedTrees) == 0 { + return errors.Fatal("snapshotfilter active but no snapshot selected.") + } } chkr := checker.New(repo, opts.CheckUnused) diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 32ff2488b..daf5ac1c8 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -522,8 +522,8 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p } } -// CheckWithSnapshots will process snapshot IDs from command line and -// add to snapPacks so it contains only the selected packfiles via snapshotFilter +// CheckWithSnapshots will process snapshot IDs from 'selectedTrees' and +// add to snapPacks so it contains only the selected packfiles. func (c *Checker) CheckWithSnapshots(ctx context.Context, selectedTrees []restic.ID) error { // gather used blobs from all trees @@ -541,6 +541,10 @@ func (c *Checker) CheckWithSnapshots(ctx context.Context, selectedTrees []restic } } - c.packs = snapPacks + if len(snapPacks) > 0 { + c.packs = snapPacks + } else { + return errors.Fatal("no packfiles found for given snapshot trees") + } return nil } diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index 92bbb1da6..213011f5f 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -574,6 +574,96 @@ func TestCheckerBlobTypeConfusion(t *testing.T) { } } +// TestCheckRepoSnapshot: it is assumed here that restic.Snapshotfilter is +// working correctly: the output of the filter is fed into the test manually +func TestCheckRepoSnapshot(t *testing.T) { + repo, _, cleanup := repository.TestFromFixture(t, checkerTestData) + defer cleanup() + + chkr := checker.New(repo, false) + _, errs := chkr.LoadIndex(context.TODO(), nil) + if len(errs) > 0 { + t.Fatalf("expected no errors, got %v: %v", len(errs), errs) + } + + test.OKs(t, checkPacks(chkr)) + test.OKs(t, checkStruct(chkr)) + + snID := restic.TestParseID("f7d83db709977178c9d1a09e4009355e534cde1a135b8186b8b118a3fc4fcd41") + sn1, err := restic.LoadSnapshot(context.TODO(), repo, snID) + if err != nil { + t.Fatal(err) + } + selectedTrees := []restic.ID{*sn1.Tree} + test.OK(t, chkr.CheckWithSnapshots(context.TODO(), selectedTrees)) + + // size of packs (1) + lenPacks := chkr.CountPacks() + if lenPacks != 1 { + t.Fatalf("expected 1 packfile, got %v", lenPacks) + } + + _, errs = chkr.LoadIndex(context.TODO(), nil) + if len(errs) > 0 { + t.Fatalf("expected no errors, got %v: %v", len(errs), errs) + } + snID = restic.TestParseID("c2b53c5e6a16db92fbb9aa08bd2794c58b379d8724d661ee30d20898bdfdff22") + sn2, err := restic.LoadSnapshot(context.TODO(), repo, snID) + if err != nil { + t.Fatal(err) + } + selectedTrees = []restic.ID{*sn2.Tree} + test.OK(t, chkr.CheckWithSnapshots(context.TODO(), selectedTrees)) + + // size of packs (2) + lenPacks = chkr.CountPacks() + if lenPacks != 2 { + t.Fatalf("expected 2 packfiles, got %v", lenPacks) + } + + _, errs = chkr.LoadIndex(context.TODO(), nil) + if len(errs) > 0 { + t.Fatalf("expected no errors, got %v: %v", len(errs), errs) + } + snID = restic.TestParseID("a13c11e582b77a693dd75ab4e3a3ba96538a056594a4b9076e4cacebe6e06d43") + sn3, err := restic.LoadSnapshot(context.TODO(), repo, snID) + if err != nil { + t.Fatal(err) + } + selectedTrees = []restic.ID{*sn3.Tree} + test.OK(t, chkr.CheckWithSnapshots(context.TODO(), selectedTrees)) + + // size of packs (3) + lenPacks = chkr.CountPacks() + if lenPacks != 2 { + t.Fatalf("expected 2 packfiles, got %v", lenPacks) + } + + // size of packs (4) + _, errs = chkr.LoadIndex(context.TODO(), nil) + if len(errs) > 0 { + t.Fatalf("expected no errors, got %v: %v", len(errs), errs) + } + selectedTrees = []restic.ID{*sn1.Tree, *sn3.Tree} + test.OK(t, chkr.CheckWithSnapshots(context.TODO(), selectedTrees)) + lenPacks = chkr.CountPacks() + if lenPacks != 3 { + t.Fatalf("expected 3 packfiles, got %v", lenPacks) + } + + // size of packs (5): unmodified - filter empty + _, errs = chkr.LoadIndex(context.TODO(), nil) + if len(errs) > 0 { + t.Fatalf("expected no errors, got %v: %v", len(errs), errs) + } + + selectedTrees = []restic.ID{} + err = chkr.CheckWithSnapshots(context.TODO(), selectedTrees) + if err == nil { + t.Fatalf("expected an error, got no error") + } +} + func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, func()) { repo, _, cleanup := repository.TestFromFixture(t, checkerTestData) From b5c371892babc8306ecd3a0837d7f021d3f5f6d5 Mon Sep 17 00:00:00 2001 From: Winfried Plappert Date: Sun, 16 Feb 2025 08:38:52 +0000 Subject: [PATCH 08/10] check: harmonize my branch with master - integrate checkSummary based on the change in runCheck return values the following functions have been afftected cmd/restic/cmd_check_integration_test.go cmd/restic/cmd_migrate.go cmd/restic/cmd_prune_integration_test.go cmd/restic/integration_test.go check - corrected rebase error by not removing obsolete code. the check for the filter if len(args) > 0 { ... } is obsolete and has been replaced by if len(args) > 0 || !opts.SnapshotFilter.Empty() { ... } --- cmd/restic/cmd_check.go | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index 5018a05a2..b5d971f06 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -249,7 +249,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args if len(args) > 0 || !opts.SnapshotFilter.Empty() { snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile) if err != nil { - return err + return summary, err } err = (&opts.SnapshotFilter).FindAll(ctx, snapshotLister, repo, args, func(_ string, sn *restic.Snapshot, err error) error { @@ -262,10 +262,10 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args }) if err != nil { - return err + return summary, err } if len(selectedTrees) == 0 { - return errors.Fatal("snapshotfilter active but no snapshot selected.") + return summary, errors.Fatal("snapshotfilter active but no snapshot selected.") } } @@ -282,21 +282,6 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args return summary, ctx.Err() } - if len(args) > 0 { - snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile) - if err != nil { - return summary, err - } - - // run down the tree, take note of the data packfiles involved - for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &opts.SnapshotFilter, args) { - err := chkr.FindDataPackfiles(ctx, repo, sn) - if err != nil { - return summary, err - } - } - } - errorsFound := false for _, hint := range hints { switch hint.(type) { @@ -417,7 +402,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args if len(selectedTrees) > 0 { err = chkr.CheckWithSnapshots(ctx, selectedTrees) if err != nil { - return err + return summary, err } filterBySnapshot = true } From 2707cb416e6b853f0e974ed9f2dfe096844afaf1 Mon Sep 17 00:00:00 2001 From: Winfried Plappert Date: Fri, 7 Mar 2025 17:57:33 +0000 Subject: [PATCH 09/10] restic check - add integration tests cmd_check.go - minor changes: changes error meesage to New instead of Fatal added a short description in the help text. cmd_check_integration_test.go: added two tests which should both fail: invalid input --- cmd/restic/cmd_check.go | 5 +++- cmd/restic/cmd_check_integration_test.go | 35 ++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index b5d971f06..629e25ffe 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -35,6 +35,9 @@ finds. It can also be used to read all data and therefore simulate a restore. By default, the "check" command will always load all data directly from the repository and not use a local cache. +The "check" command can now check packfiles for specific snapshots. The snapshots +are filtered via the standard SnapshotFilter. + EXIT STATUS =========== @@ -265,7 +268,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args return summary, err } if len(selectedTrees) == 0 { - return summary, errors.Fatal("snapshotfilter active but no snapshot selected.") + return summary, errors.New("snapshotfilter active but no snapshot selected") } } diff --git a/cmd/restic/cmd_check_integration_test.go b/cmd/restic/cmd_check_integration_test.go index f5a3dc395..1f4104466 100644 --- a/cmd/restic/cmd_check_integration_test.go +++ b/cmd/restic/cmd_check_integration_test.go @@ -5,6 +5,7 @@ import ( "context" "testing" + "github.com/restic/restic/internal/restic" rtest "github.com/restic/restic/internal/test" "github.com/restic/restic/internal/ui/termstatus" ) @@ -37,3 +38,37 @@ func testRunCheckOutput(gopts GlobalOptions, checkUnused bool) (string, error) { }) return buf.String(), err } + +func testRunCheckOutputWithArgs(gopts GlobalOptions, opts CheckOptions, args []string) (string, error) { + buf := bytes.NewBuffer(nil) + gopts.stdout = buf + err := withTermStatus(gopts, func(ctx context.Context, term *termstatus.Terminal) error { + _, err := runCheck(context.TODO(), opts, gopts, args, term) + return err + }) + return buf.String(), err +} + +func TestRunCheckWrongArgs1(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + testSetupBackupData(t, env) + + _, err := testRunCheckOutputWithArgs(env.gopts, CheckOptions{}, []string{"blubber"}) + rtest.Assert(t, err != nil && err.Error() != "", + // blubber gets quoted - the error string looks messy + "expected specific error message - got %q", err) +} + +func TestRunCheckWrongArgs2(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + testSetupBackupData(t, env) + + opts := CheckOptions{ + SnapshotFilter: restic.SnapshotFilter{Hosts: []string{""}}, + } + _, err := testRunCheckOutputWithArgs(env.gopts, opts, []string{}) + rtest.Assert(t, err != nil && err.Error() == "snapshotfilter active but no snapshot selected", + "expected specific error message - got %q", err) +} From c774c53583bfe0dddfb533fc954de24d1e8e52cf Mon Sep 17 00:00:00 2001 From: Winfried Plappert Date: Fri, 7 Mar 2025 18:01:33 +0000 Subject: [PATCH 10/10] internal/checker: added tests checker.go: added error message if 'selectedTrees' is empty checker_test.go: modified test checking using test.OK() and test.Assert() to make the checking more compact. --- internal/checker/checker.go | 3 ++ internal/checker/checker_test.go | 64 +++++++++----------------------- 2 files changed, 20 insertions(+), 47 deletions(-) diff --git a/internal/checker/checker.go b/internal/checker/checker.go index daf5ac1c8..fdc1ff637 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -525,6 +525,9 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p // CheckWithSnapshots will process snapshot IDs from 'selectedTrees' and // add to snapPacks so it contains only the selected packfiles. func (c *Checker) CheckWithSnapshots(ctx context.Context, selectedTrees []restic.ID) error { + if len(selectedTrees) == 0 { + return errors.New("no IDs given") + } // gather used blobs from all trees usedBlobs := restic.NewBlobSet() diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index 213011f5f..d7567bdea 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -582,86 +582,56 @@ func TestCheckRepoSnapshot(t *testing.T) { chkr := checker.New(repo, false) _, errs := chkr.LoadIndex(context.TODO(), nil) - if len(errs) > 0 { - t.Fatalf("expected no errors, got %v: %v", len(errs), errs) - } + test.OKs(t, errs) test.OKs(t, checkPacks(chkr)) test.OKs(t, checkStruct(chkr)) snID := restic.TestParseID("f7d83db709977178c9d1a09e4009355e534cde1a135b8186b8b118a3fc4fcd41") sn1, err := restic.LoadSnapshot(context.TODO(), repo, snID) - if err != nil { - t.Fatal(err) - } + test.OK(t, err) selectedTrees := []restic.ID{*sn1.Tree} test.OK(t, chkr.CheckWithSnapshots(context.TODO(), selectedTrees)) - - // size of packs (1) lenPacks := chkr.CountPacks() - if lenPacks != 1 { - t.Fatalf("expected 1 packfile, got %v", lenPacks) - } + test.Assert(t, lenPacks == uint64(1), "expected 1 packfile, got %v", lenPacks) + // index needs reloading every time _, errs = chkr.LoadIndex(context.TODO(), nil) - if len(errs) > 0 { - t.Fatalf("expected no errors, got %v: %v", len(errs), errs) - } + test.Assert(t, len(errs) == 0, "expected no errors, got %v: %v", len(errs), errs) + snID = restic.TestParseID("c2b53c5e6a16db92fbb9aa08bd2794c58b379d8724d661ee30d20898bdfdff22") sn2, err := restic.LoadSnapshot(context.TODO(), repo, snID) - if err != nil { - t.Fatal(err) - } + test.OK(t, err) selectedTrees = []restic.ID{*sn2.Tree} test.OK(t, chkr.CheckWithSnapshots(context.TODO(), selectedTrees)) - - // size of packs (2) lenPacks = chkr.CountPacks() - if lenPacks != 2 { - t.Fatalf("expected 2 packfiles, got %v", lenPacks) - } + test.Assert(t, lenPacks == 2, "expected 2 packfiles, got %v", lenPacks) _, errs = chkr.LoadIndex(context.TODO(), nil) - if len(errs) > 0 { - t.Fatalf("expected no errors, got %v: %v", len(errs), errs) - } + test.Assert(t, len(errs) == 0, "expected no errors, got %v: %v", len(errs), errs) + snID = restic.TestParseID("a13c11e582b77a693dd75ab4e3a3ba96538a056594a4b9076e4cacebe6e06d43") sn3, err := restic.LoadSnapshot(context.TODO(), repo, snID) - if err != nil { - t.Fatal(err) - } + test.OK(t, err) selectedTrees = []restic.ID{*sn3.Tree} test.OK(t, chkr.CheckWithSnapshots(context.TODO(), selectedTrees)) - - // size of packs (3) lenPacks = chkr.CountPacks() - if lenPacks != 2 { - t.Fatalf("expected 2 packfiles, got %v", lenPacks) - } + test.Assert(t, lenPacks == 2, "expected 2 packfiles, got %v", lenPacks) - // size of packs (4) _, errs = chkr.LoadIndex(context.TODO(), nil) - if len(errs) > 0 { - t.Fatalf("expected no errors, got %v: %v", len(errs), errs) - } + test.Assert(t, len(errs) == 0, "expected no errors, got %v: %v", len(errs), errs) + selectedTrees = []restic.ID{*sn1.Tree, *sn3.Tree} test.OK(t, chkr.CheckWithSnapshots(context.TODO(), selectedTrees)) lenPacks = chkr.CountPacks() - if lenPacks != 3 { - t.Fatalf("expected 3 packfiles, got %v", lenPacks) - } + test.Assert(t, lenPacks == 3, "expected 3 packfiles, got %v", lenPacks) - // size of packs (5): unmodified - filter empty _, errs = chkr.LoadIndex(context.TODO(), nil) - if len(errs) > 0 { - t.Fatalf("expected no errors, got %v: %v", len(errs), errs) - } + test.Assert(t, len(errs) == 0, "expected no errors, got %v: %v", len(errs), errs) selectedTrees = []restic.ID{} err = chkr.CheckWithSnapshots(context.TODO(), selectedTrees) - if err == nil { - t.Fatalf("expected an error, got no error") - } + test.Assert(t, err != nil && err.Error() == "no IDs given", "expected specific error, got %v", err) } func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, func()) {