diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index e8a0ca73a..4edac3f9c 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -388,6 +388,30 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args } } + filterBySnapshot := false + if len(args) > 0 || !opts.SnapshotFilter.Empty() { + snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile) + if err != nil { + return err + } + + visitedTrees := restic.NewIDSet() + for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &opts.SnapshotFilter, args) { + err := chkr.FindDataPackfiles(ctx, repo, sn, visitedTrees) + if err != nil { + return err + } + filterBySnapshot = true + } + + selectedPacksSize := int64(0) + for _, size := range chkr.GetPacks() { + selectedPacksSize += size + } + printer.P("snapshot checking: %d packfiles with size %s selected.\n", + chkr.CountPacks(), ui.FormatBytes(uint64(selectedPacksSize))) + } + doReadData := func(packs map[restic.ID]int64) { p := printer.NewCounter("packs") p.SetMax(uint64(len(packs))) @@ -406,9 +430,14 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args p.Done() } + whichSelection := "data" + if filterBySnapshot { + whichSelection = "selected data" + } + switch { case opts.ReadData: - printer.P("read all data\n") + printer.P("read all %s\n", whichSelection) doReadData(selectPacksByBucket(chkr.GetPacks(), 1, 1)) case opts.ReadDataSubset != "": var packs map[restic.ID]int64 @@ -418,12 +447,13 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args totalBuckets := dataSubset[1] packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets) packCount := uint64(len(packs)) - printer.P("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets) + printer.P("read group #%d of %d %s packs (out of total %d packs in %d groups)\n", + bucket, packCount, whichSelection, chkr.CountPacks(), totalBuckets) } else if strings.HasSuffix(opts.ReadDataSubset, "%") { percentage, err := parsePercentage(opts.ReadDataSubset) if err == nil { packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage) - printer.P("read %.1f%% of data packs\n", percentage) + printer.P("read %.1f%% of %s packs\n", percentage, whichSelection) } } else { repoSize := int64(0) @@ -439,7 +469,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args subsetSize = repoSize } packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize) - printer.P("read %d bytes of data packs\n", subsetSize) + printer.P("read %d bytes of %s packs\n", subsetSize, whichSelection) } if packs == nil { return summary, errors.Fatal("internal error: failed to select packs to check") diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 5f9a774d8..013563790 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -4,6 +4,7 @@ import ( "bufio" "context" "fmt" + "golang.org/x/sync/errgroup" "runtime" "sync" @@ -15,8 +16,6 @@ import ( "github.com/restic/restic/internal/repository/pack" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/ui/progress" - "github.com/restic/restic/internal/walker" - "golang.org/x/sync/errgroup" ) // Checker runs various checks on a repository. It is advisable to create an @@ -538,33 +537,46 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p } } -// find data packfiles for checking repository based on snapshots -func (c *Checker) FindDataPackfiles(ctx context.Context, repo *repository.Repository, sn *restic.Snapshot) error { - err := walker.Walk(ctx, repo, *sn.Tree, walker.WalkVisitor{ProcessNode: func(parentTreeID restic.ID, _ string, node *restic.Node, err error) error { - if err != nil { - fmt.Printf("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n", parentTreeID, sn.ID, err) - return walker.ErrSkipNode - } - if node == nil { - return nil - } +// Find data packfiles for repository checking based on snapshots. +// Use restic.StreamTrees to gather all data blobs and convert them to their +// containing packfile +func (c *Checker) FindDataPackfiles(ctx context.Context, repo *repository.Repository, sn *restic.Snapshot, + visitedTrees restic.IDSet) error { - if node.Type == restic.NodeTypeFile { - for _, content := range node.Content { - result := repo.LookupBlob(restic.DataBlob, content) - if len(result) == 0 { - panic("checker.FindDataPackfiles: datablob not mapped!") - } else if len(result) > 1 { - panic("checker.FindDataPackfiles: datablob found several times!") - } - c.packSet.Insert(result[0].PackID) + var packfileMutex sync.Mutex + wg, wgCtx := errgroup.WithContext(ctx) + treeStream := restic.StreamTrees(wgCtx, wg, repo, restic.IDs{*sn.Tree}, func(tree restic.ID) bool { + visited := visitedTrees.Has(tree) + visitedTrees.Insert(tree) + return visited + }, nil) + + wg.Go(func() error { + for tree := range treeStream { + if tree.Error != nil { + return fmt.Errorf("LoadTree(%v) returned error %v", tree.ID.Str(), tree.Error) } - } - return nil - }}) + packfileMutex.Lock() + for _, node := range tree.Nodes { + // Recursion into directories is handled by StreamTrees + for _, content := range node.Content { + result := repo.LookupBlob(restic.DataBlob, content) + if len(result) == 0 { + return fmt.Errorf("checker.LookupBlob: datablob %s not mapped!", content.Str()) + } + c.packSet.Insert(result[0].PackID) + } + } + packfileMutex.Unlock() + } + + return nil + }) + + err := wg.Wait() if err != nil { - return errors.New(fmt.Sprintf("walker.Walk does not want to walk - reason %v\n", err)) + return err } return nil