1
0
Fork 0
mirror of https://github.com/restic/restic.git synced 2025-03-09 00:00:02 +01:00

check: run check of packfiles filtered via snapshot filter - rebase part 2

Added code for selecting multiple snapshots.
Added a message reporting how many packfiles were selected and their cumulative size.
In internal/checker/checker.go, replaced the datablob/packfile selection based on walker.Walk
with restic.StreamTrees, parallelizing the packfile selection.

Resolved conflict in cmd_check: allow check with a snapshot filter.
This commit is contained in:
Winfried Plappert 2025-01-25 11:01:03 +00:00
parent 6f77d4ddf8
commit 46184bd703
2 changed files with 71 additions and 29 deletions

View file

@ -388,6 +388,30 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
}
}
filterBySnapshot := false
if len(args) > 0 || !opts.SnapshotFilter.Empty() {
snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile)
if err != nil {
return err
}
visitedTrees := restic.NewIDSet()
for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &opts.SnapshotFilter, args) {
err := chkr.FindDataPackfiles(ctx, repo, sn, visitedTrees)
if err != nil {
return err
}
filterBySnapshot = true
}
selectedPacksSize := int64(0)
for _, size := range chkr.GetPacks() {
selectedPacksSize += size
}
printer.P("snapshot checking: %d packfiles with size %s selected.\n",
chkr.CountPacks(), ui.FormatBytes(uint64(selectedPacksSize)))
}
doReadData := func(packs map[restic.ID]int64) {
p := printer.NewCounter("packs")
p.SetMax(uint64(len(packs)))
@ -406,9 +430,14 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
p.Done()
}
whichSelection := "data"
if filterBySnapshot {
whichSelection = "selected data"
}
switch {
case opts.ReadData:
printer.P("read all data\n")
printer.P("read all %s\n", whichSelection)
doReadData(selectPacksByBucket(chkr.GetPacks(), 1, 1))
case opts.ReadDataSubset != "":
var packs map[restic.ID]int64
@ -418,12 +447,13 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
totalBuckets := dataSubset[1]
packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets)
packCount := uint64(len(packs))
printer.P("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets)
printer.P("read group #%d of %d %s packs (out of total %d packs in %d groups)\n",
bucket, packCount, whichSelection, chkr.CountPacks(), totalBuckets)
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
percentage, err := parsePercentage(opts.ReadDataSubset)
if err == nil {
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
printer.P("read %.1f%% of data packs\n", percentage)
printer.P("read %.1f%% of %s packs\n", percentage, whichSelection)
}
} else {
repoSize := int64(0)
@ -439,7 +469,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
subsetSize = repoSize
}
packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize)
printer.P("read %d bytes of data packs\n", subsetSize)
printer.P("read %d bytes of %s packs\n", subsetSize, whichSelection)
}
if packs == nil {
return summary, errors.Fatal("internal error: failed to select packs to check")

View file

@ -4,6 +4,7 @@ import (
"bufio"
"context"
"fmt"
"golang.org/x/sync/errgroup"
"runtime"
"sync"
@ -15,8 +16,6 @@ import (
"github.com/restic/restic/internal/repository/pack"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/ui/progress"
"github.com/restic/restic/internal/walker"
"golang.org/x/sync/errgroup"
)
// Checker runs various checks on a repository. It is advisable to create an
@ -538,33 +537,46 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p
}
}
// find data packfiles for checking repository based on snapshots
func (c *Checker) FindDataPackfiles(ctx context.Context, repo *repository.Repository, sn *restic.Snapshot) error {
err := walker.Walk(ctx, repo, *sn.Tree, walker.WalkVisitor{ProcessNode: func(parentTreeID restic.ID, _ string, node *restic.Node, err error) error {
if err != nil {
fmt.Printf("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n", parentTreeID, sn.ID, err)
return walker.ErrSkipNode
}
if node == nil {
return nil
}
// Find data packfiles for repository checking based on snapshots.
// Use restic.StreamTrees to gather all data blobs and convert them to their
// containing packfile
func (c *Checker) FindDataPackfiles(ctx context.Context, repo *repository.Repository, sn *restic.Snapshot,
visitedTrees restic.IDSet) error {
if node.Type == restic.NodeTypeFile {
for _, content := range node.Content {
result := repo.LookupBlob(restic.DataBlob, content)
if len(result) == 0 {
panic("checker.FindDataPackfiles: datablob not mapped!")
} else if len(result) > 1 {
panic("checker.FindDataPackfiles: datablob found several times!")
}
c.packSet.Insert(result[0].PackID)
var packfileMutex sync.Mutex
wg, wgCtx := errgroup.WithContext(ctx)
treeStream := restic.StreamTrees(wgCtx, wg, repo, restic.IDs{*sn.Tree}, func(tree restic.ID) bool {
visited := visitedTrees.Has(tree)
visitedTrees.Insert(tree)
return visited
}, nil)
wg.Go(func() error {
for tree := range treeStream {
if tree.Error != nil {
return fmt.Errorf("LoadTree(%v) returned error %v", tree.ID.Str(), tree.Error)
}
}
return nil
}})
packfileMutex.Lock()
for _, node := range tree.Nodes {
// Recursion into directories is handled by StreamTrees
for _, content := range node.Content {
result := repo.LookupBlob(restic.DataBlob, content)
if len(result) == 0 {
return fmt.Errorf("checker.LookupBlob: datablob %s not mapped!", content.Str())
}
c.packSet.Insert(result[0].PackID)
}
}
packfileMutex.Unlock()
}
return nil
})
err := wg.Wait()
if err != nil {
return errors.New(fmt.Sprintf("walker.Walk does not want to walk - reason %v\n", err))
return err
}
return nil