mirror of
https://github.com/restic/restic.git
synced 2025-03-09 00:00:02 +01:00
check: run check of packfiles filtered via snapshotfilter - rebase part 2
Added code for selecting multiple snapshots. Added a message reporting how many packfiles were selected and their cumulative size. In internal/checker/checker.go, replaced the data-blob/packfile selection based on walker.Walk with restic.StreamTrees, parallelizing the packfile selection. Resolved conflict in cmd_check: allow check for snapshot filter.
This commit is contained in:
parent
6f77d4ddf8
commit
46184bd703
2 changed files with 71 additions and 29 deletions
|
@ -388,6 +388,30 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
|
|||
}
|
||||
}
|
||||
|
||||
filterBySnapshot := false
|
||||
if len(args) > 0 || !opts.SnapshotFilter.Empty() {
|
||||
snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
visitedTrees := restic.NewIDSet()
|
||||
for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &opts.SnapshotFilter, args) {
|
||||
err := chkr.FindDataPackfiles(ctx, repo, sn, visitedTrees)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
filterBySnapshot = true
|
||||
}
|
||||
|
||||
selectedPacksSize := int64(0)
|
||||
for _, size := range chkr.GetPacks() {
|
||||
selectedPacksSize += size
|
||||
}
|
||||
printer.P("snapshot checking: %d packfiles with size %s selected.\n",
|
||||
chkr.CountPacks(), ui.FormatBytes(uint64(selectedPacksSize)))
|
||||
}
|
||||
|
||||
doReadData := func(packs map[restic.ID]int64) {
|
||||
p := printer.NewCounter("packs")
|
||||
p.SetMax(uint64(len(packs)))
|
||||
|
@ -406,9 +430,14 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
|
|||
p.Done()
|
||||
}
|
||||
|
||||
whichSelection := "data"
|
||||
if filterBySnapshot {
|
||||
whichSelection = "selected data"
|
||||
}
|
||||
|
||||
switch {
|
||||
case opts.ReadData:
|
||||
printer.P("read all data\n")
|
||||
printer.P("read all %s\n", whichSelection)
|
||||
doReadData(selectPacksByBucket(chkr.GetPacks(), 1, 1))
|
||||
case opts.ReadDataSubset != "":
|
||||
var packs map[restic.ID]int64
|
||||
|
@ -418,12 +447,13 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
|
|||
totalBuckets := dataSubset[1]
|
||||
packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets)
|
||||
packCount := uint64(len(packs))
|
||||
printer.P("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets)
|
||||
printer.P("read group #%d of %d %s packs (out of total %d packs in %d groups)\n",
|
||||
bucket, packCount, whichSelection, chkr.CountPacks(), totalBuckets)
|
||||
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
|
||||
percentage, err := parsePercentage(opts.ReadDataSubset)
|
||||
if err == nil {
|
||||
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
|
||||
printer.P("read %.1f%% of data packs\n", percentage)
|
||||
printer.P("read %.1f%% of %s packs\n", percentage, whichSelection)
|
||||
}
|
||||
} else {
|
||||
repoSize := int64(0)
|
||||
|
@ -439,7 +469,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
|
|||
subsetSize = repoSize
|
||||
}
|
||||
packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize)
|
||||
printer.P("read %d bytes of data packs\n", subsetSize)
|
||||
printer.P("read %d bytes of %s packs\n", subsetSize, whichSelection)
|
||||
}
|
||||
if packs == nil {
|
||||
return summary, errors.Fatal("internal error: failed to select packs to check")
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"golang.org/x/sync/errgroup"
|
||||
"runtime"
|
||||
"sync"
|
||||
|
||||
|
@ -15,8 +16,6 @@ import (
|
|||
"github.com/restic/restic/internal/repository/pack"
|
||||
"github.com/restic/restic/internal/restic"
|
||||
"github.com/restic/restic/internal/ui/progress"
|
||||
"github.com/restic/restic/internal/walker"
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
// Checker runs various checks on a repository. It is advisable to create an
|
||||
|
@ -538,33 +537,46 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p
|
|||
}
|
||||
}
|
||||
|
||||
// find data packfiles for checking repository based on snapshots
|
||||
func (c *Checker) FindDataPackfiles(ctx context.Context, repo *repository.Repository, sn *restic.Snapshot) error {
|
||||
err := walker.Walk(ctx, repo, *sn.Tree, walker.WalkVisitor{ProcessNode: func(parentTreeID restic.ID, _ string, node *restic.Node, err error) error {
|
||||
if err != nil {
|
||||
fmt.Printf("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n", parentTreeID, sn.ID, err)
|
||||
return walker.ErrSkipNode
|
||||
}
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
// Find data packfiles for repository checking based on snapshots.
|
||||
// Use restic.StreamTrees to gather all data blobs and convert them to their
|
||||
// containing packfile
|
||||
func (c *Checker) FindDataPackfiles(ctx context.Context, repo *repository.Repository, sn *restic.Snapshot,
|
||||
visitedTrees restic.IDSet) error {
|
||||
|
||||
if node.Type == restic.NodeTypeFile {
|
||||
for _, content := range node.Content {
|
||||
result := repo.LookupBlob(restic.DataBlob, content)
|
||||
if len(result) == 0 {
|
||||
panic("checker.FindDataPackfiles: datablob not mapped!")
|
||||
} else if len(result) > 1 {
|
||||
panic("checker.FindDataPackfiles: datablob found several times!")
|
||||
}
|
||||
c.packSet.Insert(result[0].PackID)
|
||||
var packfileMutex sync.Mutex
|
||||
wg, wgCtx := errgroup.WithContext(ctx)
|
||||
treeStream := restic.StreamTrees(wgCtx, wg, repo, restic.IDs{*sn.Tree}, func(tree restic.ID) bool {
|
||||
visited := visitedTrees.Has(tree)
|
||||
visitedTrees.Insert(tree)
|
||||
return visited
|
||||
}, nil)
|
||||
|
||||
wg.Go(func() error {
|
||||
for tree := range treeStream {
|
||||
if tree.Error != nil {
|
||||
return fmt.Errorf("LoadTree(%v) returned error %v", tree.ID.Str(), tree.Error)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}})
|
||||
|
||||
packfileMutex.Lock()
|
||||
for _, node := range tree.Nodes {
|
||||
// Recursion into directories is handled by StreamTrees
|
||||
for _, content := range node.Content {
|
||||
result := repo.LookupBlob(restic.DataBlob, content)
|
||||
if len(result) == 0 {
|
||||
return fmt.Errorf("checker.LookupBlob: datablob %s not mapped!", content.Str())
|
||||
}
|
||||
c.packSet.Insert(result[0].PackID)
|
||||
}
|
||||
}
|
||||
packfileMutex.Unlock()
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
err := wg.Wait()
|
||||
if err != nil {
|
||||
return errors.New(fmt.Sprintf("walker.Walk does not want to walk - reason %v\n", err))
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
Loading…
Add table
Reference in a new issue