1
0
Fork 0
mirror of https://github.com/restic/restic.git synced 2025-03-09 00:00:02 +01:00
This commit is contained in:
Winfried Plappert 2025-03-07 18:04:22 +00:00 committed by GitHub
commit 8c968c32ab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 189 additions and 9 deletions

View file

@ -0,0 +1,8 @@
Enhancement: enable --read-data-subset and --read-data for specified snapshot(s)
Snapshots can now be specified on the command line via the standard snapshot filter
(`--tag`, `--host`, `--path` or by specifying snapshot IDs directly). The selection
is then used to check only the packfiles used by these snapshots.
https://github.com/restic/restic/issues/3326
https://github.com/restic/restic/pull/5213

View file

@ -35,6 +35,9 @@ finds. It can also be used to read all data and therefore simulate a restore.
By default, the "check" command will always load all data directly from the
repository and not use a local cache.
The "check" command can now check packfiles for specific snapshots. The snapshots
are filtered via the standard SnapshotFilter.
EXIT STATUS
===========
@ -73,6 +76,7 @@ type CheckOptions struct {
ReadDataSubset string
CheckUnused bool
WithCache bool
restic.SnapshotFilter
}
func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) {
@ -86,6 +90,7 @@ func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) {
panic(err)
}
f.BoolVar(&opts.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository")
initMultiSnapshotFilter(f, &opts.SnapshotFilter, true)
}
func checkFlags(opts CheckOptions) error {
@ -222,9 +227,6 @@ func prepareCheckCache(opts CheckOptions, gopts *GlobalOptions, printer progress
func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args []string, term *termstatus.Terminal) (checkSummary, error) {
summary := checkSummary{MessageType: "summary"}
if len(args) != 0 {
return summary, errors.Fatal("the check command expects no arguments, only options - please see `restic help check` for usage and flags")
}
var printer progress.Printer
if !gopts.JSON {
@ -245,6 +247,31 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
}
defer unlock()
// check snapshot filter
selectedTrees := []restic.ID{}
if len(args) > 0 || !opts.SnapshotFilter.Empty() {
snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile)
if err != nil {
return summary, err
}
err = (&opts.SnapshotFilter).FindAll(ctx, snapshotLister, repo, args, func(_ string, sn *restic.Snapshot, err error) error {
if err != nil {
return err
}
selectedTrees = append(selectedTrees, *sn.Tree)
return nil
})
if err != nil {
return summary, err
}
if len(selectedTrees) == 0 {
return summary, errors.New("snapshotfilter active but no snapshot selected")
}
}
chkr := checker.New(repo, opts.CheckUnused)
err = chkr.LoadSnapshots(ctx)
if err != nil {
@ -374,6 +401,15 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
}
}
filterBySnapshot := false
if len(selectedTrees) > 0 {
err = chkr.CheckWithSnapshots(ctx, selectedTrees)
if err != nil {
return summary, err
}
filterBySnapshot = true
}
doReadData := func(packs map[restic.ID]int64) {
p := printer.NewCounter("packs")
p.SetMax(uint64(len(packs)))
@ -392,9 +428,14 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
p.Done()
}
whichSelection := "data"
if filterBySnapshot {
whichSelection = "selected data"
}
switch {
case opts.ReadData:
printer.P("read all data\n")
printer.P("read all %s\n", whichSelection)
doReadData(selectPacksByBucket(chkr.GetPacks(), 1, 1))
case opts.ReadDataSubset != "":
var packs map[restic.ID]int64
@ -404,12 +445,13 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
totalBuckets := dataSubset[1]
packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets)
packCount := uint64(len(packs))
printer.P("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets)
printer.P("read group #%d of %d %s packs (out of total %d packs in %d groups)\n",
bucket, packCount, whichSelection, chkr.CountPacks(), totalBuckets)
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
percentage, err := parsePercentage(opts.ReadDataSubset)
if err == nil {
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
printer.P("read %.1f%% of data packs\n", percentage)
printer.P("read %.1f%% of %s packs\n", percentage, whichSelection)
}
} else {
repoSize := int64(0)
@ -425,7 +467,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args
subsetSize = repoSize
}
packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize)
printer.P("read %d bytes of data packs\n", subsetSize)
printer.P("read %d bytes of %s packs\n", subsetSize, whichSelection)
}
if packs == nil {
return summary, errors.Fatal("internal error: failed to select packs to check")

View file

@ -5,6 +5,7 @@ import (
"context"
"testing"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
"github.com/restic/restic/internal/ui/termstatus"
)
@ -37,3 +38,37 @@ func testRunCheckOutput(gopts GlobalOptions, checkUnused bool) (string, error) {
})
return buf.String(), err
}
// testRunCheckOutputWithArgs runs runCheck with the given options and
// positional arguments while redirecting gopts.stdout into an in-memory
// buffer. It returns the buffered output and the error reported by
// withTermStatus.
func testRunCheckOutputWithArgs(gopts GlobalOptions, opts CheckOptions, args []string) (string, error) {
	var captured bytes.Buffer
	gopts.stdout = &captured

	runErr := withTermStatus(gopts, func(ctx context.Context, term *termstatus.Terminal) error {
		// only the error is of interest here, the summary is discarded
		_, checkErr := runCheck(context.TODO(), opts, gopts, args, term)
		return checkErr
	})

	return captured.String(), runErr
}
// TestRunCheckWrongArgs1 calls check with the positional argument "blubber"
// and expects the run to fail with a non-empty error message.
func TestRunCheckWrongArgs1(t *testing.T) {
	env, cleanup := withTestEnvironment(t)
	defer cleanup()
	testSetupBackupData(t, env)

	badArgs := []string{"blubber"}
	_, err := testRunCheckOutputWithArgs(env.gopts, CheckOptions{}, badArgs)

	// blubber gets quoted - the error string looks messy, so only the
	// presence of some error text is verified
	gotError := err != nil && err.Error() != ""
	rtest.Assert(t, gotError, "expected specific error message - got %q", err)
}
// TestRunCheckWrongArgs2 activates the snapshot filter with a single empty
// host name and no positional arguments, and expects check to report that
// the filter selected no snapshot.
func TestRunCheckWrongArgs2(t *testing.T) {
	env, cleanup := withTestEnvironment(t)
	defer cleanup()
	testSetupBackupData(t, env)

	const wantMsg = "snapshotfilter active but no snapshot selected"

	var opts CheckOptions
	opts.SnapshotFilter.Hosts = []string{""}

	_, err := testRunCheckOutputWithArgs(env.gopts, opts, []string{})
	matched := err != nil && err.Error() == wantMsg
	rtest.Assert(t, matched, "expected specific error message - got %q", err)
}

View file

@ -82,6 +82,12 @@ If ``check`` detects damaged pack files, it will show instructions on how to rep
them using the ``repair pack`` command. Use that command instead of the "Repair the
index" section in this guide.
If you want to check the repository via snapshots, you can now
use the standard snapshot filter method specifying ``--host``, ``--path``, ``--tag`` or
alternatively naming snapshot ID(s) explicitly. The selected subset of packfiles
will then be read to disk and checked for consistency
when either ``--read-data`` or ``--read-data-subset`` is given.
2. Backup the repository
************************

View file

@ -4,6 +4,7 @@ import (
"bufio"
"context"
"fmt"
"golang.org/x/sync/errgroup"
"runtime"
"sync"
@ -15,7 +16,6 @@ import (
"github.com/restic/restic/internal/repository/pack"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/ui/progress"
"golang.org/x/sync/errgroup"
)
// Checker runs various checks on a repository. It is advisable to create an
@ -501,7 +501,6 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p
for pack := range packs {
packSet.Insert(pack)
}
// push packs to ch
for pbs := range c.repo.ListPacksFromIndex(ctx, packSet) {
size := packs[pbs.PackID]
@ -522,3 +521,33 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p
}
}
}
// CheckWithSnapshots narrows the checker's pack list (c.packs) down to the
// packfiles that store blobs used by the trees in 'selectedTrees'.
//
// It returns an error when 'selectedTrees' is empty, when collecting the used
// blobs fails, or when no packfile could be found for the trees. c.packs is
// only replaced on success.
func (c *Checker) CheckWithSnapshots(ctx context.Context, selectedTrees []restic.ID) error {
	if len(selectedTrees) == 0 {
		return errors.New("no IDs given")
	}

	// collect the blobs used by all selected trees
	blobs := restic.NewBlobSet()
	if err := restic.FindUsedBlobs(ctx, c.repo, selectedTrees, blobs, nil); err != nil {
		return err
	}

	// translate blobs into packfile IDs; the value stored for each pack is
	// carried over from the current c.packs entry
	wanted := map[restic.ID]int64{}
	for handle := range blobs {
		for _, location := range c.repo.LookupBlob(handle.Type, handle.ID) {
			wanted[location.PackID] = c.packs[location.PackID]
		}
	}

	if len(wanted) == 0 {
		return errors.Fatal("no packfiles found for given snapshot trees")
	}

	c.packs = wanted
	return nil
}

View file

@ -574,6 +574,66 @@ func TestCheckerBlobTypeConfusion(t *testing.T) {
}
}
// TestCheckRepoSnapshot exercises Checker.CheckWithSnapshots with trees taken
// from snapshots of the checker test fixture. restic.SnapshotFilter is assumed
// to work correctly: instead of running the filter, the snapshots are loaded
// by ID and their trees are fed into the checker manually.
func TestCheckRepoSnapshot(t *testing.T) {
	repo, _, cleanup := repository.TestFromFixture(t, checkerTestData)
	defer cleanup()

	chkr := checker.New(repo, false)
	_, errs := chkr.LoadIndex(context.TODO(), nil)
	test.OKs(t, errs)
	test.OKs(t, checkPacks(chkr))
	test.OKs(t, checkStruct(chkr))

	// treeOf loads the snapshot with the given ID and returns its tree ID.
	treeOf := func(snapshotID string) restic.ID {
		id := restic.TestParseID(snapshotID)
		sn, err := restic.LoadSnapshot(context.TODO(), repo, id)
		test.OK(t, err)
		return *sn.Tree
	}

	// expectPacks restricts the checker to the given trees, verifies the
	// resulting number of packfiles and then reloads the index.
	expectPacks := func(trees []restic.ID, want uint64, msg string) {
		test.OK(t, chkr.CheckWithSnapshots(context.TODO(), trees))
		got := chkr.CountPacks()
		test.Assert(t, got == want, msg, got)

		// index needs reloading every time
		_, loadErrs := chkr.LoadIndex(context.TODO(), nil)
		test.Assert(t, len(loadErrs) == 0, "expected no errors, got %v: %v", len(loadErrs), loadErrs)
	}

	tree1 := treeOf("f7d83db709977178c9d1a09e4009355e534cde1a135b8186b8b118a3fc4fcd41")
	expectPacks([]restic.ID{tree1}, 1, "expected 1 packfile, got %v")

	tree2 := treeOf("c2b53c5e6a16db92fbb9aa08bd2794c58b379d8724d661ee30d20898bdfdff22")
	expectPacks([]restic.ID{tree2}, 2, "expected 2 packfiles, got %v")

	tree3 := treeOf("a13c11e582b77a693dd75ab4e3a3ba96538a056594a4b9076e4cacebe6e06d43")
	expectPacks([]restic.ID{tree3}, 2, "expected 2 packfiles, got %v")

	// two snapshots combined
	expectPacks([]restic.ID{tree1, tree3}, 3, "expected 3 packfiles, got %v")

	// an empty selection must be rejected
	err := chkr.CheckWithSnapshots(context.TODO(), []restic.ID{})
	test.Assert(t, err != nil && err.Error() == "no IDs given", "expected specific error, got %v", err)
}
func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, func()) {
repo, _, cleanup := repository.TestFromFixture(t, checkerTestData)