diff --git a/changelog/unreleased/issue-3326 b/changelog/unreleased/issue-3326 new file mode 100644 index 000000000..771c3e370 --- /dev/null +++ b/changelog/unreleased/issue-3326 @@ -0,0 +1,8 @@ +check: enable --read-data-subset and --read-data for specified snapshot(s) + +When snapshots are specified on the command line, `check` now reads the metadata +of these snapshots and restricts `--read-data` and `--read-data-subset` to the +pack files that contain the data of these snapshots. + +https://github.com/restic/restic/issues/3326 +https://github.com/restic/restic/pull/5213 diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index f87303933..e8a0ca73a 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -73,6 +73,7 @@ type CheckOptions struct { ReadDataSubset string CheckUnused bool WithCache bool + restic.SnapshotFilter } func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) { @@ -86,6 +87,7 @@ func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) { panic(err) } f.BoolVar(&opts.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository") + initMultiSnapshotFilter(f, &opts.SnapshotFilter, true) } func checkFlags(opts CheckOptions) error { @@ -222,9 +224,6 @@ func prepareCheckCache(opts CheckOptions, gopts *GlobalOptions, printer progress func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args []string, term *termstatus.Terminal) (checkSummary, error) { summary := checkSummary{MessageType: "summary"} - if len(args) != 0 { - return summary, errors.Fatal("the check command expects no arguments, only options - please see `restic help check` for usage and flags") - } var printer progress.Printer if !gopts.JSON { @@ -258,6 +257,21 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args return summary, ctx.Err() } + if len(args) > 0 { + snapshotLister, err := restic.MemorizeList(ctx, repo, restic.SnapshotFile) + if err != nil { + return summary, err + } + + // run down the tree, take note of 
the data packfiles involved
+		for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &opts.SnapshotFilter, args) {
+			err := chkr.FindDataPackfiles(ctx, repo, sn)
+			if err != nil {
+				return summary, err
+			}
+		}
+	}
+
 	errorsFound := false
 	for _, hint := range hints {
 		switch hint.(type) {
diff --git a/internal/checker/checker.go b/internal/checker/checker.go
index 12020891a..5f9a774d8 100644
--- a/internal/checker/checker.go
+++ b/internal/checker/checker.go
@@ -15,6 +15,7 @@ import (
 	"github.com/restic/restic/internal/repository/pack"
 	"github.com/restic/restic/internal/restic"
 	"github.com/restic/restic/internal/ui/progress"
+	"github.com/restic/restic/internal/walker"
 	"golang.org/x/sync/errgroup"
 )
 
@@ -29,6 +30,7 @@ type Checker struct {
 		sync.Mutex
 		M restic.BlobSet
 	}
+	packSet     restic.IDSet
 	trackUnused bool
 
 	masterIndex *index.MasterIndex
@@ -41,6 +43,7 @@ type Checker struct {
 func New(repo restic.Repository, trackUnused bool) *Checker {
 	c := &Checker{
 		packs:       make(map[restic.ID]int64),
+		packSet:     restic.NewIDSet(),
 		masterIndex: index.NewMasterIndex(),
 		repo:        repo,
 		trackUnused: trackUnused,
@@ -431,12 +434,30 @@ func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles, er
 
-// CountPacks returns the number of packs in the repository.
+// CountPacks returns the number of packs subject to checking: the packs
+// recorded by FindDataPackfiles if there are any, otherwise all packs in the
+// repository.
 func (c *Checker) CountPacks() uint64 {
-	return uint64(len(c.packs))
+	if len(c.packSet) == 0 {
+		return uint64(len(c.packs))
+	}
+	return uint64(len(c.packSet))
 }
 
-// GetPacks returns IDSet of packs in the repository
+// GetPacks returns the packs subject to checking, keyed by pack ID. If
+// FindDataPackfiles has recorded packs, a new map restricted to those packs
+// is returned; otherwise the map of all packs in the repository is returned.
+//
+// NOTE(review): an empty packSet is indistinguishable from "no snapshots were
+// selected", so snapshots that reference no data fall back to all packs.
 func (c *Checker) GetPacks() map[restic.ID]int64 {
-	return c.packs
+	if len(c.packSet) == 0 {
+		return c.packs
+	}
+
+	result := make(map[restic.ID]int64, len(c.packSet))
+	for packID := range c.packSet {
+		result[packID] = c.packs[packID]
+	}
+	return result
 }
 
 // ReadData loads all data from the repository and checks the integrity.
@@ -522,3 +537,45 @@ func (c *Checker) ReadPacks(ctx context.Context, packs map[restic.ID]int64, p *p
 		}
 	}
 }
+
+// FindDataPackfiles walks the tree of snapshot sn and records, in c.packSet,
+// every pack file that holds a data blob referenced by one of its files.
+// Once packs have been recorded, CountPacks and GetPacks only report those.
+//
+// A tree that cannot be loaded is reported and skipped so that the remaining
+// trees are still processed. An error is returned if the snapshot has no
+// tree, if a data blob is missing from the index, or if the walk fails.
+func (c *Checker) FindDataPackfiles(ctx context.Context, repo *repository.Repository, sn *restic.Snapshot) error {
+	if sn.Tree == nil {
+		return fmt.Errorf("snapshot %v has no tree", sn.ID())
+	}
+
+	err := walker.Walk(ctx, repo, *sn.Tree, walker.WalkVisitor{ProcessNode: func(parentTreeID restic.ID, _ string, node *restic.Node, err error) error {
+		if err != nil {
+			// NOTE(review): this prints to stdout from library code, which
+			// presumably interferes with --json output - confirm and route via the caller.
+			fmt.Printf("Unable to load tree %s\n ... which belongs to snapshot %s - reason %v\n", parentTreeID, sn.ID(), err)
+			return walker.ErrSkipNode
+		}
+		if node == nil || node.Type != restic.NodeTypeFile {
+			return nil
+		}
+
+		for _, content := range node.Content {
+			result := repo.LookupBlob(restic.DataBlob, content)
+			if len(result) == 0 {
+				return fmt.Errorf("data blob %v not found in index", content)
+			}
+			// A blob may be stored in more than one pack; record all of them.
+			for _, pb := range result {
+				c.packSet.Insert(pb.PackID)
+			}
+		}
+		return nil
+	}})
+	if err != nil {
+		return fmt.Errorf("walking tree of snapshot %v: %w", sn.ID(), err)
+	}
+
+	return nil
+}