restic/cmd/restic/cmd_copy.go
Gilbert Gilb's 536ebefff4
feat(backends/s3): add warmup support before repacks and restores (#5173)
* feat(backends/s3): add warmup support before repacks and restores

This commit introduces basic support for transitioning pack files stored
in cold storage to hot storage on S3 and S3-compatible providers.

To prevent unexpected behavior for existing users, the feature is gated
behind new flags:

- `s3.enable-restore`: opt-in flag (defaults to false)
- `s3.restore-days`: number of days for the restored objects to remain
  in hot storage (defaults to `7`)
- `s3.restore-timeout`: maximum time to wait for a single restoration
  (defaults to `1 day`)
- `s3.restore-tier`: retrieval tier at which the restore will be
  processed (defaults to `Standard`)
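
For illustration, enabling the feature on a cold-storage S3 repository used as
the copy source could look roughly like the following (this assumes restic's
usual `-o key=value` mechanism for extended options and the `RESTIC_FEATURES`
environment variable for alpha feature flags; the repository paths are
placeholders):

    RESTIC_FEATURES=s3-restore \
    restic -r /srv/local-repo \
        -o s3.enable-restore=true \
        -o s3.restore-days=7 \
        -o s3.restore-tier=Standard \
        copy --from-repo s3:s3.amazonaws.com/my-bucket/repo latest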

As restoration times can be lengthy, this implementation preemptively
restores the selected packs to avoid repeated restore delays during
downloads. This is slightly sub-optimal, as packs could be processed
out of order (as soon as they are transitioned), but doing so would add
too much complexity for a marginal gain in speed.

To keep things simple and to prevent resource exhaustion with large
numbers of packs, no new concurrency mechanisms or goroutines were
added; the feature simply hooks into the existing routines.

**Limitations:**

- Tests against the backend were not written due to the lack of cold
  storage class support in MinIO. Testing was done manually on
  Scaleway's S3-compatible object storage. If necessary, we could
  explore testing with LocalStack or mocks, though this requires further
  discussion.
- Currently, this feature only warms up packs before restores and repacks
  (prune/copy), as those are the two main use cases I came across.
  Support for other commands may be added in future iterations, as long
  as affected packs can be calculated in advance.
- The feature is gated behind a new alpha `s3-restore` feature flag to
  make it explicit that the feature is still wet behind the ears.
- There is no explicit user notification for ongoing pack restorations.
  While I think it is not necessary because of the opt-in flag, showing
  some notice may improve usability (but it would probably require major
  refactoring of the progress bar, which I didn't want to start). Another
  possibility would be to add a flag that only sends the restore requests
  and fails early.

See https://github.com/restic/restic/issues/3202

* ui: warn user when files are warming up from cold storage

* refactor: remove the PacksWarmer struct

It's easier to handle multiple handles in the backend directly, and it
may open the door to reducing the number of requests made to the backend
in the future.
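
To make the intent of that refactor concrete, here is a rough, hypothetical
sketch of a backend-level warmup hook that accepts multiple handles at once.
Every name in it (`Warmer`, `StartWarmup`, `WaitWarmup`, `Handle`) is invented
for illustration and does not claim to match the API actually introduced by
this change:

```go
// Hypothetical sketch only; not restic's actual warmup API.
package main

import (
	"context"
	"fmt"
	"time"
)

// Handle identifies a file in the backend (stand-in for restic's backend.Handle).
type Handle struct{ Name string }

// Warmer would be implemented by backends that can move objects from cold to
// hot storage, for example by issuing S3 restore requests.
type Warmer interface {
	// StartWarmup fires the restore requests and returns immediately.
	StartWarmup(ctx context.Context, hs []Handle) error
	// WaitWarmup blocks until the handles are readable or ctx is done.
	WaitWarmup(ctx context.Context, hs []Handle) error
}

// warmupBeforeRead shows how a restore or repack routine could hook in:
// request the transition for every affected pack first, then wait once
// before starting to read.
func warmupBeforeRead(ctx context.Context, be Warmer, packs []Handle) error {
	if err := be.StartWarmup(ctx, packs); err != nil {
		return fmt.Errorf("warmup request failed: %w", err)
	}
	fmt.Printf("waiting for %d pack(s) to leave cold storage...\n", len(packs))

	ctx, cancel := context.WithTimeout(ctx, 24*time.Hour) // cf. s3.restore-timeout
	defer cancel()
	return be.WaitWarmup(ctx, packs)
}

// hotWarmer pretends everything is already in hot storage (a no-op default
// for backends without cold storage classes).
type hotWarmer struct{}

func (hotWarmer) StartWarmup(context.Context, []Handle) error { return nil }
func (hotWarmer) WaitWarmup(context.Context, []Handle) error  { return nil }

func main() {
	packs := []Handle{{Name: "pack-1"}, {Name: "pack-2"}}
	if err := warmupBeforeRead(context.Background(), hotWarmer{}, packs); err != nil {
		fmt.Println("error:", err)
	}
}
```

In this shape the restore and repack code paths only need to compute the set
of affected packs up front and call the hook once, which matches the
"no new goroutines" approach described above.
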
2025-02-01 18:26:27 +00:00


package main

import (
	"context"
	"fmt"

	"github.com/restic/restic/internal/debug"
	"github.com/restic/restic/internal/errors"
	"github.com/restic/restic/internal/repository"
	"github.com/restic/restic/internal/restic"
	"golang.org/x/sync/errgroup"

	"github.com/spf13/cobra"
)

var cmdCopy = &cobra.Command{
	Use:   "copy [flags] [snapshotID ...]",
	Short: "Copy snapshots from one repository to another",
	Long: `
The "copy" command copies one or more snapshots from one repository to another.

NOTE: This process will have to both download (read) and upload (write) the
entire snapshot(s) due to the different encryption keys used in the source and
destination repositories. This /may incur higher bandwidth usage and costs/ than
expected during normal backup runs.

NOTE: The copying process does not re-chunk files, which may break deduplication
between the files copied and files already stored in the destination repository.
This means that copied files, which existed in both the source and destination
repository, /may occupy up to twice their space/ in the destination repository.
This can be mitigated by the "--copy-chunker-params" option when initializing a
new destination repository using the "init" command.

EXIT STATUS
===========

Exit status is 0 if the command was successful.
Exit status is 1 if there was any error.
Exit status is 10 if the repository does not exist.
Exit status is 11 if the repository is already locked.
Exit status is 12 if the password is incorrect.
`,
	GroupID:           cmdGroupDefault,
	DisableAutoGenTag: true,
	RunE: func(cmd *cobra.Command, args []string) error {
		return runCopy(cmd.Context(), copyOptions, globalOptions, args)
	},
}

// CopyOptions bundles all options for the copy command.
type CopyOptions struct {
	secondaryRepoOptions
	restic.SnapshotFilter
}

var copyOptions CopyOptions

func init() {
	cmdRoot.AddCommand(cmdCopy)

	f := cmdCopy.Flags()
	initSecondaryRepoOptions(f, &copyOptions.secondaryRepoOptions, "destination", "to copy snapshots from")
	initMultiSnapshotFilter(f, &copyOptions.SnapshotFilter, true)
}
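
// runCopy copies the selected snapshots from the source repository to the
// destination repository, skipping snapshots that have already been copied.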
func runCopy(ctx context.Context, opts CopyOptions, gopts GlobalOptions, args []string) error {
	secondaryGopts, isFromRepo, err := fillSecondaryGlobalOpts(ctx, opts.secondaryRepoOptions, gopts, "destination")
	if err != nil {
		return err
	}
	if isFromRepo {
		// swap global options, if the secondary repo was set via from-repo
		gopts, secondaryGopts = secondaryGopts, gopts
	}

	ctx, srcRepo, unlock, err := openWithReadLock(ctx, gopts, gopts.NoLock)
	if err != nil {
		return err
	}
	defer unlock()

	ctx, dstRepo, unlock, err := openWithAppendLock(ctx, secondaryGopts, false)
	if err != nil {
		return err
	}
	defer unlock()
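
	// Cache the snapshot listings of both repositories so that later lookups
	// do not have to list files in the backends again.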
	srcSnapshotLister, err := restic.MemorizeList(ctx, srcRepo, restic.SnapshotFile)
	if err != nil {
		return err
	}

	dstSnapshotLister, err := restic.MemorizeList(ctx, dstRepo, restic.SnapshotFile)
	if err != nil {
		return err
	}

	debug.Log("Loading source index")
	bar := newIndexProgress(gopts.Quiet, gopts.JSON)
	if err := srcRepo.LoadIndex(ctx, bar); err != nil {
		return err
	}

	bar = newIndexProgress(gopts.Quiet, gopts.JSON)
	debug.Log("Loading destination index")
	if err := dstRepo.LoadIndex(ctx, bar); err != nil {
		return err
	}
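
	// Index the snapshots that already exist in the destination, keyed by
	// their original snapshot ID, so that already-copied snapshots can be
	// skipped below.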
	dstSnapshotByOriginal := make(map[restic.ID][]*restic.Snapshot)
	for sn := range FindFilteredSnapshots(ctx, dstSnapshotLister, dstRepo, &opts.SnapshotFilter, nil) {
		if sn.Original != nil && !sn.Original.IsNull() {
			dstSnapshotByOriginal[*sn.Original] = append(dstSnapshotByOriginal[*sn.Original], sn)
		}
		// also consider identical snapshot copies
		dstSnapshotByOriginal[*sn.ID()] = append(dstSnapshotByOriginal[*sn.ID()], sn)
	}
	if ctx.Err() != nil {
		return ctx.Err()
	}

	// remember already processed trees across all snapshots
	visitedTrees := restic.NewIDSet()

	for sn := range FindFilteredSnapshots(ctx, srcSnapshotLister, srcRepo, &opts.SnapshotFilter, args) {
		// check whether the destination has a snapshot with the same persistent ID which has similar snapshot fields
		srcOriginal := *sn.ID()
		if sn.Original != nil {
			srcOriginal = *sn.Original
		}

		if originalSns, ok := dstSnapshotByOriginal[srcOriginal]; ok {
			isCopy := false
			for _, originalSn := range originalSns {
				if similarSnapshots(originalSn, sn) {
					Verboseff("\n%v\n", sn)
					Verboseff("skipping source snapshot %s, was already copied to snapshot %s\n", sn.ID().Str(), originalSn.ID().Str())
					isCopy = true
					break
				}
			}
			if isCopy {
				continue
			}
		}
		Verbosef("\n%v\n", sn)
		Verbosef(" copy started, this may take a while...\n")
		if err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, gopts.Quiet); err != nil {
			return err
		}
		debug.Log("tree copied")

		// save snapshot
		sn.Parent = nil // Parent does not have relevance in the new repo.
		// Use Original as a persistent snapshot ID
		if sn.Original == nil {
			sn.Original = sn.ID()
		}
		newID, err := restic.SaveSnapshot(ctx, dstRepo, sn)
		if err != nil {
			return err
		}
		Verbosef("snapshot %s saved\n", newID.Str())
	}
	return ctx.Err()
}
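
// similarSnapshots reports whether the two snapshots are identical except for
// their Parent and Original fields.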
func similarSnapshots(sna *restic.Snapshot, snb *restic.Snapshot) bool {
	// everything except Parent and Original must match
	if !sna.Time.Equal(snb.Time) || !sna.Tree.Equal(*snb.Tree) || sna.Hostname != snb.Hostname ||
		sna.Username != snb.Username || sna.UID != snb.UID || sna.GID != snb.GID ||
		len(sna.Paths) != len(snb.Paths) || len(sna.Excludes) != len(snb.Excludes) ||
		len(sna.Tags) != len(snb.Tags) {
		return false
	}
	if !sna.HasPaths(snb.Paths) || !sna.HasTags(snb.Tags) {
		return false
	}
	for i, a := range sna.Excludes {
		if a != snb.Excludes[i] {
			return false
		}
	}
	return true
}
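
// copyTree copies the tree with ID rootTreeID and all blobs it references from
// srcRepo to dstRepo. It first collects the blobs that are missing from the
// destination together with the source packs containing them, then transfers
// them in a single pass using repository.Repack.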
func copyTree(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Repository,
	visitedTrees restic.IDSet, rootTreeID restic.ID, quiet bool) error {

	wg, wgCtx := errgroup.WithContext(ctx)
	treeStream := restic.StreamTrees(wgCtx, wg, srcRepo, restic.IDs{rootTreeID}, func(treeID restic.ID) bool {
		visited := visitedTrees.Has(treeID)
		visitedTrees.Insert(treeID)
		return visited
	}, nil)

	copyBlobs := restic.NewBlobSet()
	packList := restic.NewIDSet()
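
	// enqueue marks a blob for copying and records every source pack that
	// contains it, so the packs can be read in bulk later.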
	enqueue := func(h restic.BlobHandle) {
		pb := srcRepo.LookupBlob(h.Type, h.ID)
		copyBlobs.Insert(h)
		for _, p := range pb {
			packList.Insert(p.PackID)
		}
	}

	wg.Go(func() error {
		for tree := range treeStream {
			if tree.Error != nil {
				return fmt.Errorf("LoadTree(%v) returned error %v", tree.ID.Str(), tree.Error)
			}

			// Do we already have this tree blob?
			treeHandle := restic.BlobHandle{ID: tree.ID, Type: restic.TreeBlob}
			if _, ok := dstRepo.LookupBlobSize(treeHandle.Type, treeHandle.ID); !ok {
				// copy raw tree bytes to avoid problems if the serialization changes
				enqueue(treeHandle)
			}

			for _, entry := range tree.Nodes {
				// Recursion into directories is handled by StreamTrees
				// Copy the blobs for this file.
				for _, blobID := range entry.Content {
					h := restic.BlobHandle{Type: restic.DataBlob, ID: blobID}
					if _, ok := dstRepo.LookupBlobSize(h.Type, h.ID); !ok {
						enqueue(h)
					}
				}
			}
		}
		return nil
	})
	err := wg.Wait()
	if err != nil {
		return err
	}
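
	// Repack reads the collected packs from the source repository and stores
	// only the selected blobs in new pack files in the destination.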
	bar := newProgressMax(!quiet, uint64(len(packList)), "packs copied")
	_, err = repository.Repack(
		ctx,
		srcRepo,
		dstRepo,
		packList,
		copyBlobs,
		bar,
		func(msg string, args ...interface{}) { fmt.Printf(msg+"\n", args...) },
	)
	bar.Done()
	if err != nil {
		return errors.Fatal(err.Error())
	}
	return nil
}