* feat(backends/s3): add warmup support before repacks and restores

  This commit introduces basic support for transitioning pack files stored in cold storage to hot storage on S3 and S3-compatible providers. To prevent unexpected behavior for existing users, the feature is gated behind new flags:

  - `s3.enable-restore`: opt-in flag (defaults to false)
  - `s3.restore-days`: number of days for the restored objects to remain in hot storage (defaults to `7`)
  - `s3.restore-timeout`: maximum time to wait for a single restoration (defaults to `1 day`)
  - `s3.restore-tier`: retrieval tier at which the restore will be processed (defaults to `Standard`)

  As restoration times can be lengthy, this implementation preemptively restores the selected packs to avoid repeated restore delays during downloads. This is slightly sub-optimal, as we could process packs out of order (as soon as they have been transitioned), but that would add too much complexity for a marginal gain in speed. To keep things simple and to prevent resource exhaustion with large numbers of packs, no new concurrency mechanisms or goroutines were added; the feature hooks into the existing routines.

  **Limitations:**

  - Tests against the backend were not written because MinIO lacks support for cold storage classes. Testing was done manually on Scaleway's S3-compatible object storage. If necessary, we could explore testing with LocalStack or mocks, though this requires further discussion.
  - Currently, this feature only warms up packs before restores and repacks (prune/copy), as those are the two main use cases I came across. Support for other commands may be added in future iterations, as long as the affected packs can be calculated in advance.
  - The feature is gated behind a new alpha `s3-restore` feature flag to make it explicit that the feature is still wet behind the ears.
  - There is no explicit user notification for ongoing pack restorations. While I think this is unnecessary because of the opt-in flag, showing a notice may improve usability (but would probably require major refactoring of the progress bar, which I didn't want to start). Another possibility would be to add a flag that only sends the restore requests and then fails early.

  See https://github.com/restic/restic/issues/3202

* ui: warn user when files are warming up from cold storage

* refactor: remove the PacksWarmer struct

  It's easier to handle multiple handles in the backend directly, and it may open the door to reducing the number of requests made to the backend in the future.
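As a rough illustration of the flow described above, here is a minimal Go sketch of how a pre-restore warmup hook could be gated behind an opt-in option. The `Warmer` interface, the `warmupPacks` function, and all parameter names are hypothetical stand-ins for the purpose of this sketch, not restic's actual backend API.

```go
// Hypothetical sketch only: the shape of an opt-in warmup hook for a
// cold-storage-capable backend. None of these names are restic's real API.
package warmup

import (
	"context"
	"fmt"
	"time"
)

// Warmer is an illustrative interface a backend with cold storage support
// (such as the S3 backend described in the commit message) could implement.
type Warmer interface {
	// Warmup asks the backend to transition the given pack files to hot
	// storage and returns the names of packs that are still being restored.
	Warmup(ctx context.Context, packs []string) (pending []string, err error)
	// WarmupWait blocks until the pending packs are available or the
	// timeout expires.
	WarmupWait(ctx context.Context, pending []string, timeout time.Duration) error
}

// warmupPacks is a no-op unless the backend supports warmup and the user
// opted in (e.g. via an option comparable to `s3.enable-restore`).
func warmupPacks(ctx context.Context, be any, packs []string, enabled bool, timeout time.Duration) error {
	w, ok := be.(Warmer)
	if !ok || !enabled {
		return nil
	}
	pending, err := w.Warmup(ctx, packs)
	if err != nil {
		return fmt.Errorf("warmup: %w", err)
	}
	if len(pending) == 0 {
		return nil
	}
	fmt.Printf("waiting for %d pack file(s) to be restored from cold storage\n", len(pending))
	return w.WarmupWait(ctx, pending, timeout)
}
```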
254 lines, 7.4 KiB, Go
package main

import (
	"context"
	"fmt"

	"github.com/restic/restic/internal/debug"
	"github.com/restic/restic/internal/errors"
	"github.com/restic/restic/internal/repository"
	"github.com/restic/restic/internal/restic"
	"golang.org/x/sync/errgroup"

	"github.com/spf13/cobra"
)

var cmdCopy = &cobra.Command{
	Use:   "copy [flags] [snapshotID ...]",
	Short: "Copy snapshots from one repository to another",
	Long: `
The "copy" command copies one or more snapshots from one repository to another.

NOTE: This process will have to both download (read) and upload (write) the
entire snapshot(s) due to the different encryption keys used in the source and
destination repositories. This /may incur higher bandwidth usage and costs/ than
expected during normal backup runs.

NOTE: The copying process does not re-chunk files, which may break deduplication
between the files copied and files already stored in the destination repository.
This means that copied files, which existed in both the source and destination
repository, /may occupy up to twice their space/ in the destination repository.
This can be mitigated by the "--copy-chunker-params" option when initializing a
new destination repository using the "init" command.

EXIT STATUS
===========

Exit status is 0 if the command was successful.
Exit status is 1 if there was any error.
Exit status is 10 if the repository does not exist.
Exit status is 11 if the repository is already locked.
Exit status is 12 if the password is incorrect.
`,
	GroupID:           cmdGroupDefault,
	DisableAutoGenTag: true,
	RunE: func(cmd *cobra.Command, args []string) error {
		return runCopy(cmd.Context(), copyOptions, globalOptions, args)
	},
}

// CopyOptions bundles all options for the copy command.
type CopyOptions struct {
	secondaryRepoOptions
	restic.SnapshotFilter
}

var copyOptions CopyOptions

func init() {
	cmdRoot.AddCommand(cmdCopy)

	f := cmdCopy.Flags()
	initSecondaryRepoOptions(f, &copyOptions.secondaryRepoOptions, "destination", "to copy snapshots from")
	initMultiSnapshotFilter(f, &copyOptions.SnapshotFilter, true)
}

func runCopy(ctx context.Context, opts CopyOptions, gopts GlobalOptions, args []string) error {
	secondaryGopts, isFromRepo, err := fillSecondaryGlobalOpts(ctx, opts.secondaryRepoOptions, gopts, "destination")
	if err != nil {
		return err
	}
	if isFromRepo {
		// swap global options, if the secondary repo was set via from-repo
		gopts, secondaryGopts = secondaryGopts, gopts
	}

	ctx, srcRepo, unlock, err := openWithReadLock(ctx, gopts, gopts.NoLock)
	if err != nil {
		return err
	}
	defer unlock()

	ctx, dstRepo, unlock, err := openWithAppendLock(ctx, secondaryGopts, false)
	if err != nil {
		return err
	}
	defer unlock()

	srcSnapshotLister, err := restic.MemorizeList(ctx, srcRepo, restic.SnapshotFile)
	if err != nil {
		return err
	}

	dstSnapshotLister, err := restic.MemorizeList(ctx, dstRepo, restic.SnapshotFile)
	if err != nil {
		return err
	}

	debug.Log("Loading source index")
	bar := newIndexProgress(gopts.Quiet, gopts.JSON)
	if err := srcRepo.LoadIndex(ctx, bar); err != nil {
		return err
	}
	bar = newIndexProgress(gopts.Quiet, gopts.JSON)
	debug.Log("Loading destination index")
	if err := dstRepo.LoadIndex(ctx, bar); err != nil {
		return err
	}
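
	// Index destination snapshots by the ID of the snapshot they were copied
	// from (their "original" ID) so that already-copied snapshots can be
	// skipped below.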
	dstSnapshotByOriginal := make(map[restic.ID][]*restic.Snapshot)
	for sn := range FindFilteredSnapshots(ctx, dstSnapshotLister, dstRepo, &opts.SnapshotFilter, nil) {
		if sn.Original != nil && !sn.Original.IsNull() {
			dstSnapshotByOriginal[*sn.Original] = append(dstSnapshotByOriginal[*sn.Original], sn)
		}
		// also consider identical snapshot copies
		dstSnapshotByOriginal[*sn.ID()] = append(dstSnapshotByOriginal[*sn.ID()], sn)
	}
	if ctx.Err() != nil {
		return ctx.Err()
	}

	// remember already processed trees across all snapshots
	visitedTrees := restic.NewIDSet()

	for sn := range FindFilteredSnapshots(ctx, srcSnapshotLister, srcRepo, &opts.SnapshotFilter, args) {
		// check whether the destination has a snapshot with the same persistent ID which has similar snapshot fields
		srcOriginal := *sn.ID()
		if sn.Original != nil {
			srcOriginal = *sn.Original
		}

		if originalSns, ok := dstSnapshotByOriginal[srcOriginal]; ok {
			isCopy := false
			for _, originalSn := range originalSns {
				if similarSnapshots(originalSn, sn) {
					Verboseff("\n%v\n", sn)
					Verboseff("skipping source snapshot %s, was already copied to snapshot %s\n", sn.ID().Str(), originalSn.ID().Str())
					isCopy = true
					break
				}
			}
			if isCopy {
				continue
			}
		}
		Verbosef("\n%v\n", sn)
		Verbosef(" copy started, this may take a while...\n")
		if err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, gopts.Quiet); err != nil {
			return err
		}
		debug.Log("tree copied")

		// save snapshot
		sn.Parent = nil // Parent does not have relevance in the new repo.
		// Use Original as a persistent snapshot ID
		if sn.Original == nil {
			sn.Original = sn.ID()
		}
		newID, err := restic.SaveSnapshot(ctx, dstRepo, sn)
		if err != nil {
			return err
		}
		Verbosef("snapshot %s saved\n", newID.Str())
	}
	return ctx.Err()
}
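
// similarSnapshots reports whether the two snapshots describe the same backup,
// in which case copying sna can be skipped.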
func similarSnapshots(sna *restic.Snapshot, snb *restic.Snapshot) bool {
	// everything except Parent and Original must match
	if !sna.Time.Equal(snb.Time) || !sna.Tree.Equal(*snb.Tree) || sna.Hostname != snb.Hostname ||
		sna.Username != snb.Username || sna.UID != snb.UID || sna.GID != snb.GID ||
		len(sna.Paths) != len(snb.Paths) || len(sna.Excludes) != len(snb.Excludes) ||
		len(sna.Tags) != len(snb.Tags) {
		return false
	}
	if !sna.HasPaths(snb.Paths) || !sna.HasTags(snb.Tags) {
		return false
	}
	for i, a := range sna.Excludes {
		if a != snb.Excludes[i] {
			return false
		}
	}
	return true
}
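
// copyTree copies the tree rooted at rootTreeID, together with all data blobs
// it references, from srcRepo to dstRepo, skipping blobs that already exist in
// the destination repository.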
func copyTree(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Repository,
	visitedTrees restic.IDSet, rootTreeID restic.ID, quiet bool) error {

	wg, wgCtx := errgroup.WithContext(ctx)

	treeStream := restic.StreamTrees(wgCtx, wg, srcRepo, restic.IDs{rootTreeID}, func(treeID restic.ID) bool {
		visited := visitedTrees.Has(treeID)
		visitedTrees.Insert(treeID)
		return visited
	}, nil)

	copyBlobs := restic.NewBlobSet()
	packList := restic.NewIDSet()
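
	// enqueue marks a blob for copying and records every pack file in the
	// source repository that contains it.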
	enqueue := func(h restic.BlobHandle) {
		pb := srcRepo.LookupBlob(h.Type, h.ID)
		copyBlobs.Insert(h)
		for _, p := range pb {
			packList.Insert(p.PackID)
		}
	}

	wg.Go(func() error {
		for tree := range treeStream {
			if tree.Error != nil {
				return fmt.Errorf("LoadTree(%v) returned error %v", tree.ID.Str(), tree.Error)
			}

			// Do we already have this tree blob?
			treeHandle := restic.BlobHandle{ID: tree.ID, Type: restic.TreeBlob}
			if _, ok := dstRepo.LookupBlobSize(treeHandle.Type, treeHandle.ID); !ok {
				// copy raw tree bytes to avoid problems if the serialization changes
				enqueue(treeHandle)
			}

			for _, entry := range tree.Nodes {
				// Recursion into directories is handled by StreamTrees.
				// Copy the blobs for this file.
				for _, blobID := range entry.Content {
					h := restic.BlobHandle{Type: restic.DataBlob, ID: blobID}
					if _, ok := dstRepo.LookupBlobSize(h.Type, h.ID); !ok {
						enqueue(h)
					}
				}
			}
		}
		return nil
	})
	err := wg.Wait()
	if err != nil {
		return err
	}
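
	// Stream the collected source pack files and repack just the selected
	// blobs into the destination repository.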
	bar := newProgressMax(!quiet, uint64(len(packList)), "packs copied")
	_, err = repository.Repack(
		ctx,
		srcRepo,
		dstRepo,
		packList,
		copyBlobs,
		bar,
		func(msg string, args ...interface{}) { fmt.Printf(msg+"\n", args...) },
	)
	bar.Done()
	if err != nil {
		return errors.Fatal(err.Error())
	}
	return nil
}