From 2fd8a3865c88163f58bf785f179b459aed85b64f Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Wed, 12 Feb 2025 21:05:54 +0100 Subject: [PATCH 1/4] index: automatically write full indexes in StorePack --- internal/repository/index/associated_data_test.go | 12 ++++++------ internal/repository/index/master_index.go | 15 ++++++++++----- internal/repository/packer_manager.go | 5 +---- internal/repository/repository.go | 6 ++++-- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/internal/repository/index/associated_data_test.go b/internal/repository/index/associated_data_test.go index 82dd9908d..d811a7faa 100644 --- a/internal/repository/index/associated_data_test.go +++ b/internal/repository/index/associated_data_test.go @@ -35,8 +35,8 @@ func TestAssociatedSet(t *testing.T) { bh, blob := makeFakePackedBlob() mi := NewMasterIndex() - mi.StorePack(blob.PackID, []restic.Blob{blob.Blob}) - test.OK(t, mi.SaveIndex(context.TODO(), &noopSaver{})) + test.OK(t, mi.StorePack(context.TODO(), blob.PackID, []restic.Blob{blob.Blob}, &noopSaver{})) + test.OK(t, mi.Flush(context.TODO(), &noopSaver{})) bs := NewAssociatedSet[uint8](mi) test.Equals(t, bs.Len(), 0) @@ -118,15 +118,15 @@ func TestAssociatedSetWithExtendedIndex(t *testing.T) { _, blob := makeFakePackedBlob() mi := NewMasterIndex() - mi.StorePack(blob.PackID, []restic.Blob{blob.Blob}) - test.OK(t, mi.SaveIndex(context.TODO(), &noopSaver{})) + test.OK(t, mi.StorePack(context.TODO(), blob.PackID, []restic.Blob{blob.Blob}, &noopSaver{})) + test.OK(t, mi.Flush(context.TODO(), &noopSaver{})) bs := NewAssociatedSet[uint8](mi) // add new blobs to index after building the set of, blob2 := makeFakePackedBlob() - mi.StorePack(blob2.PackID, []restic.Blob{blob2.Blob}) - test.OK(t, mi.SaveIndex(context.TODO(), &noopSaver{})) + test.OK(t, mi.StorePack(context.TODO(), blob2.PackID, []restic.Blob{blob2.Blob}, &noopSaver{})) + test.OK(t, mi.Flush(context.TODO(), &noopSaver{})) // non-existent test.Equals(t, false, bs.Has(of)) diff --git a/internal/repository/index/master_index.go b/internal/repository/index/master_index.go index 16923090b..827f8d9f9 100644 --- a/internal/repository/index/master_index.go +++ b/internal/repository/index/master_index.go @@ -153,7 +153,12 @@ func (mi *MasterIndex) Insert(idx *Index) { } // StorePack remembers the id and pack in the index. -func (mi *MasterIndex) StorePack(id restic.ID, blobs []restic.Blob) { +func (mi *MasterIndex) StorePack(ctx context.Context, id restic.ID, blobs []restic.Blob, r restic.SaverUnpacked[restic.FileType]) error { + mi.storePack(id, blobs) + return mi.saveFullIndex(ctx, r) +} + +func (mi *MasterIndex) storePack(id restic.ID, blobs []restic.Blob) { mi.idxMutex.Lock() defer mi.idxMutex.Unlock() @@ -589,13 +594,13 @@ func (mi *MasterIndex) saveIndex(ctx context.Context, r restic.SaverUnpacked[res return mi.MergeFinalIndexes() } -// SaveIndex saves all new indexes in the backend. -func (mi *MasterIndex) SaveIndex(ctx context.Context, r restic.SaverUnpacked[restic.FileType]) error { +// Flush saves all new indexes in the backend. +func (mi *MasterIndex) Flush(ctx context.Context, r restic.SaverUnpacked[restic.FileType]) error { return mi.saveIndex(ctx, r, mi.finalizeNotFinalIndexes()...) } -// SaveFullIndex saves all full indexes in the backend. -func (mi *MasterIndex) SaveFullIndex(ctx context.Context, r restic.SaverUnpacked[restic.FileType]) error { +// saveFullIndex saves all full indexes in the backend. +func (mi *MasterIndex) saveFullIndex(ctx context.Context, r restic.SaverUnpacked[restic.FileType]) error { return mi.saveIndex(ctx, r, mi.finalizeFullIndexes()...) } diff --git a/internal/repository/packer_manager.go b/internal/repository/packer_manager.go index 9d53c911b..d5c340e39 100644 --- a/internal/repository/packer_manager.go +++ b/internal/repository/packer_manager.go @@ -187,8 +187,5 @@ func (r *Repository) savePacker(ctx context.Context, t restic.BlobType, p *packe // update blobs in the index debug.Log(" updating blobs %v to pack %v", p.Packer.Blobs(), id) - r.idx.StorePack(id, p.Packer.Blobs()) - - // Save index if full - return r.idx.SaveFullIndex(ctx, &internalRepository{r}) + return r.idx.StorePack(ctx, id, p.Packer.Blobs(), &internalRepository{r}) } diff --git a/internal/repository/repository.go b/internal/repository/repository.go index aee0db103..8f8131b96 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -542,7 +542,7 @@ func (r *Repository) Flush(ctx context.Context) error { return err } - return r.idx.SaveIndex(ctx, &internalRepository{r}) + return r.idx.Flush(ctx, &internalRepository{r}) } func (r *Repository) StartPackUploader(ctx context.Context, wg *errgroup.Group) { @@ -701,7 +701,9 @@ func (r *Repository) createIndexFromPacks(ctx context.Context, packsize map[rest invalid = append(invalid, fi.ID) m.Unlock() } - r.idx.StorePack(fi.ID, entries) + if err := r.idx.StorePack(ctx, fi.ID, entries, &internalRepository{r}); err != nil { + return err + } p.Add(1) } From 3b8d15d65121de26a566bb332615c35b4fd07d67 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 16 Feb 2025 17:03:14 +0100 Subject: [PATCH 2/4] index: rewrite oversized indexes --- internal/repository/index/index.go | 12 ++++++++++++ internal/repository/index/master_index.go | 2 +- internal/repository/pack/pack.go | 5 +++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/internal/repository/index/index.go b/internal/repository/index/index.go index c62c1c462..5a80a3c10 100644 --- a/internal/repository/index/index.go +++ b/internal/repository/index/index.go @@ -11,6 +11,7 @@ import ( "github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/repository/pack" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/debug" @@ -116,7 +117,18 @@ var IndexFull = func(idx *Index) bool { debug.Log("index %p only has %d blobs and is too young (%v)", idx, blobs, age) return false +} +var IndexOversized = func(idx *Index) bool { + idx.m.RLock() + defer idx.m.RUnlock() + + var blobs uint + for typ := range idx.byType { + blobs += idx.byType[typ].len() + } + + return blobs >= indexMaxBlobs+pack.MaxHeaderEntries } // StorePack remembers the ids of all blobs of a given pack diff --git a/internal/repository/index/master_index.go b/internal/repository/index/master_index.go index 827f8d9f9..2a2f4988b 100644 --- a/internal/repository/index/master_index.go +++ b/internal/repository/index/master_index.go @@ -419,7 +419,7 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic. newIndex := NewIndex() for task := range rewriteCh { // always rewrite indexes that include a pack that must be removed or that are not full - if len(task.idx.Packs().Intersect(excludePacks)) == 0 && IndexFull(task.idx) { + if len(task.idx.Packs().Intersect(excludePacks)) == 0 && IndexFull(task.idx) && !IndexOversized(task.idx) { // make sure that each pack is only stored exactly once in the index excludePacks.Merge(task.idx.Packs()) // index is already up to date diff --git a/internal/repository/pack/pack.go b/internal/repository/pack/pack.go index 57957ce91..9dfd1004d 100644 --- a/internal/repository/pack/pack.go +++ b/internal/repository/pack/pack.go @@ -215,6 +215,11 @@ const ( eagerEntries = 15 ) +var ( + // MaxHeaderEntries is the number of entries a pack file can contain at most + MaxHeaderEntries = (MaxHeaderSize - headerSize) / entrySize +) + // readRecords reads up to bufsize bytes from the underlying ReaderAt, returning // the raw header, the total number of bytes in the header, and any error. // If the header contains fewer than bufsize bytes, the header is truncated to From 39e63ee4e37457a0fe6c33a6d1d4858d2278e740 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 16 Feb 2025 17:42:00 +0100 Subject: [PATCH 3/4] index: add tests for oversized index handling --- .../repository/index/index_internal_test.go | 40 +++++++++++ .../repository/index/master_index_test.go | 69 +++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 internal/repository/index/index_internal_test.go diff --git a/internal/repository/index/index_internal_test.go b/internal/repository/index/index_internal_test.go new file mode 100644 index 000000000..f55c9f546 --- /dev/null +++ b/internal/repository/index/index_internal_test.go @@ -0,0 +1,40 @@ +package index + +import ( + "testing" + + "github.com/restic/restic/internal/repository/pack" + "github.com/restic/restic/internal/restic" + rtest "github.com/restic/restic/internal/test" +) + +func TestIndexOversized(t *testing.T) { + idx := NewIndex() + + // Add blobs up to indexMaxBlobs + pack.MaxHeaderEntries - 1 + packID := idx.addToPacks(restic.NewRandomID()) + for i := uint(0); i < indexMaxBlobs+pack.MaxHeaderEntries-1; i++ { + idx.store(packID, restic.Blob{ + BlobHandle: restic.BlobHandle{ + Type: restic.DataBlob, + ID: restic.NewRandomID(), + }, + Length: 100, + Offset: uint(i) * 100, + }) + } + + rtest.Assert(t, !IndexOversized(idx), "index should not be considered oversized") + + // Add one more blob to exceed the limit + idx.store(packID, restic.Blob{ + BlobHandle: restic.BlobHandle{ + Type: restic.DataBlob, + ID: restic.NewRandomID(), + }, + Length: 100, + Offset: uint(indexMaxBlobs+pack.MaxHeaderEntries) * 100, + }) + + rtest.Assert(t, IndexOversized(idx), "index should be considered oversized") +} diff --git a/internal/repository/index/master_index_test.go b/internal/repository/index/master_index_test.go index 516ef045c..ac7beb84f 100644 --- a/internal/repository/index/master_index_test.go +++ b/internal/repository/index/master_index_test.go @@ -459,3 +459,72 @@ func listPacks(t testing.TB, repo restic.Lister) restic.IDSet { })) return s } + +func TestRewriteOversizedIndex(t *testing.T) { + repo, unpacked, _ := repository.TestRepositoryWithVersion(t, 2) + + const fullIndexCount = 1000 + + // replace index size checks for testing + originalIndexFull := index.IndexFull + originalIndexOversized := index.IndexOversized + defer func() { + index.IndexFull = originalIndexFull + index.IndexOversized = originalIndexOversized + }() + index.IndexFull = func(idx *index.Index) bool { + return idx.Len(restic.DataBlob) > fullIndexCount + } + index.IndexOversized = func(idx *index.Index) bool { + return idx.Len(restic.DataBlob) > 2*fullIndexCount + } + + var blobs []restic.Blob + + // build oversized index + idx := index.NewIndex() + numPacks := 5 + for p := 0; p < numPacks; p++ { + packID := restic.NewRandomID() + packBlobs := make([]restic.Blob, 0, fullIndexCount) + + for i := 0; i < fullIndexCount; i++ { + blob := restic.Blob{ + BlobHandle: restic.BlobHandle{ + Type: restic.DataBlob, + ID: restic.NewRandomID(), + }, + Length: 100, + Offset: uint(i * 100), + } + packBlobs = append(packBlobs, blob) + blobs = append(blobs, blob) + } + idx.StorePack(packID, packBlobs) + } + idx.Finalize() + + _, err := idx.SaveIndex(context.Background(), unpacked) + rtest.OK(t, err) + + // construct master index for the oversized index + mi := index.NewMasterIndex() + rtest.OK(t, mi.Load(context.Background(), repo, nil, nil)) + + // rewrite the index + rtest.OK(t, mi.Rewrite(context.Background(), unpacked, nil, nil, nil, index.MasterIndexRewriteOpts{})) + + // load the rewritten indexes + mi2 := index.NewMasterIndex() + rtest.OK(t, mi2.Load(context.Background(), repo, nil, nil)) + + // verify that blobs are still in the index + for _, blob := range blobs { + found := mi2.Has(blob.BlobHandle) + rtest.Assert(t, found, "blob %v missing after rewrite", blob.ID) + } + + // check that multiple indexes were created + ids := mi2.IDs() + rtest.Assert(t, len(ids) > 1, "oversized index was not split into multiple indexes") +} From 00628e952f2efaa0ad276fb81b964408a5d8a5a3 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 16 Feb 2025 17:58:36 +0100 Subject: [PATCH 4/4] add changelog for oversized indexes --- changelog/unreleased/pull-5249 | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 changelog/unreleased/pull-5249 diff --git a/changelog/unreleased/pull-5249 b/changelog/unreleased/pull-5249 new file mode 100644 index 000000000..e8d820229 --- /dev/null +++ b/changelog/unreleased/pull-5249 @@ -0,0 +1,10 @@ +Bugfix: Fix creation of oversized indexes by `repair index --read-all-packs` + +Since restic 0.17.0, the new index created by `repair index --read-all-packs` was +written as a single large index. This significantly increases memory usage while +loading the index. + +The index is now correctly split into multiple smaller indexes. `repair index` now +also automatically splits oversized indexes. + +https://github.com/restic/restic/pull/5249