package storage

import (
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"os"
	"path/filepath"
	"testing"
	"time"
)

// createTestBlob writes content into objectsDir under its hex-encoded
// SHA-256 digest and returns that digest.
func createTestBlob(t *testing.T, objectsDir string, content []byte) string {
	t.Helper()
	h := sha256.Sum256(content)
	digest := hex.EncodeToString(h[:])
	if err := os.WriteFile(filepath.Join(objectsDir, digest), content, 0644); err != nil {
		t.Fatalf("create blob: %v", err)
	}
	return digest
}

// createTestManifest writes a manifest into refsDir referencing the given
// path→digest objects. The file name embeds a prefix of the manifest's own
// content digest, mirroring how the store names refs.
func createTestManifest(t *testing.T, refsDir, name string, objects map[string]string) {
	t.Helper()
	bm := BlobManifest{
		Name:      name,
		CreatedAt: time.Now().Format(time.RFC3339),
		Objects:   objects,
	}
	data, err := json.MarshalIndent(bm, "", " ")
	if err != nil {
		t.Fatalf("marshal manifest: %v", err)
	}
	h := sha256.Sum256(data)
	digest := hex.EncodeToString(h[:])
	refName := name + "-" + digest[:12] + ".json"
	if err := os.WriteFile(filepath.Join(refsDir, refName), data, 0644); err != nil {
		t.Fatalf("write manifest: %v", err)
	}
}

// setupTestCAS creates and initializes a CAS store rooted in a per-test
// temporary directory.
func setupTestCAS(t *testing.T) *CASStore {
	t.Helper()
	tmpDir := t.TempDir()
	store := NewCASStore(tmpDir)
	if err := store.Init(); err != nil {
		t.Fatalf("init CAS: %v", err)
	}
	return store
}

// setBlobMTime backdates a blob's access and modification times. Unlike a
// bare os.Chtimes call, it fails the test on error — a silently ignored
// chtimes failure would invalidate every age-based retention assertion.
func setBlobMTime(t *testing.T, path string, ts time.Time) {
	t.Helper()
	if err := os.Chtimes(path, ts, ts); err != nil {
		t.Fatalf("chtimes %s: %v", path, err)
	}
}

// TestDedupAnalytics verifies blob/reference counts, dedup ratio, storage
// savings, manifest stats, and top-blob ordering for a store where one blob
// is shared by two manifests.
func TestDedupAnalytics(t *testing.T) {
	store := setupTestCAS(t)

	// Create 3 distinct blobs
	digestA := createTestBlob(t, store.ObjectsDir(), []byte("file-content-alpha"))
	digestB := createTestBlob(t, store.ObjectsDir(), []byte("file-content-bravo"))
	digestC := createTestBlob(t, store.ObjectsDir(), []byte("file-content-charlie"))

	// Manifest 1: references A and B
	createTestManifest(t, store.refsDir, "manifest1", map[string]string{
		"bin/alpha": digestA,
		"bin/bravo": digestB,
	})

	// Manifest 2: references A and C (A is shared/deduped)
	createTestManifest(t, store.refsDir, "manifest2", map[string]string{
		"bin/alpha":   digestA,
		"lib/charlie": digestC,
	})

	report, err := store.Analytics()
	if err != nil {
		t.Fatalf("Analytics: %v", err)
	}

	// 3 distinct blobs
	if report.TotalBlobs != 3 {
		t.Errorf("TotalBlobs = %d, want 3", report.TotalBlobs)
	}
	// 4 total references across both manifests
	if report.TotalReferences != 4 {
		t.Errorf("TotalReferences = %d, want 4", report.TotalReferences)
	}
	// 3 unique blobs
	if report.UniqueBlobs != 3 {
		t.Errorf("UniqueBlobs = %d, want 3", report.UniqueBlobs)
	}
	// Dedup ratio = 4/3 ≈ 1.33
	if report.DedupRatio < 1.3 || report.DedupRatio > 1.4 {
		t.Errorf("DedupRatio = %.2f, want ~1.33", report.DedupRatio)
	}
	// Storage savings: blob A (18 bytes) is referenced 2 times, saving 1 copy
	sizeA := int64(len("file-content-alpha"))
	if report.StorageSavings != sizeA {
		t.Errorf("StorageSavings = %d, want %d", report.StorageSavings, sizeA)
	}
	// 2 manifests
	if len(report.ManifestStats) != 2 {
		t.Errorf("ManifestStats count = %d, want 2", len(report.ManifestStats))
	}
	// Top blobs: A should be #1 with 2 refs
	if len(report.TopBlobs) == 0 {
		t.Fatal("expected TopBlobs to be non-empty")
	}
	if report.TopBlobs[0].Digest != digestA {
		t.Errorf("TopBlobs[0].Digest = %s, want %s", report.TopBlobs[0].Digest, digestA)
	}
	if report.TopBlobs[0].RefCount != 2 {
		t.Errorf("TopBlobs[0].RefCount = %d, want 2", report.TopBlobs[0].RefCount)
	}
}

// TestAnalyticsEmptyStore verifies Analytics reports zeros on a freshly
// initialized, empty store.
func TestAnalyticsEmptyStore(t *testing.T) {
	store := setupTestCAS(t)

	report, err := store.Analytics()
	if err != nil {
		t.Fatalf("Analytics: %v", err)
	}
	if report.TotalBlobs != 0 {
		t.Errorf("TotalBlobs = %d, want 0", report.TotalBlobs)
	}
	if report.TotalReferences != 0 {
		t.Errorf("TotalReferences = %d, want 0", report.TotalReferences)
	}
}

// TestAnalyticsSizeDistribution verifies blobs are bucketed into the
// tiny / small / medium size classes.
func TestAnalyticsSizeDistribution(t *testing.T) {
	store := setupTestCAS(t)

	// Tiny: < 1 KiB
	createTestBlob(t, store.ObjectsDir(), []byte("tiny"))

	// Small: 1 KiB – 64 KiB (create a 2 KiB blob)
	smallContent := make([]byte, 2048)
	for i := range smallContent {
		smallContent[i] = byte(i % 256)
	}
	createTestBlob(t, store.ObjectsDir(), smallContent)

	// Medium: 64 KiB – 1 MiB (create a 100 KiB blob)
	mediumContent := make([]byte, 100*1024)
	for i := range mediumContent {
		mediumContent[i] = byte((i + 1) % 256)
	}
	createTestBlob(t, store.ObjectsDir(), mediumContent)

	report, err := store.Analytics()
	if err != nil {
		t.Fatalf("Analytics: %v", err)
	}
	if report.SizeDistribution.Tiny != 1 {
		t.Errorf("Tiny = %d, want 1", report.SizeDistribution.Tiny)
	}
	if report.SizeDistribution.Small != 1 {
		t.Errorf("Small = %d, want 1", report.SizeDistribution.Small)
	}
	if report.SizeDistribution.Medium != 1 {
		t.Errorf("Medium = %d, want 1", report.SizeDistribution.Medium)
	}
}

// TestRetentionMaxAge verifies a dry-run MaxAge policy flags only the
// blob older than the cutoff and deletes nothing.
func TestRetentionMaxAge(t *testing.T) {
	store := setupTestCAS(t)

	// Create blobs — one "old", one "new"
	oldDigest := createTestBlob(t, store.ObjectsDir(), []byte("old-blob-content"))
	newDigest := createTestBlob(t, store.ObjectsDir(), []byte("new-blob-content"))

	// Make the "old" blob look 45 days old
	oldTime := time.Now().Add(-45 * 24 * time.Hour)
	setBlobMTime(t, filepath.Join(store.ObjectsDir(), oldDigest), oldTime)

	// Neither blob is referenced by any manifest → both are unreferenced
	policy := RetentionPolicy{
		MaxAge:    "30d",
		MinCopies: 1,
	}
	result, err := store.ApplyRetention(policy, true) // dry run
	if err != nil {
		t.Fatalf("ApplyRetention: %v", err)
	}

	// Only the old blob should be a candidate
	if len(result.Candidates) != 1 {
		t.Fatalf("Candidates = %d, want 1", len(result.Candidates))
	}
	if result.Candidates[0].Digest != oldDigest {
		t.Errorf("Candidate digest = %s, want %s", result.Candidates[0].Digest, oldDigest)
	}
	// New blob should NOT be a candidate
	for _, c := range result.Candidates {
		if c.Digest == newDigest {
			t.Errorf("new blob should not be a candidate")
		}
	}
	// Verify dry run didn't delete anything
	if _, err := os.Stat(filepath.Join(store.ObjectsDir(), oldDigest)); err != nil {
		t.Errorf("dry run should not have deleted old blob")
	}
}

// TestRetentionMaxAgeExecute verifies a non-dry-run MaxAge policy actually
// removes the expired blob from disk.
func TestRetentionMaxAgeExecute(t *testing.T) {
	store := setupTestCAS(t)

	oldDigest := createTestBlob(t, store.ObjectsDir(), []byte("old-blob-for-deletion"))
	oldTime := time.Now().Add(-45 * 24 * time.Hour)
	setBlobMTime(t, filepath.Join(store.ObjectsDir(), oldDigest), oldTime)

	policy := RetentionPolicy{
		MaxAge:    "30d",
		MinCopies: 1,
	}
	result, err := store.ApplyRetention(policy, false) // actually delete
	if err != nil {
		t.Fatalf("ApplyRetention: %v", err)
	}
	if result.TotalDeleted != 1 {
		t.Errorf("TotalDeleted = %d, want 1", result.TotalDeleted)
	}
	// Blob should be gone
	if _, err := os.Stat(filepath.Join(store.ObjectsDir(), oldDigest)); !os.IsNotExist(err) {
		t.Errorf("old blob should have been deleted")
	}
}

// TestRetentionMaxSize verifies a MaxSize policy evicts oldest-first until
// the store fits under the byte limit.
func TestRetentionMaxSize(t *testing.T) {
	store := setupTestCAS(t)

	// Create several blobs totaling more than our limit
	blobs := []struct {
		content []byte
		age     time.Duration
	}{
		{make([]byte, 500), -10 * 24 * time.Hour}, // 500 bytes, 10 days old
		{make([]byte, 600), -20 * 24 * time.Hour}, // 600 bytes, 20 days old
		{make([]byte, 400), -5 * 24 * time.Hour},  // 400 bytes, 5 days old
	}
	// Fill with distinct content
	for i := range blobs {
		for j := range blobs[i].content {
			blobs[i].content[j] = byte(i*100 + j%256)
		}
	}
	var digests []string
	for _, b := range blobs {
		d := createTestBlob(t, store.ObjectsDir(), b.content)
		digests = append(digests, d)
		setBlobMTime(t, filepath.Join(store.ObjectsDir(), d), time.Now().Add(b.age))
	}

	// Total: 1500 bytes. Set max to 1000 bytes.
	policy := RetentionPolicy{
		MaxSize:   "1000",
		MinCopies: 1,
	}
	result, err := store.ApplyRetention(policy, true)
	if err != nil {
		t.Fatalf("ApplyRetention: %v", err)
	}

	// Should identify enough blobs to get under 1000 bytes
	var freedTotal int64
	for _, c := range result.Candidates {
		freedTotal += c.Size
	}
	remaining := int64(1500) - freedTotal
	if remaining > 1000 {
		t.Errorf("remaining %d bytes still over 1000 limit after retention", remaining)
	}

	// The oldest blob (20 days) should be deleted first
	if len(result.Candidates) == 0 {
		t.Fatal("expected at least one candidate")
	}
	// First candidate should be the oldest unreferenced blob
	if result.Candidates[0].Digest != digests[1] { // 20 days old
		t.Errorf("expected oldest blob to be first candidate, got %s", result.Candidates[0].Digest[:16])
	}
}

// TestRetentionProtectsReferenced verifies that MaxAge retention never
// flags a blob that still has at least MinCopies manifest references.
func TestRetentionProtectsReferenced(t *testing.T) {
	store := setupTestCAS(t)

	// Create blobs
	referencedDigest := createTestBlob(t, store.ObjectsDir(), []byte("referenced-blob"))
	unreferencedDigest := createTestBlob(t, store.ObjectsDir(), []byte("unreferenced-blob"))

	// Make both blobs old
	oldTime := time.Now().Add(-60 * 24 * time.Hour)
	setBlobMTime(t, filepath.Join(store.ObjectsDir(), referencedDigest), oldTime)
	setBlobMTime(t, filepath.Join(store.ObjectsDir(), unreferencedDigest), oldTime)

	// Create a manifest referencing only the first blob
	createTestManifest(t, store.refsDir, "keep-manifest", map[string]string{
		"important/file": referencedDigest,
	})

	policy := RetentionPolicy{
		MaxAge:    "30d",
		MinCopies: 1, // blob has 1 ref, so it's protected
	}
	result, err := store.ApplyRetention(policy, true)
	if err != nil {
		t.Fatalf("ApplyRetention: %v", err)
	}

	// Only unreferenced blob should be a candidate
	for _, c := range result.Candidates {
		if c.Digest == referencedDigest {
			t.Errorf("referenced blob %s should be protected, but was marked for deletion", referencedDigest[:16])
		}
	}
	// Unreferenced blob should be a candidate
	found := false
	for _, c := range result.Candidates {
		if c.Digest == unreferencedDigest {
			found = true
			break
		}
	}
	if !found {
		t.Errorf("unreferenced blob should be a candidate for deletion")
	}
}

// TestRetentionProtectsReferencedMaxSize verifies that MaxSize retention
// keeps referenced blobs even when the store stays over the size limit.
func TestRetentionProtectsReferencedMaxSize(t *testing.T) {
	store := setupTestCAS(t)

	// Create blobs
	refContent := make([]byte, 800)
	for i := range refContent {
		refContent[i] = byte(i % 256)
	}
	referencedDigest := createTestBlob(t, store.ObjectsDir(), refContent)

	unrefContent := make([]byte, 500)
	for i := range unrefContent {
		unrefContent[i] = byte((i + 50) % 256)
	}
	unreferencedDigest := createTestBlob(t, store.ObjectsDir(), unrefContent)

	// Reference the 800-byte blob
	createTestManifest(t, store.refsDir, "protect-me", map[string]string{
		"big/file": referencedDigest,
	})

	// Total: 1300 bytes. Limit: 500 bytes.
	// Even though we're over limit, the referenced blob must be kept.
	policy := RetentionPolicy{
		MaxSize:   "500",
		MinCopies: 1,
	}
	result, err := store.ApplyRetention(policy, false) // actually delete
	if err != nil {
		t.Fatalf("ApplyRetention: %v", err)
	}

	// Referenced blob must still exist
	if _, err := os.Stat(filepath.Join(store.ObjectsDir(), referencedDigest)); err != nil {
		t.Errorf("referenced blob was deleted despite having refs >= min_copies")
	}
	// Unreferenced blob should be deleted
	if _, err := os.Stat(filepath.Join(store.ObjectsDir(), unreferencedDigest)); !os.IsNotExist(err) {
		t.Errorf("unreferenced blob should have been deleted")
	}
	// Exactly the one unreferenced blob should be reported deleted
	// (previously the result was discarded with `_ = result`).
	if result.TotalDeleted != 1 {
		t.Errorf("TotalDeleted = %d, want 1", result.TotalDeleted)
	}
}

// TestGCWithRetention verifies the combined GC+retention dry run reports
// the unreferenced, expired blob in both result sets.
func TestGCWithRetention(t *testing.T) {
	store := setupTestCAS(t)

	// Create blobs
	digestA := createTestBlob(t, store.ObjectsDir(), []byte("blob-a-content"))
	digestB := createTestBlob(t, store.ObjectsDir(), []byte("blob-b-content"))

	// A is referenced, B is not
	createTestManifest(t, store.refsDir, "gc-test", map[string]string{
		"file/a": digestA,
	})

	// Make B old
	oldTime := time.Now().Add(-90 * 24 * time.Hour)
	setBlobMTime(t, filepath.Join(store.ObjectsDir(), digestB), oldTime)

	policy := RetentionPolicy{
		MaxAge:    "30d",
		MinCopies: 1,
	}
	gcResult, retResult, err := store.GCWithRetention(&policy, true) // dry run
	if err != nil {
		t.Fatalf("GCWithRetention: %v", err)
	}

	// GC should find B as unreferenced
	if len(gcResult.Unreferenced) != 1 {
		t.Errorf("GC Unreferenced = %d, want 1", len(gcResult.Unreferenced))
	}
	// Retention should also flag B
	if retResult == nil {
		t.Fatal("expected retention result")
	}
	if len(retResult.Candidates) != 1 {
		t.Errorf("Retention Candidates = %d, want 1", len(retResult.Candidates))
	}
}

// TestParseDuration exercises the day/week/hour suffixes plus the empty
// and invalid inputs.
func TestParseDuration(t *testing.T) {
	tests := []struct {
		input    string
		expected time.Duration
		wantErr  bool
	}{
		{"30d", 30 * 24 * time.Hour, false},
		{"7d", 7 * 24 * time.Hour, false},
		{"2w", 14 * 24 * time.Hour, false},
		{"12h", 12 * time.Hour, false},
		{"0", 0, false},
		{"", 0, false},
		{"xyz", 0, true},
	}
	for _, tc := range tests {
		got, err := ParseDuration(tc.input)
		if tc.wantErr {
			if err == nil {
				t.Errorf("ParseDuration(%q) expected error", tc.input)
			}
			continue
		}
		if err != nil {
			t.Errorf("ParseDuration(%q) error: %v", tc.input, err)
			continue
		}
		if got != tc.expected {
			t.Errorf("ParseDuration(%q) = %v, want %v", tc.input, got, tc.expected)
		}
	}
}

// TestParseSize exercises the K/M/G/T suffixes plus bare numbers, empty,
// and invalid inputs.
func TestParseSize(t *testing.T) {
	tests := []struct {
		input    string
		expected int64
		wantErr  bool
	}{
		{"10G", 10 * 1024 * 1024 * 1024, false},
		{"500M", 500 * 1024 * 1024, false},
		{"1T", 1024 * 1024 * 1024 * 1024, false},
		{"1024K", 1024 * 1024, false},
		{"1024", 1024, false},
		{"0", 0, false},
		{"", 0, false},
		{"abc", 0, true},
	}
	for _, tc := range tests {
		got, err := ParseSize(tc.input)
		if tc.wantErr {
			if err == nil {
				t.Errorf("ParseSize(%q) expected error", tc.input)
			}
			continue
		}
		if err != nil {
			t.Errorf("ParseSize(%q) error: %v", tc.input, err)
			continue
		}
		if got != tc.expected {
			t.Errorf("ParseSize(%q) = %d, want %d", tc.input, got, tc.expected)
		}
	}
}