Files
volt/pkg/storage/cas.go
Karl Clinger 0ebe75b2ca Volt CLI: source-available under AGPSL v5.0
Complete infrastructure platform CLI:
- Container runtime (systemd-nspawn)
- VoltVisor VMs (Neutron Stardust / QEMU)
- Stellarium CAS (content-addressed storage)
- ORAS Registry
- GitOps integration
- Landlock LSM security
- Compose orchestration
- Mesh networking

Copyright (c) Armored Gates LLC. All rights reserved.
Licensed under AGPSL v5.0
2026-03-21 02:08:15 -05:00

1085 lines
30 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
CAS (Content-Addressed Storage) — Extended blob store for Volt hybrid workloads.
This package provides the pkg-level CAS operations that back the `volt cas`
CLI commands. File-level CAS — every file is stored as a single blob keyed
by its SHA-256 digest. No chunking, no special VM formats.
Features:
- Put / Get / Exists / Delete for individual blobs
- Manifest → blob list resolution
- Deduplication verification
- Writable layer management (overlay or tmpfs on top of CAS tree)
- Snapshot: capture writable layer changes back to CAS
- Garbage collection of unreferenced blobs
Copyright (c) Armored Gates LLC. All rights reserved.
*/
package storage
import (
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"
)
// ── Constants ────────────────────────────────────────────────────────────────
const (
	// DefaultCASBase is the default CAS root directory. NewCASStore falls
	// back to it when the caller passes an empty baseDir.
	DefaultCASBase = "/var/lib/volt/cas"
)
// ── CAS Store ────────────────────────────────────────────────────────────────
// CASStore manages content-addressed blob storage. All operations are safe
// for concurrent use.
type CASStore struct {
	baseDir    string // root of the CAS tree
	objectsDir string // baseDir/objects — blob storage, one file per sha256 digest
	refsDir    string // baseDir/refs — manifest references (JSON files)
	layersDir  string // baseDir/layers — writable overlay layers, one subdir per workload
	mu         sync.RWMutex // guards blob create/read/delete in the objects tree
}
// NewCASStore creates a CAS store rooted at baseDir. An empty baseDir selects
// DefaultCASBase. Directories are created lazily on first write operation.
func NewCASStore(baseDir string) *CASStore {
	root := baseDir
	if root == "" {
		root = DefaultCASBase
	}
	store := &CASStore{baseDir: root}
	store.objectsDir = filepath.Join(root, "objects")
	store.refsDir = filepath.Join(root, "refs")
	store.layersDir = filepath.Join(root, "layers")
	return store
}
// Init creates the CAS directory structure (objects, refs, layers).
// Idempotent: existing directories are left untouched.
func (c *CASStore) Init() error {
	for _, dir := range []string{
		c.objectsDir,
		c.refsDir,
		c.layersDir,
	} {
		if err := os.MkdirAll(dir, 0755); err != nil {
			return fmt.Errorf("cas init %s: %w", dir, err)
		}
	}
	return nil
}
// BaseDir returns the CAS root directory.
func (c *CASStore) BaseDir() string {
	return c.baseDir
}

// ObjectsDir returns the path to the blob objects directory.
func (c *CASStore) ObjectsDir() string {
	return c.objectsDir
}
// ── Blob Manifest (CAS-level, not workload manifest) ─────────────────────────
// BlobManifest maps relative file paths to their SHA-256 digests. This is the
// CAS-level manifest that records which blobs constitute a directory tree.
// Manifests are persisted as JSON in the refs directory (see SaveManifest).
type BlobManifest struct {
	Name      string            `json:"name"`       // logical name; becomes part of the ref filename
	CreatedAt string            `json:"created_at"` // creation timestamp (RFC3339 where written by this package)
	Objects   map[string]string `json:"objects"`    // relPath → sha256 hex
}
// ── Put ──────────────────────────────────────────────────────────────────────
// Put stores the contents of reader as a CAS blob and returns the SHA-256
// digest. If a blob with the same digest already exists, the write is skipped
// (deduplication).
//
// The stream is spooled to a temp file inside objectsDir — so the final
// os.Rename stays on one filesystem — while the digest is computed in the
// same single pass via io.MultiWriter.
func (c *CASStore) Put(r io.Reader) (digest string, size int64, err error) {
	if err := c.Init(); err != nil {
		return "", 0, err
	}
	// Write to a temp file while computing the hash. The dot-prefixed
	// ".cas-put-*" name lets GC/Analytics skip in-flight temp files.
	tmpFile, err := os.CreateTemp(c.objectsDir, ".cas-put-*")
	if err != nil {
		return "", 0, fmt.Errorf("cas put: create temp: %w", err)
	}
	tmpPath := tmpFile.Name()
	defer func() {
		// Best-effort cleanup: after a successful rename tmpPath no longer
		// exists, so the Close/Remove errors here are intentionally ignored.
		tmpFile.Close()
		os.Remove(tmpPath) // clean up temp on any error
	}()
	hasher := sha256.New()
	w := io.MultiWriter(tmpFile, hasher)
	n, err := io.Copy(w, r)
	if err != nil {
		return "", 0, fmt.Errorf("cas put: copy: %w", err)
	}
	if err := tmpFile.Close(); err != nil {
		return "", 0, fmt.Errorf("cas put: close temp: %w", err)
	}
	digest = hex.EncodeToString(hasher.Sum(nil))
	objPath := filepath.Join(c.objectsDir, digest)
	c.mu.Lock()
	defer c.mu.Unlock()
	// Deduplication: if the blob already exists, skip. The deferred Remove
	// discards the redundant temp copy.
	if _, err := os.Stat(objPath); err == nil {
		return digest, n, nil
	}
	// Rename temp file to final location (atomic within objectsDir).
	if err := os.Rename(tmpPath, objPath); err != nil {
		return "", 0, fmt.Errorf("cas put: rename: %w", err)
	}
	return digest, n, nil
}
// PutFile stores a file from disk in the CAS. Returns the digest and whether
// the file was already present (deduplicated).
func (c *CASStore) PutFile(path string) (digest string, deduplicated bool, err error) {
	src, err := os.Open(path)
	if err != nil {
		return "", false, fmt.Errorf("cas put file: %w", err)
	}
	defer src.Close()

	// First pass: hash only, so a dedup hit avoids writing anything.
	h := sha256.New()
	if _, err := io.Copy(h, src); err != nil {
		return "", false, fmt.Errorf("cas put file: hash: %w", err)
	}
	digest = hex.EncodeToString(h.Sum(nil))
	if c.Exists(digest) {
		return digest, true, nil
	}

	// Second pass: rewind and store via Put, which re-hashes while writing —
	// so the returned digest is always that of the bytes actually stored.
	if _, err := src.Seek(0, io.SeekStart); err != nil {
		return "", false, fmt.Errorf("cas put file: seek: %w", err)
	}
	stored, _, err := c.Put(src)
	if err != nil {
		return "", false, err
	}
	return stored, false, nil
}
// ── Get ──────────────────────────────────────────────────────────────────────
// Get returns a ReadCloser for the blob identified by digest. The caller must
// close the reader.
//
// The digest is validated as 64-character lowercase hex before being joined
// onto the objects directory, so a crafted value such as "../refs/x" cannot
// escape the CAS tree (digests may arrive from the CLI, i.e. untrusted input).
func (c *CASStore) Get(digest string) (io.ReadCloser, error) {
	if len(digest) != sha256.Size*2 {
		return nil, fmt.Errorf("cas get: invalid digest %q", digest)
	}
	for _, r := range digest {
		if (r < '0' || r > '9') && (r < 'a' || r > 'f') {
			return nil, fmt.Errorf("cas get: invalid digest %q", digest)
		}
	}
	objPath := filepath.Join(c.objectsDir, digest)
	c.mu.RLock()
	defer c.mu.RUnlock()
	f, err := os.Open(objPath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, fmt.Errorf("cas get: blob %s not found", digest)
		}
		return nil, fmt.Errorf("cas get: %w", err)
	}
	return f, nil
}
// GetPath returns the filesystem path to a blob. Does not verify existence,
// and performs no validation on digest — callers must pass a trusted value
// (internally, digests come from Put / manifest files).
func (c *CASStore) GetPath(digest string) string {
	return filepath.Join(c.objectsDir, digest)
}
// ── Exists ───────────────────────────────────────────────────────────────────
// Exists reports whether a blob with the given digest is in the store.
func (c *CASStore) Exists(digest string) bool {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if _, err := os.Stat(filepath.Join(c.objectsDir, digest)); err != nil {
		return false
	}
	return true
}
// ── Delete ───────────────────────────────────────────────────────────────────
// Delete removes a blob from the store. Deleting a blob that does not exist
// is not an error (nil is returned).
func (c *CASStore) Delete(digest string) error {
	c.mu.Lock()
	defer c.mu.Unlock()
	err := os.Remove(filepath.Join(c.objectsDir, digest))
	switch {
	case err == nil, os.IsNotExist(err):
		return nil
	default:
		return fmt.Errorf("cas delete: %w", err)
	}
}
// ── Manifest Operations ──────────────────────────────────────────────────────
// SaveManifest writes a BlobManifest to the refs directory. The ref filename
// combines the manifest name with the first 12 hex chars of the JSON
// encoding's digest; the full path of the written file is returned.
func (c *CASStore) SaveManifest(bm *BlobManifest) (string, error) {
	if err := c.Init(); err != nil {
		return "", err
	}
	data, err := json.MarshalIndent(bm, "", " ")
	if err != nil {
		return "", fmt.Errorf("cas save manifest: marshal: %w", err)
	}
	sum := sha256.Sum256(data)
	short := hex.EncodeToString(sum[:])[:12]
	refPath := filepath.Join(c.refsDir, bm.Name+"-"+short+".json")
	if err := os.WriteFile(refPath, data, 0644); err != nil {
		return "", fmt.Errorf("cas save manifest: write: %w", err)
	}
	return refPath, nil
}
// LoadManifest reads a BlobManifest from the refs directory by filename.
func (c *CASStore) LoadManifest(refName string) (*BlobManifest, error) {
	raw, err := os.ReadFile(filepath.Join(c.refsDir, refName))
	if err != nil {
		return nil, fmt.Errorf("cas load manifest: %w", err)
	}
	bm := &BlobManifest{}
	if err := json.Unmarshal(raw, bm); err != nil {
		return nil, fmt.Errorf("cas load manifest: unmarshal: %w", err)
	}
	return bm, nil
}
// ResolveBlobList returns the ordered list of (relPath, digest) pairs for a
// BlobManifest, sorted by relative path. This is the input to TinyVol
// assembly.
//
// If any referenced blob is absent from the store, an error naming up to
// five missing digests is returned.
func (c *CASStore) ResolveBlobList(bm *BlobManifest) ([]BlobEntry, error) {
	var entries []BlobEntry
	var missing []string
	for relPath, digest := range bm.Objects {
		if !c.Exists(digest) {
			missing = append(missing, digest)
			continue
		}
		entries = append(entries, BlobEntry{
			RelPath:  relPath,
			Digest:   digest,
			BlobPath: c.GetPath(digest),
		})
	}
	if len(missing) > 0 {
		// Sort so the error message is stable across runs.
		sort.Strings(missing)
		return nil, fmt.Errorf("cas resolve: %d missing blobs: %s",
			len(missing), strings.Join(missing[:min(5, len(missing))], ", "))
	}
	// Map iteration order is random; sort so the promised "ordered list"
	// (and thus downstream assembly) is deterministic.
	sort.Slice(entries, func(i, j int) bool {
		return entries[i].RelPath < entries[j].RelPath
	})
	return entries, nil
}
// BlobEntry pairs a relative file path with its CAS blob location. Produced
// by ResolveBlobList.
type BlobEntry struct {
	RelPath  string // e.g. "usr/bin/curl"
	Digest   string // sha256 hex
	BlobPath string // absolute path to the blob on disk
}
// ── Deduplication Report ─────────────────────────────────────────────────────
// DedupReport summarizes deduplication across a set of blob manifests.
type DedupReport struct {
	TotalFiles     int   // total file references across all manifests
	UniqueBlobs    int   // distinct blobs found on disk among those references
	DuplicateFiles int   // TotalFiles minus UniqueBlobs
	SavedBytes     int64 // bytes saved by storing each duplicated blob once
}
// VerifyDedup computes a deduplication report for all manifests in the refs
// directory. A missing refs directory yields an empty report, not an error.
//
// The report counts every manifest reference once (TotalFiles), the distinct
// blobs found on disk (UniqueBlobs), and the bytes saved by each reference
// beyond a blob's first (SavedBytes). All counts are gathered in a single
// pass over the manifests (the previous implementation parsed every manifest
// twice and tracked the same counter under two names).
func (c *CASStore) VerifyDedup() (*DedupReport, error) {
	entries, err := os.ReadDir(c.refsDir)
	if err != nil {
		if os.IsNotExist(err) {
			return &DedupReport{}, nil
		}
		return nil, fmt.Errorf("cas verify dedup: %w", err)
	}
	digestToSize := make(map[string]int64) // digest → on-disk blob size (only if stat succeeds)
	refCount := make(map[string]int)       // digest → number of references
	totalRefs := 0
	for _, entry := range entries {
		if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".json") {
			continue
		}
		bm, err := c.LoadManifest(entry.Name())
		if err != nil {
			continue // best-effort: skip unreadable/corrupt manifests
		}
		for _, digest := range bm.Objects {
			totalRefs++
			refCount[digest]++
			if _, seen := digestToSize[digest]; !seen {
				// Missing blobs stay out of digestToSize (and thus out of
				// UniqueBlobs), matching the original behavior.
				if info, err := os.Stat(c.GetPath(digest)); err == nil {
					digestToSize[digest] = info.Size()
				}
			}
		}
	}
	var savedBytes int64
	for digest, count := range refCount {
		if count > 1 {
			savedBytes += digestToSize[digest] * int64(count-1)
		}
	}
	uniqueBlobs := len(digestToSize)
	return &DedupReport{
		TotalFiles:     totalRefs,
		UniqueBlobs:    uniqueBlobs,
		DuplicateFiles: totalRefs - uniqueBlobs,
		SavedBytes:     savedBytes,
	}, nil
}
// ── Garbage Collection ───────────────────────────────────────────────────────
// GCResult holds the outcome of a garbage collection pass.
type GCResult struct {
	Scanned      int      // blobs examined in the objects directory
	Deleted      int      // blobs actually removed (0 in a dry run)
	FreedBytes   int64    // bytes freed (or reclaimable, in a dry run)
	DryRun       bool     // true if no deletions were performed
	Unreferenced []string // digests of unreferenced blobs
}
// GC removes blobs that are not referenced by any manifest in the refs
// directory. If dryRun is true, blobs are identified but not deleted.
//
// The sweep holds the store's write lock so a concurrent Put cannot race a
// deletion (CASStore documents all operations as concurrency-safe, and the
// previous implementation deleted without taking the lock). FreedBytes now
// counts only blobs actually deleted (or, in a dry run, that would be).
func (c *CASStore) GC(dryRun bool) (*GCResult, error) {
	result := &GCResult{DryRun: dryRun}
	// Collect all referenced digests. A missing/unreadable refs directory
	// simply means nothing is referenced.
	referenced := make(map[string]bool)
	if entries, err := os.ReadDir(c.refsDir); err == nil {
		for _, entry := range entries {
			if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".json") {
				continue
			}
			bm, err := c.LoadManifest(entry.Name())
			if err != nil {
				continue // skip corrupt manifests rather than abort the sweep
			}
			for _, digest := range bm.Objects {
				referenced[digest] = true
			}
		}
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	// Walk objects and find unreferenced.
	entries, err := os.ReadDir(c.objectsDir)
	if err != nil {
		if os.IsNotExist(err) {
			return result, nil
		}
		return nil, fmt.Errorf("cas gc: read objects: %w", err)
	}
	for _, entry := range entries {
		if entry.IsDir() || strings.HasPrefix(entry.Name(), ".") {
			continue // dot-prefixed names are in-flight Put temp files
		}
		result.Scanned++
		digest := entry.Name()
		if referenced[digest] {
			continue
		}
		result.Unreferenced = append(result.Unreferenced, digest)
		if !dryRun {
			if err := os.Remove(filepath.Join(c.objectsDir, digest)); err != nil {
				continue // deletion failed: do not count it as deleted or freed
			}
			result.Deleted++
		}
		if info, _ := entry.Info(); info != nil {
			result.FreedBytes += info.Size()
		}
	}
	return result, nil
}
// ── Analytics ─────────────────────────────────────────────────────────────────
// AnalyticsReport provides comprehensive CAS store analytics.
type AnalyticsReport struct {
	// Store totals
	TotalBlobs    int   // number of blob files on disk
	TotalBlobSize int64 // sum of blob file sizes, in bytes
	// Dedup metrics
	UniqueBlobs      int     // distinct blobs on disk
	TotalReferences  int     // total blob refs across all manifests
	DedupRatio       float64 // TotalReferences / UniqueBlobs (0 for an empty store)
	StorageSavings   int64   // bytes saved by dedup
	WithoutDedupSize int64   // what total size would be without dedup
	// Per-manifest breakdown
	ManifestStats []ManifestStat
	// Top referenced blobs (up to 10, most-referenced first)
	TopBlobs []BlobRefStat
	// Size distribution
	SizeDistribution SizeDistribution
}

// ManifestStat holds size info for a single manifest/ref.
type ManifestStat struct {
	Name       string // the manifest's logical name
	RefFile    string // filename under the refs directory
	BlobCount  int    // number of blob references in the manifest
	TotalSize  int64  // bytes counting every reference
	UniqueSize int64  // bytes counting each distinct blob once
}

// BlobRefStat tracks how many manifests reference a blob.
type BlobRefStat struct {
	Digest   string // sha256 hex
	Size     int64  // blob size in bytes
	RefCount int    // total references across all manifests
}

// SizeDistribution buckets blobs by size.
type SizeDistribution struct {
	Tiny   int // < 1 KiB
	Small  int // 1 KiB – 64 KiB
	Medium int // 64 KiB – 1 MiB
	Large  int // 1 MiB – 100 MiB
	Huge   int // > 100 MiB
}
// Analytics computes a comprehensive report on the CAS store: blob counts and
// sizes, dedup ratio and savings, per-manifest stats, the most-referenced
// blobs, and a size histogram. A missing objects directory yields an empty
// report rather than an error.
func (c *CASStore) Analytics() (*AnalyticsReport, error) {
	report := &AnalyticsReport{}
	// 1. Scan all blobs in the objects directory.
	blobSizes := make(map[string]int64)
	entries, err := os.ReadDir(c.objectsDir)
	if err != nil {
		if os.IsNotExist(err) {
			return report, nil
		}
		return nil, fmt.Errorf("cas analytics: read objects: %w", err)
	}
	for _, entry := range entries {
		if entry.IsDir() || strings.HasPrefix(entry.Name(), ".") {
			continue // dot-prefixed names are in-flight Put temp files
		}
		info, err := entry.Info()
		if err != nil {
			continue
		}
		digest := entry.Name()
		blobSizes[digest] = info.Size()
		report.TotalBlobs++
		report.TotalBlobSize += info.Size()
		// Size distribution buckets.
		sz := info.Size()
		switch {
		case sz < 1024:
			report.SizeDistribution.Tiny++
		case sz < 64*1024:
			report.SizeDistribution.Small++
		case sz < 1024*1024:
			report.SizeDistribution.Medium++
		case sz < 100*1024*1024:
			report.SizeDistribution.Large++
		default:
			report.SizeDistribution.Huge++
		}
	}
	// 2. Scan all manifests and compute reference counts.
	refCount := make(map[string]int)
	refEntries, err := os.ReadDir(c.refsDir)
	if err != nil && !os.IsNotExist(err) {
		return nil, fmt.Errorf("cas analytics: read refs: %w", err)
	}
	for _, entry := range refEntries {
		if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".json") {
			continue
		}
		bm, err := c.LoadManifest(entry.Name())
		if err != nil {
			continue // skip corrupt manifests
		}
		ms := ManifestStat{
			Name:    bm.Name,
			RefFile: entry.Name(),
		}
		// UniqueSize counts each digest once even when a manifest references
		// it under several paths; TotalSize counts every reference.
		seenInManifest := make(map[string]bool)
		for _, digest := range bm.Objects {
			ms.BlobCount++
			report.TotalReferences++
			refCount[digest]++
			if sz, ok := blobSizes[digest]; ok {
				ms.TotalSize += sz
				if !seenInManifest[digest] {
					ms.UniqueSize += sz
					seenInManifest[digest] = true
				}
			}
		}
		report.ManifestStats = append(report.ManifestStats, ms)
	}
	report.UniqueBlobs = len(blobSizes)
	// Dedup ratio: total references / unique blobs.
	if report.UniqueBlobs > 0 {
		report.DedupRatio = float64(report.TotalReferences) / float64(report.UniqueBlobs)
	}
	// Compute storage savings: what would be used without dedup.
	for digest, count := range refCount {
		if sz, ok := blobSizes[digest]; ok {
			report.WithoutDedupSize += sz * int64(count)
			if count > 1 {
				report.StorageSavings += sz * int64(count-1)
			}
		}
	}
	// Unreferenced blobs exist once regardless, so they still contribute to
	// the "without dedup" size.
	for digest, sz := range blobSizes {
		if refCount[digest] == 0 {
			report.WithoutDedupSize += sz
		}
	}
	// 3. Build top-referenced blobs (sorted by ref count, then size).
	type blobStat struct {
		digest string
		size   int64
		refs   int
	}
	allStats := make([]blobStat, 0, len(refCount))
	for digest, count := range refCount {
		allStats = append(allStats, blobStat{
			digest: digest,
			size:   blobSizes[digest],
			refs:   count,
		})
	}
	// sort.Slice replaces the previous hand-rolled O(n²) swap sort, which was
	// quadratic in the number of referenced blobs.
	sort.Slice(allStats, func(i, j int) bool {
		if allStats[i].refs != allStats[j].refs {
			return allStats[i].refs > allStats[j].refs
		}
		return allStats[i].size > allStats[j].size
	})
	limit := 10
	if len(allStats) < limit {
		limit = len(allStats)
	}
	for _, s := range allStats[:limit] {
		report.TopBlobs = append(report.TopBlobs, BlobRefStat{
			Digest:   s.digest,
			Size:     s.size,
			RefCount: s.refs,
		})
	}
	return report, nil
}
// ── Retention Policy ─────────────────────────────────────────────────────────
// RetentionPolicy defines rules for automatic blob cleanup. Zero values
// ("", "0", 0) disable the corresponding rule.
type RetentionPolicy struct {
	MaxAge    string `yaml:"max_age" json:"max_age"`       // e.g. "30d", "0" = disabled; parsed by ParseDuration
	MaxSize   string `yaml:"max_size" json:"max_size"`     // e.g. "10G", "0" = disabled; parsed by ParseSize
	MinCopies int    `yaml:"min_copies" json:"min_copies"` // min ref count to protect (treated as 1 when <= 0)
	Schedule  string `yaml:"schedule" json:"schedule"`     // "daily", "weekly", etc. — not interpreted in this file
}

// RetentionConfig is the top-level retention config file structure.
type RetentionConfig struct {
	Retention RetentionPolicy `yaml:"retention" json:"retention"`
}

// DefaultRetentionConfigPath is where the retention config is stored.
const DefaultRetentionConfigPath = "/etc/volt/cas-retention.yaml"

// RetentionCandidate is a blob identified for deletion by retention policy.
type RetentionCandidate struct {
	Digest   string    // sha256 hex
	Size     int64     // blob size in bytes
	ModTime  time.Time // blob file modification time
	RefCount int       // manifest references at evaluation time
	Reason   string    // why it's a candidate
}

// RetentionResult holds the outcome of a retention policy evaluation/execution.
type RetentionResult struct {
	Candidates   []RetentionCandidate // blobs selected for deletion
	TotalFreed   int64                // bytes freed (or reclaimable, in a dry run)
	TotalDeleted int                  // blobs removed (0 in a dry run)
	DryRun       bool                 // true if no deletions were performed
	Policy       RetentionPolicy      // the policy that was applied
}
// ParseDuration parses a human-friendly duration like "30d", "12h", "7d".
func ParseDuration(s string) (time.Duration, error) {
if s == "" || s == "0" {
return 0, nil
}
s = strings.TrimSpace(s)
// Handle days specially since time.ParseDuration doesn't support 'd'
if strings.HasSuffix(s, "d") {
numStr := strings.TrimSuffix(s, "d")
var days int
if _, err := fmt.Sscanf(numStr, "%d", &days); err != nil {
return 0, fmt.Errorf("invalid duration %q: %w", s, err)
}
return time.Duration(days) * 24 * time.Hour, nil
}
if strings.HasSuffix(s, "w") {
numStr := strings.TrimSuffix(s, "w")
var weeks int
if _, err := fmt.Sscanf(numStr, "%d", &weeks); err != nil {
return 0, fmt.Errorf("invalid duration %q: %w", s, err)
}
return time.Duration(weeks) * 7 * 24 * time.Hour, nil
}
return time.ParseDuration(s)
}
// ParseSize parses a human-friendly size like "10G", "500M", "1T". Unit
// suffixes may optionally carry "B" or "iB" ("10GB", "10GiB"); all units are
// binary (powers of 1024). A bare number is a byte count. "" and "0" mean
// disabled and return 0.
//
// Fix: the previous version only stripped "B"/"i" AFTER matching the last
// character against K/M/G/T, so "10GB" and "10KiB" fell through to the plain
// byte parse and errored despite the comment promising support for them.
// The optional suffix is now stripped first.
func ParseSize(s string) (int64, error) {
	if s == "" || s == "0" {
		return 0, nil
	}
	orig := s
	s = strings.TrimSpace(strings.ToUpper(s))
	multipliers := map[byte]int64{
		'K': 1024,
		'M': 1024 * 1024,
		'G': 1024 * 1024 * 1024,
		'T': 1024 * 1024 * 1024 * 1024,
	}
	// Strip an optional "B" / "IB" so "10GB" and "10GiB" behave like "10G".
	trimmed := strings.TrimSuffix(s, "B")
	trimmed = strings.TrimSuffix(trimmed, "I")
	if trimmed != "" {
		if mult, ok := multipliers[trimmed[len(trimmed)-1]]; ok {
			var val float64
			if _, err := fmt.Sscanf(trimmed[:len(trimmed)-1], "%f", &val); err != nil {
				return 0, fmt.Errorf("invalid size %q: %w", orig, err)
			}
			return int64(val * float64(mult)), nil
		}
	}
	// No recognized unit: try as plain bytes.
	var val int64
	if _, err := fmt.Sscanf(s, "%d", &val); err != nil {
		return 0, fmt.Errorf("invalid size %q: %w", orig, err)
	}
	return val, nil
}
// ApplyRetention evaluates the retention policy against the CAS store.
// If dryRun is true, candidates are identified but not deleted.
//
// Selection order: (1) under-referenced blobs (ref count below min_copies)
// older than max_age, then (2) if the store is still projected to exceed
// max_size, the oldest remaining under-referenced blobs until under the
// limit. Referenced blobs at or above min_copies are never candidates.
func (c *CASStore) ApplyRetention(policy RetentionPolicy, dryRun bool) (*RetentionResult, error) {
	result := &RetentionResult{
		DryRun: dryRun,
		Policy: policy,
	}
	now := time.Now()
	// Parse policy values up front so a malformed policy fails fast.
	maxAge, err := ParseDuration(policy.MaxAge)
	if err != nil {
		return nil, fmt.Errorf("invalid max_age: %w", err)
	}
	maxSize, err := ParseSize(policy.MaxSize)
	if err != nil {
		return nil, fmt.Errorf("invalid max_size: %w", err)
	}
	minCopies := policy.MinCopies
	if minCopies <= 0 {
		minCopies = 1 // default: any referenced blob is protected
	}
	// 1. Collect all referenced digests and their ref counts. A missing refs
	// directory simply means nothing is referenced.
	refCount := make(map[string]int)
	if refEntries, err := os.ReadDir(c.refsDir); err == nil {
		for _, entry := range refEntries {
			if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".json") {
				continue
			}
			bm, err := c.LoadManifest(entry.Name())
			if err != nil {
				continue // skip corrupt manifests rather than abort
			}
			for _, digest := range bm.Objects {
				refCount[digest]++
			}
		}
	}
	// 2. Walk all blobs and collect the under-referenced ones.
	type blobInfo struct {
		digest  string
		size    int64
		modTime time.Time
		refs    int
	}
	var unreferenced []blobInfo
	var totalStoreSize int64
	entries, err := os.ReadDir(c.objectsDir)
	if err != nil {
		if os.IsNotExist(err) {
			return result, nil // empty store: nothing to do
		}
		return nil, fmt.Errorf("cas retention: read objects: %w", err)
	}
	for _, entry := range entries {
		if entry.IsDir() || strings.HasPrefix(entry.Name(), ".") {
			continue // dot-prefixed names are in-flight Put temp files
		}
		info, err := entry.Info()
		if err != nil {
			continue
		}
		digest := entry.Name()
		refs := refCount[digest]
		totalStoreSize += info.Size()
		// Only consider blobs that are unreferenced or below min_copies.
		if refs < minCopies {
			unreferenced = append(unreferenced, blobInfo{
				digest:  digest,
				size:    info.Size(),
				modTime: info.ModTime(),
				refs:    refs,
			})
		}
	}
	// Oldest first, so age- and size-based eviction both favor old blobs.
	// (sort.Slice replaces the previous O(n²) bubble sort.)
	sort.Slice(unreferenced, func(i, j int) bool {
		return unreferenced[i].modTime.Before(unreferenced[j].modTime)
	})
	// 3. Apply max_age: mark under-referenced blobs older than the threshold.
	candidateSet := make(map[string]bool)
	if maxAge > 0 {
		cutoff := now.Add(-maxAge)
		for _, blob := range unreferenced {
			if blob.modTime.Before(cutoff) {
				result.Candidates = append(result.Candidates, RetentionCandidate{
					Digest:   blob.digest,
					Size:     blob.size,
					ModTime:  blob.modTime,
					RefCount: blob.refs,
					Reason:   fmt.Sprintf("unreferenced, older than %s", policy.MaxAge),
				})
				candidateSet[blob.digest] = true
				result.TotalFreed += blob.size
			}
		}
	}
	// 4. Apply max_size: if the store would still be over the limit after the
	// age-based cleanup, evict the oldest remaining blobs until under limit.
	if maxSize > 0 {
		projectedSize := totalStoreSize - result.TotalFreed
		if projectedSize > maxSize {
			for _, blob := range unreferenced {
				if candidateSet[blob.digest] {
					continue // already marked
				}
				if projectedSize <= maxSize {
					break
				}
				result.Candidates = append(result.Candidates, RetentionCandidate{
					Digest:   blob.digest,
					Size:     blob.size,
					ModTime:  blob.modTime,
					RefCount: blob.refs,
					Reason:   fmt.Sprintf("store over %s limit (projected %d bytes)", policy.MaxSize, projectedSize),
				})
				candidateSet[blob.digest] = true
				result.TotalFreed += blob.size
				projectedSize -= blob.size
			}
		}
	}
	// 5. Execute deletions if not dry-run. A blob that is already gone still
	// counts as deleted, since the desired end state holds.
	if !dryRun {
		for _, candidate := range result.Candidates {
			if err := os.Remove(filepath.Join(c.objectsDir, candidate.Digest)); err != nil {
				if !os.IsNotExist(err) {
					continue
				}
			}
			result.TotalDeleted++
		}
	}
	return result, nil
}
// GCWithRetention runs garbage collection that also respects a retention
// policy (if provided). This is the integrated GC + retention path: standard
// GC first, then the policy applied on top. A nil policy skips the retention
// pass and returns a nil *RetentionResult.
func (c *CASStore) GCWithRetention(policy *RetentionPolicy, dryRun bool) (*GCResult, *RetentionResult, error) {
	gcRes, err := c.GC(dryRun)
	if err != nil {
		return nil, nil, err
	}
	if policy == nil {
		return gcRes, nil, nil
	}
	retRes, err := c.ApplyRetention(*policy, dryRun)
	if err != nil {
		return gcRes, nil, err
	}
	return gcRes, retRes, nil
}
// ── Writable Layer Management ────────────────────────────────────────────────
// WritableLayer represents a CoW layer on top of a CAS-assembled rootfs.
// The upper/work/merged directories live under layersDir/<workload>; see
// PrepareWritableLayer, which creates them.
type WritableLayer struct {
	WorkloadName string
	Mode         string // "overlay" or "tmpfs"
	LowerDir     string // the CAS-assembled tree (read-only)
	UpperDir     string // writable upper layer
	WorkDir      string // overlay workdir
	MergedDir    string // the merged mount point
}
// PrepareWritableLayer creates the directory structure for a writable overlay
// or tmpfs layer on top of the CAS-assembled rootfs at lowerDir. mode is
// recorded verbatim ("overlay" or "tmpfs"); no mount is performed here.
func (c *CASStore) PrepareWritableLayer(workloadName, lowerDir, mode string) (*WritableLayer, error) {
	base := filepath.Join(c.layersDir, workloadName)
	layer := &WritableLayer{
		WorkloadName: workloadName,
		Mode:         mode,
		LowerDir:     lowerDir,
		UpperDir:     filepath.Join(base, "upper"),
		WorkDir:      filepath.Join(base, "work"),
		MergedDir:    filepath.Join(base, "merged"),
	}
	for _, dir := range []string{layer.UpperDir, layer.WorkDir, layer.MergedDir} {
		if err := os.MkdirAll(dir, 0755); err != nil {
			return nil, fmt.Errorf("cas writable layer: mkdir %s: %w", dir, err)
		}
	}
	return layer, nil
}
// OverlayMountOptions returns the mount options string for an overlay mount
// ("lowerdir=…,upperdir=…,workdir=…"). The caller is responsible for actually
// calling mount(2) or mount(8).
func (wl *WritableLayer) OverlayMountOptions() string {
	opts := []string{
		"lowerdir=" + wl.LowerDir,
		"upperdir=" + wl.UpperDir,
		"workdir=" + wl.WorkDir,
	}
	return strings.Join(opts, ",")
}
// ── Snapshot ─────────────────────────────────────────────────────────────────
// SnapshotResult holds the outcome of capturing a writable layer to CAS.
type SnapshotResult struct {
	ManifestPath string        // path of the BlobManifest written to the refs directory
	NewBlobs     int           // files whose content was new to the store
	Deduplicated int           // files whose content already existed in the store
	TotalFiles   int           // total files walked in the upper layer
	Duration     time.Duration // wall-clock time of the snapshot
}
// SnapshotWritableLayer walks the upper (writable) layer of a workload,
// stores each changed/new file into CAS, and creates a new BlobManifest
// named "<workload>-snapshot".
func (c *CASStore) SnapshotWritableLayer(wl *WritableLayer) (*SnapshotResult, error) {
	began := time.Now()
	res := &SnapshotResult{}
	bm := &BlobManifest{
		Name:      wl.WorkloadName + "-snapshot",
		CreatedAt: time.Now().Format(time.RFC3339),
		Objects:   map[string]string{},
	}
	walkErr := filepath.Walk(wl.UpperDir, func(p string, info os.FileInfo, err error) error {
		// Directories and entries the walk could not stat are skipped;
		// only file content is captured.
		if err != nil || info.IsDir() {
			return nil
		}
		rel, _ := filepath.Rel(wl.UpperDir, p)
		res.TotalFiles++
		digest, wasDuplicate, putErr := c.PutFile(p)
		if putErr != nil {
			return fmt.Errorf("snapshot %s: %w", rel, putErr)
		}
		bm.Objects[rel] = digest
		if wasDuplicate {
			res.Deduplicated++
		} else {
			res.NewBlobs++
		}
		return nil
	})
	if walkErr != nil {
		return nil, walkErr
	}
	ref, err := c.SaveManifest(bm)
	if err != nil {
		return nil, fmt.Errorf("snapshot save manifest: %w", err)
	}
	res.ManifestPath = ref
	res.Duration = time.Since(began)
	return res, nil
}
// CleanupWritableLayer removes the writable layer directories for a workload.
// The caller should unmount the overlay first.
func (c *CASStore) CleanupWritableLayer(workloadName string) error {
	return os.RemoveAll(filepath.Join(c.layersDir, workloadName))
}
// ── Build (directory → CAS) ─────────────────────────────────────────────────
// BuildResult holds the outcome of ingesting a directory tree into CAS.
type BuildResult struct {
	ManifestPath string        // path of the BlobManifest written to the refs directory
	Stored       int           // files whose content was new to the store
	Deduplicated int           // files whose content already existed in the store
	TotalFiles   int           // total files walked in the source tree
	Duration     time.Duration // wall-clock time of the build
}
// BuildFromDir walks a directory tree, stores each file in CAS, and creates
// a BlobManifest. This is the pkg-level equivalent of `volt cas build`.
// An empty name defaults to the basename of srcDir.
func (c *CASStore) BuildFromDir(srcDir, name string) (*BuildResult, error) {
	began := time.Now()
	res := &BuildResult{}
	manifestName := name
	if manifestName == "" {
		manifestName = filepath.Base(srcDir)
	}
	bm := &BlobManifest{
		Name:      manifestName,
		CreatedAt: time.Now().Format(time.RFC3339),
		Objects:   map[string]string{},
	}
	walkErr := filepath.Walk(srcDir, func(p string, info os.FileInfo, err error) error {
		// Directories and entries the walk could not stat are skipped;
		// only file content is ingested.
		if err != nil || info.IsDir() {
			return nil
		}
		rel, _ := filepath.Rel(srcDir, p)
		res.TotalFiles++
		digest, wasDuplicate, putErr := c.PutFile(p)
		if putErr != nil {
			return fmt.Errorf("build %s: %w", rel, putErr)
		}
		bm.Objects[rel] = digest
		if wasDuplicate {
			res.Deduplicated++
		} else {
			res.Stored++
		}
		return nil
	})
	if walkErr != nil {
		return nil, walkErr
	}
	ref, err := c.SaveManifest(bm)
	if err != nil {
		return nil, fmt.Errorf("build save manifest: %w", err)
	}
	res.ManifestPath = ref
	res.Duration = time.Since(began)
	return res, nil
}
// ── Helpers ──────────────────────────────────────────────────────────────────
// min returns the smaller of the two ints a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}