/*
Backup Manager — CAS-based backup and restore for Volt workloads.

Provides named, metadata-rich backups built on top of the CAS store.
A backup is a CAS BlobManifest + a metadata sidecar (JSON) that records
the workload name, mode, timestamp, tags, size, and blob count.

Features:
  - Create backup from a workload's rootfs → CAS + CDN
  - List backups (all or per-workload)
  - Restore backup → reassemble rootfs via TinyVol
  - Delete backup (metadata only — blobs cleaned up by CAS GC)
  - Schedule automated backups via systemd timers

Backups are incremental by nature — CAS dedup means only changed files
produce new blobs. A 2 GB rootfs with 50 MB of changes stores 50 MB new data.

Copyright (c) Armored Gates LLC. All rights reserved.
*/
package backup

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"github.com/armoredgate/volt/pkg/storage"
)

// ── Constants ────────────────────────────────────────────────────────────────

const (
	// DefaultBackupDir is where backup metadata is stored when no custom
	// directory is given to the manager.
	DefaultBackupDir = "/var/lib/volt/backups"

	// BackupTypeManual is a user-initiated backup. Create uses it when no
	// type is specified.
	BackupTypeManual = "manual"

	// BackupTypeScheduled is an automatically scheduled backup.
	BackupTypeScheduled = "scheduled"

	// BackupTypeSnapshot is a point-in-time snapshot.
	BackupTypeSnapshot = "snapshot"

	// BackupTypePreDeploy is created automatically before deployments.
	BackupTypePreDeploy = "pre-deploy"
)

// ── Backup Metadata ──────────────────────────────────────────────────────────

// BackupMeta holds the metadata sidecar for a backup. It is stored as
// "<ID>.json" in the backup directory and points at the CAS manifest that
// holds the actual content.
type BackupMeta struct {
	// ID is the unique identifier for this backup. IDs produced by this
	// package have the form "<workload>-<UTC timestamp>-<type>".
	ID string `json:"id"`

	// WorkloadName is the workload that was backed up.
	WorkloadName string `json:"workload_name"`

	// WorkloadMode is the execution mode at backup time (container, hybrid-native, etc.).
	WorkloadMode string `json:"workload_mode,omitempty"`

	// Type indicates how the backup was created (manual, scheduled, snapshot, pre-deploy).
	Type string `json:"type"`

	// ManifestRef is the CAS manifest filename in the refs directory.
	// Restore passes it to the CAS store to load the manifest.
	ManifestRef string `json:"manifest_ref"`

	// Tags are user-defined labels for the backup.
	Tags []string `json:"tags,omitempty"`

	// CreatedAt is when the backup metadata was created (recorded in UTC).
	CreatedAt time.Time `json:"created_at"`

	// BlobCount is the number of files/blobs in the backup.
	BlobCount int `json:"blob_count"`

	// TotalSize is the total logical size of all backed-up files, in bytes.
	TotalSize int64 `json:"total_size"`

	// NewBlobs is the number of blobs that were newly stored (not deduplicated).
	NewBlobs int `json:"new_blobs"`

	// DedupBlobs is the number of blobs that were already in CAS.
	DedupBlobs int `json:"dedup_blobs"`

	// Duration is how long the backup took, as reported by the CAS build.
	// encoding/json serializes it as an integer number of nanoseconds.
	Duration time.Duration `json:"duration"`

	// PushedToCDN indicates whether blobs were pushed to the CDN.
	// NOTE(review): nothing in this file sets this field, so it is always
	// false for backups created here — confirm whether a CDN push is wired
	// up elsewhere.
	PushedToCDN bool `json:"pushed_to_cdn"`

	// SourcePath is the rootfs path that was backed up. Restore falls back
	// to it when no target directory is given.
	SourcePath string `json:"source_path,omitempty"`

	// Notes is an optional user-provided description.
	Notes string `json:"notes,omitempty"`
}

// ── Backup Manager ───────────────────────────────────────────────────────────

// Manager handles backup operations, coordinating between the CAS store and
// the backup metadata directory.
type Manager struct {
	// cas is the content-addressed store that holds manifests and blobs.
	cas *storage.CASStore
	// backupDir is the directory holding the "<ID>.json" metadata sidecars.
	backupDir string
}

// NewManager creates a backup manager with the given CAS store, storing
// metadata in DefaultBackupDir.
func NewManager(cas *storage.CASStore) *Manager {
	return &Manager{
		cas:       cas,
		backupDir: DefaultBackupDir,
	}
}

// NewManagerWithDir creates a backup manager with a custom backup directory.
// An empty backupDir falls back to DefaultBackupDir.
func NewManagerWithDir(cas *storage.CASStore, backupDir string) *Manager {
	if backupDir == "" {
		backupDir = DefaultBackupDir
	}
	return &Manager{
		cas:       cas,
		backupDir: backupDir,
	}
}

// Init creates the backup metadata directory. Idempotent.
func (m *Manager) Init() error { return os.MkdirAll(m.backupDir, 0755) } // ── Create ─────────────────────────────────────────────────────────────────── // CreateOptions configures a backup creation. type CreateOptions struct { WorkloadName string WorkloadMode string SourcePath string // rootfs path to back up Type string // manual, scheduled, snapshot, pre-deploy Tags []string Notes string PushToCDN bool // whether to push blobs to CDN after backup } // Create performs a full backup of the given source path into CAS and records // metadata. Returns the backup metadata with timing and dedup statistics. func (m *Manager) Create(opts CreateOptions) (*BackupMeta, error) { if err := m.Init(); err != nil { return nil, fmt.Errorf("backup init: %w", err) } if opts.SourcePath == "" { return nil, fmt.Errorf("backup create: source path is required") } if opts.WorkloadName == "" { return nil, fmt.Errorf("backup create: workload name is required") } if opts.Type == "" { opts.Type = BackupTypeManual } // Verify source exists. info, err := os.Stat(opts.SourcePath) if err != nil { return nil, fmt.Errorf("backup create: source %s: %w", opts.SourcePath, err) } if !info.IsDir() { return nil, fmt.Errorf("backup create: source %s is not a directory", opts.SourcePath) } // Generate backup ID. backupID := generateBackupID(opts.WorkloadName, opts.Type) // Build CAS manifest from the source directory. manifestName := fmt.Sprintf("backup-%s-%s", opts.WorkloadName, backupID) result, err := m.cas.BuildFromDir(opts.SourcePath, manifestName) if err != nil { return nil, fmt.Errorf("backup create: CAS build: %w", err) } // Compute total size of all blobs in the backup. var totalSize int64 // Load the manifest we just created to iterate blobs. 
manifestBasename := filepath.Base(result.ManifestPath) bm, err := m.cas.LoadManifest(manifestBasename) if err == nil { for _, digest := range bm.Objects { blobPath := m.cas.GetPath(digest) if fi, err := os.Stat(blobPath); err == nil { totalSize += fi.Size() } } } // Create metadata. meta := &BackupMeta{ ID: backupID, WorkloadName: opts.WorkloadName, WorkloadMode: opts.WorkloadMode, Type: opts.Type, ManifestRef: manifestBasename, Tags: opts.Tags, CreatedAt: time.Now().UTC(), BlobCount: result.TotalFiles, TotalSize: totalSize, NewBlobs: result.Stored, DedupBlobs: result.Deduplicated, Duration: result.Duration, SourcePath: opts.SourcePath, Notes: opts.Notes, } // Save metadata. if err := m.saveMeta(meta); err != nil { return nil, fmt.Errorf("backup create: save metadata: %w", err) } return meta, nil } // ── List ───────────────────────────────────────────────────────────────────── // ListOptions configures backup listing. type ListOptions struct { WorkloadName string // filter by workload (empty = all) Type string // filter by type (empty = all) Limit int // max results (0 = unlimited) } // List returns backup metadata, optionally filtered by workload name and type. // Results are sorted by creation time, newest first. func (m *Manager) List(opts ListOptions) ([]*BackupMeta, error) { entries, err := os.ReadDir(m.backupDir) if err != nil { if os.IsNotExist(err) { return nil, nil } return nil, fmt.Errorf("backup list: read dir: %w", err) } var backups []*BackupMeta for _, entry := range entries { if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".json") { continue } meta, err := m.loadMeta(entry.Name()) if err != nil { continue // skip corrupt entries } // Apply filters. if opts.WorkloadName != "" && meta.WorkloadName != opts.WorkloadName { continue } if opts.Type != "" && meta.Type != opts.Type { continue } backups = append(backups, meta) } // Sort by creation time, newest first. 
sort.Slice(backups, func(i, j int) bool { return backups[i].CreatedAt.After(backups[j].CreatedAt) }) // Apply limit. if opts.Limit > 0 && len(backups) > opts.Limit { backups = backups[:opts.Limit] } return backups, nil } // ── Get ────────────────────────────────────────────────────────────────────── // Get retrieves a single backup by ID. func (m *Manager) Get(backupID string) (*BackupMeta, error) { filename := backupID + ".json" return m.loadMeta(filename) } // ── Restore ────────────────────────────────────────────────────────────────── // RestoreOptions configures a backup restoration. type RestoreOptions struct { BackupID string TargetDir string // where to restore (defaults to original source path) Force bool // overwrite existing target directory } // RestoreResult holds the outcome of a restore operation. type RestoreResult struct { TargetDir string FilesLinked int TotalSize int64 Duration time.Duration } // Restore reassembles a workload's rootfs from a backup's CAS manifest. // Uses TinyVol hard-link assembly for instant, space-efficient restoration. func (m *Manager) Restore(opts RestoreOptions) (*RestoreResult, error) { start := time.Now() // Load backup metadata. meta, err := m.Get(opts.BackupID) if err != nil { return nil, fmt.Errorf("backup restore: %w", err) } // Determine target directory. targetDir := opts.TargetDir if targetDir == "" { targetDir = meta.SourcePath } if targetDir == "" { return nil, fmt.Errorf("backup restore: no target directory specified and no source path in backup metadata") } // Check if target exists. if _, err := os.Stat(targetDir); err == nil { if !opts.Force { return nil, fmt.Errorf("backup restore: target %s already exists (use --force to overwrite)", targetDir) } // Remove existing target. if err := os.RemoveAll(targetDir); err != nil { return nil, fmt.Errorf("backup restore: remove existing target: %w", err) } } // Create target directory. 
if err := os.MkdirAll(targetDir, 0755); err != nil { return nil, fmt.Errorf("backup restore: create target dir: %w", err) } // Load the CAS manifest. bm, err := m.cas.LoadManifest(meta.ManifestRef) if err != nil { return nil, fmt.Errorf("backup restore: load manifest %s: %w", meta.ManifestRef, err) } // Assemble using TinyVol. tv := storage.NewTinyVol(m.cas, "") assemblyResult, err := tv.Assemble(bm, targetDir) if err != nil { return nil, fmt.Errorf("backup restore: TinyVol assembly: %w", err) } return &RestoreResult{ TargetDir: targetDir, FilesLinked: assemblyResult.FilesLinked, TotalSize: assemblyResult.TotalBytes, Duration: time.Since(start), }, nil } // ── Delete ─────────────────────────────────────────────────────────────────── // Delete removes a backup's metadata. The CAS blobs are not removed — they // will be cleaned up by `volt cas gc` if no other manifests reference them. func (m *Manager) Delete(backupID string) error { filename := backupID + ".json" metaPath := filepath.Join(m.backupDir, filename) if _, err := os.Stat(metaPath); os.IsNotExist(err) { return fmt.Errorf("backup delete: backup %s not found", backupID) } if err := os.Remove(metaPath); err != nil { return fmt.Errorf("backup delete: %w", err) } return nil } // ── Schedule ───────────────────────────────────────────────────────────────── // ScheduleConfig holds the configuration for automated backups. type ScheduleConfig struct { WorkloadName string `json:"workload_name"` Interval time.Duration `json:"interval"` MaxKeep int `json:"max_keep"` // max backups to retain (0 = unlimited) PushToCDN bool `json:"push_to_cdn"` Tags []string `json:"tags,omitempty"` } // Schedule creates a systemd timer unit for automated backups. // The timer calls `volt backup create` at the specified interval. 
func (m *Manager) Schedule(cfg ScheduleConfig) error { if cfg.WorkloadName == "" { return fmt.Errorf("backup schedule: workload name is required") } if cfg.Interval <= 0 { return fmt.Errorf("backup schedule: interval must be positive") } unitName := fmt.Sprintf("volt-backup-%s", cfg.WorkloadName) // Create the service unit (one-shot, runs the backup command). serviceContent := fmt.Sprintf(`[Unit] Description=Volt Automated Backup for %s After=network.target [Service] Type=oneshot ExecStart=/usr/local/bin/volt backup create %s --type scheduled `, cfg.WorkloadName, cfg.WorkloadName) if cfg.MaxKeep > 0 { serviceContent += fmt.Sprintf("ExecStartPost=/usr/local/bin/volt backup prune %s --keep %d\n", cfg.WorkloadName, cfg.MaxKeep) } // Create the timer unit. intervalStr := formatSystemdInterval(cfg.Interval) timerContent := fmt.Sprintf(`[Unit] Description=Volt Backup Timer for %s [Timer] OnActiveSec=0 OnUnitActiveSec=%s Persistent=true RandomizedDelaySec=300 [Install] WantedBy=timers.target `, cfg.WorkloadName, intervalStr) // Write units. unitDir := "/etc/systemd/system" servicePath := filepath.Join(unitDir, unitName+".service") timerPath := filepath.Join(unitDir, unitName+".timer") if err := os.WriteFile(servicePath, []byte(serviceContent), 0644); err != nil { return fmt.Errorf("backup schedule: write service unit: %w", err) } if err := os.WriteFile(timerPath, []byte(timerContent), 0644); err != nil { return fmt.Errorf("backup schedule: write timer unit: %w", err) } // Save schedule config for reference. 
configPath := filepath.Join(m.backupDir, fmt.Sprintf("schedule-%s.json", cfg.WorkloadName)) configData, _ := json.MarshalIndent(cfg, "", " ") if err := os.WriteFile(configPath, configData, 0644); err != nil { return fmt.Errorf("backup schedule: save config: %w", err) } return nil } // ── Metadata Persistence ───────────────────────────────────────────────────── func (m *Manager) saveMeta(meta *BackupMeta) error { data, err := json.MarshalIndent(meta, "", " ") if err != nil { return fmt.Errorf("marshal backup meta: %w", err) } filename := meta.ID + ".json" metaPath := filepath.Join(m.backupDir, filename) return os.WriteFile(metaPath, data, 0644) } func (m *Manager) loadMeta(filename string) (*BackupMeta, error) { metaPath := filepath.Join(m.backupDir, filename) data, err := os.ReadFile(metaPath) if err != nil { return nil, fmt.Errorf("load backup meta %s: %w", filename, err) } var meta BackupMeta if err := json.Unmarshal(data, &meta); err != nil { return nil, fmt.Errorf("unmarshal backup meta %s: %w", filename, err) } return &meta, nil } // ── Helpers ────────────────────────────────────────────────────────────────── // generateBackupID creates a unique, sortable backup ID. // Format: YYYYMMDD-HHMMSS- (e.g., "20260619-143052-manual") func generateBackupID(workloadName, backupType string) string { now := time.Now().UTC() return fmt.Sprintf("%s-%s-%s", workloadName, now.Format("20060102-150405"), backupType) } // formatSystemdInterval converts a time.Duration to a systemd OnUnitActiveSec value. func formatSystemdInterval(d time.Duration) string { hours := int(d.Hours()) if hours >= 24 && hours%24 == 0 { return fmt.Sprintf("%dd", hours/24) } if hours > 0 { return fmt.Sprintf("%dh", hours) } minutes := int(d.Minutes()) if minutes > 0 { return fmt.Sprintf("%dmin", minutes) } return fmt.Sprintf("%ds", int(d.Seconds())) } // FormatSize formats bytes into a human-readable string. 
func FormatSize(b int64) string {
	// Output uses binary (1024-based) units with one decimal place, e.g.
	// "1.5 KiB". Values below 1024, including negative ones, are printed as
	// plain bytes.
	const unit = 1024
	const prefixes = "KMGTPE"

	if b < unit {
		return fmt.Sprintf("%d B", b)
	}

	// Find the largest power of 1024 not exceeding b. An int64 stays below
	// 8 EiB, so exp never runs past the last prefix.
	div, exp := int64(unit), 0
	for n := b / unit; n >= unit; n /= unit {
		div *= unit
		exp++
	}

	// A value just under the next unit rounds to "1024.0"; show it in the
	// next unit instead ("1.0 MiB" rather than "1024.0 KiB").
	val := float64(b) / float64(div)
	if exp < len(prefixes)-1 && fmt.Sprintf("%.1f", val) == "1024.0" {
		val /= unit
		exp++
	}

	return fmt.Sprintf("%.1f %ciB", val, prefixes[exp])
}

// FormatDuration formats a duration for human display.
//
// Durations under a second are shown in whole milliseconds ("250ms"), under
// a minute in seconds with one decimal ("1.5s"), and anything longer as
// minutes and whole seconds ("62m5s"). A value that would round to "60.0s"
// is shown as "1m0s". Negative durations of a millisecond or more are
// formatted by magnitude with a leading "-".
func FormatDuration(d time.Duration) string {
	if d <= -time.Millisecond {
		// Negating the minimum int64 overflows; one nanosecond of slack is
		// invisible at this precision.
		if d == -1<<63 {
			d++
		}
		return "-" + FormatDuration(-d)
	}

	switch {
	case d < time.Second:
		return fmt.Sprintf("%dms", d.Milliseconds())
	case d < time.Minute:
		if s := fmt.Sprintf("%.1fs", d.Seconds()); s != "60.0s" {
			return s
		}
		return "1m0s"
	default:
		return fmt.Sprintf("%dm%ds", int(d.Minutes()), int(d.Seconds())%60)
	}
}