Complete infrastructure platform CLI: - Container runtime (systemd-nspawn) - VoltVisor VMs (Neutron Stardust / QEMU) - Stellarium CAS (content-addressed storage) - ORAS Registry - GitOps integration - Landlock LSM security - Compose orchestration - Mesh networking Copyright (c) Armored Gates LLC. All rights reserved. Licensed under AGPSL v5.0
734 lines
25 KiB
Go
734 lines
25 KiB
Go
/*
|
|
Deploy — Rolling and canary deployment strategies for Volt workloads.
|
|
|
|
Coordinates zero-downtime updates for containers and workloads by
|
|
orchestrating instance creation, health verification, traffic shifting,
|
|
and automatic rollback on failure.
|
|
|
|
Since Volt uses CAS (content-addressed storage) for rootfs assembly,
|
|
"updating" a workload means pointing it to a new CAS ref and having
|
|
TinyVol reassemble the directory tree from the new blob manifest.
|
|
|
|
Strategies:
|
|
rolling — Update instances one-by-one (respecting MaxSurge/MaxUnavail)
|
|
canary — Route a percentage of traffic to a new instance before full rollout
|
|
|
|
Copyright (c) Armored Gates LLC. All rights reserved.
|
|
*/
|
|
package deploy
|
|
|
|
import (
|
|
"fmt"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// ── Strategy ─────────────────────────────────────────────────────────────────
|
|
|
|
// Strategy defines the deployment approach.
// It is a string so it can round-trip through CLI flags and history records.
type Strategy string

const (
	// StrategyRolling updates instances one-by-one with health verification.
	StrategyRolling Strategy = "rolling"
	// StrategyCanary routes a percentage of traffic to a new instance first.
	StrategyCanary Strategy = "canary"
)
|
|
|
|
// ── Configuration ────────────────────────────────────────────────────────────
|
|
|
|
// DeployConfig holds all parameters for a deployment operation.
// Zero values are filled in by Validate; callers should call Validate
// before passing the config to RollingDeploy or CanaryDeploy.
type DeployConfig struct {
	Strategy     Strategy      // Deployment strategy ("rolling" or "canary")
	Target       string        // Container/workload name or pattern (required)
	NewImage     string        // New CAS ref or image path to deploy (required)
	MaxSurge     int           // Max extra instances during rolling (default: 1)
	MaxUnavail   int           // Max unavailable during rolling (default: 0)
	CanaryWeight int           // Canary traffic percentage; must be 1-99 for canary
	HealthCheck  HealthCheck   // How to verify new instance is healthy (defaults filled by Validate)
	Timeout      time.Duration // Max time for the entire deployment (default: 10m)
	AutoRollback bool          // Revert already-updated instances on failure
}
|
|
|
|
// Validate checks that the config is usable and fills in defaults.
|
|
func (c *DeployConfig) Validate() error {
|
|
if c.Target == "" {
|
|
return fmt.Errorf("deploy: target is required")
|
|
}
|
|
if c.NewImage == "" {
|
|
return fmt.Errorf("deploy: new image (CAS ref) is required")
|
|
}
|
|
|
|
switch c.Strategy {
|
|
case StrategyRolling:
|
|
if c.MaxSurge <= 0 {
|
|
c.MaxSurge = 1
|
|
}
|
|
if c.MaxUnavail < 0 {
|
|
c.MaxUnavail = 0
|
|
}
|
|
case StrategyCanary:
|
|
if c.CanaryWeight <= 0 || c.CanaryWeight >= 100 {
|
|
return fmt.Errorf("deploy: canary weight must be between 1 and 99, got %d", c.CanaryWeight)
|
|
}
|
|
default:
|
|
return fmt.Errorf("deploy: unknown strategy %q (use 'rolling' or 'canary')", c.Strategy)
|
|
}
|
|
|
|
if c.Timeout <= 0 {
|
|
c.Timeout = 10 * time.Minute
|
|
}
|
|
if c.HealthCheck.Type == "" {
|
|
c.HealthCheck.Type = "none"
|
|
}
|
|
if c.HealthCheck.Interval <= 0 {
|
|
c.HealthCheck.Interval = 5 * time.Second
|
|
}
|
|
if c.HealthCheck.Retries <= 0 {
|
|
c.HealthCheck.Retries = 3
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// ── Deploy Status ────────────────────────────────────────────────────────────
|
|
|
|
// Phase represents the current phase of a deployment.
type Phase string

const (
	PhasePreparing   Phase = "preparing"    // discovering instances, recording versions
	PhaseDeploying   Phase = "deploying"    // actively updating/creating instances
	PhaseVerifying   Phase = "verifying"    // waiting for health checks to pass
	PhaseComplete    Phase = "complete"     // deployment finished successfully
	PhaseRollingBack Phase = "rolling-back" // reverting updated instances after a failure
	PhaseFailed      Phase = "failed"       // terminal failure state
	PhasePaused      Phase = "paused"       // deployment paused (not set by this file; presumably used elsewhere — TODO confirm)
)
|
|
|
|
// DeployStatus tracks the progress of an active deployment.
// Copies of this struct are handed to ProgressFunc callbacks and stored
// (by pointer) in the active-deployments map keyed by Target.
type DeployStatus struct {
	ID          string    `json:"id" yaml:"id"`
	Phase       Phase     `json:"phase" yaml:"phase"`
	Progress    string    `json:"progress" yaml:"progress"` // e.g. "2/5 instances updated"
	OldVersion  string    `json:"old_version" yaml:"old_version"` // previous CAS ref (empty if unreadable)
	NewVersion  string    `json:"new_version" yaml:"new_version"` // target CAS ref
	Target      string    `json:"target" yaml:"target"`
	Strategy    Strategy  `json:"strategy" yaml:"strategy"`
	StartedAt   time.Time `json:"started_at" yaml:"started_at"`
	CompletedAt time.Time `json:"completed_at,omitempty" yaml:"completed_at,omitempty"` // zero while in progress
	Message     string    `json:"message,omitempty" yaml:"message,omitempty"` // human-readable detail, set on failure/rollback
}
|
|
|
|
// ── Instance abstraction ─────────────────────────────────────────────────────
|
|
|
|
// Instance represents a single running workload instance that can be deployed to.
type Instance struct {
	Name    string // Instance name (e.g., "web-app-1")
	Image   string // Current CAS ref or image
	Status  string // "running", "stopped", etc.; SystemExecutor reports "unknown"
	Healthy bool   // Last known health state
}
|
|
|
|
// ── Executor interface ───────────────────────────────────────────────────────
|
|
|
|
// Executor abstracts the system operations needed for deployments.
// This allows testing without real systemd/nspawn/nftables calls.
// SystemExecutor (below) is the production implementation.
type Executor interface {
	// ListInstances returns all instances matching the target pattern.
	ListInstances(target string) ([]Instance, error)

	// CreateInstance creates a new instance with the given image.
	// The instance is created stopped; call StartInstance to run it.
	CreateInstance(name, image string) error

	// StartInstance starts a stopped instance.
	StartInstance(name string) error

	// StopInstance stops a running instance.
	StopInstance(name string) error

	// DeleteInstance removes an instance entirely.
	DeleteInstance(name string) error

	// GetInstanceImage returns the current image/CAS ref for an instance.
	GetInstanceImage(name string) (string, error)

	// UpdateInstanceImage updates an instance to use a new image (CAS ref).
	// This reassembles the rootfs via TinyVol and restarts the instance.
	UpdateInstanceImage(name, newImage string) error

	// UpdateTrafficWeight adjusts traffic routing for canary deployments.
	// weight is 0-100 representing percentage to the canary instance.
	UpdateTrafficWeight(target string, canaryName string, weight int) error
}
|
|
|
|
// ── Active deployments tracking ──────────────────────────────────────────────
|
|
|
|
var (
	// activeDeployments maps a deployment target to its in-flight status.
	// Entries are added when a deploy starts and removed when it finishes
	// (success or failure). Guarded by activeDeploymentsMu.
	activeDeployments = make(map[string]*DeployStatus)
	// activeDeploymentsMu protects activeDeployments.
	activeDeploymentsMu sync.RWMutex
)
|
|
|
|
// GetActiveDeployments returns a snapshot of all active deployments.
|
|
func GetActiveDeployments() []DeployStatus {
|
|
activeDeploymentsMu.RLock()
|
|
defer activeDeploymentsMu.RUnlock()
|
|
|
|
result := make([]DeployStatus, 0, len(activeDeployments))
|
|
for _, ds := range activeDeployments {
|
|
result = append(result, *ds)
|
|
}
|
|
return result
|
|
}
|
|
|
|
// GetActiveDeployment returns the active deployment for a target, if any.
|
|
func GetActiveDeployment(target string) *DeployStatus {
|
|
activeDeploymentsMu.RLock()
|
|
defer activeDeploymentsMu.RUnlock()
|
|
|
|
if ds, ok := activeDeployments[target]; ok {
|
|
cp := *ds
|
|
return &cp
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func setActiveDeployment(ds *DeployStatus) {
|
|
activeDeploymentsMu.Lock()
|
|
defer activeDeploymentsMu.Unlock()
|
|
activeDeployments[ds.Target] = ds
|
|
}
|
|
|
|
func removeActiveDeployment(target string) {
|
|
activeDeploymentsMu.Lock()
|
|
defer activeDeploymentsMu.Unlock()
|
|
delete(activeDeployments, target)
|
|
}
|
|
|
|
// ── Progress callback ────────────────────────────────────────────────────────
|
|
|
|
// ProgressFunc is called with status updates during deployment.
// It receives a copy of the status; implementations must not assume
// they own shared state. A nil ProgressFunc is allowed everywhere.
type ProgressFunc func(status DeployStatus)
|
|
|
|
// ── Rolling Deploy ───────────────────────────────────────────────────────────
|
|
|
|
// RollingDeploy performs a rolling update of instances matching cfg.Target.
|
|
//
|
|
// Algorithm:
|
|
// 1. List all instances matching the target pattern
|
|
// 2. For each instance (respecting MaxSurge / MaxUnavail):
|
|
// a. Update instance image to new CAS ref (reassemble rootfs via TinyVol)
|
|
// b. Start/restart the instance
|
|
// c. Wait for health check to pass
|
|
// d. If health check fails and AutoRollback: revert to old image
|
|
// 3. Record deployment in history
|
|
func RollingDeploy(cfg DeployConfig, exec Executor, hc HealthChecker, hist *HistoryStore, progress ProgressFunc) error {
|
|
if err := cfg.Validate(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Generate deployment ID.
|
|
deployID := generateDeployID()
|
|
|
|
status := &DeployStatus{
|
|
ID: deployID,
|
|
Phase: PhasePreparing,
|
|
Target: cfg.Target,
|
|
Strategy: StrategyRolling,
|
|
NewVersion: cfg.NewImage,
|
|
StartedAt: time.Now().UTC(),
|
|
}
|
|
setActiveDeployment(status)
|
|
notifyProgress(progress, *status)
|
|
|
|
// 1. Discover instances.
|
|
instances, err := exec.ListInstances(cfg.Target)
|
|
if err != nil {
|
|
status.Phase = PhaseFailed
|
|
status.Message = fmt.Sprintf("failed to list instances: %v", err)
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, 0)
|
|
return fmt.Errorf("deploy: %s", status.Message)
|
|
}
|
|
if len(instances) == 0 {
|
|
status.Phase = PhaseFailed
|
|
status.Message = "no instances found matching target"
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, 0)
|
|
return fmt.Errorf("deploy: %s", status.Message)
|
|
}
|
|
|
|
// Record old version from first instance.
|
|
if len(instances) > 0 {
|
|
oldImg, _ := exec.GetInstanceImage(instances[0].Name)
|
|
status.OldVersion = oldImg
|
|
}
|
|
|
|
total := len(instances)
|
|
updated := 0
|
|
var rollbackTargets []string // instances that were updated (for rollback)
|
|
|
|
status.Phase = PhaseDeploying
|
|
status.Progress = fmt.Sprintf("0/%d instances updated", total)
|
|
notifyProgress(progress, *status)
|
|
|
|
// Timeout enforcement.
|
|
deadline := time.Now().Add(cfg.Timeout)
|
|
|
|
// 2. Rolling update loop.
|
|
for i, inst := range instances {
|
|
if time.Now().After(deadline) {
|
|
err := fmt.Errorf("deployment timed out after %s", cfg.Timeout)
|
|
if cfg.AutoRollback && len(rollbackTargets) > 0 {
|
|
status.Phase = PhaseRollingBack
|
|
status.Message = err.Error()
|
|
notifyProgress(progress, *status)
|
|
rollbackInstances(exec, rollbackTargets, status.OldVersion)
|
|
}
|
|
status.Phase = PhaseFailed
|
|
status.Message = err.Error()
|
|
status.CompletedAt = time.Now().UTC()
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, updated)
|
|
return err
|
|
}
|
|
|
|
// Respect MaxSurge: we update in-place, so surge is about allowing
|
|
// brief overlap. With MaxUnavail=0 and MaxSurge=1, we update one at a time.
|
|
_ = cfg.MaxSurge // In single-node mode, surge is handled by updating in-place.
|
|
|
|
status.Progress = fmt.Sprintf("%d/%d instances updated (updating %s)", i, total, inst.Name)
|
|
notifyProgress(progress, *status)
|
|
|
|
// a. Update the instance image.
|
|
if err := exec.UpdateInstanceImage(inst.Name, cfg.NewImage); err != nil {
|
|
errMsg := fmt.Sprintf("failed to update instance %s: %v", inst.Name, err)
|
|
if cfg.AutoRollback {
|
|
status.Phase = PhaseRollingBack
|
|
status.Message = errMsg
|
|
notifyProgress(progress, *status)
|
|
rollbackInstances(exec, rollbackTargets, status.OldVersion)
|
|
status.Phase = PhaseFailed
|
|
} else {
|
|
status.Phase = PhaseFailed
|
|
}
|
|
status.Message = errMsg
|
|
status.CompletedAt = time.Now().UTC()
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, updated)
|
|
return fmt.Errorf("deploy: %s", errMsg)
|
|
}
|
|
|
|
// b. Start the instance.
|
|
if err := exec.StartInstance(inst.Name); err != nil {
|
|
errMsg := fmt.Sprintf("failed to start instance %s: %v", inst.Name, err)
|
|
if cfg.AutoRollback {
|
|
status.Phase = PhaseRollingBack
|
|
status.Message = errMsg
|
|
notifyProgress(progress, *status)
|
|
// Rollback this instance too.
|
|
rollbackTargets = append(rollbackTargets, inst.Name)
|
|
rollbackInstances(exec, rollbackTargets, status.OldVersion)
|
|
status.Phase = PhaseFailed
|
|
} else {
|
|
status.Phase = PhaseFailed
|
|
}
|
|
status.Message = errMsg
|
|
status.CompletedAt = time.Now().UTC()
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, updated)
|
|
return fmt.Errorf("deploy: %s", errMsg)
|
|
}
|
|
|
|
// c. Health check.
|
|
status.Phase = PhaseVerifying
|
|
notifyProgress(progress, *status)
|
|
|
|
if err := hc.WaitHealthy(inst.Name, cfg.HealthCheck); err != nil {
|
|
errMsg := fmt.Sprintf("health check failed for %s: %v", inst.Name, err)
|
|
if cfg.AutoRollback {
|
|
status.Phase = PhaseRollingBack
|
|
status.Message = errMsg
|
|
notifyProgress(progress, *status)
|
|
rollbackTargets = append(rollbackTargets, inst.Name)
|
|
rollbackInstances(exec, rollbackTargets, status.OldVersion)
|
|
status.Phase = PhaseFailed
|
|
} else {
|
|
status.Phase = PhaseFailed
|
|
}
|
|
status.Message = errMsg
|
|
status.CompletedAt = time.Now().UTC()
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, updated)
|
|
return fmt.Errorf("deploy: %s", errMsg)
|
|
}
|
|
|
|
rollbackTargets = append(rollbackTargets, inst.Name)
|
|
updated++
|
|
status.Phase = PhaseDeploying
|
|
status.Progress = fmt.Sprintf("%d/%d instances updated", updated, total)
|
|
notifyProgress(progress, *status)
|
|
}
|
|
|
|
// 3. Complete.
|
|
status.Phase = PhaseComplete
|
|
status.Progress = fmt.Sprintf("%d/%d instances updated", updated, total)
|
|
status.CompletedAt = time.Now().UTC()
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, updated)
|
|
|
|
return nil
|
|
}
|
|
|
|
// ── Canary Deploy ────────────────────────────────────────────────────────────
|
|
|
|
// CanaryDeploy creates a canary instance alongside existing instances and
|
|
// routes cfg.CanaryWeight percent of traffic to it.
|
|
//
|
|
// Algorithm:
|
|
// 1. List existing instances
|
|
// 2. Create a new canary instance with the new image
|
|
// 3. Start the canary and verify health
|
|
// 4. Update traffic routing to send CanaryWeight% to canary
|
|
// 5. If health fails and AutoRollback: remove canary, restore routing
|
|
func CanaryDeploy(cfg DeployConfig, exec Executor, hc HealthChecker, hist *HistoryStore, progress ProgressFunc) error {
|
|
if err := cfg.Validate(); err != nil {
|
|
return err
|
|
}
|
|
|
|
deployID := generateDeployID()
|
|
|
|
status := &DeployStatus{
|
|
ID: deployID,
|
|
Phase: PhasePreparing,
|
|
Target: cfg.Target,
|
|
Strategy: StrategyCanary,
|
|
NewVersion: cfg.NewImage,
|
|
StartedAt: time.Now().UTC(),
|
|
}
|
|
setActiveDeployment(status)
|
|
notifyProgress(progress, *status)
|
|
|
|
// 1. Discover existing instances.
|
|
instances, err := exec.ListInstances(cfg.Target)
|
|
if err != nil {
|
|
status.Phase = PhaseFailed
|
|
status.Message = fmt.Sprintf("failed to list instances: %v", err)
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, 0)
|
|
return fmt.Errorf("deploy: %s", status.Message)
|
|
}
|
|
if len(instances) == 0 {
|
|
status.Phase = PhaseFailed
|
|
status.Message = "no instances found matching target"
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, 0)
|
|
return fmt.Errorf("deploy: %s", status.Message)
|
|
}
|
|
|
|
// Record old version.
|
|
if oldImg, err := exec.GetInstanceImage(instances[0].Name); err == nil {
|
|
status.OldVersion = oldImg
|
|
}
|
|
|
|
// 2. Create canary instance.
|
|
canaryName := canaryInstanceName(cfg.Target)
|
|
|
|
status.Phase = PhaseDeploying
|
|
status.Progress = fmt.Sprintf("creating canary instance %s", canaryName)
|
|
notifyProgress(progress, *status)
|
|
|
|
if err := exec.CreateInstance(canaryName, cfg.NewImage); err != nil {
|
|
status.Phase = PhaseFailed
|
|
status.Message = fmt.Sprintf("failed to create canary: %v", err)
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, 0)
|
|
return fmt.Errorf("deploy: %s", status.Message)
|
|
}
|
|
|
|
// 3. Start canary and verify health.
|
|
if err := exec.StartInstance(canaryName); err != nil {
|
|
cleanupCanary(exec, canaryName)
|
|
status.Phase = PhaseFailed
|
|
status.Message = fmt.Sprintf("failed to start canary: %v", err)
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, 0)
|
|
return fmt.Errorf("deploy: %s", status.Message)
|
|
}
|
|
|
|
status.Phase = PhaseVerifying
|
|
status.Progress = "verifying canary health"
|
|
notifyProgress(progress, *status)
|
|
|
|
if err := hc.WaitHealthy(canaryName, cfg.HealthCheck); err != nil {
|
|
if cfg.AutoRollback {
|
|
status.Phase = PhaseRollingBack
|
|
status.Message = fmt.Sprintf("canary health check failed: %v", err)
|
|
notifyProgress(progress, *status)
|
|
cleanupCanary(exec, canaryName)
|
|
}
|
|
status.Phase = PhaseFailed
|
|
status.Message = fmt.Sprintf("canary health check failed: %v", err)
|
|
status.CompletedAt = time.Now().UTC()
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, 0)
|
|
return fmt.Errorf("deploy: %s", status.Message)
|
|
}
|
|
|
|
// 4. Update traffic routing.
|
|
status.Progress = fmt.Sprintf("routing %d%% traffic to canary", cfg.CanaryWeight)
|
|
notifyProgress(progress, *status)
|
|
|
|
if err := exec.UpdateTrafficWeight(cfg.Target, canaryName, cfg.CanaryWeight); err != nil {
|
|
if cfg.AutoRollback {
|
|
cleanupCanary(exec, canaryName)
|
|
}
|
|
status.Phase = PhaseFailed
|
|
status.Message = fmt.Sprintf("failed to update traffic routing: %v", err)
|
|
status.CompletedAt = time.Now().UTC()
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, 0)
|
|
return fmt.Errorf("deploy: %s", status.Message)
|
|
}
|
|
|
|
// 5. Canary is live.
|
|
status.Phase = PhaseComplete
|
|
status.Progress = fmt.Sprintf("canary live with %d%% traffic", cfg.CanaryWeight)
|
|
status.CompletedAt = time.Now().UTC()
|
|
notifyProgress(progress, *status)
|
|
removeActiveDeployment(cfg.Target)
|
|
recordHistory(hist, status, 1)
|
|
|
|
return nil
|
|
}
|
|
|
|
// ── Rollback ─────────────────────────────────────────────────────────────────
|
|
|
|
// Rollback reverts a target to its previous version using deployment history.
|
|
func Rollback(target string, exec Executor, hist *HistoryStore, progress ProgressFunc) error {
|
|
if hist == nil {
|
|
return fmt.Errorf("deploy rollback: no history store available")
|
|
}
|
|
|
|
entries, err := hist.ListByTarget(target)
|
|
if err != nil {
|
|
return fmt.Errorf("deploy rollback: failed to read history: %w", err)
|
|
}
|
|
|
|
// Find the last successful deployment that has a different version.
|
|
var previousRef string
|
|
for _, entry := range entries {
|
|
if entry.Status == string(PhaseComplete) && entry.OldRef != "" {
|
|
previousRef = entry.OldRef
|
|
break
|
|
}
|
|
}
|
|
if previousRef == "" {
|
|
return fmt.Errorf("deploy rollback: no previous version found in history for %q", target)
|
|
}
|
|
|
|
status := &DeployStatus{
|
|
ID: generateDeployID(),
|
|
Phase: PhaseRollingBack,
|
|
Target: target,
|
|
Strategy: StrategyRolling,
|
|
NewVersion: previousRef,
|
|
StartedAt: time.Now().UTC(),
|
|
Message: "rollback to previous version",
|
|
}
|
|
notifyProgress(progress, *status)
|
|
|
|
// Perform a rolling deploy with the previous ref.
|
|
rollbackCfg := DeployConfig{
|
|
Strategy: StrategyRolling,
|
|
Target: target,
|
|
NewImage: previousRef,
|
|
MaxSurge: 1,
|
|
MaxUnavail: 0,
|
|
HealthCheck: HealthCheck{Type: "none"},
|
|
Timeout: 5 * time.Minute,
|
|
AutoRollback: false, // Don't auto-rollback a rollback
|
|
}
|
|
|
|
return RollingDeploy(rollbackCfg, exec, &NoopHealthChecker{}, hist, progress)
|
|
}
|
|
|
|
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
|
|
// rollbackInstances reverts a list of instances to the old image.
|
|
func rollbackInstances(exec Executor, names []string, oldImage string) {
|
|
for _, name := range names {
|
|
_ = exec.UpdateInstanceImage(name, oldImage)
|
|
_ = exec.StartInstance(name)
|
|
}
|
|
}
|
|
|
|
// cleanupCanary stops and removes a canary instance.
|
|
func cleanupCanary(exec Executor, canaryName string) {
|
|
_ = exec.StopInstance(canaryName)
|
|
_ = exec.DeleteInstance(canaryName)
|
|
}
|
|
|
|
// canaryInstanceName generates a canary instance name from the target.
// Trailing digits and hyphens are stripped (so "web-1" becomes "web"),
// then "-canary" is appended. If stripping consumes the whole target
// (e.g. a purely numeric name), the original target is used as the base.
func canaryInstanceName(target string) string {
	isInstanceSuffix := func(r rune) bool {
		return r == '-' || ('0' <= r && r <= '9')
	}
	base := strings.TrimRightFunc(target, isInstanceSuffix)
	if len(base) == 0 {
		base = target
	}
	return base + "-canary"
}
|
|
|
|
// generateDeployID creates a unique deployment ID.
//
// The full nanosecond timestamp is used: the previous implementation
// truncated to milliseconds, which could hand out identical IDs to two
// deployments started within the same millisecond.
func generateDeployID() string {
	return fmt.Sprintf("deploy-%d", time.Now().UnixNano())
}
|
|
|
|
// notifyProgress safely calls the progress callback if non-nil.
|
|
func notifyProgress(fn ProgressFunc, status DeployStatus) {
|
|
if fn != nil {
|
|
fn(status)
|
|
}
|
|
}
|
|
|
|
// recordHistory saves a deployment to the history store if available.
|
|
func recordHistory(hist *HistoryStore, status *DeployStatus, instancesUpdated int) {
|
|
if hist == nil {
|
|
return
|
|
}
|
|
entry := HistoryEntry{
|
|
ID: status.ID,
|
|
Target: status.Target,
|
|
Strategy: string(status.Strategy),
|
|
OldRef: status.OldVersion,
|
|
NewRef: status.NewVersion,
|
|
Status: string(status.Phase),
|
|
StartedAt: status.StartedAt,
|
|
CompletedAt: status.CompletedAt,
|
|
InstancesUpdated: instancesUpdated,
|
|
Message: status.Message,
|
|
}
|
|
_ = hist.Append(entry)
|
|
}
|
|
|
|
// ── Default executor (real system calls) ─────────────────────────────────────
|
|
|
|
// DefaultCASDir is the default directory for CAS storage.
const DefaultCASDir = "/var/lib/volt/cas"

// SystemExecutor implements Executor using real system commands.
type SystemExecutor struct {
	ContainerBaseDir string // root directory holding one subdirectory per container
	CASBaseDir       string // root of the content-addressed store
}

// NewSystemExecutor creates an executor for real system operations,
// pre-configured with the standard on-disk layout.
func NewSystemExecutor() *SystemExecutor {
	e := &SystemExecutor{}
	e.ContainerBaseDir = "/var/lib/volt/containers"
	e.CASBaseDir = DefaultCASDir
	return e
}
|
|
|
|
func (e *SystemExecutor) ListInstances(target string) ([]Instance, error) {
|
|
// Match instances by prefix or exact name.
|
|
// Scan /var/lib/volt/containers for directories matching the pattern.
|
|
var instances []Instance
|
|
|
|
entries, err := filepath.Glob(filepath.Join(e.ContainerBaseDir, target+"*"))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("list instances: %w", err)
|
|
}
|
|
|
|
for _, entry := range entries {
|
|
name := filepath.Base(entry)
|
|
instances = append(instances, Instance{
|
|
Name: name,
|
|
Status: "unknown",
|
|
})
|
|
}
|
|
|
|
// If no glob matches, try exact match.
|
|
if len(instances) == 0 {
|
|
exact := filepath.Join(e.ContainerBaseDir, target)
|
|
if info, err := fileInfo(exact); err == nil && info.IsDir() {
|
|
instances = append(instances, Instance{
|
|
Name: target,
|
|
Status: "unknown",
|
|
})
|
|
}
|
|
}
|
|
|
|
return instances, nil
|
|
}
|
|
|
|
// CreateInstance creates a new instance with the given image.
//
// Not implemented yet: this should create the container directory and
// write its unit file via the backend.Create flow. It currently always
// returns an error, which makes canary deployments fail fast through
// SystemExecutor until it is wired up.
func (e *SystemExecutor) CreateInstance(name, image string) error {
	// Create container directory and write unit file.
	// In a real implementation this would use the backend.Create flow.
	return fmt.Errorf("SystemExecutor.CreateInstance not yet wired to backend")
}
|
|
|
|
// StartInstance starts the instance's systemd unit
// (volt-container@<name>.service) via systemctl.
func (e *SystemExecutor) StartInstance(name string) error {
	return runSystemctl("start", voltContainerUnit(name))
}
|
|
|
|
// StopInstance stops the instance's systemd unit
// (volt-container@<name>.service) via systemctl.
func (e *SystemExecutor) StopInstance(name string) error {
	return runSystemctl("stop", voltContainerUnit(name))
}
|
|
|
|
// DeleteInstance removes an instance entirely.
//
// Not implemented yet: it currently always returns an error until the
// backend removal flow is wired up. Note cleanupCanary ignores this
// error, so stale canary directories may be left behind meanwhile.
func (e *SystemExecutor) DeleteInstance(name string) error {
	return fmt.Errorf("SystemExecutor.DeleteInstance not yet wired to backend")
}
|
|
|
|
func (e *SystemExecutor) GetInstanceImage(name string) (string, error) {
|
|
// Read the CAS ref from the instance's metadata.
|
|
// Stored in /var/lib/volt/containers/<name>/.volt-cas-ref
|
|
refPath := filepath.Join(e.ContainerBaseDir, name, ".volt-cas-ref")
|
|
data, err := readFile(refPath)
|
|
if err != nil {
|
|
return "", fmt.Errorf("no CAS ref found for instance %s", name)
|
|
}
|
|
return strings.TrimSpace(string(data)), nil
|
|
}
|
|
|
|
// UpdateInstanceImage points an instance at a new CAS ref by rewriting
// its .volt-cas-ref metadata file. The instance is stopped first and is
// NOT restarted here — callers (e.g. RollingDeploy) are expected to call
// StartInstance afterwards.
func (e *SystemExecutor) UpdateInstanceImage(name, newImage string) error {
	// 1. Stop the instance. The error is intentionally ignored:
	// the instance may already be stopped, and a failed stop should
	// not block updating the ref.
	_ = runSystemctl("stop", voltContainerUnit(name))

	// 2. Write new CAS ref. TinyVol presumably reassembles the rootfs
	// from this ref on next start — TODO confirm against the start path.
	refPath := filepath.Join(e.ContainerBaseDir, name, ".volt-cas-ref")
	if err := writeFile(refPath, []byte(newImage)); err != nil {
		return fmt.Errorf("failed to write CAS ref: %w", err)
	}

	return nil
}
|
|
|
|
// UpdateTrafficWeight records the canary traffic split for a target.
//
// In a full implementation this would update nftables rules for load
// balancing. For now it only appends a "target:canary:weight" line to a
// shared metadata file. NOTE(review): the file is append-only, so repeated
// updates accumulate lines — whoever consumes this file presumably takes
// the last entry per target; verify before relying on it. Concurrent
// appends are also unsynchronized here.
func (e *SystemExecutor) UpdateTrafficWeight(target, canaryName string, weight int) error {
	// In a full implementation this would update nftables rules for load balancing.
	// For now, record the weight in a metadata file.
	weightPath := filepath.Join(e.ContainerBaseDir, ".traffic-weights")
	data := fmt.Sprintf("%s:%s:%d\n", target, canaryName, weight)
	return appendFile(weightPath, []byte(data))
}
|
|
|
|
// voltContainerUnit returns the systemd template-unit instance name for a
// container, e.g. "volt-container@web.service" for "web".
func voltContainerUnit(name string) string {
	return "volt-container@" + name + ".service"
}
|