/* Deploy Tests — Verifies rolling, canary, rollback, health check, and history logic. Uses a mock executor and health checker so no real system calls are made. */ package deploy import ( "fmt" "os" "path/filepath" "strings" "sync" "testing" "time" ) // ── Mock Executor ──────────────────────────────────────────────────────────── // mockExecutor records all operations for verification. type mockExecutor struct { mu sync.Mutex instances map[string]*Instance // name → instance images map[string]string // name → current image // Recorded operation log. ops []string // Error injection. updateImageErr map[string]error // instance name → error to return startErr map[string]error createErr map[string]error trafficWeights map[string]int // canaryName → weight } func newMockExecutor(instances ...Instance) *mockExecutor { m := &mockExecutor{ instances: make(map[string]*Instance), images: make(map[string]string), updateImageErr: make(map[string]error), startErr: make(map[string]error), createErr: make(map[string]error), trafficWeights: make(map[string]int), } for _, inst := range instances { cpy := inst m.instances[inst.Name] = &cpy m.images[inst.Name] = inst.Image } return m } func (m *mockExecutor) record(op string) { m.mu.Lock() defer m.mu.Unlock() m.ops = append(m.ops, op) } func (m *mockExecutor) getOps() []string { m.mu.Lock() defer m.mu.Unlock() result := make([]string, len(m.ops)) copy(result, m.ops) return result } func (m *mockExecutor) ListInstances(target string) ([]Instance, error) { m.record(fmt.Sprintf("list:%s", target)) var result []Instance for _, inst := range m.instances { if strings.HasPrefix(inst.Name, target) || inst.Name == target { result = append(result, *inst) } } return result, nil } func (m *mockExecutor) CreateInstance(name, image string) error { m.record(fmt.Sprintf("create:%s:%s", name, image)) if err, ok := m.createErr[name]; ok { return err } m.mu.Lock() m.instances[name] = &Instance{Name: name, Image: image, Status: "stopped"} m.images[name] = image m.mu.Unlock() return nil } func (m *mockExecutor) StartInstance(name string) error { m.record(fmt.Sprintf("start:%s", name)) if err, ok := m.startErr[name]; ok { return err } m.mu.Lock() if inst, ok := m.instances[name]; ok { inst.Status = "running" } m.mu.Unlock() return nil } func (m *mockExecutor) StopInstance(name string) error { m.record(fmt.Sprintf("stop:%s", name)) m.mu.Lock() if inst, ok := m.instances[name]; ok { inst.Status = "stopped" } m.mu.Unlock() return nil } func (m *mockExecutor) DeleteInstance(name string) error { m.record(fmt.Sprintf("delete:%s", name)) m.mu.Lock() delete(m.instances, name) delete(m.images, name) m.mu.Unlock() return nil } func (m *mockExecutor) GetInstanceImage(name string) (string, error) { m.mu.Lock() defer m.mu.Unlock() if img, ok := m.images[name]; ok { return img, nil } return "", fmt.Errorf("instance %s not found", name) } func (m *mockExecutor) UpdateInstanceImage(name, newImage string) error { m.record(fmt.Sprintf("update-image:%s:%s", name, newImage)) if err, ok := m.updateImageErr[name]; ok { return err } m.mu.Lock() m.images[name] = newImage if inst, ok := m.instances[name]; ok { inst.Image = newImage } m.mu.Unlock() return nil } func (m *mockExecutor) UpdateTrafficWeight(target, canaryName string, weight int) error { m.record(fmt.Sprintf("traffic:%s:%s:%d", target, canaryName, weight)) m.mu.Lock() m.trafficWeights[canaryName] = weight m.mu.Unlock() return nil } // ── Mock Health Checker ────────────────────────────────────────────────────── // mockHealthChecker returns configurable results per instance. type mockHealthChecker struct { mu sync.Mutex results map[string]error // instance name → error (nil = healthy) calls []string } func newMockHealthChecker() *mockHealthChecker { return &mockHealthChecker{ results: make(map[string]error), } } func (h *mockHealthChecker) WaitHealthy(instanceName string, check HealthCheck) error { h.mu.Lock() h.calls = append(h.calls, instanceName) err := h.results[instanceName] h.mu.Unlock() return err } func (h *mockHealthChecker) getCalls() []string { h.mu.Lock() defer h.mu.Unlock() result := make([]string, len(h.calls)) copy(result, h.calls) return result } // ── Progress Collector ─────────────────────────────────────────────────────── type progressCollector struct { mu sync.Mutex updates []DeployStatus } func newProgressCollector() *progressCollector { return &progressCollector{} } func (p *progressCollector) callback() ProgressFunc { return func(status DeployStatus) { p.mu.Lock() defer p.mu.Unlock() p.updates = append(p.updates, status) } } func (p *progressCollector) getUpdates() []DeployStatus { p.mu.Lock() defer p.mu.Unlock() result := make([]DeployStatus, len(p.updates)) copy(result, p.updates) return result } func (p *progressCollector) phases() []Phase { p.mu.Lock() defer p.mu.Unlock() var phases []Phase for _, u := range p.updates { phases = append(phases, u.Phase) } return phases } // ── Test: Rolling Deploy Order ─────────────────────────────────────────────── func TestRollingDeployOrder(t *testing.T) { exec := newMockExecutor( Instance{Name: "web-1", Image: "sha256:old1", Status: "running"}, Instance{Name: "web-2", Image: "sha256:old1", Status: "running"}, Instance{Name: "web-3", Image: "sha256:old1", Status: "running"}, ) hc := newMockHealthChecker() pc := newProgressCollector() tmpDir := t.TempDir() hist := NewHistoryStore(tmpDir) cfg := DeployConfig{ Strategy: StrategyRolling, Target: "web", NewImage: "sha256:new1", MaxSurge: 1, MaxUnavail: 0, HealthCheck: HealthCheck{Type: "none"}, Timeout: 1 * time.Minute, AutoRollback: true, } err := RollingDeploy(cfg, exec, hc, hist, pc.callback()) if err != nil { t.Fatalf("RollingDeploy returned error: %v", err) } // Verify all instances were updated. ops := exec.getOps() // Count update-image operations. updateCount := 0 for _, op := range ops { if strings.HasPrefix(op, "update-image:") { updateCount++ // Verify new image is correct. if !strings.HasSuffix(op, ":sha256:new1") { t.Errorf("expected new image sha256:new1, got op: %s", op) } } } if updateCount != 3 { t.Errorf("expected 3 update-image ops, got %d", updateCount) } // Verify instances are updated one at a time (each update is followed by start before next update). var updateOrder []string for _, op := range ops { if strings.HasPrefix(op, "update-image:web-") { name := strings.Split(op, ":")[1] updateOrder = append(updateOrder, name) } } if len(updateOrder) != 3 { t.Errorf("expected 3 instances updated in order, got %d", len(updateOrder)) } // Verify progress callback was called. phases := pc.phases() if len(phases) == 0 { t.Error("expected progress callbacks, got none") } // First should be preparing, last should be complete. if phases[0] != PhasePreparing { t.Errorf("expected first phase to be preparing, got %s", phases[0]) } lastPhase := phases[len(phases)-1] if lastPhase != PhaseComplete { t.Errorf("expected last phase to be complete, got %s", lastPhase) } // Verify all images are now the new version. for _, name := range []string{"web-1", "web-2", "web-3"} { img, err := exec.GetInstanceImage(name) if err != nil { t.Errorf("GetInstanceImage(%s) error: %v", name, err) continue } if img != "sha256:new1" { t.Errorf("instance %s image = %s, want sha256:new1", name, img) } } } // ── Test: Canary Weight ────────────────────────────────────────────────────── func TestCanaryWeight(t *testing.T) { exec := newMockExecutor( Instance{Name: "api-1", Image: "sha256:v1", Status: "running"}, Instance{Name: "api-2", Image: "sha256:v1", Status: "running"}, ) hc := newMockHealthChecker() pc := newProgressCollector() tmpDir := t.TempDir() hist := NewHistoryStore(tmpDir) cfg := DeployConfig{ Strategy: StrategyCanary, Target: "api", NewImage: "sha256:v2", CanaryWeight: 20, HealthCheck: HealthCheck{Type: "none"}, Timeout: 1 * time.Minute, AutoRollback: true, } err := CanaryDeploy(cfg, exec, hc, hist, pc.callback()) if err != nil { t.Fatalf("CanaryDeploy returned error: %v", err) } // Verify canary instance was created. ops := exec.getOps() var createOps []string for _, op := range ops { if strings.HasPrefix(op, "create:") { createOps = append(createOps, op) } } if len(createOps) != 1 { t.Fatalf("expected 1 create op for canary, got %d: %v", len(createOps), createOps) } // Verify the canary instance name and image. canaryName := canaryInstanceName("api") expectedCreate := fmt.Sprintf("create:%s:sha256:v2", canaryName) if createOps[0] != expectedCreate { t.Errorf("create op = %q, want %q", createOps[0], expectedCreate) } // Verify traffic was routed with the correct weight. var trafficOps []string for _, op := range ops { if strings.HasPrefix(op, "traffic:") { trafficOps = append(trafficOps, op) } } if len(trafficOps) != 1 { t.Fatalf("expected 1 traffic op, got %d: %v", len(trafficOps), trafficOps) } expectedTraffic := fmt.Sprintf("traffic:api:%s:20", canaryName) if trafficOps[0] != expectedTraffic { t.Errorf("traffic op = %q, want %q", trafficOps[0], expectedTraffic) } // Verify the canary weight was recorded. exec.mu.Lock() weight := exec.trafficWeights[canaryName] exec.mu.Unlock() if weight != 20 { t.Errorf("canary traffic weight = %d, want 20", weight) } // Verify original instances were not modified. for _, name := range []string{"api-1", "api-2"} { img, _ := exec.GetInstanceImage(name) if img != "sha256:v1" { t.Errorf("original instance %s image changed to %s, should still be sha256:v1", name, img) } } // Verify progress shows canary-specific messages. updates := pc.getUpdates() foundCanaryProgress := false for _, u := range updates { if strings.Contains(u.Progress, "canary") || strings.Contains(u.Progress, "traffic") { foundCanaryProgress = true break } } if !foundCanaryProgress { t.Error("expected canary-related progress messages") } } // ── Test: Rollback Restores Previous ───────────────────────────────────────── func TestRollbackRestoresPrevious(t *testing.T) { exec := newMockExecutor( Instance{Name: "app-1", Image: "sha256:v2", Status: "running"}, ) _ = newMockHealthChecker() pc := newProgressCollector() tmpDir := t.TempDir() hist := NewHistoryStore(tmpDir) // Seed history with a previous successful deployment. _ = hist.Append(HistoryEntry{ ID: "deploy-prev", Target: "app", Strategy: "rolling", OldRef: "sha256:v1", NewRef: "sha256:v2", Status: string(PhaseComplete), StartedAt: time.Now().Add(-1 * time.Hour), CompletedAt: time.Now().Add(-50 * time.Minute), InstancesUpdated: 1, }) err := Rollback("app", exec, hist, pc.callback()) if err != nil { t.Fatalf("Rollback returned error: %v", err) } // Verify the instance was updated back to v1. img, err := exec.GetInstanceImage("app-1") if err != nil { t.Fatalf("GetInstanceImage error: %v", err) } if img != "sha256:v1" { t.Errorf("after rollback, instance image = %s, want sha256:v1", img) } // Verify rollback was recorded in history. entries, err := hist.ListByTarget("app") if err != nil { t.Fatalf("ListByTarget error: %v", err) } // Should have the original entry + the rollback entry. if len(entries) < 2 { t.Errorf("expected at least 2 history entries, got %d", len(entries)) } } // ── Test: Health Check Fail Triggers Rollback ──────────────────────────────── func TestHealthCheckFailTriggersRollback(t *testing.T) { exec := newMockExecutor( Instance{Name: "svc-1", Image: "sha256:old", Status: "running"}, Instance{Name: "svc-2", Image: "sha256:old", Status: "running"}, ) hc := newMockHealthChecker() // Make svc-2 fail health check after being updated. // Since instances are iterated from the map, we set both to fail // but we only need to verify that when any fails, rollback happens. hc.results["svc-1"] = nil // svc-1 is healthy hc.results["svc-2"] = fmt.Errorf("connection refused") pc := newProgressCollector() tmpDir := t.TempDir() hist := NewHistoryStore(tmpDir) cfg := DeployConfig{ Strategy: StrategyRolling, Target: "svc", NewImage: "sha256:bad", MaxSurge: 1, MaxUnavail: 0, HealthCheck: HealthCheck{Type: "tcp", Port: 8080, Interval: 100 * time.Millisecond, Retries: 1}, Timeout: 30 * time.Second, AutoRollback: true, } err := RollingDeploy(cfg, exec, hc, hist, pc.callback()) // Deployment should fail. if err == nil { t.Fatal("expected RollingDeploy to fail due to health check, but got nil") } if !strings.Contains(err.Error(), "health check failed") { t.Errorf("error should mention health check failure, got: %v", err) } // Verify rollback phase appeared in progress. phases := pc.phases() foundRollback := false for _, p := range phases { if p == PhaseRollingBack { foundRollback = true break } } if !foundRollback { t.Error("expected rolling-back phase in progress updates") } // Verify rollback operations were attempted (update-image back to old). ops := exec.getOps() rollbackOps := 0 for _, op := range ops { if strings.Contains(op, "update-image:") && strings.Contains(op, ":sha256:old") { rollbackOps++ } } if rollbackOps == 0 { t.Error("expected rollback operations (update-image back to sha256:old), found none") } // Verify history records the failure. entries, _ := hist.ListByTarget("svc") if len(entries) == 0 { t.Fatal("expected history entry for failed deployment") } if entries[0].Status != string(PhaseFailed) { t.Errorf("history status = %s, want failed", entries[0].Status) } } // ── Test: Deploy History ───────────────────────────────────────────────────── func TestDeployHistory(t *testing.T) { tmpDir := t.TempDir() hist := NewHistoryStore(tmpDir) // Write several entries. entries := []HistoryEntry{ { ID: "deploy-001", Target: "web-app", Strategy: "rolling", OldRef: "sha256:abc123", NewRef: "sha256:def456", Status: "complete", StartedAt: time.Date(2026, 3, 20, 15, 0, 0, 0, time.UTC), CompletedAt: time.Date(2026, 3, 20, 15, 5, 0, 0, time.UTC), InstancesUpdated: 3, }, { ID: "deploy-002", Target: "web-app", Strategy: "canary", OldRef: "sha256:def456", NewRef: "sha256:ghi789", Status: "complete", StartedAt: time.Date(2026, 3, 21, 10, 0, 0, 0, time.UTC), CompletedAt: time.Date(2026, 3, 21, 10, 2, 0, 0, time.UTC), InstancesUpdated: 1, }, { ID: "deploy-003", Target: "api-svc", Strategy: "rolling", OldRef: "sha256:111", NewRef: "sha256:222", Status: "failed", StartedAt: time.Date(2026, 3, 22, 8, 0, 0, 0, time.UTC), CompletedAt: time.Date(2026, 3, 22, 8, 1, 0, 0, time.UTC), InstancesUpdated: 0, Message: "health check timeout", }, } for _, e := range entries { if err := hist.Append(e); err != nil { t.Fatalf("Append error: %v", err) } } // Verify target-specific listing. webEntries, err := hist.ListByTarget("web-app") if err != nil { t.Fatalf("ListByTarget error: %v", err) } if len(webEntries) != 2 { t.Errorf("expected 2 web-app entries, got %d", len(webEntries)) } // Most recent first. if len(webEntries) >= 2 && webEntries[0].ID != "deploy-002" { t.Errorf("expected most recent entry first, got %s", webEntries[0].ID) } apiEntries, err := hist.ListByTarget("api-svc") if err != nil { t.Fatalf("ListByTarget error: %v", err) } if len(apiEntries) != 1 { t.Errorf("expected 1 api-svc entry, got %d", len(apiEntries)) } if len(apiEntries) == 1 && apiEntries[0].Message != "health check timeout" { t.Errorf("expected message 'health check timeout', got %q", apiEntries[0].Message) } // Verify ListAll. all, err := hist.ListAll() if err != nil { t.Fatalf("ListAll error: %v", err) } if len(all) != 3 { t.Errorf("expected 3 total entries, got %d", len(all)) } // Verify files were created. files, _ := filepath.Glob(filepath.Join(tmpDir, "*.yaml")) if len(files) != 2 { // web-app.yaml and api-svc.yaml t.Errorf("expected 2 history files, got %d", len(files)) } } // ── Test: Config Validation ────────────────────────────────────────────────── func TestConfigValidation(t *testing.T) { tests := []struct { name string cfg DeployConfig wantErr string }{ { name: "empty target", cfg: DeployConfig{Strategy: StrategyRolling, NewImage: "sha256:abc"}, wantErr: "target is required", }, { name: "empty image", cfg: DeployConfig{Strategy: StrategyRolling, Target: "web"}, wantErr: "new image", }, { name: "invalid strategy", cfg: DeployConfig{Strategy: "blue-green", Target: "web", NewImage: "sha256:abc"}, wantErr: "unknown strategy", }, { name: "canary weight zero", cfg: DeployConfig{Strategy: StrategyCanary, Target: "web", NewImage: "sha256:abc", CanaryWeight: 0}, wantErr: "canary weight must be between 1 and 99", }, { name: "canary weight 100", cfg: DeployConfig{Strategy: StrategyCanary, Target: "web", NewImage: "sha256:abc", CanaryWeight: 100}, wantErr: "canary weight must be between 1 and 99", }, { name: "valid rolling", cfg: DeployConfig{Strategy: StrategyRolling, Target: "web", NewImage: "sha256:abc"}, }, { name: "valid canary", cfg: DeployConfig{Strategy: StrategyCanary, Target: "web", NewImage: "sha256:abc", CanaryWeight: 25}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { err := tt.cfg.Validate() if tt.wantErr != "" { if err == nil { t.Errorf("expected error containing %q, got nil", tt.wantErr) } else if !strings.Contains(err.Error(), tt.wantErr) { t.Errorf("error %q should contain %q", err.Error(), tt.wantErr) } } else { if err != nil { t.Errorf("unexpected error: %v", err) } } }) } } // ── Test: Canary Instance Name ─────────────────────────────────────────────── func TestCanaryInstanceName(t *testing.T) { tests := []struct { target string want string }{ {"web-app", "web-app-canary"}, {"api-1", "api-canary"}, {"simple", "simple-canary"}, {"my-service-", "my-service-canary"}, } for _, tt := range tests { got := canaryInstanceName(tt.target) if got != tt.want { t.Errorf("canaryInstanceName(%q) = %q, want %q", tt.target, got, tt.want) } } } // ── Test: No Instances Found ───────────────────────────────────────────────── func TestRollingDeployNoInstances(t *testing.T) { exec := newMockExecutor() // empty hc := newMockHealthChecker() cfg := DeployConfig{ Strategy: StrategyRolling, Target: "nonexistent", NewImage: "sha256:abc", Timeout: 10 * time.Second, } err := RollingDeploy(cfg, exec, hc, nil, nil) if err == nil { t.Fatal("expected error for no instances, got nil") } if !strings.Contains(err.Error(), "no instances found") { t.Errorf("error should mention no instances, got: %v", err) } } // ── Test: Active Deployments Tracking ──────────────────────────────────────── func TestActiveDeployments(t *testing.T) { // Clear any leftover state. activeDeploymentsMu.Lock() activeDeployments = make(map[string]*DeployStatus) activeDeploymentsMu.Unlock() // Initially empty. active := GetActiveDeployments() if len(active) != 0 { t.Errorf("expected 0 active deployments, got %d", len(active)) } // Run a deployment and check it appears during execution. exec := newMockExecutor( Instance{Name: "track-1", Image: "sha256:old", Status: "running"}, ) hc := newMockHealthChecker() var seenActive bool progressFn := func(status DeployStatus) { if status.Phase == PhaseDeploying || status.Phase == PhaseVerifying { ad := GetActiveDeployment("track") if ad != nil { seenActive = true } } } cfg := DeployConfig{ Strategy: StrategyRolling, Target: "track", NewImage: "sha256:new", HealthCheck: HealthCheck{Type: "none"}, Timeout: 10 * time.Second, } err := RollingDeploy(cfg, exec, hc, nil, progressFn) if err != nil { t.Fatalf("unexpected error: %v", err) } if !seenActive { t.Error("expected to see active deployment during execution") } // After completion, should be empty again. active = GetActiveDeployments() if len(active) != 0 { t.Errorf("expected 0 active deployments after completion, got %d", len(active)) } } // ── Test: History File Persistence ─────────────────────────────────────────── func TestHistoryFilePersistence(t *testing.T) { tmpDir := t.TempDir() hist := NewHistoryStore(tmpDir) entry := HistoryEntry{ ID: "persist-001", Target: "my-app", Strategy: "rolling", OldRef: "sha256:aaa", NewRef: "sha256:bbb", Status: "complete", StartedAt: time.Now().UTC(), CompletedAt: time.Now().UTC(), InstancesUpdated: 2, } if err := hist.Append(entry); err != nil { t.Fatalf("Append error: %v", err) } // Verify the file exists on disk. filePath := filepath.Join(tmpDir, "my-app.yaml") if _, err := os.Stat(filePath); err != nil { t.Fatalf("history file not found: %v", err) } // Create a new store instance (simulating restart) and verify data. hist2 := NewHistoryStore(tmpDir) entries, err := hist2.ListByTarget("my-app") if err != nil { t.Fatalf("ListByTarget error: %v", err) } if len(entries) != 1 { t.Fatalf("expected 1 entry, got %d", len(entries)) } if entries[0].ID != "persist-001" { t.Errorf("entry ID = %s, want persist-001", entries[0].ID) } if entries[0].InstancesUpdated != 2 { t.Errorf("instances_updated = %d, want 2", entries[0].InstancesUpdated) } } // ── Test: Noop Health Checker ──────────────────────────────────────────────── func TestNoopHealthChecker(t *testing.T) { noop := &NoopHealthChecker{} err := noop.WaitHealthy("anything", HealthCheck{Type: "http", Port: 9999}) if err != nil { t.Errorf("NoopHealthChecker should always return nil, got: %v", err) } } // ── Test: Rollback Without History ─────────────────────────────────────────── func TestRollbackWithoutHistory(t *testing.T) { exec := newMockExecutor( Instance{Name: "no-hist-1", Image: "sha256:v2", Status: "running"}, ) tmpDir := t.TempDir() hist := NewHistoryStore(tmpDir) err := Rollback("no-hist", exec, hist, nil) if err == nil { t.Fatal("expected error for rollback without history, got nil") } if !strings.Contains(err.Error(), "no previous version") { t.Errorf("error should mention no previous version, got: %v", err) } } // ── Test: Canary Cleanup on Health Failure ──────────────────────────────────── func TestCanaryCleanupOnHealthFailure(t *testing.T) { exec := newMockExecutor( Instance{Name: "svc-1", Image: "sha256:v1", Status: "running"}, ) hc := newMockHealthChecker() canaryName := canaryInstanceName("svc") hc.results[canaryName] = fmt.Errorf("unhealthy canary") pc := newProgressCollector() tmpDir := t.TempDir() hist := NewHistoryStore(tmpDir) cfg := DeployConfig{ Strategy: StrategyCanary, Target: "svc", NewImage: "sha256:v2", CanaryWeight: 10, HealthCheck: HealthCheck{Type: "tcp", Port: 8080, Interval: 100 * time.Millisecond, Retries: 1}, Timeout: 10 * time.Second, AutoRollback: true, } err := CanaryDeploy(cfg, exec, hc, hist, pc.callback()) if err == nil { t.Fatal("expected canary to fail, got nil") } // Verify canary was cleaned up (stop + delete). ops := exec.getOps() foundStop := false foundDelete := false for _, op := range ops { if op == fmt.Sprintf("stop:%s", canaryName) { foundStop = true } if op == fmt.Sprintf("delete:%s", canaryName) { foundDelete = true } } if !foundStop { t.Error("expected canary stop operation during cleanup") } if !foundDelete { t.Error("expected canary delete operation during cleanup") } // Verify original instance was not modified. img, _ := exec.GetInstanceImage("svc-1") if img != "sha256:v1" { t.Errorf("original instance image changed to %s during failed canary", img) } }