Volt CLI: source-available under AGPSL v5.0
Complete infrastructure platform CLI:
- Container runtime (systemd-nspawn)
- VoltVisor VMs (Neutron Stardust / QEMU)
- Stellarium CAS (content-addressed storage)
- ORAS Registry
- GitOps integration
- Landlock LSM security
- Compose orchestration
- Mesh networking

Copyright (c) Armored Gates LLC. All rights reserved.
Licensed under AGPSL v5.0
This commit is contained in:
143
pkg/deploy/health.go
Normal file
143
pkg/deploy/health.go
Normal file
@@ -0,0 +1,143 @@
|
||||
/*
|
||||
Health — Health check implementations for deployment verification.
|
||||
|
||||
Supports HTTP, TCP, exec, and no-op health checks. Each check type
|
||||
retries according to the configured interval and retry count.
|
||||
|
||||
Copyright (c) Armored Gates LLC. All rights reserved.
|
||||
*/
|
||||
package deploy
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"os/exec"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ── Health Check Config ──────────────────────────────────────────────────────
|
||||
|
||||
// HealthCheck describes the probe used to confirm that an instance is healthy
// once it has been deployed.
type HealthCheck struct {
	// Type selects the probe kind: "http", "tcp", "exec", or "none".
	Type string `json:"type" yaml:"type"`

	// Path is the HTTP path to request (e.g., "/healthz").
	Path string `json:"path" yaml:"path"`

	// Port is the port to check.
	Port int `json:"port" yaml:"port"`

	// Command is the command line to run for exec checks.
	Command string `json:"command" yaml:"command"`

	// Interval is the time to wait between retries.
	Interval time.Duration `json:"interval" yaml:"interval"`

	// Retries is the maximum retry count. DefaultHealthChecker uses it as
	// the bound on the number of probe attempts.
	Retries int `json:"retries" yaml:"retries"`
}
|
||||
|
||||
// ── Health Checker Interface ─────────────────────────────────────────────────
|
||||
|
||||
// HealthChecker verifies instance health during deployments.
|
||||
type HealthChecker interface {
|
||||
// WaitHealthy blocks until the instance is healthy or all retries are exhausted.
|
||||
WaitHealthy(instanceName string, check HealthCheck) error
|
||||
}
|
||||
|
||||
// ── Default Health Checker ───────────────────────────────────────────────────
|
||||
|
||||
// DefaultHealthChecker is the HealthChecker backed by real HTTP requests,
// TCP dials, and exec'd commands.
type DefaultHealthChecker struct {
	// InstanceIPResolver maps an instance name to the IP address that the
	// HTTP and TCP probes should target. When it is nil, or when it returns
	// an error, "127.0.0.1" is used.
	InstanceIPResolver func(name string) (string, error)
}
|
||||
|
||||
// WaitHealthy performs health checks with retries.
|
||||
func (d *DefaultHealthChecker) WaitHealthy(instanceName string, check HealthCheck) error {
|
||||
switch check.Type {
|
||||
case "none", "":
|
||||
return nil
|
||||
case "http":
|
||||
return d.waitHTTP(instanceName, check)
|
||||
case "tcp":
|
||||
return d.waitTCP(instanceName, check)
|
||||
case "exec":
|
||||
return d.waitExec(instanceName, check)
|
||||
default:
|
||||
return fmt.Errorf("unknown health check type: %q", check.Type)
|
||||
}
|
||||
}
|
||||
|
||||
func (d *DefaultHealthChecker) resolveIP(instanceName string) string {
|
||||
if d.InstanceIPResolver != nil {
|
||||
ip, err := d.InstanceIPResolver(instanceName)
|
||||
if err == nil {
|
||||
return ip
|
||||
}
|
||||
}
|
||||
return "127.0.0.1"
|
||||
}
|
||||
|
||||
func (d *DefaultHealthChecker) waitHTTP(instanceName string, check HealthCheck) error {
|
||||
ip := d.resolveIP(instanceName)
|
||||
url := fmt.Sprintf("http://%s:%d%s", ip, check.Port, check.Path)
|
||||
|
||||
client := &http.Client{Timeout: check.Interval}
|
||||
|
||||
var lastErr error
|
||||
for i := 0; i < check.Retries; i++ {
|
||||
resp, err := client.Get(url)
|
||||
if err == nil {
|
||||
resp.Body.Close()
|
||||
if resp.StatusCode >= 200 && resp.StatusCode < 400 {
|
||||
return nil
|
||||
}
|
||||
lastErr = fmt.Errorf("HTTP %d from %s", resp.StatusCode, url)
|
||||
} else {
|
||||
lastErr = err
|
||||
}
|
||||
if i < check.Retries-1 {
|
||||
time.Sleep(check.Interval)
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("health check failed after %d retries: %w", check.Retries, lastErr)
|
||||
}
|
||||
|
||||
func (d *DefaultHealthChecker) waitTCP(instanceName string, check HealthCheck) error {
|
||||
ip := d.resolveIP(instanceName)
|
||||
addr := fmt.Sprintf("%s:%d", ip, check.Port)
|
||||
|
||||
var lastErr error
|
||||
for i := 0; i < check.Retries; i++ {
|
||||
conn, err := net.DialTimeout("tcp", addr, check.Interval)
|
||||
if err == nil {
|
||||
conn.Close()
|
||||
return nil
|
||||
}
|
||||
lastErr = err
|
||||
if i < check.Retries-1 {
|
||||
time.Sleep(check.Interval)
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("TCP health check failed after %d retries: %w", check.Retries, lastErr)
|
||||
}
|
||||
|
||||
func (d *DefaultHealthChecker) waitExec(instanceName string, check HealthCheck) error {
|
||||
var lastErr error
|
||||
for i := 0; i < check.Retries; i++ {
|
||||
cmd := exec.Command("sh", "-c", check.Command)
|
||||
if err := cmd.Run(); err == nil {
|
||||
return nil
|
||||
} else {
|
||||
lastErr = err
|
||||
}
|
||||
if i < check.Retries-1 {
|
||||
time.Sleep(check.Interval)
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("exec health check failed after %d retries: %w", check.Retries, lastErr)
|
||||
}
|
||||
|
||||
// ── Noop Health Checker ──────────────────────────────────────────────────────
|
||||
|
||||
// NoopHealthChecker is a HealthChecker that reports every instance as
// healthy. It is used for rollbacks and when health checking is disabled.
type NoopHealthChecker struct{}
|
||||
|
||||
// WaitHealthy always succeeds immediately.
|
||||
func (n *NoopHealthChecker) WaitHealthy(instanceName string, check HealthCheck) error {
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user