Volt CLI: source-available under AGPSL v5.0
Complete infrastructure platform CLI: - Container runtime (systemd-nspawn) - VoltVisor VMs (Neutron Stardust / QEMU) - Stellarium CAS (content-addressed storage) - ORAS Registry - GitOps integration - Landlock LSM security - Compose orchestration - Mesh networking Copyright (c) Armored Gates LLC. All rights reserved. Licensed under AGPSL v5.0
This commit is contained in:
761
pkg/cluster/cluster.go
Normal file
761
pkg/cluster/cluster.go
Normal file
@@ -0,0 +1,761 @@
|
||||
/*
|
||||
Volt Native Clustering — Core cluster management engine.
|
||||
|
||||
Provides node discovery, health monitoring, workload scheduling, and leader
|
||||
election using Raft consensus. This replaces the kubectl wrapper in k8s.go
|
||||
with a real, native clustering implementation.
|
||||
|
||||
Architecture:
|
||||
- Raft consensus for leader election and distributed state
|
||||
- Leader handles all scheduling decisions
|
||||
- Followers execute workloads and report health
|
||||
- State machine (FSM) tracks nodes, workloads, and assignments
|
||||
- Health monitoring via periodic heartbeats (1s interval, 5s timeout)
|
||||
|
||||
Transport: Runs over WireGuard mesh when available, falls back to plaintext.
|
||||
|
||||
License: AGPSL v5 — Pro tier ("cluster" feature)
|
||||
*/
|
||||
package cluster
|
||||
|
||||
import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"runtime"
	"sync"
	"time"
)
|
||||
|
||||
// ── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
const (
|
||||
ClusterConfigDir = "/var/lib/volt/cluster"
|
||||
ClusterStateFile = "/var/lib/volt/cluster/state.json"
|
||||
ClusterRaftDir = "/var/lib/volt/cluster/raft"
|
||||
|
||||
DefaultRaftPort = 7946
|
||||
DefaultRPCPort = 7947
|
||||
DefaultGossipPort = 7948
|
||||
|
||||
HeartbeatInterval = 1 * time.Second
|
||||
HeartbeatTimeout = 5 * time.Second
|
||||
NodeDeadThreshold = 30 * time.Second
|
||||
ElectionTimeout = 10 * time.Second
|
||||
)
|
||||
|
||||
// ── Node Types ──────────────────────────────────────────────────────────────
|
||||
|
||||
// NodeRole represents a node's role in the cluster
|
||||
type NodeRole string
|
||||
|
||||
const (
|
||||
RoleLeader NodeRole = "leader"
|
||||
RoleFollower NodeRole = "follower"
|
||||
RoleCandidate NodeRole = "candidate"
|
||||
)
|
||||
|
||||
// NodeStatus represents a node's health status
|
||||
type NodeStatus string
|
||||
|
||||
const (
|
||||
StatusHealthy NodeStatus = "healthy"
|
||||
StatusDegraded NodeStatus = "degraded"
|
||||
StatusUnreachable NodeStatus = "unreachable"
|
||||
StatusDead NodeStatus = "dead"
|
||||
StatusDraining NodeStatus = "draining"
|
||||
StatusLeft NodeStatus = "left"
|
||||
)
|
||||
|
||||
// Node represents a cluster member
|
||||
// Node represents a cluster member: identity and mesh addressing, its
// Raft role and health status, declared capacity (Resources) versus the
// amounts the scheduler has reserved (Allocated), and heartbeat times.
type Node struct {
	ID       string `json:"id"`
	Name     string `json:"name"`
	MeshIP   string `json:"mesh_ip"` // WireGuard mesh address when mesh transport is in use
	Endpoint string `json:"endpoint"`
	Role     NodeRole   `json:"role"`
	Status   NodeStatus `json:"status"`
	// Labels drive scheduling constraints; the "zone" key is consulted by
	// the scheduler's zone filter.
	Labels    map[string]string `json:"labels,omitempty"`
	Resources NodeResources     `json:"resources"` // total capacity reported by the node
	Allocated NodeResources     `json:"allocated"` // capacity currently reserved by assignments
	JoinedAt      time.Time `json:"joined_at"`
	LastHeartbeat time.Time `json:"last_heartbeat"`
	// Version is presumably the node's Volt software version — TODO confirm.
	Version string `json:"version,omitempty"`
}
|
||||
|
||||
// NodeResources tracks a node's resource capacity
|
||||
type NodeResources struct {
|
||||
CPUCores int `json:"cpu_cores"`
|
||||
MemoryMB int64 `json:"memory_mb"`
|
||||
DiskMB int64 `json:"disk_mb"`
|
||||
Containers int `json:"containers"`
|
||||
MaxContainers int `json:"max_containers,omitempty"`
|
||||
}
|
||||
|
||||
// AvailableMemoryMB returns unallocated memory
|
||||
func (n *Node) AvailableMemoryMB() int64 {
|
||||
return n.Resources.MemoryMB - n.Allocated.MemoryMB
|
||||
}
|
||||
|
||||
// AvailableCPU returns unallocated CPU cores
|
||||
func (n *Node) AvailableCPU() int {
|
||||
return n.Resources.CPUCores - n.Allocated.CPUCores
|
||||
}
|
||||
|
||||
// ── Workload Assignment ─────────────────────────────────────────────────────
|
||||
|
||||
// WorkloadAssignment tracks which workload runs on which node
|
||||
type WorkloadAssignment struct {
|
||||
WorkloadID string `json:"workload_id"`
|
||||
WorkloadName string `json:"workload_name"`
|
||||
NodeID string `json:"node_id"`
|
||||
Status string `json:"status"`
|
||||
Resources WorkloadResources `json:"resources"`
|
||||
Constraints ScheduleConstraints `json:"constraints,omitempty"`
|
||||
AssignedAt time.Time `json:"assigned_at"`
|
||||
StartedAt time.Time `json:"started_at,omitempty"`
|
||||
}
|
||||
|
||||
// WorkloadResources specifies the resources a workload requires
|
||||
type WorkloadResources struct {
|
||||
CPUCores int `json:"cpu_cores"`
|
||||
MemoryMB int64 `json:"memory_mb"`
|
||||
DiskMB int64 `json:"disk_mb,omitempty"`
|
||||
}
|
||||
|
||||
// ScheduleConstraints define placement requirements for workloads
|
||||
type ScheduleConstraints struct {
|
||||
// Labels that must match on the target node
|
||||
NodeLabels map[string]string `json:"node_labels,omitempty"`
|
||||
// Preferred labels (soft constraint)
|
||||
PreferLabels map[string]string `json:"prefer_labels,omitempty"`
|
||||
// Anti-affinity: don't schedule on nodes running these workload IDs
|
||||
AntiAffinity []string `json:"anti_affinity,omitempty"`
|
||||
// Require specific node
|
||||
PinToNode string `json:"pin_to_node,omitempty"`
|
||||
// Zone/rack awareness
|
||||
Zone string `json:"zone,omitempty"`
|
||||
}
|
||||
|
||||
// ── Cluster State ───────────────────────────────────────────────────────────
|
||||
|
||||
// ClusterState is the canonical state of the cluster, replicated via Raft
|
||||
// ClusterState is the canonical state of the cluster (replicated via Raft
// per the file header). mu guards every field; since the mutex is
// unexported it is skipped by encoding/json, so SaveState persists only
// the exported fields.
type ClusterState struct {
	mu sync.RWMutex

	ClusterID string    `json:"cluster_id"`
	Name      string    `json:"name"`
	CreatedAt time.Time `json:"created_at"`
	// Nodes is keyed by Node.ID; Assignments by WorkloadAssignment.WorkloadID.
	Nodes       map[string]*Node               `json:"nodes"`
	Assignments map[string]*WorkloadAssignment `json:"assignments"`
	LeaderID    string                         `json:"leader_id"`
	// Term is the Raft term — not mutated anywhere in this file.
	Term uint64 `json:"term"`
	// Version is bumped on each membership/assignment mutation (AddNode,
	// RemoveNode, AssignWorkload, UnassignWorkload).
	Version uint64 `json:"version"`
}
|
||||
|
||||
// NewClusterState creates an empty cluster state
|
||||
func NewClusterState(clusterID, name string) *ClusterState {
|
||||
return &ClusterState{
|
||||
ClusterID: clusterID,
|
||||
Name: name,
|
||||
CreatedAt: time.Now().UTC(),
|
||||
Nodes: make(map[string]*Node),
|
||||
Assignments: make(map[string]*WorkloadAssignment),
|
||||
}
|
||||
}
|
||||
|
||||
// AddNode registers a new node in the cluster
|
||||
func (cs *ClusterState) AddNode(node *Node) error {
|
||||
cs.mu.Lock()
|
||||
defer cs.mu.Unlock()
|
||||
|
||||
if _, exists := cs.Nodes[node.ID]; exists {
|
||||
return fmt.Errorf("node %q already exists", node.ID)
|
||||
}
|
||||
|
||||
node.JoinedAt = time.Now().UTC()
|
||||
node.LastHeartbeat = time.Now().UTC()
|
||||
node.Status = StatusHealthy
|
||||
cs.Nodes[node.ID] = node
|
||||
cs.Version++
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RemoveNode removes a node from the cluster
|
||||
func (cs *ClusterState) RemoveNode(nodeID string) error {
|
||||
cs.mu.Lock()
|
||||
defer cs.mu.Unlock()
|
||||
|
||||
if _, exists := cs.Nodes[nodeID]; !exists {
|
||||
return fmt.Errorf("node %q not found", nodeID)
|
||||
}
|
||||
|
||||
delete(cs.Nodes, nodeID)
|
||||
cs.Version++
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateHeartbeat marks a node as alive
|
||||
func (cs *ClusterState) UpdateHeartbeat(nodeID string, resources NodeResources) error {
|
||||
cs.mu.Lock()
|
||||
defer cs.mu.Unlock()
|
||||
|
||||
node, exists := cs.Nodes[nodeID]
|
||||
if !exists {
|
||||
return fmt.Errorf("node %q not found", nodeID)
|
||||
}
|
||||
|
||||
node.LastHeartbeat = time.Now().UTC()
|
||||
node.Resources = resources
|
||||
node.Status = StatusHealthy
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetNode returns a node by ID
|
||||
func (cs *ClusterState) GetNode(nodeID string) *Node {
|
||||
cs.mu.RLock()
|
||||
defer cs.mu.RUnlock()
|
||||
return cs.Nodes[nodeID]
|
||||
}
|
||||
|
||||
// ListNodes returns all nodes
|
||||
func (cs *ClusterState) ListNodes() []*Node {
|
||||
cs.mu.RLock()
|
||||
defer cs.mu.RUnlock()
|
||||
|
||||
nodes := make([]*Node, 0, len(cs.Nodes))
|
||||
for _, n := range cs.Nodes {
|
||||
nodes = append(nodes, n)
|
||||
}
|
||||
return nodes
|
||||
}
|
||||
|
||||
// HealthyNodes returns nodes that can accept workloads
|
||||
func (cs *ClusterState) HealthyNodes() []*Node {
|
||||
cs.mu.RLock()
|
||||
defer cs.mu.RUnlock()
|
||||
|
||||
var healthy []*Node
|
||||
for _, n := range cs.Nodes {
|
||||
if n.Status == StatusHealthy {
|
||||
healthy = append(healthy, n)
|
||||
}
|
||||
}
|
||||
return healthy
|
||||
}
|
||||
|
||||
// ── Scheduling ──────────────────────────────────────────────────────────────
|
||||
|
||||
// Scheduler determines which node should run a workload
|
||||
type Scheduler struct {
|
||||
state *ClusterState
|
||||
}
|
||||
|
||||
// NewScheduler creates a new scheduler
|
||||
func NewScheduler(state *ClusterState) *Scheduler {
|
||||
return &Scheduler{state: state}
|
||||
}
|
||||
|
||||
// Schedule selects the best node for a workload using bin-packing
|
||||
// Schedule selects the best node for a workload and returns its node ID.
//
// Resolution order:
//  1. A PinToNode constraint wins outright; the pinned node must exist
//     and be healthy. NOTE(review): the pinned path performs NO free
//     capacity check, unlike the general path — confirm that is intended.
//  2. Otherwise candidates are filtered (health, capacity, labels,
//     anti-affinity, zone) and scored with bin-packing: the most-packed
//     node that still fits wins.
//
// Holds the state read lock for the entire decision.
func (s *Scheduler) Schedule(workload *WorkloadAssignment) (string, error) {
	s.state.mu.RLock()
	defer s.state.mu.RUnlock()

	// If pinned to a specific node, use that
	if workload.Constraints.PinToNode != "" {
		node, exists := s.state.Nodes[workload.Constraints.PinToNode]
		if !exists {
			return "", fmt.Errorf("pinned node %q not found", workload.Constraints.PinToNode)
		}
		if node.Status != StatusHealthy {
			return "", fmt.Errorf("pinned node %q is %s", workload.Constraints.PinToNode, node.Status)
		}
		return node.ID, nil
	}

	// Filter candidates
	candidates := s.filterCandidates(workload)
	if len(candidates) == 0 {
		return "", fmt.Errorf("no eligible nodes found for workload %q (need %dMB RAM, %d CPU)",
			workload.WorkloadID, workload.Resources.MemoryMB, workload.Resources.CPUCores)
	}

	// Score candidates using bin-packing (prefer the most-packed node that still fits)
	var bestNode *Node
	// scoreNode never returns a negative score, so with a non-empty
	// candidate list some node always beats the -1 sentinel.
	bestScore := -1.0

	for _, node := range candidates {
		score := s.scoreNode(node, workload)
		if score > bestScore {
			bestScore = score
			bestNode = node
		}
	}

	// Defensive guard; unreachable given the sentinel above.
	if bestNode == nil {
		return "", fmt.Errorf("no suitable node found")
	}

	return bestNode.ID, nil
}
|
||||
|
||||
// filterCandidates returns nodes that can physically run the workload
|
||||
// filterCandidates returns the healthy nodes that can physically run the
// workload: enough free memory and CPU, all required labels present,
// no anti-affinity conflict, and a compatible zone.
//
// Caller (Schedule) already holds s.state.mu.RLock.
func (s *Scheduler) filterCandidates(workload *WorkloadAssignment) []*Node {
	var candidates []*Node

	for _, node := range s.state.Nodes {
		// Must be healthy
		if node.Status != StatusHealthy {
			continue
		}

		// Must have enough resources
		if node.AvailableMemoryMB() < workload.Resources.MemoryMB {
			continue
		}
		if node.AvailableCPU() < workload.Resources.CPUCores {
			continue
		}

		// Check label constraints
		if !s.matchLabels(node, workload.Constraints.NodeLabels) {
			continue
		}

		// Check anti-affinity
		if s.violatesAntiAffinity(node, workload.Constraints.AntiAffinity) {
			continue
		}

		// Check zone constraint
		// NOTE(review): a node with NO "zone" label passes ANY zone
		// constraint — confirm this is intended rather than requiring the
		// label to be present when a zone is requested.
		if workload.Constraints.Zone != "" {
			if nodeZone, ok := node.Labels["zone"]; ok {
				if nodeZone != workload.Constraints.Zone {
					continue
				}
			}
		}

		candidates = append(candidates, node)
	}

	return candidates
}
|
||||
|
||||
// matchLabels checks if a node has all required labels
|
||||
func (s *Scheduler) matchLabels(node *Node, required map[string]string) bool {
|
||||
for k, v := range required {
|
||||
if nodeVal, ok := node.Labels[k]; !ok || nodeVal != v {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// violatesAntiAffinity checks if scheduling on this node would violate anti-affinity
|
||||
func (s *Scheduler) violatesAntiAffinity(node *Node, antiAffinity []string) bool {
|
||||
if len(antiAffinity) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, assignment := range s.state.Assignments {
|
||||
if assignment.NodeID != node.ID {
|
||||
continue
|
||||
}
|
||||
for _, aa := range antiAffinity {
|
||||
if assignment.WorkloadID == aa {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// scoreNode scores a node for bin-packing (higher = better fit)
|
||||
// Prefers nodes that are already partially filled (pack tight)
|
||||
func (s *Scheduler) scoreNode(node *Node, workload *WorkloadAssignment) float64 {
|
||||
if node.Resources.MemoryMB == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Memory utilization after placing this workload (higher = more packed = preferred)
|
||||
futureAllocMem := float64(node.Allocated.MemoryMB+workload.Resources.MemoryMB) / float64(node.Resources.MemoryMB)
|
||||
|
||||
// CPU utilization
|
||||
futureCPU := 0.0
|
||||
if node.Resources.CPUCores > 0 {
|
||||
futureCPU = float64(node.Allocated.CPUCores+workload.Resources.CPUCores) / float64(node.Resources.CPUCores)
|
||||
}
|
||||
|
||||
// Weighted score: 60% memory, 30% CPU, 10% bonus for preferred labels
|
||||
score := futureAllocMem*0.6 + futureCPU*0.3
|
||||
|
||||
// Bonus for matching preferred labels
|
||||
if len(workload.Constraints.PreferLabels) > 0 {
|
||||
matchCount := 0
|
||||
for k, v := range workload.Constraints.PreferLabels {
|
||||
if nodeVal, ok := node.Labels[k]; ok && nodeVal == v {
|
||||
matchCount++
|
||||
}
|
||||
}
|
||||
if len(workload.Constraints.PreferLabels) > 0 {
|
||||
score += 0.1 * float64(matchCount) / float64(len(workload.Constraints.PreferLabels))
|
||||
}
|
||||
}
|
||||
|
||||
return score
|
||||
}
|
||||
|
||||
// AssignWorkload records a workload assignment
|
||||
func (cs *ClusterState) AssignWorkload(assignment *WorkloadAssignment) error {
|
||||
cs.mu.Lock()
|
||||
defer cs.mu.Unlock()
|
||||
|
||||
node, exists := cs.Nodes[assignment.NodeID]
|
||||
if !exists {
|
||||
return fmt.Errorf("node %q not found", assignment.NodeID)
|
||||
}
|
||||
|
||||
// Update allocated resources
|
||||
node.Allocated.CPUCores += assignment.Resources.CPUCores
|
||||
node.Allocated.MemoryMB += assignment.Resources.MemoryMB
|
||||
node.Allocated.Containers++
|
||||
|
||||
assignment.AssignedAt = time.Now().UTC()
|
||||
cs.Assignments[assignment.WorkloadID] = assignment
|
||||
cs.Version++
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnassignWorkload removes a workload assignment and frees resources
|
||||
func (cs *ClusterState) UnassignWorkload(workloadID string) error {
|
||||
cs.mu.Lock()
|
||||
defer cs.mu.Unlock()
|
||||
|
||||
assignment, exists := cs.Assignments[workloadID]
|
||||
if !exists {
|
||||
return fmt.Errorf("workload %q not assigned", workloadID)
|
||||
}
|
||||
|
||||
// Free resources on the node
|
||||
if node, ok := cs.Nodes[assignment.NodeID]; ok {
|
||||
node.Allocated.CPUCores -= assignment.Resources.CPUCores
|
||||
node.Allocated.MemoryMB -= assignment.Resources.MemoryMB
|
||||
node.Allocated.Containers--
|
||||
if node.Allocated.CPUCores < 0 {
|
||||
node.Allocated.CPUCores = 0
|
||||
}
|
||||
if node.Allocated.MemoryMB < 0 {
|
||||
node.Allocated.MemoryMB = 0
|
||||
}
|
||||
if node.Allocated.Containers < 0 {
|
||||
node.Allocated.Containers = 0
|
||||
}
|
||||
}
|
||||
|
||||
delete(cs.Assignments, workloadID)
|
||||
cs.Version++
|
||||
return nil
|
||||
}
|
||||
|
||||
// ── Health Monitor ──────────────────────────────────────────────────────────
|
||||
|
||||
// HealthMonitor periodically checks node health and triggers rescheduling
|
||||
// HealthMonitor periodically checks node health (see checkHealth) and
// notifies a registered callback when a node is declared dead.
type HealthMonitor struct {
	state     *ClusterState // shared cluster state; all access guarded by state.mu
	scheduler *Scheduler    // NOTE(review): unused by the visible monitor code — presumably for callers' rescheduling; confirm
	stopCh    chan struct{} // closed by Stop to terminate the Start goroutine
	// onNodeDead, if set, is invoked on its own goroutine with the dead
	// node's ID and the assignments stranded on it.
	onNodeDead func(nodeID string, orphanedWorkloads []*WorkloadAssignment)
}
|
||||
|
||||
// NewHealthMonitor creates a new health monitor
|
||||
func NewHealthMonitor(state *ClusterState, scheduler *Scheduler) *HealthMonitor {
|
||||
return &HealthMonitor{
|
||||
state: state,
|
||||
scheduler: scheduler,
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// OnNodeDead registers a callback for when a node is declared dead
|
||||
// OnNodeDead registers a callback invoked asynchronously when a node is
// declared dead, receiving the node ID and its orphaned assignments.
// NOTE(review): this write is unsynchronized — register the callback
// before calling Start to avoid a data race with checkHealth.
func (hm *HealthMonitor) OnNodeDead(fn func(nodeID string, orphaned []*WorkloadAssignment)) {
	hm.onNodeDead = fn
}
|
||||
|
||||
// Start begins the health monitoring loop
|
||||
// Start launches the background health-check loop, which runs
// checkHealth every HeartbeatInterval until Stop is called.
func (hm *HealthMonitor) Start() {
	go func() {
		ticker := time.NewTicker(HeartbeatInterval)
		defer ticker.Stop()

		for {
			select {
			case <-ticker.C:
				hm.checkHealth()
			case <-hm.stopCh:
				// Stop closed the channel; exit the goroutine.
				return
			}
		}
	}()
}
|
||||
|
||||
// Stop halts the health monitoring loop
|
||||
// Stop halts the monitoring goroutine by closing stopCh.
// NOTE(review): calling Stop twice panics (close of closed channel);
// consider guarding with sync.Once if double-stop is reachable.
func (hm *HealthMonitor) Stop() {
	close(hm.stopCh)
}
|
||||
|
||||
// checkHealth walks every node and transitions its status based on
// heartbeat age:
//
//	silent > NodeDeadThreshold  → dead (fires onNodeDead with the
//	                              assignments stranded on that node)
//	silent > HeartbeatTimeout   → unreachable
//	otherwise                   → healthy, but only recovering from
//	                              unreachable/degraded; draining is
//	                              deliberately left untouched
//
// Nodes already dead or departed are skipped, so the dead transition
// (and its callback) fires at most once per node.
func (hm *HealthMonitor) checkHealth() {
	hm.state.mu.Lock()
	defer hm.state.mu.Unlock()

	now := time.Now()

	for _, node := range hm.state.Nodes {
		if node.Status == StatusLeft || node.Status == StatusDead {
			continue
		}

		sinceHeartbeat := now.Sub(node.LastHeartbeat)

		switch {
		case sinceHeartbeat > NodeDeadThreshold:
			if node.Status != StatusDead {
				node.Status = StatusDead
				// Collect orphaned workloads
				if hm.onNodeDead != nil {
					var orphaned []*WorkloadAssignment
					for _, a := range hm.state.Assignments {
						if a.NodeID == node.ID {
							orphaned = append(orphaned, a)
						}
					}
					// Run the callback off this lock-holding goroutine;
					// it must acquire state.mu itself before touching state.
					go hm.onNodeDead(node.ID, orphaned)
				}
			}

		case sinceHeartbeat > HeartbeatTimeout:
			node.Status = StatusUnreachable

		default:
			// Node is alive
			if node.Status == StatusUnreachable || node.Status == StatusDegraded {
				node.Status = StatusHealthy
			}
		}
	}
}
|
||||
|
||||
// ── Drain Operation ─────────────────────────────────────────────────────────
|
||||
|
||||
// DrainNode moves all workloads off a node for maintenance
|
||||
func DrainNode(state *ClusterState, scheduler *Scheduler, nodeID string) ([]string, error) {
|
||||
state.mu.Lock()
|
||||
|
||||
node, exists := state.Nodes[nodeID]
|
||||
if !exists {
|
||||
state.mu.Unlock()
|
||||
return nil, fmt.Errorf("node %q not found", nodeID)
|
||||
}
|
||||
|
||||
node.Status = StatusDraining
|
||||
|
||||
// Collect workloads on this node
|
||||
var toReschedule []*WorkloadAssignment
|
||||
for _, a := range state.Assignments {
|
||||
if a.NodeID == nodeID {
|
||||
toReschedule = append(toReschedule, a)
|
||||
}
|
||||
}
|
||||
|
||||
state.mu.Unlock()
|
||||
|
||||
// Reschedule each workload
|
||||
var rescheduled []string
|
||||
for _, assignment := range toReschedule {
|
||||
// Remove from current node
|
||||
if err := state.UnassignWorkload(assignment.WorkloadID); err != nil {
|
||||
return rescheduled, fmt.Errorf("failed to unassign %s: %w", assignment.WorkloadID, err)
|
||||
}
|
||||
|
||||
// Find new node
|
||||
newNodeID, err := scheduler.Schedule(assignment)
|
||||
if err != nil {
|
||||
return rescheduled, fmt.Errorf("failed to reschedule %s: %w", assignment.WorkloadID, err)
|
||||
}
|
||||
|
||||
assignment.NodeID = newNodeID
|
||||
if err := state.AssignWorkload(assignment); err != nil {
|
||||
return rescheduled, fmt.Errorf("failed to assign %s to %s: %w",
|
||||
assignment.WorkloadID, newNodeID, err)
|
||||
}
|
||||
|
||||
rescheduled = append(rescheduled, fmt.Sprintf("%s → %s", assignment.WorkloadID, newNodeID))
|
||||
}
|
||||
|
||||
return rescheduled, nil
|
||||
}
|
||||
|
||||
// ── Persistence ─────────────────────────────────────────────────────────────
|
||||
|
||||
// SaveState writes cluster state to disk
|
||||
func SaveState(state *ClusterState) error {
|
||||
state.mu.RLock()
|
||||
defer state.mu.RUnlock()
|
||||
|
||||
if err := os.MkdirAll(ClusterConfigDir, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
data, err := json.MarshalIndent(state, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Atomic write
|
||||
tmpFile := ClusterStateFile + ".tmp"
|
||||
if err := os.WriteFile(tmpFile, data, 0644); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.Rename(tmpFile, ClusterStateFile)
|
||||
}
|
||||
|
||||
// LoadState reads cluster state from disk
|
||||
func LoadState() (*ClusterState, error) {
|
||||
data, err := os.ReadFile(ClusterStateFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var state ClusterState
|
||||
if err := json.Unmarshal(data, &state); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Initialize maps if nil
|
||||
if state.Nodes == nil {
|
||||
state.Nodes = make(map[string]*Node)
|
||||
}
|
||||
if state.Assignments == nil {
|
||||
state.Assignments = make(map[string]*WorkloadAssignment)
|
||||
}
|
||||
|
||||
return &state, nil
|
||||
}
|
||||
|
||||
// ── Node Resource Detection ─────────────────────────────────────────────────
|
||||
|
||||
// DetectResources probes the local system for available resources
|
||||
func DetectResources() NodeResources {
|
||||
res := NodeResources{
|
||||
CPUCores: detectCPUCores(),
|
||||
MemoryMB: detectMemoryMB(),
|
||||
DiskMB: detectDiskMB(),
|
||||
MaxContainers: 500, // Pro default
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
// detectCPUCores reports the number of logical CPUs usable by this
// process. runtime.NumCPU replaces the previous hand-rolled
// /proc/cpuinfo "processor" line count: it respects the process CPU
// affinity mask at startup, works off-Linux, and always returns >= 1,
// so the old error/zero fallbacks are unnecessary.
func detectCPUCores() int {
	return runtime.NumCPU()
}
|
||||
|
||||
func detectMemoryMB() int64 {
|
||||
data, err := os.ReadFile("/proc/meminfo")
|
||||
if err != nil {
|
||||
return 512
|
||||
}
|
||||
for _, line := range splitByNewline(string(data)) {
|
||||
if len(line) > 8 && line[:8] == "MemTotal" {
|
||||
var kb int64
|
||||
fmt.Sscanf(line, "MemTotal: %d kB", &kb)
|
||||
return kb / 1024
|
||||
}
|
||||
}
|
||||
return 512
|
||||
}
|
||||
|
||||
// detectDiskMB reports usable disk space (MB) for /var/lib/volt.
//
// Currently a fixed 10 GB placeholder: the original body declared an
// unused statfs-shaped struct and called os.Stat only to discard the
// result, returning the same constant on both branches — that dead code
// is removed. Real detection should use syscall.Statfs
// (Bavail * Bsize / 1MiB).
func detectDiskMB() int64 {
	const defaultDiskMB = 10240 // 10GB default until Statfs-based detection lands
	return defaultDiskMB
}
|
||||
|
||||
// splitByNewline splits s on '\n'. Unlike strings.Split, a trailing
// newline does not yield a final empty element, and the empty string
// yields a nil slice.
func splitByNewline(s string) []string {
	var lines []string
	for len(s) > 0 {
		nl := -1
		for i := 0; i < len(s); i++ {
			if s[i] == '\n' {
				nl = i
				break
			}
		}
		if nl < 0 {
			// No newline remains: the rest is the final line.
			lines = append(lines, s)
			break
		}
		lines = append(lines, s[:nl])
		s = s[nl+1:]
	}
	return lines
}
|
||||
|
||||
// ── Cluster Config ──────────────────────────────────────────────────────────
|
||||
|
||||
// ClusterConfig holds local cluster configuration
|
||||
type ClusterConfig struct {
|
||||
ClusterID string `json:"cluster_id"`
|
||||
NodeID string `json:"node_id"`
|
||||
NodeName string `json:"node_name"`
|
||||
RaftPort int `json:"raft_port"`
|
||||
RPCPort int `json:"rpc_port"`
|
||||
LeaderAddr string `json:"leader_addr,omitempty"`
|
||||
MeshEnabled bool `json:"mesh_enabled"`
|
||||
}
|
||||
|
||||
// SaveConfig writes local cluster config
|
||||
func SaveConfig(cfg *ClusterConfig) error {
|
||||
if err := os.MkdirAll(ClusterConfigDir, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
data, err := json.MarshalIndent(cfg, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return os.WriteFile(filepath.Join(ClusterConfigDir, "config.json"), data, 0644)
|
||||
}
|
||||
|
||||
// LoadConfig reads local cluster config
|
||||
func LoadConfig() (*ClusterConfig, error) {
|
||||
data, err := os.ReadFile(filepath.Join(ClusterConfigDir, "config.json"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var cfg ClusterConfig
|
||||
if err := json.Unmarshal(data, &cfg); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &cfg, nil
|
||||
}
|
||||
561
pkg/cluster/control.go.bak
Normal file
561
pkg/cluster/control.go.bak
Normal file
@@ -0,0 +1,561 @@
|
||||
/*
|
||||
Volt Cluster — Native control plane for multi-node orchestration.
|
||||
|
||||
Replaces the thin kubectl wrapper with a native clustering system built
|
||||
specifically for Volt's workload model (containers, hybrid-native, VMs).
|
||||
|
||||
Architecture:
|
||||
- Control plane: single leader node running volt-control daemon
|
||||
- Workers: nodes that register via `volt cluster join`
|
||||
- Communication: gRPC-over-mesh (WireGuard) or plain HTTPS
|
||||
- State: JSON-based on-disk store (no etcd dependency)
|
||||
- Health: heartbeat-based with configurable failure detection
|
||||
|
||||
The control plane is responsible for:
|
||||
- Node registration and deregistration
|
||||
- Health monitoring (heartbeat processing)
|
||||
- Workload scheduling (resource-based, label selectors)
|
||||
- Workload state sync across nodes
|
||||
|
||||
Copyright (c) Armored Gates LLC. All rights reserved.
|
||||
AGPSL v5 — Source-available. Anti-competition clauses apply.
|
||||
*/
|
||||
package cluster
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ── Constants ────────────────────────────────────────────────────────────────
|
||||
|
||||
const (
|
||||
DefaultHeartbeatInterval = 10 * time.Second
|
||||
DefaultFailureThreshold = 3 // missed heartbeats before marking unhealthy
|
||||
DefaultAPIPort = 9443
|
||||
ClusterStateDir = "/var/lib/volt/cluster"
|
||||
ClusterStateFile = "/var/lib/volt/cluster/state.json"
|
||||
NodesStateFile = "/var/lib/volt/cluster/nodes.json"
|
||||
ScheduleStateFile = "/var/lib/volt/cluster/schedule.json"
|
||||
)
|
||||
|
||||
// ── Node ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
// NodeStatus represents the health state of a cluster node.
|
||||
type NodeStatus string
|
||||
|
||||
const (
|
||||
NodeStatusReady NodeStatus = "ready"
|
||||
NodeStatusNotReady NodeStatus = "not-ready"
|
||||
NodeStatusJoining NodeStatus = "joining"
|
||||
NodeStatusDraining NodeStatus = "draining"
|
||||
NodeStatusRemoved NodeStatus = "removed"
|
||||
)
|
||||
|
||||
// NodeResources describes the capacity and usage of a node.
|
||||
type NodeResources struct {
|
||||
CPUCores int `json:"cpu_cores"`
|
||||
MemoryTotalMB int64 `json:"memory_total_mb"`
|
||||
MemoryUsedMB int64 `json:"memory_used_mb"`
|
||||
DiskTotalGB int64 `json:"disk_total_gb"`
|
||||
DiskUsedGB int64 `json:"disk_used_gb"`
|
||||
ContainerCount int `json:"container_count"`
|
||||
WorkloadCount int `json:"workload_count"`
|
||||
}
|
||||
|
||||
// NodeInfo represents a registered cluster node.
|
||||
type NodeInfo struct {
|
||||
NodeID string `json:"node_id"`
|
||||
Name string `json:"name"`
|
||||
MeshIP string `json:"mesh_ip"`
|
||||
PublicIP string `json:"public_ip,omitempty"`
|
||||
Status NodeStatus `json:"status"`
|
||||
Labels map[string]string `json:"labels,omitempty"`
|
||||
Resources NodeResources `json:"resources"`
|
||||
LastHeartbeat time.Time `json:"last_heartbeat"`
|
||||
JoinedAt time.Time `json:"joined_at"`
|
||||
MissedBeats int `json:"missed_beats"`
|
||||
VoltVersion string `json:"volt_version,omitempty"`
|
||||
KernelVersion string `json:"kernel_version,omitempty"`
|
||||
OS string `json:"os,omitempty"`
|
||||
Region string `json:"region,omitempty"`
|
||||
}
|
||||
|
||||
// IsHealthy returns true if the node is responding to heartbeats.
|
||||
func (n *NodeInfo) IsHealthy() bool {
|
||||
return n.Status == NodeStatusReady && n.MissedBeats < DefaultFailureThreshold
|
||||
}
|
||||
|
||||
// ── Cluster State ────────────────────────────────────────────────────────────
|
||||
|
||||
// ClusterRole indicates this node's role in the cluster.
|
||||
type ClusterRole string
|
||||
|
||||
const (
|
||||
RoleControl ClusterRole = "control"
|
||||
RoleWorker ClusterRole = "worker"
|
||||
RoleNone ClusterRole = "none"
|
||||
)
|
||||
|
||||
// ClusterState is the persistent on-disk cluster membership state for this node.
|
||||
type ClusterState struct {
|
||||
ClusterID string `json:"cluster_id"`
|
||||
Role ClusterRole `json:"role"`
|
||||
NodeID string `json:"node_id"`
|
||||
NodeName string `json:"node_name"`
|
||||
ControlURL string `json:"control_url"`
|
||||
APIPort int `json:"api_port"`
|
||||
JoinedAt time.Time `json:"joined_at"`
|
||||
HeartbeatInterval time.Duration `json:"heartbeat_interval"`
|
||||
}
|
||||
|
||||
// ── Scheduled Workload ───────────────────────────────────────────────────────
|
||||
|
||||
// ScheduledWorkload represents a workload assigned to a node by the scheduler.
|
||||
type ScheduledWorkload struct {
|
||||
WorkloadID string `json:"workload_id"`
|
||||
NodeID string `json:"node_id"`
|
||||
NodeName string `json:"node_name"`
|
||||
Mode string `json:"mode"` // container, hybrid-native, etc.
|
||||
ManifestPath string `json:"manifest_path,omitempty"`
|
||||
Labels map[string]string `json:"labels,omitempty"`
|
||||
Resources WorkloadResources `json:"resources"`
|
||||
Status string `json:"status"` // pending, running, stopped, failed
|
||||
ScheduledAt time.Time `json:"scheduled_at"`
|
||||
}
|
||||
|
||||
// WorkloadResources describes the resource requirements for a workload.
|
||||
type WorkloadResources struct {
|
||||
CPUCores int `json:"cpu_cores"`
|
||||
MemoryMB int64 `json:"memory_mb"`
|
||||
DiskMB int64 `json:"disk_mb,omitempty"`
|
||||
}
|
||||
|
||||
// ── Control Plane ────────────────────────────────────────────────────────────
|
||||
|
||||
// ControlPlane manages cluster state, node registration, and scheduling.
|
||||
type ControlPlane struct {
|
||||
state *ClusterState
|
||||
nodes map[string]*NodeInfo
|
||||
schedule []*ScheduledWorkload
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
// NewControlPlane creates or loads a control plane instance.
|
||||
// NewControlPlane creates a control-plane handle and hydrates it from
// the on-disk state, node, and schedule files (loadState/loadNodes/
// loadSchedule are defined elsewhere in this package).
func NewControlPlane() *ControlPlane {
	cp := &ControlPlane{
		nodes: make(map[string]*NodeInfo),
	}
	// Loader errors are not surfaced here — NOTE(review): confirm the
	// loaders tolerate a missing/empty state directory on first run.
	cp.loadState()
	cp.loadNodes()
	cp.loadSchedule()
	return cp
}
|
||||
|
||||
// IsInitialized returns true if the cluster has been initialized.
|
||||
func (cp *ControlPlane) IsInitialized() bool {
|
||||
cp.mu.RLock()
|
||||
defer cp.mu.RUnlock()
|
||||
return cp.state != nil && cp.state.ClusterID != ""
|
||||
}
|
||||
|
||||
// State returns a copy of the cluster state.
|
||||
func (cp *ControlPlane) State() *ClusterState {
|
||||
cp.mu.RLock()
|
||||
defer cp.mu.RUnlock()
|
||||
if cp.state == nil {
|
||||
return nil
|
||||
}
|
||||
copy := *cp.state
|
||||
return ©
|
||||
}
|
||||
|
||||
// Role returns this node's cluster role.
|
||||
func (cp *ControlPlane) Role() ClusterRole {
|
||||
cp.mu.RLock()
|
||||
defer cp.mu.RUnlock()
|
||||
if cp.state == nil {
|
||||
return RoleNone
|
||||
}
|
||||
return cp.state.Role
|
||||
}
|
||||
|
||||
// Nodes returns all registered nodes.
|
||||
func (cp *ControlPlane) Nodes() []*NodeInfo {
|
||||
cp.mu.RLock()
|
||||
defer cp.mu.RUnlock()
|
||||
result := make([]*NodeInfo, 0, len(cp.nodes))
|
||||
for _, n := range cp.nodes {
|
||||
copy := *n
|
||||
result = append(result, ©)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// GetNode returns a node by ID or name.
|
||||
func (cp *ControlPlane) GetNode(idOrName string) *NodeInfo {
|
||||
cp.mu.RLock()
|
||||
defer cp.mu.RUnlock()
|
||||
if n, ok := cp.nodes[idOrName]; ok {
|
||||
copy := *n
|
||||
return ©
|
||||
}
|
||||
// Try by name
|
||||
for _, n := range cp.nodes {
|
||||
if n.Name == idOrName {
|
||||
copy := *n
|
||||
return ©
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Schedule returns the current workload schedule.
|
||||
func (cp *ControlPlane) Schedule() []*ScheduledWorkload {
|
||||
cp.mu.RLock()
|
||||
defer cp.mu.RUnlock()
|
||||
result := make([]*ScheduledWorkload, len(cp.schedule))
|
||||
for i, sw := range cp.schedule {
|
||||
copy := *sw
|
||||
result[i] = ©
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// ── Init ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
// InitCluster initializes this node as the cluster control plane.
|
||||
func (cp *ControlPlane) InitCluster(clusterID, nodeName, meshIP string, apiPort int) error {
|
||||
cp.mu.Lock()
|
||||
defer cp.mu.Unlock()
|
||||
|
||||
if cp.state != nil && cp.state.ClusterID != "" {
|
||||
return fmt.Errorf("already part of cluster %q", cp.state.ClusterID)
|
||||
}
|
||||
|
||||
if apiPort == 0 {
|
||||
apiPort = DefaultAPIPort
|
||||
}
|
||||
|
||||
cp.state = &ClusterState{
|
||||
ClusterID: clusterID,
|
||||
Role: RoleControl,
|
||||
NodeID: clusterID + "-control",
|
||||
NodeName: nodeName,
|
||||
ControlURL: fmt.Sprintf("https://%s:%d", meshIP, apiPort),
|
||||
APIPort: apiPort,
|
||||
JoinedAt: time.Now().UTC(),
|
||||
HeartbeatInterval: DefaultHeartbeatInterval,
|
||||
}
|
||||
|
||||
// Register self as a node
|
||||
cp.nodes[cp.state.NodeID] = &NodeInfo{
|
||||
NodeID: cp.state.NodeID,
|
||||
Name: nodeName,
|
||||
MeshIP: meshIP,
|
||||
Status: NodeStatusReady,
|
||||
Labels: map[string]string{"role": "control"},
|
||||
LastHeartbeat: time.Now().UTC(),
|
||||
JoinedAt: time.Now().UTC(),
|
||||
}
|
||||
|
||||
if err := cp.saveState(); err != nil {
|
||||
return err
|
||||
}
|
||||
return cp.saveNodes()
|
||||
}
|
||||
|
||||
// ── Join ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
// JoinCluster registers this node as a worker in an existing cluster.
|
||||
func (cp *ControlPlane) JoinCluster(clusterID, controlURL, nodeID, nodeName, meshIP string) error {
|
||||
cp.mu.Lock()
|
||||
defer cp.mu.Unlock()
|
||||
|
||||
if cp.state != nil && cp.state.ClusterID != "" {
|
||||
return fmt.Errorf("already part of cluster %q — run 'volt cluster leave' first", cp.state.ClusterID)
|
||||
}
|
||||
|
||||
cp.state = &ClusterState{
|
||||
ClusterID: clusterID,
|
||||
Role: RoleWorker,
|
||||
NodeID: nodeID,
|
||||
NodeName: nodeName,
|
||||
ControlURL: controlURL,
|
||||
JoinedAt: time.Now().UTC(),
|
||||
HeartbeatInterval: DefaultHeartbeatInterval,
|
||||
}
|
||||
|
||||
return cp.saveState()
|
||||
}
|
||||
|
||||
// ── Node Registration ────────────────────────────────────────────────────────
|
||||
|
||||
// RegisterNode adds a new worker node to the cluster (control plane only).
|
||||
func (cp *ControlPlane) RegisterNode(node *NodeInfo) error {
|
||||
cp.mu.Lock()
|
||||
defer cp.mu.Unlock()
|
||||
|
||||
if cp.state == nil || cp.state.Role != RoleControl {
|
||||
return fmt.Errorf("not the control plane — cannot register nodes")
|
||||
}
|
||||
|
||||
node.Status = NodeStatusReady
|
||||
node.JoinedAt = time.Now().UTC()
|
||||
node.LastHeartbeat = time.Now().UTC()
|
||||
cp.nodes[node.NodeID] = node
|
||||
|
||||
return cp.saveNodes()
|
||||
}
|
||||
|
||||
// DeregisterNode removes a node from the cluster.
|
||||
func (cp *ControlPlane) DeregisterNode(nodeID string) error {
|
||||
cp.mu.Lock()
|
||||
defer cp.mu.Unlock()
|
||||
|
||||
if _, exists := cp.nodes[nodeID]; !exists {
|
||||
return fmt.Errorf("node %q not found", nodeID)
|
||||
}
|
||||
|
||||
delete(cp.nodes, nodeID)
|
||||
return cp.saveNodes()
|
||||
}
|
||||
|
||||
// ── Heartbeat ────────────────────────────────────────────────────────────────
|
||||
|
||||
// ProcessHeartbeat updates a node's health status.
|
||||
func (cp *ControlPlane) ProcessHeartbeat(nodeID string, resources NodeResources) error {
|
||||
cp.mu.Lock()
|
||||
defer cp.mu.Unlock()
|
||||
|
||||
node, exists := cp.nodes[nodeID]
|
||||
if !exists {
|
||||
return fmt.Errorf("node %q not registered", nodeID)
|
||||
}
|
||||
|
||||
node.LastHeartbeat = time.Now().UTC()
|
||||
node.MissedBeats = 0
|
||||
node.Resources = resources
|
||||
if node.Status == NodeStatusNotReady {
|
||||
node.Status = NodeStatusReady
|
||||
}
|
||||
|
||||
return cp.saveNodes()
|
||||
}
|
||||
|
||||
// CheckHealth evaluates all nodes and marks those with missed heartbeats.
// Returns the IDs of nodes that were marked NotReady during this pass.
// Removed and Draining nodes are skipped.
//
// NOTE(review): MissedBeats is incremented once per CheckHealth invocation,
// not once per missed heartbeat interval — the effective time before a node
// is marked NotReady depends on how often the caller runs CheckHealth.
// Confirm the caller's cadence matches the intent.
func (cp *ControlPlane) CheckHealth() []string {
	cp.mu.Lock()
	defer cp.mu.Unlock()

	var unhealthy []string
	// A node is stale once it has been silent for failure-threshold intervals.
	threshold := time.Duration(DefaultFailureThreshold) * DefaultHeartbeatInterval

	for _, node := range cp.nodes {
		if node.Status == NodeStatusRemoved || node.Status == NodeStatusDraining {
			continue
		}
		if time.Since(node.LastHeartbeat) > threshold {
			node.MissedBeats++
			if node.MissedBeats >= DefaultFailureThreshold {
				node.Status = NodeStatusNotReady
				unhealthy = append(unhealthy, node.NodeID)
			}
		}
	}

	// Best-effort persist; the health verdict is returned regardless of
	// whether the write succeeds.
	cp.saveNodes()
	return unhealthy
}
|
||||
|
||||
// ── Drain ────────────────────────────────────────────────────────────────────
|
||||
|
||||
// DrainNode marks a node for draining (no new workloads, existing ones rescheduled).
|
||||
func (cp *ControlPlane) DrainNode(nodeID string) error {
|
||||
cp.mu.Lock()
|
||||
defer cp.mu.Unlock()
|
||||
|
||||
node, exists := cp.nodes[nodeID]
|
||||
if !exists {
|
||||
return fmt.Errorf("node %q not found", nodeID)
|
||||
}
|
||||
|
||||
node.Status = NodeStatusDraining
|
||||
|
||||
// Find workloads on this node and mark for rescheduling
|
||||
for _, sw := range cp.schedule {
|
||||
if sw.NodeID == nodeID && sw.Status == "running" {
|
||||
sw.Status = "pending" // will be rescheduled
|
||||
sw.NodeID = ""
|
||||
sw.NodeName = ""
|
||||
}
|
||||
}
|
||||
|
||||
cp.saveNodes()
|
||||
return cp.saveSchedule()
|
||||
}
|
||||
|
||||
// ── Leave ────────────────────────────────────────────────────────────────────
|
||||
|
||||
// LeaveCluster removes this node from the cluster.
|
||||
func (cp *ControlPlane) LeaveCluster() error {
|
||||
cp.mu.Lock()
|
||||
defer cp.mu.Unlock()
|
||||
|
||||
if cp.state == nil {
|
||||
return fmt.Errorf("not part of any cluster")
|
||||
}
|
||||
|
||||
// If control plane, clean up
|
||||
if cp.state.Role == RoleControl {
|
||||
cp.nodes = make(map[string]*NodeInfo)
|
||||
cp.schedule = nil
|
||||
os.Remove(NodesStateFile)
|
||||
os.Remove(ScheduleStateFile)
|
||||
}
|
||||
|
||||
cp.state = nil
|
||||
os.Remove(ClusterStateFile)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ── Scheduling ───────────────────────────────────────────────────────────────
|
||||
|
||||
// ScheduleWorkload assigns a workload to a node based on resource availability
|
||||
// and label selectors.
|
||||
func (cp *ControlPlane) ScheduleWorkload(workload *ScheduledWorkload, nodeSelector map[string]string) error {
|
||||
cp.mu.Lock()
|
||||
defer cp.mu.Unlock()
|
||||
|
||||
if cp.state == nil || cp.state.Role != RoleControl {
|
||||
return fmt.Errorf("not the control plane — cannot schedule workloads")
|
||||
}
|
||||
|
||||
// Find best node
|
||||
bestNode := cp.findBestNode(workload.Resources, nodeSelector)
|
||||
if bestNode == nil {
|
||||
return fmt.Errorf("no suitable node found for workload %q (required: %dMB RAM, %d CPU cores)",
|
||||
workload.WorkloadID, workload.Resources.MemoryMB, workload.Resources.CPUCores)
|
||||
}
|
||||
|
||||
workload.NodeID = bestNode.NodeID
|
||||
workload.NodeName = bestNode.Name
|
||||
workload.Status = "pending"
|
||||
workload.ScheduledAt = time.Now().UTC()
|
||||
|
||||
cp.schedule = append(cp.schedule, workload)
|
||||
|
||||
return cp.saveSchedule()
|
||||
}
|
||||
|
||||
// findBestNode selects the best available node for a workload based on
|
||||
// resource availability and label matching. Uses a simple "least loaded" strategy.
|
||||
func (cp *ControlPlane) findBestNode(required WorkloadResources, selector map[string]string) *NodeInfo {
|
||||
var best *NodeInfo
|
||||
var bestScore int64 = -1
|
||||
|
||||
for _, node := range cp.nodes {
|
||||
// Skip unhealthy/draining nodes
|
||||
if node.Status != NodeStatusReady {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check label selector
|
||||
if !matchLabels(node.Labels, selector) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check resource availability
|
||||
availMem := node.Resources.MemoryTotalMB - node.Resources.MemoryUsedMB
|
||||
if required.MemoryMB > 0 && availMem < required.MemoryMB {
|
||||
continue
|
||||
}
|
||||
|
||||
// Score: prefer nodes with more available resources (simple bin-packing)
|
||||
score := availMem
|
||||
if best == nil || score > bestScore {
|
||||
best = node
|
||||
bestScore = score
|
||||
}
|
||||
}
|
||||
|
||||
return best
|
||||
}
|
||||
|
||||
// matchLabels reports whether nodeLabels satisfies every key/value pair in
// selector. An empty (or nil) selector matches any node. A key absent from
// nodeLabels yields the zero value "", so a selector entry with an empty
// value matches nodes that lack the key entirely.
func matchLabels(nodeLabels, selector map[string]string) bool {
	for key, want := range selector {
		if nodeLabels[key] != want {
			return false
		}
	}
	return true
}
|
||||
|
||||
// ── Persistence ──────────────────────────────────────────────────────────────

// loadState restores cluster membership from ClusterStateFile. Errors
// (missing file, unreadable JSON) are deliberately swallowed: the node is
// simply treated as not yet clustered.
func (cp *ControlPlane) loadState() {
	data, err := os.ReadFile(ClusterStateFile)
	if err != nil {
		return
	}
	var state ClusterState
	if err := json.Unmarshal(data, &state); err != nil {
		return
	}
	cp.state = &state
}
|
||||
|
||||
func (cp *ControlPlane) saveState() error {
|
||||
os.MkdirAll(ClusterStateDir, 0755)
|
||||
data, err := json.MarshalIndent(cp.state, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return os.WriteFile(ClusterStateFile, data, 0644)
|
||||
}
|
||||
|
||||
// loadNodes restores the node registry from NodesStateFile. Errors are
// deliberately swallowed: a missing or corrupt file leaves the registry
// created by NewControlPlane untouched.
func (cp *ControlPlane) loadNodes() {
	data, err := os.ReadFile(NodesStateFile)
	if err != nil {
		return
	}
	var nodes map[string]*NodeInfo
	if err := json.Unmarshal(data, &nodes); err != nil {
		return
	}
	cp.nodes = nodes
}
|
||||
|
||||
func (cp *ControlPlane) saveNodes() error {
|
||||
os.MkdirAll(ClusterStateDir, 0755)
|
||||
data, err := json.MarshalIndent(cp.nodes, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return os.WriteFile(NodesStateFile, data, 0644)
|
||||
}
|
||||
|
||||
// loadSchedule restores the workload schedule from ScheduleStateFile.
// Errors are deliberately swallowed: a missing or corrupt file leaves the
// schedule empty.
func (cp *ControlPlane) loadSchedule() {
	data, err := os.ReadFile(ScheduleStateFile)
	if err != nil {
		return
	}
	var schedule []*ScheduledWorkload
	if err := json.Unmarshal(data, &schedule); err != nil {
		return
	}
	cp.schedule = schedule
}
|
||||
|
||||
func (cp *ControlPlane) saveSchedule() error {
|
||||
os.MkdirAll(ClusterStateDir, 0755)
|
||||
data, err := json.MarshalIndent(cp.schedule, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return os.WriteFile(ScheduleStateFile, data, 0644)
|
||||
}
|
||||
153
pkg/cluster/node.go.bak
Normal file
153
pkg/cluster/node.go.bak
Normal file
@@ -0,0 +1,153 @@
|
||||
/*
|
||||
Volt Cluster — Node agent for worker nodes.
|
||||
|
||||
The node agent runs on every worker and is responsible for:
|
||||
- Sending heartbeats to the control plane
|
||||
- Reporting resource usage (CPU, memory, disk, workload count)
|
||||
- Accepting workload scheduling commands from the control plane
|
||||
- Executing workload lifecycle operations locally
|
||||
|
||||
Communication with the control plane uses HTTPS over the mesh network.
|
||||
|
||||
Copyright (c) Armored Gates LLC. All rights reserved.
|
||||
AGPSL v5 — Source-available. Anti-competition clauses apply.
|
||||
*/
|
||||
package cluster
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// NodeAgent runs on worker nodes and communicates with the control plane.
// It is built from the persisted ClusterState and identifies itself by
// nodeID/nodeName when heartbeating the control plane.
type NodeAgent struct {
	nodeID     string        // this node's cluster-assigned ID
	nodeName   string        // human-readable node name
	controlURL string        // control plane base URL (HTTPS over the mesh)
	interval   time.Duration // heartbeat period
	stopCh     chan struct{} // closed to stop the agent loop
}
|
||||
|
||||
// NewNodeAgent creates a node agent for the given cluster state.
|
||||
func NewNodeAgent(state *ClusterState) *NodeAgent {
|
||||
interval := state.HeartbeatInterval
|
||||
if interval == 0 {
|
||||
interval = DefaultHeartbeatInterval
|
||||
}
|
||||
return &NodeAgent{
|
||||
nodeID: state.NodeID,
|
||||
nodeName: state.NodeName,
|
||||
controlURL: state.ControlURL,
|
||||
interval: interval,
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// CollectResources gathers current node resource information: CPU core
// count, memory from /proc/meminfo, root-filesystem disk usage via df,
// container count via machinectl, and workload count from the local volt
// state file. Every probe is best-effort — on failure the corresponding
// fields are left at zero. Linux-only (procfs, coreutils df, systemd).
func CollectResources() NodeResources {
	res := NodeResources{
		CPUCores: runtime.NumCPU(),
	}

	// Memory from /proc/meminfo (values are reported in kB).
	if data, err := os.ReadFile("/proc/meminfo"); err == nil {
		lines := strings.Split(string(data), "\n")
		for _, line := range lines {
			if strings.HasPrefix(line, "MemTotal:") {
				res.MemoryTotalMB = parseMemInfoKB(line) / 1024
			} else if strings.HasPrefix(line, "MemAvailable:") {
				// Used = total − available. NOTE(review): assumes the
				// MemTotal line precedes MemAvailable (true on Linux).
				availMB := parseMemInfoKB(line) / 1024
				res.MemoryUsedMB = res.MemoryTotalMB - availMB
			}
		}
	}

	// Disk usage of the root filesystem from df, in whole gigabytes.
	if out, err := exec.Command("df", "--output=size,used", "-BG", "/").Output(); err == nil {
		lines := strings.Split(strings.TrimSpace(string(out)), "\n")
		if len(lines) >= 2 {
			// Line 0 is the df header; line 1 holds the values.
			fields := strings.Fields(lines[1])
			if len(fields) >= 2 {
				res.DiskTotalGB = parseGB(fields[0])
				res.DiskUsedGB = parseGB(fields[1])
			}
		}
	}

	// Container count: one non-empty machinectl listing line per machine.
	if out, err := exec.Command("machinectl", "list", "--no-legend", "--no-pager").Output(); err == nil {
		count := 0
		for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
			if strings.TrimSpace(line) != "" {
				count++
			}
		}
		res.ContainerCount = count
	}

	// Workload count from volt state.
	if data, err := os.ReadFile("/var/lib/volt/workload-state.json"); err == nil {
		// Quick count of workload entries. NOTE(review): counting raw
		// `"id"` substrings is fragile — any other "id" key in the JSON
		// inflates the count; consider parsing the file properly.
		count := strings.Count(string(data), `"id"`)
		res.WorkloadCount = count
	}

	return res
}
|
||||
|
||||
// GetSystemInfo returns the OS pretty-name (from /etc/os-release) and the
// kernel release (from `uname -r`). Either value may be empty when the
// corresponding probe fails.
func GetSystemInfo() (osInfo, kernelVersion string) {
	if out, err := exec.Command("uname", "-r").Output(); err == nil {
		kernelVersion = strings.TrimSpace(string(out))
	}
	data, err := os.ReadFile("/etc/os-release")
	if err != nil {
		return
	}
	for _, line := range strings.Split(string(data), "\n") {
		if strings.HasPrefix(line, "PRETTY_NAME=") {
			osInfo = strings.Trim(strings.TrimPrefix(line, "PRETTY_NAME="), "\"")
			return
		}
	}
	return
}
|
||||
|
||||
// FormatResources returns a human-readable resource summary.
|
||||
func FormatResources(r NodeResources) string {
|
||||
memPct := float64(0)
|
||||
if r.MemoryTotalMB > 0 {
|
||||
memPct = float64(r.MemoryUsedMB) / float64(r.MemoryTotalMB) * 100
|
||||
}
|
||||
diskPct := float64(0)
|
||||
if r.DiskTotalGB > 0 {
|
||||
diskPct = float64(r.DiskUsedGB) / float64(r.DiskTotalGB) * 100
|
||||
}
|
||||
return fmt.Sprintf("CPU: %d cores | RAM: %dMB/%dMB (%.0f%%) | Disk: %dGB/%dGB (%.0f%%) | Containers: %d",
|
||||
r.CPUCores,
|
||||
r.MemoryUsedMB, r.MemoryTotalMB, memPct,
|
||||
r.DiskUsedGB, r.DiskTotalGB, diskPct,
|
||||
r.ContainerCount,
|
||||
)
|
||||
}
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────────────────

// parseMemInfoKB extracts the numeric kB value from a /proc/meminfo line
// such as "MemTotal:       16384000 kB". Returns 0 on malformed input.
func parseMemInfoKB(line string) int64 {
	fields := strings.Fields(line)
	if len(fields) < 2 {
		return 0
	}
	val, _ := strconv.ParseInt(fields[1], 10, 64)
	return val
}
|
||||
|
||||
// parseGB parses a df size field like "50G" into an integer gigabyte count.
// Returns 0 when the value is not numeric.
func parseGB(s string) int64 {
	val, _ := strconv.ParseInt(strings.TrimSuffix(s, "G"), 10, 64)
	return val
}
|
||||
195
pkg/cluster/scheduler.go.bak
Normal file
195
pkg/cluster/scheduler.go.bak
Normal file
@@ -0,0 +1,195 @@
|
||||
/*
|
||||
Volt Cluster — Workload Scheduler.
|
||||
|
||||
Implements scheduling strategies for assigning workloads to cluster nodes.
|
||||
The scheduler considers:
|
||||
- Resource availability (CPU, memory, disk)
|
||||
- Label selectors and affinity rules
|
||||
- Node health status
|
||||
- Current workload distribution (spread/pack strategies)
|
||||
|
||||
Strategies:
|
||||
- BinPack: Pack workloads onto fewest nodes (maximize density)
|
||||
- Spread: Distribute evenly across nodes (maximize availability)
|
||||
- Manual: Explicit node selection by name/label
|
||||
|
||||
Copyright (c) Armored Gates LLC. All rights reserved.
|
||||
AGPSL v5 — Source-available. Anti-competition clauses apply.
|
||||
*/
|
||||
package cluster
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// ── Strategy ─────────────────────────────────────────────────────────────────

// ScheduleStrategy defines how workloads are assigned to nodes.
type ScheduleStrategy string

const (
	// StrategyBinPack packs workloads onto the fewest nodes (maximize density).
	StrategyBinPack ScheduleStrategy = "binpack"
	// StrategySpread distributes workloads evenly across nodes (maximize availability).
	StrategySpread ScheduleStrategy = "spread"
	// StrategyManual takes the first eligible node matching the selector.
	StrategyManual ScheduleStrategy = "manual"
)
|
||||
|
||||
// ── Scheduler ────────────────────────────────────────────────────────────────

// Scheduler assigns workloads to nodes based on a configurable strategy.
type Scheduler struct {
	strategy ScheduleStrategy // placement strategy; empty defaults to binpack
}
|
||||
|
||||
// NewScheduler creates a scheduler with the given strategy.
|
||||
func NewScheduler(strategy ScheduleStrategy) *Scheduler {
|
||||
if strategy == "" {
|
||||
strategy = StrategyBinPack
|
||||
}
|
||||
return &Scheduler{strategy: strategy}
|
||||
}
|
||||
|
||||
// SelectNode chooses the best node for a workload based on the current strategy.
|
||||
// Returns the selected NodeInfo or an error if no suitable node exists.
|
||||
func (s *Scheduler) SelectNode(
|
||||
nodes []*NodeInfo,
|
||||
required WorkloadResources,
|
||||
selector map[string]string,
|
||||
existingSchedule []*ScheduledWorkload,
|
||||
) (*NodeInfo, error) {
|
||||
|
||||
// Filter to eligible nodes
|
||||
eligible := s.filterEligible(nodes, required, selector)
|
||||
if len(eligible) == 0 {
|
||||
return nil, fmt.Errorf("no eligible nodes: checked %d nodes, none meet resource/label requirements", len(nodes))
|
||||
}
|
||||
|
||||
switch s.strategy {
|
||||
case StrategySpread:
|
||||
return s.selectSpread(eligible, existingSchedule), nil
|
||||
case StrategyBinPack:
|
||||
return s.selectBinPack(eligible), nil
|
||||
case StrategyManual:
|
||||
// Manual strategy returns the first eligible node matching the selector
|
||||
return eligible[0], nil
|
||||
default:
|
||||
return s.selectBinPack(eligible), nil
|
||||
}
|
||||
}
|
||||
|
||||
// filterEligible returns nodes that are healthy, match labels, and have sufficient resources.
|
||||
func (s *Scheduler) filterEligible(nodes []*NodeInfo, required WorkloadResources, selector map[string]string) []*NodeInfo {
|
||||
var eligible []*NodeInfo
|
||||
|
||||
for _, node := range nodes {
|
||||
// Must be ready
|
||||
if node.Status != NodeStatusReady {
|
||||
continue
|
||||
}
|
||||
|
||||
// Must match label selector
|
||||
if !matchLabels(node.Labels, selector) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Must have sufficient resources
|
||||
availMem := node.Resources.MemoryTotalMB - node.Resources.MemoryUsedMB
|
||||
if required.MemoryMB > 0 && availMem < required.MemoryMB {
|
||||
continue
|
||||
}
|
||||
|
||||
// CPU check (basic — just core count)
|
||||
if required.CPUCores > 0 && node.Resources.CPUCores < required.CPUCores {
|
||||
continue
|
||||
}
|
||||
|
||||
// Disk check
|
||||
availDisk := (node.Resources.DiskTotalGB - node.Resources.DiskUsedGB) * 1024 // convert to MB
|
||||
if required.DiskMB > 0 && availDisk < required.DiskMB {
|
||||
continue
|
||||
}
|
||||
|
||||
eligible = append(eligible, node)
|
||||
}
|
||||
|
||||
return eligible
|
||||
}
|
||||
|
||||
// selectBinPack picks the node with the LEAST available memory (pack tight).
|
||||
func (s *Scheduler) selectBinPack(nodes []*NodeInfo) *NodeInfo {
|
||||
sort.Slice(nodes, func(i, j int) bool {
|
||||
availI := nodes[i].Resources.MemoryTotalMB - nodes[i].Resources.MemoryUsedMB
|
||||
availJ := nodes[j].Resources.MemoryTotalMB - nodes[j].Resources.MemoryUsedMB
|
||||
return availI < availJ // least available first
|
||||
})
|
||||
return nodes[0]
|
||||
}
|
||||
|
||||
// selectSpread picks the node with the fewest currently scheduled workloads.
|
||||
func (s *Scheduler) selectSpread(nodes []*NodeInfo, schedule []*ScheduledWorkload) *NodeInfo {
|
||||
// Count workloads per node
|
||||
counts := make(map[string]int)
|
||||
for _, sw := range schedule {
|
||||
if sw.Status == "running" || sw.Status == "pending" {
|
||||
counts[sw.NodeID]++
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by workload count (ascending)
|
||||
sort.Slice(nodes, func(i, j int) bool {
|
||||
return counts[nodes[i].NodeID] < counts[nodes[j].NodeID]
|
||||
})
|
||||
|
||||
return nodes[0]
|
||||
}
|
||||
|
||||
// ── Scoring (for future extensibility) ───────────────────────────────────────

// NodeScore represents a scored node for scheduling decisions.
type NodeScore struct {
	Node  *NodeInfo // the candidate node
	Score float64   // suitability score; higher is better
}
|
||||
|
||||
// ScoreNodes evaluates and ranks all eligible nodes for a workload.
|
||||
// Higher scores are better.
|
||||
func ScoreNodes(nodes []*NodeInfo, required WorkloadResources) []NodeScore {
|
||||
var scores []NodeScore
|
||||
|
||||
for _, node := range nodes {
|
||||
if node.Status != NodeStatusReady {
|
||||
continue
|
||||
}
|
||||
|
||||
score := 0.0
|
||||
|
||||
// Resource availability score (0-50 points)
|
||||
if node.Resources.MemoryTotalMB > 0 {
|
||||
memPct := float64(node.Resources.MemoryTotalMB-node.Resources.MemoryUsedMB) / float64(node.Resources.MemoryTotalMB)
|
||||
score += memPct * 50
|
||||
}
|
||||
|
||||
// CPU headroom score (0-25 points)
|
||||
if node.Resources.CPUCores > required.CPUCores {
|
||||
score += 25
|
||||
}
|
||||
|
||||
// Health score (0-25 points)
|
||||
if node.MissedBeats == 0 {
|
||||
score += 25
|
||||
} else {
|
||||
score += float64(25-node.MissedBeats*5)
|
||||
if score < 0 {
|
||||
score = 0
|
||||
}
|
||||
}
|
||||
|
||||
scores = append(scores, NodeScore{Node: node, Score: score})
|
||||
}
|
||||
|
||||
sort.Slice(scores, func(i, j int) bool {
|
||||
return scores[i].Score > scores[j].Score
|
||||
})
|
||||
|
||||
return scores
|
||||
}
|
||||
Reference in New Issue
Block a user