Complete infrastructure platform CLI: - Container runtime (systemd-nspawn) - VoltVisor VMs (Neutron Stardust / QEMU) - Stellarium CAS (content-addressed storage) - ORAS Registry - GitOps integration - Landlock LSM security - Compose orchestration - Mesh networking Copyright (c) Armored Gates LLC. All rights reserved. Licensed under AGPSL v5.0
762 lines
21 KiB
Go
762 lines
21 KiB
Go
/*
|
|
Volt Native Clustering — Core cluster management engine.
|
|
|
|
Provides node discovery, health monitoring, workload scheduling, and leader
|
|
election using Raft consensus. This replaces the kubectl wrapper in k8s.go
|
|
with a real, native clustering implementation.
|
|
|
|
Architecture:
|
|
- Raft consensus for leader election and distributed state
|
|
- Leader handles all scheduling decisions
|
|
- Followers execute workloads and report health
|
|
- State machine (FSM) tracks nodes, workloads, and assignments
|
|
- Health monitoring via periodic heartbeats (1s interval, 5s timeout)
|
|
|
|
Transport: Runs over WireGuard mesh when available, falls back to plaintext.
|
|
|
|
License: AGPSL v5 — Pro tier ("cluster" feature)
|
|
*/
|
|
package cluster
|
|
|
|
import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"
)
|
|
|
|
// ── Constants ───────────────────────────────────────────────────────────────
|
|
|
|
const (
	// On-disk layout for cluster metadata.
	ClusterConfigDir = "/var/lib/volt/cluster"
	ClusterStateFile = "/var/lib/volt/cluster/state.json"
	ClusterRaftDir = "/var/lib/volt/cluster/raft"

	// Default ports for Raft consensus, node RPC, and gossip traffic.
	DefaultRaftPort = 7946
	DefaultRPCPort = 7947
	DefaultGossipPort = 7948

	// Health-monitoring timings: heartbeats are expected every second; a
	// silent node becomes unreachable after 5s and dead after 30s (see
	// HealthMonitor.checkHealth).
	HeartbeatInterval = 1 * time.Second
	HeartbeatTimeout = 5 * time.Second
	NodeDeadThreshold = 30 * time.Second
	ElectionTimeout = 10 * time.Second
)
|
|
|
|
// ── Node Types ──────────────────────────────────────────────────────────────
|
|
|
|
// NodeRole represents a node's role in the cluster's Raft consensus group.
type NodeRole string

const (
	RoleLeader NodeRole = "leader" // handles all scheduling decisions
	RoleFollower NodeRole = "follower" // executes workloads, reports health
	RoleCandidate NodeRole = "candidate" // transient role during an election
)
|
|
|
|
// NodeStatus represents a node's health status. Transitions between
// healthy/unreachable/dead are driven by HealthMonitor.checkHealth;
// draining is set by DrainNode.
type NodeStatus string

const (
	StatusHealthy NodeStatus = "healthy" // heartbeating; eligible for scheduling
	StatusDegraded NodeStatus = "degraded"
	StatusUnreachable NodeStatus = "unreachable" // missed heartbeats > HeartbeatTimeout
	StatusDead NodeStatus = "dead" // silent > NodeDeadThreshold; terminal in checkHealth
	StatusDraining NodeStatus = "draining" // being emptied for maintenance
	StatusLeft NodeStatus = "left" // departed the cluster; terminal in checkHealth
)
|
|
|
|
// Node represents a cluster member as tracked in the replicated state.
type Node struct {
	ID string `json:"id"` // unique identifier; key into ClusterState.Nodes
	Name string `json:"name"`
	MeshIP string `json:"mesh_ip"` // address on the WireGuard mesh
	Endpoint string `json:"endpoint"`
	Role NodeRole `json:"role"`
	Status NodeStatus `json:"status"`
	// Labels drive scheduling constraints (matchLabels; the "zone" key is
	// consulted for zone-aware placement).
	Labels map[string]string `json:"labels,omitempty"`
	Resources NodeResources `json:"resources"` // total reported capacity
	Allocated NodeResources `json:"allocated"` // capacity reserved by assignments
	JoinedAt time.Time `json:"joined_at"` // set by AddNode
	LastHeartbeat time.Time `json:"last_heartbeat"` // refreshed by UpdateHeartbeat
	Version string `json:"version,omitempty"` // presumably the node's software version — TODO confirm
}
|
|
|
|
// NodeResources tracks a node's resource capacity. The same type doubles
// as the "allocated" counter on Node, where the fields mean reserved
// amounts rather than totals.
type NodeResources struct {
	CPUCores int `json:"cpu_cores"`
	MemoryMB int64 `json:"memory_mb"`
	DiskMB int64 `json:"disk_mb"`
	Containers int `json:"containers"` // running container count (allocation side)
	MaxContainers int `json:"max_containers,omitempty"` // capacity cap (500 default, see DetectResources)
}
|
|
|
|
// AvailableMemoryMB returns unallocated memory
|
|
func (n *Node) AvailableMemoryMB() int64 {
|
|
return n.Resources.MemoryMB - n.Allocated.MemoryMB
|
|
}
|
|
|
|
// AvailableCPU returns unallocated CPU cores
|
|
func (n *Node) AvailableCPU() int {
|
|
return n.Resources.CPUCores - n.Allocated.CPUCores
|
|
}
|
|
|
|
// ── Workload Assignment ─────────────────────────────────────────────────────
|
|
|
|
// WorkloadAssignment tracks which workload runs on which node, the
// resources reserved for it, and the constraints used to place it.
type WorkloadAssignment struct {
	WorkloadID string `json:"workload_id"` // key into ClusterState.Assignments
	WorkloadName string `json:"workload_name"`
	NodeID string `json:"node_id"` // hosting node
	Status string `json:"status"`
	Resources WorkloadResources `json:"resources"` // reserved on the node by AssignWorkload
	Constraints ScheduleConstraints `json:"constraints,omitempty"`
	AssignedAt time.Time `json:"assigned_at"` // set by AssignWorkload
	StartedAt time.Time `json:"started_at,omitempty"`
}
|
|
|
|
// WorkloadResources specifies the resources a workload requires; the
// scheduler only places a workload on nodes with at least this much free.
type WorkloadResources struct {
	CPUCores int `json:"cpu_cores"`
	MemoryMB int64 `json:"memory_mb"`
	DiskMB int64 `json:"disk_mb,omitempty"` // not currently checked by the scheduler
}
|
|
|
|
// ScheduleConstraints define placement requirements for workloads.
// NodeLabels, AntiAffinity, PinToNode, and Zone are hard constraints;
// PreferLabels is a soft constraint that only influences scoring.
type ScheduleConstraints struct {
	// Labels that must match on the target node (hard).
	NodeLabels map[string]string `json:"node_labels,omitempty"`
	// Preferred labels (soft constraint; up to a 0.1 scoring bonus).
	PreferLabels map[string]string `json:"prefer_labels,omitempty"`
	// Anti-affinity: don't schedule on nodes running these workload IDs.
	AntiAffinity []string `json:"anti_affinity,omitempty"`
	// Require a specific node; bypasses filtering and scoring entirely.
	PinToNode string `json:"pin_to_node,omitempty"`
	// Zone/rack awareness (matched against the node's "zone" label).
	Zone string `json:"zone,omitempty"`
}
|
|
|
|
// ── Cluster State ───────────────────────────────────────────────────────────
|
|
|
|
// ClusterState is the canonical state of the cluster, replicated via Raft.
// All exported fields are JSON-serialized (see SaveState/LoadState); mu
// guards every read and write of the maps and counters.
type ClusterState struct {
	mu sync.RWMutex // guards all fields below

	ClusterID string `json:"cluster_id"`
	Name string `json:"name"`
	CreatedAt time.Time `json:"created_at"`
	Nodes map[string]*Node `json:"nodes"` // keyed by Node.ID
	Assignments map[string]*WorkloadAssignment `json:"assignments"` // keyed by WorkloadID
	LeaderID string `json:"leader_id"`
	Term uint64 `json:"term"` // presumably the Raft term — TODO confirm
	Version uint64 `json:"version"` // bumped on add/remove/assign/unassign mutations
}
|
|
|
|
// NewClusterState creates an empty cluster state
|
|
func NewClusterState(clusterID, name string) *ClusterState {
|
|
return &ClusterState{
|
|
ClusterID: clusterID,
|
|
Name: name,
|
|
CreatedAt: time.Now().UTC(),
|
|
Nodes: make(map[string]*Node),
|
|
Assignments: make(map[string]*WorkloadAssignment),
|
|
}
|
|
}
|
|
|
|
// AddNode registers a new node in the cluster
|
|
func (cs *ClusterState) AddNode(node *Node) error {
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
|
|
if _, exists := cs.Nodes[node.ID]; exists {
|
|
return fmt.Errorf("node %q already exists", node.ID)
|
|
}
|
|
|
|
node.JoinedAt = time.Now().UTC()
|
|
node.LastHeartbeat = time.Now().UTC()
|
|
node.Status = StatusHealthy
|
|
cs.Nodes[node.ID] = node
|
|
cs.Version++
|
|
|
|
return nil
|
|
}
|
|
|
|
// RemoveNode removes a node from the cluster
|
|
func (cs *ClusterState) RemoveNode(nodeID string) error {
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
|
|
if _, exists := cs.Nodes[nodeID]; !exists {
|
|
return fmt.Errorf("node %q not found", nodeID)
|
|
}
|
|
|
|
delete(cs.Nodes, nodeID)
|
|
cs.Version++
|
|
return nil
|
|
}
|
|
|
|
// UpdateHeartbeat marks a node as alive
|
|
func (cs *ClusterState) UpdateHeartbeat(nodeID string, resources NodeResources) error {
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
|
|
node, exists := cs.Nodes[nodeID]
|
|
if !exists {
|
|
return fmt.Errorf("node %q not found", nodeID)
|
|
}
|
|
|
|
node.LastHeartbeat = time.Now().UTC()
|
|
node.Resources = resources
|
|
node.Status = StatusHealthy
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetNode returns a node by ID
|
|
func (cs *ClusterState) GetNode(nodeID string) *Node {
|
|
cs.mu.RLock()
|
|
defer cs.mu.RUnlock()
|
|
return cs.Nodes[nodeID]
|
|
}
|
|
|
|
// ListNodes returns all nodes
|
|
func (cs *ClusterState) ListNodes() []*Node {
|
|
cs.mu.RLock()
|
|
defer cs.mu.RUnlock()
|
|
|
|
nodes := make([]*Node, 0, len(cs.Nodes))
|
|
for _, n := range cs.Nodes {
|
|
nodes = append(nodes, n)
|
|
}
|
|
return nodes
|
|
}
|
|
|
|
// HealthyNodes returns nodes that can accept workloads
|
|
func (cs *ClusterState) HealthyNodes() []*Node {
|
|
cs.mu.RLock()
|
|
defer cs.mu.RUnlock()
|
|
|
|
var healthy []*Node
|
|
for _, n := range cs.Nodes {
|
|
if n.Status == StatusHealthy {
|
|
healthy = append(healthy, n)
|
|
}
|
|
}
|
|
return healthy
|
|
}
|
|
|
|
// ── Scheduling ──────────────────────────────────────────────────────────────
|
|
|
|
// Scheduler determines which node should run a workload. It is a thin,
// stateless view over a shared *ClusterState; all placement decisions
// read that state under its lock.
type Scheduler struct {
	state *ClusterState // shared cluster state; not owned by the scheduler
}
|
|
|
|
// NewScheduler creates a new scheduler
|
|
func NewScheduler(state *ClusterState) *Scheduler {
|
|
return &Scheduler{state: state}
|
|
}
|
|
|
|
// Schedule selects the best node for a workload using bin-packing
|
|
func (s *Scheduler) Schedule(workload *WorkloadAssignment) (string, error) {
|
|
s.state.mu.RLock()
|
|
defer s.state.mu.RUnlock()
|
|
|
|
// If pinned to a specific node, use that
|
|
if workload.Constraints.PinToNode != "" {
|
|
node, exists := s.state.Nodes[workload.Constraints.PinToNode]
|
|
if !exists {
|
|
return "", fmt.Errorf("pinned node %q not found", workload.Constraints.PinToNode)
|
|
}
|
|
if node.Status != StatusHealthy {
|
|
return "", fmt.Errorf("pinned node %q is %s", workload.Constraints.PinToNode, node.Status)
|
|
}
|
|
return node.ID, nil
|
|
}
|
|
|
|
// Filter candidates
|
|
candidates := s.filterCandidates(workload)
|
|
if len(candidates) == 0 {
|
|
return "", fmt.Errorf("no eligible nodes found for workload %q (need %dMB RAM, %d CPU)",
|
|
workload.WorkloadID, workload.Resources.MemoryMB, workload.Resources.CPUCores)
|
|
}
|
|
|
|
// Score candidates using bin-packing (prefer the most-packed node that still fits)
|
|
var bestNode *Node
|
|
bestScore := -1.0
|
|
|
|
for _, node := range candidates {
|
|
score := s.scoreNode(node, workload)
|
|
if score > bestScore {
|
|
bestScore = score
|
|
bestNode = node
|
|
}
|
|
}
|
|
|
|
if bestNode == nil {
|
|
return "", fmt.Errorf("no suitable node found")
|
|
}
|
|
|
|
return bestNode.ID, nil
|
|
}
|
|
|
|
// filterCandidates returns nodes that can physically run the workload
|
|
func (s *Scheduler) filterCandidates(workload *WorkloadAssignment) []*Node {
|
|
var candidates []*Node
|
|
|
|
for _, node := range s.state.Nodes {
|
|
// Must be healthy
|
|
if node.Status != StatusHealthy {
|
|
continue
|
|
}
|
|
|
|
// Must have enough resources
|
|
if node.AvailableMemoryMB() < workload.Resources.MemoryMB {
|
|
continue
|
|
}
|
|
if node.AvailableCPU() < workload.Resources.CPUCores {
|
|
continue
|
|
}
|
|
|
|
// Check label constraints
|
|
if !s.matchLabels(node, workload.Constraints.NodeLabels) {
|
|
continue
|
|
}
|
|
|
|
// Check anti-affinity
|
|
if s.violatesAntiAffinity(node, workload.Constraints.AntiAffinity) {
|
|
continue
|
|
}
|
|
|
|
// Check zone constraint
|
|
if workload.Constraints.Zone != "" {
|
|
if nodeZone, ok := node.Labels["zone"]; ok {
|
|
if nodeZone != workload.Constraints.Zone {
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
|
|
candidates = append(candidates, node)
|
|
}
|
|
|
|
return candidates
|
|
}
|
|
|
|
// matchLabels checks if a node has all required labels
|
|
func (s *Scheduler) matchLabels(node *Node, required map[string]string) bool {
|
|
for k, v := range required {
|
|
if nodeVal, ok := node.Labels[k]; !ok || nodeVal != v {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// violatesAntiAffinity checks if scheduling on this node would violate anti-affinity
|
|
func (s *Scheduler) violatesAntiAffinity(node *Node, antiAffinity []string) bool {
|
|
if len(antiAffinity) == 0 {
|
|
return false
|
|
}
|
|
|
|
for _, assignment := range s.state.Assignments {
|
|
if assignment.NodeID != node.ID {
|
|
continue
|
|
}
|
|
for _, aa := range antiAffinity {
|
|
if assignment.WorkloadID == aa {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// scoreNode scores a node for bin-packing (higher = better fit)
|
|
// Prefers nodes that are already partially filled (pack tight)
|
|
func (s *Scheduler) scoreNode(node *Node, workload *WorkloadAssignment) float64 {
|
|
if node.Resources.MemoryMB == 0 {
|
|
return 0
|
|
}
|
|
|
|
// Memory utilization after placing this workload (higher = more packed = preferred)
|
|
futureAllocMem := float64(node.Allocated.MemoryMB+workload.Resources.MemoryMB) / float64(node.Resources.MemoryMB)
|
|
|
|
// CPU utilization
|
|
futureCPU := 0.0
|
|
if node.Resources.CPUCores > 0 {
|
|
futureCPU = float64(node.Allocated.CPUCores+workload.Resources.CPUCores) / float64(node.Resources.CPUCores)
|
|
}
|
|
|
|
// Weighted score: 60% memory, 30% CPU, 10% bonus for preferred labels
|
|
score := futureAllocMem*0.6 + futureCPU*0.3
|
|
|
|
// Bonus for matching preferred labels
|
|
if len(workload.Constraints.PreferLabels) > 0 {
|
|
matchCount := 0
|
|
for k, v := range workload.Constraints.PreferLabels {
|
|
if nodeVal, ok := node.Labels[k]; ok && nodeVal == v {
|
|
matchCount++
|
|
}
|
|
}
|
|
if len(workload.Constraints.PreferLabels) > 0 {
|
|
score += 0.1 * float64(matchCount) / float64(len(workload.Constraints.PreferLabels))
|
|
}
|
|
}
|
|
|
|
return score
|
|
}
|
|
|
|
// AssignWorkload records a workload assignment
|
|
func (cs *ClusterState) AssignWorkload(assignment *WorkloadAssignment) error {
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
|
|
node, exists := cs.Nodes[assignment.NodeID]
|
|
if !exists {
|
|
return fmt.Errorf("node %q not found", assignment.NodeID)
|
|
}
|
|
|
|
// Update allocated resources
|
|
node.Allocated.CPUCores += assignment.Resources.CPUCores
|
|
node.Allocated.MemoryMB += assignment.Resources.MemoryMB
|
|
node.Allocated.Containers++
|
|
|
|
assignment.AssignedAt = time.Now().UTC()
|
|
cs.Assignments[assignment.WorkloadID] = assignment
|
|
cs.Version++
|
|
|
|
return nil
|
|
}
|
|
|
|
// UnassignWorkload removes a workload assignment and frees resources
|
|
func (cs *ClusterState) UnassignWorkload(workloadID string) error {
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
|
|
assignment, exists := cs.Assignments[workloadID]
|
|
if !exists {
|
|
return fmt.Errorf("workload %q not assigned", workloadID)
|
|
}
|
|
|
|
// Free resources on the node
|
|
if node, ok := cs.Nodes[assignment.NodeID]; ok {
|
|
node.Allocated.CPUCores -= assignment.Resources.CPUCores
|
|
node.Allocated.MemoryMB -= assignment.Resources.MemoryMB
|
|
node.Allocated.Containers--
|
|
if node.Allocated.CPUCores < 0 {
|
|
node.Allocated.CPUCores = 0
|
|
}
|
|
if node.Allocated.MemoryMB < 0 {
|
|
node.Allocated.MemoryMB = 0
|
|
}
|
|
if node.Allocated.Containers < 0 {
|
|
node.Allocated.Containers = 0
|
|
}
|
|
}
|
|
|
|
delete(cs.Assignments, workloadID)
|
|
cs.Version++
|
|
return nil
|
|
}
|
|
|
|
// ── Health Monitor ──────────────────────────────────────────────────────────
|
|
|
|
// HealthMonitor periodically checks node health (every HeartbeatInterval)
// and reports dead nodes, with their orphaned workloads, through the
// onNodeDead callback so the caller can reschedule them.
type HealthMonitor struct {
	state *ClusterState
	scheduler *Scheduler
	// stopCh is closed by Stop to terminate the monitoring goroutine.
	stopCh chan struct{}
	// onNodeDead, if set via OnNodeDead, fires (in its own goroutine)
	// when a node crosses NodeDeadThreshold.
	onNodeDead func(nodeID string, orphanedWorkloads []*WorkloadAssignment)
}
|
|
|
|
// NewHealthMonitor creates a new health monitor
|
|
func NewHealthMonitor(state *ClusterState, scheduler *Scheduler) *HealthMonitor {
|
|
return &HealthMonitor{
|
|
state: state,
|
|
scheduler: scheduler,
|
|
stopCh: make(chan struct{}),
|
|
}
|
|
}
|
|
|
|
// OnNodeDead registers a callback invoked (in a new goroutine) when a
// node is declared dead, together with the assignments that were on it.
//
// NOTE(review): hm.onNodeDead is written without synchronization while
// checkHealth reads it from the monitor goroutine — register the callback
// before calling Start to avoid a data race.
func (hm *HealthMonitor) OnNodeDead(fn func(nodeID string, orphaned []*WorkloadAssignment)) {
	hm.onNodeDead = fn
}
|
|
|
|
// Start begins the health monitoring loop
|
|
func (hm *HealthMonitor) Start() {
|
|
go func() {
|
|
ticker := time.NewTicker(HeartbeatInterval)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
hm.checkHealth()
|
|
case <-hm.stopCh:
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
// Stop halts the health monitoring loop by closing stopCh.
//
// NOTE(review): calling Stop more than once panics (close of a closed
// channel) — guard with sync.Once here or at the call site if multiple
// shutdown paths exist.
func (hm *HealthMonitor) Stop() {
	close(hm.stopCh)
}
|
|
|
|
// checkHealth sweeps every node and updates its status from the age of
// its last heartbeat:
//
//	> NodeDeadThreshold -> dead (fires onNodeDead once, with the node's assignments)
//	> HeartbeatTimeout  -> unreachable
//	otherwise           -> healthy (recovers unreachable/degraded nodes)
//
// Dead and left nodes are terminal here and are never revived.
func (hm *HealthMonitor) checkHealth() {
	hm.state.mu.Lock()
	defer hm.state.mu.Unlock()

	now := time.Now()

	for _, node := range hm.state.Nodes {
		// Terminal states: never transition dead/left nodes.
		if node.Status == StatusLeft || node.Status == StatusDead {
			continue
		}

		sinceHeartbeat := now.Sub(node.LastHeartbeat)

		switch {
		case sinceHeartbeat > NodeDeadThreshold:
			// The status guard makes the dead transition — and the
			// callback — fire only once per node.
			if node.Status != StatusDead {
				node.Status = StatusDead
				// Collect orphaned workloads
				if hm.onNodeDead != nil {
					var orphaned []*WorkloadAssignment
					for _, a := range hm.state.Assignments {
						if a.NodeID == node.ID {
							orphaned = append(orphaned, a)
						}
					}
					// Fire the callback off the lock (own goroutine).
					// NOTE(review): the *WorkloadAssignment pointers alias
					// live state, so the callback can race with later
					// mutations — confirm callers treat them as read-only.
					go hm.onNodeDead(node.ID, orphaned)
				}
			}

		case sinceHeartbeat > HeartbeatTimeout:
			node.Status = StatusUnreachable

		default:
			// Node is alive; recover soft-failure states.
			if node.Status == StatusUnreachable || node.Status == StatusDegraded {
				node.Status = StatusHealthy
			}
		}
	}
}
|
|
|
|
// ── Drain Operation ─────────────────────────────────────────────────────────
|
|
|
|
// DrainNode moves all workloads off a node for maintenance.
//
// The node is first marked StatusDraining (which excludes it from
// scheduling candidates), then every workload assigned to it is
// unassigned and rescheduled elsewhere. Returns a human-readable
// "workload → node" string per successful move.
//
// NOTE(review): on a mid-loop failure the function returns early with the
// node still draining and the failing workload already unassigned (i.e.
// placed nowhere) — confirm callers handle partial drains.
func DrainNode(state *ClusterState, scheduler *Scheduler, nodeID string) ([]string, error) {
	state.mu.Lock()

	node, exists := state.Nodes[nodeID]
	if !exists {
		state.mu.Unlock()
		return nil, fmt.Errorf("node %q not found", nodeID)
	}

	// Draining removes this node from filterCandidates' healthy set.
	node.Status = StatusDraining

	// Collect workloads on this node
	var toReschedule []*WorkloadAssignment
	for _, a := range state.Assignments {
		if a.NodeID == nodeID {
			toReschedule = append(toReschedule, a)
		}
	}

	// Release the lock: Unassign/Schedule/Assign below each take it
	// themselves. State may change in this window.
	state.mu.Unlock()

	// Reschedule each workload
	var rescheduled []string
	for _, assignment := range toReschedule {
		// Remove from current node
		if err := state.UnassignWorkload(assignment.WorkloadID); err != nil {
			return rescheduled, fmt.Errorf("failed to unassign %s: %w", assignment.WorkloadID, err)
		}

		// Find new node
		newNodeID, err := scheduler.Schedule(assignment)
		if err != nil {
			return rescheduled, fmt.Errorf("failed to reschedule %s: %w", assignment.WorkloadID, err)
		}

		assignment.NodeID = newNodeID
		if err := state.AssignWorkload(assignment); err != nil {
			return rescheduled, fmt.Errorf("failed to assign %s to %s: %w",
				assignment.WorkloadID, newNodeID, err)
		}

		rescheduled = append(rescheduled, fmt.Sprintf("%s → %s", assignment.WorkloadID, newNodeID))
	}

	return rescheduled, nil
}
|
|
|
|
// ── Persistence ─────────────────────────────────────────────────────────────
|
|
|
|
// SaveState writes cluster state to disk
|
|
func SaveState(state *ClusterState) error {
|
|
state.mu.RLock()
|
|
defer state.mu.RUnlock()
|
|
|
|
if err := os.MkdirAll(ClusterConfigDir, 0755); err != nil {
|
|
return err
|
|
}
|
|
|
|
data, err := json.MarshalIndent(state, "", " ")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Atomic write
|
|
tmpFile := ClusterStateFile + ".tmp"
|
|
if err := os.WriteFile(tmpFile, data, 0644); err != nil {
|
|
return err
|
|
}
|
|
return os.Rename(tmpFile, ClusterStateFile)
|
|
}
|
|
|
|
// LoadState reads cluster state from disk
|
|
func LoadState() (*ClusterState, error) {
|
|
data, err := os.ReadFile(ClusterStateFile)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var state ClusterState
|
|
if err := json.Unmarshal(data, &state); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Initialize maps if nil
|
|
if state.Nodes == nil {
|
|
state.Nodes = make(map[string]*Node)
|
|
}
|
|
if state.Assignments == nil {
|
|
state.Assignments = make(map[string]*WorkloadAssignment)
|
|
}
|
|
|
|
return &state, nil
|
|
}
|
|
|
|
// ── Node Resource Detection ─────────────────────────────────────────────────
|
|
|
|
// DetectResources probes the local system for available resources
|
|
func DetectResources() NodeResources {
|
|
res := NodeResources{
|
|
CPUCores: detectCPUCores(),
|
|
MemoryMB: detectMemoryMB(),
|
|
DiskMB: detectDiskMB(),
|
|
MaxContainers: 500, // Pro default
|
|
}
|
|
return res
|
|
}
|
|
|
|
// detectCPUCores counts "processor" entries in /proc/cpuinfo, returning
// at least 1 (the floor also covers non-Linux systems and read failures).
//
// Fix: the original required len(line) > 9 before comparing line[:9], so
// a line that was exactly "processor" was never counted; HasPrefix
// handles both cases and avoids the manual slice bounds.
func detectCPUCores() int {
	data, err := os.ReadFile("/proc/cpuinfo")
	if err != nil {
		return 1
	}

	count := 0
	for _, line := range strings.Split(string(data), "\n") {
		if strings.HasPrefix(line, "processor") {
			count++
		}
	}
	if count == 0 {
		return 1
	}
	return count
}
|
|
|
|
// detectMemoryMB reads total system memory from /proc/meminfo's MemTotal
// line, converted from kB to MB. Falls back to 512 when the file is
// unreadable or the line cannot be parsed.
//
// Fix: the original ignored fmt.Sscanf's error, so a malformed MemTotal
// line silently reported 0 MB; now an unparsable line falls back to 512.
func detectMemoryMB() int64 {
	data, err := os.ReadFile("/proc/meminfo")
	if err != nil {
		return 512
	}

	for _, line := range strings.Split(string(data), "\n") {
		if !strings.HasPrefix(line, "MemTotal") {
			continue
		}
		var kb int64
		if _, err := fmt.Sscanf(line, "MemTotal: %d kB", &kb); err == nil && kb > 0 {
			return kb / 1024
		}
		break // MemTotal appears once; an unparsable line means fallback
	}
	return 512
}
|
|
|
|
// detectDiskMB reports disk capacity for the /var/lib/volt partition.
//
// Currently a placeholder that always returns 10 GB: the original
// declared an unused statfs-like struct and called os.Stat, but both
// branches returned the same constant — that dead code is removed.
// TODO: use syscall.Statfs (or golang.org/x/sys/unix.Statfs) on
// /var/lib/volt for a real figure.
func detectDiskMB() int64 {
	return 10240 // 10GB default
}
|
|
|
|
// splitByNewline splits s on '\n' without producing a trailing empty
// element when s ends with a newline, and returns nil for the empty
// string — matching the original hand-rolled splitter's contract while
// delegating the scanning to strings.Split.
func splitByNewline(s string) []string {
	if s == "" {
		return nil
	}
	parts := strings.Split(s, "\n")
	// strings.Split yields one extra "" after a trailing newline; the
	// original splitter did not, so trim it for identical behavior.
	if parts[len(parts)-1] == "" {
		parts = parts[:len(parts)-1]
	}
	return parts
}
|
|
|
|
// ── Cluster Config ──────────────────────────────────────────────────────────
|
|
|
|
// ClusterConfig holds this node's local cluster configuration, persisted
// as config.json under ClusterConfigDir (see SaveConfig/LoadConfig).
type ClusterConfig struct {
	ClusterID string `json:"cluster_id"`
	NodeID string `json:"node_id"`
	NodeName string `json:"node_name"`
	RaftPort int `json:"raft_port"`
	RPCPort int `json:"rpc_port"`
	// LeaderAddr presumably points followers at the current leader — TODO confirm.
	LeaderAddr string `json:"leader_addr,omitempty"`
	MeshEnabled bool `json:"mesh_enabled"` // cluster traffic rides the WireGuard mesh when true
}
|
|
|
|
// SaveConfig writes local cluster config
|
|
func SaveConfig(cfg *ClusterConfig) error {
|
|
if err := os.MkdirAll(ClusterConfigDir, 0755); err != nil {
|
|
return err
|
|
}
|
|
data, err := json.MarshalIndent(cfg, "", " ")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return os.WriteFile(filepath.Join(ClusterConfigDir, "config.json"), data, 0644)
|
|
}
|
|
|
|
// LoadConfig reads local cluster config
|
|
func LoadConfig() (*ClusterConfig, error) {
|
|
data, err := os.ReadFile(filepath.Join(ClusterConfigDir, "config.json"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var cfg ClusterConfig
|
|
if err := json.Unmarshal(data, &cfg); err != nil {
|
|
return nil, err
|
|
}
|
|
return &cfg, nil
|
|
}
|