/*
Volt Cluster — Native control plane for multi-node orchestration.

Replaces the thin kubectl wrapper with a native clustering system built
specifically for Volt's workload model (containers, hybrid-native, VMs).

Architecture:
  - Control plane: single leader node running volt-control daemon
  - Workers: nodes that register via `volt cluster join`
  - Communication: gRPC-over-mesh (WireGuard) or plain HTTPS
  - State: JSON-based on-disk store (no etcd dependency)
  - Health: heartbeat-based with configurable failure detection

The control plane is responsible for:
  - Node registration and deregistration
  - Health monitoring (heartbeat processing)
  - Workload scheduling (resource-based, label selectors)
  - Workload state sync across nodes

Copyright (c) Armored Gates LLC. All rights reserved.
AGPSL v5 — Source-available. Anti-competition clauses apply.
*/
package cluster

import (
	"encoding/json"
	"fmt"
	"os"
	"sync"
	"time"
)

// ── Constants ────────────────────────────────────────────────────────────────

const (
	// DefaultHeartbeatInterval is how often workers are expected to report in.
	DefaultHeartbeatInterval = 10 * time.Second
	// DefaultFailureThreshold is the number of missed heartbeats before a node
	// is marked unhealthy (see IsHealthy and CheckHealth).
	DefaultFailureThreshold = 3
	// DefaultAPIPort is the control-plane API port used when none is given.
	DefaultAPIPort = 9443

	// On-disk JSON state locations (see the Persistence section).
	ClusterStateDir   = "/var/lib/volt/cluster"
	ClusterStateFile  = "/var/lib/volt/cluster/state.json"
	NodesStateFile    = "/var/lib/volt/cluster/nodes.json"
	ScheduleStateFile = "/var/lib/volt/cluster/schedule.json"
)

// ── Node ─────────────────────────────────────────────────────────────────────

// NodeStatus represents the health state of a cluster node.
type NodeStatus string

const (
	NodeStatusReady    NodeStatus = "ready"
	NodeStatusNotReady NodeStatus = "not-ready"
	NodeStatusJoining  NodeStatus = "joining"
	NodeStatusDraining NodeStatus = "draining"
	NodeStatusRemoved  NodeStatus = "removed"
)

// NodeResources describes the capacity and usage of a node.
type NodeResources struct {
	CPUCores       int   `json:"cpu_cores"`
	MemoryTotalMB  int64 `json:"memory_total_mb"`
	MemoryUsedMB   int64 `json:"memory_used_mb"`
	DiskTotalGB    int64 `json:"disk_total_gb"`
	DiskUsedGB     int64 `json:"disk_used_gb"`
	ContainerCount int   `json:"container_count"`
	WorkloadCount  int   `json:"workload_count"`
}

// NodeInfo represents a registered cluster node.
type NodeInfo struct {
	NodeID   string `json:"node_id"`
	Name     string `json:"name"`
	MeshIP   string `json:"mesh_ip"`
	PublicIP string `json:"public_ip,omitempty"`

	Status NodeStatus `json:"status"`
	// Labels are free-form key/value pairs matched by scheduling selectors.
	Labels    map[string]string `json:"labels,omitempty"`
	Resources NodeResources     `json:"resources"`

	// LastHeartbeat / MissedBeats are maintained by ProcessHeartbeat and
	// CheckHealth on the control plane.
	LastHeartbeat time.Time `json:"last_heartbeat"`
	JoinedAt      time.Time `json:"joined_at"`
	MissedBeats   int       `json:"missed_beats"`

	VoltVersion   string `json:"volt_version,omitempty"`
	KernelVersion string `json:"kernel_version,omitempty"`
	OS            string `json:"os,omitempty"`
	Region        string `json:"region,omitempty"`
}

// IsHealthy returns true if the node is responding to heartbeats.
func (n *NodeInfo) IsHealthy() bool {
	return n.Status == NodeStatusReady && n.MissedBeats < DefaultFailureThreshold
}

// ── Cluster State ────────────────────────────────────────────────────────────

// ClusterRole indicates this node's role in the cluster.
type ClusterRole string

const (
	RoleControl ClusterRole = "control"
	RoleWorker  ClusterRole = "worker"
	RoleNone    ClusterRole = "none"
)

// ClusterState is the persistent on-disk cluster membership state for this node.
type ClusterState struct {
	ClusterID  string      `json:"cluster_id"`
	Role       ClusterRole `json:"role"`
	NodeID     string      `json:"node_id"`
	NodeName   string      `json:"node_name"`
	ControlURL string      `json:"control_url"`
	APIPort    int         `json:"api_port"`
	JoinedAt   time.Time   `json:"joined_at"`
	// NOTE(review): time.Duration marshals as an integer nanosecond count in
	// JSON, not a human-readable string — confirm consumers expect that.
	HeartbeatInterval time.Duration `json:"heartbeat_interval"`
}

// ── Scheduled Workload ───────────────────────────────────────────────────────

// ScheduledWorkload represents a workload assigned to a node by the scheduler.
type ScheduledWorkload struct { WorkloadID string `json:"workload_id"` NodeID string `json:"node_id"` NodeName string `json:"node_name"` Mode string `json:"mode"` // container, hybrid-native, etc. ManifestPath string `json:"manifest_path,omitempty"` Labels map[string]string `json:"labels,omitempty"` Resources WorkloadResources `json:"resources"` Status string `json:"status"` // pending, running, stopped, failed ScheduledAt time.Time `json:"scheduled_at"` } // WorkloadResources describes the resource requirements for a workload. type WorkloadResources struct { CPUCores int `json:"cpu_cores"` MemoryMB int64 `json:"memory_mb"` DiskMB int64 `json:"disk_mb,omitempty"` } // ── Control Plane ──────────────────────────────────────────────────────────── // ControlPlane manages cluster state, node registration, and scheduling. type ControlPlane struct { state *ClusterState nodes map[string]*NodeInfo schedule []*ScheduledWorkload mu sync.RWMutex } // NewControlPlane creates or loads a control plane instance. func NewControlPlane() *ControlPlane { cp := &ControlPlane{ nodes: make(map[string]*NodeInfo), } cp.loadState() cp.loadNodes() cp.loadSchedule() return cp } // IsInitialized returns true if the cluster has been initialized. func (cp *ControlPlane) IsInitialized() bool { cp.mu.RLock() defer cp.mu.RUnlock() return cp.state != nil && cp.state.ClusterID != "" } // State returns a copy of the cluster state. func (cp *ControlPlane) State() *ClusterState { cp.mu.RLock() defer cp.mu.RUnlock() if cp.state == nil { return nil } copy := *cp.state return © } // Role returns this node's cluster role. func (cp *ControlPlane) Role() ClusterRole { cp.mu.RLock() defer cp.mu.RUnlock() if cp.state == nil { return RoleNone } return cp.state.Role } // Nodes returns all registered nodes. 
func (cp *ControlPlane) Nodes() []*NodeInfo { cp.mu.RLock() defer cp.mu.RUnlock() result := make([]*NodeInfo, 0, len(cp.nodes)) for _, n := range cp.nodes { copy := *n result = append(result, ©) } return result } // GetNode returns a node by ID or name. func (cp *ControlPlane) GetNode(idOrName string) *NodeInfo { cp.mu.RLock() defer cp.mu.RUnlock() if n, ok := cp.nodes[idOrName]; ok { copy := *n return © } // Try by name for _, n := range cp.nodes { if n.Name == idOrName { copy := *n return © } } return nil } // Schedule returns the current workload schedule. func (cp *ControlPlane) Schedule() []*ScheduledWorkload { cp.mu.RLock() defer cp.mu.RUnlock() result := make([]*ScheduledWorkload, len(cp.schedule)) for i, sw := range cp.schedule { copy := *sw result[i] = © } return result } // ── Init ───────────────────────────────────────────────────────────────────── // InitCluster initializes this node as the cluster control plane. func (cp *ControlPlane) InitCluster(clusterID, nodeName, meshIP string, apiPort int) error { cp.mu.Lock() defer cp.mu.Unlock() if cp.state != nil && cp.state.ClusterID != "" { return fmt.Errorf("already part of cluster %q", cp.state.ClusterID) } if apiPort == 0 { apiPort = DefaultAPIPort } cp.state = &ClusterState{ ClusterID: clusterID, Role: RoleControl, NodeID: clusterID + "-control", NodeName: nodeName, ControlURL: fmt.Sprintf("https://%s:%d", meshIP, apiPort), APIPort: apiPort, JoinedAt: time.Now().UTC(), HeartbeatInterval: DefaultHeartbeatInterval, } // Register self as a node cp.nodes[cp.state.NodeID] = &NodeInfo{ NodeID: cp.state.NodeID, Name: nodeName, MeshIP: meshIP, Status: NodeStatusReady, Labels: map[string]string{"role": "control"}, LastHeartbeat: time.Now().UTC(), JoinedAt: time.Now().UTC(), } if err := cp.saveState(); err != nil { return err } return cp.saveNodes() } // ── Join ───────────────────────────────────────────────────────────────────── // JoinCluster registers this node as a worker in an existing cluster. 
func (cp *ControlPlane) JoinCluster(clusterID, controlURL, nodeID, nodeName, meshIP string) error { cp.mu.Lock() defer cp.mu.Unlock() if cp.state != nil && cp.state.ClusterID != "" { return fmt.Errorf("already part of cluster %q — run 'volt cluster leave' first", cp.state.ClusterID) } cp.state = &ClusterState{ ClusterID: clusterID, Role: RoleWorker, NodeID: nodeID, NodeName: nodeName, ControlURL: controlURL, JoinedAt: time.Now().UTC(), HeartbeatInterval: DefaultHeartbeatInterval, } return cp.saveState() } // ── Node Registration ──────────────────────────────────────────────────────── // RegisterNode adds a new worker node to the cluster (control plane only). func (cp *ControlPlane) RegisterNode(node *NodeInfo) error { cp.mu.Lock() defer cp.mu.Unlock() if cp.state == nil || cp.state.Role != RoleControl { return fmt.Errorf("not the control plane — cannot register nodes") } node.Status = NodeStatusReady node.JoinedAt = time.Now().UTC() node.LastHeartbeat = time.Now().UTC() cp.nodes[node.NodeID] = node return cp.saveNodes() } // DeregisterNode removes a node from the cluster. func (cp *ControlPlane) DeregisterNode(nodeID string) error { cp.mu.Lock() defer cp.mu.Unlock() if _, exists := cp.nodes[nodeID]; !exists { return fmt.Errorf("node %q not found", nodeID) } delete(cp.nodes, nodeID) return cp.saveNodes() } // ── Heartbeat ──────────────────────────────────────────────────────────────── // ProcessHeartbeat updates a node's health status. func (cp *ControlPlane) ProcessHeartbeat(nodeID string, resources NodeResources) error { cp.mu.Lock() defer cp.mu.Unlock() node, exists := cp.nodes[nodeID] if !exists { return fmt.Errorf("node %q not registered", nodeID) } node.LastHeartbeat = time.Now().UTC() node.MissedBeats = 0 node.Resources = resources if node.Status == NodeStatusNotReady { node.Status = NodeStatusReady } return cp.saveNodes() } // CheckHealth evaluates all nodes and marks those with missed heartbeats. 
func (cp *ControlPlane) CheckHealth() []string { cp.mu.Lock() defer cp.mu.Unlock() var unhealthy []string threshold := time.Duration(DefaultFailureThreshold) * DefaultHeartbeatInterval for _, node := range cp.nodes { if node.Status == NodeStatusRemoved || node.Status == NodeStatusDraining { continue } if time.Since(node.LastHeartbeat) > threshold { node.MissedBeats++ if node.MissedBeats >= DefaultFailureThreshold { node.Status = NodeStatusNotReady unhealthy = append(unhealthy, node.NodeID) } } } cp.saveNodes() return unhealthy } // ── Drain ──────────────────────────────────────────────────────────────────── // DrainNode marks a node for draining (no new workloads, existing ones rescheduled). func (cp *ControlPlane) DrainNode(nodeID string) error { cp.mu.Lock() defer cp.mu.Unlock() node, exists := cp.nodes[nodeID] if !exists { return fmt.Errorf("node %q not found", nodeID) } node.Status = NodeStatusDraining // Find workloads on this node and mark for rescheduling for _, sw := range cp.schedule { if sw.NodeID == nodeID && sw.Status == "running" { sw.Status = "pending" // will be rescheduled sw.NodeID = "" sw.NodeName = "" } } cp.saveNodes() return cp.saveSchedule() } // ── Leave ──────────────────────────────────────────────────────────────────── // LeaveCluster removes this node from the cluster. func (cp *ControlPlane) LeaveCluster() error { cp.mu.Lock() defer cp.mu.Unlock() if cp.state == nil { return fmt.Errorf("not part of any cluster") } // If control plane, clean up if cp.state.Role == RoleControl { cp.nodes = make(map[string]*NodeInfo) cp.schedule = nil os.Remove(NodesStateFile) os.Remove(ScheduleStateFile) } cp.state = nil os.Remove(ClusterStateFile) return nil } // ── Scheduling ─────────────────────────────────────────────────────────────── // ScheduleWorkload assigns a workload to a node based on resource availability // and label selectors. 
func (cp *ControlPlane) ScheduleWorkload(workload *ScheduledWorkload, nodeSelector map[string]string) error { cp.mu.Lock() defer cp.mu.Unlock() if cp.state == nil || cp.state.Role != RoleControl { return fmt.Errorf("not the control plane — cannot schedule workloads") } // Find best node bestNode := cp.findBestNode(workload.Resources, nodeSelector) if bestNode == nil { return fmt.Errorf("no suitable node found for workload %q (required: %dMB RAM, %d CPU cores)", workload.WorkloadID, workload.Resources.MemoryMB, workload.Resources.CPUCores) } workload.NodeID = bestNode.NodeID workload.NodeName = bestNode.Name workload.Status = "pending" workload.ScheduledAt = time.Now().UTC() cp.schedule = append(cp.schedule, workload) return cp.saveSchedule() } // findBestNode selects the best available node for a workload based on // resource availability and label matching. Uses a simple "least loaded" strategy. func (cp *ControlPlane) findBestNode(required WorkloadResources, selector map[string]string) *NodeInfo { var best *NodeInfo var bestScore int64 = -1 for _, node := range cp.nodes { // Skip unhealthy/draining nodes if node.Status != NodeStatusReady { continue } // Check label selector if !matchLabels(node.Labels, selector) { continue } // Check resource availability availMem := node.Resources.MemoryTotalMB - node.Resources.MemoryUsedMB if required.MemoryMB > 0 && availMem < required.MemoryMB { continue } // Score: prefer nodes with more available resources (simple bin-packing) score := availMem if best == nil || score > bestScore { best = node bestScore = score } } return best } // matchLabels checks if a node's labels satisfy a selector. 
func matchLabels(nodeLabels, selector map[string]string) bool { for k, v := range selector { if nodeLabels[k] != v { return false } } return true } // ── Persistence ────────────────────────────────────────────────────────────── func (cp *ControlPlane) loadState() { data, err := os.ReadFile(ClusterStateFile) if err != nil { return } var state ClusterState if err := json.Unmarshal(data, &state); err != nil { return } cp.state = &state } func (cp *ControlPlane) saveState() error { os.MkdirAll(ClusterStateDir, 0755) data, err := json.MarshalIndent(cp.state, "", " ") if err != nil { return err } return os.WriteFile(ClusterStateFile, data, 0644) } func (cp *ControlPlane) loadNodes() { data, err := os.ReadFile(NodesStateFile) if err != nil { return } var nodes map[string]*NodeInfo if err := json.Unmarshal(data, &nodes); err != nil { return } cp.nodes = nodes } func (cp *ControlPlane) saveNodes() error { os.MkdirAll(ClusterStateDir, 0755) data, err := json.MarshalIndent(cp.nodes, "", " ") if err != nil { return err } return os.WriteFile(NodesStateFile, data, 0644) } func (cp *ControlPlane) loadSchedule() { data, err := os.ReadFile(ScheduleStateFile) if err != nil { return } var schedule []*ScheduledWorkload if err := json.Unmarshal(data, &schedule); err != nil { return } cp.schedule = schedule } func (cp *ControlPlane) saveSchedule() error { os.MkdirAll(ClusterStateDir, 0755) data, err := json.MarshalIndent(cp.schedule, "", " ") if err != nil { return err } return os.WriteFile(ScheduleStateFile, data, 0644) }