KVM-based microVMM for the Volt platform: - Sub-second VM boot times - Minimal memory footprint - Landlock LSM + seccomp security - Virtio device support - Custom kernel management Copyright (c) Armored Gates LLC. All rights reserved. Licensed under AGPSL v5.0
538 lines
12 KiB
Go
538 lines
12 KiB
Go
package unified
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"net"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/vishvananda/netlink"
|
|
)
|
|
|
|
// Manager handles unified network operations for VMs and containers
|
|
type Manager struct {
|
|
// State directory for leases and config
|
|
stateDir string
|
|
|
|
// Network configurations by name
|
|
networks map[string]*NetworkConfig
|
|
|
|
// IPAM state
|
|
ipam *IPAM
|
|
|
|
// Active interfaces by workload ID
|
|
interfaces map[string]*Interface
|
|
|
|
mu sync.RWMutex
|
|
}
|
|
|
|
// NewManager creates a new unified network manager
|
|
func NewManager(stateDir string) (*Manager, error) {
|
|
if err := os.MkdirAll(stateDir, 0755); err != nil {
|
|
return nil, fmt.Errorf("create state dir: %w", err)
|
|
}
|
|
|
|
m := &Manager{
|
|
stateDir: stateDir,
|
|
networks: make(map[string]*NetworkConfig),
|
|
interfaces: make(map[string]*Interface),
|
|
}
|
|
|
|
// Initialize IPAM
|
|
ipam, err := NewIPAM(filepath.Join(stateDir, "ipam"))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("init IPAM: %w", err)
|
|
}
|
|
m.ipam = ipam
|
|
|
|
// Load existing state
|
|
if err := m.loadState(); err != nil {
|
|
// Non-fatal, might be first run
|
|
_ = err
|
|
}
|
|
|
|
return m, nil
|
|
}
|
|
|
|
// AddNetwork registers a network configuration
|
|
func (m *Manager) AddNetwork(config *NetworkConfig) error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
// Validate
|
|
if config.Name == "" {
|
|
return fmt.Errorf("network name required")
|
|
}
|
|
if config.Subnet == "" {
|
|
return fmt.Errorf("subnet required")
|
|
}
|
|
|
|
_, subnet, err := net.ParseCIDR(config.Subnet)
|
|
if err != nil {
|
|
return fmt.Errorf("invalid subnet: %w", err)
|
|
}
|
|
|
|
// Set defaults
|
|
if config.MTU == 0 {
|
|
config.MTU = 1500
|
|
}
|
|
if config.Type == "" {
|
|
config.Type = NetworkBridged
|
|
}
|
|
if config.Bridge == "" && config.Type == NetworkBridged {
|
|
config.Bridge = config.Name
|
|
}
|
|
|
|
// Register with IPAM
|
|
if config.IPAM != nil {
|
|
var gateway net.IP
|
|
if config.Gateway != "" {
|
|
gateway = net.ParseIP(config.Gateway)
|
|
}
|
|
if err := m.ipam.AddPool(config.Name, subnet, gateway, nil); err != nil {
|
|
return fmt.Errorf("register IPAM pool: %w", err)
|
|
}
|
|
}
|
|
|
|
m.networks[config.Name] = config
|
|
return m.saveState()
|
|
}
|
|
|
|
// EnsureBridge ensures the bridge exists and is configured
|
|
func (m *Manager) EnsureBridge(name string) (*BridgeInfo, error) {
|
|
// Check if bridge exists
|
|
link, err := netlink.LinkByName(name)
|
|
if err != nil {
|
|
// Bridge doesn't exist, create it
|
|
bridge := &netlink.Bridge{
|
|
LinkAttrs: netlink.LinkAttrs{
|
|
Name: name,
|
|
MTU: 1500,
|
|
},
|
|
}
|
|
if err := netlink.LinkAdd(bridge); err != nil {
|
|
return nil, fmt.Errorf("create bridge %s: %w", name, err)
|
|
}
|
|
link, err = netlink.LinkByName(name)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("get created bridge: %w", err)
|
|
}
|
|
}
|
|
|
|
// Ensure it's up
|
|
if err := netlink.LinkSetUp(link); err != nil {
|
|
return nil, fmt.Errorf("set bridge up: %w", err)
|
|
}
|
|
|
|
// Get bridge info
|
|
info := &BridgeInfo{
|
|
Name: name,
|
|
MTU: link.Attrs().MTU,
|
|
Up: link.Attrs().OperState == netlink.OperUp,
|
|
}
|
|
|
|
if link.Attrs().HardwareAddr != nil {
|
|
info.MAC = link.Attrs().HardwareAddr
|
|
}
|
|
|
|
// Get IP addresses
|
|
addrs, err := netlink.AddrList(link, netlink.FAMILY_V4)
|
|
if err == nil && len(addrs) > 0 {
|
|
info.IP = addrs[0].IP
|
|
info.Subnet = addrs[0].IPNet
|
|
}
|
|
|
|
return info, nil
|
|
}
|
|
|
|
// CreateTAP creates a TAP device for a VM and attaches it to the bridge
|
|
func (m *Manager) CreateTAP(network, workloadID string) (*Interface, error) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
config, ok := m.networks[network]
|
|
if !ok {
|
|
return nil, fmt.Errorf("network %s not found", network)
|
|
}
|
|
|
|
// Generate TAP name (max 15 chars for Linux interface names)
|
|
tapName := fmt.Sprintf("tap-%s", truncateID(workloadID, 10))
|
|
|
|
// Create TAP device
|
|
tap := &netlink.Tuntap{
|
|
LinkAttrs: netlink.LinkAttrs{
|
|
Name: tapName,
|
|
MTU: config.MTU,
|
|
},
|
|
Mode: netlink.TUNTAP_MODE_TAP,
|
|
Flags: netlink.TUNTAP_NO_PI | netlink.TUNTAP_VNET_HDR,
|
|
Queues: 1, // Can increase for multi-queue
|
|
}
|
|
|
|
if err := netlink.LinkAdd(tap); err != nil {
|
|
return nil, fmt.Errorf("create TAP %s: %w", tapName, err)
|
|
}
|
|
|
|
// Get the created link to get FD
|
|
link, err := netlink.LinkByName(tapName)
|
|
if err != nil {
|
|
_ = netlink.LinkDel(tap)
|
|
return nil, fmt.Errorf("get TAP link: %w", err)
|
|
}
|
|
|
|
// Get the file descriptor from the TAP
|
|
// This requires opening /dev/net/tun with the TAP name
|
|
fd, err := openTAPFD(tapName)
|
|
if err != nil {
|
|
_ = netlink.LinkDel(tap)
|
|
return nil, fmt.Errorf("open TAP fd: %w", err)
|
|
}
|
|
|
|
// Attach to bridge
|
|
bridge, err := netlink.LinkByName(config.Bridge)
|
|
if err != nil {
|
|
_ = netlink.LinkDel(tap)
|
|
return nil, fmt.Errorf("get bridge %s: %w", config.Bridge, err)
|
|
}
|
|
|
|
if err := netlink.LinkSetMaster(link, bridge); err != nil {
|
|
_ = netlink.LinkDel(tap)
|
|
return nil, fmt.Errorf("attach to bridge: %w", err)
|
|
}
|
|
|
|
// Set link up
|
|
if err := netlink.LinkSetUp(link); err != nil {
|
|
_ = netlink.LinkDel(tap)
|
|
return nil, fmt.Errorf("set TAP up: %w", err)
|
|
}
|
|
|
|
// Generate MAC address
|
|
mac := generateMAC(workloadID)
|
|
|
|
// Allocate IP if IPAM enabled
|
|
var ip net.IP
|
|
var mask net.IPMask
|
|
var gateway net.IP
|
|
if config.IPAM != nil {
|
|
lease, err := m.ipam.Allocate(network, workloadID, mac)
|
|
if err != nil {
|
|
_ = netlink.LinkDel(tap)
|
|
return nil, fmt.Errorf("allocate IP: %w", err)
|
|
}
|
|
ip = lease.IP
|
|
_, subnet, _ := net.ParseCIDR(config.Subnet)
|
|
mask = subnet.Mask
|
|
if config.Gateway != "" {
|
|
gateway = net.ParseIP(config.Gateway)
|
|
}
|
|
}
|
|
|
|
iface := &Interface{
|
|
Name: tapName,
|
|
MAC: mac,
|
|
IP: ip,
|
|
Mask: mask,
|
|
Gateway: gateway,
|
|
Bridge: config.Bridge,
|
|
WorkloadID: workloadID,
|
|
WorkloadType: WorkloadVM,
|
|
FD: fd,
|
|
}
|
|
|
|
m.interfaces[workloadID] = iface
|
|
_ = m.saveState()
|
|
|
|
return iface, nil
|
|
}
|
|
|
|
// CreateVeth creates a veth pair for a container and attaches host end to bridge
|
|
func (m *Manager) CreateVeth(network, workloadID string) (*Interface, error) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
config, ok := m.networks[network]
|
|
if !ok {
|
|
return nil, fmt.Errorf("network %s not found", network)
|
|
}
|
|
|
|
// Generate veth names (max 15 chars)
|
|
hostName := fmt.Sprintf("veth-%s-h", truncateID(workloadID, 7))
|
|
peerName := fmt.Sprintf("veth-%s-c", truncateID(workloadID, 7))
|
|
|
|
// Create veth pair
|
|
veth := &netlink.Veth{
|
|
LinkAttrs: netlink.LinkAttrs{
|
|
Name: hostName,
|
|
MTU: config.MTU,
|
|
},
|
|
PeerName: peerName,
|
|
}
|
|
|
|
if err := netlink.LinkAdd(veth); err != nil {
|
|
return nil, fmt.Errorf("create veth pair: %w", err)
|
|
}
|
|
|
|
// Get the created links
|
|
hostLink, err := netlink.LinkByName(hostName)
|
|
if err != nil {
|
|
_ = netlink.LinkDel(veth)
|
|
return nil, fmt.Errorf("get host veth: %w", err)
|
|
}
|
|
|
|
peerLink, err := netlink.LinkByName(peerName)
|
|
if err != nil {
|
|
_ = netlink.LinkDel(veth)
|
|
return nil, fmt.Errorf("get peer veth: %w", err)
|
|
}
|
|
|
|
// Attach host end to bridge
|
|
bridge, err := netlink.LinkByName(config.Bridge)
|
|
if err != nil {
|
|
_ = netlink.LinkDel(veth)
|
|
return nil, fmt.Errorf("get bridge %s: %w", config.Bridge, err)
|
|
}
|
|
|
|
if err := netlink.LinkSetMaster(hostLink, bridge); err != nil {
|
|
_ = netlink.LinkDel(veth)
|
|
return nil, fmt.Errorf("attach to bridge: %w", err)
|
|
}
|
|
|
|
// Set host end up
|
|
if err := netlink.LinkSetUp(hostLink); err != nil {
|
|
_ = netlink.LinkDel(veth)
|
|
return nil, fmt.Errorf("set host veth up: %w", err)
|
|
}
|
|
|
|
// Generate MAC address
|
|
mac := generateMAC(workloadID)
|
|
|
|
// Set MAC on peer (container) end
|
|
if err := netlink.LinkSetHardwareAddr(peerLink, mac); err != nil {
|
|
_ = netlink.LinkDel(veth)
|
|
return nil, fmt.Errorf("set peer MAC: %w", err)
|
|
}
|
|
|
|
// Allocate IP if IPAM enabled
|
|
var ip net.IP
|
|
var mask net.IPMask
|
|
var gateway net.IP
|
|
if config.IPAM != nil {
|
|
lease, err := m.ipam.Allocate(network, workloadID, mac)
|
|
if err != nil {
|
|
_ = netlink.LinkDel(veth)
|
|
return nil, fmt.Errorf("allocate IP: %w", err)
|
|
}
|
|
ip = lease.IP
|
|
_, subnet, _ := net.ParseCIDR(config.Subnet)
|
|
mask = subnet.Mask
|
|
if config.Gateway != "" {
|
|
gateway = net.ParseIP(config.Gateway)
|
|
}
|
|
}
|
|
|
|
iface := &Interface{
|
|
Name: hostName,
|
|
PeerName: peerName,
|
|
MAC: mac,
|
|
IP: ip,
|
|
Mask: mask,
|
|
Gateway: gateway,
|
|
Bridge: config.Bridge,
|
|
WorkloadID: workloadID,
|
|
WorkloadType: WorkloadContainer,
|
|
}
|
|
|
|
m.interfaces[workloadID] = iface
|
|
_ = m.saveState()
|
|
|
|
return iface, nil
|
|
}
|
|
|
|
// MoveVethToNamespace moves the container end of a veth pair to a network namespace
|
|
func (m *Manager) MoveVethToNamespace(workloadID string, nsFD int) error {
|
|
m.mu.RLock()
|
|
iface, ok := m.interfaces[workloadID]
|
|
m.mu.RUnlock()
|
|
|
|
if !ok {
|
|
return fmt.Errorf("interface for %s not found", workloadID)
|
|
}
|
|
|
|
if iface.PeerName == "" {
|
|
return fmt.Errorf("not a veth pair interface")
|
|
}
|
|
|
|
// Get peer link
|
|
peerLink, err := netlink.LinkByName(iface.PeerName)
|
|
if err != nil {
|
|
return fmt.Errorf("get peer veth: %w", err)
|
|
}
|
|
|
|
// Move to namespace
|
|
if err := netlink.LinkSetNsFd(peerLink, nsFD); err != nil {
|
|
return fmt.Errorf("move to namespace: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// ConfigureContainerInterface configures the interface inside the container namespace
|
|
// This should be called from within the container's network namespace
|
|
func (m *Manager) ConfigureContainerInterface(workloadID string) error {
|
|
m.mu.RLock()
|
|
iface, ok := m.interfaces[workloadID]
|
|
m.mu.RUnlock()
|
|
|
|
if !ok {
|
|
return fmt.Errorf("interface for %s not found", workloadID)
|
|
}
|
|
|
|
// Get the interface (should be the peer that was moved into this namespace)
|
|
link, err := netlink.LinkByName(iface.PeerName)
|
|
if err != nil {
|
|
return fmt.Errorf("get interface: %w", err)
|
|
}
|
|
|
|
// Set link up
|
|
if err := netlink.LinkSetUp(link); err != nil {
|
|
return fmt.Errorf("set link up: %w", err)
|
|
}
|
|
|
|
// Add IP address if allocated
|
|
if iface.IP != nil {
|
|
addr := &netlink.Addr{
|
|
IPNet: &net.IPNet{
|
|
IP: iface.IP,
|
|
Mask: iface.Mask,
|
|
},
|
|
}
|
|
if err := netlink.AddrAdd(link, addr); err != nil {
|
|
return fmt.Errorf("add IP address: %w", err)
|
|
}
|
|
}
|
|
|
|
// Add default route via gateway
|
|
if iface.Gateway != nil {
|
|
route := &netlink.Route{
|
|
Gw: iface.Gateway,
|
|
}
|
|
if err := netlink.RouteAdd(route); err != nil {
|
|
return fmt.Errorf("add default route: %w", err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Release releases the network interface for a workload
|
|
func (m *Manager) Release(workloadID string) error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
iface, ok := m.interfaces[workloadID]
|
|
if !ok {
|
|
return nil // Already released
|
|
}
|
|
|
|
// Release IP from IPAM
|
|
for network := range m.networks {
|
|
_ = m.ipam.Release(network, workloadID)
|
|
}
|
|
|
|
// Delete the interface
|
|
link, err := netlink.LinkByName(iface.Name)
|
|
if err == nil {
|
|
_ = netlink.LinkDel(link)
|
|
}
|
|
|
|
delete(m.interfaces, workloadID)
|
|
return m.saveState()
|
|
}
|
|
|
|
// GetInterface returns the interface for a workload
|
|
func (m *Manager) GetInterface(workloadID string) (*Interface, error) {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
iface, ok := m.interfaces[workloadID]
|
|
if !ok {
|
|
return nil, fmt.Errorf("interface for %s not found", workloadID)
|
|
}
|
|
return iface, nil
|
|
}
|
|
|
|
// ListInterfaces returns all managed interfaces
|
|
func (m *Manager) ListInterfaces() []*Interface {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
|
|
result := make([]*Interface, 0, len(m.interfaces))
|
|
for _, iface := range m.interfaces {
|
|
result = append(result, iface)
|
|
}
|
|
return result
|
|
}
|
|
|
|
// saveState persists current state to disk
|
|
func (m *Manager) saveState() error {
|
|
data, err := json.MarshalIndent(m.interfaces, "", " ")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return os.WriteFile(filepath.Join(m.stateDir, "interfaces.json"), data, 0644)
|
|
}
|
|
|
|
// loadState loads state from disk
|
|
func (m *Manager) loadState() error {
|
|
data, err := os.ReadFile(filepath.Join(m.stateDir, "interfaces.json"))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return json.Unmarshal(data, &m.interfaces)
|
|
}
|
|
|
|
// truncateID returns id cut down to at most maxLen bytes; ids that are
// already short enough are returned unchanged. It is used to keep generated
// interface names within length limits.
func truncateID(id string, maxLen int) string {
	if len(id) > maxLen {
		return id[:maxLen]
	}
	return id
}
|
|
|
|
// generateMAC derives a stable MAC address from workloadID.
//
// The first three octets are fixed at 52:54:00; 0x52 has the locally
// administered bit set and the multicast bit clear. The last three octets are
// the low 24 bits of a base-31 polynomial hash over the runes of workloadID,
// so equal IDs always produce equal addresses.
func generateMAC(workloadID string) net.HardwareAddr {
	// Only the low 24 bits of the hash are used, so accumulating in a
	// fixed-width unsigned integer yields the same octets as a wider one.
	var sum uint32
	for _, r := range workloadID {
		sum = sum*31 + uint32(r)
	}

	return net.HardwareAddr{
		0x52, 0x54, 0x00, // fixed prefix
		byte(sum >> 16),
		byte(sum >> 8),
		byte(sum),
	}
}
|
|
|
|
// openTAPFD is meant to return a file descriptor for the TAP device called
// name, for use by the VMM.
//
// It is currently a stub: it never opens anything and always returns -1
// together with an error. A real implementation would:
//  1. open /dev/net/tun,
//  2. issue the TUNSETIFF ioctl with the device name and flags,
//  3. return the resulting descriptor.
func openTAPFD(name string) (int, error) {
	const invalidFD = -1
	return invalidFD, fmt.Errorf("TAP FD extraction not yet implemented - use device fd from netlink")
}
|