/* Volt Runtime - Core VM execution engine Uses native Linux kernel isolation: - Namespaces (PID, NET, MNT, UTS, IPC, USER) - Cgroups v2 (resource limits) - Landlock (filesystem access control) - Seccomp (syscall filtering) - SystemD (lifecycle management) NO HYPERVISOR. */ package runtime import ( "fmt" "os" "os/exec" "path/filepath" "syscall" "unsafe" "golang.org/x/sys/unix" ) // unsafePointer returns an unsafe.Pointer to v. func unsafePointer[T any](v *T) unsafe.Pointer { return unsafe.Pointer(v) } // unsafeSize returns the size of T. func unsafeSize[T any](v T) uintptr { return unsafe.Sizeof(v) } // VM represents a Volt virtual machine type VM struct { Name string Image string Kernel string Memory string CPUs int Network string Mounts []Mount RootFS string PID int Status VMStatus ODEProfile string } // Mount represents an attached storage mount type Mount struct { Source string Target string Type string Flags uintptr } // VMStatus represents VM lifecycle state type VMStatus string const ( VMStatusCreated VMStatus = "created" VMStatusRunning VMStatus = "running" VMStatusStopped VMStatus = "stopped" VMStatusError VMStatus = "error" ) // Config holds runtime configuration type Config struct { BaseDir string // /var/lib/volt KernelDir string // /var/lib/volt/kernels ImageDir string // /var/lib/volt/images RunDir string // /var/run/volt NetworkBridge string // voltbr0 } // DefaultConfig returns standard configuration func DefaultConfig() *Config { return &Config{ BaseDir: "/var/lib/volt", KernelDir: "/var/lib/volt/kernels", ImageDir: "/var/lib/volt/images", RunDir: "/var/run/volt", NetworkBridge: "voltbr0", } } // Runtime manages VM lifecycle type Runtime struct { config *Config } // NewRuntime creates a new runtime instance func NewRuntime(config *Config) (*Runtime, error) { if config == nil { config = DefaultConfig() } // Ensure directories exist dirs := []string{ config.BaseDir, config.KernelDir, config.ImageDir, config.RunDir, filepath.Join(config.BaseDir, "vms"), } for _, dir := range dirs { if err := os.MkdirAll(dir, 0755); err != nil { return nil, fmt.Errorf("failed to create directory %s: %w", dir, err) } } return &Runtime{config: config}, nil } // Create creates a new VM (does not start it) func (r *Runtime) Create(vm *VM) error { vmDir := filepath.Join(r.config.BaseDir, "vms", vm.Name) // Create VM directory structure dirs := []string{ vmDir, filepath.Join(vmDir, "rootfs"), filepath.Join(vmDir, "mounts"), filepath.Join(vmDir, "run"), } for _, dir := range dirs { if err := os.MkdirAll(dir, 0755); err != nil { return fmt.Errorf("failed to create %s: %w", dir, err) } } // Prepare TinyVol rootfs from image if err := r.prepareRootFS(vm); err != nil { return fmt.Errorf("failed to prepare rootfs: %w", err) } // Setup network namespace if err := r.setupNetwork(vm); err != nil { return fmt.Errorf("failed to setup network: %w", err) } // Write VM config if err := r.writeVMConfig(vm); err != nil { return fmt.Errorf("failed to write config: %w", err) } vm.Status = VMStatusCreated return nil } // Start starts a created VM func (r *Runtime) Start(vm *VM) error { if vm.Status != VMStatusCreated && vm.Status != VMStatusStopped { return fmt.Errorf("VM %s is not in a startable state: %s", vm.Name, vm.Status) } vmDir := filepath.Join(r.config.BaseDir, "vms", vm.Name) rootfs := filepath.Join(vmDir, "rootfs") // Clone with new namespaces cmd := &exec.Cmd{ Path: "/proc/self/exe", Args: []string{"volt-init", vm.Name}, Dir: rootfs, SysProcAttr: &syscall.SysProcAttr{ Cloneflags: syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWPID | syscall.CLONE_NEWNET | syscall.CLONE_NEWUSER, UidMappings: []syscall.SysProcIDMap{ {ContainerID: 0, HostID: os.Getuid(), Size: 1}, }, GidMappings: []syscall.SysProcIDMap{ {ContainerID: 0, HostID: os.Getgid(), Size: 1}, }, }, } if err := cmd.Start(); err != nil { return fmt.Errorf("failed to start VM: %w", err) } vm.PID = cmd.Process.Pid vm.Status = VMStatusRunning // Write PID file pidFile := filepath.Join(vmDir, "run", "vm.pid") os.WriteFile(pidFile, []byte(fmt.Sprintf("%d", vm.PID)), 0644) return nil } // Stop stops a running VM func (r *Runtime) Stop(vm *VM) error { if vm.Status != VMStatusRunning { return fmt.Errorf("VM %s is not running", vm.Name) } // Send SIGTERM if err := syscall.Kill(vm.PID, syscall.SIGTERM); err != nil { return fmt.Errorf("failed to send SIGTERM: %w", err) } // Wait for graceful shutdown (or SIGKILL after timeout) // This would be handled by systemd in production vm.Status = VMStatusStopped return nil } // Destroy removes a VM completely func (r *Runtime) Destroy(vm *VM) error { // Stop if running if vm.Status == VMStatusRunning { r.Stop(vm) } // Remove VM directory vmDir := filepath.Join(r.config.BaseDir, "vms", vm.Name) return os.RemoveAll(vmDir) } // prepareRootFS sets up the TinyVol filesystem for the VM func (r *Runtime) prepareRootFS(vm *VM) error { vmDir := filepath.Join(r.config.BaseDir, "vms", vm.Name) rootfs := filepath.Join(vmDir, "rootfs") // In production, this would: // 1. Pull TinyVol from ArmoredLedger/registry // 2. Verify cryptographic signature // 3. Check SBOM against policy // 4. Mount as overlay (copy-on-write) // For now, create minimal rootfs structure dirs := []string{ "bin", "sbin", "usr/bin", "usr/sbin", "etc", "var", "tmp", "proc", "sys", "dev", "run", "home", "root", } for _, dir := range dirs { os.MkdirAll(filepath.Join(rootfs, dir), 0755) } return nil } // setupNetwork creates network namespace and veth pair func (r *Runtime) setupNetwork(vm *VM) error { // In production, this would: // 1. Create network namespace // 2. Create veth pair // 3. Move one end into namespace // 4. Connect other end to bridge // 5. Configure IP addressing return nil } // writeVMConfig writes VM configuration to disk func (r *Runtime) writeVMConfig(vm *VM) error { vmDir := filepath.Join(r.config.BaseDir, "vms", vm.Name) configPath := filepath.Join(vmDir, "config.json") config := fmt.Sprintf(`{ "name": "%s", "image": "%s", "kernel": "%s", "memory": "%s", "cpus": %d, "network": "%s", "ode_profile": "%s" }`, vm.Name, vm.Image, vm.Kernel, vm.Memory, vm.CPUs, vm.Network, vm.ODEProfile) return os.WriteFile(configPath, []byte(config), 0644) } // Landlock syscall numbers (not yet in golang.org/x/sys v0.16.0) const ( sysLandlockCreateRuleset = 444 sysLandlockAddRule = 445 sysLandlockRestrictSelf = 446 ) // ApplyLandlock applies Landlock filesystem restrictions func ApplyLandlock(rules []LandlockRule) error { // Create ruleset attr := unix.LandlockRulesetAttr{ Access_fs: unix.LANDLOCK_ACCESS_FS_READ_FILE | unix.LANDLOCK_ACCESS_FS_WRITE_FILE | unix.LANDLOCK_ACCESS_FS_EXECUTE, } fd, _, errno := syscall.Syscall(sysLandlockCreateRuleset, uintptr(unsafePointer(&attr)), uintptr(unsafeSize(attr)), 0, ) if errno != 0 { return fmt.Errorf("landlock_create_ruleset: %w", errno) } defer unix.Close(int(fd)) // Add rules for _, rule := range rules { pathFd, err := unix.Open(rule.Path, unix.O_PATH|unix.O_CLOEXEC, 0) if err != nil { continue // Skip non-existent paths } pathBeneath := unix.LandlockPathBeneathAttr{ Allowed_access: rule.Access, Parent_fd: int32(pathFd), } syscall.Syscall6(sysLandlockAddRule, fd, uintptr(unix.LANDLOCK_RULE_PATH_BENEATH), uintptr(unsafePointer(&pathBeneath)), 0, 0, 0, ) unix.Close(pathFd) } // Enforce if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { return fmt.Errorf("prctl(NO_NEW_PRIVS): %w", err) } _, _, errno = syscall.Syscall(sysLandlockRestrictSelf, fd, 0, 0) if errno != 0 { return fmt.Errorf("landlock_restrict_self: %w", errno) } return nil } // LandlockRule defines a filesystem access rule type LandlockRule struct { Path string Access uint64 } // ServerLandlockRules returns Landlock rules for server VMs func ServerLandlockRules(rootfs string) []LandlockRule { return []LandlockRule{ {Path: filepath.Join(rootfs, "app"), Access: unix.LANDLOCK_ACCESS_FS_READ_FILE | unix.LANDLOCK_ACCESS_FS_WRITE_FILE}, {Path: filepath.Join(rootfs, "tmp"), Access: unix.LANDLOCK_ACCESS_FS_READ_FILE | unix.LANDLOCK_ACCESS_FS_WRITE_FILE}, {Path: filepath.Join(rootfs, "var/log"), Access: unix.LANDLOCK_ACCESS_FS_WRITE_FILE}, {Path: filepath.Join(rootfs, "usr"), Access: unix.LANDLOCK_ACCESS_FS_READ_FILE | unix.LANDLOCK_ACCESS_FS_EXECUTE}, {Path: filepath.Join(rootfs, "lib"), Access: unix.LANDLOCK_ACCESS_FS_READ_FILE}, } } // DesktopLandlockRules returns Landlock rules for desktop VMs func DesktopLandlockRules(rootfs string) []LandlockRule { return []LandlockRule{ {Path: filepath.Join(rootfs, "home"), Access: unix.LANDLOCK_ACCESS_FS_READ_FILE | unix.LANDLOCK_ACCESS_FS_WRITE_FILE}, {Path: filepath.Join(rootfs, "tmp"), Access: unix.LANDLOCK_ACCESS_FS_READ_FILE | unix.LANDLOCK_ACCESS_FS_WRITE_FILE}, {Path: filepath.Join(rootfs, "usr"), Access: unix.LANDLOCK_ACCESS_FS_READ_FILE | unix.LANDLOCK_ACCESS_FS_EXECUTE}, {Path: filepath.Join(rootfs, "lib"), Access: unix.LANDLOCK_ACCESS_FS_READ_FILE}, {Path: filepath.Join(rootfs, "var"), Access: unix.LANDLOCK_ACCESS_FS_READ_FILE | unix.LANDLOCK_ACCESS_FS_WRITE_FILE}, } }