Volt VMM (Neutron Stardust): source-available under AGPSL v5.0

KVM-based microVMM for the Volt platform:
- Sub-second VM boot times
- Minimal memory footprint
- Landlock LSM + seccomp security
- Virtio device support
- Custom kernel management

Copyright (c) Armored Gates LLC. All rights reserved.
Licensed under AGPSL v5.0
This commit is contained in:
Karl Clinger
2026-03-21 01:04:35 -05:00
commit 40ed108dd5
143 changed files with 50300 additions and 0 deletions

608
vmm/src/boot/pvh.rs Normal file
View File

@@ -0,0 +1,608 @@
//! PVH Boot Protocol Implementation
//!
//! PVH (Para-Virtualized Hardware) is a boot protocol that allows direct kernel
//! entry without BIOS/UEFI firmware. This is the fastest path to boot a Linux VM.
//!
//! # Overview
//!
//! The PVH boot protocol:
//! 1. Skips BIOS POST and firmware initialization
//! 2. Loads kernel directly into memory
//! 3. Sets up minimal boot structures (E820 map, start_info)
//! 4. Jumps directly to kernel 64-bit entry point
//!
//! # Boot Time Comparison
//!
//! | Method | Boot Time |
//! |--------|-----------|
//! | BIOS | 1-3s |
//! | UEFI | 0.5-1s |
//! | PVH | <50ms |
//!
//! # Memory Requirements
//!
//! The PVH start_info structure must be placed in guest memory and
//! its address passed to the kernel via RBX register.
use super::{layout, BootError, GuestMemory, Result};
/// Maximum number of E820 entries
///
/// `PvhBootSetup::setup` multiplies this by `size_of::<E820Entry>()` to size
/// the guest-memory region set aside for the memory map, and places the
/// module list immediately after that region.
pub const MAX_E820_ENTRIES: usize = 128;
/// Classification of a region in the E820 memory map.
///
/// Each discriminant is the raw `u32` code stored in a map entry (matching
/// the Linux kernel definitions).
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum E820Type {
    /// Undefined/other; also the result of decoding an unrecognised code
    Undefined = 0,
    /// Usable RAM
    Ram = 1,
    /// Reserved by system
    Reserved = 2,
    /// ACPI reclaimable
    Acpi = 3,
    /// ACPI NVS (Non-Volatile Storage)
    Nvs = 4,
    /// Unusable memory
    Unusable = 5,
    /// Disabled memory (EFI)
    Disabled = 6,
    /// Persistent memory
    Pmem = 7,
}

impl From<u32> for E820Type {
    /// Decode a raw type code.
    ///
    /// Codes `1..=7` map to their named variant; every other value
    /// (including `0`) yields [`E820Type::Undefined`].
    fn from(val: u32) -> Self {
        // Named variants, indexed by `raw code - 1`.
        const KNOWN: [E820Type; 7] = [
            E820Type::Ram,
            E820Type::Reserved,
            E820Type::Acpi,
            E820Type::Nvs,
            E820Type::Unusable,
            E820Type::Disabled,
            E820Type::Pmem,
        ];

        // `checked_sub` rejects 0; `get` rejects anything above 7.
        val.checked_sub(1)
            .and_then(|idx| KNOWN.get(idx as usize).copied())
            .unwrap_or(E820Type::Undefined)
    }
}
/// A single record of the E820 memory map.
///
/// Laid out as packed C data (`u64`, `u64`, `u32` with no padding) so that
/// it matches the Linux kernel's e820entry structure.
#[repr(C, packed)]
#[derive(Debug, Clone, Copy, Default)]
pub struct E820Entry {
    /// First byte of the region (physical address)
    pub addr: u64,
    /// Length of the region in bytes
    pub size: u64,
    /// Raw region type code (an [`E820Type`] discriminant)
    pub entry_type: u32,
}

impl E820Entry {
    /// Build an entry covering `size` bytes starting at `addr`, tagged with
    /// the raw code of `entry_type`.
    pub fn new(addr: u64, size: u64, entry_type: E820Type) -> Self {
        let entry_type = entry_type as u32;
        Self {
            addr,
            size,
            entry_type,
        }
    }

    /// Shorthand for an entry of type [`E820Type::Ram`].
    pub fn ram(addr: u64, size: u64) -> Self {
        Self {
            addr,
            size,
            entry_type: E820Type::Ram as u32,
        }
    }

    /// Shorthand for an entry of type [`E820Type::Reserved`].
    pub fn reserved(addr: u64, size: u64) -> Self {
        Self {
            addr,
            size,
            entry_type: E820Type::Reserved as u32,
        }
    }
}
/// PVH start_info structure
///
/// A simplified version compatible with the Xen PVH ABI. A copy of this
/// structure is written into guest memory and the kernel receives its
/// address in RBX.
///
/// # Memory Layout
///
/// The structure must be at a known location (typically 0x7000) and holds
/// the physical addresses of the other boot structures. With `repr(C)` the
/// fields are four `u32`s, four `u64`s and two `u32`s, which leaves no
/// padding between or after them.
#[repr(C)]
#[derive(Debug, Clone, Default)]
pub struct StartInfo {
    /// Magic number (XEN_HVM_START_MAGIC_VALUE or custom)
    pub magic: u32,
    /// Version of the start_info structure
    pub version: u32,
    /// Flags (reserved, should be 0)
    pub flags: u32,
    /// Number of modules (initrd counts as 1)
    pub nr_modules: u32,
    /// Physical address of module list
    pub modlist_paddr: u64,
    /// Physical address of command line string
    pub cmdline_paddr: u64,
    /// Physical address of RSDP (ACPI, 0 if none)
    pub rsdp_paddr: u64,
    /// Physical address of E820 memory map
    pub memmap_paddr: u64,
    /// Number of entries in memory map
    pub memmap_entries: u32,
    /// Reserved/padding
    pub reserved: u32,
}

/// XEN HVM start magic value
pub const XEN_HVM_START_MAGIC: u32 = 0x336ec578;

/// Volt custom magic (for identification)
pub const VOLT_MAGIC: u32 = 0x4e4f5641; // "NOVA"

impl StartInfo {
    /// Create a StartInfo carrying the Xen magic and version 1, with every
    /// other field zeroed.
    pub fn new() -> Self {
        Self {
            magic: XEN_HVM_START_MAGIC,
            version: 1,
            flags: 0,
            nr_modules: 0,
            modlist_paddr: 0,
            cmdline_paddr: 0,
            rsdp_paddr: 0,
            memmap_paddr: 0,
            memmap_entries: 0,
            reserved: 0,
        }
    }

    /// Return `self` with the command line address replaced by `addr`.
    pub fn with_cmdline(self, addr: u64) -> Self {
        Self {
            cmdline_paddr: addr,
            ..self
        }
    }

    /// Return `self` pointing at a memory map of `entries` records located
    /// at `addr`.
    pub fn with_memmap(self, addr: u64, entries: u32) -> Self {
        Self {
            memmap_paddr: addr,
            memmap_entries: entries,
            ..self
        }
    }

    /// Return `self` describing exactly one module (the initrd) whose list
    /// entry lives at `modlist_addr`.
    pub fn with_module(self, modlist_addr: u64) -> Self {
        Self {
            nr_modules: 1,
            modlist_paddr: modlist_addr,
            ..self
        }
    }

    /// View the structure as raw bytes for writing to guest memory.
    ///
    /// The slice borrows `self` and is `size_of::<StartInfo>()` bytes long,
    /// in the host's native byte order.
    pub fn as_bytes(&self) -> &[u8] {
        let len = std::mem::size_of::<Self>();
        let start = (self as *const Self).cast::<u8>();
        // SAFETY: `start` is derived from `&self`, so it is non-null, aligned
        // for `u8`, and valid for reads of `len` bytes for the lifetime of the
        // returned borrow. The struct is `repr(C)` with only integer fields
        // arranged so that no padding exists, hence every byte is initialised.
        unsafe { std::slice::from_raw_parts(start, len) }
    }
}
/// One record of the PVH module list (used here for the initrd).
///
/// `repr(C)` with four `u64` fields, so the in-memory image has no padding.
#[repr(C)]
#[derive(Debug, Clone, Copy, Default)]
pub struct HvmModlistEntry {
    /// Physical address of module
    pub paddr: u64,
    /// Size of module in bytes
    pub size: u64,
    /// Physical address of command line for module (0 if none)
    pub cmdline_paddr: u64,
    /// Reserved
    pub reserved: u64,
}

impl HvmModlistEntry {
    /// Describe a module of `size` bytes at `paddr`, with no per-module
    /// command line and the reserved field zeroed.
    pub fn new(paddr: u64, size: u64) -> Self {
        Self {
            paddr,
            size,
            ..Self::default()
        }
    }

    /// View the entry as raw bytes (host byte order), borrowing `self`.
    pub fn as_bytes(&self) -> &[u8] {
        let len = std::mem::size_of::<Self>();
        let start = (self as *const Self).cast::<u8>();
        // SAFETY: `start` comes from `&self`, so it is valid for reads of
        // `len` bytes while the returned borrow is alive. The struct is
        // `repr(C)` and consists solely of `u64` fields, so it contains no
        // padding and every byte is initialised.
        unsafe { std::slice::from_raw_parts(start, len) }
    }
}
/// PVH configuration for boot setup
///
/// Input to `PvhBootSetup::setup`, which derives the memory map from
/// `memory_size` and records the addresses below in the start_info
/// structure.
#[derive(Debug, Clone)]
pub struct PvhConfig {
    /// Total memory size in bytes
    pub memory_size: u64,
    /// Number of vCPUs (not read by `PvhBootSetup::setup`)
    pub vcpu_count: u32,
    /// Physical address of command line
    pub cmdline_addr: u64,
    /// Physical address of initrd (if any)
    ///
    /// A module list entry is written only when both this and
    /// `initrd_size` are `Some`.
    pub initrd_addr: Option<u64>,
    /// Size of initrd (if any)
    pub initrd_size: Option<u64>,
}
/// PVH boot setup implementation
pub struct PvhBootSetup;
impl PvhBootSetup {
/// Set up PVH boot structures in guest memory
///
/// Creates and writes:
/// 1. E820 memory map
/// 2. start_info structure
/// 3. Module list (for initrd)
pub fn setup<M: GuestMemory>(config: &PvhConfig, guest_mem: &mut M) -> Result<()> {
// Build E820 memory map
let e820_entries = Self::build_e820_map(config.memory_size)?;
let e820_count = e820_entries.len() as u32;
// Write E820 map to guest memory
Self::write_e820_map(&e820_entries, guest_mem)?;
// Write module list if initrd is present
let modlist_addr = if let (Some(addr), Some(size)) = (config.initrd_addr, config.initrd_size) {
let modlist_addr = layout::E820_MAP_ADDR +
(MAX_E820_ENTRIES * std::mem::size_of::<E820Entry>()) as u64;
let entry = HvmModlistEntry::new(addr, size);
guest_mem.write_bytes(modlist_addr, entry.as_bytes())?;
Some(modlist_addr)
} else {
None
};
// Build and write start_info structure
let mut start_info = StartInfo::new()
.with_cmdline(config.cmdline_addr)
.with_memmap(layout::E820_MAP_ADDR, e820_count);
if let Some(addr) = modlist_addr {
start_info = start_info.with_module(addr);
}
guest_mem.write_bytes(layout::PVH_START_INFO_ADDR, start_info.as_bytes())?;
Ok(())
}
/// Build E820 memory map for the VM
///
/// Creates a standard x86_64 memory layout:
/// - Low memory (0-640KB): RAM
/// - Legacy hole (640KB-1MB): Reserved
/// - High memory (1MB+): RAM
fn build_e820_map(memory_size: u64) -> Result<Vec<E820Entry>> {
let mut entries = Vec::with_capacity(4);
// Validate minimum memory
if memory_size < layout::HIGH_MEMORY_START {
return Err(BootError::MemoryLayout(format!(
"Memory size {} is less than minimum required {}",
memory_size,
layout::HIGH_MEMORY_START
)));
}
// Low memory: 0 to 640KB (0x0 - 0x9FFFF)
// We reserve the first page for real-mode IVT
entries.push(E820Entry::ram(0, layout::LOW_MEMORY_END));
// Legacy video/ROM hole: 640KB to 1MB (0xA0000 - 0xFFFFF)
// This is reserved for VGA memory, option ROMs, etc.
let legacy_hole_size = layout::HIGH_MEMORY_START - layout::LOW_MEMORY_END;
entries.push(E820Entry::reserved(layout::LOW_MEMORY_END, legacy_hole_size));
// High memory: 1MB to RAM size
let high_memory_size = memory_size - layout::HIGH_MEMORY_START;
if high_memory_size > 0 {
entries.push(E820Entry::ram(layout::HIGH_MEMORY_START, high_memory_size));
}
// If memory > 4GB, we might need to handle the MMIO hole
// For now, we assume memory <= 4GB for simplicity
// Production systems should handle:
// - PCI MMIO hole (typically 0xE0000000 - 0xFFFFFFFF)
// - Memory above 4GB remapped
Ok(entries)
}
/// Write E820 map entries to guest memory
fn write_e820_map<M: GuestMemory>(entries: &[E820Entry], guest_mem: &mut M) -> Result<()> {
let entry_size = std::mem::size_of::<E820Entry>();
for (i, entry) in entries.iter().enumerate() {
let addr = layout::E820_MAP_ADDR + (i * entry_size) as u64;
let bytes = unsafe {
std::slice::from_raw_parts(entry as *const E820Entry as *const u8, entry_size)
};
guest_mem.write_bytes(addr, bytes)?;
}
Ok(())
}
/// Get initial CPU register state for PVH boot
///
/// Returns the register values needed to start the vCPU in 64-bit mode
/// with PVH boot protocol.
pub fn get_initial_regs(entry_point: u64) -> PvhRegs {
PvhRegs {
// Instruction pointer - kernel entry
rip: entry_point,
// RBX contains pointer to start_info (Xen PVH convention)
rbx: layout::PVH_START_INFO_ADDR,
// RSI also contains start_info pointer (Linux boot convention)
rsi: layout::PVH_START_INFO_ADDR,
// Stack pointer
rsp: layout::BOOT_STACK_POINTER,
// Clear other general-purpose registers
rax: 0,
rcx: 0,
rdx: 0,
rdi: 0,
rbp: 0,
r8: 0,
r9: 0,
r10: 0,
r11: 0,
r12: 0,
r13: 0,
r14: 0,
r15: 0,
// Flags - interrupts disabled
rflags: 0x2,
// Segment selectors for 64-bit mode
cs: 0x10, // Code segment, ring 0
ds: 0x18, // Data segment
es: 0x18,
fs: 0x18,
gs: 0x18,
ss: 0x18,
// CR registers for 64-bit mode
cr0: CR0_PE | CR0_ET | CR0_PG,
cr3: 0, // Page table base - set by kernel setup
cr4: CR4_PAE,
// EFER for long mode
efer: EFER_LME | EFER_LMA,
}
}
}
// Control Register 0 bits, combined into `PvhRegs::cr0` by
// `PvhBootSetup::get_initial_regs`.
/// Control Register 0 bits
const CR0_PE: u64 = 1 << 0; // Protection Enable
const CR0_ET: u64 = 1 << 4; // Extension Type (387 present)
const CR0_PG: u64 = 1 << 31; // Paging Enable
// Control Register 4 bits, used for `PvhRegs::cr4`.
/// Control Register 4 bits
const CR4_PAE: u64 = 1 << 5; // Physical Address Extension
// EFER bits, combined into `PvhRegs::efer`.
/// EFER (Extended Feature Enable Register) bits
const EFER_LME: u64 = 1 << 8; // Long Mode Enable
const EFER_LMA: u64 = 1 << 10; // Long Mode Active
/// CPU register state for PVH boot
///
/// Plain value container returned by `PvhBootSetup::get_initial_regs`.
/// The derived `Default` leaves every register at zero.
#[derive(Debug, Clone, Default)]
pub struct PvhRegs {
    // General purpose registers
    pub rax: u64,
    /// Set to the start_info address by `get_initial_regs`
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    /// Set to the start_info address by `get_initial_regs`
    pub rsi: u64,
    pub rdi: u64,
    /// Set to `layout::BOOT_STACK_POINTER` by `get_initial_regs`
    pub rsp: u64,
    pub rbp: u64,
    pub r8: u64,
    pub r9: u64,
    pub r10: u64,
    pub r11: u64,
    pub r12: u64,
    pub r13: u64,
    pub r14: u64,
    pub r15: u64,
    // Instruction pointer
    /// Set to the kernel entry point by `get_initial_regs`
    pub rip: u64,
    // Flags
    pub rflags: u64,
    // Segment selectors
    pub cs: u16,
    pub ds: u16,
    pub es: u16,
    pub fs: u16,
    pub gs: u16,
    pub ss: u16,
    // Control registers
    pub cr0: u64,
    pub cr3: u64,
    pub cr4: u64,
    // Model-specific registers
    /// Extended Feature Enable Register value
    pub efer: u64,
}
/// Minimal boot-time GDT for entering 64-bit mode.
///
/// Holds three 8-byte descriptors — null, 64-bit code, data — in that order.
/// The kernel will set up its own GDT later.
///
/// NOTE(review): in this layout the code descriptor sits at selector 0x08
/// and the data descriptor at 0x10, whereas `PvhBootSetup::get_initial_regs`
/// uses selectors 0x10 and 0x18 — confirm which table the vCPU really uses.
pub struct BootGdt;

impl BootGdt {
    /// Null descriptor (required as GDT[0])
    pub const NULL: u64 = 0;

    /// 64-bit code segment (CS)
    /// Base: 0, Limit: 0xFFFFF (ignored in 64-bit mode)
    /// Type: Code, Execute/Read, Present, DPL=0
    pub const CODE64: u64 = 0x00af_9b00_0000_ffff;

    /// 64-bit data segment (DS, ES, SS, FS, GS)
    /// Base: 0, Limit: 0xFFFFF
    /// Type: Data, Read/Write, Present, DPL=0
    pub const DATA64: u64 = 0x00cf_9300_0000_ffff;

    /// Serialise the table as 24 bytes: each descriptor in little-endian
    /// order, null first, then code, then data.
    pub fn as_bytes() -> [u8; 24] {
        let descriptors = [Self::NULL, Self::CODE64, Self::DATA64];
        let mut table = [0u8; 24];
        for (slot, descriptor) in table.chunks_exact_mut(8).zip(descriptors.iter()) {
            slot.copy_from_slice(&descriptor.to_le_bytes());
        }
        table
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes per mebibyte, used to size the mock guest.
    const MIB: u64 = 1024 * 1024;

    /// Flat, zero-initialised byte buffer standing in for guest RAM.
    struct MockMemory {
        size: u64,
        data: Vec<u8>,
    }

    impl MockMemory {
        /// Allocate `size` bytes of zeroed mock memory.
        fn new(size: u64) -> Self {
            let data = vec![0; size as usize];
            Self { size, data }
        }
    }

    impl GuestMemory for MockMemory {
        /// Copy `data` to offset `addr`, failing if it would run past the end.
        fn write_bytes(&mut self, addr: u64, data: &[u8]) -> Result<()> {
            let start = addr as usize;
            let end = start + data.len();
            if end <= self.data.len() {
                self.data[start..end].copy_from_slice(data);
                return Ok(());
            }
            Err(BootError::GuestMemoryWrite(format!(
                "Write at {:#x} exceeds memory size",
                addr
            )))
        }

        fn size(&self) -> u64 {
            self.size
        }
    }

    #[test]
    fn test_e820_entry_size() {
        // E820 entry must be exactly 20 bytes for Linux kernel compatibility
        let entry_size = std::mem::size_of::<E820Entry>();
        assert_eq!(entry_size, 20);
    }

    #[test]
    fn test_build_e820_map() {
        // 128MB guest
        let entries = PvhBootSetup::build_e820_map(128 * MIB).unwrap();

        // Should have at least 3 entries
        assert!(entries.len() >= 3);

        // Copy (addr, type) out of the packed struct so that no reference to
        // an unaligned field is ever created.
        let addr_and_type = |i: usize| (entries[i].addr, entries[i].entry_type);

        // First entry should be low memory RAM
        assert_eq!(addr_and_type(0), (0, E820Type::Ram as u32));
        // Second entry should be legacy hole (reserved)
        assert_eq!(
            addr_and_type(1),
            (layout::LOW_MEMORY_END, E820Type::Reserved as u32)
        );
        // Third entry should be high memory RAM
        assert_eq!(
            addr_and_type(2),
            (layout::HIGH_MEMORY_START, E820Type::Ram as u32)
        );
    }

    #[test]
    fn test_start_info_size() {
        // StartInfo should be reasonable size (under 4KB page) and large
        // enough to hold the minimum expected fields.
        let size = std::mem::size_of::<StartInfo>();
        assert!(size < 4096);
        assert!(size >= 48);
    }

    #[test]
    fn test_pvh_setup() {
        let config = PvhConfig {
            memory_size: 128 * MIB,
            vcpu_count: 2,
            cmdline_addr: layout::CMDLINE_ADDR,
            initrd_addr: Some(100 * MIB),
            initrd_size: Some(10 * MIB),
        };
        let mut mem = MockMemory::new(128 * MIB);

        let result = PvhBootSetup::setup(&config, &mut mem);
        assert!(result.is_ok());

        // Verify magic was written to start_info location
        let base = layout::PVH_START_INFO_ADDR as usize;
        let mut raw = [0u8; 4];
        raw.copy_from_slice(&mem.data[base..base + 4]);
        assert_eq!(u32::from_le_bytes(raw), XEN_HVM_START_MAGIC);
    }

    #[test]
    fn test_pvh_regs() {
        let entry_point = 0x100200;
        let regs = PvhBootSetup::get_initial_regs(entry_point);

        // Entry point and start_info pointer
        assert_eq!(regs.rip, entry_point);
        assert_eq!(regs.rbx, layout::PVH_START_INFO_ADDR);

        // 64-bit mode flags: protection, paging, PAE, long mode
        assert!(regs.cr0 & CR0_PE != 0);
        assert!(regs.cr0 & CR0_PG != 0);
        assert!(regs.cr4 & CR4_PAE != 0);
        assert!(regs.efer & EFER_LME != 0);
    }

    #[test]
    fn test_gdt_layout() {
        let gdt = BootGdt::as_bytes();
        // 3 entries × 8 bytes
        assert_eq!(gdt.len(), 24);
        // First entry should be null
        assert_eq!(&gdt[0..8], &[0u8; 8]);
    }
}