//! PVH Boot Protocol Implementation //! //! PVH (Para-Virtualized Hardware) is a boot protocol that allows direct kernel //! entry without BIOS/UEFI firmware. This is the fastest path to boot a Linux VM. //! //! # Overview //! //! The PVH boot protocol: //! 1. Skips BIOS POST and firmware initialization //! 2. Loads kernel directly into memory //! 3. Sets up minimal boot structures (E820 map, start_info) //! 4. Jumps directly to kernel 64-bit entry point //! //! # Boot Time Comparison //! //! | Method | Boot Time | //! |--------|-----------| //! | BIOS | 1-3s | //! | UEFI | 0.5-1s | //! | PVH | <50ms | //! //! # Memory Requirements //! //! The PVH start_info structure must be placed in guest memory and //! its address passed to the kernel via RBX register. use super::{layout, BootError, GuestMemory, Result}; /// Maximum number of E820 entries pub const MAX_E820_ENTRIES: usize = 128; /// E820 memory type values (matching Linux kernel definitions) #[repr(u32)] #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum E820Type { /// Usable RAM Ram = 1, /// Reserved by system Reserved = 2, /// ACPI reclaimable Acpi = 3, /// ACPI NVS (Non-Volatile Storage) Nvs = 4, /// Unusable memory Unusable = 5, /// Disabled memory (EFI) Disabled = 6, /// Persistent memory Pmem = 7, /// Undefined/other Undefined = 0, } impl From for E820Type { fn from(val: u32) -> Self { match val { 1 => E820Type::Ram, 2 => E820Type::Reserved, 3 => E820Type::Acpi, 4 => E820Type::Nvs, 5 => E820Type::Unusable, 6 => E820Type::Disabled, 7 => E820Type::Pmem, _ => E820Type::Undefined, } } } /// E820 memory map entry /// /// Matches the Linux kernel's e820entry structure for compatibility. 
#[repr(C, packed)] #[derive(Debug, Clone, Copy, Default)] pub struct E820Entry { /// Start address of memory region pub addr: u64, /// Size of memory region in bytes pub size: u64, /// Type of memory region pub entry_type: u32, } impl E820Entry { /// Create a new E820 entry pub fn new(addr: u64, size: u64, entry_type: E820Type) -> Self { Self { addr, size, entry_type: entry_type as u32, } } /// Create a RAM entry pub fn ram(addr: u64, size: u64) -> Self { Self::new(addr, size, E820Type::Ram) } /// Create a reserved entry pub fn reserved(addr: u64, size: u64) -> Self { Self::new(addr, size, E820Type::Reserved) } } /// PVH start_info structure /// /// This is a simplified version compatible with the Xen PVH ABI. /// The structure is placed in guest memory and its address is passed /// to the kernel in RBX. /// /// # Memory Layout /// /// The structure must be at a known location (typically 0x7000) and /// contain pointers to other boot structures. #[repr(C)] #[derive(Debug, Clone, Default)] pub struct StartInfo { /// Magic number (XEN_HVM_START_MAGIC_VALUE or custom) pub magic: u32, /// Version of the start_info structure pub version: u32, /// Flags (reserved, should be 0) pub flags: u32, /// Number of modules (initrd counts as 1) pub nr_modules: u32, /// Physical address of module list pub modlist_paddr: u64, /// Physical address of command line string pub cmdline_paddr: u64, /// Physical address of RSDP (ACPI, 0 if none) pub rsdp_paddr: u64, /// Physical address of E820 memory map pub memmap_paddr: u64, /// Number of entries in memory map pub memmap_entries: u32, /// Reserved/padding pub reserved: u32, } /// XEN HVM start magic value pub const XEN_HVM_START_MAGIC: u32 = 0x336ec578; /// Volt custom magic (for identification) pub const VOLT_MAGIC: u32 = 0x4e4f5641; // "NOVA" impl StartInfo { /// Create a new StartInfo with default values pub fn new() -> Self { Self { magic: XEN_HVM_START_MAGIC, version: 1, flags: 0, ..Default::default() } } /// Set command line 
address pub fn with_cmdline(mut self, addr: u64) -> Self { self.cmdline_paddr = addr; self } /// Set memory map address and entry count pub fn with_memmap(mut self, addr: u64, entries: u32) -> Self { self.memmap_paddr = addr; self.memmap_entries = entries; self } /// Set module (initrd) information pub fn with_module(mut self, modlist_addr: u64) -> Self { self.nr_modules = 1; self.modlist_paddr = modlist_addr; self } /// Convert to bytes for writing to guest memory pub fn as_bytes(&self) -> &[u8] { unsafe { std::slice::from_raw_parts( self as *const Self as *const u8, std::mem::size_of::(), ) } } } /// Module (initrd) entry for PVH #[repr(C)] #[derive(Debug, Clone, Copy, Default)] pub struct HvmModlistEntry { /// Physical address of module pub paddr: u64, /// Size of module in bytes pub size: u64, /// Physical address of command line for module (0 if none) pub cmdline_paddr: u64, /// Reserved pub reserved: u64, } impl HvmModlistEntry { /// Create entry for initrd pub fn new(paddr: u64, size: u64) -> Self { Self { paddr, size, cmdline_paddr: 0, reserved: 0, } } /// Convert to bytes pub fn as_bytes(&self) -> &[u8] { unsafe { std::slice::from_raw_parts( self as *const Self as *const u8, std::mem::size_of::(), ) } } } /// PVH configuration for boot setup #[derive(Debug, Clone)] pub struct PvhConfig { /// Total memory size in bytes pub memory_size: u64, /// Number of vCPUs pub vcpu_count: u32, /// Physical address of command line pub cmdline_addr: u64, /// Physical address of initrd (if any) pub initrd_addr: Option, /// Size of initrd (if any) pub initrd_size: Option, } /// PVH boot setup implementation pub struct PvhBootSetup; impl PvhBootSetup { /// Set up PVH boot structures in guest memory /// /// Creates and writes: /// 1. E820 memory map /// 2. start_info structure /// 3. 
Module list (for initrd) pub fn setup(config: &PvhConfig, guest_mem: &mut M) -> Result<()> { // Build E820 memory map let e820_entries = Self::build_e820_map(config.memory_size)?; let e820_count = e820_entries.len() as u32; // Write E820 map to guest memory Self::write_e820_map(&e820_entries, guest_mem)?; // Write module list if initrd is present let modlist_addr = if let (Some(addr), Some(size)) = (config.initrd_addr, config.initrd_size) { let modlist_addr = layout::E820_MAP_ADDR + (MAX_E820_ENTRIES * std::mem::size_of::()) as u64; let entry = HvmModlistEntry::new(addr, size); guest_mem.write_bytes(modlist_addr, entry.as_bytes())?; Some(modlist_addr) } else { None }; // Build and write start_info structure let mut start_info = StartInfo::new() .with_cmdline(config.cmdline_addr) .with_memmap(layout::E820_MAP_ADDR, e820_count); if let Some(addr) = modlist_addr { start_info = start_info.with_module(addr); } guest_mem.write_bytes(layout::PVH_START_INFO_ADDR, start_info.as_bytes())?; Ok(()) } /// Build E820 memory map for the VM /// /// Creates a standard x86_64 memory layout: /// - Low memory (0-640KB): RAM /// - Legacy hole (640KB-1MB): Reserved /// - High memory (1MB+): RAM fn build_e820_map(memory_size: u64) -> Result> { let mut entries = Vec::with_capacity(4); // Validate minimum memory if memory_size < layout::HIGH_MEMORY_START { return Err(BootError::MemoryLayout(format!( "Memory size {} is less than minimum required {}", memory_size, layout::HIGH_MEMORY_START ))); } // Low memory: 0 to 640KB (0x0 - 0x9FFFF) // We reserve the first page for real-mode IVT entries.push(E820Entry::ram(0, layout::LOW_MEMORY_END)); // Legacy video/ROM hole: 640KB to 1MB (0xA0000 - 0xFFFFF) // This is reserved for VGA memory, option ROMs, etc. 
let legacy_hole_size = layout::HIGH_MEMORY_START - layout::LOW_MEMORY_END; entries.push(E820Entry::reserved(layout::LOW_MEMORY_END, legacy_hole_size)); // High memory: 1MB to RAM size let high_memory_size = memory_size - layout::HIGH_MEMORY_START; if high_memory_size > 0 { entries.push(E820Entry::ram(layout::HIGH_MEMORY_START, high_memory_size)); } // If memory > 4GB, we might need to handle the MMIO hole // For now, we assume memory <= 4GB for simplicity // Production systems should handle: // - PCI MMIO hole (typically 0xE0000000 - 0xFFFFFFFF) // - Memory above 4GB remapped Ok(entries) } /// Write E820 map entries to guest memory fn write_e820_map(entries: &[E820Entry], guest_mem: &mut M) -> Result<()> { let entry_size = std::mem::size_of::(); for (i, entry) in entries.iter().enumerate() { let addr = layout::E820_MAP_ADDR + (i * entry_size) as u64; let bytes = unsafe { std::slice::from_raw_parts(entry as *const E820Entry as *const u8, entry_size) }; guest_mem.write_bytes(addr, bytes)?; } Ok(()) } /// Get initial CPU register state for PVH boot /// /// Returns the register values needed to start the vCPU in 64-bit mode /// with PVH boot protocol. 
pub fn get_initial_regs(entry_point: u64) -> PvhRegs { PvhRegs { // Instruction pointer - kernel entry rip: entry_point, // RBX contains pointer to start_info (Xen PVH convention) rbx: layout::PVH_START_INFO_ADDR, // RSI also contains start_info pointer (Linux boot convention) rsi: layout::PVH_START_INFO_ADDR, // Stack pointer rsp: layout::BOOT_STACK_POINTER, // Clear other general-purpose registers rax: 0, rcx: 0, rdx: 0, rdi: 0, rbp: 0, r8: 0, r9: 0, r10: 0, r11: 0, r12: 0, r13: 0, r14: 0, r15: 0, // Flags - interrupts disabled rflags: 0x2, // Segment selectors for 64-bit mode cs: 0x10, // Code segment, ring 0 ds: 0x18, // Data segment es: 0x18, fs: 0x18, gs: 0x18, ss: 0x18, // CR registers for 64-bit mode cr0: CR0_PE | CR0_ET | CR0_PG, cr3: 0, // Page table base - set by kernel setup cr4: CR4_PAE, // EFER for long mode efer: EFER_LME | EFER_LMA, } } } /// Control Register 0 bits const CR0_PE: u64 = 1 << 0; // Protection Enable const CR0_ET: u64 = 1 << 4; // Extension Type (387 present) const CR0_PG: u64 = 1 << 31; // Paging Enable /// Control Register 4 bits const CR4_PAE: u64 = 1 << 5; // Physical Address Extension /// EFER (Extended Feature Enable Register) bits const EFER_LME: u64 = 1 << 8; // Long Mode Enable const EFER_LMA: u64 = 1 << 10; // Long Mode Active /// CPU register state for PVH boot #[derive(Debug, Clone, Default)] pub struct PvhRegs { // General purpose registers pub rax: u64, pub rbx: u64, pub rcx: u64, pub rdx: u64, pub rsi: u64, pub rdi: u64, pub rsp: u64, pub rbp: u64, pub r8: u64, pub r9: u64, pub r10: u64, pub r11: u64, pub r12: u64, pub r13: u64, pub r14: u64, pub r15: u64, // Instruction pointer pub rip: u64, // Flags pub rflags: u64, // Segment selectors pub cs: u16, pub ds: u16, pub es: u16, pub fs: u16, pub gs: u16, pub ss: u16, // Control registers pub cr0: u64, pub cr3: u64, pub cr4: u64, // Model-specific registers pub efer: u64, } /// GDT entries for 64-bit mode boot /// /// This provides a minimal GDT for transitioning to 64-bit 
/// mode.
/// The kernel will set up its own GDT later.
///
/// NOTE(review): with this three-entry table the code descriptor is selected
/// by 0x08 and the data descriptor by 0x10. Confirm these match the selector
/// values loaded into the vCPU segment registers.
pub struct BootGdt;

impl BootGdt {
    /// Null descriptor (required as GDT[0])
    pub const NULL: u64 = 0;

    /// 64-bit code segment (CS)
    /// Base: 0, Limit: 0xFFFFF (ignored in 64-bit mode)
    /// Type: Code, Execute/Read, Present, DPL=0
    pub const CODE64: u64 = 0x00af_9b00_0000_ffff;

    /// 64-bit data segment (DS, ES, SS, FS, GS)
    /// Base: 0, Limit: 0xFFFFF
    /// Type: Data, Read/Write, Present, DPL=0
    pub const DATA64: u64 = 0x00cf_9300_0000_ffff;

    /// Build GDT table as bytes
    ///
    /// Returns the three descriptors (null, code, data) in that order, each
    /// encoded as 8 little-endian bytes.
    pub fn as_bytes() -> [u8; 24] {
        let mut gdt = [0u8; 24];
        gdt[0..8].copy_from_slice(&Self::NULL.to_le_bytes());
        gdt[8..16].copy_from_slice(&Self::CODE64.to_le_bytes());
        gdt[16..24].copy_from_slice(&Self::DATA64.to_le_bytes());
        gdt
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Flat in-memory stand-in for guest RAM.
    struct MockMemory {
        size: u64,
        data: Vec<u8>,
    }

    impl MockMemory {
        fn new(size: u64) -> Self {
            Self {
                size,
                data: vec![0; size as usize],
            }
        }
    }

    impl GuestMemory for MockMemory {
        fn write_bytes(&mut self, addr: u64, data: &[u8]) -> Result<()> {
            let start = addr as usize;
            // `checked_add` keeps an address near `usize::MAX` from wrapping
            // into an apparently valid range; `get_mut` does the bounds check.
            let target = start
                .checked_add(data.len())
                .and_then(|end| self.data.get_mut(start..end))
                .ok_or_else(|| {
                    BootError::GuestMemoryWrite(format!(
                        "Write at {:#x} exceeds memory size",
                        addr
                    ))
                })?;
            target.copy_from_slice(data);
            Ok(())
        }

        fn size(&self) -> u64 {
            self.size
        }
    }

    #[test]
    fn test_e820_entry_size() {
        // E820 entry must be exactly 20 bytes for Linux kernel compatibility
        assert_eq!(std::mem::size_of::<E820Entry>(), 20);
    }

    #[test]
    fn test_build_e820_map() {
        let memory_size = 128 * 1024 * 1024; // 128MB
        let entries = PvhBootSetup::build_e820_map(memory_size).unwrap();

        // Should have at least 3 entries
        assert!(entries.len() >= 3);

        // First entry should be low memory RAM — copy from packed struct
        let e0_addr = entries[0].addr;
        let e0_type = entries[0].entry_type;
        assert_eq!(e0_addr, 0);
        assert_eq!(e0_type, E820Type::Ram as u32);

        // Second entry should be legacy hole (reserved)
        let e1_addr = entries[1].addr;
        let e1_type = entries[1].entry_type;
        assert_eq!(e1_addr, layout::LOW_MEMORY_END);
        assert_eq!(e1_type, E820Type::Reserved as u32);

        // Third entry should be high memory RAM
        let e2_addr = entries[2].addr;
        let e2_type = entries[2].entry_type;
        assert_eq!(e2_addr, layout::HIGH_MEMORY_START);
        assert_eq!(e2_type, E820Type::Ram as u32);
    }

    #[test]
    fn test_start_info_size() {
        // StartInfo should be reasonable size (under 4KB page)
        let size = std::mem::size_of::<StartInfo>();
        assert!(size < 4096);
        assert!(size >= 48); // Minimum expected fields
    }

    #[test]
    fn test_pvh_setup() {
        let mut mem = MockMemory::new(128 * 1024 * 1024);
        let config = PvhConfig {
            memory_size: 128 * 1024 * 1024,
            vcpu_count: 2,
            cmdline_addr: layout::CMDLINE_ADDR,
            initrd_addr: Some(100 * 1024 * 1024),
            initrd_size: Some(10 * 1024 * 1024),
        };

        let result = PvhBootSetup::setup(&config, &mut mem);
        assert!(result.is_ok());

        // Verify magic was written to start_info location
        let base = layout::PVH_START_INFO_ADDR as usize;
        let magic = u32::from_le_bytes([
            mem.data[base],
            mem.data[base + 1],
            mem.data[base + 2],
            mem.data[base + 3],
        ]);
        assert_eq!(magic, XEN_HVM_START_MAGIC);
    }

    #[test]
    fn test_pvh_regs() {
        let entry_point = 0x100200;
        let regs = PvhBootSetup::get_initial_regs(entry_point);

        // Verify entry point
        assert_eq!(regs.rip, entry_point);

        // Verify start_info pointer in rbx
        assert_eq!(regs.rbx, layout::PVH_START_INFO_ADDR);

        // Verify 64-bit mode flags
        assert!(regs.cr0 & CR0_PE != 0); // Protection enabled
        assert!(regs.cr0 & CR0_PG != 0); // Paging enabled
        assert!(regs.cr4 & CR4_PAE != 0); // PAE enabled
        assert!(regs.efer & EFER_LME != 0); // Long mode enabled
    }

    #[test]
    fn test_gdt_layout() {
        let gdt = BootGdt::as_bytes();
        assert_eq!(gdt.len(), 24); // 3 entries × 8 bytes

        // First entry should be null
        assert_eq!(&gdt[0..8], &[0u8; 8]);

        // Code and data descriptors follow, little-endian encoded
        assert_eq!(&gdt[8..16], &BootGdt::CODE64.to_le_bytes());
        assert_eq!(&gdt[16..24], &BootGdt::DATA64.to_le_bytes());
    }
}