Volt VMM (Neutron Stardust): source-available under AGPSL v5.0

KVM-based microVMM for the Volt platform:
- Sub-second VM boot times
- Minimal memory footprint
- Landlock LSM + seccomp security
- Virtio device support
- Custom kernel management

Copyright (c) Armored Gates LLC. All rights reserved.
Licensed under AGPSL v5.0
This commit is contained in:
Karl Clinger
2026-03-21 01:04:35 -05:00
commit 40ed108dd5
143 changed files with 50300 additions and 0 deletions

611
vmm/src/boot/mptable.rs Normal file
View File

@@ -0,0 +1,611 @@
//! Intel MultiProcessor Specification (MPS) Table Construction
//!
//! Implements MP Floating Pointer and MP Configuration Table structures
//! to advertise SMP topology to the guest kernel. This allows Linux to
//! discover and boot Application Processors (APs) beyond the Bootstrap
//! Processor (BSP).
//!
//! # Table Layout (placed at 0x9FC00, just below EBDA)
//!
//! ```text
//! 0x9FC00: MP Floating Pointer Structure (16 bytes)
//! 0x9FC10: MP Configuration Table Header (44 bytes)
//! 0x9FC3C: Processor Entry 0 (BSP, APIC ID 0) — 20 bytes
//! 0x9FC50: Processor Entry 1 (AP, APIC ID 1) — 20 bytes
//! ...
//! Bus Entry (ISA, 8 bytes)
//! I/O APIC Entry (8 bytes)
//! I/O Interrupt Entries (IRQ 0-15, 8 bytes each)
//! ```
//!
//! # References
//! - Intel MultiProcessor Specification v1.4 (May 1997)
//! - Firecracker's mpspec implementation (src/vmm/src/arch/x86_64/mptable.rs)
//! - Linux kernel: arch/x86/kernel/mpparse.c
use super::{BootError, GuestMemory, Result};
/// Guest physical address at which the MP tables begin.
///
/// 0x9FC00 is the start of the final 1 KiB below the 640 KiB boundary
/// (0xA0000), a conventional location that Linux scans for the tables.
pub const MP_TABLE_START: u64 = 0x0009_FC00;
/// Upper bound on the CPU count accepted by `setup_mptable`.
///
/// This is the largest value a `u8` can hold (255). In practice the table
/// area is the tighter limit: the 1024 bytes between 0x9FC00 and 0xA0000
/// hold 60 bytes of headers (16 + 44), one bus entry (8), one I/O APIC
/// entry (8) and 16 I/O interrupt entries (128), which leaves 820 bytes,
/// i.e. room for 41 processor entries of 20 bytes each. `setup_mptable`
/// rejects any CPU count whose tables would not fit in that area.
pub const MAX_CPUS: u8 = u8::MAX;
// ============================================================================
// MP Floating Pointer Structure (16 bytes) and Configuration Table header
// Intel MPS Table 4-1
// ============================================================================
/// Signature that opens the MP Floating Pointer Structure: "_MP_".
const MP_FP_SIGNATURE: [u8; 4] = *b"_MP_";
/// Signature that opens the MP Configuration Table header: "PCMP".
const MP_CT_SIGNATURE: [u8; 4] = *b"PCMP";
/// Value stored in the spec-revision fields; 4 denotes MP spec revision 1.4.
const MP_SPEC_REVISION: u8 = 0x04;
/// IMCR-present bit (bit 7) of the floating pointer's feature byte 2.
/// `setup_mptable` stores it at byte offset 12 of the floating pointer;
/// feature byte 1 (offset 11) is written as 0 there.
const MP_FEATURE_IMCRP: u8 = 1 << 7;
// ============================================================================
// MP Configuration Table entry type codes (first byte of every entry)
// ============================================================================
const MP_ENTRY_PROCESSOR: u8 = 0x00;
const MP_ENTRY_BUS: u8 = 0x01;
const MP_ENTRY_IOAPIC: u8 = 0x02;
const MP_ENTRY_IO_INTERRUPT: u8 = 0x03;
// No local-interrupt entries are emitted by the code in this module as shown;
// the code is listed so the set of entry types is complete.
#[allow(dead_code)]
const MP_ENTRY_LOCAL_INTERRUPT: u8 = 0x04;
// Processor entry flag bits (CPU flags byte)
const CPU_FLAG_ENABLED: u8 = 1 << 0; // EN: processor is usable
const CPU_FLAG_BSP: u8 = 1 << 1; // BP: bootstrap processor
// Interrupt type codes used in I/O interrupt entries
const INT_TYPE_INT: u8 = 0x00; // Vectored interrupt
// NMI and SMI are not emitted by this module as shown; kept to document the
// full set of type codes.
#[allow(dead_code)]
const INT_TYPE_NMI: u8 = 0x01;
#[allow(dead_code)]
const INT_TYPE_SMI: u8 = 0x02;
const INT_TYPE_EXTINT: u8 = 0x03; // ExtINT (from 8259)
// Interrupt polarity/trigger flags: all-zero means "conforms to bus spec"
const INT_FLAG_DEFAULT: u16 = 0;
// Address advertised for the I/O APIC in its table entry
const IOAPIC_DEFAULT_ADDR: u32 = 0xFEC0_0000;
/// Bus type string for the ISA bus entry: "ISA" padded with ASCII spaces
/// (0x20) to the field's fixed width of 6 bytes.
const BUS_TYPE_ISA: [u8; 6] = [b'I', b'S', b'A', 0x20, 0x20, 0x20];
// ============================================================================
// MP Table Builder
// ============================================================================
/// Size in bytes of the MP Floating Pointer Structure.
const MP_FP_SIZE: usize = 16;
/// Size in bytes of the MP Configuration Table header.
const MP_CT_HEADER_SIZE: usize = 44;
/// Number of ISA IRQ lines (0-15) that get an I/O interrupt entry.
const MP_NUM_ISA_IRQS: u8 = 16;

/// Write MP tables to guest memory for SMP discovery.
///
/// Starting at [`MP_TABLE_START`] this writes:
/// 1. the 16-byte MP Floating Pointer Structure, and
/// 2. directly after it, the MP Configuration Table: a 44-byte header, one
///    processor entry per vCPU (APIC IDs `0..num_cpus`, CPU 0 flagged as the
///    BSP), one ISA bus entry, one I/O APIC entry (APIC ID `num_cpus`) and
///    16 I/O interrupt entries mapping ISA IRQ n to I/O APIC pin n.
///
/// # Arguments
/// * `guest_mem` — Guest memory to write the tables into
/// * `num_cpus` — Number of vCPUs. Must be at least 1, and small enough that
///   the tables end at or below 0xA0000 (at most 41 with the current layout).
///
/// # Returns
/// The guest physical address where the MP Floating Pointer was written.
///
/// # Errors
/// Returns [`BootError::MemoryLayout`] if `num_cpus` is 0 or exceeds
/// [`MAX_CPUS`], if the tables would extend past 0xA0000, or if they would
/// extend past the end of guest memory. Errors from `guest_mem.write_bytes`
/// are propagated unchanged.
pub fn setup_mptable<M: GuestMemory>(guest_mem: &mut M, num_cpus: u8) -> Result<u64> {
    // Per-entry sizes of the configuration table entries written below.
    const PROCESSOR_ENTRY_SIZE: u64 = 20;
    const BUS_ENTRY_SIZE: u64 = 8;
    const IOAPIC_ENTRY_SIZE: u64 = 8;
    const IO_INT_ENTRY_SIZE: u64 = 8;
    // First address past the area available for the tables.
    const MP_TABLE_END: u64 = 0xA0000;

    if num_cpus == 0 {
        return Err(BootError::MemoryLayout(
            "MP table requires at least 1 CPU".to_string(),
        ));
    }
    // While MAX_CPUS equals the largest `u8` this can never trigger; the
    // check is kept so that lowering the constant takes effect here. The
    // space check below is the limit that actually applies today.
    if num_cpus > MAX_CPUS {
        return Err(BootError::MemoryLayout(format!(
            "MP table supports at most {} CPUs, got {}",
            MAX_CPUS, num_cpus
        )));
    }

    // Sizes and addresses of the two structures.
    let fp_size = MP_FP_SIZE as u64;
    let config_table_addr = MP_TABLE_START + fp_size;
    let config_table_size = MP_CT_HEADER_SIZE as u64
        + u64::from(num_cpus) * PROCESSOR_ENTRY_SIZE
        + BUS_ENTRY_SIZE
        + IOAPIC_ENTRY_SIZE
        + u64::from(MP_NUM_ISA_IRQS) * IO_INT_ENTRY_SIZE;
    let total_size = fp_size + config_table_size;
    let table_end = MP_TABLE_START + total_size;

    // Verify we fit in the available space (between 0x9FC00 and 0xA0000)
    if table_end > MP_TABLE_END {
        return Err(BootError::MemoryLayout(format!(
            "MP tables ({} bytes) exceed available space (0x9FC00-0xA0000)",
            total_size
        )));
    }
    // Verify we have enough guest memory
    let guest_size = guest_mem.size();
    if table_end > guest_size {
        return Err(BootError::MemoryLayout(format!(
            "MP tables at 0x{:x} exceed guest memory size 0x{:x}",
            table_end, guest_size
        )));
    }

    // I/O APIC ID = num_cpus (first ID after all processors)
    let ioapic_id = num_cpus;
    let (table_buf, entry_count) =
        build_mp_config_table(num_cpus, ioapic_id, config_table_size as usize);
    debug_assert_eq!(table_buf.len() as u64, config_table_size);

    // The space check above guarantees the address is below 0xA0000, so the
    // narrowing to the 32-bit pointer field is lossless.
    let fp_buf = build_mp_floating_pointer(config_table_addr as u32);

    // ---- Write everything to guest memory ----
    guest_mem.write_bytes(MP_TABLE_START, &fp_buf)?;
    guest_mem.write_bytes(config_table_addr, &table_buf)?;
    tracing::info!(
        "MP table written at 0x{:x}: {} CPUs, {} entries, {} bytes total\n\
         Layout: FP=0x{:x}, Config=0x{:x}, IOAPIC ID={}, IOAPIC addr=0x{:x}",
        MP_TABLE_START,
        num_cpus,
        entry_count,
        total_size,
        MP_TABLE_START,
        config_table_addr,
        ioapic_id,
        IOAPIC_DEFAULT_ADDR,
    );
    Ok(MP_TABLE_START)
}

/// Build the complete MP Configuration Table (header plus all entries) with
/// its checksum filled in; returns the table bytes and the entry count.
fn build_mp_config_table(num_cpus: u8, ioapic_id: u8, capacity: usize) -> (Vec<u8>, u16) {
    // Reserve zeroed space for the header; it is filled in last because it
    // records the final table length, entry count and checksum.
    let mut table = Vec::with_capacity(capacity);
    table.resize(MP_CT_HEADER_SIZE, 0);
    let mut entry_count: u16 = 0;

    // ---- Processor Entries ----
    // CPU signature: Family 6, Model 15, Stepping 1 (same for every vCPU).
    let cpu_signature: u32 = (6 << 8) | (15 << 4) | 1;
    let feature_flags: u32 = 0x0781_FBFF; // Common feature flags (FPU, SSE, SSE2, etc.)
    for cpu_id in 0..num_cpus {
        // CPU 0 is the bootstrap processor; all CPUs are marked enabled.
        let flags = if cpu_id == 0 {
            CPU_FLAG_ENABLED | CPU_FLAG_BSP
        } else {
            CPU_FLAG_ENABLED
        };
        write_processor_entry(
            &mut table,
            cpu_id, // Local APIC ID
            0x14,   // Local APIC version (integrated APIC)
            flags,
            cpu_signature,
            feature_flags,
        );
        entry_count += 1;
    }

    // ---- Bus Entry (ISA, bus ID 0) ----
    write_bus_entry(&mut table, 0, &BUS_TYPE_ISA);
    entry_count += 1;

    // ---- I/O APIC Entry ----
    write_ioapic_entry(&mut table, ioapic_id, 0x11, IOAPIC_DEFAULT_ADDR);
    entry_count += 1;

    // ---- I/O Interrupt Assignment Entries ----
    // ISA IRQ n is routed to I/O APIC pin n. IRQ 0 is reported as ExtINT,
    // IRQs 1-15 (including the PIC cascade, IRQ 2) as vectored interrupts.
    for irq in 0..MP_NUM_ISA_IRQS {
        let int_type = if irq == 0 {
            INT_TYPE_EXTINT
        } else {
            INT_TYPE_INT
        };
        write_io_interrupt_entry(
            &mut table,
            int_type,
            INT_FLAG_DEFAULT,
            0,   // source bus = ISA
            irq, // source bus IRQ
            ioapic_id,
            irq, // IOAPIC pin = same as IRQ number
        );
        entry_count += 1;
    }

    // ---- Configuration Table Header ----
    // Largest possible table is 44 + 255 * 20 + 144 bytes, well within u16.
    let table_len = table.len() as u16;
    let header = &mut table[..MP_CT_HEADER_SIZE];
    // Signature: "PCMP"
    header[0..4].copy_from_slice(&MP_CT_SIGNATURE);
    // Base table length (u16 LE) — entire config table including header
    header[4..6].copy_from_slice(&table_len.to_le_bytes());
    // Spec revision
    header[6] = MP_SPEC_REVISION;
    // Checksum — must be zero while the checksum is computed below
    header[7] = 0;
    // OEM ID (8 bytes, space-padded)
    header[8..16].copy_from_slice(&space_padded::<8>(b"NOVAFLAR"));
    // Product ID (12 bytes, space-padded). The text is shorter than the
    // field, so it must be padded: `copy_from_slice` panics when the source
    // and destination lengths differ.
    header[16..28].copy_from_slice(&space_padded::<12>(b"VOLT VM"));
    // OEM table pointer (0 = none)
    header[28..32].copy_from_slice(&0u32.to_le_bytes());
    // OEM table size
    header[32..34].copy_from_slice(&0u16.to_le_bytes());
    // Entry count
    header[34..36].copy_from_slice(&entry_count.to_le_bytes());
    // Local APIC address
    header[36..40].copy_from_slice(&0xFEE0_0000u32.to_le_bytes());
    // Extended table length
    header[40..42].copy_from_slice(&0u16.to_le_bytes());
    // Extended table checksum
    header[42] = 0;
    // Reserved
    header[43] = 0;

    // The checksum covers the whole table (header and entries).
    let checksum = compute_checksum(&table);
    table[7] = checksum;

    (table, entry_count)
}

/// Build the 16-byte MP Floating Pointer Structure pointing at the
/// configuration table located at `config_table_addr`.
fn build_mp_floating_pointer(config_table_addr: u32) -> [u8; MP_FP_SIZE] {
    let mut fp = [0u8; MP_FP_SIZE];
    // Signature: "_MP_"
    fp[0..4].copy_from_slice(&MP_FP_SIGNATURE);
    // Physical address pointer to MP Config Table (u32 LE)
    fp[4..8].copy_from_slice(&config_table_addr.to_le_bytes());
    // Length in 16-byte paragraphs (1 = 16 bytes)
    fp[8] = 1;
    // Spec revision
    fp[9] = MP_SPEC_REVISION;
    // Feature byte 1: 0 = MP Config Table present (not a default config)
    fp[11] = 0;
    // Feature byte 2: bit 7 = IMCR present
    fp[12] = MP_FEATURE_IMCRP;
    // Feature bytes 3-5 (offsets 13-15) are reserved and stay zero.
    // Checksum (offset 10) is computed last, while its own slot is still 0.
    fp[10] = compute_checksum(&fp);
    fp
}

/// Copy `text` into an `N`-byte field padded with ASCII spaces, truncating
/// `text` if it is longer than the field.
fn space_padded<const N: usize>(text: &[u8]) -> [u8; N] {
    let mut field = [b' '; N];
    let len = text.len().min(N);
    field[..len].copy_from_slice(&text[..len]);
    field
}
/// Append one 20-byte Processor Entry to `buf`.
///
/// Layout (Intel MPS Table 4-4), multi-byte fields little-endian:
/// ```text
/// Offset  Size  Field
/// 0       1     Entry type (0 = processor)
/// 1       1     Local APIC ID
/// 2       1     Local APIC version
/// 3       1     CPU flags (bit 0=EN, bit 1=BP)
/// 4       4     CPU signature (stepping, model, family)
/// 8       4     Feature flags (from CPUID leaf 1 EDX)
/// 12      8     Reserved
/// ```
fn write_processor_entry(
    buf: &mut Vec<u8>,
    apic_id: u8,
    apic_version: u8,
    flags: u8,
    cpu_signature: u32,
    feature_flags: u32,
) {
    // Assemble the record in a zeroed scratch array so the reserved tail
    // (offsets 12..20) is zero without writing it explicitly.
    let mut entry = [0u8; 20];
    entry[0] = MP_ENTRY_PROCESSOR;
    entry[1] = apic_id;
    entry[2] = apic_version;
    entry[3] = flags;
    entry[4..8].copy_from_slice(&cpu_signature.to_le_bytes());
    entry[8..12].copy_from_slice(&feature_flags.to_le_bytes());
    buf.extend_from_slice(&entry);
}
/// Append one 8-byte Bus Entry to `buf`.
///
/// Layout (Intel MPS Table 4-5):
/// ```text
/// Offset  Size  Field
/// 0       1     Entry type (1 = bus)
/// 1       1     Bus ID
/// 2       6     Bus type string (space-padded)
/// ```
fn write_bus_entry(buf: &mut Vec<u8>, bus_id: u8, bus_type: &[u8; 6]) {
    // Two-byte prefix (type code, bus ID) followed by the type string as given.
    let prefix = [MP_ENTRY_BUS, bus_id];
    buf.extend_from_slice(&prefix);
    buf.extend_from_slice(&bus_type[..]);
}
/// Append one 8-byte I/O APIC Entry to `buf`; the entry is always marked
/// enabled.
///
/// Layout (Intel MPS Table 4-6), address little-endian:
/// ```text
/// Offset  Size  Field
/// 0       1     Entry type (2 = I/O APIC)
/// 1       1     I/O APIC ID
/// 2       1     I/O APIC version
/// 3       1     I/O APIC flags (bit 0 = EN)
/// 4       4     I/O APIC address
/// ```
fn write_ioapic_entry(buf: &mut Vec<u8>, id: u8, version: u8, addr: u32) {
    let [a0, a1, a2, a3] = addr.to_le_bytes();
    let entry = [
        MP_ENTRY_IOAPIC,
        id,
        version,
        0x01, // flags: enabled
        a0,
        a1,
        a2,
        a3,
    ];
    buf.extend_from_slice(&entry);
}
/// Append one 8-byte I/O Interrupt Assignment Entry to `buf`.
///
/// Layout (Intel MPS Table 4-7), flags little-endian:
/// ```text
/// Offset  Size  Field
/// 0       1     Entry type (3 = I/O interrupt)
/// 1       1     Interrupt type (0=INT, 1=NMI, 2=SMI, 3=ExtINT)
/// 2       2     Flags (polarity/trigger)
/// 4       1     Source bus ID
/// 5       1     Source bus IRQ
/// 6       1     Destination I/O APIC ID
/// 7       1     Destination I/O APIC pin (INTIN#)
/// ```
fn write_io_interrupt_entry(
    buf: &mut Vec<u8>,
    int_type: u8,
    flags: u16,
    src_bus_id: u8,
    src_bus_irq: u8,
    dst_ioapic_id: u8,
    dst_ioapic_pin: u8,
) {
    let [flags_lo, flags_hi] = flags.to_le_bytes();
    let entry = [
        MP_ENTRY_IO_INTERRUPT,
        int_type,
        flags_lo,
        flags_hi,
        src_bus_id,
        src_bus_irq,
        dst_ioapic_id,
        dst_ioapic_pin,
    ];
    buf.extend_from_slice(&entry);
}
/// Return the byte that makes an MP structure sum to zero.
///
/// The result is the two's-complement negation of the 8-bit wrapping sum of
/// `data`, so adding it to that sum yields 0 (mod 256). Callers compute it
/// while the structure's checksum slot still holds 0.
fn compute_checksum(data: &[u8]) -> u8 {
    let mut total: u8 = 0;
    for &byte in data {
        total = total.wrapping_add(byte);
    }
    total.wrapping_neg()
}
// ============================================================================
// Tests
// ============================================================================
#[cfg(test)]
mod tests {
    use super::*;

    /// Size of the mock guest memory used by most tests (1 MiB).
    const MEM_SIZE: u64 = 1024 * 1024;

    /// Flat, zero-initialised byte buffer standing in for guest memory.
    struct MockMemory {
        size: u64,
        data: Vec<u8>,
    }

    impl MockMemory {
        fn new(size: u64) -> Self {
            Self {
                size,
                data: vec![0; size as usize],
            }
        }

        /// Borrow `len` bytes starting at guest address `addr`.
        fn read_bytes(&self, addr: u64, len: usize) -> &[u8] {
            &self.data[addr as usize..(addr as usize + len)]
        }
    }

    impl GuestMemory for MockMemory {
        fn write_bytes(&mut self, addr: u64, data: &[u8]) -> Result<()> {
            let start = addr as usize;
            // Checked addition so an absurd address reports an error
            // instead of overflowing.
            let end = start
                .checked_add(data.len())
                .filter(|&end| end <= self.data.len());
            match end {
                Some(end) => {
                    self.data[start..end].copy_from_slice(data);
                    Ok(())
                }
                None => Err(BootError::GuestMemoryWrite(format!(
                    "Write at {:#x} exceeds memory",
                    addr
                ))),
            }
        }

        fn size(&self) -> u64 {
            self.size
        }
    }

    /// Guest address of the MP Configuration Table (right after the
    /// 16-byte floating pointer).
    fn config_addr() -> usize {
        (MP_TABLE_START + 16) as usize
    }

    /// Read a little-endian u16 from the mock memory.
    fn read_u16(mem: &MockMemory, offset: usize) -> u16 {
        u16::from_le_bytes([mem.data[offset], mem.data[offset + 1]])
    }

    /// Read a little-endian u32 from the mock memory.
    fn read_u32(mem: &MockMemory, offset: usize) -> u32 {
        u32::from_le_bytes([
            mem.data[offset],
            mem.data[offset + 1],
            mem.data[offset + 2],
            mem.data[offset + 3],
        ])
    }

    #[test]
    fn test_checksum() {
        // The checksum byte must cancel the wrapping sum of the data.
        let data = vec![1u8, 2, 3, 4];
        let cs = compute_checksum(&data);
        let sum_without: u8 = data.iter().fold(0u8, |a, b| a.wrapping_add(*b));
        assert_eq!(sum_without.wrapping_add(cs), 0);
        // An empty structure needs no correction.
        assert_eq!(compute_checksum(&[]), 0);
    }

    #[test]
    fn test_mp_floating_pointer_signature() {
        let mut mem = MockMemory::new(MEM_SIZE);
        let result = setup_mptable(&mut mem, 1);
        assert!(result.is_ok());
        let fp_addr = result.unwrap() as usize;
        assert_eq!(&mem.data[fp_addr..fp_addr + 4], b"_MP_");
    }

    #[test]
    fn test_mp_floating_pointer_checksum() {
        let mut mem = MockMemory::new(MEM_SIZE);
        setup_mptable(&mut mem, 2).unwrap();
        // MP Floating Pointer is 16 bytes at MP_TABLE_START
        let fp = mem.read_bytes(MP_TABLE_START, 16);
        let sum: u8 = fp.iter().fold(0u8, |a, &b| a.wrapping_add(b));
        assert_eq!(sum, 0, "MP Floating Pointer checksum mismatch");
    }

    #[test]
    fn test_mp_floating_pointer_config_address() {
        let mut mem = MockMemory::new(MEM_SIZE);
        setup_mptable(&mut mem, 1).unwrap();
        // Bytes 4-7 of the floating pointer hold the config table address.
        let ptr = read_u32(&mem, MP_TABLE_START as usize + 4);
        assert_eq!(ptr as usize, config_addr());
    }

    #[test]
    fn test_mp_config_table_checksum() {
        let mut mem = MockMemory::new(MEM_SIZE);
        setup_mptable(&mut mem, 2).unwrap();
        // Table length is stored in header bytes 4-5
        let table_len = read_u16(&mem, config_addr() + 4) as usize;
        let table = &mem.data[config_addr()..config_addr() + table_len];
        let sum: u8 = table.iter().fold(0u8, |a, &b| a.wrapping_add(b));
        assert_eq!(sum, 0, "MP Config Table checksum mismatch");
    }

    #[test]
    fn test_mp_config_table_signature() {
        let mut mem = MockMemory::new(MEM_SIZE);
        setup_mptable(&mut mem, 1).unwrap();
        assert_eq!(&mem.data[config_addr()..config_addr() + 4], b"PCMP");
    }

    #[test]
    fn test_mp_config_table_id_strings() {
        let mut mem = MockMemory::new(MEM_SIZE);
        setup_mptable(&mut mem, 1).unwrap();
        // OEM ID occupies header bytes 8..16 and fills the field exactly.
        assert_eq!(&mem.data[config_addr() + 8..config_addr() + 16], b"NOVAFLAR");
        // Product ID occupies header bytes 16..28: the text followed by
        // space padding up to the 12-byte field width.
        let product = &mem.data[config_addr() + 16..config_addr() + 28];
        assert!(product.starts_with(b"VOLT VM"));
        assert!(product[7..].iter().all(|&b| b == b' '));
    }

    #[test]
    fn test_mp_table_1_cpu() {
        let mut mem = MockMemory::new(MEM_SIZE);
        setup_mptable(&mut mem, 1).unwrap();
        // Entry count at offset 34 in header
        let entry_count = read_u16(&mem, config_addr() + 34);
        // 1 CPU + 1 bus + 1 IOAPIC + 16 IRQs = 19 entries
        assert_eq!(entry_count, 19);
    }

    #[test]
    fn test_mp_table_4_cpus() {
        let mut mem = MockMemory::new(MEM_SIZE);
        setup_mptable(&mut mem, 4).unwrap();
        let entry_count = read_u16(&mem, config_addr() + 34);
        // 4 CPUs + 1 bus + 1 IOAPIC + 16 IRQs = 22 entries
        assert_eq!(entry_count, 22);
    }

    #[test]
    fn test_mp_table_bsp_flag() {
        let mut mem = MockMemory::new(MEM_SIZE);
        setup_mptable(&mut mem, 4).unwrap();
        // First processor entry starts right after the 44-byte header
        let proc0_offset = config_addr() + 44;
        assert_eq!(mem.data[proc0_offset], 0); // Entry type = processor
        assert_eq!(mem.data[proc0_offset + 1], 0); // APIC ID = 0
        assert_eq!(mem.data[proc0_offset + 3], CPU_FLAG_ENABLED | CPU_FLAG_BSP); // BSP + EN
        // Second processor
        let proc1_offset = proc0_offset + 20;
        assert_eq!(mem.data[proc1_offset + 1], 1); // APIC ID = 1
        assert_eq!(mem.data[proc1_offset + 3], CPU_FLAG_ENABLED); // EN only (no BSP)
    }

    #[test]
    fn test_mp_table_ioapic() {
        let mut mem = MockMemory::new(MEM_SIZE);
        let num_cpus: u8 = 2;
        setup_mptable(&mut mem, num_cpus).unwrap();
        // IOAPIC entry follows the processor entries (20 bytes each) and
        // the 8-byte bus entry.
        let entries_start = config_addr() + 44;
        let ioapic_offset = entries_start + (num_cpus as usize * 20) + 8;
        assert_eq!(mem.data[ioapic_offset], MP_ENTRY_IOAPIC); // Entry type
        assert_eq!(mem.data[ioapic_offset + 1], num_cpus); // IOAPIC ID = num_cpus
        assert_eq!(mem.data[ioapic_offset + 3], 0x01); // Enabled
        assert_eq!(read_u32(&mem, ioapic_offset + 4), IOAPIC_DEFAULT_ADDR);
    }

    #[test]
    fn test_mp_table_irq_entries() {
        let mut mem = MockMemory::new(MEM_SIZE);
        let num_cpus: u8 = 2;
        setup_mptable(&mut mem, num_cpus).unwrap();
        // I/O interrupt entries follow processors, bus and IOAPIC entries.
        let irq_start = config_addr() + 44 + (num_cpus as usize * 20) + 8 + 8;
        for irq in 0..16usize {
            let entry = &mem.data[irq_start + irq * 8..irq_start + (irq + 1) * 8];
            let expected_type = if irq == 0 {
                INT_TYPE_EXTINT
            } else {
                INT_TYPE_INT
            };
            assert_eq!(entry[0], MP_ENTRY_IO_INTERRUPT);
            assert_eq!(entry[1], expected_type);
            assert_eq!(entry[5], irq as u8); // source bus IRQ
            assert_eq!(entry[6], num_cpus); // destination IOAPIC ID
            assert_eq!(entry[7], irq as u8); // destination pin
        }
    }

    #[test]
    fn test_mp_table_zero_cpus_error() {
        let mut mem = MockMemory::new(MEM_SIZE);
        let result = setup_mptable(&mut mem, 0);
        assert!(result.is_err());
    }

    #[test]
    fn test_mp_table_cpu_count_limit() {
        // 16 + 44 + 41 * 20 + 8 + 8 + 128 = 1024 bytes, which exactly fills
        // 0x9FC00..0xA0000; one more processor entry no longer fits.
        let mut mem = MockMemory::new(MEM_SIZE);
        assert!(setup_mptable(&mut mem, 41).is_ok());
        let mut mem = MockMemory::new(MEM_SIZE);
        assert!(setup_mptable(&mut mem, 42).is_err());
    }

    #[test]
    fn test_mp_table_guest_memory_too_small() {
        // Guest memory ends exactly where the tables would begin.
        let mut mem = MockMemory::new(MP_TABLE_START);
        assert!(setup_mptable(&mut mem, 1).is_err());
    }

    #[test]
    fn test_mp_table_local_apic_addr() {
        let mut mem = MockMemory::new(MEM_SIZE);
        setup_mptable(&mut mem, 2).unwrap();
        // Local APIC address at offset 36 in header
        assert_eq!(read_u32(&mem, config_addr() + 36), 0xFEE0_0000);
    }
}