KVM-based microVMM for the Volt platform: - Sub-second VM boot times - Minimal memory footprint - Landlock LSM + seccomp security - Virtio device support - Custom kernel management Copyright (c) Armored Gates LLC. All rights reserved. Licensed under AGPSL v5.0
589 lines
18 KiB
Rust
589 lines
18 KiB
Rust
//! CAS-backed Volume Builder
|
|
//!
|
|
//! Creates TinyVol volumes from directory trees or existing images,
|
|
//! storing data in Nebula's content-addressed store for deduplication.
|
|
//!
|
|
//! # Usage
|
|
//!
|
|
//! ```text
|
|
//! // Build from a directory tree
|
|
//! stellarium cas-build --from-dir /path/to/rootfs --store /tmp/cas --output /tmp/vol
|
|
//!
|
|
//! // Build from an existing ext4 image
|
|
//! stellarium cas-build --from-image rootfs.ext4 --store /tmp/cas --output /tmp/vol
|
|
//!
|
|
//! // Clone an existing volume (instant, O(1))
|
|
//! stellarium cas-clone --source /tmp/vol --output /tmp/vol-clone
|
|
//!
|
|
//! // Show volume info
|
|
//! stellarium cas-info /tmp/vol
|
|
//! ```
|
|
|
|
use anyhow::{Context, Result, bail};
|
|
use std::fs::{self, File};
|
|
use std::io::{Read, Write};
|
|
use std::path::Path;
|
|
use std::process::Command;
|
|
|
|
use crate::nebula::store::{ContentStore, StoreConfig};
|
|
use crate::tinyvol::{Volume, VolumeConfig};
|
|
|
|
/// Build a CAS-backed TinyVol volume from a directory tree.
|
|
///
|
|
/// This:
|
|
/// 1. Creates a temporary ext4 image from the directory
|
|
/// 2. Chunks the ext4 image into CAS
|
|
/// 3. Creates a TinyVol volume with the data as base
|
|
///
|
|
/// The resulting volume can be used directly by Volt's virtio-blk.
|
|
pub fn build_from_dir(
|
|
source_dir: &Path,
|
|
store_path: &Path,
|
|
output_path: &Path,
|
|
size_mb: u64,
|
|
block_size: u32,
|
|
) -> Result<BuildResult> {
|
|
if !source_dir.exists() {
|
|
bail!("Source directory not found: {}", source_dir.display());
|
|
}
|
|
|
|
tracing::info!(
|
|
source = %source_dir.display(),
|
|
store = %store_path.display(),
|
|
output = %output_path.display(),
|
|
size_mb = size_mb,
|
|
"Building CAS-backed volume from directory"
|
|
);
|
|
|
|
// Step 1: Create temporary ext4 image
|
|
let tempdir = tempfile::tempdir().context("Failed to create temp directory")?;
|
|
let ext4_path = tempdir.path().join("rootfs.ext4");
|
|
|
|
create_ext4_from_dir(source_dir, &ext4_path, size_mb)?;
|
|
|
|
// Step 2: Build from the ext4 image
|
|
let result = build_from_image(&ext4_path, store_path, output_path, block_size)?;
|
|
|
|
tracing::info!(
|
|
chunks = result.chunks_stored,
|
|
dedup_chunks = result.dedup_chunks,
|
|
raw_size = result.raw_size,
|
|
stored_size = result.stored_size,
|
|
"Volume built from directory"
|
|
);
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
/// Build a CAS-backed TinyVol volume from an existing ext4/raw image.
|
|
///
|
|
/// This:
|
|
/// 1. Opens the image file
|
|
/// 2. Reads it in block_size chunks
|
|
/// 3. Stores each chunk in the Nebula ContentStore (dedup'd)
|
|
/// 4. Creates a TinyVol volume backed by the image
|
|
pub fn build_from_image(
|
|
image_path: &Path,
|
|
store_path: &Path,
|
|
output_path: &Path,
|
|
block_size: u32,
|
|
) -> Result<BuildResult> {
|
|
if !image_path.exists() {
|
|
bail!("Image not found: {}", image_path.display());
|
|
}
|
|
|
|
let image_size = fs::metadata(image_path)?.len();
|
|
tracing::info!(
|
|
image = %image_path.display(),
|
|
image_size = image_size,
|
|
block_size = block_size,
|
|
"Importing image into CAS"
|
|
);
|
|
|
|
// Open/create the content store
|
|
let store_config = StoreConfig {
|
|
path: store_path.to_path_buf(),
|
|
..Default::default()
|
|
};
|
|
let store = ContentStore::open(store_config)
|
|
.context("Failed to open content store")?;
|
|
|
|
let _initial_chunks = store.chunk_count();
|
|
let initial_bytes = store.total_bytes();
|
|
|
|
// Read the image in block-sized chunks and store in CAS
|
|
let mut image_file = File::open(image_path)?;
|
|
let mut buf = vec![0u8; block_size as usize];
|
|
let total_blocks = (image_size + block_size as u64 - 1) / block_size as u64;
|
|
let mut chunks_stored = 0u64;
|
|
let mut dedup_chunks = 0u64;
|
|
|
|
for block_idx in 0..total_blocks {
|
|
let bytes_remaining = image_size - (block_idx * block_size as u64);
|
|
let to_read = (bytes_remaining as usize).min(block_size as usize);
|
|
|
|
buf.fill(0); // Zero-fill in case of partial read
|
|
image_file.read_exact(&mut buf[..to_read]).with_context(|| {
|
|
format!("Failed to read block {} from image", block_idx)
|
|
})?;
|
|
|
|
// Check if it's a zero block (skip storage)
|
|
if buf.iter().all(|&b| b == 0) {
|
|
continue;
|
|
}
|
|
|
|
let prev_count = store.chunk_count();
|
|
store.insert(&buf)?;
|
|
let new_count = store.chunk_count();
|
|
|
|
if new_count == prev_count {
|
|
dedup_chunks += 1;
|
|
}
|
|
chunks_stored += 1;
|
|
|
|
if block_idx % 1000 == 0 && block_idx > 0 {
|
|
tracing::debug!(
|
|
"Progress: block {}/{} ({:.1}%)",
|
|
block_idx, total_blocks,
|
|
(block_idx as f64 / total_blocks as f64) * 100.0
|
|
);
|
|
}
|
|
}
|
|
|
|
store.flush()?;
|
|
|
|
let final_chunks = store.chunk_count();
|
|
let final_bytes = store.total_bytes();
|
|
|
|
tracing::info!(
|
|
total_blocks = total_blocks,
|
|
non_zero_blocks = chunks_stored,
|
|
dedup_chunks = dedup_chunks,
|
|
store_chunks = final_chunks,
|
|
store_bytes = final_bytes,
|
|
"Image imported into CAS"
|
|
);
|
|
|
|
// Step 3: Create TinyVol volume backed by the image
|
|
// The volume uses the original image as its base and has an empty delta
|
|
let config = VolumeConfig::new(image_size).with_block_size(block_size);
|
|
let volume = Volume::create(output_path, config)
|
|
.context("Failed to create TinyVol volume")?;
|
|
|
|
// Copy the image file as the base for the volume
|
|
let base_path = output_path.join("base.img");
|
|
fs::copy(image_path, &base_path)?;
|
|
|
|
volume.flush().map_err(|e| anyhow::anyhow!("Failed to flush volume: {}", e))?;
|
|
|
|
tracing::info!(
|
|
volume = %output_path.display(),
|
|
virtual_size = image_size,
|
|
"TinyVol volume created"
|
|
);
|
|
|
|
Ok(BuildResult {
|
|
volume_path: output_path.to_path_buf(),
|
|
store_path: store_path.to_path_buf(),
|
|
base_image_path: Some(base_path),
|
|
raw_size: image_size,
|
|
stored_size: final_bytes - initial_bytes,
|
|
chunks_stored,
|
|
dedup_chunks,
|
|
total_blocks,
|
|
block_size,
|
|
})
|
|
}
|
|
|
|
/// Create an ext4 filesystem image from a directory tree.
|
|
///
|
|
/// Uses mkfs.ext4 and a loop mount to populate the image.
|
|
fn create_ext4_from_dir(source_dir: &Path, output: &Path, size_mb: u64) -> Result<()> {
|
|
tracing::info!(
|
|
source = %source_dir.display(),
|
|
output = %output.display(),
|
|
size_mb = size_mb,
|
|
"Creating ext4 image from directory"
|
|
);
|
|
|
|
// Create sparse file
|
|
let status = Command::new("dd")
|
|
.args([
|
|
"if=/dev/zero",
|
|
&format!("of={}", output.display()),
|
|
"bs=1M",
|
|
&format!("count=0"),
|
|
&format!("seek={}", size_mb),
|
|
])
|
|
.stdout(std::process::Stdio::null())
|
|
.stderr(std::process::Stdio::null())
|
|
.status()
|
|
.context("Failed to create image file with dd")?;
|
|
|
|
if !status.success() {
|
|
bail!("dd failed to create image file");
|
|
}
|
|
|
|
// Format as ext4
|
|
let status = Command::new("mkfs.ext4")
|
|
.args([
|
|
"-F",
|
|
"-q",
|
|
"-L", "rootfs",
|
|
"-O", "^huge_file,^metadata_csum",
|
|
"-b", "4096",
|
|
&output.display().to_string(),
|
|
])
|
|
.stdout(std::process::Stdio::null())
|
|
.stderr(std::process::Stdio::null())
|
|
.status()
|
|
.context("Failed to format image as ext4")?;
|
|
|
|
if !status.success() {
|
|
bail!("mkfs.ext4 failed");
|
|
}
|
|
|
|
// Mount and copy files
|
|
let mount_dir = tempfile::tempdir().context("Failed to create mount directory")?;
|
|
let mount_path = mount_dir.path();
|
|
|
|
// Try to mount (requires root/sudo or fuse2fs)
|
|
let mount_result = try_mount_and_copy(output, mount_path, source_dir);
|
|
|
|
match mount_result {
|
|
Ok(()) => {
|
|
tracing::info!("Files copied to ext4 image successfully");
|
|
}
|
|
Err(e) => {
|
|
// Fall back to e2cp (if available) or debugfs
|
|
tracing::warn!("Mount failed ({}), trying e2cp fallback...", e);
|
|
copy_with_debugfs(output, source_dir)?;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Try to mount the image and copy files (requires privileges or fuse)
|
|
fn try_mount_and_copy(image: &Path, mount_point: &Path, source: &Path) -> Result<()> {
|
|
// Try fuse2fs first (doesn't require root)
|
|
let status = Command::new("fuse2fs")
|
|
.args([
|
|
&image.display().to_string(),
|
|
&mount_point.display().to_string(),
|
|
"-o", "rw",
|
|
])
|
|
.status();
|
|
|
|
let use_fuse = match status {
|
|
Ok(s) if s.success() => true,
|
|
_ => {
|
|
// Try mount with sudo
|
|
let status = Command::new("sudo")
|
|
.args([
|
|
"mount", "-o", "loop",
|
|
&image.display().to_string(),
|
|
&mount_point.display().to_string(),
|
|
])
|
|
.status()
|
|
.context("Neither fuse2fs nor sudo mount available")?;
|
|
|
|
if !status.success() {
|
|
bail!("Failed to mount image");
|
|
}
|
|
false
|
|
}
|
|
};
|
|
|
|
// Copy files
|
|
let copy_result = Command::new("cp")
|
|
.args(["-a", &format!("{}/.)", source.display()), &mount_point.display().to_string()])
|
|
.status();
|
|
|
|
// Also try rsync as fallback
|
|
let copy_ok = match copy_result {
|
|
Ok(s) if s.success() => true,
|
|
_ => {
|
|
let status = Command::new("rsync")
|
|
.args(["-a", &format!("{}/", source.display()), &format!("{}/", mount_point.display())])
|
|
.status()
|
|
.unwrap_or_else(|_| std::process::ExitStatus::default());
|
|
status.success()
|
|
}
|
|
};
|
|
|
|
// Unmount
|
|
if use_fuse {
|
|
let _ = Command::new("fusermount")
|
|
.args(["-u", &mount_point.display().to_string()])
|
|
.status();
|
|
} else {
|
|
let _ = Command::new("sudo")
|
|
.args(["umount", &mount_point.display().to_string()])
|
|
.status();
|
|
}
|
|
|
|
if !copy_ok {
|
|
bail!("Failed to copy files to image");
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Copy files using debugfs (doesn't require root)
|
|
fn copy_with_debugfs(image: &Path, source: &Path) -> Result<()> {
|
|
// Walk source directory and write files using debugfs
|
|
let mut cmds = String::new();
|
|
|
|
for entry in walkdir::WalkDir::new(source)
|
|
.min_depth(1)
|
|
.into_iter()
|
|
.filter_map(|e| e.ok())
|
|
{
|
|
let rel_path = entry.path().strip_prefix(source)
|
|
.unwrap_or(entry.path());
|
|
|
|
let guest_path = format!("/{}", rel_path.display());
|
|
|
|
if entry.file_type().is_dir() {
|
|
cmds.push_str(&format!("mkdir {}\n", guest_path));
|
|
} else if entry.file_type().is_file() {
|
|
cmds.push_str(&format!("write {} {}\n", entry.path().display(), guest_path));
|
|
}
|
|
}
|
|
|
|
if cmds.is_empty() {
|
|
return Ok(());
|
|
}
|
|
|
|
let mut child = Command::new("debugfs")
|
|
.args(["-w", &image.display().to_string()])
|
|
.stdin(std::process::Stdio::piped())
|
|
.stdout(std::process::Stdio::null())
|
|
.stderr(std::process::Stdio::null())
|
|
.spawn()
|
|
.context("debugfs not available")?;
|
|
|
|
child.stdin.as_mut().unwrap().write_all(cmds.as_bytes())?;
|
|
let status = child.wait()?;
|
|
|
|
if !status.success() {
|
|
bail!("debugfs failed to copy files");
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Clone a TinyVol volume (instant, O(1) manifest copy)
|
|
pub fn clone_volume(source: &Path, output: &Path) -> Result<CloneResult> {
|
|
tracing::info!(
|
|
source = %source.display(),
|
|
output = %output.display(),
|
|
"Cloning volume"
|
|
);
|
|
|
|
let volume = Volume::open(source)
|
|
.map_err(|e| anyhow::anyhow!("Failed to open source volume: {}", e))?;
|
|
|
|
let stats_before = volume.stats();
|
|
|
|
let _cloned = volume.clone_to(output)
|
|
.map_err(|e| anyhow::anyhow!("Failed to clone volume: {}", e))?;
|
|
|
|
// Copy the base image link if present
|
|
let base_path = source.join("base.img");
|
|
if base_path.exists() {
|
|
let dest_base = output.join("base.img");
|
|
// Create a hard link (shares data) or symlink
|
|
if fs::hard_link(&base_path, &dest_base).is_err() {
|
|
// Fall back to symlink
|
|
let canonical = base_path.canonicalize()?;
|
|
std::os::unix::fs::symlink(&canonical, &dest_base)?;
|
|
}
|
|
}
|
|
|
|
tracing::info!(
|
|
output = %output.display(),
|
|
virtual_size = stats_before.virtual_size,
|
|
"Volume cloned (instant)"
|
|
);
|
|
|
|
Ok(CloneResult {
|
|
source_path: source.to_path_buf(),
|
|
clone_path: output.to_path_buf(),
|
|
virtual_size: stats_before.virtual_size,
|
|
})
|
|
}
|
|
|
|
/// Show information about a TinyVol volume and its CAS store
|
|
pub fn show_volume_info(volume_path: &Path, store_path: Option<&Path>) -> Result<()> {
|
|
let volume = Volume::open(volume_path)
|
|
.map_err(|e| anyhow::anyhow!("Failed to open volume: {}", e))?;
|
|
|
|
let stats = volume.stats();
|
|
|
|
println!("Volume: {}", volume_path.display());
|
|
println!(" Virtual size: {} ({} bytes)", format_bytes(stats.virtual_size), stats.virtual_size);
|
|
println!(" Block size: {} ({} bytes)", format_bytes(stats.block_size as u64), stats.block_size);
|
|
println!(" Block count: {}", stats.block_count);
|
|
println!(" Modified blocks: {}", stats.modified_blocks);
|
|
println!(" Manifest size: {} bytes", stats.manifest_size);
|
|
println!(" Delta size: {}", format_bytes(stats.delta_size));
|
|
println!(" Efficiency: {:.6} (actual/virtual)", stats.efficiency());
|
|
|
|
let base_path = volume_path.join("base.img");
|
|
if base_path.exists() {
|
|
let base_size = fs::metadata(&base_path)?.len();
|
|
println!(" Base image: {} ({})", base_path.display(), format_bytes(base_size));
|
|
}
|
|
|
|
// Show CAS store info if path provided
|
|
if let Some(store_path) = store_path {
|
|
if store_path.exists() {
|
|
let store_config = StoreConfig {
|
|
path: store_path.to_path_buf(),
|
|
..Default::default()
|
|
};
|
|
if let Ok(store) = ContentStore::open(store_config) {
|
|
let store_stats = store.stats();
|
|
println!();
|
|
println!("CAS Store: {}", store_path.display());
|
|
println!(" Total chunks: {}", store_stats.total_chunks);
|
|
println!(" Total bytes: {}", format_bytes(store_stats.total_bytes));
|
|
println!(" Duplicates found: {}", store_stats.duplicates_found);
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Render a byte count as a short human-readable string.
///
/// Uses 1024-based thresholds with the labels `KB`, `MB` and `GB`, printed
/// to two decimal places. Counts below 1024 are printed as a whole number of
/// `bytes`.
fn format_bytes(bytes: u64) -> String {
    const KIB: u64 = 1 << 10;
    const MIB: u64 = 1 << 20;
    const GIB: u64 = 1 << 30;

    // Each unit is a power of two, so converting it to f64 is exact.
    let scaled = |unit: u64| bytes as f64 / unit as f64;

    match bytes {
        b if b >= GIB => format!("{:.2} GB", scaled(GIB)),
        b if b >= MIB => format!("{:.2} MB", scaled(MIB)),
        b if b >= KIB => format!("{:.2} KB", scaled(KIB)),
        _ => format!("{} bytes", bytes),
    }
}
|
|
|
|
/// Outcome of importing an image into the CAS and creating a volume from it.
#[derive(Debug)]
pub struct BuildResult {
    /// Where the TinyVol volume was created
    pub volume_path: std::path::PathBuf,
    /// Location of the content store the chunks were inserted into
    pub store_path: std::path::PathBuf,
    /// Copy of the source image kept as the volume's base, if one was made
    pub base_image_path: Option<std::path::PathBuf>,
    /// Size of the source image in bytes
    pub raw_size: u64,
    /// Bytes added to the content store by this build
    pub stored_size: u64,
    /// Count of non-zero chunks handed to the store
    pub chunks_stored: u64,
    /// Count of those chunks that were deduplicated
    pub dedup_chunks: u64,
    /// Number of blocks the image was split into
    pub total_blocks: u64,
    /// Block size used for chunking, in bytes
    pub block_size: u32,
}

impl BuildResult {
    /// Fraction of stored chunks that were deduplicated.
    ///
    /// Returns `1.0` when no chunks were stored.
    pub fn dedup_ratio(&self) -> f64 {
        match self.chunks_stored {
            0 => 1.0,
            stored => self.dedup_chunks as f64 / stored as f64,
        }
    }

    /// Fraction of the raw image size that did not need to be stored.
    ///
    /// Returns `0.0` for an empty image.
    pub fn savings(&self) -> f64 {
        if self.raw_size > 0 {
            1.0 - (self.stored_size as f64 / self.raw_size as f64)
        } else {
            0.0
        }
    }
}
|
|
|
|
/// Outcome of cloning a TinyVol volume.
#[derive(Debug)]
pub struct CloneResult {
    /// Volume that was cloned from
    pub source_path: std::path::PathBuf,
    /// Where the clone was written
    pub clone_path: std::path::PathBuf,
    /// Virtual size reported by the source volume's stats
    pub virtual_size: u64,
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// One representative value per unit of `format_bytes`.
    #[test]
    fn test_format_bytes() {
        let cases: [(u64, &str); 4] = [
            (100, "100 bytes"),
            (1536, "1.50 KB"),
            (2 * 1024 * 1024, "2.00 MB"),
            (3 * 1024 * 1024 * 1024, "3.00 GB"),
        ];
        for (input, expected) in cases {
            assert_eq!(format_bytes(input), expected);
        }
    }

    /// Imports a raw image that is half data and half zeros, and checks that
    /// the zero blocks are not counted as stored chunks.
    #[test]
    fn test_build_from_image() {
        let scratch = tempdir().unwrap();
        let image_path = scratch.path().join("test.img");
        let store_path = scratch.path().join("cas-store");
        let volume_path = scratch.path().join("volume");

        // 64KB of 0x42 followed by 64KB of zeros (raw data, not a real ext4)
        let mut contents = vec![0x42u8; 64 * 1024];
        contents.resize(128 * 1024, 0);
        fs::write(&image_path, &contents).unwrap();

        // 4KB blocks
        let built = build_from_image(&image_path, &store_path, &volume_path, 4096).unwrap();

        assert!(built.volume_path.exists());
        assert_eq!(built.raw_size, 128 * 1024);
        assert!(built.chunks_stored > 0);
        // Zero blocks should be skipped
        assert!(built.total_blocks > built.chunks_stored);
    }

    /// Creates a volume with one written block, clones it, and checks that
    /// the clone directory contains a manifest.
    #[test]
    fn test_clone_volume() {
        let scratch = tempdir().unwrap();
        let original_path = scratch.path().join("original");
        let clone_path = scratch.path().join("clone");

        // Create the source volume inside a scope so it is dropped before
        // the clone is attempted.
        {
            let config = VolumeConfig::new(1024 * 1024).with_block_size(4096);
            let volume = Volume::create(&original_path, config).unwrap();
            volume.write_block(0, &vec![0x11; 4096]).unwrap();
            volume.flush().unwrap();
        }

        // Clone it
        let cloned = clone_volume(&original_path, &clone_path).unwrap();
        assert!(cloned.clone_path.exists());
        assert!(clone_path.join("manifest.tvol").exists());
    }
}
|