//! CAS-backed Volume Builder //! //! Creates TinyVol volumes from directory trees or existing images, //! storing data in Nebula's content-addressed store for deduplication. //! //! # Usage //! //! ```ignore //! // Build from a directory tree //! stellarium cas-build --from-dir /path/to/rootfs --store /tmp/cas --output /tmp/vol //! //! // Build from an existing ext4 image //! stellarium cas-build --from-image rootfs.ext4 --store /tmp/cas --output /tmp/vol //! //! // Clone an existing volume (instant, O(1)) //! stellarium cas-clone --source /tmp/vol --output /tmp/vol-clone //! //! // Show volume info //! stellarium cas-info /tmp/vol //! ``` use anyhow::{Context, Result, bail}; use std::fs::{self, File}; use std::io::{Read, Write}; use std::path::Path; use std::process::Command; use crate::nebula::store::{ContentStore, StoreConfig}; use crate::tinyvol::{Volume, VolumeConfig}; /// Build a CAS-backed TinyVol volume from a directory tree. /// /// This: /// 1. Creates a temporary ext4 image from the directory /// 2. Chunks the ext4 image into CAS /// 3. Creates a TinyVol volume with the data as base /// /// The resulting volume can be used directly by Volt's virtio-blk. pub fn build_from_dir( source_dir: &Path, store_path: &Path, output_path: &Path, size_mb: u64, block_size: u32, ) -> Result { if !source_dir.exists() { bail!("Source directory not found: {}", source_dir.display()); } tracing::info!( source = %source_dir.display(), store = %store_path.display(), output = %output_path.display(), size_mb = size_mb, "Building CAS-backed volume from directory" ); // Step 1: Create temporary ext4 image let tempdir = tempfile::tempdir().context("Failed to create temp directory")?; let ext4_path = tempdir.path().join("rootfs.ext4"); create_ext4_from_dir(source_dir, &ext4_path, size_mb)?; // Step 2: Build from the ext4 image let result = build_from_image(&ext4_path, store_path, output_path, block_size)?; tracing::info!( chunks = result.chunks_stored, dedup_chunks = result.dedup_chunks, raw_size = result.raw_size, stored_size = result.stored_size, "Volume built from directory" ); Ok(result) } /// Build a CAS-backed TinyVol volume from an existing ext4/raw image. /// /// This: /// 1. Opens the image file /// 2. Reads it in block_size chunks /// 3. Stores each chunk in the Nebula ContentStore (dedup'd) /// 4. Creates a TinyVol volume backed by the image pub fn build_from_image( image_path: &Path, store_path: &Path, output_path: &Path, block_size: u32, ) -> Result { if !image_path.exists() { bail!("Image not found: {}", image_path.display()); } let image_size = fs::metadata(image_path)?.len(); tracing::info!( image = %image_path.display(), image_size = image_size, block_size = block_size, "Importing image into CAS" ); // Open/create the content store let store_config = StoreConfig { path: store_path.to_path_buf(), ..Default::default() }; let store = ContentStore::open(store_config) .context("Failed to open content store")?; let _initial_chunks = store.chunk_count(); let initial_bytes = store.total_bytes(); // Read the image in block-sized chunks and store in CAS let mut image_file = File::open(image_path)?; let mut buf = vec![0u8; block_size as usize]; let total_blocks = (image_size + block_size as u64 - 1) / block_size as u64; let mut chunks_stored = 0u64; let mut dedup_chunks = 0u64; for block_idx in 0..total_blocks { let bytes_remaining = image_size - (block_idx * block_size as u64); let to_read = (bytes_remaining as usize).min(block_size as usize); buf.fill(0); // Zero-fill in case of partial read image_file.read_exact(&mut buf[..to_read]).with_context(|| { format!("Failed to read block {} from image", block_idx) })?; // Check if it's a zero block (skip storage) if buf.iter().all(|&b| b == 0) { continue; } let prev_count = store.chunk_count(); store.insert(&buf)?; let new_count = store.chunk_count(); if new_count == prev_count { dedup_chunks += 1; } chunks_stored += 1; if block_idx % 1000 == 0 && block_idx > 0 { tracing::debug!( "Progress: block {}/{} ({:.1}%)", block_idx, total_blocks, (block_idx as f64 / total_blocks as f64) * 100.0 ); } } store.flush()?; let final_chunks = store.chunk_count(); let final_bytes = store.total_bytes(); tracing::info!( total_blocks = total_blocks, non_zero_blocks = chunks_stored, dedup_chunks = dedup_chunks, store_chunks = final_chunks, store_bytes = final_bytes, "Image imported into CAS" ); // Step 3: Create TinyVol volume backed by the image // The volume uses the original image as its base and has an empty delta let config = VolumeConfig::new(image_size).with_block_size(block_size); let volume = Volume::create(output_path, config) .context("Failed to create TinyVol volume")?; // Copy the image file as the base for the volume let base_path = output_path.join("base.img"); fs::copy(image_path, &base_path)?; volume.flush().map_err(|e| anyhow::anyhow!("Failed to flush volume: {}", e))?; tracing::info!( volume = %output_path.display(), virtual_size = image_size, "TinyVol volume created" ); Ok(BuildResult { volume_path: output_path.to_path_buf(), store_path: store_path.to_path_buf(), base_image_path: Some(base_path), raw_size: image_size, stored_size: final_bytes - initial_bytes, chunks_stored, dedup_chunks, total_blocks, block_size, }) } /// Create an ext4 filesystem image from a directory tree. /// /// Uses mkfs.ext4 and a loop mount to populate the image. fn create_ext4_from_dir(source_dir: &Path, output: &Path, size_mb: u64) -> Result<()> { tracing::info!( source = %source_dir.display(), output = %output.display(), size_mb = size_mb, "Creating ext4 image from directory" ); // Create sparse file let status = Command::new("dd") .args([ "if=/dev/zero", &format!("of={}", output.display()), "bs=1M", &format!("count=0"), &format!("seek={}", size_mb), ]) .stdout(std::process::Stdio::null()) .stderr(std::process::Stdio::null()) .status() .context("Failed to create image file with dd")?; if !status.success() { bail!("dd failed to create image file"); } // Format as ext4 let status = Command::new("mkfs.ext4") .args([ "-F", "-q", "-L", "rootfs", "-O", "^huge_file,^metadata_csum", "-b", "4096", &output.display().to_string(), ]) .stdout(std::process::Stdio::null()) .stderr(std::process::Stdio::null()) .status() .context("Failed to format image as ext4")?; if !status.success() { bail!("mkfs.ext4 failed"); } // Mount and copy files let mount_dir = tempfile::tempdir().context("Failed to create mount directory")?; let mount_path = mount_dir.path(); // Try to mount (requires root/sudo or fuse2fs) let mount_result = try_mount_and_copy(output, mount_path, source_dir); match mount_result { Ok(()) => { tracing::info!("Files copied to ext4 image successfully"); } Err(e) => { // Fall back to e2cp (if available) or debugfs tracing::warn!("Mount failed ({}), trying e2cp fallback...", e); copy_with_debugfs(output, source_dir)?; } } Ok(()) } /// Try to mount the image and copy files (requires privileges or fuse) fn try_mount_and_copy(image: &Path, mount_point: &Path, source: &Path) -> Result<()> { // Try fuse2fs first (doesn't require root) let status = Command::new("fuse2fs") .args([ &image.display().to_string(), &mount_point.display().to_string(), "-o", "rw", ]) .status(); let use_fuse = match status { Ok(s) if s.success() => true, _ => { // Try mount with sudo let status = Command::new("sudo") .args([ "mount", "-o", "loop", &image.display().to_string(), &mount_point.display().to_string(), ]) .status() .context("Neither fuse2fs nor sudo mount available")?; if !status.success() { bail!("Failed to mount image"); } false } }; // Copy files let copy_result = Command::new("cp") .args(["-a", &format!("{}/.)", source.display()), &mount_point.display().to_string()]) .status(); // Also try rsync as fallback let copy_ok = match copy_result { Ok(s) if s.success() => true, _ => { let status = Command::new("rsync") .args(["-a", &format!("{}/", source.display()), &format!("{}/", mount_point.display())]) .status() .unwrap_or_else(|_| std::process::ExitStatus::default()); status.success() } }; // Unmount if use_fuse { let _ = Command::new("fusermount") .args(["-u", &mount_point.display().to_string()]) .status(); } else { let _ = Command::new("sudo") .args(["umount", &mount_point.display().to_string()]) .status(); } if !copy_ok { bail!("Failed to copy files to image"); } Ok(()) } /// Copy files using debugfs (doesn't require root) fn copy_with_debugfs(image: &Path, source: &Path) -> Result<()> { // Walk source directory and write files using debugfs let mut cmds = String::new(); for entry in walkdir::WalkDir::new(source) .min_depth(1) .into_iter() .filter_map(|e| e.ok()) { let rel_path = entry.path().strip_prefix(source) .unwrap_or(entry.path()); let guest_path = format!("/{}", rel_path.display()); if entry.file_type().is_dir() { cmds.push_str(&format!("mkdir {}\n", guest_path)); } else if entry.file_type().is_file() { cmds.push_str(&format!("write {} {}\n", entry.path().display(), guest_path)); } } if cmds.is_empty() { return Ok(()); } let mut child = Command::new("debugfs") .args(["-w", &image.display().to_string()]) .stdin(std::process::Stdio::piped()) .stdout(std::process::Stdio::null()) .stderr(std::process::Stdio::null()) .spawn() .context("debugfs not available")?; child.stdin.as_mut().unwrap().write_all(cmds.as_bytes())?; let status = child.wait()?; if !status.success() { bail!("debugfs failed to copy files"); } Ok(()) } /// Clone a TinyVol volume (instant, O(1) manifest copy) pub fn clone_volume(source: &Path, output: &Path) -> Result { tracing::info!( source = %source.display(), output = %output.display(), "Cloning volume" ); let volume = Volume::open(source) .map_err(|e| anyhow::anyhow!("Failed to open source volume: {}", e))?; let stats_before = volume.stats(); let _cloned = volume.clone_to(output) .map_err(|e| anyhow::anyhow!("Failed to clone volume: {}", e))?; // Copy the base image link if present let base_path = source.join("base.img"); if base_path.exists() { let dest_base = output.join("base.img"); // Create a hard link (shares data) or symlink if fs::hard_link(&base_path, &dest_base).is_err() { // Fall back to symlink let canonical = base_path.canonicalize()?; std::os::unix::fs::symlink(&canonical, &dest_base)?; } } tracing::info!( output = %output.display(), virtual_size = stats_before.virtual_size, "Volume cloned (instant)" ); Ok(CloneResult { source_path: source.to_path_buf(), clone_path: output.to_path_buf(), virtual_size: stats_before.virtual_size, }) } /// Show information about a TinyVol volume and its CAS store pub fn show_volume_info(volume_path: &Path, store_path: Option<&Path>) -> Result<()> { let volume = Volume::open(volume_path) .map_err(|e| anyhow::anyhow!("Failed to open volume: {}", e))?; let stats = volume.stats(); println!("Volume: {}", volume_path.display()); println!(" Virtual size: {} ({} bytes)", format_bytes(stats.virtual_size), stats.virtual_size); println!(" Block size: {} ({} bytes)", format_bytes(stats.block_size as u64), stats.block_size); println!(" Block count: {}", stats.block_count); println!(" Modified blocks: {}", stats.modified_blocks); println!(" Manifest size: {} bytes", stats.manifest_size); println!(" Delta size: {}", format_bytes(stats.delta_size)); println!(" Efficiency: {:.6} (actual/virtual)", stats.efficiency()); let base_path = volume_path.join("base.img"); if base_path.exists() { let base_size = fs::metadata(&base_path)?.len(); println!(" Base image: {} ({})", base_path.display(), format_bytes(base_size)); } // Show CAS store info if path provided if let Some(store_path) = store_path { if store_path.exists() { let store_config = StoreConfig { path: store_path.to_path_buf(), ..Default::default() }; if let Ok(store) = ContentStore::open(store_config) { let store_stats = store.stats(); println!(); println!("CAS Store: {}", store_path.display()); println!(" Total chunks: {}", store_stats.total_chunks); println!(" Total bytes: {}", format_bytes(store_stats.total_bytes)); println!(" Duplicates found: {}", store_stats.duplicates_found); } } } Ok(()) } /// Format bytes as human-readable string fn format_bytes(bytes: u64) -> String { if bytes >= 1024 * 1024 * 1024 { format!("{:.2} GB", bytes as f64 / (1024.0 * 1024.0 * 1024.0)) } else if bytes >= 1024 * 1024 { format!("{:.2} MB", bytes as f64 / (1024.0 * 1024.0)) } else if bytes >= 1024 { format!("{:.2} KB", bytes as f64 / 1024.0) } else { format!("{} bytes", bytes) } } /// Result of a volume build operation #[derive(Debug)] pub struct BuildResult { /// Path to the created volume pub volume_path: std::path::PathBuf, /// Path to the CAS store pub store_path: std::path::PathBuf, /// Path to the base image (if created) pub base_image_path: Option, /// Raw image size pub raw_size: u64, /// Size stored in CAS (after dedup) pub stored_size: u64, /// Number of non-zero chunks stored pub chunks_stored: u64, /// Number of chunks deduplicated pub dedup_chunks: u64, /// Total blocks in image pub total_blocks: u64, /// Block size used pub block_size: u32, } impl BuildResult { /// Calculate deduplication ratio pub fn dedup_ratio(&self) -> f64 { if self.chunks_stored == 0 { return 1.0; } self.dedup_chunks as f64 / self.chunks_stored as f64 } /// Calculate space savings pub fn savings(&self) -> f64 { if self.raw_size == 0 { return 0.0; } 1.0 - (self.stored_size as f64 / self.raw_size as f64) } } /// Result of a volume clone operation #[derive(Debug)] pub struct CloneResult { /// Source volume path pub source_path: std::path::PathBuf, /// Clone path pub clone_path: std::path::PathBuf, /// Virtual size pub virtual_size: u64, } #[cfg(test)] mod tests { use super::*; use tempfile::tempdir; #[test] fn test_format_bytes() { assert_eq!(format_bytes(100), "100 bytes"); assert_eq!(format_bytes(1536), "1.50 KB"); assert_eq!(format_bytes(2 * 1024 * 1024), "2.00 MB"); assert_eq!(format_bytes(3 * 1024 * 1024 * 1024), "3.00 GB"); } #[test] fn test_build_from_image() { let dir = tempdir().unwrap(); let image_path = dir.path().join("test.img"); let store_path = dir.path().join("cas-store"); let volume_path = dir.path().join("volume"); // Create a small test image (just raw data, not a real ext4) let mut img = File::create(&image_path).unwrap(); let data = vec![0x42u8; 64 * 1024]; // 64KB of data img.write_all(&data).unwrap(); // Add some zeros to test sparse detection let zeros = vec![0u8; 64 * 1024]; img.write_all(&zeros).unwrap(); img.flush().unwrap(); drop(img); let result = build_from_image( &image_path, &store_path, &volume_path, 4096, // 4KB blocks ).unwrap(); assert!(result.volume_path.exists()); assert_eq!(result.raw_size, 128 * 1024); assert!(result.chunks_stored > 0); // Zero blocks should be skipped assert!(result.total_blocks > result.chunks_stored); } #[test] fn test_clone_volume() { let dir = tempdir().unwrap(); let vol_path = dir.path().join("original"); let clone_path = dir.path().join("clone"); // Create a volume let config = VolumeConfig::new(1024 * 1024).with_block_size(4096); let volume = Volume::create(&vol_path, config).unwrap(); volume.write_block(0, &vec![0x11; 4096]).unwrap(); volume.flush().unwrap(); drop(volume); // Clone it let result = clone_volume(&vol_path, &clone_path).unwrap(); assert!(result.clone_path.exists()); assert!(clone_path.join("manifest.tvol").exists()); } }