Files
volt/tests/hybrid/test_hybrid_lifecycle.sh
Karl Clinger 0ebe75b2ca Volt CLI: source-available under AGPSL v5.0
Complete infrastructure platform CLI:
- Container runtime (systemd-nspawn)
- VoltVisor VMs (Neutron Stardust / QEMU)
- Stellarium CAS (content-addressed storage)
- ORAS Registry
- GitOps integration
- Landlock LSM security
- Compose orchestration
- Mesh networking

Copyright (c) Armored Gates LLC. All rights reserved.
Licensed under AGPSL v5.0
2026-03-21 02:08:15 -05:00

298 lines
12 KiB
Bash
Executable File

#!/bin/bash
# ══════════════════════════════════════════════════════════════════════════════
# Volt Hybrid Integration Tests — Hybrid-Native Mode Lifecycle
#
# Tests the full lifecycle of a hybrid-native workload:
# 1. Create hybrid workload from image
# 2. Start and verify running with own kernel/init (boot mode)
# 3. Verify PID namespace isolation (PID 1 = systemd inside)
# 4. Verify private /proc (different from host)
# 5. Verify cgroups v2 delegation working
# 6. Stop gracefully
# 7. Destroy and verify cleanup
#
# Hybrid-native means: systemd-nspawn in --boot mode with full init inside,
# private /proc, /sys, delegated cgroups v2, own PID namespace.
#
# Requires: root, systemd-nspawn, base image
# ══════════════════════════════════════════════════════════════════════════════
# Deliberately no -e: the script inspects $? after commands (assert_ok … test $? …)
# and records failures via the helpers instead of aborting on the first one.
set -uo pipefail
# All test vocabulary (pass/fail/skip, section, assert_*, require_*, wait_*,
# workload helpers, cleanup_all, CLEANUP_WORKLOADS) comes from this file.
source "$(dirname "$0")/test_helpers.sh"
# ── Prerequisites ─────────────────────────────────────────────────────────────
require_root
require_volt
require_nspawn
# Rootfs image the hybrid workload is created from; skip (exit 0) if absent.
BASE_IMAGE="/var/lib/volt/images/ubuntu_24.04"
if ! require_image "$BASE_IMAGE"; then
echo "SKIP: No base image."
exit 0
fi
# Tear down anything this run created, on every exit path (including failures).
trap cleanup_all EXIT
echo "⚡ Volt Hybrid Integration Tests — Hybrid-Native Mode Lifecycle"
echo "════════════════════════════════════════════════════════════════"
# Unique workload name for this run (test_name presumably adds a random/run
# suffix — see test_helpers.sh).
HYB=$(test_name "hybrid")
# ── 1. Create hybrid workload ───────────────────────────────────────────────
# Creates the workload from the base image and verifies that the generated
# artifacts (rootfs, unit file) exist and are configured for boot mode.
section "📦 1. Create Hybrid-Native Workload"
output=$(create_container "$HYB" "$BASE_IMAGE" 2>&1)
rc=$?  # capture immediately — any statement inserted between the command and
       # the assertion would silently clobber $? and break the check
assert_ok "Create hybrid workload '$HYB'" test "$rc" -eq 0
assert_dir_exists "Hybrid rootfs exists" "/var/lib/volt/containers/$HYB"
assert_file_exists "Hybrid unit file exists" "/etc/systemd/system/volt-hybrid@${HYB}.service"
# Verify unit file is configured for boot mode.
unit_content=$(cat "/etc/systemd/system/volt-hybrid@${HYB}.service" 2>/dev/null)
# -F: fixed-string match; --: end of options so the pattern's leading dashes
# are not parsed as grep flags (the old "\-\-boot" escape relied on
# undefined BRE behavior for \-).
if echo "$unit_content" | grep -qF -- "--boot"; then
  pass "Unit file configured for boot mode (--boot)"
else
  fail "Unit file configured for boot mode (--boot)"
fi
# Verify cgroup delegation is enabled.
if echo "$unit_content" | grep -q "Delegate=yes"; then
  pass "Cgroup delegation enabled (Delegate=yes)"
else
  # Fall back to the .nspawn config. NOTE(review): Boot=yes is only a proxy
  # for delegation here (boot-mode nspawn units delegate by default) —
  # confirm the generator actually emits a delegation setting.
  nspawn_content=$(cat "/etc/systemd/nspawn/${HYB}.nspawn" 2>/dev/null)
  if echo "$nspawn_content" | grep -q "Boot=yes"; then
    pass "Boot mode enabled in .nspawn config"
  else
    skip "Cgroup delegation verification" "not found in unit or nspawn config"
  fi
fi
# ── 2. Start and verify running with own init ───────────────────────────────
# Starts the workload, waits for both the outer unit and the inner systemd
# to come up, and records the container's leader PID for later nsenter use.
section "🚀 2. Start Hybrid-Native Workload"
output=$(start_workload "$HYB" 2>&1)
rc=$?  # capture immediately so later commands cannot clobber the status
assert_ok "Start hybrid workload '$HYB'" test "$rc" -eq 0
if wait_running "$HYB" 30; then
  pass "Hybrid workload reached running state"
else
  fail "Hybrid workload reached running state" "timed out"
fi
# Wait for init (systemd) inside to finish booting; a slow or degraded boot
# is only a skip, not a failure.
if wait_booted "$HYB" 30; then
  pass "Systemd inside hybrid workload reached running target"
else
  skip "Systemd inside hybrid workload reached running target" "may be degraded or slow"
fi
# Leader PID is required by the nsenter-based isolation checks below.
LEADER_PID=$(get_leader_pid "$HYB")
assert_nonempty "Leader PID is set" "$LEADER_PID"
# ── 3. PID Namespace Isolation ──────────────────────────────────────────────
section "🔒 3. PID Namespace Isolation"
# Inside a boot-mode container, PID 1 should be the init system (systemd/init).
# We check this via nsenter, falling back to machinectl shell.
pid1_inside=$(sudo nsenter -t "$LEADER_PID" -p -m cat /proc/1/comm 2>/dev/null || echo "")
if [[ -n "$pid1_inside" ]]; then
  pass "Can read /proc/1/comm inside container (got: $pid1_inside)"
  if echo "$pid1_inside" | grep -qE "systemd|init"; then
    pass "PID 1 inside container is systemd/init"
  else
    fail "PID 1 inside container is systemd/init" "got: $pid1_inside"
  fi
else
  # Fallback: machinectl shell (tail -1 drops any shell banner noise).
  pid1_inside=$(sudo machinectl shell "$HYB" /bin/cat /proc/1/comm 2>/dev/null | tail -1 || echo "")
  if echo "$pid1_inside" | grep -qE "systemd|init"; then
    pass "PID 1 inside container is systemd/init (via machinectl)"
  else
    skip "PID 1 inside container check" "could not read /proc/1/comm"
  fi
fi
# Report the host's PID 1 for context in the test log.
host_pid1=$(cat /proc/1/comm 2>/dev/null || echo "unknown")
pass "Host PID 1 is: $host_pid1"
# Verify the container cannot see host processes: enumerating /proc inside
# should yield far fewer entries than on the host.
inside_ps=$(sudo nsenter -t "$LEADER_PID" -p -m sh -c "cat /proc/*/comm 2>/dev/null" 2>/dev/null || echo "")
if [[ -n "$inside_ps" ]]; then
  host_proc_count=$(ls /proc/*/comm 2>/dev/null | wc -l)
  inside_proc_count=$(sudo nsenter -t "$LEADER_PID" -p -m sh -c "ls /proc/*/comm 2>/dev/null | wc -l" 2>/dev/null || echo "0")
  # Sanitize the remote count: stray whitespace or an empty result would
  # break the numeric comparison, and a count of 0 (read failure) previously
  # produced a false PASS since 0 < host count.
  inside_proc_count=${inside_proc_count//[^0-9]/}
  inside_proc_count=${inside_proc_count:-0}
  if [[ "$inside_proc_count" -gt 0 && "$inside_proc_count" -lt "$host_proc_count" ]]; then
    pass "Container has fewer processes ($inside_proc_count) than host ($host_proc_count)"
  else
    fail "Container has fewer processes than host" "inside=$inside_proc_count, host=$host_proc_count"
  fi
else
  skip "Process count comparison" "could not enumerate container processes"
fi
# ── 4. Private /proc ────────────────────────────────────────────────────────
section "📂 4. Private /proc Verification"
# In boot mode, the container gets its own /proc mount. Compare namespace
# identity, uptime, and mount tables between host and container.
host_pidns=$(readlink /proc/self/ns/pid 2>/dev/null || echo "host")
container_pidns=$(sudo nsenter -t "$LEADER_PID" -p -m readlink /proc/self/ns/pid 2>/dev/null || echo "container")
if [[ "$host_pidns" != "$container_pidns" ]]; then
  pass "PID namespace differs (host=$host_pidns, container=$container_pidns)"
else
  skip "PID namespace differs" "both report same namespace (may need -p flag)"
fi
# /proc/uptime inside reflects the container's boot time, so it should be
# strictly lower than the host's uptime.
host_uptime=$(awk '{print int($1)}' /proc/uptime 2>/dev/null || echo "0")
container_uptime=$(sudo nsenter -t "$LEADER_PID" -p -m cat /proc/uptime 2>/dev/null | awk '{print int($1)}')
# If the read failed the pipeline yields an empty string (the old `|| echo 0`
# never fired because awk exits 0 on empty input), and [[ "" -lt n ]] treats
# empty as 0 — a false PASS. Treat empty/zero as inconclusive instead.
container_uptime=${container_uptime:-0}
if [[ "$container_uptime" -gt 0 && "$container_uptime" -lt "$host_uptime" ]]; then
  pass "Container uptime ($container_uptime s) < host uptime ($host_uptime s)"
else
  skip "Container uptime check" "uptime comparison inconclusive (host=$host_uptime, container=$container_uptime)"
fi
# Verify /proc/mounts is different inside the container (private mount table).
host_mounts_count=$(wc -l < /proc/mounts 2>/dev/null || echo "0")
container_mounts_count=$(sudo nsenter -t "$LEADER_PID" -m cat /proc/mounts 2>/dev/null | wc -l || echo "0")
if [[ "$container_mounts_count" -gt 0 && "$container_mounts_count" != "$host_mounts_count" ]]; then
  pass "Container /proc/mounts differs from host (host=$host_mounts_count, container=$container_mounts_count)"
else
  skip "Container /proc/mounts comparison" "could not compare mount counts"
fi
# ── 5. Cgroups v2 Delegation ────────────────────────────────────────────────
section "⚙️ 5. Cgroups v2 Delegation"
# In a hybrid-native workload, systemd inside should have its own cgroup
# subtree and be able to create child cgroups (delegation must be enabled).
# Probe the unit layouts volt may have used for this machine.
cgroup_path=""
for candidate in \
  "/sys/fs/cgroup/machine.slice/volt-hybrid@${HYB}.service" \
  "/sys/fs/cgroup/machine.slice/machine-${HYB}.scope" \
  "/sys/fs/cgroup/machine.slice/systemd-nspawn@${HYB}.service"; do
  if [[ -d "$candidate" ]]; then
    cgroup_path="$candidate"
    break
  fi
done
if [[ -n "$cgroup_path" ]]; then
  pass "Container cgroup found at $cgroup_path"
  # cgroup.subtree_control present means controllers can be passed downward.
  if [[ -f "$cgroup_path/cgroup.subtree_control" ]]; then
    subtree=$(cat "$cgroup_path/cgroup.subtree_control" 2>/dev/null)
    pass "cgroup.subtree_control exists (controllers: ${subtree:-none})"
  else
    skip "cgroup.subtree_control check" "file not found"
  fi
  # Memory controller availability in the container's subtree.
  if [[ -f "$cgroup_path/memory.max" ]]; then
    mem_max=$(cat "$cgroup_path/memory.max" 2>/dev/null)
    pass "memory.max is set ($mem_max)"
  else
    skip "memory.max check" "file not found in cgroup"
  fi
  # PIDs controller availability.
  if [[ -f "$cgroup_path/pids.max" ]]; then
    pids_max=$(cat "$cgroup_path/pids.max" 2>/dev/null)
    pass "pids.max is set ($pids_max)"
  else
    skip "pids.max check" "file not found in cgroup"
  fi
else
  skip "Cgroup path detection" "could not find container cgroup"
fi
# Verify systemd inside can manage services (proves cgroup delegation works).
# FIX: the previous invocation passed --mount-proc, which is an unshare(1)
# option, not an nsenter(1) one — nsenter rejected it, so this check always
# fell through to skip. Entering the mount namespace (-m) already resolves
# /proc to the container's own mount, so the flag is unnecessary.
inside_units=$(sudo nsenter -t "$LEADER_PID" -p -m /bin/systemctl list-units --type=service --no-pager 2>/dev/null | wc -l || echo "0")
if [[ "$inside_units" -gt 0 ]]; then
  pass "systemd inside can list units ($inside_units services)"
else
  skip "systemd inside unit listing" "could not list units"
fi
# ── 6. Stop gracefully ──────────────────────────────────────────────────────
# Stops the workload and verifies it is gone from machinectl and /proc,
# while the rootfs remains on disk (stop must not destroy data).
section "⏹️ 6. Stop Hybrid-Native Workload"
output=$(stop_workload "$HYB" 2>&1)
rc=$?  # capture immediately so later commands cannot clobber the status
assert_ok "Stop hybrid workload '$HYB'" test "$rc" -eq 0
sleep 2  # give systemd/machined a moment to settle the state transition
# Stopped == machinectl no longer reports State=running. A vanished machine
# also counts: machinectl fails, grep matches nothing, and the ! succeeds.
if ! sudo machinectl show "$HYB" --property=State 2>/dev/null | grep -q "running"; then
  pass "Hybrid workload no longer running after stop"
else
  fail "Hybrid workload no longer running after stop"
fi
# The recorded leader PID must no longer exist on the host.
if [[ -n "$LEADER_PID" && ! -d "/proc/$LEADER_PID" ]]; then
  pass "Leader PID ($LEADER_PID) is gone"
else
  if [[ -z "$LEADER_PID" ]]; then
    skip "Leader PID cleanup check" "no PID recorded"
  else
    fail "Leader PID ($LEADER_PID) is gone" "still exists"
  fi
fi
# Rootfs should still exist — only destroy removes it.
assert_dir_exists "Rootfs persists after stop" "/var/lib/volt/containers/$HYB"
# ── 7. Destroy and verify cleanup ───────────────────────────────────────────
# Destroys the workload and verifies every artifact (rootfs, unit file,
# nspawn config, cgroup) is removed, then prints the aggregated results.
section "🗑️ 7. Destroy Hybrid-Native Workload"
output=$(destroy_workload "$HYB" 2>&1)
rc=$?  # capture immediately so later commands cannot clobber the status
assert_ok "Destroy hybrid workload '$HYB'" test "$rc" -eq 0
assert_no_file "Rootfs removed" "/var/lib/volt/containers/$HYB"
assert_no_file "Unit file removed" "/etc/systemd/system/volt-hybrid@${HYB}.service"
assert_no_file "Nspawn config removed" "/etc/systemd/nspawn/${HYB}.nspawn"
# Cgroup should be cleaned up (may linger briefly after destroy).
if [[ -n "$cgroup_path" && ! -d "$cgroup_path" ]]; then
  pass "Cgroup directory cleaned up"
else
  if [[ -z "$cgroup_path" ]]; then
    skip "Cgroup cleanup check" "no cgroup path was found"
  else
    skip "Cgroup cleanup check" "cgroup may linger briefly"
  fi
fi
# Deregister $HYB from the cleanup list by rebuilding the array.
# FIX: the old form, ("${CLEANUP_WORKLOADS[@]/$HYB/}"), was a substring
# substitution, not element removal — it left empty elements behind and
# would mangle any other workload name containing "$HYB". The ${arr[@]+…}
# guards keep `set -u` happy on empty arrays.
remaining=()
for w in "${CLEANUP_WORKLOADS[@]+"${CLEANUP_WORKLOADS[@]}"}"; do
  [[ "$w" == "$HYB" ]] || remaining+=("$w")
done
CLEANUP_WORKLOADS=("${remaining[@]+"${remaining[@]}"}")
# ── Results ──────────────────────────────────────────────────────────────────
print_results "Hybrid-Native Mode Lifecycle"
exit $?