#!/bin/bash
# ══════════════════════════════════════════════════════════════════════════════
# Volt Hybrid Integration Tests — Hybrid-Native Mode Lifecycle
#
# Tests the full lifecycle of a hybrid-native workload:
#   1. Create hybrid workload from image
#   2. Start and verify running with own kernel/init (boot mode)
#   3. Verify PID namespace isolation (PID 1 = systemd inside)
#   4. Verify private /proc (different from host)
#   5. Verify cgroups v2 delegation working
#   6. Stop gracefully
#   7. Destroy and verify cleanup
#
# Hybrid-native means: systemd-nspawn in --boot mode with full init inside,
# private /proc, /sys, delegated cgroups v2, own PID namespace.
#
# Requires: root, systemd-nspawn, base image
# ══════════════════════════════════════════════════════════════════════════════

# Deliberately no `set -e`: the helpers record failures and the run continues
# so all sections execute; `-u` and `pipefail` still catch scripting mistakes.
set -uo pipefail

source "$(dirname "$0")/test_helpers.sh"

# ── Prerequisites ─────────────────────────────────────────────────────────────
require_root
require_volt
require_nspawn

BASE_IMAGE="/var/lib/volt/images/ubuntu_24.04"
if ! require_image "$BASE_IMAGE"; then
  echo "SKIP: No base image."
  exit 0
fi

# cleanup_all (from test_helpers.sh) tears down anything left behind on exit.
trap cleanup_all EXIT

echo "⚡ Volt Hybrid Integration Tests — Hybrid-Native Mode Lifecycle"
echo "════════════════════════════════════════════════════════════════"

# Unique, per-run workload name so parallel/repeated runs don't collide.
HYB=$(test_name "hybrid")

# ── 1. Create hybrid workload ───────────────────────────────────────────────
section "📦 1. Create Hybrid-Native Workload"

output=$(create_container "$HYB" "$BASE_IMAGE" 2>&1)
# $? here is create_container's exit status (the assignment preserves it).
assert_ok "Create hybrid workload '$HYB'" test $? -eq 0

assert_dir_exists "Hybrid rootfs exists" "/var/lib/volt/containers/$HYB"
assert_file_exists "Hybrid unit file exists" "/etc/systemd/system/volt-hybrid@${HYB}.service"

# Verify the unit file is configured for boot mode.
unit_content=$(cat "/etc/systemd/system/volt-hybrid@${HYB}.service" 2>/dev/null)
# `--` stops grep option parsing so the leading dashes of the pattern are
# taken literally (the previous `\-\-boot` escaping was GNU-specific).
if echo "$unit_content" | grep -q -- "--boot"; then
  pass "Unit file configured for boot mode (--boot)"
else
  fail "Unit file configured for boot mode (--boot)"
fi

# Verify cgroup delegation is enabled.
if echo "$unit_content" | grep -q "Delegate=yes"; then
  pass "Cgroup delegation enabled (Delegate=yes)"
else
  # Fall back to the .nspawn config. NOTE(review): Boot=yes proves boot mode,
  # not delegation per se — presumably nspawn delegates for booted containers;
  # confirm this is an acceptable indirect signal.
  nspawn_content=$(cat "/etc/systemd/nspawn/${HYB}.nspawn" 2>/dev/null)
  if echo "$nspawn_content" | grep -q "Boot=yes"; then
    pass "Boot mode enabled in .nspawn config"
  else
    skip "Cgroup delegation verification" "not found in unit or nspawn config"
  fi
fi

# ── 2. Start and verify running with own init ───────────────────────────────
section "🚀 2. Start Hybrid-Native Workload"

output=$(start_workload "$HYB" 2>&1)
assert_ok "Start hybrid workload '$HYB'" test $? -eq 0

if wait_running "$HYB" 30; then
  pass "Hybrid workload reached running state"
else
  fail "Hybrid workload reached running state" "timed out"
fi

# Wait for init (systemd) inside to finish booting.
if wait_booted "$HYB" 30; then
  pass "Systemd inside hybrid workload reached running target"
else
  skip "Systemd inside hybrid workload reached running target" "may be degraded or slow"
fi

# The leader PID is the container's init as seen from the host; later
# sections nsenter through it, so it must be non-empty.
LEADER_PID=$(get_leader_pid "$HYB")
assert_nonempty "Leader PID is set" "$LEADER_PID"

# ── 3. PID Namespace Isolation ──────────────────────────────────────────────
section "🔒 3. PID Namespace Isolation"

# Inside a boot-mode container, PID 1 should be the init system (systemd/init).
# We check this via nsenter or machinectl shell.
# Read the container's view of /proc/1/comm via its PID + mount namespaces.
pid1_inside=$(sudo nsenter -t "$LEADER_PID" -p -m cat /proc/1/comm 2>/dev/null || echo "")
if [[ -n "$pid1_inside" ]]; then
  pass "Can read /proc/1/comm inside container (got: $pid1_inside)"
  if echo "$pid1_inside" | grep -qE "systemd|init"; then
    pass "PID 1 inside container is systemd/init"
  else
    fail "PID 1 inside container is systemd/init" "got: $pid1_inside"
  fi
else
  # Fallback: use machinectl shell (tail -1 skips the shell's banner line).
  pid1_inside=$(sudo machinectl shell "$HYB" /bin/cat /proc/1/comm 2>/dev/null | tail -1 || echo "")
  if echo "$pid1_inside" | grep -qE "systemd|init"; then
    pass "PID 1 inside container is systemd/init (via machinectl)"
  else
    skip "PID 1 inside container check" "could not read /proc/1/comm"
  fi
fi

# Host PID 1 should be different from container PID 1's view.
host_pid1=$(cat /proc/1/comm 2>/dev/null || echo "unknown")
pass "Host PID 1 is: $host_pid1"

# Verify the container cannot see host processes: its /proc should expose far
# fewer processes than the host's. (The previously assigned host_unique_pid=$$
# was never used and has been removed.)
inside_ps=$(sudo nsenter -t "$LEADER_PID" -p -m sh -c "cat /proc/*/comm 2>/dev/null" 2>/dev/null || echo "")
if [[ -n "$inside_ps" ]]; then
  # Count host processes with a glob array instead of parsing `ls | wc -l`.
  host_comm_files=(/proc/*/comm)
  host_proc_count=${#host_comm_files[@]}
  # The inner count must run in the container's own shell, so `ls | wc -l`
  # is unavoidable there; /proc entries never contain newlines.
  inside_proc_count=$(sudo nsenter -t "$LEADER_PID" -p -m sh -c "ls /proc/*/comm 2>/dev/null | wc -l" 2>/dev/null || echo "0")
  if [[ "$inside_proc_count" -lt "$host_proc_count" ]]; then
    pass "Container has fewer processes ($inside_proc_count) than host ($host_proc_count)"
  else
    fail "Container has fewer processes than host" "inside=$inside_proc_count, host=$host_proc_count"
  fi
else
  skip "Process count comparison" "could not enumerate container processes"
fi

# ── 4. Private /proc ────────────────────────────────────────────────────────
section "📂 4. Private /proc Verification"

# In boot mode, the container gets its own /proc mount.
# The host's /proc/version and the container's should differ in PID views.
# Compare PID namespace identities (the ns symlink encodes the inode, which is
# unique per namespace).
host_pidns=$(readlink /proc/self/ns/pid 2>/dev/null || echo "host")
container_pidns=$(sudo nsenter -t "$LEADER_PID" -p -m readlink /proc/self/ns/pid 2>/dev/null || echo "container")
if [[ "$host_pidns" != "$container_pidns" ]]; then
  pass "PID namespace differs (host=$host_pidns, container=$container_pidns)"
else
  # PID namespace inode comparison inconclusive.
  skip "PID namespace differs" "both report same namespace (may need -p flag)"
fi

# Check /proc/uptime inside — the container booted after the host, so its
# uptime should be strictly smaller.
host_uptime=$(awk '{print int($1)}' /proc/uptime 2>/dev/null || echo "0")
container_uptime=$(sudo nsenter -t "$LEADER_PID" -p -m cat /proc/uptime 2>/dev/null | awk '{print int($1)}' || echo "0")
# Guard against empty expansions (awk emits nothing when nsenter/cat fail but
# the pipeline still exits 0), which would break the arithmetic test below.
host_uptime=${host_uptime:-0}
container_uptime=${container_uptime:-0}
if [[ "$container_uptime" -lt "$host_uptime" ]]; then
  pass "Container uptime ($container_uptime s) < host uptime ($host_uptime s)"
else
  skip "Container uptime check" "uptime comparison inconclusive (host=$host_uptime, container=$container_uptime)"
fi

# Verify /proc/mounts is different inside the container (only the mount
# namespace is needed here, hence -m without -p).
host_mounts_count=$(wc -l < /proc/mounts 2>/dev/null || echo "0")
container_mounts_count=$(sudo nsenter -t "$LEADER_PID" -m cat /proc/mounts 2>/dev/null | wc -l || echo "0")
if [[ "$container_mounts_count" -gt 0 && "$container_mounts_count" != "$host_mounts_count" ]]; then
  pass "Container /proc/mounts differs from host (host=$host_mounts_count, container=$container_mounts_count)"
else
  skip "Container /proc/mounts comparison" "could not compare mount counts"
fi

# ── 5. Cgroups v2 Delegation ────────────────────────────────────────────────
section "⚙️ 5. Cgroups v2 Delegation"

# In a hybrid-native workload, systemd inside should have its own cgroup subtree
# and be able to create child cgroups (delegation must be enabled).
# Find the container's cgroup path: Volt may register it as its own service,
# a machined scope, or a plain nspawn service — probe all three.
cgroup_path=""
for candidate in \
  "/sys/fs/cgroup/machine.slice/volt-hybrid@${HYB}.service" \
  "/sys/fs/cgroup/machine.slice/machine-${HYB}.scope" \
  "/sys/fs/cgroup/machine.slice/systemd-nspawn@${HYB}.service"; do
  if [[ -d "$candidate" ]]; then
    cgroup_path="$candidate"
    break
  fi
done

if [[ -n "$cgroup_path" ]]; then
  pass "Container cgroup found at $cgroup_path"

  # cgroup.subtree_control existing means controllers can be delegated down.
  if [[ -f "$cgroup_path/cgroup.subtree_control" ]]; then
    subtree=$(cat "$cgroup_path/cgroup.subtree_control" 2>/dev/null)
    pass "cgroup.subtree_control exists (controllers: ${subtree:-none})"
  else
    skip "cgroup.subtree_control check" "file not found"
  fi

  # Memory controller available in the container's subtree?
  if [[ -f "$cgroup_path/memory.max" ]]; then
    mem_max=$(cat "$cgroup_path/memory.max" 2>/dev/null)
    pass "memory.max is set ($mem_max)"
  else
    skip "memory.max check" "file not found in cgroup"
  fi

  # PIDs controller available?
  if [[ -f "$cgroup_path/pids.max" ]]; then
    pids_max=$(cat "$cgroup_path/pids.max" 2>/dev/null)
    pass "pids.max is set ($pids_max)"
  else
    skip "pids.max check" "file not found in cgroup"
  fi
else
  skip "Cgroup path detection" "could not find container cgroup"
fi

# Verify systemd inside can manage services (proves cgroup delegation works).
# NOTE: `--mount-proc` is an unshare(1) option, not nsenter(1) — passing it
# made nsenter fail, so this check previously always reported 0 units.
# Entering the mount namespace (-m) already gives us the container's /proc.
inside_units=$(sudo nsenter -t "$LEADER_PID" -p -m /bin/systemctl list-units --type=service --no-pager 2>/dev/null | wc -l || echo "0")
if [[ "$inside_units" -gt 0 ]]; then
  pass "systemd inside can list units ($inside_units services)"
else
  skip "systemd inside unit listing" "could not list units"
fi

# ── 6. Stop gracefully ──────────────────────────────────────────────────────
section "⏹️ 6. Stop Hybrid-Native Workload"

output=$(stop_workload "$HYB" 2>&1)
assert_ok "Stop hybrid workload '$HYB'" test $? -eq 0

sleep 2

# Verify stopped: machined should no longer report the machine as running.
if ! sudo machinectl show "$HYB" --property=State 2>/dev/null | grep -q "running"; then
  pass "Hybrid workload no longer running after stop"
else
  fail "Hybrid workload no longer running after stop"
fi

# Verify leader PID is gone.
if [[ -n "$LEADER_PID" && ! -d "/proc/$LEADER_PID" ]]; then
  pass "Leader PID ($LEADER_PID) is gone"
else
  if [[ -z "$LEADER_PID" ]]; then
    skip "Leader PID cleanup check" "no PID recorded"
  else
    fail "Leader PID ($LEADER_PID) is gone" "still exists"
  fi
fi

# Rootfs should still exist — stop is not destroy.
assert_dir_exists "Rootfs persists after stop" "/var/lib/volt/containers/$HYB"

# ── 7. Destroy and verify cleanup ───────────────────────────────────────────
section "🗑️ 7. Destroy Hybrid-Native Workload"

output=$(destroy_workload "$HYB" 2>&1)
assert_ok "Destroy hybrid workload '$HYB'" test $? -eq 0

assert_no_file "Rootfs removed" "/var/lib/volt/containers/$HYB"
assert_no_file "Unit file removed" "/etc/systemd/system/volt-hybrid@${HYB}.service"
assert_no_file "Nspawn config removed" "/etc/systemd/nspawn/${HYB}.nspawn"

# Cgroup should be cleaned up.
if [[ -n "$cgroup_path" && ! -d "$cgroup_path" ]]; then
  pass "Cgroup directory cleaned up"
else
  if [[ -z "$cgroup_path" ]]; then
    skip "Cgroup cleanup check" "no cgroup path was found"
  else
    skip "Cgroup cleanup check" "cgroup may linger briefly"
  fi
fi

# Remove $HYB from the cleanup list — it has already been destroyed.
# The previous `"${CLEANUP_WORKLOADS[@]/$HYB/}"` only blanked the matching
# element, leaving an empty string for cleanup_all to trip over; rebuild the
# array without the entry instead.
remaining=()
for w in "${CLEANUP_WORKLOADS[@]}"; do
  [[ "$w" == "$HYB" ]] || remaining+=("$w")
done
CLEANUP_WORKLOADS=()
if ((${#remaining[@]} > 0)); then
  CLEANUP_WORKLOADS=("${remaining[@]}")
fi

# ── Results ─────────────────────────────────────────────────────────────────
print_results "Hybrid-Native Mode Lifecycle"
exit $?