Files
volt/tests/hybrid/test_isolation.sh
Karl Clinger 0ebe75b2ca Volt CLI: source-available under AGPSL v5.0
Complete infrastructure platform CLI:
- Container runtime (systemd-nspawn)
- VoltVisor VMs (Neutron Stardust / QEMU)
- Stellarium CAS (content-addressed storage)
- ORAS Registry
- GitOps integration
- Landlock LSM security
- Compose orchestration
- Mesh networking

Copyright (c) Armored Gates LLC. All rights reserved.
Licensed under AGPSL v5.0
2026-03-21 02:08:15 -05:00

382 lines
16 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# ══════════════════════════════════════════════════════════════════════════════
# Volt Hybrid Integration Tests — Isolation Verification
#
# Verifies security isolation boundaries for hybrid-native workloads:
# 1. Process isolation — can't see host processes
# 2. Network namespace isolation — different IP / interfaces
# 3. Mount namespace isolation — different /proc/mounts
# 4. Cgroup isolation — resource limits enforced
# 5. OOM stress test — memory over-allocation kills inside, host unaffected
#
# All isolation is via Linux kernel primitives:
# Namespaces (PID, NET, MNT, UTS, IPC), cgroups v2, Landlock, Seccomp
# NO Docker. NO AppArmor. Landlock only.
#
# Requires: root, systemd-nspawn, base image
# ══════════════════════════════════════════════════════════════════════════════
# Deliberately no `-e`: individual checks report pass/fail and the run continues.
set -uo pipefail
source "$(dirname "$0")/test_helpers.sh"
# ── Prerequisites ─────────────────────────────────────────────────────────────
require_root
require_volt
require_nspawn
BASE_IMAGE="/var/lib/volt/images/ubuntu_24.04"
if ! require_image "$BASE_IMAGE"; then
  echo "SKIP: No base image."
  exit 0
fi
trap cleanup_all EXIT
echo "⚡ Volt Hybrid Integration Tests — Isolation Verification"
echo "════════════════════════════════════════════════════════════════"
ISO_WL=$(test_name "isolation")
# Create and start the hybrid workload.
# BUGFIX: use ">/dev/null 2>&1" — the previous "2>&1 >/dev/null" pointed
# stderr at the tty *before* stdout was rerouted, so errors still leaked out.
create_container "$ISO_WL" "$BASE_IMAGE" >/dev/null 2>&1
start_workload "$ISO_WL" >/dev/null 2>&1
if ! wait_running "$ISO_WL" 30; then
  echo "FATAL: Could not start workload for isolation tests"
  exit 1
fi
LEADER_PID=$(get_leader_pid "$ISO_WL")
if [[ -z "$LEADER_PID" || "$LEADER_PID" == "0" ]]; then
  echo "FATAL: No leader PID for workload"
  exit 1
fi
# ── 1. Process Isolation ────────────────────────────────────────────────────
section "🔒 1. Process Isolation (PID Namespace)"
# The workload must not be able to observe host processes. Compare the number
# of /proc entries visible inside vs. on the host, then probe for this very
# script's PID from within the container's PID namespace.
ctr_seen=$(sudo nsenter -t "$LEADER_PID" -p -m sh -c \
  "ls -d /proc/[0-9]* 2>/dev/null | wc -l" 2>/dev/null || echo "0")
host_seen=$(ls -d /proc/[0-9]* 2>/dev/null | wc -l)
if [[ "$ctr_seen" -gt 0 ]]; then
  pass "Container can see $ctr_seen processes"
else
  fail "Container can see processes" "got 0"
fi
if [[ "$ctr_seen" -lt "$host_seen" ]]; then
  pass "Container sees fewer processes ($ctr_seen) than host ($host_seen)"
else
  fail "Container sees fewer processes than host" "container=$ctr_seen, host=$host_seen"
fi
# The container must not be able to resolve this test script's host PID
script_pid=$$
visibility=$(sudo nsenter -t "$LEADER_PID" -p -m sh -c \
  "test -d /proc/$script_pid && echo 'yes' || echo 'no'" 2>/dev/null || echo "unknown")
case "$visibility" in
  no)  pass "Container cannot see host test script PID ($script_pid)" ;;
  yes) fail "Container should NOT see host PID $script_pid" "but it can" ;;
  *)   skip "Host PID visibility check" "could not determine" ;;
esac
# A distinct PID namespace shows up as a different ns inode number
pidns_host=$(stat -L -c '%i' /proc/self/ns/pid 2>/dev/null || echo "0")
pidns_ctr=$(sudo nsenter -t "$LEADER_PID" -p -m stat -L -c '%i' /proc/self/ns/pid 2>/dev/null || echo "0")
if [[ "$pidns_ctr" != "0" && "$pidns_ctr" != "$pidns_host" ]]; then
  pass "PID namespace inode differs (host=$pidns_host, container=$pidns_ctr)"
else
  skip "PID namespace inode check" "host=$pidns_host, container=$pidns_ctr"
fi
# PID 1 inside the container should be its own init, not the host's.
# (host_init is read for parity/debugging but not compared directly.)
host_init=$(cat /proc/1/comm 2>/dev/null || echo "")
ctr_init=$(sudo nsenter -t "$LEADER_PID" -p -m cat /proc/1/comm 2>/dev/null || echo "")
if [[ -n "$ctr_init" ]]; then
  pass "Container PID 1 process: $ctr_init"
  # In boot mode PID 1 is expected to be systemd (or another init)
  if [[ "$ctr_init" =~ systemd|init ]]; then
    pass "Container PID 1 is its own init system"
  else
    skip "Container PID 1 identity" "unexpected: $ctr_init"
  fi
fi
# ── 2. Network Namespace Isolation ──────────────────────────────────────────
section "🌐 2. Network Namespace Isolation"
# Verify the container has a different network namespace (distinct ns inode,
# different or no global IP, and its own interface list).
host_netns_inode=$(stat -L -c '%i' /proc/self/ns/net 2>/dev/null || echo "0")
container_netns_inode=$(sudo nsenter -t "$LEADER_PID" -n stat -L -c '%i' /proc/self/ns/net 2>/dev/null || echo "0")
if [[ "$host_netns_inode" != "$container_netns_inode" && "$container_netns_inode" != "0" ]]; then
  pass "Network namespace inode differs (host=$host_netns_inode, container=$container_netns_inode)"
else
  fail "Network namespace inode differs" "host=$host_netns_inode, container=$container_netns_inode"
fi
# Get the container's IP address — should differ from host
host_ip=$(ip -4 -o addr show scope global 2>/dev/null | awk '{print $4}' | head -1 | cut -d/ -f1)
container_ip=$(sudo nsenter -t "$LEADER_PID" -n ip -4 -o addr show scope global 2>/dev/null | awk '{print $4}' | head -1 | cut -d/ -f1)
if [[ -n "$container_ip" && -n "$host_ip" && "$container_ip" != "$host_ip" ]]; then
  pass "Container IP ($container_ip) differs from host IP ($host_ip)"
elif [[ -z "$container_ip" ]]; then
  # Container may only have loopback (NetworkNone mode or bridge not set up)
  skip "Container IP comparison" "container has no global IP (bridge may not be configured)"
else
  fail "Container IP should differ from host" "both are $host_ip"
fi
# Verify container has its own interfaces (not sharing host interfaces).
# BUGFIX: `grep -c` already prints "0" when nothing matches (it merely *exits*
# non-zero), so the previous `|| echo "0"` produced a two-line value ("0\n0")
# that broke the numeric [[ ... -gt/-lt ... ]] comparisons below. `|| true`
# absorbs the non-zero status without duplicating the count.
host_ifaces=$(ip link show 2>/dev/null | grep -c "^[0-9]")
container_ifaces=$(sudo nsenter -t "$LEADER_PID" -n ip link show 2>/dev/null | grep -c "^[0-9]" || true)
if [[ "$container_ifaces" -gt 0 ]]; then
  pass "Container has $container_ifaces network interfaces"
  if [[ "$container_ifaces" -lt "$host_ifaces" ]]; then
    pass "Container has fewer interfaces ($container_ifaces) than host ($host_ifaces)"
  else
    skip "Interface count comparison" "container=$container_ifaces, host=$host_ifaces"
  fi
else
  fail "Container should have at least loopback interface"
fi
# Verify loopback is present inside
if sudo nsenter -t "$LEADER_PID" -n ip link show lo 2>/dev/null | grep -q "UP"; then
  pass "Container loopback (lo) is UP"
else
  skip "Container loopback check" "lo may not be UP yet"
fi
# ── 3. Mount Namespace Isolation ────────────────────────────────────────────
section "📁 3. Mount Namespace Isolation"
# The container should have its own mount namespace with different mounts
host_mntns_inode=$(stat -L -c '%i' /proc/self/ns/mnt 2>/dev/null || echo "0")
container_mntns_inode=$(sudo nsenter -t "$LEADER_PID" -m stat -L -c '%i' /proc/self/ns/mnt 2>/dev/null || echo "0")
if [[ "$host_mntns_inode" != "$container_mntns_inode" && "$container_mntns_inode" != "0" ]]; then
  pass "Mount namespace inode differs (host=$host_mntns_inode, container=$container_mntns_inode)"
else
  fail "Mount namespace inode differs" "host=$host_mntns_inode, container=$container_mntns_inode"
fi
# Compare /proc/mounts content — the root mount entry should differ
host_root_mount=$(grep "^[^ ]* / " /proc/mounts 2>/dev/null | head -1)
container_root_mount=$(sudo nsenter -t "$LEADER_PID" -m cat /proc/mounts 2>/dev/null | grep "^[^ ]* / " | head -1)
if [[ -n "$container_root_mount" && "$container_root_mount" != "$host_root_mount" ]]; then
  pass "Container root mount differs from host"
elif [[ -z "$container_root_mount" ]]; then
  skip "Container root mount check" "could not read container /proc/mounts"
else
  fail "Container root mount should differ" "same as host"
fi
# Verify host's /home is not visible inside (private rootfs).
# BUGFIX: suppress stdout as well (was only 2>/dev/null) — on failure the
# successful `ls` otherwise dumped a directory listing into the test output.
if sudo nsenter -t "$LEADER_PID" -m ls /home/karl >/dev/null 2>&1; then
  fail "Host /home/karl should NOT be visible inside container"
else
  pass "Host /home/karl is NOT visible inside container"
fi
# Verify /proc inside is a new mount (procfs).
# BUGFIX: anchor the mountpoint with a trailing space and take the first match;
# "^proc /proc" alone also matched deeper mounts such as /proc/sys/..., which
# could yield a multi-line value and a spurious skip.
container_proc_type=$(sudo nsenter -t "$LEADER_PID" -m grep "^proc /proc " /proc/mounts 2>/dev/null | head -1 | awk '{print $3}')
if [[ "$container_proc_type" == "proc" ]]; then
  pass "Container has its own /proc (type=proc)"
else
  skip "Container /proc type check" "got: $container_proc_type"
fi
# ── 4. Cgroup Isolation ─────────────────────────────────────────────────────
section "⚙️ 4. Cgroup Isolation (Resource Limits)"
# Locate the cgroup v2 directory backing this workload; the unit may be
# registered under several names depending on how it was launched.
cgroup_path=""
for cg_dir in \
  "/sys/fs/cgroup/machine.slice/volt-hybrid@${ISO_WL}.service" \
  "/sys/fs/cgroup/machine.slice/machine-${ISO_WL}.scope" \
  "/sys/fs/cgroup/machine.slice/systemd-nspawn@${ISO_WL}.service"; do
  [[ -d "$cg_dir" ]] && { cgroup_path="$cg_dir"; break; }
done
# Fall back to a bounded filesystem search when no well-known path matched
if [[ -z "$cgroup_path" ]]; then
  cgroup_path=$(find /sys/fs/cgroup -maxdepth 5 -name "*${ISO_WL}*" -type d 2>/dev/null | head -1)
fi
if [[ -n "$cgroup_path" && -d "$cgroup_path" ]]; then
  pass "Cgroup found: $cgroup_path"
  # memory.max — a value of "max" means no hard limit was configured
  if [[ -f "$cgroup_path/memory.max" ]]; then
    mem_limit=$(cat "$cgroup_path/memory.max" 2>/dev/null)
    if [[ -n "$mem_limit" && "$mem_limit" != "max" ]]; then
      pass "Memory limit set: $mem_limit bytes"
    else
      skip "Memory limit" "set to 'max' (unlimited)"
    fi
  else
    skip "Memory limit check" "memory.max not found"
  fi
  # memory.current — non-zero usage shows accounting is active
  if [[ -f "$cgroup_path/memory.current" ]]; then
    mem_usage=$(cat "$cgroup_path/memory.current" 2>/dev/null)
    if [[ -n "$mem_usage" && "$mem_usage" != "0" ]]; then
      pass "Memory usage tracked: $mem_usage bytes"
    else
      skip "Memory usage" "current=0"
    fi
  fi
  # pids.max — cap on the number of tasks in the cgroup
  if [[ -f "$cgroup_path/pids.max" ]]; then
    pid_limit=$(cat "$cgroup_path/pids.max" 2>/dev/null)
    if [[ -n "$pid_limit" && "$pid_limit" != "max" ]]; then
      pass "PIDs limit set: $pid_limit"
    else
      skip "PIDs limit" "set to 'max' (unlimited)"
    fi
  fi
  # pids.current — live task count
  if [[ -f "$cgroup_path/pids.current" ]]; then
    pid_usage=$(cat "$cgroup_path/pids.current" 2>/dev/null)
    pass "PIDs current: $pid_usage"
  fi
  # cpu.weight — relative CPU share under contention
  if [[ -f "$cgroup_path/cpu.weight" ]]; then
    weight=$(cat "$cgroup_path/cpu.weight" 2>/dev/null)
    pass "CPU weight set: $weight"
  fi
  # cgroup.controllers — controllers delegated to this cgroup
  if [[ -f "$cgroup_path/cgroup.controllers" ]]; then
    ctl_list=$(cat "$cgroup_path/cgroup.controllers" 2>/dev/null)
    pass "Available controllers: $ctl_list"
  fi
else
  skip "Cgroup isolation checks" "could not find cgroup for $ISO_WL"
fi
# ── 5. OOM Stress Test ──────────────────────────────────────────────────────
section "💥 5. OOM Stress Test (Memory Overallocation)"
# This test creates a SEPARATE workload with a tight memory limit,
# then attempts to allocate more than the limit inside.
# Expected: the process inside gets OOM-killed, host is unaffected.
OOM_WL=$(test_name "oom-test")
# BUGFIX: use ">/dev/null 2>&1" — the previous "2>&1 >/dev/null" pointed
# stderr at the tty *before* stdout was rerouted, so errors still leaked out.
create_container "$OOM_WL" "$BASE_IMAGE" >/dev/null 2>&1
start_workload "$OOM_WL" >/dev/null 2>&1
if ! wait_running "$OOM_WL" 30; then
  skip "OOM test" "could not start OOM test workload"
else
  OOM_PID=$(get_leader_pid "$OOM_WL")
  # Locate this workload's cgroup so a tight memory limit (128 MiB) can be set
  oom_cgroup=""
  for candidate in \
    "/sys/fs/cgroup/machine.slice/volt-hybrid@${OOM_WL}.service" \
    "/sys/fs/cgroup/machine.slice/machine-${OOM_WL}.scope" \
    "/sys/fs/cgroup/machine.slice/systemd-nspawn@${OOM_WL}.service"; do
    if [[ -d "$candidate" ]]; then
      oom_cgroup="$candidate"
      break
    fi
  done
  if [[ -z "$oom_cgroup" ]]; then
    oom_cgroup=$(find /sys/fs/cgroup -maxdepth 5 -name "*${OOM_WL}*" -type d 2>/dev/null | head -1)
  fi
  if [[ -n "$oom_cgroup" && -f "$oom_cgroup/memory.max" ]]; then
    # Set hard limit to 128MB
    echo "134217728" | sudo tee "$oom_cgroup/memory.max" >/dev/null 2>&1
    current_limit=$(cat "$oom_cgroup/memory.max" 2>/dev/null)
    pass "OOM test: memory limit set to $current_limit bytes"
    # Record host memory before stress ($7 of `free -m` = "available")
    host_mem_before=$(free -m 2>/dev/null | awk '/^Mem:/{print $7}')
    pass "Host available memory before stress: ${host_mem_before}MB"
    # Try to push 256MB through the container (2× the limit). NOTE: dd with
    # of=/dev/null mostly exercises page cache, not anonymous memory, so the
    # real-allocation fallback below is what is expected to trigger the OOM.
    # (BUGFIX: the previously captured $oom_result was never used — run the
    # command directly and ignore its status.)
    sudo nsenter -t "$OOM_PID" -p -m -n sh -c \
      "dd if=/dev/zero of=/dev/null bs=1M count=256" >/dev/null 2>&1 || true
    # Check for OOM events in the cgroup
    if [[ -f "$oom_cgroup/memory.events" ]]; then
      oom_count=$(grep "^oom " "$oom_cgroup/memory.events" 2>/dev/null | awk '{print $2}')
      oom_kill_count=$(grep "^oom_kill " "$oom_cgroup/memory.events" 2>/dev/null | awk '{print $2}')
      if [[ "${oom_count:-0}" -gt 0 || "${oom_kill_count:-0}" -gt 0 ]]; then
        pass "OOM events triggered (oom=$oom_count, oom_kill=$oom_kill_count)"
      else
        # dd of=/dev/null doesn't actually allocate memory; write into the
        # container's /tmp instead, which is charged against the cgroup
        sudo nsenter -t "$OOM_PID" -p -m -n sh -c \
          "head -c 200M /dev/zero > /tmp/oom-alloc 2>/dev/null" || true
        sleep 2
        oom_count=$(grep "^oom " "$oom_cgroup/memory.events" 2>/dev/null | awk '{print $2}')
        oom_kill_count=$(grep "^oom_kill " "$oom_cgroup/memory.events" 2>/dev/null | awk '{print $2}')
        if [[ "${oom_count:-0}" -gt 0 || "${oom_kill_count:-0}" -gt 0 ]]; then
          pass "OOM events triggered after file allocation (oom=$oom_count, oom_kill=$oom_kill_count)"
        else
          skip "OOM events" "no oom events detected (oom=$oom_count, oom_kill=$oom_kill_count)"
        fi
      fi
    else
      skip "OOM events check" "memory.events not found"
    fi
    # Verify host is still healthy
    host_mem_after=$(free -m 2>/dev/null | awk '/^Mem:/{print $7}')
    pass "Host available memory after stress: ${host_mem_after}MB"
    # Host should still be responsive (if we got here, it is)
    if uptime &>/dev/null; then
      pass "Host is still responsive after OOM test"
    else
      fail "Host responsiveness check"
    fi
  else
    skip "OOM stress test" "could not find cgroup or memory.max for OOM workload"
  fi
fi
# Cleanup OOM workload.
# BUGFIX: "${arr[@]/$name/}" is a substring substitution — it blanks the
# matching element (leaving an empty entry for cleanup_all to trip on) and
# would corrupt any other name containing $OOM_WL. Rebuild the list with an
# exact-match filter instead.
destroy_workload "$OOM_WL"
_remaining=()
for _w in "${CLEANUP_WORKLOADS[@]}"; do
  [[ "$_w" == "$OOM_WL" ]] || _remaining+=("$_w")
done
CLEANUP_WORKLOADS=("${_remaining[@]}")
unset _remaining _w
# ── Cleanup main isolation workload ─────────────────────────────────────────
stop_workload "$ISO_WL" &>/dev/null
destroy_workload "$ISO_WL"
# BUGFIX: "${arr[@]/$name/}" is a substring substitution — it blanks the
# matching element (leaving an empty entry behind) and would corrupt any other
# name containing $ISO_WL. Rebuild the list with an exact-match filter.
_keep=()
for _w in "${CLEANUP_WORKLOADS[@]}"; do
  [[ "$_w" == "$ISO_WL" ]] || _keep+=("$_w")
done
CLEANUP_WORKLOADS=("${_keep[@]}")
unset _keep _w
# ── Results ──────────────────────────────────────────────────────────────────
print_results "Isolation Verification"
exit $?