Volt CLI: source-available under AGPSL v5.0

Complete infrastructure platform CLI:
- Container runtime (systemd-nspawn)
- VoltVisor VMs (Neutron Stardust / QEMU)
- Stellarium CAS (content-addressed storage)
- ORAS Registry
- GitOps integration
- Landlock LSM security
- Compose orchestration
- Mesh networking

Copyright (c) Armored Gates LLC. All rights reserved.
Licensed under AGPSL v5.0
This commit is contained in:
Karl Clinger
2026-03-21 00:30:23 -05:00
commit 81ad0b597c
106 changed files with 35984 additions and 0 deletions

View File

@@ -0,0 +1,787 @@
/*
Hybrid Backend - Container runtime using systemd-nspawn in boot mode with
kernel isolation for Volt hybrid-native workloads.
This backend extends the standard systemd-nspawn approach to support:
- Full boot mode (--boot) with optional custom kernel
- Cgroups v2 delegation for nested resource control
- Private /proc and /sys views
- User namespace isolation (--private-users)
- Landlock LSM policies (NEVER AppArmor)
- Seccomp profile selection
- Per-container resource limits
Uses systemd-nspawn as the underlying engine. NOT a custom runtime.
Copyright (c) Armored Gates LLC. All rights reserved.
*/
package hybrid
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/armoredgate/volt/pkg/backend"
"github.com/armoredgate/volt/pkg/kernel"
)
// init registers the hybrid backend in the global backend registry so it
// can be selected by name at runtime.
func init() {
	factory := func() backend.ContainerBackend {
		return New()
	}
	backend.Register("hybrid", factory)
}
const (
	// defaultContainerBaseDir holds one rootfs directory per container.
	defaultContainerBaseDir = "/var/lib/volt/containers"
	// defaultImageBaseDir holds extracted image rootfs trees.
	defaultImageBaseDir = "/var/lib/volt/images"
	// defaultKernelDir is the default store handed to kernel.NewManager.
	defaultKernelDir = "/var/lib/volt/kernels"
	// unitPrefix is the service-unit prefix for hybrid containers.
	// NOTE(review): currently unreferenced — unitName builds the same
	// prefix inline; keep the two in sync.
	unitPrefix = "volt-hybrid@"
	// unitDir is where per-container service unit files are written.
	unitDir = "/etc/systemd/system"
	// nspawnConfigDir holds per-container .nspawn configuration files.
	nspawnConfigDir = "/etc/systemd/nspawn"
)
// Backend implements backend.ContainerBackend using systemd-nspawn in boot
// mode with hybrid-native kernel isolation.
type Backend struct {
	containerBaseDir string          // root directory holding one rootfs per container
	imageBaseDir     string          // root directory holding extracted image trees
	kernelManager    *kernel.Manager // resolves/stores kernels for boot mode
}
// New constructs a hybrid Backend rooted at the default /var/lib/volt paths.
func New() *Backend {
	b := &Backend{
		containerBaseDir: defaultContainerBaseDir,
		imageBaseDir:     defaultImageBaseDir,
	}
	b.kernelManager = kernel.NewManager(defaultKernelDir)
	return b
}
// Name reports the registry identifier of this backend.
func (b *Backend) Name() string {
	return "hybrid"
}
// Available reports whether the hybrid backend can run on this host:
// systemd-nspawn must be installed, and the host kernel should expose the
// features hybrid-native mode relies on.
func (b *Backend) Available() bool {
	if _, lookErr := exec.LookPath("systemd-nspawn"); lookErr != nil {
		return false
	}
	// Kernel-feature validation is best-effort: when it cannot run at all
	// (e.g. no config.gz), assume availability and let Init warn later.
	results, err := kernel.ValidateHostKernel()
	if err != nil {
		return true
	}
	return kernel.AllFeaturesPresent(results)
}
// Init prepares the backend for use. A non-empty dataDir relocates the
// container, image, and kernel directories underneath it before the kernel
// manager is initialized.
func (b *Backend) Init(dataDir string) error {
	if dataDir == "" {
		return b.kernelManager.Init()
	}
	b.containerBaseDir = filepath.Join(dataDir, "containers")
	b.imageBaseDir = filepath.Join(dataDir, "images")
	b.kernelManager = kernel.NewManager(filepath.Join(dataDir, "kernels"))
	return b.kernelManager.Init()
}
// ── Capability flags ─────────────────────────────────────────────────────────

// SupportsVMs advertises VM support to callers of the backend interface.
func (b *Backend) SupportsVMs() bool { return true }

// SupportsServices advertises in-container service management support.
func (b *Backend) SupportsServices() bool { return true }

// SupportsNetworking advertises container networking support.
func (b *Backend) SupportsNetworking() bool { return true }

// SupportsTuning advertises per-container resource tuning support.
func (b *Backend) SupportsTuning() bool { return true }
// ── Helpers ──────────────────────────────────────────────────────────────────
// unitName returns the systemd service unit name for a hybrid container,
// e.g. "volt-hybrid@web.service" for container "web".
func unitName(name string) string {
	// Plain concatenation: clearer and cheaper than fmt.Sprintf for a
	// fixed prefix/suffix pair. Must stay in sync with the unitPrefix
	// constant declared above.
	return "volt-hybrid@" + name + ".service"
}
// unitFilePath returns the full path to a hybrid container's service unit
// file, placed under unitDir (/etc/systemd/system).
func unitFilePath(name string) string {
	return filepath.Join(unitDir, unitName(name))
}
// containerDir returns the rootfs directory for a container: one
// subdirectory of containerBaseDir per container name.
func (b *Backend) containerDir(name string) string {
	return filepath.Join(b.containerBaseDir, name)
}
// runCommand runs name with args and returns the whitespace-trimmed
// combined stdout+stderr, along with the command's error (if any).
func runCommand(name string, args ...string) (string, error) {
	combined, runErr := exec.Command(name, args...).CombinedOutput()
	return strings.TrimSpace(string(combined)), runErr
}
// runCommandSilent runs name with args and returns whitespace-trimmed
// stdout only (stderr is not included in the returned string).
func runCommandSilent(name string, args ...string) (string, error) {
	stdout, runErr := exec.Command(name, args...).Output()
	return strings.TrimSpace(string(stdout)), runErr
}
// runCommandInteractive runs a command wired to the caller's terminal:
// stdin, stdout, and stderr are all inherited from this process.
func runCommandInteractive(name string, args ...string) error {
	cmd := exec.Command(name, args...)
	cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
	return cmd.Run()
}
// fileExists reports whether path can be stat'ed. Note that despite the
// name it returns true for directories too; dirExists narrows the check.
func fileExists(path string) bool {
	if _, err := os.Stat(path); err != nil {
		return false
	}
	return true
}
// dirExists reports whether path exists and is a directory.
func dirExists(path string) bool {
	st, statErr := os.Stat(path)
	return statErr == nil && st.IsDir()
}
// resolveImagePath resolves an --image value to an on-disk rootfs directory.
//
// Resolution order:
//  1. img taken literally as a directory path
//  2. <imageBaseDir>/<img>
//  3. <imageBaseDir>/<img with ":" replaced by "_"> (registry-style tags)
//
// Returns an error listing every candidate checked when nothing matches.
func (b *Backend) resolveImagePath(img string) (string, error) {
	if dirExists(img) {
		return img, nil
	}
	candidates := []string{filepath.Join(b.imageBaseDir, img)}
	// Registry-style references ("alpine:3.19") are stored on disk with
	// ":" replaced by "_". Only add that variant when it actually differs,
	// avoiding a redundant stat and a doubled path in the error message.
	if normalized := strings.ReplaceAll(img, ":", "_"); normalized != img {
		candidates = append(candidates, filepath.Join(b.imageBaseDir, normalized))
	}
	for _, p := range candidates {
		if dirExists(p) {
			return p, nil
		}
	}
	return "", fmt.Errorf("image %q not found (checked %s)", img, strings.Join(candidates, ", "))
}
// resolveContainerCommand maps a bare command name to an absolute path
// inside the container's rootfs by probing the conventional bin/sbin
// directories. Absolute commands and unresolvable names pass through
// unchanged.
func (b *Backend) resolveContainerCommand(name, cmd string) string {
	if strings.HasPrefix(cmd, "/") {
		return cmd
	}
	rootfs := b.containerDir(name)
	for _, dir := range []string{
		"usr/bin", "bin", "usr/sbin", "sbin",
		"usr/local/bin", "usr/local/sbin",
	} {
		if fileExists(filepath.Join(rootfs, dir, cmd)) {
			// Return the container-relative absolute path, not the
			// host-side path that was probed.
			return "/" + dir + "/" + cmd
		}
	}
	return cmd
}
// isContainerRunning reports whether the container is up, consulting the
// machined registration first and the service unit state as a fallback.
func isContainerRunning(name string) bool {
	if out, err := runCommandSilent("machinectl", "show", name, "--property=State"); err == nil && strings.Contains(out, "running") {
		return true
	}
	out, err := runCommandSilent("systemctl", "is-active", unitName(name))
	return err == nil && strings.TrimSpace(out) == "active"
}
// pidFromShowOutput extracts a usable PID from a "Key=value" property line
// as printed by `machinectl show` / `systemctl show`. Returns "" when the
// line has no "=", or the value is empty or "0" (systemd's "no PID").
func pidFromShowOutput(out string) string {
	if _, value, ok := strings.Cut(out, "="); ok {
		pid := strings.TrimSpace(value)
		if pid != "" && pid != "0" {
			return pid
		}
	}
	return ""
}

// getContainerLeaderPID returns the leader (init) PID of a running
// container, consulting machinectl first and falling back to the service
// unit's MainPID. The previously duplicated parsing is shared via
// pidFromShowOutput.
func getContainerLeaderPID(name string) (string, error) {
	if out, err := runCommandSilent("machinectl", "show", name, "--property=Leader"); err == nil {
		if pid := pidFromShowOutput(out); pid != "" {
			return pid, nil
		}
	}
	if out, err := runCommandSilent("systemctl", "show", unitName(name), "--property=MainPID"); err == nil {
		if pid := pidFromShowOutput(out); pid != "" {
			return pid, nil
		}
	}
	return "", fmt.Errorf("no running PID found for container %q", name)
}
// daemonReload runs systemctl daemon-reload so newly written or removed
// unit files take effect. Command output is discarded; only the error is
// propagated.
func daemonReload() error {
	_, err := runCommand("systemctl", "daemon-reload")
	return err
}
// ── Unit File Generation ─────────────────────────────────────────────────────
// writeUnitFile writes the systemd-nspawn service unit for a hybrid container.
// Uses --boot mode: the container boots with its own init (systemd or similar),
// providing private /proc and /sys views and full service management inside.
//
// kernelPath, when non-empty, is only surfaced to the container via the
// VOLT_KERNEL environment variable — nothing here boots that kernel directly.
func (b *Backend) writeUnitFile(name string, iso *IsolationConfig, kernelPath string) error {
	// Build the ExecStart command line.
	var nspawnArgs []string
	// Core boot-mode flags. --keep-unit keeps the machine registered under
	// this service unit rather than a transient scope.
	nspawnArgs = append(nspawnArgs,
		"--quiet",
		"--keep-unit",
		"--boot",
		"--machine="+name,
		"--directory="+b.containerDir(name),
	)
	// Runtime-identifying environment, visible to the container's init.
	nspawnArgs = append(nspawnArgs,
		"--setenv=VOLT_CONTAINER="+name,
		"--setenv=VOLT_RUNTIME=hybrid",
	)
	if kernelPath != "" {
		nspawnArgs = append(nspawnArgs, "--setenv=VOLT_KERNEL="+kernelPath)
	}
	// Isolation-specific nspawn args (resources, network, seccomp, user ns).
	if iso != nil {
		nspawnArgs = append(nspawnArgs, iso.NspawnArgs()...)
	}
	// NOTE(review): a plain space join breaks if name or any path ever
	// contains whitespace; systemd unit files would also need escaping.
	execStart := "/usr/bin/systemd-nspawn " + strings.Join(nspawnArgs, " ")
	// Build property lines for the unit file. These are emitted as "#"
	// comments only — the actual limits are applied via the --property=
	// flags contributed by iso.NspawnArgs() above.
	var propertyLines string
	if iso != nil {
		for _, prop := range iso.Resources.SystemdProperties() {
			propertyLines += fmt.Sprintf("# cgroup: %s\n", prop)
		}
	}
	// %%i renders as %i (the unit instance name) in the generated file.
	unit := fmt.Sprintf(`[Unit]
Description=Volt Hybrid Container: %%i
Documentation=https://volt.armoredgate.com/docs/hybrid
After=network.target
Requires=network.target
[Service]
Type=notify
NotifyAccess=all
%sExecStart=%s
KillMode=mixed
Restart=on-failure
RestartSec=5s
WatchdogSec=3min
Slice=volt-hybrid.slice
# Boot-mode containers send READY=1 when init is up
TimeoutStartSec=90s
[Install]
WantedBy=machines.target
`, propertyLines, execStart)
	return os.WriteFile(unitFilePath(name), []byte(unit), 0644)
}
// ── Create ───────────────────────────────────────────────────────────────────
// Create provisions a new hybrid container: it copies (or creates) the
// rootfs, resolves a boot kernel, derives an isolation profile from opts,
// and writes the systemd service unit plus the .nspawn config. When
// opts.Start is set, the container is started immediately.
func (b *Backend) Create(opts backend.CreateOptions) error {
	destDir := b.containerDir(opts.Name)
	if dirExists(destDir) {
		return fmt.Errorf("container %q already exists at %s", opts.Name, destDir)
	}
	fmt.Printf("Creating hybrid container: %s\n", opts.Name)
	// Resolve image. Without an image, start from an empty rootfs dir.
	if opts.Image != "" {
		srcDir, err := b.resolveImagePath(opts.Image)
		if err != nil {
			return fmt.Errorf("image resolution failed: %w", err)
		}
		fmt.Printf(" Image: %s → %s\n", opts.Image, srcDir)
		if err := os.MkdirAll(b.containerBaseDir, 0755); err != nil {
			return fmt.Errorf("failed to create container base dir: %w", err)
		}
		fmt.Printf(" Copying rootfs...\n")
		// cp -a preserves ownership, permissions, and symlinks.
		out, err := runCommand("cp", "-a", srcDir, destDir)
		if err != nil {
			return fmt.Errorf("failed to copy image rootfs: %s", out)
		}
	} else {
		if err := os.MkdirAll(destDir, 0755); err != nil {
			return fmt.Errorf("failed to create container dir: %w", err)
		}
	}
	// Resolve kernel. Failure is a warning only — boot may still succeed
	// depending on what the image provides.
	kernelPath, err := b.kernelManager.ResolveKernel("") // default kernel
	if err != nil {
		fmt.Printf(" Warning: no kernel resolved (%v), boot mode may fail\n", err)
	} else {
		fmt.Printf(" Kernel: %s\n", kernelPath)
	}
	// Build isolation config from create options, starting from the
	// security-first defaults.
	iso := DefaultIsolation(destDir)
	// Apply resource overrides from create options.
	if opts.Memory != "" {
		iso.Resources.MemoryHard = opts.Memory
		fmt.Printf(" Memory: %s\n", opts.Memory)
	}
	if opts.CPU > 0 {
		// Map CPU count to a cpuset range pinned to the first N cores.
		iso.Resources.CPUSet = fmt.Sprintf("0-%d", opts.CPU-1)
		fmt.Printf(" CPUs: %d\n", opts.CPU)
	}
	// Apply network configuration: known mode names select a mode; any
	// other value is treated as a bridge name with private networking.
	if opts.Network != "" {
		switch NetworkMode(opts.Network) {
		case NetworkPrivate, NetworkHost, NetworkNone:
			iso.Network.Mode = NetworkMode(opts.Network)
		default:
			// Treat as bridge name.
			iso.Network.Mode = NetworkPrivate
			iso.Network.Bridge = opts.Network
		}
		fmt.Printf(" Network: %s\n", opts.Network)
	}
	// Add port forwards; protocol defaults to tcp.
	for _, pm := range opts.Ports {
		proto := pm.Protocol
		if proto == "" {
			proto = "tcp"
		}
		iso.Network.PortForwards = append(iso.Network.PortForwards, PortForward{
			HostPort:      pm.HostPort,
			ContainerPort: pm.ContainerPort,
			Protocol:      proto,
		})
	}
	// Add environment variables.
	// NOTE(review): this loop is a no-op — opts.Env is never threaded into
	// the unit file or the .nspawn config, so user-supplied env vars are
	// silently dropped. TODO: plumb them through writeUnitFile.
	for _, env := range opts.Env {
		_ = env
	}
	// Mount volumes.
	// NOTE(review): also a no-op — the computed bind flag is discarded, so
	// volume mounts are silently dropped. TODO: pass --bind/--bind-ro
	// through to the nspawn invocation.
	for _, vol := range opts.Volumes {
		bindFlag := ""
		if vol.ReadOnly {
			bindFlag = "--bind-ro="
		} else {
			bindFlag = "--bind="
		}
		_ = bindFlag + vol.HostPath + ":" + vol.ContainerPath
	}
	// Write systemd unit file (non-fatal: the rootfs is already in place).
	if err := b.writeUnitFile(opts.Name, iso, kernelPath); err != nil {
		fmt.Printf(" Warning: could not write unit file: %v\n", err)
	} else {
		fmt.Printf(" Unit: %s\n", unitFilePath(opts.Name))
	}
	// Write .nspawn config file.
	// NOTE(review): MkdirAll error deliberately ignored — a failure will
	// surface as a WriteFile warning just below.
	os.MkdirAll(nspawnConfigDir, 0755)
	configPath := filepath.Join(nspawnConfigDir, opts.Name+".nspawn")
	nspawnConfig := iso.NspawnConfigBlock(opts.Name)
	if err := os.WriteFile(configPath, []byte(nspawnConfig), 0644); err != nil {
		fmt.Printf(" Warning: could not write nspawn config: %v\n", err)
	}
	if err := daemonReload(); err != nil {
		fmt.Printf(" Warning: daemon-reload failed: %v\n", err)
	}
	fmt.Printf("\nHybrid container %s created.\n", opts.Name)
	if opts.Start {
		fmt.Printf("Starting hybrid container %s...\n", opts.Name)
		out, err := runCommand("systemctl", "start", unitName(opts.Name))
		if err != nil {
			return fmt.Errorf("failed to start container: %s", out)
		}
		fmt.Printf("Hybrid container %s started.\n", opts.Name)
	} else {
		fmt.Printf("Start with: volt container start %s\n", opts.Name)
	}
	return nil
}
// ── Start ────────────────────────────────────────────────────────────────────
// Start boots an existing hybrid container through its service unit. It
// fails fast when the container's unit file is missing.
func (b *Backend) Start(name string) error {
	unitFile := unitFilePath(name)
	if !fileExists(unitFile) {
		return fmt.Errorf("container %q does not exist (no unit file at %s)", name, unitFile)
	}
	fmt.Printf("Starting hybrid container: %s\n", name)
	if out, err := runCommand("systemctl", "start", unitName(name)); err != nil {
		return fmt.Errorf("failed to start container %s: %s", name, out)
	}
	fmt.Printf("Hybrid container %s started.\n", name)
	return nil
}
// ── Stop ─────────────────────────────────────────────────────────────────────
// Stop shuts down a hybrid container by stopping its service unit.
func (b *Backend) Stop(name string) error {
	fmt.Printf("Stopping hybrid container: %s\n", name)
	if out, err := runCommand("systemctl", "stop", unitName(name)); err != nil {
		return fmt.Errorf("failed to stop container %s: %s", name, out)
	}
	fmt.Printf("Hybrid container %s stopped.\n", name)
	return nil
}
// ── Delete ───────────────────────────────────────────────────────────────────
// Delete removes a hybrid container: its service unit, .nspawn config, and
// rootfs. A running (or mid-boot) container is refused unless force is set,
// in which case it is stopped first.
func (b *Backend) Delete(name string, force bool) error {
	rootfs := b.containerDir(name)
	// "activating" counts as running so the rootfs is not removed from
	// under a container that is still booting.
	unitActive, _ := runCommandSilent("systemctl", "is-active", unitName(name))
	if strings.TrimSpace(unitActive) == "active" || strings.TrimSpace(unitActive) == "activating" {
		if !force {
			return fmt.Errorf("container %q is running — stop it first or use --force", name)
		}
		fmt.Printf("Stopping container %s...\n", name)
		// Best-effort stop; the error is ignored since deletion proceeds
		// regardless.
		runCommand("systemctl", "stop", unitName(name))
	}
	fmt.Printf("Deleting hybrid container: %s\n", name)
	// Remove unit file. Disable first so enablement symlinks disappear;
	// failures here are non-fatal (warn and continue).
	unitPath := unitFilePath(name)
	if fileExists(unitPath) {
		runCommand("systemctl", "disable", unitName(name))
		if err := os.Remove(unitPath); err != nil {
			fmt.Printf(" Warning: could not remove unit file: %v\n", err)
		} else {
			fmt.Printf(" Removed unit: %s\n", unitPath)
		}
	}
	// Remove .nspawn config (best-effort; error deliberately ignored).
	nspawnConfig := filepath.Join(nspawnConfigDir, name+".nspawn")
	if fileExists(nspawnConfig) {
		os.Remove(nspawnConfig)
	}
	// Remove rootfs — the only removal treated as fatal, since leftover
	// data is what the operator most cares about.
	if dirExists(rootfs) {
		if err := os.RemoveAll(rootfs); err != nil {
			return fmt.Errorf("failed to remove rootfs at %s: %w", rootfs, err)
		}
		fmt.Printf(" Removed rootfs: %s\n", rootfs)
	}
	// Best-effort reload so systemd forgets the deleted unit.
	daemonReload()
	fmt.Printf("Hybrid container %s deleted.\n", name)
	return nil
}
// ── Exec ─────────────────────────────────────────────────────────────────────
// Exec runs a command inside a running hybrid container by entering the
// leader process's namespaces with nsenter. The command defaults to /bin/sh
// when opts.Command is empty, and a bare command name is resolved to an
// absolute path inside the container's rootfs.
func (b *Backend) Exec(name string, opts backend.ExecOptions) error {
	// Work on a copy: the original code aliased opts.Command, so resolving
	// argv[0] below mutated the caller's slice.
	cmdArgs := make([]string, 0, len(opts.Command)+1)
	cmdArgs = append(cmdArgs, opts.Command...)
	if len(cmdArgs) == 0 {
		cmdArgs = append(cmdArgs, "/bin/sh")
	}
	cmdArgs[0] = b.resolveContainerCommand(name, cmdArgs[0])
	pid, err := getContainerLeaderPID(name)
	if err != nil {
		return fmt.Errorf("container %q is not running: %w", name, err)
	}
	// Join the mount, UTS, IPC, network, and PID namespaces of the leader.
	nsenterArgs := []string{"-t", pid, "-m", "-u", "-i", "-n", "-p", "--"}
	// Inject environment variables through a single env(1) prefix instead
	// of one nested "env" invocation per variable.
	if len(opts.Env) > 0 {
		nsenterArgs = append(nsenterArgs, "env")
		nsenterArgs = append(nsenterArgs, opts.Env...)
	}
	nsenterArgs = append(nsenterArgs, cmdArgs...)
	return runCommandInteractive("nsenter", nsenterArgs...)
}
// ── Logs ─────────────────────────────────────────────────────────────────────
// Logs returns (or, with Follow set, streams to the terminal) journal
// output for the container's service unit. Tail defaults to the last 100
// lines when unset.
func (b *Backend) Logs(name string, opts backend.LogOptions) (string, error) {
	jArgs := []string{"-u", unitName(name), "--no-pager"}
	tail := "100"
	if opts.Tail > 0 {
		tail = fmt.Sprintf("%d", opts.Tail)
	}
	jArgs = append(jArgs, "-n", tail)
	if opts.Follow {
		// Streaming mode writes straight to the terminal; there is
		// nothing to return.
		return "", runCommandInteractive("journalctl", append(jArgs, "-f")...)
	}
	return runCommand("journalctl", jArgs...)
}
// ── CopyToContainer ──────────────────────────────────────────────────────────
// CopyToContainer copies a host file or directory into a container's rootfs
// with cp -a (preserving ownership, permissions, and symlinks).
//
// dst is interpreted relative to the container rootfs; values that would
// resolve outside the rootfs (e.g. via "..") are rejected.
func (b *Backend) CopyToContainer(name string, src string, dst string) error {
	if !fileExists(src) && !dirExists(src) {
		return fmt.Errorf("source not found: %s", src)
	}
	rootfs := b.containerDir(name)
	dstPath := filepath.Join(rootfs, dst)
	// filepath.Join cleans ".." segments, so a crafted dst such as
	// "../../etc" could previously escape the rootfs and clobber host
	// files. Refuse anything outside the container tree.
	if dstPath != rootfs && !strings.HasPrefix(dstPath, rootfs+string(filepath.Separator)) {
		return fmt.Errorf("destination %q escapes container rootfs", dst)
	}
	out, err := runCommand("cp", "-a", src, dstPath)
	if err != nil {
		return fmt.Errorf("copy failed: %s", out)
	}
	fmt.Printf("Copied %s → %s:%s\n", src, name, dst)
	return nil
}
// ── CopyFromContainer ────────────────────────────────────────────────────────
// CopyFromContainer copies a file or directory out of a container's rootfs
// to a host path with cp -a.
//
// src is interpreted relative to the container rootfs; values that would
// resolve outside the rootfs (e.g. via "..") are rejected so arbitrary
// host files cannot be read through this path.
func (b *Backend) CopyFromContainer(name string, src string, dst string) error {
	rootfs := b.containerDir(name)
	srcPath := filepath.Join(rootfs, src)
	// filepath.Join cleans ".." segments; refuse anything that escapes the
	// container tree.
	if srcPath != rootfs && !strings.HasPrefix(srcPath, rootfs+string(filepath.Separator)) {
		return fmt.Errorf("source %q escapes container rootfs", src)
	}
	if !fileExists(srcPath) && !dirExists(srcPath) {
		return fmt.Errorf("not found in container %s: %s", name, src)
	}
	out, err := runCommand("cp", "-a", srcPath, dst)
	if err != nil {
		return fmt.Errorf("copy failed: %s", out)
	}
	fmt.Printf("Copied %s:%s → %s\n", name, src, dst)
	return nil
}
// ── List ─────────────────────────────────────────────────────────────────────
// readPrettyName returns the PRETTY_NAME value from <rootfs>/etc/os-release,
// or "" when the file is missing or carries no PRETTY_NAME entry. This
// replaces parsing logic that was previously duplicated inline.
func readPrettyName(rootfs string) string {
	data, err := os.ReadFile(filepath.Join(rootfs, "etc", "os-release"))
	if err != nil {
		return ""
	}
	for _, line := range strings.Split(string(data), "\n") {
		if strings.HasPrefix(line, "PRETTY_NAME=") {
			return strings.Trim(strings.TrimPrefix(line, "PRETTY_NAME="), "\"")
		}
	}
	return ""
}

// List enumerates hybrid containers: running ones via machinectl, then
// stopped ones by scanning the container base directory. Only containers
// that have a hybrid unit file are included.
func (b *Backend) List() ([]backend.ContainerInfo, error) {
	var containers []backend.ContainerInfo
	seen := make(map[string]bool)
	// Running containers, as reported by machinectl. Errors are treated as
	// "nothing running" rather than failing the whole listing.
	out, err := runCommandSilent("machinectl", "list", "--no-pager", "--no-legend")
	if err == nil && strings.TrimSpace(out) != "" {
		for _, line := range strings.Split(out, "\n") {
			fields := strings.Fields(strings.TrimSpace(line))
			if len(fields) == 0 {
				continue
			}
			name := fields[0]
			// Only include containers that belong to the hybrid backend.
			if !b.isHybridContainer(name) {
				continue
			}
			seen[name] = true
			rootfs := b.containerDir(name)
			info := backend.ContainerInfo{
				Name:   name,
				Status: "running",
				RootFS: rootfs,
			}
			// Best-effort IP lookup; failures leave the field empty.
			if showOut, showErr := runCommandSilent("machinectl", "show", name,
				"--property=Addresses", "--property=RootDirectory"); showErr == nil {
				for _, sl := range strings.Split(showOut, "\n") {
					if addr := strings.TrimPrefix(sl, "Addresses="); addr != sl && addr != "" {
						info.IPAddress = addr
					}
				}
			}
			info.OS = readPrettyName(rootfs)
			containers = append(containers, info)
		}
	}
	// Stopped containers: any rootfs directory with a hybrid unit file
	// that machinectl did not already report.
	if entries, readErr := os.ReadDir(b.containerBaseDir); readErr == nil {
		for _, entry := range entries {
			name := entry.Name()
			if !entry.IsDir() || seen[name] || !b.isHybridContainer(name) {
				continue
			}
			rootfs := filepath.Join(b.containerBaseDir, name)
			containers = append(containers, backend.ContainerInfo{
				Name:   name,
				Status: "stopped",
				RootFS: rootfs,
				OS:     readPrettyName(rootfs),
			})
		}
	}
	return containers, nil
}
// isHybridContainer reports whether name belongs to this backend, judged by
// the presence of its volt-hybrid@ service unit file on disk.
func (b *Backend) isHybridContainer(name string) bool {
	return fileExists(unitFilePath(name))
}
// ── Inspect ──────────────────────────────────────────────────────────────────
// Inspect reports detailed status for a hybrid container by combining the
// service unit state, machinectl runtime properties, and rootfs metadata.
func (b *Backend) Inspect(name string) (*backend.ContainerInfo, error) {
	rootfs := b.containerDir(name)
	info := &backend.ContainerInfo{
		Name:   name,
		RootFS: rootfs,
		Status: "stopped",
	}
	if !dirExists(rootfs) {
		info.Status = "not found"
	}
	// The unit's ActiveState refines the status whenever it is non-empty:
	// "active" becomes "running", anything else passes through verbatim.
	if state, _ := runCommandSilent("systemctl", "is-active", unitName(name)); strings.TrimSpace(state) != "" {
		if trimmed := strings.TrimSpace(state); trimmed == "active" {
			info.Status = "running"
		} else {
			info.Status = trimmed
		}
	}
	// Runtime details are only queryable while the machine is registered.
	if isContainerRunning(name) {
		info.Status = "running"
		if showOut, showErr := runCommandSilent("machinectl", "show", name); showErr == nil {
			for _, raw := range strings.Split(showOut, "\n") {
				entry := strings.TrimSpace(raw)
				switch {
				case strings.HasPrefix(entry, "Addresses="):
					info.IPAddress = strings.TrimPrefix(entry, "Addresses=")
				case strings.HasPrefix(entry, "Leader="):
					// Best-effort parse; PID stays zero on failure.
					fmt.Sscanf(strings.TrimPrefix(entry, "Leader="), "%d", &info.PID)
				}
			}
		}
	}
	// OS pretty-name from the rootfs, when present.
	if data, readErr := os.ReadFile(filepath.Join(rootfs, "etc", "os-release")); readErr == nil {
		for _, entry := range strings.Split(string(data), "\n") {
			if strings.HasPrefix(entry, "PRETTY_NAME=") {
				info.OS = strings.Trim(strings.TrimPrefix(entry, "PRETTY_NAME="), "\"")
				break
			}
		}
	}
	return info, nil
}
// ── Exported helpers for CLI commands ────────────────────────────────────────
// Thin wrappers exposing package-private helpers so CLI commands can reuse
// the same logic without duplicating it. Each delegates unchanged.

// IsContainerRunning checks if a hybrid container is currently running.
func (b *Backend) IsContainerRunning(name string) bool {
	return isContainerRunning(name)
}

// GetContainerLeaderPID returns the leader PID of a running hybrid container.
func (b *Backend) GetContainerLeaderPID(name string) (string, error) {
	return getContainerLeaderPID(name)
}

// ContainerDir returns the rootfs dir for a container.
func (b *Backend) ContainerDir(name string) string {
	return b.containerDir(name)
}

// KernelManager returns the kernel manager instance used by this backend.
func (b *Backend) KernelManager() *kernel.Manager {
	return b.kernelManager
}

// UnitName returns the systemd unit name for a hybrid container.
func UnitName(name string) string {
	return unitName(name)
}

// UnitFilePath returns the full path to a hybrid container's service unit file.
func UnitFilePath(name string) string {
	return unitFilePath(name)
}

// DaemonReload runs systemctl daemon-reload.
func DaemonReload() error {
	return daemonReload()
}

// ResolveContainerCommand resolves a bare command to an absolute path in the container.
func (b *Backend) ResolveContainerCommand(name, cmd string) string {
	return b.resolveContainerCommand(name, cmd)
}

View File

@@ -0,0 +1,366 @@
/*
Hybrid Isolation - Security and resource isolation for Volt hybrid-native containers.
Configures:
- Landlock LSM policy generation (NEVER AppArmor)
- Seccomp profile selection (strict/default/unconfined)
- Cgroups v2 resource limits (memory, CPU, I/O, PIDs)
- Network namespace setup (private network stack)
Copyright (c) Armored Gates LLC. All rights reserved.
*/
package hybrid
import (
"fmt"
"path/filepath"
"strings"
)
// ── Seccomp Profiles ─────────────────────────────────────────────────────────
// SeccompProfile selects the syscall filtering level for a container. See
// IsolationConfig.NspawnArgs for how each profile maps to nspawn flags.
type SeccompProfile string

const (
	// SeccompStrict blocks dangerous syscalls and limits the container to a
	// safe subset. Suitable for untrusted workloads.
	SeccompStrict SeccompProfile = "strict"
	// SeccompDefault applies the systemd-nspawn default seccomp filter which
	// blocks mount, reboot, kexec, and other admin syscalls.
	SeccompDefault SeccompProfile = "default"
	// SeccompUnconfined disables seccomp filtering entirely. Use only for
	// trusted workloads that need full syscall access (e.g. nested containers).
	SeccompUnconfined SeccompProfile = "unconfined"
)
// ── Landlock Policy ──────────────────────────────────────────────────────────
// LandlockAccess defines the bitfield of allowed filesystem operations.
// These mirror the LANDLOCK_ACCESS_FS_* constants from the kernel ABI.
type LandlockAccess uint64

const (
	LandlockAccessFSExecute    LandlockAccess = 1 << 0
	LandlockAccessFSWriteFile  LandlockAccess = 1 << 1
	LandlockAccessFSReadFile   LandlockAccess = 1 << 2
	LandlockAccessFSReadDir    LandlockAccess = 1 << 3
	LandlockAccessFSRemoveDir  LandlockAccess = 1 << 4
	LandlockAccessFSRemoveFile LandlockAccess = 1 << 5
	LandlockAccessFSMakeChar   LandlockAccess = 1 << 6
	LandlockAccessFSMakeDir    LandlockAccess = 1 << 7
	LandlockAccessFSMakeReg    LandlockAccess = 1 << 8
	LandlockAccessFSMakeSock   LandlockAccess = 1 << 9
	LandlockAccessFSMakeFifo   LandlockAccess = 1 << 10
	LandlockAccessFSMakeBlock  LandlockAccess = 1 << 11
	LandlockAccessFSMakeSym    LandlockAccess = 1 << 12
	// NOTE(review): Refer and Truncate were introduced in later Landlock
	// ABI versions than the bits above — confirm the minimum kernel this
	// backend targets supports them.
	LandlockAccessFSRefer    LandlockAccess = 1 << 13
	LandlockAccessFSTruncate LandlockAccess = 1 << 14

	// Convenience combinations.

	// LandlockReadOnly permits reading files and listing directories.
	LandlockReadOnly = LandlockAccessFSReadFile | LandlockAccessFSReadDir
	// LandlockReadWrite additionally permits writing, creating, removing,
	// and truncating files and directories.
	LandlockReadWrite = LandlockReadOnly | LandlockAccessFSWriteFile |
		LandlockAccessFSMakeReg | LandlockAccessFSMakeDir |
		LandlockAccessFSRemoveFile | LandlockAccessFSRemoveDir |
		LandlockAccessFSTruncate
	// LandlockReadExec permits reading plus executing files.
	LandlockReadExec = LandlockReadOnly | LandlockAccessFSExecute
)

// LandlockRule maps a filesystem path to the permitted access mask.
type LandlockRule struct {
	Path   string         // path the rule applies to (host-side, rooted in the container rootfs)
	Access LandlockAccess // allowed operations beneath Path
}

// LandlockPolicy is an ordered set of Landlock rules for a container.
type LandlockPolicy struct {
	Rules []LandlockRule
}
// ServerPolicy returns the Landlock policy used for server/service
// workloads: read+execute for the system directories, read-only /etc, and
// read-write for the mutable application/state directories.
func ServerPolicy(rootfs string) *LandlockPolicy {
	rule := func(sub string, access LandlockAccess) LandlockRule {
		return LandlockRule{Path: filepath.Join(rootfs, sub), Access: access}
	}
	libAccess := LandlockReadOnly | LandlockAccessFSExecute
	return &LandlockPolicy{
		Rules: []LandlockRule{
			rule("usr", LandlockReadExec),
			rule("lib", libAccess),
			rule("lib64", libAccess),
			rule("bin", LandlockReadExec),
			rule("sbin", LandlockReadExec),
			rule("etc", LandlockReadOnly),
			rule("app", LandlockReadWrite),
			rule("tmp", LandlockReadWrite),
			rule("var", LandlockReadWrite),
			rule("run", LandlockReadWrite),
		},
	}
}
// DesktopPolicy returns the Landlock policy for desktop/interactive
// workloads. It is more permissive than ServerPolicy: /etc and /home are
// writable (home also executable), and /opt is readable+executable.
func DesktopPolicy(rootfs string) *LandlockPolicy {
	rule := func(sub string, access LandlockAccess) LandlockRule {
		return LandlockRule{Path: filepath.Join(rootfs, sub), Access: access}
	}
	libAccess := LandlockReadOnly | LandlockAccessFSExecute
	return &LandlockPolicy{
		Rules: []LandlockRule{
			rule("usr", LandlockReadExec),
			rule("lib", libAccess),
			rule("lib64", libAccess),
			rule("bin", LandlockReadExec),
			rule("sbin", LandlockReadExec),
			rule("etc", LandlockReadWrite),
			rule("home", LandlockReadWrite|LandlockAccessFSExecute),
			rule("tmp", LandlockReadWrite),
			rule("var", LandlockReadWrite),
			rule("run", LandlockReadWrite),
			rule("opt", LandlockReadExec),
		},
	}
}
// ── Cgroups v2 Resource Limits ───────────────────────────────────────────────
// ResourceLimits describes the cgroups v2 constraints applied to a hybrid
// container. The zero value of each field means "no limit" / "no setting".
type ResourceLimits struct {
	MemoryHard string // memory.max: hard cap, OOM-kill above (e.g. "512M", "2G")
	MemorySoft string // memory.high: throttling threshold (soft pressure)
	CPUWeight  int    // cpu.weight: proportional CPU share, 1-10000 (default 100)
	CPUSet     string // cpuset.cpus: core pinning spec such as "0-3" or "0,2"
	IOWeight   int    // io.weight: proportional I/O share, 1-10000 (default 100)
	PIDsMax    int    // pids.max: maximum process count; 0 disables the limit
}

// DefaultResourceLimits returns conservative defaults suitable for most
// workloads: 2G hard / 1G soft memory, default CPU and I/O weights, no
// core pinning, and a 4096-process cap.
func DefaultResourceLimits() *ResourceLimits {
	limits := &ResourceLimits{
		CPUWeight: 100,
		IOWeight:  100,
		PIDsMax:   4096,
	}
	limits.MemoryHard = "2G"
	limits.MemorySoft = "1G"
	return limits
}

// SystemdProperties renders the limits as systemd unit properties for use
// with systemd-run or systemd-nspawn --property=. Delegate=yes is always
// emitted so the container can manage its own nested cgroup tree.
func (r *ResourceLimits) SystemdProperties() []string {
	props := []string{"Delegate=yes"}
	add := func(enabled bool, prop string) {
		if enabled {
			props = append(props, prop)
		}
	}
	add(r.MemoryHard != "", "MemoryMax="+r.MemoryHard)
	add(r.MemorySoft != "", "MemoryHigh="+r.MemorySoft)
	add(r.CPUWeight > 0, fmt.Sprintf("CPUWeight=%d", r.CPUWeight))
	add(r.CPUSet != "", "AllowedCPUs="+r.CPUSet)
	add(r.IOWeight > 0, fmt.Sprintf("IOWeight=%d", r.IOWeight))
	add(r.PIDsMax > 0, fmt.Sprintf("TasksMax=%d", r.PIDsMax))
	return props
}
// ── Network Isolation ────────────────────────────────────────────────────────
// NetworkMode selects the container network configuration.
type NetworkMode string

const (
	// NetworkPrivate gives the container a fully isolated network stack,
	// attached to the host bridge (voltbr0) through a veth pair: own IP
	// stack, routing table, and firewall rules.
	NetworkPrivate NetworkMode = "private"
	// NetworkHost shares the host network namespace; the container sees
	// every host interface and port. Trusted system services only.
	NetworkHost NetworkMode = "host"
	// NetworkNone isolates the container with no external connectivity —
	// loopback only.
	NetworkNone NetworkMode = "none"
)

// NetworkConfig holds the network isolation settings for a container.
type NetworkConfig struct {
	Mode   NetworkMode
	Bridge string // bridge name for private mode (default: "voltbr0")
	// PortForwards maps host ports to container ports when Mode is NetworkPrivate.
	PortForwards []PortForward
	// DNS servers to inject into the container's resolv.conf.
	DNS []string
}

// PortForward maps a single host port to a container port.
type PortForward struct {
	HostPort      int
	ContainerPort int
	Protocol      string // "tcp" or "udp"
}

// DefaultNetworkConfig returns a private-network configuration on the
// standard Volt bridge with Cloudflare DNS resolvers.
func DefaultNetworkConfig() *NetworkConfig {
	cfg := &NetworkConfig{Mode: NetworkPrivate}
	cfg.Bridge = "voltbr0"
	cfg.DNS = []string{"1.1.1.1", "1.0.0.1"}
	return cfg
}

// NspawnNetworkArgs returns the systemd-nspawn arguments for this network
// configuration.
func (n *NetworkConfig) NspawnNetworkArgs() []string {
	switch n.Mode {
	case NetworkHost:
		// Sharing the host namespace needs no nspawn network flags.
		return nil
	case NetworkNone:
		return []string{"--private-network"}
	case NetworkPrivate:
		args := make([]string, 0, 1+len(n.PortForwards))
		args = append(args, "--network-bridge="+n.Bridge)
		for _, forward := range n.PortForwards {
			protocol := forward.Protocol
			if protocol == "" {
				protocol = "tcp"
			}
			args = append(args, fmt.Sprintf("--port=%s:%d:%d", protocol, forward.HostPort, forward.ContainerPort))
		}
		return args
	default:
		// Unknown mode: fall back to the standard Volt bridge.
		return []string{"--network-bridge=voltbr0"}
	}
}
// ── Isolation Profile ────────────────────────────────────────────────────────
// IsolationConfig combines all isolation settings for a hybrid container.
//
// NOTE(review): the Landlock policy is carried here but NspawnArgs does not
// translate it into any nspawn flag — enforcement presumably happens
// elsewhere; confirm before relying on it.
type IsolationConfig struct {
	Landlock  *LandlockPolicy // filesystem access policy
	Seccomp   SeccompProfile  // syscall filtering level
	Resources *ResourceLimits // cgroups v2 limits
	Network   *NetworkConfig  // network namespace / bridge / port forwards
	// PrivateUsers enables user namespace isolation (--private-users).
	PrivateUsers bool
	// ReadOnlyFS mounts the rootfs as read-only (--read-only).
	ReadOnlyFS bool
}
// DefaultIsolation returns the security-first defaults used for production
// hybrid workloads: the server Landlock policy, nspawn's default seccomp
// filter, conservative resource limits, private networking, and user
// namespace isolation. The rootfs stays writable.
func DefaultIsolation(rootfs string) *IsolationConfig {
	iso := &IsolationConfig{
		Seccomp:      SeccompDefault,
		PrivateUsers: true,
		ReadOnlyFS:   false,
	}
	iso.Landlock = ServerPolicy(rootfs)
	iso.Resources = DefaultResourceLimits()
	iso.Network = DefaultNetworkConfig()
	return iso
}
// NspawnArgs returns the complete set of systemd-nspawn arguments for this
// isolation configuration. These are appended to the base nspawn command.
//
// NOTE(review): iso.Landlock is not translated into any flag here, so the
// Landlock policy must be enforced elsewhere (not visible in this file).
func (iso *IsolationConfig) NspawnArgs() []string {
	var args []string
	// Resource limits and cgroup delegation via --property.
	for _, prop := range iso.Resources.SystemdProperties() {
		args = append(args, "--property="+prop)
	}
	// Seccomp profile.
	switch iso.Seccomp {
	case SeccompStrict:
		// systemd-nspawn applies its default filter automatically.
		// Strict mode additionally drops all capabilities.
		// NOTE(review): confirm the targeted systemd version accepts
		// "all" as a --drop-capability value.
		args = append(args, "--drop-capability=all")
	case SeccompDefault:
		// Use nspawn's built-in seccomp filter — no extra flags needed.
	case SeccompUnconfined:
		// Disable the built-in seccomp filter for trusted workloads.
		// NOTE(review): a bare "~" (deny nothing) is unusual syntax for
		// --system-call-filter — verify it disables filtering rather
		// than being rejected by nspawn.
		args = append(args, "--system-call-filter=~")
	}
	// Network isolation.
	args = append(args, iso.Network.NspawnNetworkArgs()...)
	// User namespace isolation with an automatically picked UID range.
	if iso.PrivateUsers {
		args = append(args, "--private-users=pick")
	}
	// Read-only rootfs.
	if iso.ReadOnlyFS {
		args = append(args, "--read-only")
	}
	return args
}
// NspawnConfigBlock renders the .nspawn file content for this isolation
// configuration, as written to /etc/systemd/nspawn/<name>.nspawn. It emits
// [Exec], [Network], and [ResourceControl] sections.
func (iso *IsolationConfig) NspawnConfigBlock(name string) string {
	var out strings.Builder

	// [Exec]: boot the container's own init and set user-namespacing.
	privateUsers := "no"
	if iso.PrivateUsers {
		privateUsers = "pick"
	}
	fmt.Fprintf(&out, "[Exec]\nBoot=yes\nPrivateUsers=%s\n", privateUsers)
	fmt.Fprintf(&out, "Environment=VOLT_CONTAINER=%s\n", name)
	out.WriteString("Environment=VOLT_RUNTIME=hybrid\n\n")

	// [Network]: host mode intentionally emits no settings.
	out.WriteString("[Network]\n")
	switch iso.Network.Mode {
	case NetworkPrivate:
		fmt.Fprintf(&out, "Bridge=%s\n", iso.Network.Bridge)
	case NetworkNone:
		out.WriteString("Private=yes\n")
	}
	out.WriteString("\n")

	// [ResourceControl]: only the limits representable in .nspawn files.
	out.WriteString("[ResourceControl]\n")
	if iso.Resources.MemoryHard != "" {
		fmt.Fprintf(&out, "MemoryMax=%s\n", iso.Resources.MemoryHard)
	}
	if iso.Resources.PIDsMax > 0 {
		fmt.Fprintf(&out, "TasksMax=%d\n", iso.Resources.PIDsMax)
	}
	return out.String()
}