# nixos/modules/system/ollama-rocm.nix
# (file metadata from export: 2026-01-14 21:24:19 +01:00, 175 lines, 5.5 KiB, Nix)

# Ollama ROCm Module (System)
# Provides: Ollama LLM server with AMD ROCm GPU passthrough as system container
#
# Usage:
#   myModules.ollamaRocm = {
#     enable = true;
#   };
{
config,
lib,
pkgs,
...
}:
let
cfg = config.myModules.ollamaRocm;
in
{
options.myModules.ollamaRocm = {
  enable = lib.mkEnableOption "Ollama with ROCm GPU passthrough (System Service)";

  image = lib.mkOption {
    type = lib.types.str;
    default = "docker.io/ollama/ollama:rocm";
    description = "Ollama ROCm container image";
  };

  dataDir = lib.mkOption {
    type = lib.types.str;
    default = "/var/lib/ollama";
    description = "Path to Ollama data directory (models, etc.)";
  };

  port = lib.mkOption {
    type = lib.types.port;
    default = 11434;
    description = "Ollama API port (bound on 127.0.0.1 only)";
  };

  hsaGfxVersion = lib.mkOption {
    type = lib.types.str;
    default = "12.0.1";
    description = "HSA_OVERRIDE_GFX_VERSION for AMD GPU compatibility";
  };

  # Note: For system podman, usually we don't need group permissions if running as root,
  # but passing devices needs strictly correct flags.
  # We will assume root execution for simplicity and GPU access.
  keepAlive = lib.mkOption {
    type = lib.types.str;
    default = "5m";
    description = "Duration to keep model in memory (e.g. 5m, 1h). Set to 0 to unload immediately.";
  };

  # Performance Tuning
  numParallel = lib.mkOption {
    type = lib.types.int;
    default = 1;
    description = "OLLAMA_NUM_PARALLEL: Concurrent requests (keep low for speed)";
  };

  maxLoadedModels = lib.mkOption {
    type = lib.types.int;
    default = 1;
    description = "OLLAMA_MAX_LOADED_MODELS: Max models in memory";
  };

  numThreads = lib.mkOption {
    type = lib.types.nullOr lib.types.int;
    default = 6; # Optimized for Ryzen 5600X (physical cores)
    description = "OLLAMA_NUM_THREADS: CPU threads for inference (null = let Ollama decide)";
  };

  processPriority = lib.mkOption {
    # Bounded to the valid nice(2) range so a typo fails at evaluation time
    # instead of at service start. Previously a plain int.
    type = lib.types.ints.between (-20) 19;
    default = -10;
    description = "Systemd Nice priority (lower is higher priority, range -20 to 19)";
  };
};
config = lib.mkIf cfg.enable {
  # Ensure the data directory exists on boot (pre-start script below handles
  # the symlink/ownership corner cases).
  systemd.tmpfiles.rules = [
    "d ${cfg.dataDir} 0755 root root -"
  ];

  systemd.services.ollama = {
    description = "Ollama ROCm Container (System)";
    after = [ "network-online.target" ];
    wants = [ "network-online.target" ];

    serviceConfig = {
      Restart = "always";
      Nice = cfg.processPriority;

      # Hardening — relaxed where Podman needs kernel interfaces.
      ProtectSystem = "full";
      ProtectHome = false;
      PrivateTmp = true;
      ProtectKernelTunables = false; # Needed for Podman (BPF, etc)
      ProtectControlGroups = false; # Podman needs cgroups
      ProtectKernelModules = true;

      # Allow Podman to write to state and data
      ReadWritePaths = [
        "/var/lib/containers"
        "/run" # Podman needs to write to sockets and runtime dirs in /run
        "/etc/containers" # Network configs live here
        cfg.dataDir
      ];

      # ExecStartPre to cleanup old container and create net if needed.
      # Note: 'podman' in system context sees system containers/networks.
      # The leading "-" marks steps whose failure is tolerated (nothing to
      # stop/remove on first start, network still in use, ...).
      ExecStartPre = [
        "-${pkgs.podman}/bin/podman stop ollama"
        "-${pkgs.podman}/bin/podman rm ollama"
        "-${pkgs.podman}/bin/podman network rm antigravity-net"
        "${pkgs.podman}/bin/podman network create antigravity-net --ignore"
        # Fix permission issue where /var/lib/ollama is a symlink to /var/lib/private/ollama
        # which is not accessible by the subuid user (200000).
        # All coreutils calls use explicit store paths: the systemd service
        # PATH is minimal and bare "rm"/"mkdir"/"cp" may not resolve.
        (pkgs.writeShellScript "ollama-pre-start" ''
          DATA_DIR="${cfg.dataDir}"
          # Check if it is a symlink
          if [ -L "$DATA_DIR" ]; then
            echo "Detected symlink at $DATA_DIR. Removing and converting to directory..."
            TARGET=$(${pkgs.coreutils}/bin/readlink -f "$DATA_DIR")
            ${pkgs.coreutils}/bin/rm "$DATA_DIR"
            ${pkgs.coreutils}/bin/mkdir -p "$DATA_DIR"
            # If the target existed and has data, copy it back (optional, but safe).
            # "$TARGET/." instead of "$TARGET"/* so hidden files are included
            # and an empty directory does not trip the glob.
            if [ -d "$TARGET" ]; then
              echo "Restoring data from $TARGET..."
              ${pkgs.coreutils}/bin/cp -a "$TARGET/." "$DATA_DIR/" || true
            fi
          else
            ${pkgs.coreutils}/bin/mkdir -p "$DATA_DIR"
          fi
          # Fix ownership for UserNS (container user maps to host UID 200000)
          # NOTE(review): with --userns=auto podman picks the subuid range
          # itself; 200000 assumes the first auto range — confirm /etc/subuid.
          ${pkgs.coreutils}/bin/chown -R 200000:200000 "$DATA_DIR"
          ${pkgs.coreutils}/bin/chmod 0755 "$DATA_DIR"
        '')
      ];

      # Foreground run (--rm) so systemd supervises the container directly.
      # When numThreads is null the interpolation yields a whitespace-only
      # continuation line, which systemd accepts.
      ExecStart = ''
        ${pkgs.podman}/bin/podman run --rm --name ollama \
          --network=antigravity-net \
          --network-alias=ollama \
          --dns=8.8.8.8 \
          --device=/dev/kfd \
          --device=/dev/dri \
          --userns=auto \
          -e HSA_OVERRIDE_GFX_VERSION=${cfg.hsaGfxVersion} \
          -e OLLAMA_HOST=0.0.0.0 \
          -e OLLAMA_ORIGINS="*" \
          -e OLLAMA_KEEP_ALIVE=${cfg.keepAlive} \
          -e OLLAMA_NUM_PARALLEL=${toString cfg.numParallel} \
          -e OLLAMA_MAX_LOADED_MODELS=${toString cfg.maxLoadedModels} \
          ${
            lib.optionalString (cfg.numThreads != null) "-e OLLAMA_NUM_THREADS=${toString cfg.numThreads}"
          } \
          -v ${cfg.dataDir}:/root/.ollama:U \
          -p 127.0.0.1:${toString cfg.port}:11434 \
          ${cfg.image}
      '';
      ExecStop = "${pkgs.podman}/bin/podman stop ollama";
    };

    wantedBy = [ "multi-user.target" ];
  };
};
}