175 lines
5.5 KiB
Nix
175 lines
5.5 KiB
Nix
# Ollama ROCm Module (System)
# Provides: Ollama LLM server with AMD ROCm GPU passthrough, run as a system container
#
# Usage:
#   myModules.ollamaRocm = {
#     enable = true;
#   };
|
|
|
|
# Module arguments supplied by the NixOS module system.
# `cfg` is shorthand for this module's own option subtree.
{ config, lib, pkgs, ... }:

let
  cfg = config.myModules.ollamaRocm;
in
|
|
{
|
|
# Option declarations for the Ollama ROCm container service.
# Every value declared here is read through `cfg` by the `config` section of this module.
options.myModules.ollamaRocm = {
  enable = lib.mkEnableOption "Ollama with ROCm GPU passthrough (System Service)";

  # Image reference, passed verbatim as the last argument of `podman run`.
  image = lib.mkOption {
    type = lib.types.str;
    default = "docker.io/ollama/ollama:rocm";
    description = "Ollama ROCm container image";
  };

  # Host directory bind-mounted into the container at /root/.ollama.
  dataDir = lib.mkOption {
    type = lib.types.str;
    default = "/var/lib/ollama";
    description = "Path to Ollama data directory (models, etc.)";
  };

  # Host-side port; published on 127.0.0.1 only and forwarded to 11434 in the container.
  port = lib.mkOption {
    type = lib.types.port;
    default = 11434;
    description = "Ollama API port";
  };

  # Exported to the container as HSA_OVERRIDE_GFX_VERSION.
  hsaGfxVersion = lib.mkOption {
    type = lib.types.str;
    default = "12.0.1";
    description = "HSA_OVERRIDE_GFX_VERSION for AMD GPU compatibility";
  };

  # Note: with system (root) podman no extra group permissions are normally
  # required, but device passthrough needs exactly the right flags.
  # Root execution is assumed here for simplicity and GPU access.

  # Exported to the container as OLLAMA_KEEP_ALIVE.
  keepAlive = lib.mkOption {
    type = lib.types.str;
    default = "5m";
    description = "Duration to keep model in memory (e.g. 5m, 1h). Set to 0 to unload immediately.";
  };

  # Performance tuning

  # Counts cannot be negative, so reject such values at evaluation time
  # instead of handing them to the server.
  numParallel = lib.mkOption {
    type = lib.types.ints.unsigned;
    default = 1;
    description = "OLLAMA_NUM_PARALLEL: Concurrent requests (keep low for speed)";
  };

  maxLoadedModels = lib.mkOption {
    type = lib.types.ints.unsigned;
    default = 1;
    description = "OLLAMA_MAX_LOADED_MODELS: Max models in memory";
  };

  # `null` means the variable is not passed to the container at all.
  numThreads = lib.mkOption {
    type = lib.types.nullOr lib.types.int;
    default = 6; # Optimized for Ryzen 5600X (physical cores)
    description = "OLLAMA_NUM_THREADS: CPU threads for inference (null omits the variable)";
  };

  # Becomes the unit's `Nice=` setting. The type enforces the documented
  # range so an invalid value fails at evaluation rather than at unit load.
  processPriority = lib.mkOption {
    type = lib.types.ints.between (-20) 19;
    default = -10;
    description = "Systemd Nice priority (lower is higher priority, range -20 to 19)";
  };
};
|
|
|
|
# Service implementation: a tmpfiles rule for the data directory plus a
# systemd unit that runs the Ollama image under system podman.
# Only takes effect when `cfg.enable` is true.
config =
  let
    # Absolute tool paths so nothing depends on the unit's PATH.
    podman = "${pkgs.podman}/bin/podman";
    coreutils = "${pkgs.coreutils}/bin";

    # Prepares the data directory before the container starts.
    #
    # If the data directory is a symlink (e.g. /var/lib/ollama ->
    # /var/lib/private/ollama), the target is not accessible to the remapped
    # container user, so the link is replaced by a real directory and the
    # previous contents are copied back.
    preStart = pkgs.writeShellScript "ollama-pre-start" ''
      DATA_DIR=${lib.escapeShellArg cfg.dataDir}

      if [ -L "$DATA_DIR" ]; then
        echo "Detected symlink at $DATA_DIR. Removing and converting to directory..."
        # Resolve the target before the link is removed.
        TARGET=$(${coreutils}/readlink -f "$DATA_DIR")
        ${coreutils}/rm "$DATA_DIR"
        ${coreutils}/mkdir -p "$DATA_DIR"

        # Best-effort restore. "$TARGET/." (instead of a * glob) also copies
        # hidden files and works on an empty directory; -a keeps attributes.
        if [ -d "$TARGET" ]; then
          echo "Restoring data from $TARGET..."
          ${coreutils}/cp -a "$TARGET/." "$DATA_DIR/" \
            || echo "Warning: could not restore all data from $TARGET" >&2
        fi
      else
        ${coreutils}/mkdir -p "$DATA_DIR"
      fi

      # Fix ownership for the user namespace. Assumes the container user maps
      # to host UID 200000.
      # NOTE(review): with --userns=auto the mapping is chosen by podman;
      # confirm 200000 is what this host actually allocates.
      ${coreutils}/chown -R 200000:200000 "$DATA_DIR"
      ${coreutils}/chmod 0755 "$DATA_DIR"
    '';
  in
  lib.mkIf cfg.enable {
    # Ensure data directory exists
    systemd.tmpfiles.rules = [
      "d ${cfg.dataDir} 0755 root root -"
    ];

    systemd.services.ollama = {
      description = "Ollama ROCm Container (System)";
      after = [ "network-online.target" ];
      wants = [ "network-online.target" ];
      wantedBy = [ "multi-user.target" ];

      serviceConfig = {
        Restart = "always";
        Nice = cfg.processPriority;

        # Hardening
        ProtectSystem = "full";
        ProtectHome = false;
        PrivateTmp = true;
        ProtectKernelTunables = false; # Needed for Podman (BPF, etc)
        ProtectControlGroups = false; # Podman needs cgroups
        ProtectKernelModules = true;

        # Allow Podman to write to state and data.
        # The leading "-" tells systemd to ignore a path that does not exist
        # (yet) instead of failing the unit during namespace setup.
        ReadWritePaths = [
          "-/var/lib/containers"
          "/run" # Podman needs to write to sockets and runtime dirs in /run
          "-/etc/containers" # Network configs live here
          cfg.dataDir
        ];

        # Clean up any previous container, recreate the network, then prepare
        # the data directory. A leading "-" makes systemd ignore that
        # command's failure (e.g. nothing to stop or remove).
        # Note: 'podman' in system context sees system containers/networks.
        ExecStartPre = [
          "-${podman} stop ollama"
          "-${podman} rm ollama"
          "-${podman} network rm antigravity-net"
          "${podman} network create antigravity-net --ignore"
          preStart
        ];

        # Runs the container in the foreground; --rm removes it on exit.
        # The API is published on the loopback interface only.
        # NOTE(review): confirm the image honours OLLAMA_NUM_THREADS; it is
        # only passed when cfg.numThreads is not null.
        ExecStart = ''
          ${podman} run --rm --name ollama \
            --network=antigravity-net \
            --network-alias=ollama \
            --dns=8.8.8.8 \
            --device=/dev/kfd \
            --device=/dev/dri \
            --userns=auto \
            -e HSA_OVERRIDE_GFX_VERSION=${cfg.hsaGfxVersion} \
            -e OLLAMA_HOST=0.0.0.0 \
            -e OLLAMA_ORIGINS="*" \
            -e OLLAMA_KEEP_ALIVE=${cfg.keepAlive} \
            -e OLLAMA_NUM_PARALLEL=${toString cfg.numParallel} \
            -e OLLAMA_MAX_LOADED_MODELS=${toString cfg.maxLoadedModels} \
            ${
              lib.optionalString (cfg.numThreads != null) "-e OLLAMA_NUM_THREADS=${toString cfg.numThreads}"
            } \
            -v ${cfg.dataDir}:/root/.ollama:U \
            -p 127.0.0.1:${toString cfg.port}:11434 \
            ${cfg.image}
        '';
        ExecStop = "${podman} stop ollama";
      };
    };
  };
|
|
}
|