init
This commit is contained in:
commit
2be8de47fa
87 changed files with 11501 additions and 0 deletions
175
modules/system/ollama-rocm.nix
Normal file
175
modules/system/ollama-rocm.nix
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
# Ollama ROCm Module (System)
|
||||
# Provides: Ollama LLM server with AMD ROCm GPU passthrough as system container
|
||||
#
|
||||
# Usage:
|
||||
# myModules.ollamaRocm = {
|
||||
# enable = true;
|
||||
# };
|
||||
|
||||
{ config, lib, pkgs, ... }:

let
  # Shorthand for the values the user set under `myModules.ollamaRocm`.
  cfg = config.myModules.ollamaRocm;
in
|
||||
{
|
||||
options.myModules.ollamaRocm = {
  enable = lib.mkEnableOption "Ollama with ROCm GPU passthrough (System Service)";

  # Container image reference handed to `podman run`.
  image = lib.mkOption {
    type = lib.types.str;
    default = "docker.io/ollama/ollama:rocm";
    description = "Ollama ROCm container image";
  };

  # Host directory bind-mounted into the container at /root/.ollama.
  dataDir = lib.mkOption {
    type = lib.types.str;
    default = "/var/lib/ollama";
    description = "Path to Ollama data directory (models, etc.)";
  };

  # Host-side port; it is published on 127.0.0.1 only and forwarded to
  # the container's port 11434.
  port = lib.mkOption {
    type = lib.types.port;
    default = 11434;
    description = "Ollama API port";
  };

  # Exported to the container as HSA_OVERRIDE_GFX_VERSION.
  hsaGfxVersion = lib.mkOption {
    type = lib.types.str;
    default = "12.0.1";
    description = "HSA_OVERRIDE_GFX_VERSION for AMD GPU compatibility";
  };

  # Note: the service runs podman as root (system podman), so no extra
  # group membership is configured for GPU access. The GPU device nodes
  # are passed explicitly with --device flags in the service definition.

  # Exported to the container as OLLAMA_KEEP_ALIVE.
  keepAlive = lib.mkOption {
    type = lib.types.str;
    default = "5m";
    description = "Duration to keep model in memory (e.g. 5m, 1h). Set to 0 to unload immediately.";
  };

  # Performance Tuning

  # Negative values are rejected at evaluation time instead of being
  # handed to the server.
  numParallel = lib.mkOption {
    type = lib.types.ints.unsigned;
    default = 1;
    description = "OLLAMA_NUM_PARALLEL: Concurrent requests (keep low for speed)";
  };

  maxLoadedModels = lib.mkOption {
    type = lib.types.ints.unsigned;
    default = 1;
    description = "OLLAMA_MAX_LOADED_MODELS: Max models in memory";
  };

  # `null` omits the variable from the container environment entirely.
  # NOTE(review): OLLAMA_NUM_THREADS may not be a variable the Ollama
  # server actually reads - confirm against the image in use.
  numThreads = lib.mkOption {
    type = lib.types.nullOr lib.types.ints.unsigned;
    default = 6; # Optimized for Ryzen 5600X (physical cores)
    description = "OLLAMA_NUM_THREADS: CPU threads for inference";
  };

  # Bounded to the range systemd accepts for Nice=, so an out-of-range
  # value fails the build rather than failing when the unit is loaded.
  processPriority = lib.mkOption {
    type = lib.types.ints.between (-20) 19;
    default = -10;
    description = "Systemd Nice priority (lower is higher priority, range -20 to 19)";
  };
};
|
||||
|
||||
# System configuration applied only when the module is enabled: creates the
# data directory and defines a systemd unit that runs the Ollama ROCm image
# under root podman with the AMD GPU device nodes passed through.
config = lib.mkIf cfg.enable {
  # Ensure data directory exists
  systemd.tmpfiles.rules = [
    "d ${cfg.dataDir} 0755 root root -"
  ];

  systemd.services.ollama = {
    description = "Ollama ROCm Container (System)";
    after = [ "network-online.target" ];
    wants = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];

    serviceConfig = {
      Restart = "always";
      Nice = cfg.processPriority;

      # Hardening
      ProtectSystem = "full";
      ProtectHome = false;
      PrivateTmp = true;
      ProtectKernelTunables = false; # Needed for Podman (BPF, etc)
      ProtectControlGroups = false; # Podman needs cgroups
      ProtectKernelModules = true;

      # Allow Podman to write to state and data
      ReadWritePaths = [
        "/var/lib/containers"
        "/run" # Podman needs to write to sockets and runtime dirs in /run
        "/etc/containers" # Network configs live here
        cfg.dataDir
      ];

      # Clean up any stale container and (re)create the network before start.
      # Entries prefixed with "-" are best-effort: systemd ignores their
      # failure (e.g. when there is no old container to stop or remove).
      # Note: 'podman' in system context sees system containers/networks.
      ExecStartPre = [
        "-${pkgs.podman}/bin/podman stop ollama"
        "-${pkgs.podman}/bin/podman rm ollama"
        "-${pkgs.podman}/bin/podman network rm antigravity-net"
        "${pkgs.podman}/bin/podman network create antigravity-net --ignore"

        # Fix permission issue where /var/lib/ollama is a symlink to
        # /var/lib/private/ollama, which is not accessible by the subuid
        # user (200000).
        (pkgs.writeShellScript "ollama-pre-start" ''
          # Escaped so that spaces or shell metacharacters in the configured
          # path cannot change what the script does.
          DATA_DIR=${lib.escapeShellArg cfg.dataDir}

          # Check if it is a symlink
          if [ -L "$DATA_DIR" ]; then
            echo "Detected symlink at $DATA_DIR. Removing and converting to directory..."
            TARGET=$(${pkgs.coreutils}/bin/readlink -f "$DATA_DIR")
            ${pkgs.coreutils}/bin/rm "$DATA_DIR"
            ${pkgs.coreutils}/bin/mkdir -p "$DATA_DIR"

            # If the target existed and has data, copy it back.
            # "$TARGET"/. (rather than "$TARGET"/*) also copies dotfiles and
            # does not fail on an empty directory; -a keeps timestamps/modes.
            # The copy stays best-effort, but a failure is now logged.
            if [ -d "$TARGET" ]; then
              echo "Restoring data from $TARGET..."
              ${pkgs.coreutils}/bin/cp -a "$TARGET"/. "$DATA_DIR"/ \
                || echo "Warning: could not restore all data from $TARGET" >&2
            fi
          else
            ${pkgs.coreutils}/bin/mkdir -p "$DATA_DIR"
          fi

          # Fix ownership for UserNS (container user maps to host UID 200000)
          # NOTE(review): with --userns=auto the host UID range is chosen by
          # podman, so 200000 is an assumption - confirm against /etc/subuid.
          # The ":U" flag on the volume mount also asks podman to chown it.
          ${pkgs.coreutils}/bin/chown -R 200000:200000 "$DATA_DIR"
          ${pkgs.coreutils}/bin/chmod 0755 "$DATA_DIR"
        '')
      ];

      # Trailing backslashes are line continuations in the generated unit.
      # The API is published on the host loopback interface only.
      ExecStart = ''
        ${pkgs.podman}/bin/podman run --rm --name ollama \
          --network=antigravity-net \
          --network-alias=ollama \
          --dns=8.8.8.8 \
          --device=/dev/kfd \
          --device=/dev/dri \
          --userns=auto \
          -e HSA_OVERRIDE_GFX_VERSION=${cfg.hsaGfxVersion} \
          -e OLLAMA_HOST=0.0.0.0 \
          -e OLLAMA_ORIGINS="*" \
          -e OLLAMA_KEEP_ALIVE=${cfg.keepAlive} \
          -e OLLAMA_NUM_PARALLEL=${toString cfg.numParallel} \
          -e OLLAMA_MAX_LOADED_MODELS=${toString cfg.maxLoadedModels} \
          ${
            lib.optionalString (cfg.numThreads != null) "-e OLLAMA_NUM_THREADS=${toString cfg.numThreads}"
          } \
          -v ${cfg.dataDir}:/root/.ollama:U \
          -p 127.0.0.1:${toString cfg.port}:11434 \
          ${cfg.image}
      '';

      # --ignore: the container is started with --rm, so it may already be
      # gone when the stop command runs (e.g. after the container exited on
      # its own); a missing container must not count as a failed stop.
      ExecStop = "${pkgs.podman}/bin/podman stop --ignore ollama";
    };
  };
};
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue