# nixos/modules/system/ollama-rocm.nix
# (file metadata from export: 2026-01-14 21:24:19 +01:00, 175 lines, 5.5 KiB, Nix)

# Ollama ROCm Module (System)
# Provides: Ollama LLM server with AMD ROCm GPU passthrough as system container
#
# Usage:
#   myModules.ollamaRocm = {
#     enable = true;
#   };
{
config,
lib,
pkgs,
...
}:
let
cfg = config.myModules.ollamaRocm;
in
{
options.myModules.ollamaRocm = {
  enable = lib.mkEnableOption "Ollama with ROCm GPU passthrough (System Service)";

  image = lib.mkOption {
    type = lib.types.str;
    default = "docker.io/ollama/ollama:rocm";
    description = "Ollama ROCm container image";
  };

  dataDir = lib.mkOption {
    type = lib.types.str;
    default = "/var/lib/ollama";
    description = "Path to Ollama data directory (models, etc.)";
  };

  port = lib.mkOption {
    type = lib.types.port;
    default = 11434;
    description = "Ollama API port (bound on 127.0.0.1 only)";
  };

  hsaGfxVersion = lib.mkOption {
    type = lib.types.str;
    default = "12.0.1";
    description = "HSA_OVERRIDE_GFX_VERSION for AMD GPU compatibility";
  };

  # Note: For system podman, usually we don't need group permissions if running as root,
  # but passing devices needs strictly correct flags.
  # We will assume root execution for simplicity and GPU access.
  keepAlive = lib.mkOption {
    type = lib.types.str;
    default = "5m";
    description = "Duration to keep model in memory (e.g. 5m, 1h). Set to 0 to unload immediately.";
  };

  # Performance Tuning
  numParallel = lib.mkOption {
    type = lib.types.int;
    default = 1;
    description = "OLLAMA_NUM_PARALLEL: Concurrent requests (keep low for speed)";
  };

  maxLoadedModels = lib.mkOption {
    type = lib.types.int;
    default = 1;
    description = "OLLAMA_MAX_LOADED_MODELS: Max models in memory";
  };

  numThreads = lib.mkOption {
    type = lib.types.nullOr lib.types.int;
    default = 6; # Optimized for Ryzen 5600X (physical cores)
    description = "OLLAMA_NUM_THREADS: CPU threads for inference (null = let Ollama decide)";
  };

  processPriority = lib.mkOption {
    # Bounded to the valid nice(2) range so a typo fails at evaluation time
    # instead of at service start. Previously a plain int.
    type = lib.types.ints.between (-20) 19;
    default = -10;
    description = "Systemd Nice priority (lower is higher priority, range -20 to 19)";
  };
};
config = lib.mkIf cfg.enable {
  # Ensure the data directory exists on boot (pre-start script below handles
  # the symlink/ownership corner cases).
  systemd.tmpfiles.rules = [
    "d ${cfg.dataDir} 0755 root root -"
  ];

  systemd.services.ollama = {
    description = "Ollama ROCm Container (System)";
    after = [ "network-online.target" ];
    wants = [ "network-online.target" ];

    serviceConfig = {
      Restart = "always";
      Nice = cfg.processPriority;

      # Hardening — relaxed where Podman needs kernel interfaces.
      ProtectSystem = "full";
      ProtectHome = false;
      PrivateTmp = true;
      ProtectKernelTunables = false; # Needed for Podman (BPF, etc)
      ProtectControlGroups = false; # Podman needs cgroups
      ProtectKernelModules = true;

      # Allow Podman to write to state and data
      ReadWritePaths = [
        "/var/lib/containers"
        "/run" # Podman needs to write to sockets and runtime dirs in /run
        "/etc/containers" # Network configs live here
        cfg.dataDir
      ];

      # ExecStartPre to cleanup old container and create net if needed.
      # Note: 'podman' in system context sees system containers/networks.
      # The leading "-" marks steps whose failure is tolerated (nothing to
      # stop/remove on first start, network still in use, ...).
      ExecStartPre = [
        "-${pkgs.podman}/bin/podman stop ollama"
        "-${pkgs.podman}/bin/podman rm ollama"
        "-${pkgs.podman}/bin/podman network rm antigravity-net"
        "${pkgs.podman}/bin/podman network create antigravity-net --ignore"
        # Fix permission issue where /var/lib/ollama is a symlink to /var/lib/private/ollama
        # which is not accessible by the subuid user (200000).
        # All coreutils calls use explicit store paths: the systemd service
        # PATH is minimal and bare "rm"/"mkdir"/"cp" may not resolve.
        (pkgs.writeShellScript "ollama-pre-start" ''
          DATA_DIR="${cfg.dataDir}"
          # Check if it is a symlink
          if [ -L "$DATA_DIR" ]; then
            echo "Detected symlink at $DATA_DIR. Removing and converting to directory..."
            TARGET=$(${pkgs.coreutils}/bin/readlink -f "$DATA_DIR")
            ${pkgs.coreutils}/bin/rm "$DATA_DIR"
            ${pkgs.coreutils}/bin/mkdir -p "$DATA_DIR"
            # If the target existed and has data, copy it back (optional, but safe).
            # "$TARGET/." instead of "$TARGET"/* so hidden files are included
            # and an empty directory does not trip the glob.
            if [ -d "$TARGET" ]; then
              echo "Restoring data from $TARGET..."
              ${pkgs.coreutils}/bin/cp -a "$TARGET/." "$DATA_DIR/" || true
            fi
          else
            ${pkgs.coreutils}/bin/mkdir -p "$DATA_DIR"
          fi
          # Fix ownership for UserNS (container user maps to host UID 200000)
          # NOTE(review): with --userns=auto podman picks the subuid range
          # itself; 200000 assumes the first auto range — confirm /etc/subuid.
          ${pkgs.coreutils}/bin/chown -R 200000:200000 "$DATA_DIR"
          ${pkgs.coreutils}/bin/chmod 0755 "$DATA_DIR"
        '')
      ];

      # Foreground run (--rm) so systemd supervises the container directly.
      # When numThreads is null the interpolation yields a whitespace-only
      # continuation line, which systemd accepts.
      ExecStart = ''
        ${pkgs.podman}/bin/podman run --rm --name ollama \
          --network=antigravity-net \
          --network-alias=ollama \
          --dns=8.8.8.8 \
          --device=/dev/kfd \
          --device=/dev/dri \
          --userns=auto \
          -e HSA_OVERRIDE_GFX_VERSION=${cfg.hsaGfxVersion} \
          -e OLLAMA_HOST=0.0.0.0 \
          -e OLLAMA_ORIGINS="*" \
          -e OLLAMA_KEEP_ALIVE=${cfg.keepAlive} \
          -e OLLAMA_NUM_PARALLEL=${toString cfg.numParallel} \
          -e OLLAMA_MAX_LOADED_MODELS=${toString cfg.maxLoadedModels} \
          ${
            lib.optionalString (cfg.numThreads != null) "-e OLLAMA_NUM_THREADS=${toString cfg.numThreads}"
          } \
          -v ${cfg.dataDir}:/root/.ollama:U \
          -p 127.0.0.1:${toString cfg.port}:11434 \
          ${cfg.image}
      '';
      ExecStop = "${pkgs.podman}/bin/podman stop ollama";
    };

    wantedBy = [ "multi-user.target" ];
  };
};
}