tweaks to ollama
parent fe96f9fb7c
commit d4436fe7f3
2 changed files with 3 additions and 509 deletions
@@ -32,7 +32,7 @@
       OLLAMA_MAX_QUEUE = "256";
 
       # Enable debug logging initially for troubleshooting
-      OLLAMA_DEBUG = "1";
+      OLLAMA_DEBUG = "0";
     };
 
     openFirewall = true; # Set to true if you want to allow external access
@@ -44,88 +44,15 @@
   # Apply resource limits using systemd overrides
   systemd.services.ollama = {
     serviceConfig = {
-      MemoryMax = "12G";
-      CPUQuota = "75%";
+      MemoryMax = "20G";
+      CPUQuota = "800%";
     };
   };
 
-  # Optional: Create a simple web interface using a lightweight tool
-  # This could be added later if desired for easier model management
-
   # Add useful packages for AI development
   environment.systemPackages = with pkgs; [
     # CLI clients for testing
     curl
     jq
-
-    # Python packages for AI development (optional)
-    (python3.withPackages (ps:
-      with ps; [
-        requests
-        openai # For OpenAI-compatible API testing
-      ]))
   ];
-
-  # Create a simple script for testing Ollama
-  environment.etc."ollama-test.sh" = {
-    text = ''
-      #!/usr/bin/env bash
-      # Simple test script for Ollama service
-
-      echo "Testing Ollama service..."
-
-      # Test basic connectivity
-      if curl -s http://localhost:11434/api/tags >/dev/null; then
-        echo "✓ Ollama API is responding"
-      else
-        echo "✗ Ollama API is not responding"
-        exit 1
-      fi
-
-      # List available models
-      echo "Available models:"
-      curl -s http://localhost:11434/api/tags | jq -r '.models[]?.name // "No models found"'
-
-      # Simple generation test if models are available
-      if curl -s http://localhost:11434/api/tags | jq -e '.models | length > 0' >/dev/null; then
-        echo "Testing text generation..."
-        model=$(curl -s http://localhost:11434/api/tags | jq -r '.models[0].name')
-        response=$(curl -s -X POST http://localhost:11434/api/generate \
-          -H "Content-Type: application/json" \
-          -d "{\"model\": \"$model\", \"prompt\": \"Hello, world!\", \"stream\": false}" | \
-          jq -r '.response // "No response"')
-        echo "Response from $model: $response"
-      else
-        echo "No models available for testing"
-      fi
-    '';
-    mode = "0755";
-  };
-
-  # Firewall rule comments for documentation
-  # To enable external access later, you would:
-  # 1. Set services.homelab-ollama.openFirewall = true;
-  # 2. Or configure a reverse proxy (recommended for production)
-
-  # Example reverse proxy configuration (commented out):
-  /*
-  services.nginx = {
-    enable = true;
-    virtualHosts."ollama.grey-area.lan" = {
-      listen = [
-        { addr = "0.0.0.0"; port = 8080; }
-      ];
-      locations."/" = {
-        proxyPass = "http://127.0.0.1:11434";
-        proxyWebsockets = true;
-        extraConfig = ''
-          proxy_set_header Host $host;
-          proxy_set_header X-Real-IP $remote_addr;
-          proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-          proxy_set_header X-Forwarded-Proto $scheme;
-        '';
-      };
-    };
-  };
-  */
 }
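A note on the limits change above, since the numbers look more dramatic than they are: systemd's CPUQuota= is cumulative across CPUs, so 100% is one full core and the move from 75% to 800% takes the service from under one core to as many as eight. MemoryMax= is a hard cgroup cap under which the service is OOM-killed rather than throttled. A minimal standalone sketch of the same override pattern, assuming the unit keeps the name ollama.service; the MemoryHigh line is an optional extra, not part of this commit:

{ ... }: {
  # Same systemd override mechanism as the hunk above.
  systemd.services.ollama.serviceConfig = {
    MemoryMax = "20G"; # hard cap: the service's cgroup is OOM-killed above this
    MemoryHigh = "18G"; # optional softer ceiling: throttles before killing
    CPUQuota = "800%"; # 100% per core, so up to eight full cores
  };
}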
@@ -1,433 +0,0 @@
-# NixOS Ollama Service Configuration
-#
-# This module provides a comprehensive Ollama service configuration for the home lab.
-# Ollama is a tool for running large language models locally with an OpenAI-compatible API.
-#
-# Features:
-# - Secure service isolation with dedicated user
-# - Configurable network binding (localhost by default for security)
-# - Resource management and monitoring
-# - Integration with existing NixOS infrastructure
-# - Optional GPU acceleration support
-# - Comprehensive logging and monitoring
-{
-  config,
-  lib,
-  pkgs,
-  ...
-}:
-with lib; let
-  cfg = config.services.homelab-ollama;
-in {
-  options.services.homelab-ollama = {
-    enable = mkEnableOption "Ollama local LLM service for home lab";
-
-    package = mkOption {
-      type = types.package;
-      default = pkgs.ollama;
-      description = "The Ollama package to use";
-    };
-
-    host = mkOption {
-      type = types.str;
-      default = "127.0.0.1";
-      description = ''
-        The host address to bind to. Use "0.0.0.0" to allow external access.
-        Default is localhost for security.
-      '';
-    };
-
-    port = mkOption {
-      type = types.port;
-      default = 11434;
-      description = "The port to bind to";
-    };
-
-    dataDir = mkOption {
-      type = types.path;
-      default = "/var/lib/ollama";
-      description = "Directory to store Ollama data including models";
-    };
-
-    user = mkOption {
-      type = types.str;
-      default = "ollama";
-      description = "User account under which Ollama runs";
-    };
-
-    group = mkOption {
-      type = types.str;
-      default = "ollama";
-      description = "Group under which Ollama runs";
-    };
-
-    environmentVariables = mkOption {
-      type = types.attrsOf types.str;
-      default = {};
-      description = ''
-        Environment variables for the Ollama service.
-        Common variables:
-        - OLLAMA_ORIGINS: Allowed origins for CORS (default: http://localhost,http://127.0.0.1)
-        - OLLAMA_CONTEXT_LENGTH: Context window size (default: 2048)
-        - OLLAMA_NUM_PARALLEL: Number of parallel requests (default: 1)
-        - OLLAMA_MAX_QUEUE: Maximum queued requests (default: 512)
-        - OLLAMA_DEBUG: Enable debug logging (default: false)
-        - OLLAMA_MODELS: Model storage directory
-      '';
-      example = {
-        OLLAMA_ORIGINS = "http://localhost,http://127.0.0.1,http://grey-area.lan";
-        OLLAMA_CONTEXT_LENGTH = "4096";
-        OLLAMA_DEBUG = "1";
-      };
-    };
-
-    models = mkOption {
-      type = types.listOf types.str;
-      default = [];
-      description = ''
-        List of models to automatically download on service start.
-        Models will be pulled using 'ollama pull <model>'.
-
-        Popular models:
-        - "llama3.3:8b" - Meta's latest Llama model (8B parameters)
-        - "mistral:7b" - Mistral AI's efficient model
-        - "codellama:7b" - Code-focused model
-        - "gemma2:9b" - Google's Gemma model
-        - "qwen2.5:7b" - Multilingual model with good coding
-
-        Note: Models are large (4-32GB each). Ensure adequate storage.
-      '';
-      example = ["llama3.3:8b" "codellama:7b" "mistral:7b"];
-    };
-
-    openFirewall = mkOption {
-      type = types.bool;
-      default = false;
-      description = ''
-        Whether to open the firewall for the Ollama service.
-        Only enable if you need external access to the API.
-      '';
-    };
-
-    enableGpuAcceleration = mkOption {
-      type = types.bool;
-      default = false;
-      description = ''
-        Enable GPU acceleration for model inference.
-        Requires compatible GPU and drivers (NVIDIA CUDA or AMD ROCm).
-
-        For NVIDIA: Ensure nvidia-docker and nvidia-container-toolkit are configured.
-        For AMD: Ensure ROCm is installed and configured.
-      '';
-    };
-
-    resourceLimits = {
-      maxMemory = mkOption {
-        type = types.nullOr types.str;
-        default = null;
-        description = ''
-          Maximum memory usage for the Ollama service (systemd MemoryMax).
-          Use suffixes like "8G", "16G", etc.
-          Set to null for no limit.
-        '';
-        example = "16G";
-      };
-
-      maxCpuPercent = mkOption {
-        type = types.nullOr types.int;
-        default = null;
-        description = ''
-          Maximum CPU usage percentage (systemd CPUQuota).
-          Value between 1-100. Set to null for no limit.
-        '';
-        example = 80;
-      };
-    };
-
-    backup = {
-      enable = mkOption {
-        type = types.bool;
-        default = false;
-        description = "Enable automatic backup of custom models and configuration";
-      };
-
-      destination = mkOption {
-        type = types.str;
-        default = "/backup/ollama";
-        description = "Backup destination directory";
-      };
-
-      schedule = mkOption {
-        type = types.str;
-        default = "daily";
-        description = "Backup schedule (systemd timer format)";
-      };
-    };
-
-    monitoring = {
-      enable = mkOption {
-        type = types.bool;
-        default = true;
-        description = "Enable monitoring and health checks";
-      };
-
-      healthCheckInterval = mkOption {
-        type = types.str;
-        default = "30s";
-        description = "Health check interval";
-      };
-    };
-  };
-
-  config = mkIf cfg.enable {
-    # Ensure the Ollama package is available in the system
-    environment.systemPackages = [cfg.package];
-
-    # User and group configuration
-    users.users.${cfg.user} = {
-      isSystemUser = true;
-      group = cfg.group;
-      home = cfg.dataDir;
-      createHome = true;
-      description = "Ollama service user";
-      shell = pkgs.bash;
-    };
-
-    users.groups.${cfg.group} = {};
-
-    # GPU support configuration
-    hardware.opengl = mkIf cfg.enableGpuAcceleration {
-      enable = true;
-      driSupport = true;
-      driSupport32Bit = true;
-    };
-
-    # NVIDIA GPU support
-    services.xserver.videoDrivers = mkIf (cfg.enableGpuAcceleration && config.hardware.nvidia.modesetting.enable) ["nvidia"];
-
-    # AMD GPU support
-    systemd.packages = mkIf (cfg.enableGpuAcceleration && config.hardware.amdgpu.opencl.enable) [pkgs.rocmPackages.clr];
-
-    # Main Ollama service
-    systemd.services.ollama = {
-      description = "Ollama Local LLM Service";
-      wantedBy = ["multi-user.target"];
-      after = ["network-online.target"];
-      wants = ["network-online.target"];
-
-      environment =
-        {
-          OLLAMA_HOST = "${cfg.host}:${toString cfg.port}";
-          OLLAMA_MODELS = "${cfg.dataDir}/models";
-          OLLAMA_RUNNERS_DIR = "${cfg.dataDir}/runners";
-        }
-        // cfg.environmentVariables;
-
-      serviceConfig = {
-        Type = "simple";
-        ExecStart = "${cfg.package}/bin/ollama serve";
-        User = cfg.user;
-        Group = cfg.group;
-        Restart = "always";
-        RestartSec = "3";
-
-        # Security hardening
-        NoNewPrivileges = true;
-        ProtectSystem = "strict";
-        ProtectHome = true;
-        PrivateTmp = true;
-        PrivateDevices = mkIf (!cfg.enableGpuAcceleration) true;
-        ProtectHostname = true;
-        ProtectClock = true;
-        ProtectKernelTunables = true;
-        ProtectKernelModules = true;
-        ProtectKernelLogs = true;
-        ProtectControlGroups = true;
-        RestrictAddressFamilies = ["AF_UNIX" "AF_INET" "AF_INET6"];
-        RestrictNamespaces = true;
-        LockPersonality = true;
-        RestrictRealtime = true;
-        RestrictSUIDSGID = true;
-        RemoveIPC = true;
-
-        # Resource limits
-        MemoryMax = mkIf (cfg.resourceLimits.maxMemory != null) cfg.resourceLimits.maxMemory;
-        CPUQuota = mkIf (cfg.resourceLimits.maxCpuPercent != null) "${toString cfg.resourceLimits.maxCpuPercent}%";
-
-        # File system access
-        ReadWritePaths = [cfg.dataDir];
-        StateDirectory = "ollama";
-        CacheDirectory = "ollama";
-        LogsDirectory = "ollama";
-
-        # GPU access for NVIDIA
-        SupplementaryGroups = mkIf (cfg.enableGpuAcceleration && config.hardware.nvidia.modesetting.enable) ["video" "render"];
-
-        # For AMD GPU access, allow access to /dev/dri
-        DeviceAllow = mkIf (cfg.enableGpuAcceleration && config.hardware.amdgpu.opencl.enable) [
-          "/dev/dri"
-          "/dev/kfd rw"
-        ];
-      };
-
-      # Ensure data directory exists with correct permissions
-      preStart = ''
-        mkdir -p ${cfg.dataDir}/{models,runners}
-        chown -R ${cfg.user}:${cfg.group} ${cfg.dataDir}
-        chmod 755 ${cfg.dataDir}
-      '';
-    };
-
-    # Model download service (runs after ollama is up)
-    systemd.services.ollama-model-download = mkIf (cfg.models != []) {
-      description = "Download Ollama Models";
-      wantedBy = ["multi-user.target"];
-      after = ["ollama.service"];
-      wants = ["ollama.service"];
-
-      environment = {
-        OLLAMA_HOST = "${cfg.host}:${toString cfg.port}";
-      };
-
-      serviceConfig = {
-        Type = "oneshot";
-        User = cfg.user;
-        Group = cfg.group;
-        RemainAfterExit = true;
-        TimeoutStartSec = "30min"; # Models can be large
-      };
-
-      script = ''
-        # Wait for Ollama to be ready
-        echo "Waiting for Ollama service to be ready..."
-        while ! ${cfg.package}/bin/ollama list >/dev/null 2>&1; do
-          sleep 2
-        done
-
-        echo "Ollama is ready. Downloading configured models..."
-        ${concatMapStringsSep "\n" (model: ''
-          echo "Downloading model: ${model}"
-          if ! ${cfg.package}/bin/ollama list | grep -q "^${model}"; then
-            ${cfg.package}/bin/ollama pull "${model}"
-          else
-            echo "Model ${model} already exists, skipping download"
-          fi
-        '')
-        cfg.models}
-
-        echo "Model download completed"
-      '';
-    };
-
-    # Health check service
-    systemd.services.ollama-health-check = mkIf cfg.monitoring.enable {
-      description = "Ollama Health Check";
-      serviceConfig = {
-        Type = "oneshot";
-        User = cfg.user;
-        Group = cfg.group;
-        ExecStart = pkgs.writeShellScript "ollama-health-check" ''
-          # Basic health check - verify API is responding
-          if ! ${pkgs.curl}/bin/curl -f -s "http://${cfg.host}:${toString cfg.port}/api/tags" >/dev/null; then
-            echo "Ollama health check failed - API not responding"
-            exit 1
-          fi
-
-          # Check if we can list models
-          if ! ${cfg.package}/bin/ollama list >/dev/null 2>&1; then
-            echo "Ollama health check failed - cannot list models"
-            exit 1
-          fi
-
-          echo "Ollama health check passed"
-        '';
-      };
-    };
-
-    # Health check timer
-    systemd.timers.ollama-health-check = mkIf cfg.monitoring.enable {
-      description = "Ollama Health Check Timer";
-      wantedBy = ["timers.target"];
-      timerConfig = {
-        OnBootSec = "5min";
-        OnUnitActiveSec = cfg.monitoring.healthCheckInterval;
-        Persistent = true;
-      };
-    };
-
-    # Backup service
-    systemd.services.ollama-backup = mkIf cfg.backup.enable {
-      description = "Backup Ollama Data";
-      serviceConfig = {
-        Type = "oneshot";
-        User = "root"; # Need root for backup operations
-        ExecStart = pkgs.writeShellScript "ollama-backup" ''
-          mkdir -p "${cfg.backup.destination}"
-
-          # Backup custom models and configuration (excluding large standard models)
-          echo "Starting Ollama backup to ${cfg.backup.destination}"
-
-          # Create timestamped backup
-          backup_dir="${cfg.backup.destination}/$(date +%Y%m%d_%H%M%S)"
-          mkdir -p "$backup_dir"
-
-          # Backup configuration and custom content
-          if [ -d "${cfg.dataDir}" ]; then
-            # Only backup manifests and small configuration files, not the large model blobs
-            find "${cfg.dataDir}" -name "*.json" -o -name "*.yaml" -o -name "*.txt" | \
-              ${pkgs.rsync}/bin/rsync -av --files-from=- / "$backup_dir/"
-          fi
-
-          # Keep only last 7 backups
-          find "${cfg.backup.destination}" -maxdepth 1 -type d -name "????????_??????" | \
-            sort -r | tail -n +8 | xargs -r rm -rf
-
-          echo "Ollama backup completed"
-        '';
-      };
-    };
-
-    # Backup timer
-    systemd.timers.ollama-backup = mkIf cfg.backup.enable {
-      description = "Ollama Backup Timer";
-      wantedBy = ["timers.target"];
-      timerConfig = {
-        OnCalendar = cfg.backup.schedule;
-        Persistent = true;
-      };
-    };
-
-    # Firewall configuration
-    networking.firewall = mkIf cfg.openFirewall {
-      allowedTCPPorts = [cfg.port];
-    };
-
-    # Log rotation
-    services.logrotate.settings.ollama = {
-      files = ["/var/log/ollama/*.log"];
-      frequency = "daily";
-      rotate = 7;
-      compress = true;
-      delaycompress = true;
-      missingok = true;
-      notifempty = true;
-      create = "644 ${cfg.user} ${cfg.group}";
-    };
-
-    # Add helpful aliases
-    environment.shellAliases = {
-      ollama-status = "systemctl status ollama";
-      ollama-logs = "journalctl -u ollama -f";
-      ollama-models = "${cfg.package}/bin/ollama list";
-      ollama-pull = "${cfg.package}/bin/ollama pull";
-      ollama-run = "${cfg.package}/bin/ollama run";
-    };
-
-    # Ensure proper permissions for model directory
-    systemd.tmpfiles.rules = [
-      "d ${cfg.dataDir} 0755 ${cfg.user} ${cfg.group} -"
-      "d ${cfg.dataDir}/models 0755 ${cfg.user} ${cfg.group} -"
-      "d ${cfg.dataDir}/runners 0755 ${cfg.user} ${cfg.group} -"
-    ];
-  };
-}
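With the custom services.homelab-ollama module deleted, the surviving host config in the first file appears to drive a simpler Ollama module plus direct systemd overrides. As a point of comparison only, here is a minimal sketch of the equivalent using the upstream nixpkgs services.ollama module; the diff does not show what actually replaced the deleted file, so treat this as an assumption, with upstream option names rather than this repo's:

{ ... }: {
  services.ollama = {
    enable = true;
    host = "127.0.0.1"; # localhost only; the Ollama API has no authentication of its own
    port = 11434;
    openFirewall = true; # mirrors the surviving config above
    environmentVariables = {
      OLLAMA_MAX_QUEUE = "256";
      OLLAMA_DEBUG = "0";
    };
  };
}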
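One thing the deletion drops silently is the systemd hardening the old module applied (ProtectSystem, RestrictAddressFamilies, and friends). If that is still wanted, it can be restored as a plain override on whatever unit remains; a sketch, assuming the unit is still named ollama.service and GPU passthrough is not in use:

{ ... }: {
  systemd.services.ollama.serviceConfig = {
    # Subset of the hardening options from the deleted module.
    NoNewPrivileges = true;
    ProtectSystem = "strict";
    ProtectHome = true;
    PrivateTmp = true;
    PrivateDevices = true; # remove this if GPU acceleration is enabled
    RestrictAddressFamilies = [ "AF_UNIX" "AF_INET" "AF_INET6" ];
    LockPersonality = true;
    RestrictRealtime = true;
    RestrictSUIDSGID = true;
  };
}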