From d4436fe7f38a59ed702e52c159fa701685a5d2b3 Mon Sep 17 00:00:00 2001
From: Geir Okkenhaug Jerstad
Date: Sat, 14 Jun 2025 09:38:10 +0200
Subject: [PATCH] tweaks to ollama

---
 machines/grey-area/services/ollama.nix |  79 +----
 modules/services/ollama.nix            | 433 -------------------------
 2 files changed, 3 insertions(+), 509 deletions(-)
 delete mode 100644 modules/services/ollama.nix

diff --git a/machines/grey-area/services/ollama.nix b/machines/grey-area/services/ollama.nix
index 0d02a7b..7b847be 100644
--- a/machines/grey-area/services/ollama.nix
+++ b/machines/grey-area/services/ollama.nix
@@ -32,7 +32,7 @@
     OLLAMA_MAX_QUEUE = "256";

     # Enable debug logging initially for troubleshooting
-    OLLAMA_DEBUG = "1";
+    OLLAMA_DEBUG = "0";
   };

   openFirewall = true; # Set to true if you want to allow external access
@@ -44,88 +44,15 @@
   # Apply resource limits using systemd overrides
   systemd.services.ollama = {
     serviceConfig = {
-      MemoryMax = "12G";
-      CPUQuota = "75%";
+      MemoryMax = "20G";
+      CPUQuota = "800%";
     };
   };

-  # Optional: Create a simple web interface using a lightweight tool
-  # This could be added later if desired for easier model management
-
   # Add useful packages for AI development
   environment.systemPackages = with pkgs; [
     # CLI clients for testing
     curl
     jq
-
-    # Python packages for AI development (optional)
-    (python3.withPackages (ps:
-      with ps; [
-        requests
-        openai # For OpenAI-compatible API testing
-      ]))
   ];
-
-  # Create a simple script for testing Ollama
-  environment.etc."ollama-test.sh" = {
-    text = ''
-      #!/usr/bin/env bash
-      # Simple test script for Ollama service
-
-      echo "Testing Ollama service..."
-
-      # Test basic connectivity
-      if curl -s http://localhost:11434/api/tags >/dev/null; then
-        echo "✓ Ollama API is responding"
-      else
-        echo "✗ Ollama API is not responding"
-        exit 1
-      fi
-
-      # List available models
-      echo "Available models:"
-      curl -s http://localhost:11434/api/tags | jq -r '.models[]?.name // "No models found"'
-
-      # Simple generation test if models are available
-      if curl -s http://localhost:11434/api/tags | jq -e '.models | length > 0' >/dev/null; then
-        echo "Testing text generation..."
-        model=$(curl -s http://localhost:11434/api/tags | jq -r '.models[0].name')
-        response=$(curl -s -X POST http://localhost:11434/api/generate \
-          -H "Content-Type: application/json" \
-          -d "{\"model\": \"$model\", \"prompt\": \"Hello, world!\", \"stream\": false}" | \
-          jq -r '.response // "No response"')
-        echo "Response from $model: $response"
-      else
-        echo "No models available for testing"
-      fi
-    '';
-    mode = "0755";
-  };
-
-  # Firewall rule comments for documentation
-  # To enable external access later, you would:
-  # 1. Set services.homelab-ollama.openFirewall = true;
-  # 2. Or configure a reverse proxy (recommended for production)
-
-  # Example reverse proxy configuration (commented out):
-  /*
-  services.nginx = {
-    enable = true;
-    virtualHosts."ollama.grey-area.lan" = {
-      listen = [
-        { addr = "0.0.0.0"; port = 8080; }
-      ];
-      locations."/" = {
-        proxyPass = "http://127.0.0.1:11434";
-        proxyWebsockets = true;
-        extraConfig = ''
-          proxy_set_header Host $host;
-          proxy_set_header X-Real-IP $remote_addr;
-          proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-          proxy_set_header X-Forwarded-Proto $scheme;
-        '';
-      };
-    };
-  };
-  */
 }
diff --git a/modules/services/ollama.nix b/modules/services/ollama.nix
deleted file mode 100644
index d67d5ba..0000000
--- a/modules/services/ollama.nix
+++ /dev/null
@@ -1,433 +0,0 @@
-# NixOS Ollama Service Configuration
-#
-# This module provides a comprehensive Ollama service configuration for the home lab.
-# Ollama is a tool for running large language models locally with an OpenAI-compatible API.
-#
-# Features:
-# - Secure service isolation with dedicated user
-# - Configurable network binding (localhost by default for security)
-# - Resource management and monitoring
-# - Integration with existing NixOS infrastructure
-# - Optional GPU acceleration support
-# - Comprehensive logging and monitoring
-{
-  config,
-  lib,
-  pkgs,
-  ...
-}:
-with lib; let
-  cfg = config.services.homelab-ollama;
-in {
-  options.services.homelab-ollama = {
-    enable = mkEnableOption "Ollama local LLM service for home lab";
-
-    package = mkOption {
-      type = types.package;
-      default = pkgs.ollama;
-      description = "The Ollama package to use";
-    };
-
-    host = mkOption {
-      type = types.str;
-      default = "127.0.0.1";
-      description = ''
-        The host address to bind to. Use "0.0.0.0" to allow external access.
-        Default is localhost for security.
-      '';
-    };
-
-    port = mkOption {
-      type = types.port;
-      default = 11434;
-      description = "The port to bind to";
-    };
-
-    dataDir = mkOption {
-      type = types.path;
-      default = "/var/lib/ollama";
-      description = "Directory to store Ollama data including models";
-    };
-
-    user = mkOption {
-      type = types.str;
-      default = "ollama";
-      description = "User account under which Ollama runs";
-    };
-
-    group = mkOption {
-      type = types.str;
-      default = "ollama";
-      description = "Group under which Ollama runs";
-    };
-
-    environmentVariables = mkOption {
-      type = types.attrsOf types.str;
-      default = {};
-      description = ''
-        Environment variables for the Ollama service.
-        Common variables:
-        - OLLAMA_ORIGINS: Allowed origins for CORS (default: http://localhost,http://127.0.0.1)
-        - OLLAMA_CONTEXT_LENGTH: Context window size (default: 2048)
-        - OLLAMA_NUM_PARALLEL: Number of parallel requests (default: 1)
-        - OLLAMA_MAX_QUEUE: Maximum queued requests (default: 512)
-        - OLLAMA_DEBUG: Enable debug logging (default: false)
-        - OLLAMA_MODELS: Model storage directory
-      '';
-      example = {
-        OLLAMA_ORIGINS = "http://localhost,http://127.0.0.1,http://grey-area.lan";
-        OLLAMA_CONTEXT_LENGTH = "4096";
-        OLLAMA_DEBUG = "1";
-      };
-    };
-
-    models = mkOption {
-      type = types.listOf types.str;
-      default = [];
-      description = ''
-        List of models to automatically download on service start.
-        Models will be pulled using 'ollama pull <model-name>'.
-
-        Popular models:
-        - "llama3.3:8b" - Meta's latest Llama model (8B parameters)
-        - "mistral:7b" - Mistral AI's efficient model
-        - "codellama:7b" - Code-focused model
-        - "gemma2:9b" - Google's Gemma model
-        - "qwen2.5:7b" - Multilingual model with good coding
-
-        Note: Models are large (4-32GB each). Ensure adequate storage.
-      '';
-      example = ["llama3.3:8b" "codellama:7b" "mistral:7b"];
-    };
-
-    openFirewall = mkOption {
-      type = types.bool;
-      default = false;
-      description = ''
-        Whether to open the firewall for the Ollama service.
-        Only enable if you need external access to the API.
-      '';
-    };
-
-    enableGpuAcceleration = mkOption {
-      type = types.bool;
-      default = false;
-      description = ''
-        Enable GPU acceleration for model inference.
-        Requires compatible GPU and drivers (NVIDIA CUDA or AMD ROCm).
-
-        For NVIDIA: Ensure nvidia-docker and nvidia-container-toolkit are configured.
-        For AMD: Ensure ROCm is installed and configured.
-      '';
-    };
-
-    resourceLimits = {
-      maxMemory = mkOption {
-        type = types.nullOr types.str;
-        default = null;
-        description = ''
-          Maximum memory usage for the Ollama service (systemd MemoryMax).
-          Use suffixes like "8G", "16G", etc.
-          Set to null for no limit.
-        '';
-        example = "16G";
-      };
-
-      maxCpuPercent = mkOption {
-        type = types.nullOr types.int;
-        default = null;
-        description = ''
-          Maximum CPU usage percentage (systemd CPUQuota).
-          Value between 1-100. Set to null for no limit.
-        '';
-        example = 80;
-      };
-    };
-
-    backup = {
-      enable = mkOption {
-        type = types.bool;
-        default = false;
-        description = "Enable automatic backup of custom models and configuration";
-      };
-
-      destination = mkOption {
-        type = types.str;
-        default = "/backup/ollama";
-        description = "Backup destination directory";
-      };
-
-      schedule = mkOption {
-        type = types.str;
-        default = "daily";
-        description = "Backup schedule (systemd timer format)";
-      };
-    };
-
-    monitoring = {
-      enable = mkOption {
-        type = types.bool;
-        default = true;
-        description = "Enable monitoring and health checks";
-      };
-
-      healthCheckInterval = mkOption {
-        type = types.str;
-        default = "30s";
-        description = "Health check interval";
-      };
-    };
-  };
-
-  config = mkIf cfg.enable {
-    # Ensure the Ollama package is available in the system
-    environment.systemPackages = [cfg.package];
-
-    # User and group configuration
-    users.users.${cfg.user} = {
-      isSystemUser = true;
-      group = cfg.group;
-      home = cfg.dataDir;
-      createHome = true;
-      description = "Ollama service user";
-      shell = pkgs.bash;
-    };
-
-    users.groups.${cfg.group} = {};
-
-    # GPU support configuration
-    hardware.opengl = mkIf cfg.enableGpuAcceleration {
-      enable = true;
-      driSupport = true;
-      driSupport32Bit = true;
-    };
-
-    # NVIDIA GPU support
-    services.xserver.videoDrivers = mkIf (cfg.enableGpuAcceleration && config.hardware.nvidia.modesetting.enable) ["nvidia"];
-
-    # AMD GPU support
-    systemd.packages = mkIf (cfg.enableGpuAcceleration && config.hardware.amdgpu.opencl.enable) [pkgs.rocmPackages.clr];
-
-    # Main Ollama service
-    systemd.services.ollama = {
-      description = "Ollama Local LLM Service";
-      wantedBy = ["multi-user.target"];
-      after = ["network-online.target"];
-      wants = ["network-online.target"];
-
-      environment =
-        {
-          OLLAMA_HOST = "${cfg.host}:${toString cfg.port}";
-          OLLAMA_MODELS = "${cfg.dataDir}/models";
-          OLLAMA_RUNNERS_DIR = "${cfg.dataDir}/runners";
-        }
-        // cfg.environmentVariables;
-
-      serviceConfig = {
-        Type = "simple";
-        ExecStart = "${cfg.package}/bin/ollama serve";
-        User = cfg.user;
-        Group = cfg.group;
-        Restart = "always";
-        RestartSec = "3";
-
-        # Security hardening
-        NoNewPrivileges = true;
-        ProtectSystem = "strict";
-        ProtectHome = true;
-        PrivateTmp = true;
-        PrivateDevices = mkIf (!cfg.enableGpuAcceleration) true;
-        ProtectHostname = true;
-        ProtectClock = true;
-        ProtectKernelTunables = true;
-        ProtectKernelModules = true;
-        ProtectKernelLogs = true;
-        ProtectControlGroups = true;
-        RestrictAddressFamilies = ["AF_UNIX" "AF_INET" "AF_INET6"];
-        RestrictNamespaces = true;
-        LockPersonality = true;
-        RestrictRealtime = true;
-        RestrictSUIDSGID = true;
-        RemoveIPC = true;
-
-        # Resource limits
-        MemoryMax = mkIf (cfg.resourceLimits.maxMemory != null) cfg.resourceLimits.maxMemory;
-        CPUQuota = mkIf (cfg.resourceLimits.maxCpuPercent != null) "${toString cfg.resourceLimits.maxCpuPercent}%";
-
-        # File system access
-        ReadWritePaths = [cfg.dataDir];
-        StateDirectory = "ollama";
-        CacheDirectory = "ollama";
-        LogsDirectory = "ollama";
-
-        # GPU access for NVIDIA
-        SupplementaryGroups = mkIf (cfg.enableGpuAcceleration && config.hardware.nvidia.modesetting.enable) ["video" "render"];
-
-        # For AMD GPU access, allow access to /dev/dri
-        DeviceAllow = mkIf (cfg.enableGpuAcceleration && config.hardware.amdgpu.opencl.enable) [
-          "/dev/dri"
-          "/dev/kfd rw"
-        ];
-      };
-
-      # Ensure data directory exists with correct permissions
-      preStart = ''
-        mkdir -p ${cfg.dataDir}/{models,runners}
-        chown -R ${cfg.user}:${cfg.group} ${cfg.dataDir}
-        chmod 755 ${cfg.dataDir}
-      '';
-    };
-
-    # Model download service (runs after ollama is up)
-    systemd.services.ollama-model-download = mkIf (cfg.models != []) {
-      description = "Download Ollama Models";
-      wantedBy = ["multi-user.target"];
-      after = ["ollama.service"];
-      wants = ["ollama.service"];
-
-      environment = {
-        OLLAMA_HOST = "${cfg.host}:${toString cfg.port}";
-      };
-
-      serviceConfig = {
-        Type = "oneshot";
-        User = cfg.user;
-        Group = cfg.group;
-        RemainAfterExit = true;
-        TimeoutStartSec = "30min"; # Models can be large
-      };
-
-      script = ''
-        # Wait for Ollama to be ready
-        echo "Waiting for Ollama service to be ready..."
-        while ! ${cfg.package}/bin/ollama list >/dev/null 2>&1; do
-          sleep 2
-        done
-
-        echo "Ollama is ready. Downloading configured models..."
-        ${concatMapStringsSep "\n" (model: ''
-          echo "Downloading model: ${model}"
-          if ! ${cfg.package}/bin/ollama list | grep -q "^${model}"; then
-            ${cfg.package}/bin/ollama pull "${model}"
-          else
-            echo "Model ${model} already exists, skipping download"
-          fi
-        '')
-        cfg.models}
-
-        echo "Model download completed"
-      '';
-    };
-
-    # Health check service
-    systemd.services.ollama-health-check = mkIf cfg.monitoring.enable {
-      description = "Ollama Health Check";
-      serviceConfig = {
-        Type = "oneshot";
-        User = cfg.user;
-        Group = cfg.group;
-        ExecStart = pkgs.writeShellScript "ollama-health-check" ''
-          # Basic health check - verify API is responding
-          if ! ${pkgs.curl}/bin/curl -f -s "http://${cfg.host}:${toString cfg.port}/api/tags" >/dev/null; then
-            echo "Ollama health check failed - API not responding"
-            exit 1
-          fi
-
-          # Check if we can list models
-          if ! ${cfg.package}/bin/ollama list >/dev/null 2>&1; then
-            echo "Ollama health check failed - cannot list models"
-            exit 1
-          fi
-
-          echo "Ollama health check passed"
-        '';
-      };
-    };
-
-    # Health check timer
-    systemd.timers.ollama-health-check = mkIf cfg.monitoring.enable {
-      description = "Ollama Health Check Timer";
-      wantedBy = ["timers.target"];
-      timerConfig = {
-        OnBootSec = "5min";
-        OnUnitActiveSec = cfg.monitoring.healthCheckInterval;
-        Persistent = true;
-      };
-    };
-
-    # Backup service
-    systemd.services.ollama-backup = mkIf cfg.backup.enable {
-      description = "Backup Ollama Data";
-      serviceConfig = {
-        Type = "oneshot";
-        User = "root"; # Need root for backup operations
-        ExecStart = pkgs.writeShellScript "ollama-backup" ''
-          mkdir -p "${cfg.backup.destination}"
-
-          # Backup custom models and configuration (excluding large standard models)
-          echo "Starting Ollama backup to ${cfg.backup.destination}"
-
-          # Create timestamped backup
-          backup_dir="${cfg.backup.destination}/$(date +%Y%m%d_%H%M%S)"
-          mkdir -p "$backup_dir"
-
-          # Backup configuration and custom content
-          if [ -d "${cfg.dataDir}" ]; then
-            # Only backup manifests and small configuration files, not the large model blobs
-            find "${cfg.dataDir}" -name "*.json" -o -name "*.yaml" -o -name "*.txt" | \
-              ${pkgs.rsync}/bin/rsync -av --files-from=- / "$backup_dir/"
-          fi
-
-          # Keep only last 7 backups
-          find "${cfg.backup.destination}" -maxdepth 1 -type d -name "????????_??????" | \
-            sort -r | tail -n +8 | xargs -r rm -rf
-
-          echo "Ollama backup completed"
-        '';
-      };
-    };
-
-    # Backup timer
-    systemd.timers.ollama-backup = mkIf cfg.backup.enable {
-      description = "Ollama Backup Timer";
-      wantedBy = ["timers.target"];
-      timerConfig = {
-        OnCalendar = cfg.backup.schedule;
-        Persistent = true;
-      };
-    };
-
-    # Firewall configuration
-    networking.firewall = mkIf cfg.openFirewall {
-      allowedTCPPorts = [cfg.port];
-    };
-
-    # Log rotation
-    services.logrotate.settings.ollama = {
-      files = ["/var/log/ollama/*.log"];
-      frequency = "daily";
-      rotate = 7;
-      compress = true;
-      delaycompress = true;
-      missingok = true;
-      notifempty = true;
-      create = "644 ${cfg.user} ${cfg.group}";
-    };
-
-    # Add helpful aliases
-    environment.shellAliases = {
-      ollama-status = "systemctl status ollama";
-      ollama-logs = "journalctl -u ollama -f";
-      ollama-models = "${cfg.package}/bin/ollama list";
-      ollama-pull = "${cfg.package}/bin/ollama pull";
-      ollama-run = "${cfg.package}/bin/ollama run";
-    };
-
-    # Ensure proper permissions for model directory
-    systemd.tmpfiles.rules = [
-      "d ${cfg.dataDir} 0755 ${cfg.user} ${cfg.group} -"
-      "d ${cfg.dataDir}/models 0755 ${cfg.user} ${cfg.group} -"
-      "d ${cfg.dataDir}/runners 0755 ${cfg.user} ${cfg.group} -"
-    ];
-  };
-}
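
A note on the new resource limits: systemd accounts CPUQuota per core, so "800%" lets the ollama unit use up to eight full cores, and MemoryMax = "20G" is a hard cgroup ceiling (the service is OOM-killed if it grows past it) rather than a soft target. A minimal sketch of the override in isolation, assuming the host uses the upstream NixOS services.ollama module (the option path differs if a custom module is still in use):

  { ... }: {
    services.ollama.enable = true;

    # systemd drop-in applied on top of whatever unit the module generates
    systemd.services.ollama.serviceConfig = {
      MemoryMax = "20G"; # hard memory cap for the whole service cgroup
      CPUQuota = "800%"; # 100% per core, i.e. up to 8 cores
    };
  }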
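One feature lost with the deleted modules/services/ollama.nix is the automatic model download (the ollama-model-download unit). Recent nixpkgs releases expose a similar knob on the upstream module, services.ollama.loadModels; the option name here is recalled from upstream and should be verified against the nixpkgs revision actually in use. A sketch, with the model tags taken from the deleted module's examples:

  { ... }: {
    services.ollama = {
      enable = true;
      # Pulled at service start by the upstream module, standing in for the
      # ollama-model-download unit from the deleted custom module.
      # Verify the option exists in your nixpkgs revision before relying on it.
      loadModels = ["mistral:7b" "codellama:7b"];
    };
  }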
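The /etc/ollama-test.sh helper is removed along with the Python tooling. If a quick smoke test is still wanted on the host, a condensed stand-in can be packaged straight into the machine config using only the curl and jq the config keeps; this is a sketch, and the ollama-smoke name is made up here:

  environment.systemPackages = [
    # hypothetical helper: checks the API answers, then lists installed models
    (pkgs.writeShellScriptBin "ollama-smoke" ''
      ${pkgs.curl}/bin/curl -fsS http://localhost:11434/api/tags >/dev/null \
        || { echo "Ollama API not responding on :11434"; exit 1; }
      ${pkgs.curl}/bin/curl -fsS http://localhost:11434/api/tags \
        | ${pkgs.jq}/bin/jq -r '.models[]?.name // "no models installed"'
    '')
  ];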