diff --git a/.aider.conf.yml b/.aider.conf.yml deleted file mode 100644 index 8451bd6..0000000 --- a/.aider.conf.yml +++ /dev/null @@ -1,99 +0,0 @@ -########################################################## -# Aider Configuration for Home Lab Project -# Place in your home dir, or at the root of your git repo. -########################################################## - -############# -# Main model: - -## Specify the model to use for the main chat (using Ollama with qwen2.5-coder) -model: ollama/qwen2.5-coder:7b - -######################## -# API Keys and settings: - -## Set environment variables for Ollama -set-env: - - OLLAMA_API_BASE=http://grey-area:11434 - -################# -# Model settings: - -## Specify a file with aider model settings for unknown models -model-settings-file: .aider.model.settings.yml - -## Model aliases for convenience -alias: - - "code:ollama/qwen2.5-coder:7b" - - "chat:ollama/llama3.1:8b" - - "reason:ollama/deepseek-r1:latest" - - "task:ollama/taskmaster-qwen:latest" - - "research:ollama/research-deepseek:latest" - - "fast:ollama/qwen3:4b" - -## Specify what edit format the LLM should use -edit-format: diff - -## Specify the model to use for commit messages and chat history summarization -weak-model: ollama/qwen3:4b - -## Verify the SSL cert when connecting to models -verify-ssl: false - -## Timeout in seconds for API calls (increased for slower CPU inference) -timeout: 300 - -## Disable model warnings for faster startup -show-model-warnings: false - -################### -# Repomap settings: - -## Suggested number of tokens to use for repo map (reduced for performance) -map-tokens: 1024 - -## Control how often the repo map is refreshed -map-refresh: manual - -###################### -# File handling: - -## Auto-load convention files for this project -read: - - CONVENTIONS.md - -################ -# History Files: - -## Specify the chat input history file -input-history-file: .aider.input.history - -## Specify the chat history file 
-chat-history-file: .aider.chat.history.md - -################# -# Cache settings: - -## Enable caching of prompts for better performance -cache-prompts: true - -## Keep cache warm to reduce latency -cache-keepalive-pings: 2 - -################### -# Performance settings: - -## Disable model checking for faster startup -check-model-accepts-settings: false - -## Reduce chat history to save tokens -max-chat-history-tokens: 4096 - -################### -# UI/UX settings: - -## Use dark mode -dark-mode: true - -## Show model warnings -show-model-warnings: false \ No newline at end of file diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 0000000..9038fe6 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,22 @@ +{ + "allowedTools": [ + "Edit", + "MultiEdit", + "Write", + "Read", + "Bash", + "LS", + "Glob", + "Grep", + "mcp__task_master_ai__*", + "mcp__context7__*" + ], + "mcp": { + "task-master-ai": { + "enabled": true + }, + "context7": { + "enabled": true + } + } +} \ No newline at end of file diff --git a/.env.example b/.env.example deleted file mode 100644 index 41a8fae..0000000 --- a/.env.example +++ /dev/null @@ -1,9 +0,0 @@ -# API Keys (Required to enable respective provider) -ANTHROPIC_API_KEY="your_anthropic_api_key_here" # Required: Format: sk-ant-api03-... -PERPLEXITY_API_KEY="your_perplexity_api_key_here" # Optional: Format: pplx-... -OPENAI_API_KEY="your_openai_api_key_here" # Optional, for OpenAI/OpenRouter models. Format: sk-proj-... -GOOGLE_API_KEY="your_google_api_key_here" # Optional, for Google Gemini models. -MISTRAL_API_KEY="your_mistral_key_here" # Optional, for Mistral AI models. -XAI_API_KEY="YOUR_XAI_KEY_HERE" # Optional, for xAI AI models. -AZURE_OPENAI_API_KEY="your_azure_key_here" # Optional, for Azure OpenAI models (requires endpoint in .taskmaster/config.json). -OLLAMA_API_KEY="your_ollama_api_key_here" # Optional: For remote Ollama servers that require authentication. 
\ No newline at end of file diff --git a/.mcp.json b/.mcp.json new file mode 100644 index 0000000..d7ff76b --- /dev/null +++ b/.mcp.json @@ -0,0 +1,25 @@ +{ + "mcpServers": { + "task-master-ai": { + "command": "npx", + "args": ["-y", "--package=task-master-ai", "task-master-ai"], + "env": { + "OLLAMA_BASE_URL": "http://grey-area:11434", + "OPENAI_BASE_URL": "http://grey-area:11434/v1", + "OLLAMA_API_KEY": "ollama", + "ANTHROPIC_API_KEY": "", + "PERPLEXITY_API_KEY": "", + "OPENAI_API_KEY": "", + "GOOGLE_API_KEY": "", + "XAI_API_KEY": "", + "OPENROUTER_API_KEY": "", + "MISTRAL_API_KEY": "", + "AZURE_OPENAI_API_KEY": "" + } + }, + "context7": { + "command": "npx", + "args": ["-y", "@upstash/context7-mcp"] + } + } +} \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index 83f3f78..4bedd97 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -160,7 +160,7 @@ task-master update-subtask --id= --prompt="implementation notes..." # Complete tasks task-master set-status --id= --status=done -``` +``` #### 3. 
Multi-Claude Workflows diff --git a/documentation/BRANCHING_STRATEGY.md b/documentation/BRANCHING_STRATEGY.md deleted file mode 100644 index 05429a0..0000000 --- a/documentation/BRANCHING_STRATEGY.md +++ /dev/null @@ -1,259 +0,0 @@ -# Git Branching Strategy for Infrastructure Management - -## Branch Structure - -### Main Branches - -#### `main` -- **Purpose**: Production-ready configurations -- **Protection**: Protected branch with required reviews -- **Deployment**: Automatically deployed to production machines -- **Stability**: Should always be stable and tested - -#### `develop` -- **Purpose**: Integration branch for new features -- **Testing**: Continuous integration testing -- **Merging**: Features merge here first -- **Deployment**: Deployed to staging/test environments - -### Supporting Branches - -#### Feature Branches: `feature/` -- **Purpose**: Development of new features or modules -- **Naming**: `feature/add-cosmic-desktop`, `feature/sleeper-service-config` -- **Lifetime**: Temporary, deleted after merge -- **Source**: Branch from `develop` -- **Merge**: Merge back to `develop` - -#### Machine Branches: `machine/` -- **Purpose**: Machine-specific configuration changes -- **Naming**: `machine/congenital-optimist`, `machine/sleeper-service` -- **Use Case**: Testing machine-specific changes -- **Merge**: Merge to `develop` after testing - -#### Hotfix Branches: `hotfix/` -- **Purpose**: Critical fixes for production -- **Naming**: `hotfix/security-patch`, `hotfix/boot-failure` -- **Source**: Branch from `main` -- **Merge**: Merge to both `main` and `develop` - -#### Module Branches: `module/` -- **Purpose**: Development of specific modules -- **Naming**: `module/virtualization`, `module/desktop-gnome` -- **Scope**: Single module focus -- **Testing**: Module-specific testing - -### Tagging Strategy - -#### Version Tags: `v..` -- **Purpose**: Mark stable releases -- **Format**: `v1.0.0`, `v1.2.1` -- **Trigger**: Major configuration milestones -- 
**Deployment**: Tag triggers deployment workflows - -#### Machine Tags: `-v` -- **Purpose**: Machine-specific deployments -- **Format**: `congenital-optimist-v1.0.0` -- **Use Case**: Track per-machine configurations -- **Rollback**: Enable machine-specific rollbacks - -#### Phase Tags: `phase--complete` -- **Purpose**: Mark migration phase completion -- **Format**: `phase-1-complete`, `phase-2-complete` -- **Documentation**: Link to plan.md milestones - -## Workflow Examples - -### Standard Feature Development -```bash -# Start new feature -git checkout develop -git pull origin develop -git checkout -b feature/add-incus-clustering - -# Develop and test -# ... make changes ... -nix flake check -sudo nixos-rebuild test --flake .#congenital-optimist - -# Commit and push -git add . -git commit -m "feat: add Incus clustering support" -git push origin feature/add-incus-clustering - -# Create PR to develop -# ... review process ... -# Merge to develop -``` - -### Machine-Specific Changes -```bash -# Machine-specific branch -git checkout develop -git checkout -b machine/sleeper-service - -# Test on specific machine -sudo nixos-rebuild test --flake .#sleeper-service - -# Commit and merge -git add . -git commit -m "feat(sleeper-service): add NFS server configuration" -``` - -### Hotfix Process -```bash -# Critical fix needed -git checkout main -git checkout -b hotfix/zfs-boot-failure - -# Fix the issue -# ... emergency fix ... -sudo nixos-rebuild test --flake .#congenital-optimist - -# Deploy to main -git add . -git commit -m "fix: resolve ZFS boot failure" -git checkout main -git merge hotfix/zfs-boot-failure -git tag v1.0.1 - -# Backport to develop -git checkout develop -git merge hotfix/zfs-boot-failure -``` - -## Commit Convention - -### Format -``` -(): - -[optional body] - -[optional footer] -``` - -### Types -- **feat**: New feature or module -- **fix**: Bug fix -- **docs**: Documentation changes -- **style**: Formatting, missing semicolons, etc. 
-- **refactor**: Code refactoring -- **test**: Adding tests -- **chore**: Maintenance tasks - -### Scopes -- **machine**: `(congenital-optimist)`, `(sleeper-service)` -- **module**: `(desktop)`, `(virtualization)`, `(users)` -- **config**: `(flake)`, `(ci)` -- **docs**: `(readme)`, `(plan)` - -### Examples -```bash -feat(desktop): add Cosmic desktop environment module -fix(virtualization): resolve Incus networking issues -docs(readme): update installation instructions -refactor(modules): reorganize desktop environment modules -chore(ci): update GitHub Actions workflow -``` - -## Branch Protection Rules - -### Main Branch Protection -- **Required Reviews**: 1 reviewer minimum -- **Status Checks**: All CI checks must pass -- **Up-to-date**: Branch must be up to date before merging -- **Admin Override**: Allow admin override for hotfixes -- **Force Push**: Disabled -- **Deletion**: Disabled - -### Develop Branch Protection -- **Required Reviews**: 1 reviewer (can be self-review) -- **Status Checks**: All CI checks must pass -- **Auto-merge**: Allow auto-merge after checks -- **Force Push**: Disabled for others - -## Merge Strategies - -### Feature to Develop -- **Strategy**: Squash and merge -- **Reason**: Clean history, single commit per feature -- **Title**: Use conventional commit format - -### Develop to Main -- **Strategy**: Merge commit -- **Reason**: Preserve feature branch history -- **Testing**: Full integration testing required - -### Hotfix to Main -- **Strategy**: Fast-forward if possible -- **Reason**: Immediate deployment needed -- **Testing**: Minimal but critical testing - -## Deployment Strategy - -### Automatic Deployment -- **main** → Production machines (congenital-optimist, sleeper-service) -- **develop** → Test environment (if available) - -### Manual Deployment -- Feature branches can be manually deployed for testing -- Use `nixos-rebuild test` for non-persistent testing -- Use `nixos-rebuild switch` for persistent changes - -### Rollback 
Strategy -```bash -# Rollback to previous version -git checkout main -git revert -git tag rollback-v1.0.0-to-v0.9.9 - -# Or rollback to specific tag -git checkout v1.0.0 -sudo nixos-rebuild switch --flake .#congenital-optimist -``` - -## Branch Lifecycle - -### Weekly Maintenance -- **Monday**: Review open feature branches -- **Wednesday**: Merge develop to main if stable -- **Friday**: Clean up merged feature branches -- **Sunday**: Update dependencies (automated) - -### Monthly Tasks -- Review and update branch protection rules -- Clean up old tags and releases -- Update documentation -- Security audit of configurations - -## Best Practices - -### Branch Naming -- Use descriptive names: `feature/improve-zfs-performance` -- Include issue numbers: `feature/123-add-cosmic-desktop` -- Use lowercase with hyphens -- Keep names under 50 characters - -### Commit Messages -- Use imperative mood: "add", "fix", "update" -- Keep first line under 50 characters -- Include body for complex changes -- Reference issues: "Fixes #123" - -### Testing Requirements -- Always run `nix flake check` before committing -- Test with `nixos-rebuild test` on relevant machines -- Document testing performed in PR description -- Consider impact on other machines - -### Code Review -- Focus on configuration correctness -- Check for security implications -- Verify documentation updates -- Ensure rollback plan exists -- Test locally when possible - ---- - -This branching strategy ensures stable, tested configurations while enabling rapid development and emergency fixes when needed. 
diff --git a/flake.nix b/flake.nix index fd81af0..edc75c7 100644 --- a/flake.nix +++ b/flake.nix @@ -76,6 +76,18 @@ ./modules/common/tty.nix ]; }; + + # little-rascal - Development laptop with Niri + CLI login + little-rascal = nixpkgs.lib.nixosSystem { + inherit system specialArgs; + modules = [ + ./machines/little-rascal/configuration.nix + ./machines/little-rascal/hardware-configuration.nix + ./modules/common/nix.nix + ./modules/common/base.nix + ./modules/common/tty.nix + ]; + }; }; # Custom packages for the home lab @@ -100,6 +112,7 @@ echo " - sleeper-service (Xeon file server)" echo " - reverse-proxy (VPS edge server)" echo " - grey-area (Services host: Forgejo, Jellyfin, etc.)" + echo " - little-rascal (Development laptop with Niri)" echo "" echo "Build with: nixos-rebuild build --flake .#" echo "Switch with: nixos-rebuild switch --flake .#" @@ -205,6 +218,20 @@ confirmTimeout = 30; }; }; + + little-rascal = { + hostname = "little-rascal.tail807ea.ts.net"; + profiles.system = { + user = "root"; + path = deploy-rs.lib.x86_64-linux.activate.nixos self.nixosConfigurations.little-rascal; + sshUser = "geir"; + sudo = "sudo -u"; + autoRollback = true; + magicRollback = true; + activationTimeout = 180; + confirmTimeout = 30; + }; + }; }; # Deploy-rs checks (recommended by deploy-rs) diff --git a/machines/little-rascal/About.org b/machines/little-rascal/About.org new file mode 100644 index 0000000..3315a8a --- /dev/null +++ b/machines/little-rascal/About.org @@ -0,0 +1,47 @@ +#+TITLE: Little Rascal - Development Laptop Configuration +#+AUTHOR: Geir +#+DATE: 2025-06-27 + +* Machine Overview + +** Name: little-rascal +Inspired by the Culture ship name "LittleRascal" (GSV) - small but capable, playful personality, perfect for a development laptop that gets into things and experiments. + +** Hardware Type +Development laptop - portable, personal machine for coding and experimentation. 
+ +** Role & Purpose +- Primary development machine +- Portable workstation for coding projects +- Testing and experimentation platform +- Personal productivity device + +** Desktop Environment +- **Display Manager**: CLI login with seatd +- **Compositor**: Niri (minimal Wayland compositor) +- **Philosophy**: Minimal, efficient, keyboard-driven workflow + +** Key Features +- Minimal desktop environment focused on development +- CLI-first approach with graphical applications when needed +- Full development toolchain (editors, browsers, containers) +- Home lab management tools integration +- Wayland-native setup for modern hardware support + +** Software Focus +- Development: VSCode, Neovim, Git toolchain +- Browsers: Firefox, Chromium for testing +- Containers: Podman for development environments +- Home Lab: Lab tool for infrastructure management +- Creative: Lightweight image/video tools when needed + +** Network Role +- Development workstation +- Connects to home lab infrastructure +- Mobile device that can work both at home and remotely + +** Security Profile +- Personal development machine +- SSH access for remote development +- Standard user security model +- Development-friendly but secure defaults \ No newline at end of file diff --git a/machines/little-rascal/configuration.nix b/machines/little-rascal/configuration.nix new file mode 100644 index 0000000..87229fb --- /dev/null +++ b/machines/little-rascal/configuration.nix @@ -0,0 +1,126 @@ +# Little Rascal - Development Laptop Configuration +# Based on congenital-optimist with laptop-specific adjustments + +{ + config, + pkgs, + lib, + inputs, + unstable, + ... 
+}: { + imports = [ + ./hardware-configuration.nix + + # Common modules + ../../modules/common/base.nix + ../../modules/common/nix.nix + ../../modules/common/tty.nix + ../../modules/common/emacs.nix + + # Desktop + ../../modules/desktop/niri.nix + ../../modules/desktop/fonts.nix + + # Development + ../../modules/development/tools.nix + ../../modules/ai/claude-code.nix + + # Users + ../../modules/users/geir.nix + ../../modules/users/common.nix + ../../modules/users/shell-aliases.nix + + # Virtualization + ../../modules/virtualization/libvirt.nix + ../../modules/virtualization/incus.nix + ../../modules/virtualization/podman.nix + + # Audio + ../../modules/sound/pipewire.nix + + # Network + ../../modules/network/common.nix + ../../modules/network/extraHosts.nix + + # Security + ../../modules/security/ssh-keys.nix + ]; + + networking = { + hostName = "little-rascal"; + networkmanager.enable = true; + + # Tailscale for home lab access + firewall = { + enable = true; + allowedUDPPorts = [ 41641 ]; # Tailscale + allowedTCPPorts = [ 22 ]; # SSH + }; + }; + + # Boot configuration + boot = { + loader = { + systemd-boot.enable = true; + efi.canTouchEfiVariables = true; + timeout = 3; + }; + + kernelModules = [ "kvm-amd" "zram" ]; + tmp.cleanOnBoot = true; + + # zram swap like other machines + kernel.sysctl."vm.swappiness" = 180; + }; + + # zram configuration + zramSwap = { + enable = true; + algorithm = "zstd"; + memoryPercent = 25; # Use 25% of RAM for zram + }; + + # Hardware - minimal for laptop + hardware = { + bluetooth.enable = true; + graphics.enable = true; + }; + + # Laptop-specific services + services = { + # Power management for laptop + power-profiles-daemon.enable = true; + upower.enable = true; + + # Display manager + greetd = { + enable = true; + settings = { + default_session = { + command = "${pkgs.greetd.tuigreet}/bin/tuigreet --time --cmd ${pkgs.zsh}/bin/zsh"; + user = "greeter"; + }; + }; + }; + + # Essential services + tailscale.enable = true; + 
blueman.enable = true; + printing.enable = true; + + # Location services for time zone + geoclue2.enable = true; + }; + + # Localization + time.timeZone = "Europe/Oslo"; + i18n.defaultLocale = "en_US.UTF-8"; + console = { + font = "Lat2-Terminus16"; + keyMap = "no"; + }; + + # System version + system.stateVersion = "25.05"; +} \ No newline at end of file diff --git a/machines/little-rascal/hardware-configuration.nix b/machines/little-rascal/hardware-configuration.nix new file mode 100644 index 0000000..32f5951 --- /dev/null +++ b/machines/little-rascal/hardware-configuration.nix @@ -0,0 +1,125 @@ +# Hardware Configuration for Little Rascal +# Lenovo Yoga Slim 7 14ARE05 - AMD Ryzen 7 4700U + +{ config, lib, pkgs, modulesPath, ... }: + +{ + imports = [ + (modulesPath + "/installer/scan/not-detected.nix") + ]; + + # Boot configuration for AMD Ryzen 7 4700U + boot = { + initrd = { + availableKernelModules = [ + "nvme" + "xhci_pci" + "usb_storage" + "sd_mod" + "sdhci_pci" + ]; + kernelModules = [ ]; + }; + + kernelModules = [ "kvm-amd" ]; # AMD Ryzen system + extraModulePackages = [ ]; + }; + + # Filesystem configuration - TEMPLATE + # Update these paths and UUIDs after running nixos-generate-config + fileSystems = { + "/" = { + device = "/dev/disk/by-uuid/REPLACE-WITH-ROOT-UUID"; + fsType = "ext4"; + }; + + "/boot" = { + device = "/dev/disk/by-uuid/REPLACE-WITH-BOOT-UUID"; + fsType = "vfat"; + options = [ "fmask=0022" "dmask=0022" ]; + }; + }; + + # Swap configuration - TEMPLATE + # Uncomment and update if using swap partition + # swapDevices = [ + # { device = "/dev/disk/by-uuid/REPLACE-WITH-SWAP-UUID"; } + # ]; + + # Hardware-specific configuration for Lenovo Yoga Slim 7 14ARE05 + hardware = { + # CPU configuration - AMD Ryzen 7 4700U + cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware; + + # Enable firmware updates + enableRedistributableFirmware = true; + + # Graphics configuration - AMD Radeon Vega (integrated) + graphics = { + 
enable = true; + enable32Bit = true; + + # AMD integrated graphics drivers + extraPackages = with pkgs; [ + amdvlk # AMD Vulkan driver + rocmPackages.clr.icd # OpenCL support + ]; + + # 32-bit support for compatibility + extraPackages32 = with pkgs.driversi686Linux; [ + amdvlk + ]; + }; + + # Bluetooth support for Intel AX200 + bluetooth = { + enable = true; + powerOnBoot = true; + }; + }; + + # Power management for AMD Ryzen 7 4700U + powerManagement = { + enable = true; + powertop.enable = true; # Power optimization + cpuFreqGovernor = "powersave"; # Better battery life + }; + + # Network hardware - Intel Wi-Fi 6 AX200 + networking = { + # Enable NetworkManager for WiFi management + networkmanager.enable = true; + + # Disable wpa_supplicant (using NetworkManager) + wireless.enable = false; + }; + + # Firmware for Intel WiFi and Bluetooth + hardware.firmware = with pkgs; [ + linux-firmware + ]; + + # AMD-specific optimizations + boot.kernelParams = [ + # Enable AMD graphics performance + "amdgpu.ppfeaturemask=0xffffffff" + ]; + + # TLP for better power management (alternative to power-profiles-daemon) + services.tlp = { + enable = false; # Using power-profiles-daemon instead + settings = { + # Would be configured here if enabled + CPU_SCALING_GOVERNOR_ON_AC = "performance"; + CPU_SCALING_GOVERNOR_ON_BAT = "powersave"; + }; + }; + + # Notes for this specific hardware: + # - Lenovo Yoga Slim 7 14ARE05 + # - AMD Ryzen 7 4700U with Radeon Vega Graphics + # - 16GB LPDDR4 RAM (soldered, not upgradeable) + # - Intel Wi-Fi 6 AX200 + Bluetooth + # - 128GB SSD storage + # - Currently running btrfs filesystem +} \ No newline at end of file diff --git a/modules/common/emacs.nix b/modules/common/emacs.nix index e69de29..2ba8cf4 100644 --- a/modules/common/emacs.nix +++ b/modules/common/emacs.nix @@ -0,0 +1,28 @@ +# Common Emacs Configuration +# Shared Emacs setup for all machines + +{ + config, + pkgs, + ... 
+}: { + # System-wide Emacs (NixOS option is services.emacs; programs.emacs is home-manager only) + services.emacs = { + install = true; + package = pkgs.emacs; + defaultEditor = true; + }; + + # Emacs packages and configuration + environment.systemPackages = with pkgs; [ + emacs + # Basic Emacs utilities + emacsPackages.use-package + ]; + + # Set Emacs as default editor + environment.sessionVariables = { + EDITOR = "emacs"; + VISUAL = "emacs"; + }; +} \ No newline at end of file diff --git a/modules/services/lab-auto-update-example.nix b/modules/services/lab-auto-update-example.nix new file mode 100644 index 0000000..bd3632a --- /dev/null +++ b/modules/services/lab-auto-update-example.nix @@ -0,0 +1,44 @@ +# Example configuration for enabling lab auto-update service +# Add this to your machine's configuration.nix + +{ pkgs, ... }: { + # Import the lab auto-update service module + imports = [ + ../services/lab-auto-update.nix + ]; + + # Enable and configure the auto-update service + services.lab-auto-update = { + enable = true; + + # Schedule updates at 2:00 AM with up to 30 minute random delay + schedule = "02:00"; + randomizedDelay = "30m"; + + # Path to your home lab flake + flakePath = "/home/geir/Projects/home-lab"; + + # Keep logs for 30 days + logRetentionDays = 30; + + # Persist timer across reboots + persistent = true; + }; + + # Optional: Enable the lab tool package system-wide + environment.systemPackages = with pkgs; [ + (pkgs.callPackage ../../packages/lab-tools.nix {}).default + ]; + + # Optional: Staggered scheduling for different machine types + # Uncomment and modify based on machine role: + + # For database/storage servers (run first) + # services.lab-auto-update.schedule = "02:00"; + + # For application servers (run after storage) + # services.lab-auto-update.schedule = "02:30"; + + # For development machines (run last) + # services.lab-auto-update.schedule = "03:00"; +} \ No newline at end of file diff --git a/modules/services/lab-auto-update.nix b/modules/services/lab-auto-update.nix new file mode 100644 index 
0000000..cc2f6b6 --- /dev/null +++ b/modules/services/lab-auto-update.nix @@ -0,0 +1,201 @@ +# modules/services/lab-auto-update.nix - NixOS service for automatic lab updates + +{ config, lib, pkgs, ... }: + +with lib; + +let + cfg = config.services.lab-auto-update; + + # Get the lab tool from our packages + labTool = pkgs.callPackage ../../packages/lab-tools.nix {}; + + # Auto-update script that uses the Guile lab tool + autoUpdateScript = pkgs.writeShellScript "lab-auto-update" '' + #!/usr/bin/env bash + set -euo pipefail + + LOG_FILE="/var/log/lab-auto-update.log" + LOCK_FILE="/var/run/lab-auto-update.lock" + + # Ensure we don't run multiple instances + if [ -f "$LOCK_FILE" ]; then + echo "$(date): Auto-update already running (lock file exists)" >> "$LOG_FILE" + exit 1 + fi + + # Create lock file + echo $$ > "$LOCK_FILE" + + # Cleanup function + cleanup() { + rm -f "$LOCK_FILE" + } + trap cleanup EXIT + + echo "$(date): Starting lab auto-update" >> "$LOG_FILE" + + # Change to the lab directory + cd "${cfg.flakePath}" + + # Run the Guile lab tool auto-update command + if ${labTool}/bin/lab auto-update 2>&1 | tee -a "$LOG_FILE"; then + echo "$(date): Auto-update completed successfully" >> "$LOG_FILE" + else + echo "$(date): Auto-update failed with exit code $?" 
>> "$LOG_FILE" + exit 1 + fi + ''; + +in +{ + options.services.lab-auto-update = { + enable = mkEnableOption "Lab auto-update service"; + + schedule = mkOption { + type = types.str; + default = "02:00"; + description = "Time to run updates (HH:MM format)"; + }; + + randomizedDelay = mkOption { + type = types.str; + default = "30m"; + description = "Maximum random delay before starting update"; + }; + + flakePath = mkOption { + type = types.str; + default = "/home/geir/Projects/home-lab"; + description = "Path to the home lab flake directory"; + }; + + persistent = mkOption { + type = types.bool; + default = true; + description = "Whether the timer should be persistent across reboots"; + }; + + logRetentionDays = mkOption { + type = types.int; + default = 30; + description = "Number of days to retain auto-update logs"; + }; + }; + + config = mkIf cfg.enable { + # Systemd service for the auto-update + systemd.services.lab-auto-update = { + description = "Home Lab Auto-Update Service"; + after = [ "network-online.target" ]; + wants = [ "network-online.target" ]; + + serviceConfig = { + Type = "oneshot"; + User = "root"; + Group = "root"; + ExecStart = "${autoUpdateScript}"; + + # Security settings + PrivateTmp = true; + ProtectSystem = false; # We need to modify the system + ProtectHome = false; # flakePath defaults to a checkout under /home; true would hide it + NoNewPrivileges = false; # We need privileges for nixos-rebuild + + # Resource limits + MemoryMax = "2G"; + CPUQuota = "50%"; + + # Timeout settings + TimeoutStartSec = "30m"; + TimeoutStopSec = "5m"; + }; + + # Tools available to the service (NixOS derives the unit's PATH from `path`) + path = with pkgs; [ + nix + nixos-rebuild + git + openssh + rsync + gawk + gnused + coreutils + util-linux + systemd + ]; + environment = { + NIX_PATH = "nixpkgs=${pkgs.path}"; + }; + }; + + # Systemd timer for scheduling + systemd.timers.lab-auto-update = { + description = "Home Lab Auto-Update Timer"; + wantedBy = [ "timers.target" ]; + + timerConfig = { + # Exactly one OnCalendar key (set below); a duplicate key is a Nix evaluation error + Persistent = cfg.persistent; 
+ RandomizedDelaySec = cfg.randomizedDelay; + + # Run at the specified time + OnCalendar = "*-*-* ${cfg.schedule}:00"; + + # Accuracy settings + AccuracySec = "1min"; + }; + }; + + # Log rotation for auto-update logs + services.logrotate.settings.lab-auto-update = { + files = "/var/log/lab-auto-update.log"; + frequency = "daily"; + rotate = cfg.logRetentionDays; + compress = true; + delaycompress = true; + missingok = true; + notifempty = true; + create = "644 root root"; + postrotate = '' + systemctl reload-or-restart rsyslog.service > /dev/null 2>&1 || true + ''; + }; + + # Ensure log directory exists with proper permissions + systemd.tmpfiles.rules = [ + "d /var/log 0755 root root -" + "f /var/log/lab-auto-update.log 0644 root root -" + ]; + + # Add a systemd target for manual triggering + systemd.targets.lab-manual-update = { + description = "Manual Lab Update Target"; + }; + + # Service for manual updates (without reboot) + systemd.services.lab-manual-update = { + description = "Manual Home Lab Update (No Reboot)"; + after = [ "network-online.target" ]; + wants = [ "network-online.target" ]; + + serviceConfig = { + Type = "oneshot"; + User = "root"; + Group = "root"; + ExecStart = "${labTool}/bin/lab update"; + RemainAfterExit = false; + }; + + # Tools for the manual update; NixOS derives the unit's PATH from + # the `path` option. + path = with pkgs; [ + nix + nixos-rebuild + git + openssh + rsync + ]; + }; + }; +} \ No newline at end of file diff --git a/modules/users/common.nix b/modules/users/common.nix index 731385f..929760e 100644 --- a/modules/users/common.nix +++ b/modules/users/common.nix @@ -29,12 +29,7 @@ eval "$(direnv hook zsh)" ''; - # Common environment variables - sessionVariables = { - EDITOR = "emacs"; - BROWSER = "firefox"; - TERMINAL = "kitty"; - }; + # Removed sessionVariables - moved to environment.sessionVariables }; # Common packages for all users diff --git a/packages/lab-tool/lab/auto-update.scm b/packages/lab-tool/lab/auto-update.scm new file mode 100644 index 0000000..406f438 --- 
/dev/null +++ b/packages/lab-tool/lab/auto-update.scm @@ -0,0 +1,203 @@ +;; lab/auto-update.scm - Auto-update system implementation + +(define-module (lab auto-update) + #:use-module (ice-9 format) + #:use-module (ice-9 popen) + #:use-module (ice-9 textual-ports) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-19) ; Date/time + #:use-module (utils logging) + #:use-module (utils config) + #:use-module (lab deployment) + #:use-module (lab machines) + #:export (auto-update-system + schedule-auto-update + check-update-health + auto-update-status)) + +;; Pure function: Generate update log entry +(define (format-update-log-entry timestamp operation status details) + "Pure function to format update log entry" + (format #f "~a: ~a - ~a (~a)" timestamp operation status details)) + +;; Pure function: Check if system is healthy for updates +(define (system-health-check-pure) + "Pure function returning health check criteria" + '((disk-space-threshold . 90) + (required-services . ("systemd")) + (min-uptime-minutes . 
30))) + +;; Impure function: Check actual system health +(define (check-update-health) + "Check if system is ready for updates (impure - checks actual system)" + (log-info "Checking system health before update...") + + (let* ((health-checks (system-health-check-pure)) + (disk-threshold (assoc-ref health-checks 'disk-space-threshold)) + (disk-usage (get-disk-usage)) + (system-running (system-is-running?)) + (uptime-ok (check-minimum-uptime))) + + (log-debug "Disk usage: ~a%" disk-usage) + (log-debug "System running: ~a" system-running) + (log-debug "Uptime check: ~a" uptime-ok) + + (cond + ((> disk-usage disk-threshold) + (log-error "Disk usage too high: ~a% (threshold: ~a%)" disk-usage disk-threshold) + #f) + ((not system-running) + (log-error "System not in running state") + #f) + ((not uptime-ok) + (log-error "System uptime too low for safe update") + #f) + (else + (log-success "System health check passed") + #t)))) + +;; Impure function: Get disk usage percentage +(define (get-disk-usage) + "Get root filesystem disk usage percentage" + (let* ((cmd "df / | tail -1 | awk '{print $5}' | sed 's/%//'") + (port (open-pipe* OPEN_READ "/bin/sh" "-c" cmd)) + (output (string-trim-both (get-string-all port))) + (status (close-pipe port))) + ;; Pipeline status is sed's, so a failed df can give "" with status 0; string->number is then #f + (or (and (zero? status) (string->number output)) + 95))) ; Return high usage if command fails or output is not numeric + +;; Impure function: Check if systemd is running +(define (system-is-running?) + "Check if system is in running state" + (let* ((cmd "systemctl is-system-running --quiet") + (status (system cmd))) + (zero? status))) + +;; Impure function: Check minimum uptime +(define (check-minimum-uptime) + "Check if system has been running long enough" + (let* ((cmd "cat /proc/uptime | cut -d' ' -f1") + (port (open-pipe* OPEN_READ "/bin/sh" "-c" cmd)) + (output (string-trim-both (get-string-all port))) + (status (close-pipe port))) + (if (zero? 
status) + (let ((uptime-seconds (string->number output))) + (and uptime-seconds (> uptime-seconds 1800))) ; 30 minutes minimum; #f if unparsable + #f))) + +;; Impure function: Write update log +(define (write-update-log operation status details) + "Write update operation to log file (appends; failures are logged, never raised)" + (let* ((timestamp (date->string (current-date) "~Y-~m-~d ~H:~M:~S")) + (log-entry (format-update-log-entry timestamp operation status details)) + (log-file "/var/log/lab-auto-update.log")) + + (catch #t + (lambda () + ;; "a" = append mode; call-with-output-file has no #:append option + (let ((port (open-file log-file "a"))) + (format port "~a\n" log-entry) + (close-port port))) + (lambda (key . args) + (log-error "Failed to write update log: ~a" args))))) + +;; Impure function: Main auto-update routine +(define (auto-update-system . args) + "Perform automatic system update (impure - modifies system)" + (let* ((options (if (null? args) '() (car args))) + (auto-reboot (option-ref options 'auto-reboot #t)) + (dry-run (option-ref options 'dry-run #f)) + (machine-name (get-hostname))) + + (log-info "Starting auto-update for machine: ~a" machine-name) + (write-update-log "auto-update" "started" machine-name) + + (if (not (check-update-health)) + (begin + (log-error "System health check failed - aborting update") + (write-update-log "auto-update" "aborted" "health check failed") + #f) + (begin + ;; Update flake inputs + (log-info "Updating flake inputs...") + (let ((flake-result (update-flake options))) + (if flake-result + (begin + (log-success "Flake update completed") + (write-update-log "flake-update" "success" "") + + ;; Deploy configuration + (log-info "Deploying updated configuration...") + (let ((deploy-result (deploy-machine machine-name "switch" options))) + (if deploy-result + (begin + (log-success "Configuration deployment completed") + (write-update-log "deployment" "success" "switch mode") + + ;; Schedule reboot if enabled + (if (and auto-reboot (not dry-run)) + (begin + (log-info "Scheduling system reboot in 2 minutes...") + (write-update-log "reboot" "scheduled" "2 minutes") 
+ (system "shutdown -r +2 'Auto-update completed - rebooting'") + #t) + (begin + (log-info "Auto-reboot disabled - update complete") + (write-update-log "auto-update" "completed" "no reboot") + #t))) + (begin + (log-error "Configuration deployment failed") + (write-update-log "deployment" "failed" "switch mode") + #f)))) + (begin + (log-error "Flake update failed") + (write-update-log "flake-update" "failed" "") + #f))))))) + +;; Impure function: Get current hostname +(define (get-hostname) + "Get current system hostname, falling back to the string unknown" + ;; gethostname is Guile's POSIX binding: no shell and no `hostname` + ;; binary on PATH required (e.g. under a unit with a restricted PATH) + (catch #t + (lambda () + (gethostname)) + (lambda (key . args) + "unknown"))) + +;; Impure function: Show auto-update status +(define (auto-update-status) + "Display auto-update service status and recent logs" + (log-info "Checking auto-update status...") + + (let ((log-file "/var/log/lab-auto-update.log")) + (if (file-exists? log-file) + (begin + (format #t "Recent auto-update activity:\n") + (let* ((cmd (format #f "tail -10 ~a" log-file)) + (port (open-pipe* OPEN_READ "/bin/sh" "-c" cmd)) + (output (get-string-all port)) + (status (close-pipe port))) + (if (zero? status) + (display output) + (log-error "Failed to read update log")))) + (log-info "No auto-update log found")) + + ;; Check systemd timer status + (format #t "\nSystemd timer status:\n") + (let* ((cmd "systemctl status lab-auto-update.timer --no-pager") + (port (open-pipe* OPEN_READ "/bin/sh" "-c" cmd)) + (output (get-string-all port)) + (status (close-pipe port))) + (display output)))) + +;; Impure function: Schedule auto-update (for manual testing) +(define (schedule-auto-update minutes) + "Schedule auto-update to run in specified minutes" + (let ((schedule-cmd (format #f "echo 'lab auto-update' | at now + ~a minutes" minutes))) + (log-info "Scheduling auto-update in ~a minutes..." minutes) + (let ((status (system schedule-cmd))) + (if (zero? 
status) + (log-success "Auto-update scheduled successfully") + (log-error "Failed to schedule auto-update"))))) \ No newline at end of file diff --git a/packages/lab-tool/main.scm b/packages/lab-tool/main.scm index 81dd4fb..6a5abd0 100755 --- a/packages/lab-tool/main.scm +++ b/packages/lab-tool/main.scm @@ -13,7 +13,8 @@ (utils logging) (lab core) (lab machines) - (lab deployment)) + (lab deployment) + (lab auto-update)) ;; Initialize logging (set-log-level! 'info) @@ -32,6 +33,8 @@ COMMANDS: Available modes: boot (default), test, switch deploy-all Deploy to all machines update Update flake inputs + auto-update Perform automatic system update with health checks + auto-update-status Show auto-update service status and logs health [machine] Check machine health (all if no machine specified) ssh SSH to machine test-modules Test modular implementation @@ -45,6 +48,8 @@ EXAMPLES: lab deploy congenital-optimist test # Deploy temporarily for testing lab deploy-all lab update + lab auto-update # Perform automatic update with reboot + lab auto-update-status # Show auto-update logs and status lab health lab health sleeper-service lab ssh sleeper-service @@ -202,6 +207,18 @@ Home lab root: ~a (format #t " ~a: ~a\n" machine status))) results))))) +(define (cmd-auto-update) + "Perform automatic system update" + (log-info "Starting automatic system update...") + (let ((result (auto-update-system '((auto-reboot . 
#t))))) + (if result + (log-success "Automatic update completed successfully") + (log-error "Automatic update failed")))) + +(define (cmd-auto-update-status) + "Show auto-update status and logs" + (auto-update-status)) + ;; Main command dispatcher (define (dispatch-command command args) "Dispatch command with appropriate handler" @@ -228,6 +245,12 @@ Home lab root: ~a ('update (cmd-update)) + ('auto-update + (cmd-auto-update)) + + ('auto-update-status + (cmd-auto-update-status)) + ('health (cmd-health args)) diff --git a/packages/lab-tools.nix b/packages/lab-tools.nix index a3ecac6..8a9c6cf 100644 --- a/packages/lab-tools.nix +++ b/packages/lab-tools.nix @@ -13,7 +13,10 @@ src = ./lab; nativeBuildInputs = [makeWrapper]; - buildInputs = [guile]; + buildInputs = [ + guile + # Runtime dependencies for auto-update functionality will be in PATH + ]; installPhase = '' mkdir -p $out/share/lab-tool @@ -104,6 +107,7 @@ in { echo " lab status # Show infrastructure status" echo " lab machines # List all machines" echo " lab deploy machine # Deploy to machine" + echo " lab auto-update # Automatic system update" echo " mcp-server # Start MCP server" echo " rag-system # Start RAG system" echo "" diff --git a/research/claude-task-master-ai-nix-packaging.md b/research/claude-task-master-ai-nix-packaging.md deleted file mode 100644 index 3bc0878..0000000 --- a/research/claude-task-master-ai-nix-packaging.md +++ /dev/null @@ -1,114 +0,0 @@ -# Packaging Claude Task Master AI for NixOS - -This document outlines suggestions for packaging the "Claude Task Master AI" Node.js application as a Nix package. The typical installation method for this tool is `npm install -g task-master-ai`. - -## 1. Creating the Nix Package - -Nixpkgs provides helpers for packaging Node.js applications. The primary function for this is `buildNpmPackage`. - -A Nix expression for this package can be created in your packages directory, for example, at `packages/claude-task-master-ai.nix`. - -### Key Steps: - -1. 
**Find the Source**: Determine the source of the `task-master-ai` package. This is usually the npm registry. You'll need the package name and version. -2. **Nix Expression (`default.nix` or `claude-task-master.nix`):** Create a Nix expression file. -3. **Use `buildNpmPackage`**: This function handles the download, build, and installation of npm packages. -4. **`npmDepsHash`**: You'll need to calculate a hash of the npm dependencies. This ensures reproducibility. -5. **Binaries**: Ensure that the executables provided by `task-master-ai` are correctly placed in the output's `bin` directory. `buildNpmPackage` usually handles this if the `package.json` of the application specifies `bin` entries. - -### Example Nix Expression: - -```nix -{ lib, buildNpmPackage, fetchFromGitHub, nodejs }: # Add other dependencies if needed - -buildNpmPackage rec { - pname = "task-master-ai"; - version = "INSERT_PACKAGE_VERSION_HERE"; # Replace with the actual version - - src = fetchFromGitHub { # Or fetchurl if directly from npm/tarball - owner = "eyaltoledano"; # Replace if this is not the correct source - repo = "claude-task-master"; # Replace if this is not the correct source - rev = "v${version}"; # Or specific commit/tag - hash = "INSERT_SRC_HASH_HERE"; # lib.fakeSha256 for initial fetch, then replace - }; - - # If fetching directly from npm tarball: - # src = fetchurl { - # url = "https://registry.npmjs.org/task-master-ai/-/task-master-ai-${version}.tgz"; - # sha256 = "INSERT_TARBALL_HASH_HERE"; # lib.fakeSha256 for initial fetch, then replace - # }; - - npmDepsHash = "INSERT_NPMDEPSHASH_HERE"; # Calculate this after the first build attempt - - # buildInputs = [ nodejs ]; # buildNpmPackage usually brings in nodejs - - meta = with lib; { - description = "Claude Task Master AI tool"; - homepage = "https://github.com/eyaltoledano/claude-task-master"; # Or actual homepage - license = licenses.mit; # Check and replace with actual license - maintainers = [ maintainers.yourGithubUsername 
]; # Your username - platforms = platforms.all; - }; -} -``` - -### Obtaining `npmDepsHash`: - -1. Initially, set `npmDepsHash = lib.fakeSha256;` or a placeholder like `"sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=";`. -2. Attempt to build the package (e.g., `nix-build -A task-master-ai`). -3. The build will fail, but it will output the expected hash. Copy this hash into your Nix expression. - Alternatively, you can use `prefetch-npm-deps` if you have a `package-lock.json`: - ```sh - # In a directory with package.json and package-lock.json for task-master-ai - nix-shell -p nodePackages.prefetch-npm-deps --run "prefetch-npm-deps package-lock.json" - ``` - Since `task-master-ai` is installed globally, you might need to fetch its source first to get the `package-lock.json`. - -### Global Installation Aspect: - -`buildNpmPackage` typically installs binaries specified in the `package.json`'s `bin` field into `$out/bin/`. This makes them available when the package is installed in a Nix profile. If `task-master-ai` is made available this way, VS Code can invoke it using `npx` as shown in the MCP server configuration, or potentially directly if it's added to the PATH. - -## 2. Integrating with VS Code as an MCP Server - -Instead of running `task-master-ai` as a system-wide NixOS service, it can be integrated directly into VS Code (or other compatible editors) as an MCP (Model Context Protocol) server. This allows your editor to communicate with the AI for task management capabilities. - -The Nix package created in the previous step ensures that `task-master-ai` is available in your environment, typically invokable via `npx task-master-ai` or directly if the Nix package adds it to your PATH. 
- -### VS Code `settings.json` Configuration: - -You can configure VS Code to use `task-master-ai` as an MCP server by adding the following to your `settings.json` file: - -```json -{ - "mcpServers": { - "taskmaster-ai": { - "command": "npx", - "args": ["-y", "--package=task-master-ai", "task-master-ai"], - "env": { - "ANTHROPIC_API_KEY": "YOUR_ANTHROPIC_API_KEY_HERE", - "PERPLEXITY_API_KEY": "YOUR_PERPLEXITY_API_KEY_HERE", - "MODEL": "claude-3-7-sonnet-20250219", - "PERPLEXITY_MODEL": "sonar-pro", - "MAX_TOKENS": 64000, - "TEMPERATURE": 0.2, - "DEFAULT_SUBTASKS": 5, - "DEFAULT_PRIORITY": "medium" - } - } - } -} -``` - -**Key Points for MCP Configuration:** - -* **`command` and `args`**: These specify how to run `task-master-ai`. Using `npx -y --package=task-master-ai task-master-ai` ensures that `npx` fetches and runs the specified version of `task-master-ai`. If your Nix package makes `task-master-ai` directly available in the PATH, you might simplify the command to just `task-master-ai` and remove the `args` that specify the package for `npx`. -* **`env`**: This section is crucial. You **must** replace placeholder API keys (`YOUR_ANTHROPIC_API_KEY_HERE`, `YOUR_PERPLEXITY_API_KEY_HERE`) with your actual keys. -* You can customize other environment variables like `MODEL`, `MAX_TOKENS`, etc., according to your needs and the capabilities of `task-master-ai`. -* Ensure the Nix package for `task-master-ai` (and `nodejs`/`npx`) is installed and accessible in the environment where VS Code runs. - -## 3. Finding Package Information - -* **NPM Registry**: Search for `task-master-ai` on [npmjs.com](https://www.npmjs.com/) to find its exact version, dependencies, and potentially its source repository. The roadmap indicates the source is `https://github.com/eyaltoledano/claude-task-master.git`. -* **GitHub Repository**: The roadmap points to `https://github.com/eyaltoledano/claude-task-master.git`. 
This is likely the best source for `package.json` and understanding how the tool works. - -This guide provides a starting point. You'll need to adapt the examples based on the specifics of the `task-master-ai` tool. diff --git a/research/simple-auto-update-plan.md b/research/simple-auto-update-plan.md index 920e0ab..c9415e3 100644 --- a/research/simple-auto-update-plan.md +++ b/research/simple-auto-update-plan.md @@ -100,89 +100,428 @@ in } ``` -### 2. Lab Tool Commands -Add new commands to the existing lab tool: +### 2. Guile Scheme Auto-Update Module +Create the core auto-update functionality in `lab/auto-update.scm`: -```python -# lab/commands/update_system.py -class UpdateSystemCommand: - def __init__(self, lab_config): - self.lab_config = lab_config - self.flake_path = lab_config.get('flake_path', '/home/geir/Home-lab') +```scheme +;; lab/auto-update.scm - Auto-update system implementation + +(define-module (lab auto-update) + #:use-module (ice-9 format) + #:use-module (ice-9 popen) + #:use-module (ice-9 textual-ports) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-19) ; Date/time + #:use-module (utils logging) + #:use-module (utils config) + #:use-module (lab deployment) + #:use-module (lab machines) + #:export (auto-update-system + schedule-auto-update + check-update-health + auto-update-status)) + +;; Pure function: Generate update log entry +(define (format-update-log-entry timestamp operation status details) + "Pure function to format update log entry" + (format #f "~a: ~a - ~a (~a)" timestamp operation status details)) + +;; Pure function: Check if system is healthy for updates +(define (system-health-check-pure) + "Pure function returning health check criteria" + '((disk-space-threshold . 90) + (required-services . ("systemd")) + (min-uptime-minutes . 
30))) + +;; Impure function: Check actual system health +(define (check-update-health) + "Check if system is ready for updates (impure - checks actual system)" + (log-info "Checking system health before update...") + + (let* ((health-checks (system-health-check-pure)) + (disk-threshold (assoc-ref health-checks 'disk-space-threshold)) + (disk-usage (get-disk-usage)) + (system-running (system-is-running?)) + (uptime-ok (check-minimum-uptime))) - def update_self(self): - """Update the current system using Nix flake""" - try: - # Update flake inputs - self._run_command(['nix', 'flake', 'update'], cwd=self.flake_path) - - # Rebuild system - hostname = self._get_hostname() - self._run_command([ - 'nixos-rebuild', 'switch', - '--flake', f'{self.flake_path}#{hostname}' - ]) - - print("System updated successfully") - return True - - except Exception as e: - print(f"Update failed: {e}") - return False + (log-debug "Disk usage: ~a%" disk-usage) + (log-debug "System running: ~a" system-running) + (log-debug "Uptime check: ~a" uptime-ok) - def schedule_reboot(self, delay_minutes=1): - """Schedule a system reboot""" - self._run_command(['shutdown', '-r', f'+{delay_minutes}']) - - def _get_hostname(self): - import socket - return socket.gethostname() - - def _run_command(self, cmd, cwd=None): - import subprocess - result = subprocess.run(cmd, cwd=cwd, check=True, - capture_output=True, text=True) - return result.stdout + (cond + ((> disk-usage disk-threshold) + (log-error "Disk usage too high: ~a% (threshold: ~a%)" disk-usage disk-threshold) + #f) + ((not system-running) + (log-error "System not in running state") + #f) + ((not uptime-ok) + (log-error "System uptime too low for safe update") + #f) + (else + (log-success "System health check passed") + #t)))) + +;; Impure function: Main auto-update routine +(define (auto-update-system . args) + "Perform automatic system update (impure - modifies system)" + (let* ((options (if (null? 
args) '() (car args))) + (auto-reboot (option-ref options 'auto-reboot #t)) + (dry-run (option-ref options 'dry-run #f)) + (machine-name (get-hostname))) + + (log-info "Starting auto-update for machine: ~a" machine-name) + (write-update-log "auto-update" "started" machine-name) + + (if (not (check-update-health)) + (begin + (log-error "System health check failed - aborting update") + (write-update-log "auto-update" "aborted" "health check failed") + #f) + (begin + ;; Update flake inputs + (log-info "Updating flake inputs...") + (let ((flake-result (update-flake options))) + (if flake-result + (begin + (log-success "Flake update completed") + (write-update-log "flake-update" "success" "") + + ;; Deploy configuration + (log-info "Deploying updated configuration...") + (let ((deploy-result (deploy-machine machine-name "switch" options))) + (if deploy-result + (begin + (log-success "Configuration deployment completed") + (write-update-log "deployment" "success" "switch mode") + + ;; Schedule reboot if enabled + (if (and auto-reboot (not dry-run)) + (begin + (log-info "Scheduling system reboot in 2 minutes...") + (write-update-log "reboot" "scheduled" "2 minutes") + (system "shutdown -r +2 'Auto-update completed - rebooting'") + #t) + (begin + (log-info "Auto-reboot disabled - update complete") + (write-update-log "auto-update" "completed" "no reboot") + #t))) + (begin + (log-error "Configuration deployment failed") + (write-update-log "deployment" "failed" "switch mode") + #f)))) + (begin + (log-error "Flake update failed") + (write-update-log "flake-update" "failed" "") + #f))))))) + +;; Helper functions for system checks and logging +(define (get-disk-usage) + "Get root filesystem disk usage percentage" + (let* ((cmd "df / | tail -1 | awk '{print $5}' | sed 's/%//'") + (port (open-pipe* OPEN_READ "/bin/sh" "-c" cmd)) + (output (string-trim-both (get-string-all port))) + (status (close-pipe port))) + (if (zero? 
status) + (or (string->number output) 95) ; #f (unparsable output) counts as high usage + 95))) + +(define (system-is-running?) + "Check if system is in running state" + (let* ((cmd "systemctl is-system-running --quiet") + (status (system cmd))) + (zero? status))) + +(define (get-hostname) + "Get current system hostname, falling back to the string unknown" + ;; Guile's POSIX gethostname: no shell or `hostname` binary needed + (catch #t + (lambda () (gethostname)) + (lambda (key . args) + "unknown"))) + +(define (write-update-log operation status details) + "Write update operation to log file" + (let* ((timestamp (date->string (current-date) "~Y-~m-~d ~H:~M:~S")) + (log-entry (format-update-log-entry timestamp operation status details)) + (log-file "/var/log/lab-auto-update.log")) + (catch #t + (lambda () + (let ((port (open-file log-file "a"))) ; "a" = append; call-with-output-file has no #:append + (format port "~a\n" log-entry) + (close-port port))) + (lambda (key . args) + (log-error "Failed to write update log: ~a" args))))) + +(define (auto-update-status) + "Display auto-update service status and recent logs" + (log-info "Checking auto-update status...") + + (let ((log-file "/var/log/lab-auto-update.log")) + (if (file-exists? log-file) + (begin + (format #t "Recent auto-update activity:\n") + (let* ((cmd (format #f "tail -10 ~a" log-file)) + (port (open-pipe* OPEN_READ "/bin/sh" "-c" cmd)) + (output (get-string-all port)) + (status (close-pipe port))) + (if (zero? status) (display output) + (log-error "Failed to read update log")))) + (log-info "No auto-update log found")) + + ;; Check systemd timer status + (format #t "\nSystemd timer status:\n") + (let* ((cmd "systemctl status lab-auto-update.timer --no-pager") + (port (open-pipe* OPEN_READ "/bin/sh" "-c" cmd)) + (output (get-string-all port)) (status (close-pipe port))) + (display output)))) ``` ### 3. 
CLI Integration -Extend the main lab tool CLI: +Update `main.scm` to include auto-update commands: -```python -# lab/cli.py (additions) -@cli.group() -def update(): - """System update commands""" - pass +```scheme +;; Add to use-modules section: +(lab auto-update) -@update.command('system') -@click.option('--self', 'update_self', is_flag=True, - help='Update the current system') -@click.option('--reboot', is_flag=True, - help='Reboot after update') -def update_system(update_self, reboot): - """Update system using Nix flake""" - if update_self: - updater = UpdateSystemCommand(config) - success = updater.update_self() +;; Add to help text: + auto-update Perform automatic system update with health checks + auto-update-status Show auto-update service status and logs + +;; Add command handlers: +(define (cmd-auto-update) + "Perform automatic system update" + (log-info "Starting automatic system update...") + (let ((result (auto-update-system '((auto-reboot . #t))))) + (if result + (log-success "Automatic update completed successfully") + (log-error "Automatic update failed")))) + +(define (cmd-auto-update-status) + "Show auto-update status and logs" + (auto-update-status)) + +;; Add to command dispatcher: + ('auto-update + (cmd-auto-update)) + + ('auto-update-status + (cmd-auto-update-status)) +``` + +### 4. Updated NixOS Service Module +Enhanced service module at `modules/services/lab-auto-update.nix`: + +```nix +# modules/services/lab-auto-update.nix - NixOS service for automatic lab updates + +{ config, lib, pkgs, ... 
}: + +with lib; + +let + cfg = config.services.lab-auto-update; + + # Get the lab tool from our packages + labTool = pkgs.callPackage ../../packages/lab-tools.nix {}; + + # Auto-update script that uses the Guile lab tool + autoUpdateScript = pkgs.writeShellScript "lab-auto-update" '' + #!/usr/bin/env bash + set -euo pipefail + + LOG_FILE="/var/log/lab-auto-update.log" + LOCK_FILE="/var/run/lab-auto-update.lock" + + # Ensure we don't run multiple instances + if [ -f "$LOCK_FILE" ]; then + echo "$(date): Auto-update already running (lock file exists)" >> "$LOG_FILE" + exit 1 + fi + + # Create lock file + echo $$ > "$LOCK_FILE" + + # Cleanup function + cleanup() { + rm -f "$LOCK_FILE" + } + trap cleanup EXIT + + echo "$(date): Starting lab auto-update" >> "$LOG_FILE" + + # Change to the lab directory + cd "${cfg.flakePath}" + + # Run the Guile lab tool auto-update command + if ${labTool}/bin/lab auto-update 2>&1 | tee -a "$LOG_FILE"; then + echo "$(date): Auto-update completed successfully" >> "$LOG_FILE" + else + echo "$(date): Auto-update failed with exit code $?" 
>> "$LOG_FILE" + exit 1 + fi + ''; + +in +{ + options.services.lab-auto-update = { + enable = mkEnableOption "Lab auto-update service"; + + schedule = mkOption { + type = types.str; + default = "02:00"; + description = "Time to run updates (HH:MM format)"; + }; + + randomizedDelay = mkOption { + type = types.str; + default = "30m"; + description = "Maximum random delay before starting update"; + }; + + flakePath = mkOption { + type = types.str; + default = "/home/geir/Projects/home-lab"; + description = "Path to the home lab flake directory"; + }; + + persistent = mkOption { + type = types.bool; + default = true; + description = "Whether the timer should be persistent across reboots"; + }; + + logRetentionDays = mkOption { + type = types.int; + default = 30; + description = "Number of days to retain auto-update logs"; + }; + }; + + config = mkIf cfg.enable { + # Systemd service for the auto-update + systemd.services.lab-auto-update = { + description = "Home Lab Auto-Update Service"; + after = [ "network-online.target" ]; + wants = [ "network-online.target" ]; + + serviceConfig = { + Type = "oneshot"; + User = "root"; + Group = "root"; + ExecStart = "${autoUpdateScript}"; - if success and reboot: - updater.schedule_reboot() + # Security settings + PrivateTmp = true; + ProtectSystem = false; # We need to modify the system + ProtectHome = false; # flakePath defaults to a directory under /home, which the update script must enter + NoNewPrivileges = false; # We need privileges for nixos-rebuild + + # Resource limits + MemoryMax = "2G"; + CPUQuota = "50%"; + + # Timeout settings + TimeoutStartSec = "30m"; + TimeoutStopSec = "5m"; + }; + + # Tools available to the update script (NixOS builds the unit's PATH from `path`) + path = with pkgs; [ + nix nixos-rebuild git openssh rsync gawk gnused + coreutils util-linux systemd + ]; + environment = { + NIX_PATH = "nixpkgs=${pkgs.path}"; + }; + }; + + # Systemd timer for scheduling + systemd.timers.lab-auto-update = { + description = "Home Lab Auto-Update Timer"; + wantedBy = [ "timers.target" ]; + + timerConfig = { + OnCalendar = 
"*-*-* ${cfg.schedule}:00"; + Persistent = cfg.persistent; + RandomizedDelaySec = cfg.randomizedDelay; + AccuracySec = "1min"; + }; + }; + + # Log rotation for auto-update logs + services.logrotate.settings.lab-auto-update = { + files = "/var/log/lab-auto-update.log"; + frequency = "daily"; + rotate = cfg.logRetentionDays; + compress = true; + delaycompress = true; + missingok = true; + notifempty = true; + create = "644 root root"; + }; + + # Ensure log directory exists with proper permissions + systemd.tmpfiles.rules = [ + "d /var/log 0755 root root -" + "f /var/log/lab-auto-update.log 0644 root root -" + ]; + }; +} ``` -### 4. Simple Configuration -Add update settings to lab configuration: +## Guile Scheme Implementation Advantages -```yaml -# lab.yaml (additions) -auto_update: - enabled: true - schedule: "02:00" - auto_reboot: true - flake_path: "/home/geir/Home-lab" - log_retention_days: 30 +The Guile Scheme implementation provides several benefits over the original Python approach: + +### 🎯 **K.I.S.S Principles Alignment** +- **Modular**: Follows the existing lab-tool module structure +- **Functional**: Pure functions for logic, impure functions clearly marked +- **Small**: Each function has a single responsibility +- **Simple**: Leverages existing deployment and configuration infrastructure + +### 🔧 **Integration Benefits** +- **Seamless Integration**: Uses existing `lab deployment` and `lab machines` modules +- **Consistent CLI**: Follows the same command pattern as other lab commands +- **Shared Configuration**: Uses the same configuration system and logging +- **Error Handling**: Relies on Guile's runtime (dynamic) type checks and `catch`-based error handling + +### 🛡️ **Enhanced Safety Features** +- **Health Checks**: Pre-update validation (disk space, system state, uptime) +- **Comprehensive Logging**: All operations logged with timestamps +- **Lock File Protection**: Prevents concurrent update attempts +- **Graceful Error Handling**: The lock file is removed on exit; a failed step aborts the update and is logged (no automatic rollback) + +### 📊 **Observability** 
+- **Status Commands**: `lab auto-update-status` for monitoring +- **Structured Logs**: Easy to parse and analyze +- **Systemd Integration**: Native systemd service and timer management +- **Log Rotation**: Automatic log management with configurable retention + +### 🚀 **Usage Examples** + +```bash +# Manual testing +lab auto-update # Run update with health checks +lab auto-update-status # Check logs and service status + +# Service management +systemctl status lab-auto-update.timer +systemctl list-timers lab-auto-update +journalctl -u lab-auto-update.service + +# Configuration (in machine's configuration.nix) +services.lab-auto-update = { + enable = true; + schedule = "02:00"; # 2 AM daily + randomizedDelay = "30m"; # Up to 30min random delay + flakePath = "/home/geir/Projects/home-lab"; + logRetentionDays = 30; +}; ``` +This implementation provides a robust, well-integrated auto-update system that maintains the functional programming principles and modular architecture of the existing lab-tool infrastructure. + ## Deployment Strategy ### Per-Machine Setup