grokking simplicity and refactoring
This commit is contained in:
parent
89a7fe100d
commit
fb4361d938
67 changed files with 6275 additions and 56 deletions
326
packages/lab-tool/research/core.scm
Normal file
326
packages/lab-tool/research/core.scm
Normal file
|
@ -0,0 +1,326 @@
|
|||
;; lab/core.scm - Core home lab operations
|
||||
|
||||
(define-module (lab core)
|
||||
#:use-module (ice-9 format)
|
||||
#:use-module (ice-9 popen)
|
||||
#:use-module (ice-9 rdelim)
|
||||
#:use-module (ice-9 textual-ports)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-19)
|
||||
#:export (get-infrastructure-status
|
||||
check-system-health
|
||||
update-flake
|
||||
validate-environment
|
||||
execute-nixos-rebuild
|
||||
check-network-connectivity
|
||||
option-ref))
|
||||
|
||||
;; Simple option reference function
|
||||
(define (option-ref options key default)
|
||||
"Get option value from options alist with default"
|
||||
(let ((value (assoc-ref options key)))
|
||||
(if value value default)))
|
||||
|
||||
;; Stub logging functions (to be replaced with proper logging module)
|
||||
(define (log-info format-str . args)
|
||||
"Log info message"
|
||||
(apply format #t (string-append "[INFO] " format-str "~%") args))
|
||||
|
||||
(define (log-debug format-str . args)
|
||||
"Log debug message"
|
||||
(apply format #t (string-append "[DEBUG] " format-str "~%") args))
|
||||
|
||||
(define (log-success format-str . args)
|
||||
"Log success message"
|
||||
(apply format #t (string-append "[SUCCESS] " format-str "~%") args))
|
||||
|
||||
(define (log-error format-str . args)
|
||||
"Log error message"
|
||||
(apply format #t (string-append "[ERROR] " format-str "~%") args))
|
||||
|
||||
(define (log-warn format-str . args)
|
||||
"Log warning message"
|
||||
(apply format #t (string-append "[WARN] " format-str "~%") args))
|
||||
|
||||
;; Stub configuration functions
|
||||
(define (get-all-machines)
|
||||
"Get list of all machines"
|
||||
'(grey-area sleeper-service congenital-optimist reverse-proxy))
|
||||
|
||||
(define (get-machine-config machine-name)
|
||||
"Get configuration for a machine"
|
||||
`((services . (systemd ssh))
|
||||
(type . server)))
|
||||
|
||||
(define (get-ssh-config machine-name)
|
||||
"Get SSH configuration for a machine"
|
||||
`((hostname . ,(symbol->string machine-name))
|
||||
(is-local . #f)))
|
||||
|
||||
(define (get-homelab-root)
|
||||
"Get home lab root directory"
|
||||
"/home/geir/Home-lab")
|
||||
|
||||
;; Stub SSH functions
|
||||
(define (test-ssh-connection machine-name)
|
||||
"Test SSH connection to machine"
|
||||
(zero? (system (format #f "ssh -o ConnectTimeout=5 -o BatchMode=yes ~a exit 2>/dev/null" machine-name))))
|
||||
|
||||
(define (run-remote-command machine-name command . args)
|
||||
"Run command on remote machine via SSH"
|
||||
(let* ((full-command (if (null? args)
|
||||
command
|
||||
(string-join (cons command args) " ")))
|
||||
(ssh-command (format #f "ssh ~a '~a'" machine-name full-command))
|
||||
(port (open-input-pipe ssh-command))
|
||||
(output (read-string port))
|
||||
(status (close-pipe port)))
|
||||
(values (zero? status) output)))
|
||||
|
||||
;; Utility function for spinner (stub)
|
||||
(define (with-spinner message proc)
|
||||
"Execute procedure with spinner (stub implementation)"
|
||||
(display (format #f "~a..." message))
|
||||
(let ((result (proc)))
|
||||
(display " done.\n")
|
||||
result))
|
||||
|
||||
;; Get comprehensive infrastructure status
|
||||
(define (get-infrastructure-status . args)
|
||||
"Get status of all machines or specific machine if provided"
|
||||
(let ((target-machine (if (null? args) #f (car args)))
|
||||
(machines (if (null? args)
|
||||
(get-all-machines)
|
||||
(list (car args)))))
|
||||
|
||||
(log-info "Checking infrastructure status...")
|
||||
|
||||
(map (lambda (machine-name)
|
||||
(let ((start-time (current-time)))
|
||||
(log-debug "Checking ~a..." machine-name)
|
||||
|
||||
(let* ((ssh-config (get-ssh-config machine-name))
|
||||
(is-local (and ssh-config (assoc-ref ssh-config 'is-local)))
|
||||
(connection-status (test-ssh-connection machine-name))
|
||||
(services-status (if connection-status
|
||||
(get-machine-services-status machine-name)
|
||||
'()))
|
||||
(system-info (if connection-status
|
||||
(get-machine-system-info machine-name)
|
||||
#f))
|
||||
(elapsed (- (current-time) start-time)))
|
||||
|
||||
`((machine . ,machine-name)
|
||||
(type . ,(if is-local 'local 'remote))
|
||||
(connection . ,(if connection-status 'online 'offline))
|
||||
(services . ,services-status)
|
||||
(system . ,system-info)
|
||||
(check-time . ,elapsed)))))
|
||||
machines)))
|
||||
|
||||
;; Get services status for a machine
|
||||
(define (get-machine-services-status machine-name)
|
||||
"Check status of services on a machine"
|
||||
(let ((machine-config (get-machine-config machine-name)))
|
||||
(if machine-config
|
||||
(let ((services (assoc-ref machine-config 'services)))
|
||||
(if services
|
||||
(map (lambda (service)
|
||||
(call-with-values
|
||||
(lambda () (run-remote-command machine-name
|
||||
"systemctl is-active"
|
||||
(symbol->string service)))
|
||||
(lambda (success output)
|
||||
`(,service . ,(if success
|
||||
(string-trim-right output)
|
||||
"unknown")))))
|
||||
services)
|
||||
'()))
|
||||
'())))
|
||||
|
||||
;; Get basic system information from a machine
|
||||
(define (get-machine-system-info machine-name)
|
||||
"Get basic system information from a machine"
|
||||
(let ((info-commands
|
||||
'(("uptime" "uptime -p")
|
||||
("load" "cat /proc/loadavg | cut -d' ' -f1-3")
|
||||
("memory" "free -h | grep Mem | awk '{print $3\"/\"$2}'")
|
||||
("disk" "df -h / | tail -1 | awk '{print $5}'")
|
||||
("kernel" "uname -r"))))
|
||||
|
||||
(fold (lambda (cmd-pair acc)
|
||||
(let ((key (car cmd-pair))
|
||||
(command (cadr cmd-pair)))
|
||||
(call-with-values
|
||||
(lambda () (run-remote-command machine-name command))
|
||||
(lambda (success output)
|
||||
(if success
|
||||
(assoc-set! acc (string->symbol key) (string-trim-right output))
|
||||
acc)))))
|
||||
'()
|
||||
info-commands)))
|
||||
|
||||
;; Check system health with comprehensive tests
|
||||
(define (check-system-health machine-name)
|
||||
"Perform comprehensive health check on a machine"
|
||||
(log-info "Performing health check on ~a..." machine-name)
|
||||
|
||||
(let ((health-checks
|
||||
'(("connectivity" . test-ssh-connection)
|
||||
("disk-space" . check-disk-space)
|
||||
("system-load" . check-system-load)
|
||||
("critical-services" . check-critical-services)
|
||||
("network" . check-network-connectivity))))
|
||||
|
||||
(map (lambda (check-pair)
|
||||
(let ((check-name (car check-pair))
|
||||
(check-proc (cdr check-pair)))
|
||||
(log-debug "Running ~a check..." check-name)
|
||||
(catch #t
|
||||
(lambda ()
|
||||
(let ((result (check-proc machine-name)))
|
||||
`(,check-name . ((status . ,(if result 'pass 'fail))
|
||||
(result . ,result))))
|
||||
(lambda (key . args)
|
||||
(log-warn "Health check ~a failed: ~a" check-name key)
|
||||
`(,check-name . ((status . error)
|
||||
(error . ,key)))))))
|
||||
health-checks)))
|
||||
|
||||
;; Individual health check functions
|
||||
(define (check-disk-space machine-name)
|
||||
"Check if disk space is below critical threshold"
|
||||
(call-with-values
|
||||
(lambda () (run-remote-command machine-name "df / | tail -1 | awk '{print $5}' | sed 's/%//'"))
|
||||
(lambda (success output)
|
||||
(if success
|
||||
(let ((usage (string->number (string-trim-right output))))
|
||||
(< usage 90)) ; Pass if usage < 90%
|
||||
#f))))
|
||||
|
||||
(define (check-system-load machine-name)
|
||||
"Check if system load is reasonable"
|
||||
(call-with-values
|
||||
(lambda () (run-remote-command machine-name "cat /proc/loadavg | cut -d' ' -f1"))
|
||||
(lambda (success output)
|
||||
(if success
|
||||
(let ((load (string->number (string-trim-right output))))
|
||||
(< load 5.0)) ; Pass if load < 5.0
|
||||
#f))))
|
||||
|
||||
(define (check-critical-services machine-name)
|
||||
"Check that critical services are running"
|
||||
(let ((critical-services '("sshd")))
|
||||
(every (lambda (service)
|
||||
(call-with-values
|
||||
(lambda () (run-remote-command machine-name "systemctl is-active" service))
|
||||
(lambda (success output)
|
||||
(and success (string=? (string-trim-right output) "active")))))
|
||||
critical-services)))
|
||||
|
||||
(define (check-network-connectivity machine-name)
|
||||
"Check basic network connectivity"
|
||||
(call-with-values
|
||||
(lambda () (run-remote-command machine-name "ping -c 1 -W 5 8.8.8.8 > /dev/null 2>&1; echo $?"))
|
||||
(lambda (success output)
|
||||
(and success (string=? (string-trim-right output) "0")))))
|
||||
|
||||
;; Update flake inputs
|
||||
(define (update-flake options)
|
||||
"Update flake inputs in the home lab repository"
|
||||
(let ((homelab-root (get-homelab-root))
|
||||
(dry-run (option-ref options 'dry-run #f)))
|
||||
|
||||
(log-info "Updating flake inputs...")
|
||||
|
||||
(if dry-run
|
||||
(begin
|
||||
(log-info "DRY RUN: Would execute: nix flake update")
|
||||
#t)
|
||||
(let* ((update-cmd (format #f "cd ~a && nix flake update" homelab-root))
|
||||
(port (open-pipe* OPEN_READ "/bin/sh" "-c" update-cmd))
|
||||
(output (get-string-all port))
|
||||
(status (close-pipe port)))
|
||||
|
||||
(if (zero? status)
|
||||
(begin
|
||||
(log-success "Flake inputs updated successfully")
|
||||
(log-debug "Update output: ~a" output)
|
||||
#t)
|
||||
(begin
|
||||
(log-error "Flake update failed (exit: ~a)" status)
|
||||
(log-error "Error output: ~a" output)
|
||||
#f))))))
|
||||
|
||||
;; Validate home lab environment
|
||||
(define (validate-environment)
|
||||
"Validate that the home lab environment is properly configured"
|
||||
(log-info "Validating home lab environment...")
|
||||
|
||||
(let ((checks
|
||||
`(("homelab-root" . ,(lambda () (file-exists? (get-homelab-root))))
|
||||
("flake-file" . ,(lambda () (file-exists? (string-append (get-homelab-root) "/flake.nix"))))
|
||||
("ssh-config" . ,(lambda () (file-exists? (string-append (getenv "HOME") "/.ssh/config"))))
|
||||
("nix-command" . ,(lambda () (zero? (system "which nix > /dev/null 2>&1"))))
|
||||
("machines-config" . ,(lambda () (not (null? (get-all-machines))))))))
|
||||
|
||||
(let ((results (map (lambda (check-pair)
|
||||
(let ((check-name (car check-pair))
|
||||
(check-proc (cdr check-pair)))
|
||||
(let ((result (check-proc)))
|
||||
(if result
|
||||
(log-success "✓ ~a" check-name)
|
||||
(log-error "✗ ~a" check-name))
|
||||
`(,check-name . ,result))))
|
||||
checks)))
|
||||
|
||||
(let ((failures (filter (lambda (result) (not (cdr result))) results)))
|
||||
(if (null? failures)
|
||||
(begin
|
||||
(log-success "Environment validation passed")
|
||||
#t)
|
||||
(begin
|
||||
(log-error "Environment validation failed: ~a" (map car failures))
|
||||
#f))))))
|
||||
|
||||
;; Execute nixos-rebuild with proper error handling
|
||||
(define (execute-nixos-rebuild machine-name mode options)
|
||||
"Execute nixos-rebuild on a machine with comprehensive error handling"
|
||||
(let ((homelab-root (get-homelab-root))
|
||||
(dry-run (option-ref options 'dry-run #f))
|
||||
(ssh-config (get-ssh-config machine-name)))
|
||||
|
||||
(if (not ssh-config)
|
||||
(begin
|
||||
(log-error "No SSH configuration for machine: ~a" machine-name)
|
||||
#f)
|
||||
(let* ((is-local (assoc-ref ssh-config 'is-local))
|
||||
(flake-ref (format #f "~a#~a" homelab-root machine-name))
|
||||
(rebuild-cmd (if is-local
|
||||
(format #f "sudo nixos-rebuild ~a --flake ~a" mode flake-ref)
|
||||
(format #f "nixos-rebuild ~a --flake ~a --target-host ~a --use-remote-sudo"
|
||||
mode flake-ref (assoc-ref ssh-config 'hostname)))))
|
||||
|
||||
(log-info "Executing nixos-rebuild for ~a (mode: ~a)" machine-name mode)
|
||||
(log-debug "Command: ~a" rebuild-cmd)
|
||||
|
||||
(if dry-run
|
||||
(begin
|
||||
(log-info "DRY RUN: Would execute: ~a" rebuild-cmd)
|
||||
#t)
|
||||
(with-spinner
|
||||
(format #f "Rebuilding ~a" machine-name)
|
||||
(lambda ()
|
||||
(let* ((port (open-pipe* OPEN_READ "/bin/sh" "-c" rebuild-cmd))
|
||||
(output (get-string-all port))
|
||||
(status (close-pipe port)))
|
||||
|
||||
(if (zero? status)
|
||||
(begin
|
||||
(log-success "nixos-rebuild completed successfully for ~a" machine-name)
|
||||
(log-debug "Build output: ~a" output)
|
||||
#t)
|
||||
(begin
|
||||
(log-error "nixos-rebuild failed for ~a (exit: ~a)" machine-name status)
|
||||
(log-error "Error output: ~a" output)
|
||||
#f))))))))))
|
329
packages/lab-tool/research/deployment.scm
Normal file
329
packages/lab-tool/research/deployment.scm
Normal file
|
@ -0,0 +1,329 @@
|
|||
;; lab/deployment.scm - Deployment operations for Home Lab Tool
|
||||
|
||||
(define-module (lab deployment)
|
||||
#:use-module (ice-9 format)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (ice-9 popen)
|
||||
#:use-module (ice-9 textual-ports)
|
||||
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-19)
|
||||
#:use-module (utils logging)
|
||||
#:use-module (utils config)
|
||||
#:use-module (utils ssh)
|
||||
#:use-module (lab core)
|
||||
#:export (deploy-machine
|
||||
update-all-machines
|
||||
hybrid-update
|
||||
validate-deployment
|
||||
rollback-deployment
|
||||
deployment-status
|
||||
option-ref))
|
||||
|
||||
;; Helper function for option handling
|
||||
(define (option-ref options key default)
|
||||
"Get option value with default fallback"
|
||||
(let ((value (assoc-ref options key)))
|
||||
(if value value default)))
|
||||
|
||||
;; Deploy configuration to a specific machine
|
||||
(define (deploy-machine machine-name mode options)
|
||||
"Deploy NixOS configuration to a specific machine"
|
||||
(let ((valid-modes '("boot" "test" "switch"))
|
||||
(dry-run (option-ref options 'dry-run #f)))
|
||||
|
||||
;; Validate inputs
|
||||
(if (not (validate-machine-name machine-name))
|
||||
#f
|
||||
(if (not (member mode valid-modes))
|
||||
(begin
|
||||
(log-error "Invalid deployment mode: ~a" mode)
|
||||
(log-error "Valid modes: ~a" (string-join valid-modes ", "))
|
||||
#f)
|
||||
|
||||
;; Proceed with deployment
|
||||
(begin
|
||||
(log-info "Starting deployment: ~a -> ~a (mode: ~a)"
|
||||
machine-name machine-name mode)
|
||||
|
||||
;; Pre-deployment validation
|
||||
(if (not (validate-deployment-prerequisites machine-name))
|
||||
(begin
|
||||
(log-error "Pre-deployment validation failed")
|
||||
#f)
|
||||
|
||||
;; Execute deployment
|
||||
(let ((deployment-result
|
||||
(execute-deployment machine-name mode options)))
|
||||
|
||||
;; Post-deployment validation
|
||||
(if deployment-result
|
||||
(begin
|
||||
(log-info "Deployment completed, validating...")
|
||||
(if (validate-post-deployment machine-name mode)
|
||||
(begin
|
||||
(log-success "Deployment successful and validated ✓")
|
||||
#t)
|
||||
(begin
|
||||
(log-warn "Deployment completed but validation failed")
|
||||
;; Don't fail completely - deployment might still be functional
|
||||
#t)))
|
||||
(begin
|
||||
(log-error "Deployment failed")
|
||||
#f)))))))))
|
||||
|
||||
;; Execute the actual deployment
|
||||
(define (execute-deployment machine-name mode options)
|
||||
"Execute the deployment based on machine type and configuration"
|
||||
(let ((ssh-config (get-ssh-config machine-name))
|
||||
(machine-config (get-machine-config machine-name)))
|
||||
|
||||
(if (not ssh-config)
|
||||
(begin
|
||||
(log-error "No SSH configuration found for ~a" machine-name)
|
||||
#f)
|
||||
|
||||
(let ((deployment-method (assoc-ref machine-config 'deployment-method))
|
||||
(is-local (assoc-ref ssh-config 'is-local)))
|
||||
|
||||
(log-debug "Using deployment method: ~a" (or deployment-method 'nixos-rebuild))
|
||||
|
||||
(match (or deployment-method 'nixos-rebuild)
|
||||
('nixos-rebuild
|
||||
(execute-nixos-rebuild machine-name mode options))
|
||||
|
||||
('deploy-rs
|
||||
(execute-deploy-rs machine-name mode options))
|
||||
|
||||
('hybrid
|
||||
(execute-hybrid-deployment machine-name mode options))
|
||||
|
||||
(method
|
||||
(log-error "Unknown deployment method: ~a" method)
|
||||
#f))))))
|
||||
|
||||
;; Execute deploy-rs deployment
|
||||
(define (execute-deploy-rs machine-name mode options)
|
||||
"Deploy using deploy-rs for atomic deployments"
|
||||
(let ((homelab-root (get-homelab-root))
|
||||
(dry-run (option-ref options 'dry-run #f)))
|
||||
|
||||
(log-info "Deploying ~a using deploy-rs..." machine-name)
|
||||
|
||||
(if dry-run
|
||||
(begin
|
||||
(log-info "DRY RUN: Would execute deploy-rs for ~a" machine-name)
|
||||
#t)
|
||||
|
||||
(let* ((deploy-cmd (format #f "cd ~a && deploy '.#~a' --magic-rollback --auto-rollback"
|
||||
homelab-root machine-name))
|
||||
(start-time (current-time)))
|
||||
|
||||
(log-debug "Deploy command: ~a" deploy-cmd)
|
||||
|
||||
(with-spinner
|
||||
(format #f "Deploying ~a with deploy-rs" machine-name)
|
||||
(lambda ()
|
||||
(let* ((port (open-pipe* OPEN_READ "/bin/sh" "-c" deploy-cmd))
|
||||
(output (get-string-all port))
|
||||
(status (close-pipe port))
|
||||
(elapsed (- (current-time) start-time)))
|
||||
|
||||
(if (zero? status)
|
||||
(begin
|
||||
(log-success "deploy-rs completed in ~as" elapsed)
|
||||
(log-debug "Deploy output: ~a" output)
|
||||
#t)
|
||||
(begin
|
||||
(log-error "deploy-rs failed (exit: ~a)" status)
|
||||
(log-error "Error output: ~a" output)
|
||||
#f)))))))))
|
||||
|
||||
;; Execute hybrid deployment (flake update + deploy)
|
||||
(define (execute-hybrid-deployment machine-name mode options)
|
||||
"Execute hybrid deployment: update flake then deploy"
|
||||
(log-info "Starting hybrid deployment for ~a..." machine-name)
|
||||
|
||||
;; First update flake
|
||||
(if (update-flake options)
|
||||
;; Then deploy
|
||||
(execute-nixos-rebuild machine-name mode options)
|
||||
(begin
|
||||
(log-error "Flake update failed, aborting deployment")
|
||||
#f)))
|
||||
|
||||
;; Validate deployment prerequisites
|
||||
(define (validate-deployment-prerequisites machine-name)
|
||||
"Validate that deployment prerequisites are met"
|
||||
(log-debug "Validating deployment prerequisites for ~a..." machine-name)
|
||||
|
||||
(let ((checks
|
||||
`(("machine-config" . ,(lambda () (get-machine-config machine-name)))
|
||||
("ssh-connectivity" . ,(lambda () (test-ssh-connection machine-name)))
|
||||
("flake-exists" . ,(lambda () (file-exists? (string-append (get-homelab-root) "/flake.nix"))))
|
||||
("machine-flake-config" . ,(lambda () (validate-machine-flake-config machine-name))))))
|
||||
|
||||
(let ((results (map (lambda (check-pair)
|
||||
(let ((check-name (car check-pair))
|
||||
(check-proc (cdr check-pair)))
|
||||
(let ((result (check-proc)))
|
||||
(if result
|
||||
(log-debug "✓ Prerequisite: ~a" check-name)
|
||||
(log-error "✗ Prerequisite failed: ~a" check-name))
|
||||
result)))
|
||||
checks)))
|
||||
|
||||
(every identity results))))
|
||||
|
||||
;; Validate machine has flake configuration
|
||||
(define (validate-machine-flake-config machine-name)
|
||||
"Check that machine has a configuration in the flake"
|
||||
(let ((machine-dir (string-append (get-homelab-root) "/machines/" machine-name)))
|
||||
(and (file-exists? machine-dir)
|
||||
(file-exists? (string-append machine-dir "/configuration.nix")))))
|
||||
|
||||
;; Validate post-deployment state
|
||||
(define (validate-post-deployment machine-name mode)
|
||||
"Validate system state after deployment"
|
||||
(log-debug "Validating post-deployment state for ~a..." machine-name)
|
||||
|
||||
;; Wait a moment for services to stabilize
|
||||
(sleep 3)
|
||||
|
||||
(let ((checks
|
||||
`(("ssh-connectivity" . ,(lambda () (test-ssh-connection machine-name)))
|
||||
("system-responsive" . ,(lambda () (check-system-responsive machine-name)))
|
||||
("critical-services" . ,(lambda () (check-critical-services machine-name))))))
|
||||
|
||||
(let ((results (map (lambda (check-pair)
|
||||
(let ((check-name (car check-pair))
|
||||
(check-proc (cdr check-pair)))
|
||||
(catch #t
|
||||
(lambda ()
|
||||
(let ((result (check-proc)))
|
||||
(if result
|
||||
(log-debug "✓ Post-deployment: ~a" check-name)
|
||||
(log-warn "✗ Post-deployment: ~a" check-name))
|
||||
result))
|
||||
(lambda (key . args)
|
||||
(log-warn "Post-deployment check ~a failed: ~a" check-name key)
|
||||
#f))))
|
||||
checks)))
|
||||
|
||||
(every identity results))))
|
||||
|
||||
;; Check if system is responsive after deployment
|
||||
(define (check-system-responsive machine-name)
|
||||
"Check if system is responsive after deployment"
|
||||
(call-with-values (((success output)
|
||||
(run-remote-command machine-name "echo 'system-check' && uptime")))
|
||||
(and success (string-contains output "system-check"))))
|
||||
|
||||
;; Update all machines
|
||||
(define (update-all-machines mode options)
|
||||
"Update all configured machines"
|
||||
(let ((machines (get-all-machines))
|
||||
(dry-run (option-ref options 'dry-run #f)))
|
||||
|
||||
(log-info "Starting update of all machines (mode: ~a)..." mode)
|
||||
|
||||
(if dry-run
|
||||
(begin
|
||||
(log-info "DRY RUN: Would update machines: ~a" (string-join machines ", "))
|
||||
#t)
|
||||
|
||||
(let ((results
|
||||
(map (lambda (machine-name)
|
||||
(log-info "Updating ~a..." machine-name)
|
||||
(let ((result (deploy-machine machine-name mode options)))
|
||||
(if result
|
||||
(log-success "✓ ~a updated successfully" machine-name)
|
||||
(log-error "✗ ~a update failed" machine-name))
|
||||
`(,machine-name . ,result)))
|
||||
machines)))
|
||||
|
||||
(let ((successful (filter cdr results))
|
||||
(failed (filter (lambda (r) (not (cdr r))) results)))
|
||||
|
||||
(log-info "Update summary:")
|
||||
(log-info " Successful: ~a/~a" (length successful) (length results))
|
||||
|
||||
(when (not (null? failed))
|
||||
(log-warn " Failed: ~a" (map car failed)))
|
||||
|
||||
;; Return success if all succeeded
|
||||
(= (length successful) (length results)))))))
|
||||
|
||||
;; Hybrid update: flake update + selective deployment
|
||||
(define (hybrid-update target options)
|
||||
"Perform hybrid update: flake update followed by deployment"
|
||||
(log-info "Starting hybrid update for target: ~a" target)
|
||||
|
||||
;; First update flake
|
||||
(if (update-flake options)
|
||||
|
||||
;; Then deploy based on target
|
||||
(match target
|
||||
("all"
|
||||
(update-all-machines "boot" options))
|
||||
|
||||
(machine-name
|
||||
(if (validate-machine-name machine-name)
|
||||
(deploy-machine machine-name "boot" options)
|
||||
#f)))
|
||||
|
||||
(begin
|
||||
(log-error "Flake update failed, aborting hybrid update")
|
||||
#f)))
|
||||
|
||||
;; Get deployment status
|
||||
(define (deployment-status . machine-name)
|
||||
"Get current deployment status for machines"
|
||||
(let ((machines (if (null? machine-name)
|
||||
(get-all-machines)
|
||||
machine-name)))
|
||||
|
||||
(map (lambda (machine)
|
||||
(let ((last-deployment (get-last-deployment-info machine))
|
||||
(current-generation (get-current-generation machine)))
|
||||
`((machine . ,machine)
|
||||
(last-deployment . ,last-deployment)
|
||||
(current-generation . ,current-generation)
|
||||
(status . ,(get-deployment-health machine)))))
|
||||
machines)))
|
||||
|
||||
;; Get last deployment information
|
||||
(define (get-last-deployment-info machine-name)
|
||||
"Get information about the last deployment"
|
||||
(call-with-values (((success output)
|
||||
(run-remote-command machine-name
|
||||
"ls -la /nix/var/nix/profiles/system* | tail -1")))
|
||||
(if success
|
||||
(string-trim-right output)
|
||||
"unknown")))
|
||||
|
||||
;; Get current system generation
|
||||
(define (get-current-generation machine-name)
|
||||
"Get current NixOS generation information"
|
||||
(call-with-values (((success output)
|
||||
(run-remote-command machine-name
|
||||
"nixos-version")))
|
||||
(if success
|
||||
(string-trim-right output)
|
||||
"unknown")))
|
||||
|
||||
;; Get deployment health status
|
||||
(define (get-deployment-health machine-name)
|
||||
"Check if deployment is healthy"
|
||||
(if (test-ssh-connection machine-name)
|
||||
'healthy
|
||||
'unhealthy))
|
||||
|
||||
;; Rollback deployment (placeholder for future implementation)
|
||||
(define (rollback-deployment machine-name . generation)
|
||||
"Rollback to previous generation (deploy-rs feature)"
|
||||
(log-warn "Rollback functionality not yet implemented")
|
||||
(log-info "For manual rollback on ~a:" machine-name)
|
||||
(log-info " 1. SSH to machine")
|
||||
(log-info " 2. Run: sudo nixos-rebuild switch --rollback")
|
||||
#f)
|
348
packages/lab-tool/research/guile-mcp-server.scm
Normal file
348
packages/lab-tool/research/guile-mcp-server.scm
Normal file
|
@ -0,0 +1,348 @@
|
|||
#!/usr/bin/env guile
|
||||
!#
|
||||
|
||||
;; Guile MCP Server for Home Lab Integration
|
||||
;; Implements Model Context Protocol for VS Code extension
|
||||
|
||||
(use-modules (json)
|
||||
(ice-9 textual-ports)
|
||||
(ice-9 popen)
|
||||
(ice-9 rdelim)
|
||||
(ice-9 match)
|
||||
(ice-9 threads)
|
||||
(srfi srfi-1)
|
||||
(srfi srfi-19)
|
||||
(srfi srfi-26))
|
||||
|
||||
;; MCP Protocol Implementation
|
||||
(define mcp-protocol-version "2024-11-05")
|
||||
(define request-id-counter 0)
|
||||
|
||||
;; Server capabilities and state
|
||||
(define server-capabilities
|
||||
`((tools . ())
|
||||
(resources . ())
|
||||
(prompts . ())))
|
||||
|
||||
(define server-info
|
||||
`((name . "guile-homelab-mcp")
|
||||
(version . "0.1.0")))
|
||||
|
||||
;; Request/Response utilities
|
||||
(define (make-response id result)
|
||||
`((jsonrpc . "2.0")
|
||||
(id . ,id)
|
||||
(result . ,result)))
|
||||
|
||||
(define (make-error id code message)
|
||||
`((jsonrpc . "2.0")
|
||||
(id . ,id)
|
||||
(error . ((code . ,code)
|
||||
(message . ,message)))))
|
||||
|
||||
(define (send-response response)
|
||||
(let ((json-str (scm->json-string response)))
|
||||
(display json-str)
|
||||
(newline)
|
||||
(force-output)))
|
||||
|
||||
;; Home Lab Tools Implementation
|
||||
(define (list-machines)
|
||||
"List all available machines in the home lab"
|
||||
(let* ((proc (open-input-pipe "find /etc/nixos/hosts -name '*.nix' -type f"))
|
||||
(output (read-string proc)))
|
||||
(close-pipe proc)
|
||||
(if (string-null? output)
|
||||
'()
|
||||
(map (lambda (path)
|
||||
(basename path ".nix"))
|
||||
(string-split (string-trim-right output #\newline) #\newline)))))
|
||||
|
||||
(define (get-machine-status machine)
|
||||
"Get status of a specific machine"
|
||||
(let* ((cmd (format #f "ping -c 1 -W 1 ~a > /dev/null 2>&1" machine))
|
||||
(status (system cmd)))
|
||||
(if (= status 0) "online" "offline")))
|
||||
|
||||
(define (deploy-machine machine method)
|
||||
"Deploy configuration to a machine"
|
||||
(match method
|
||||
("deploy-rs"
|
||||
(let ((cmd (format #f "deploy '.#~a'" machine)))
|
||||
(deploy-with-command cmd machine)))
|
||||
("hybrid-update"
|
||||
(let ((cmd (format #f "nixos-rebuild switch --flake '.#~a' --target-host ~a --use-remote-sudo" machine machine)))
|
||||
(deploy-with-command cmd machine)))
|
||||
("legacy"
|
||||
(let ((cmd (format #f "nixos-rebuild switch --flake '.#~a'" machine)))
|
||||
(deploy-with-command cmd machine)))
|
||||
(_ (throw 'deployment-error "Unknown deployment method" method))))
|
||||
|
||||
(define (deploy-with-command cmd machine)
|
||||
"Execute deployment command and return result"
|
||||
(let* ((proc (open-input-pipe cmd))
|
||||
(output (read-string proc))
|
||||
(status (close-pipe proc)))
|
||||
`((success . ,(= status 0))
|
||||
(output . ,output)
|
||||
(machine . ,machine)
|
||||
(timestamp . ,(date->string (current-date))))))
|
||||
|
||||
(define (generate-nix-config machine-name services)
|
||||
"Generate NixOS configuration for a new machine"
|
||||
(let ((config (format #f "# Generated NixOS configuration for ~a
|
||||
# Generated on ~a
|
||||
|
||||
{ config, pkgs, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
./hardware-configuration.nix
|
||||
];
|
||||
|
||||
# Machine name
|
||||
networking.hostName = \"~a\";
|
||||
|
||||
# Basic system configuration
|
||||
system.stateVersion = \"23.11\";
|
||||
|
||||
# Enable services
|
||||
~a
|
||||
|
||||
# Network configuration
|
||||
networking.firewall.enable = true;
|
||||
|
||||
# SSH access
|
||||
services.openssh.enable = true;
|
||||
users.users.root.openssh.authorizedKeys.keys = [
|
||||
# Add your public key here
|
||||
];
|
||||
}
|
||||
"
|
||||
machine-name
|
||||
(date->string (current-date))
|
||||
machine-name
|
||||
(string-join
|
||||
(map (lambda (service)
|
||||
(format #f " services.~a.enable = true;" service))
|
||||
services)
|
||||
"\n"))))
|
||||
`((content . ,config)
|
||||
(filename . ,(format #f "~a.nix" machine-name)))))
|
||||
|
||||
(define (get-infrastructure-status)
|
||||
"Get comprehensive infrastructure status"
|
||||
(let* ((machines (list-machines))
|
||||
(machine-status (map (lambda (m)
|
||||
`((name . ,m)
|
||||
(status . ,(get-machine-status m))))
|
||||
machines)))
|
||||
`((machines . ,machine-status)
|
||||
(timestamp . ,(date->string (current-date)))
|
||||
(total-machines . ,(length machines))
|
||||
(online-machines . ,(length (filter (lambda (m)
|
||||
(equal? (assoc-ref m 'status) "online"))
|
||||
machine-status))))))
|
||||
|
||||
;; MCP Tools Registry
|
||||
(define mcp-tools
|
||||
`(((name . "deploy-machine")
|
||||
(description . "Deploy NixOS configuration to a home lab machine")
|
||||
(inputSchema . ((type . "object")
|
||||
(properties . ((machine . ((type . "string")
|
||||
(description . "Machine hostname to deploy to")))
|
||||
(method . ((type . "string")
|
||||
(enum . ("deploy-rs" "hybrid-update" "legacy"))
|
||||
(description . "Deployment method to use")))))
|
||||
(required . ("machine" "method")))))
|
||||
|
||||
((name . "list-machines")
|
||||
(description . "List all available machines in the home lab")
|
||||
(inputSchema . ((type . "object")
|
||||
(properties . ()))))
|
||||
|
||||
((name . "check-status")
|
||||
(description . "Check status of home lab infrastructure")
|
||||
(inputSchema . ((type . "object")
|
||||
(properties . ((machine . ((type . "string")
|
||||
(description . "Specific machine to check (optional)")))))))
|
||||
|
||||
((name . "generate-nix-config")
|
||||
(description . "Generate NixOS configuration for a new machine")
|
||||
(inputSchema . ((type . "object")
|
||||
(properties . ((machine-name . ((type . "string")
|
||||
(description . "Name for the new machine")))
|
||||
(services . ((type . "array")
|
||||
(items . ((type . "string")))
|
||||
(description . "List of services to enable")))))
|
||||
(required . ("machine-name")))))
|
||||
|
||||
((name . "list-services")
|
||||
(description . "List available NixOS services")
|
||||
(inputSchema . ((type . "object")
|
||||
(properties . ()))))))
|
||||
|
||||
;; MCP Resources Registry
|
||||
(define mcp-resources
|
||||
`(((uri . "homelab://status/all")
|
||||
(name . "Infrastructure Status")
|
||||
(description . "Complete status of all home lab machines and services")
|
||||
(mimeType . "application/json"))
|
||||
|
||||
((uri . "homelab://status/summary")
|
||||
(name . "Status Summary")
|
||||
(description . "Summary of infrastructure health")
|
||||
(mimeType . "text/plain"))
|
||||
|
||||
((uri . "homelab://context/copilot")
|
||||
(name . "Copilot Context")
|
||||
(description . "Context information for GitHub Copilot integration")
|
||||
(mimeType . "text/markdown"))))
|
||||
|
||||
;; Tool execution dispatcher
|
||||
(define (execute-tool name arguments)
|
||||
"Execute a registered MCP tool"
|
||||
(match name
|
||||
("deploy-machine"
|
||||
(let ((machine (assoc-ref arguments 'machine))
|
||||
(method (assoc-ref arguments 'method)))
|
||||
(deploy-machine machine method)))
|
||||
|
||||
("list-machines"
|
||||
`((machines . ,(list-machines))))
|
||||
|
||||
("check-status"
|
||||
(let ((machine (assoc-ref arguments 'machine)))
|
||||
(if machine
|
||||
`((machine . ,machine)
|
||||
(status . ,(get-machine-status machine)))
|
||||
(get-infrastructure-status))))
|
||||
|
||||
("generate-nix-config"
|
||||
(let ((machine-name (assoc-ref arguments 'machine-name))
|
||||
(services (or (assoc-ref arguments 'services) '())))
|
||||
(generate-nix-config machine-name services)))
|
||||
|
||||
("list-services"
|
||||
`((services . ("nginx" "postgresql" "redis" "mysql" "docker" "kubernetes"
|
||||
"grafana" "prometheus" "gitea" "nextcloud" "jellyfin"))))
|
||||
|
||||
(_ (throw 'unknown-tool "Tool not found" name))))
|
||||
|
||||
;; Resource content providers
|
||||
(define (get-resource-content uri)
|
||||
"Get content for a resource URI"
|
||||
(match uri
|
||||
("homelab://status/all"
|
||||
`((content . ,(get-infrastructure-status))))
|
||||
|
||||
("homelab://status/summary"
|
||||
(let ((status (get-infrastructure-status)))
|
||||
`((content . ,(format #f "Home Lab Status: ~a/~a machines online"
|
||||
(assoc-ref status 'online-machines)
|
||||
(assoc-ref status 'total-machines))))))
|
||||
|
||||
("homelab://context/copilot"
|
||||
(let ((status (get-infrastructure-status)))
|
||||
`((content . ,(format #f "# Home Lab Infrastructure Context
|
||||
|
||||
## Current Status
|
||||
- Total Machines: ~a
|
||||
- Online Machines: ~a
|
||||
- Last Updated: ~a
|
||||
|
||||
## Available Operations
|
||||
Use the home lab extension commands or MCP tools for:
|
||||
- Deploying configurations (deploy-machine)
|
||||
- Checking infrastructure status (check-status)
|
||||
- Generating new machine configs (generate-nix-config)
|
||||
- Managing services across the fleet
|
||||
|
||||
## Machine List
|
||||
~a
|
||||
|
||||
This context helps GitHub Copilot understand your home lab infrastructure state."
|
||||
(assoc-ref status 'total-machines)
|
||||
(assoc-ref status 'online-machines)
|
||||
(assoc-ref status 'timestamp)
|
||||
(string-join
|
||||
(map (lambda (m)
|
||||
(format #f "- ~a: ~a"
|
||||
(assoc-ref m 'name)
|
||||
(assoc-ref m 'status)))
|
||||
(assoc-ref status 'machines))
|
||||
"\n"))))))
|
||||
|
||||
(_ (throw 'unknown-resource "Resource not found" uri))))
|
||||
|
||||
;; MCP Protocol Handlers
|
||||
(define (handle-initialize params)
|
||||
"Handle MCP initialize request"
|
||||
`((protocolVersion . ,mcp-protocol-version)
|
||||
(capabilities . ((tools . ((listChanged . #f)))
|
||||
(resources . ((subscribe . #f)
|
||||
(listChanged . #f)))
|
||||
(prompts . ((listChanged . #f)))))
|
||||
(serverInfo . ,server-info)))
|
||||
|
||||
(define (handle-tools-list params)
|
||||
"Handle tools/list request"
|
||||
`((tools . ,mcp-tools)))
|
||||
|
||||
(define (handle-tools-call params)
|
||||
"Handle tools/call request"
|
||||
(let ((name (assoc-ref params 'name))
|
||||
(arguments (assoc-ref params 'arguments)))
|
||||
(execute-tool name arguments)))
|
||||
|
||||
(define (handle-resources-list params)
|
||||
"Handle resources/list request"
|
||||
`((resources . ,mcp-resources)))
|
||||
|
||||
(define (handle-resources-read params)
|
||||
"Handle resources/read request"
|
||||
(let ((uri (assoc-ref params 'uri)))
|
||||
(get-resource-content uri)))
|
||||
|
||||
;; Main request dispatcher
|
||||
(define (handle-request request)
|
||||
"Main request handler"
|
||||
(let ((method (assoc-ref request 'method))
|
||||
(params (assoc-ref request 'params))
|
||||
(id (assoc-ref request 'id)))
|
||||
|
||||
(catch #t
|
||||
(lambda ()
|
||||
(let ((result
|
||||
(match method
|
||||
("initialize" (handle-initialize params))
|
||||
("tools/list" (handle-tools-list params))
|
||||
("tools/call" (handle-tools-call params))
|
||||
("resources/list" (handle-resources-list params))
|
||||
("resources/read" (handle-resources-read params))
|
||||
(_ (throw 'method-not-found "Method not supported" method)))))
|
||||
(send-response (make-response id result))))
|
||||
|
||||
(lambda (key . args)
|
||||
(send-response (make-error id -32603 (format #f "~a: ~a" key args)))))))
|
||||
|
||||
;; Main server loop
|
||||
(define (run-mcp-server)
|
||||
"Run the MCP server main loop"
|
||||
(let loop ()
|
||||
(let ((line (read-line)))
|
||||
(unless (eof-object? line)
|
||||
(catch #t
|
||||
(lambda ()
|
||||
(let ((request (json-string->scm line)))
|
||||
(handle-request request)))
|
||||
(lambda (key . args)
|
||||
(send-response (make-error 0 -32700 "Parse error"))))
|
||||
(loop)))))
|
||||
|
||||
;; Export main function for use as module
|
||||
(define-public run-mcp-server run-mcp-server)
|
||||
|
||||
;; Run server if called directly
|
||||
(when (equal? (car (command-line)) (current-filename))
|
||||
(run-mcp-server))
|
846
packages/lab-tool/research/guile.md
Normal file
846
packages/lab-tool/research/guile.md
Normal file
|
@ -0,0 +1,846 @@
|
|||
# Guile Scheme Coding Instructions for Home Lab Tool
|
||||
|
||||
## Functional Programming Principles
|
||||
|
||||
**Core Philosophy**: Functional programming is about actions, data, and computation - compose small, pure functions to build complex behaviors.
|
||||
|
||||
### 1. Pure Functions First
|
||||
- Functions should be deterministic and side-effect free when possible
|
||||
- Separate pure computation from I/O operations
|
||||
- Use immutable data structures as default
|
||||
|
||||
```scheme
|
||||
;; Good: Pure function
|
||||
(define (calculate-deployment-hash config)
|
||||
(sha256 (scm->json-string config)))
|
||||
|
||||
;; Better: Separate pure logic from I/O
|
||||
(define (deployment-ready? machine-config current-state)
|
||||
(and (eq? (assoc-ref machine-config 'status) 'configured)
|
||||
(eq? (assoc-ref current-state 'connectivity) 'online)))
|
||||
|
||||
;; I/O operations separate
|
||||
(define (check-machine-deployment machine)
|
||||
(let ((config (load-machine-config machine))
|
||||
(state (probe-machine-state machine)))
|
||||
(deployment-ready? config state)))
|
||||
```
|
||||
|
||||
### 2. Data-Driven Design
|
||||
- Represent configurations and state as data structures
|
||||
- Use association lists (alists) and vectors for structured data
|
||||
- Leverage Guile's homoiconicity (code as data)
|
||||
|
||||
```scheme
|
||||
;; Machine configuration as data
|
||||
(define machine-specs
|
||||
`((grey-area
|
||||
(services (ollama jellyfin forgejo))
|
||||
(deployment-method deploy-rs)
|
||||
(backup-schedule weekly))
|
||||
(sleeper-service
|
||||
(services (nfs zfs monitoring))
|
||||
(deployment-method hybrid-update)
|
||||
(backup-schedule daily))))
|
||||
|
||||
;; Operations on data
|
||||
(define (get-machine-services machine)
|
||||
(assoc-ref (assoc-ref machine-specs machine) 'services))
|
||||
|
||||
(define (machines-with-service service)
|
||||
(filter (lambda (machine-spec)
|
||||
(member service (get-machine-services (car machine-spec))))
|
||||
machine-specs))
|
||||
```
|
||||
|
||||
## Guile-Specific Idioms
|
||||
|
||||
### 3. Module Organization
|
||||
- Use meaningful module hierarchies
|
||||
- Export only necessary public interfaces
|
||||
- Group related functionality together
|
||||
|
||||
```scheme
|
||||
;; File: modules/lab/machines.scm
|
||||
(define-module (lab machines)
|
||||
#:use-module (srfi srfi-1) ; List processing
|
||||
#:use-module (srfi srfi-26) ; Cut/cute
|
||||
#:use-module (ice-9 match) ; Pattern matching
|
||||
#:use-module (ssh session)
|
||||
#:export (machine-status
|
||||
deploy-machine
|
||||
list-machines
|
||||
machine-services))
|
||||
|
||||
;; File: modules/lab/deployment.scm
|
||||
(define-module (lab deployment)
|
||||
#:use-module (lab machines)
|
||||
#:use-module (json)
|
||||
#:export (deploy-rs
|
||||
hybrid-update
|
||||
rollback-deployment))
|
||||
```
|
||||
|
||||
### 4. Error Handling the Scheme Way
|
||||
- Use exceptions for exceptional conditions
|
||||
- Return #f or special values for expected failures
|
||||
- Provide meaningful error context
|
||||
|
||||
```scheme
|
||||
;; Use exceptions for programming errors
|
||||
(define (deploy-machine machine method)
|
||||
(unless (member machine (list-machines))
|
||||
(throw 'invalid-machine "Unknown machine" machine))
|
||||
(unless (member method '(deploy-rs hybrid-update legacy))
|
||||
(throw 'invalid-method "Unknown deployment method" method))
|
||||
;; ... deployment logic)
|
||||
|
||||
;; Return #f for expected failures
|
||||
(define (machine-reachable? machine)
|
||||
(catch #t
|
||||
(lambda ()
|
||||
(ssh-connect machine)
|
||||
#t)
|
||||
(lambda (key . args)
|
||||
#f)))
|
||||
|
||||
;; Provide context with failure info
|
||||
(define (deployment-result success? machine method details)
|
||||
`((success . ,success?)
|
||||
(machine . ,machine)
|
||||
(method . ,method)
|
||||
(timestamp . ,(current-time))
|
||||
(details . ,details)))
|
||||
```
|
||||
|
||||
### 5. Higher-Order Functions and Composition
|
||||
- Use map, filter, fold for list processing
|
||||
- Compose functions to build complex operations
|
||||
- Leverage SRFI-1 for advanced list operations
|
||||
|
||||
```scheme
|
||||
(use-modules (srfi srfi-1))
|
||||
|
||||
;; Functional composition
|
||||
(define (healthy-machines machines)
|
||||
(filter machine-reachable?
|
||||
(filter (lambda (m) (not (maintenance-mode? m)))
|
||||
machines)))
|
||||
|
||||
;; Map operations across machines
|
||||
(define (update-all-machines)
|
||||
(map (lambda (machine)
|
||||
(cons machine (update-machine machine)))
|
||||
(healthy-machines (list-machines))))
|
||||
|
||||
;; Fold for aggregation
|
||||
(define (deployment-summary results)
|
||||
(fold (lambda (result acc)
|
||||
(if (assoc-ref result 'success)
|
||||
(cons 'successful (1+ (assoc-ref acc 'successful)))
|
||||
(cons 'failed (1+ (assoc-ref acc 'failed)))))
|
||||
'((successful . 0) (failed . 0))
|
||||
results))
|
||||
```
|
||||
|
||||
### 6. Pattern Matching for Control Flow
|
||||
- Use `match` for destructuring and dispatch
|
||||
- Pattern match on data structures
|
||||
- Cleaner than nested if/cond statements
|
||||
|
||||
```scheme
|
||||
(use-modules (ice-9 match))
|
||||
|
||||
(define (handle-deployment-event event)
|
||||
(match event
|
||||
(('start machine method)
|
||||
(log-info "Starting deployment of ~a using ~a" machine method))
|
||||
|
||||
(('progress machine percent)
|
||||
(update-progress-bar machine percent))
|
||||
|
||||
(('success machine result)
|
||||
(log-success "Deployment completed: ~a" machine)
|
||||
(notify-success machine result))
|
||||
|
||||
(('error machine error-msg)
|
||||
(log-error "Deployment failed: ~a - ~a" machine error-msg)
|
||||
(initiate-rollback machine))
|
||||
|
||||
(_ (log-warning "Unknown event: ~a" event))))
|
||||
|
||||
;; Pattern matching for configuration parsing
|
||||
(define (parse-machine-config config-sexp)
|
||||
(match config-sexp
|
||||
(('machine name ('services services ...) ('options options ...))
|
||||
`((name . ,name)
|
||||
(services . ,services)
|
||||
(options . ,(alist->hash-table options))))
|
||||
|
||||
(_ (throw 'invalid-config "Malformed machine config" config-sexp))))
|
||||
```
|
||||
|
||||
### 7. REPL-Driven Development
|
||||
- Design for interactive development
|
||||
- Provide introspection functions
|
||||
- Make state queryable and modifiable
|
||||
|
||||
```scheme
|
||||
;; REPL helpers for development
|
||||
(define (debug-machine-state machine)
|
||||
"Display comprehensive machine state for debugging"
|
||||
(format #t "Machine: ~a~%" machine)
|
||||
(format #t "Status: ~a~%" (machine-status machine))
|
||||
(format #t "Services: ~a~%" (machine-services machine))
|
||||
(format #t "Last deployment: ~a~%" (last-deployment machine))
|
||||
(format #t "Reachable: ~a~%" (machine-reachable? machine)))
|
||||
|
||||
;; Interactive deployment with confirmation
|
||||
(define (interactive-deploy machine)
|
||||
(let ((current-config (get-machine-config machine)))
|
||||
(display-config current-config)
|
||||
(when (yes-or-no? "Proceed with deployment?")
|
||||
(deploy-machine machine 'deploy-rs))))
|
||||
|
||||
;; State introspection
|
||||
(define (lab-status)
|
||||
`((total-machines . ,(length (list-machines)))
|
||||
(reachable . ,(length (filter machine-reachable? (list-machines))))
|
||||
(services-running . ,(total-running-services))
|
||||
(pending-deployments . ,(length (pending-deployments)))))
|
||||
```
|
||||
|
||||
### 8. Concurrency with Fibers
|
||||
- Use fibers for concurrent operations
|
||||
- Non-blocking I/O for better performance
|
||||
- Coordinate parallel deployments safely
|
||||
|
||||
```scheme
|
||||
(use-modules (fibers) (fibers channels))
|
||||
|
||||
;; Concurrent machine checking
|
||||
(define (check-all-machines-concurrent machines)
|
||||
(run-fibers
|
||||
(lambda ()
|
||||
(let ((results-channel (make-channel)))
|
||||
;; Spawn fiber for each machine
|
||||
(for-each (lambda (machine)
|
||||
(spawn-fiber
|
||||
(lambda ()
|
||||
(let ((status (check-machine-status machine)))
|
||||
(put-message results-channel
|
||||
(cons machine status))))))
|
||||
machines)
|
||||
|
||||
;; Collect results
|
||||
(let loop ((remaining (length machines))
|
||||
(results '()))
|
||||
(if (zero? remaining)
|
||||
results
|
||||
(loop (1- remaining)
|
||||
(cons (get-message results-channel) results))))))))
|
||||
|
||||
;; Parallel deployment with coordination
|
||||
(define (deploy-machines-parallel machines)
|
||||
(run-fibers
|
||||
(lambda ()
|
||||
(let ((deployment-channel (make-channel))
|
||||
(coordinator (spawn-fiber (deployment-coordinator deployment-channel))))
|
||||
(par-map (lambda (machine)
|
||||
(deploy-with-coordination machine deployment-channel))
|
||||
machines)))))
|
||||
```
|
||||
|
||||
### 9. MCP Server Implementation Patterns
|
||||
- Structured message handling
|
||||
- Capability-based tool organization
|
||||
- Resource management with caching
|
||||
|
||||
```scheme
|
||||
;; MCP message dispatch
|
||||
(define (handle-mcp-request request)
|
||||
(match (json-ref request "method")
|
||||
("tools/list"
|
||||
(mcp-tools-list))
|
||||
|
||||
("tools/call"
|
||||
(let ((tool (json-ref request "params" "name"))
|
||||
(args (json-ref request "params" "arguments")))
|
||||
(call-lab-tool tool args)))
|
||||
|
||||
("resources/list"
|
||||
(mcp-resources-list))
|
||||
|
||||
("resources/read"
|
||||
(let ((uri (json-ref request "params" "uri")))
|
||||
(read-lab-resource uri)))
|
||||
|
||||
(method
|
||||
(mcp-error -32601 "Method not found" method))))
|
||||
|
||||
;; Tool capability definition
|
||||
(define lab-tools
|
||||
`((deploy-machine
|
||||
(description . "Deploy configuration to a specific machine")
|
||||
(inputSchema . ,(json-schema
|
||||
`((type . "object")
|
||||
(properties . ((machine (type . "string"))
|
||||
(method (type . "string")
|
||||
(enum . ("deploy-rs" "hybrid-update")))))
|
||||
(required . ("machine")))))
|
||||
(handler . ,deploy-machine-tool))
|
||||
|
||||
(check-status
|
||||
(description . "Check machine status and connectivity")
|
||||
(inputSchema . ,(json-schema
|
||||
`((type . "object")
|
||||
(properties . ((machines (type . "array")
|
||||
(items (type . "string"))))))))
|
||||
(handler . ,check-status-tool))))
|
||||
```
|
||||
|
||||
### 10. Configuration and Environment
|
||||
- Use parameters for configuration
|
||||
- Environment-aware defaults
|
||||
- Validate configuration on startup
|
||||
|
||||
```scheme
|
||||
;; Configuration parameters
|
||||
(define lab-config-dir
|
||||
(make-parameter (or (getenv "LAB_CONFIG_DIR")
|
||||
"/etc/lab-tool")))
|
||||
|
||||
(define deployment-timeout
|
||||
(make-parameter (string->number (or (getenv "DEPLOYMENT_TIMEOUT") "300"))))
|
||||
|
||||
(define ssh-key-path
|
||||
(make-parameter (or (getenv "LAB_SSH_KEY")
|
||||
(string-append (getenv "HOME") "/.ssh/lab_key"))))
|
||||
|
||||
;; Configuration validation
|
||||
(define (validate-lab-config)
|
||||
(unless (file-exists? (lab-config-dir))
|
||||
(throw 'config-error "Lab config directory not found" (lab-config-dir)))
|
||||
|
||||
(unless (file-exists? (ssh-key-path))
|
||||
(throw 'config-error "SSH key not found" (ssh-key-path)))
|
||||
|
||||
(unless (> (deployment-timeout) 0)
|
||||
(throw 'config-error "Invalid deployment timeout" (deployment-timeout))))
|
||||
|
||||
;; Initialize with validation
|
||||
(define (init-lab-tool)
|
||||
(validate-lab-config)
|
||||
(load-machine-configurations)
|
||||
(initialize-ssh-agent)
|
||||
(setup-logging))
|
||||
```
|
||||
|
||||
## Code Style Guidelines
|
||||
|
||||
### 11. Naming Conventions
|
||||
- Use kebab-case for variables and functions
|
||||
- Predicates end with `?`
|
||||
- Mutating procedures end with `!`
|
||||
- Constants in ALL-CAPS with hyphens
|
||||
|
||||
```scheme
|
||||
;; Good naming
|
||||
(define DEFAULT-SSH-PORT 22)
|
||||
(define machine-deployment-status ...)
|
||||
(define (machine-reachable? machine) ...)
|
||||
(define (update-machine-config! machine config) ...)
|
||||
|
||||
;; Avoid
|
||||
(define defaultSSHPort 22) ; camelCase
|
||||
(define machine_status ...) ; snake_case
|
||||
(define (is-machine-reachable ...) ; unnecessary 'is-'
|
||||
```
|
||||
|
||||
### 12. Documentation and Comments
|
||||
- Document module purposes and exports
|
||||
- Use docstrings for complex functions
|
||||
- Comment the "why", not the "what"
|
||||
|
||||
```scheme
|
||||
(define (deploy-machine machine method)
|
||||
"Deploy configuration to MACHINE using METHOD.
|
||||
|
||||
Returns a deployment result alist with success status, timing,
|
||||
and any error messages. May throw exceptions for invalid inputs."
|
||||
|
||||
;; Validate inputs early to fail fast
|
||||
(validate-machine machine)
|
||||
(validate-deployment-method method)
|
||||
|
||||
;; Use atomic operations to prevent partial deployments
|
||||
(call-with-deployment-lock machine
|
||||
(lambda ()
|
||||
(let ((start-time (current-time)))
|
||||
;; ... deployment logic
|
||||
))))
|
||||
```
|
||||
|
||||
### 13. Testing Approach
|
||||
- Write tests for pure functions first
|
||||
- Mock I/O operations
|
||||
- Use SRFI-64 testing framework
|
||||
|
||||
```scheme
|
||||
(use-modules (srfi srfi-64))
|
||||
|
||||
(test-begin "machine-configuration")
|
||||
|
||||
(test-equal "machine services extraction"
|
||||
'(ollama jellyfin forgejo)
|
||||
(get-machine-services 'grey-area))
|
||||
|
||||
(test-assert "deployment readiness check"
|
||||
(deployment-ready?
|
||||
'((status . configured) (health . good))
|
||||
'((connectivity . online) (load . normal))))
|
||||
|
||||
(test-error "invalid machine throws exception"
|
||||
'invalid-machine
|
||||
(deploy-machine 'non-existent-machine 'deploy-rs))
|
||||
|
||||
(test-end "machine-configuration")
|
||||
```
|
||||
|
||||
## Project Structure Best Practices
|
||||
|
||||
### 14. Module Organization
|
||||
```
|
||||
modules/
|
||||
├── lab/
|
||||
│ ├── core.scm ; Core data structures and utilities
|
||||
│ ├── machines.scm ; Machine management
|
||||
│ ├── deployment.scm ; Deployment strategies
|
||||
│ ├── monitoring.scm ; Status checking and metrics
|
||||
│ └── config.scm ; Configuration handling
|
||||
├── mcp/
|
||||
│ ├── server.scm ; MCP server implementation
|
||||
│ ├── tools.scm ; MCP tool definitions
|
||||
│ └── resources.scm ; MCP resource handlers
|
||||
└── utils/
|
||||
├── ssh.scm ; SSH utilities
|
||||
├── json.scm ; JSON helpers
|
||||
└── logging.scm ; Logging facilities
|
||||
```
|
||||
|
||||
### 15. Build and Development Workflow
|
||||
- Use Guile's module compilation
|
||||
- Leverage REPL for iterative development
|
||||
- Provide development/production configurations
|
||||
|
||||
```scheme
|
||||
;; Development helpers in separate module
|
||||
(define-module (lab dev)
|
||||
#:use-module (lab core)
|
||||
#:export (reload-config
|
||||
reset-state
|
||||
dev-deploy))
|
||||
|
||||
;; Hot-reload for development
|
||||
(define (reload-config)
|
||||
(reload-module (resolve-module '(lab config)))
|
||||
(init-lab-tool))
|
||||
|
||||
;; Safe deployment for development
|
||||
(define (dev-deploy machine)
|
||||
(if (eq? (current-environment) 'development)
|
||||
(deploy-machine machine 'deploy-rs)
|
||||
(error "dev-deploy only available in development mode")))
|
||||
```
|
||||
|
||||
## VS Code and GitHub Copilot Integration
|
||||
|
||||
### 16. MCP Client Integration with VS Code
|
||||
- Implement MCP client in VS Code extension
|
||||
- Bridge home lab context to Copilot
|
||||
- Provide real-time infrastructure state
|
||||
|
||||
```typescript
|
||||
// VS Code extension structure for MCP integration
|
||||
// File: vscode-extension/src/extension.ts
|
||||
import * as vscode from 'vscode';
|
||||
import { MCPClient } from './mcp-client';
|
||||
|
||||
export function activate(context: vscode.ExtensionContext) {
|
||||
const mcpClient = new MCPClient('stdio', {
|
||||
command: 'guile',
|
||||
args: ['-c', '(use-modules (mcp server)) (run-mcp-server)']
|
||||
});
|
||||
|
||||
// Register commands for home lab operations
|
||||
const deployCommand = vscode.commands.registerCommand(
|
||||
'homelab.deploy',
|
||||
async (machine: string) => {
|
||||
const result = await mcpClient.callTool('deploy-machine', {
|
||||
machine: machine,
|
||||
method: 'deploy-rs'
|
||||
});
|
||||
vscode.window.showInformationMessage(
|
||||
`Deployment ${result.success ? 'succeeded' : 'failed'}`
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
// Provide context to Copilot through workspace state
|
||||
const statusProvider = new HomeLab StatusProvider(mcpClient);
|
||||
context.subscriptions.push(
|
||||
vscode.workspace.registerTextDocumentContentProvider(
|
||||
'homelab', statusProvider
|
||||
)
|
||||
);
|
||||
|
||||
context.subscriptions.push(deployCommand);
|
||||
}
|
||||
|
||||
class HomeLabStatusProvider implements vscode.TextDocumentContentProvider {
|
||||
constructor(private mcpClient: MCPClient) {}
|
||||
|
||||
async provideTextDocumentContent(uri: vscode.Uri): Promise<string> {
|
||||
// Fetch current lab state for Copilot context
|
||||
const resources = await this.mcpClient.listResources();
|
||||
const status = await this.mcpClient.readResource('machines://status/all');
|
||||
|
||||
return `# Home Lab Status
|
||||
Current Infrastructure State:
|
||||
${JSON.stringify(status, null, 2)}
|
||||
|
||||
Available Resources:
|
||||
${resources.map(r => `- ${r.uri}: ${r.description}`).join('\n')}
|
||||
`;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 17. MCP Server Configuration for IDE Integration
|
||||
- Provide IDE-specific tools and resources
|
||||
- Format responses for developer consumption
|
||||
- Include code suggestions and snippets
|
||||
|
||||
```scheme
|
||||
;; IDE-specific MCP tools
|
||||
(define ide-tools
|
||||
`((generate-nix-config
|
||||
(description . "Generate NixOS configuration for new machine")
|
||||
(inputSchema . ,(json-schema
|
||||
`((type . "object")
|
||||
(properties . ((machine-name (type . "string"))
|
||||
(services (type . "array")
|
||||
(items (type . "string")))
|
||||
(hardware-profile (type . "string"))))
|
||||
(required . ("machine-name")))))
|
||||
(handler . ,generate-nix-config-tool))
|
||||
|
||||
(suggest-deployment-strategy
|
||||
(description . "Suggest optimal deployment strategy for changes")
|
||||
(inputSchema . ,(json-schema
|
||||
`((type . "object")
|
||||
(properties . ((changed-files (type . "array")
|
||||
(items (type . "string")))
|
||||
(target-machines (type . "array")
|
||||
(items (type . "string")))))
|
||||
(required . ("changed-files")))))
|
||||
(handler . ,suggest-deployment-strategy-tool))
|
||||
|
||||
(validate-config
|
||||
(description . "Validate NixOS configuration syntax and dependencies")
|
||||
(inputSchema . ,(json-schema
|
||||
`((type . "object")
|
||||
(properties . ((config-path (type . "string"))
|
||||
(machine (type . "string"))))
|
||||
(required . ("config-path")))))
|
||||
(handler . ,validate-config-tool))))
|
||||
|
||||
;; IDE-specific resources
|
||||
(define ide-resources
|
||||
`(("homelab://templates/machine-config"
|
||||
(description . "Template for new machine configuration")
|
||||
(mimeType . "application/x-nix"))
|
||||
|
||||
("homelab://examples/service-configs"
|
||||
(description . "Example service configurations")
|
||||
(mimeType . "application/x-nix"))
|
||||
|
||||
("homelab://docs/deployment-guide"
|
||||
(description . "Step-by-step deployment procedures")
|
||||
(mimeType . "text/markdown"))
|
||||
|
||||
("homelab://status/real-time"
|
||||
(description . "Real-time infrastructure status for context")
|
||||
(mimeType . "application/json"))))
|
||||
|
||||
;; Generate contextual code suggestions
|
||||
(define (generate-nix-config-tool args)
|
||||
(let ((machine-name (assoc-ref args "machine-name"))
|
||||
(services (assoc-ref args "services"))
|
||||
(hardware-profile (assoc-ref args "hardware-profile")))
|
||||
|
||||
`((content . ,(format #f "# Generated configuration for ~a
|
||||
{ config, pkgs, ... }:
|
||||
|
||||
{
|
||||
imports = [
|
||||
./hardware-configuration.nix
|
||||
~/args
|
||||
];
|
||||
|
||||
# Machine-specific configuration
|
||||
networking.hostName = \"~a\";
|
||||
|
||||
# Services configuration
|
||||
~a
|
||||
|
||||
# System packages
|
||||
environment.systemPackages = with pkgs; [
|
||||
# Add your packages here
|
||||
];
|
||||
|
||||
system.stateVersion = \"24.05\";
|
||||
}"
|
||||
machine-name
|
||||
machine-name
|
||||
(if services
|
||||
(string-join
|
||||
(map (lambda (service)
|
||||
(format #f " services.~a.enable = true;" service))
|
||||
services)
|
||||
"\n")
|
||||
" # No services specified")))
|
||||
(isError . #f))))
|
||||
```
|
||||
|
||||
### 18. Copilot Context Enhancement
|
||||
- Provide infrastructure context to improve suggestions
|
||||
- Include deployment patterns and best practices
|
||||
- Real-time system state for informed recommendations
|
||||
|
||||
```scheme
|
||||
;; Context provider for Copilot integration
|
||||
(define (provide-copilot-context)
|
||||
`((infrastructure-state . ,(get-current-infrastructure-state))
|
||||
(deployment-patterns . ,(get-common-deployment-patterns))
|
||||
(service-configurations . ,(get-service-config-templates))
|
||||
(best-practices . ,(get-deployment-best-practices))
|
||||
(current-issues . ,(get-active-alerts))))
|
||||
|
||||
(define (get-current-infrastructure-state)
|
||||
`((machines . ,(map (lambda (machine)
|
||||
`((name . ,machine)
|
||||
(status . ,(machine-status machine))
|
||||
(services . ,(machine-services machine))
|
||||
(last-deployment . ,(last-deployment-time machine))))
|
||||
(list-machines)))
|
||||
(network-topology . ,(get-network-topology))
|
||||
(resource-usage . ,(get-resource-utilization))))
|
||||
|
||||
(define (get-common-deployment-patterns)
|
||||
`((safe-deployment . "Use deploy-rs for production, hybrid-update for development")
|
||||
(rollback-strategy . "Always test deployments in staging first")
|
||||
(service-dependencies . "Ensure database services start before applications")
|
||||
(backup-before-deploy . "Create snapshots before major configuration changes")))
|
||||
|
||||
;; Format context for IDE consumption
|
||||
(define (format-ide-context context)
|
||||
(scm->json-string context #:pretty #t))
|
||||
```
|
||||
|
||||
### 19. VS Code Extension Development
|
||||
- Create extension for seamless MCP integration
|
||||
- Provide commands, views, and context
|
||||
- Enable real-time collaboration with infrastructure
|
||||
|
||||
```typescript
|
||||
// package.json for VS Code extension
|
||||
{
|
||||
"name": "homelab-mcp-integration",
|
||||
"displayName": "Home Lab MCP Integration",
|
||||
"description": "Integrate home lab infrastructure with VS Code through MCP",
|
||||
"version": "0.1.0",
|
||||
"engines": {
|
||||
"vscode": "^1.74.0"
|
||||
},
|
||||
"categories": ["Other"],
|
||||
"activationEvents": [
|
||||
"onCommand:homelab.connect",
|
||||
"workspaceContains:**/flake.nix"
|
||||
],
|
||||
"main": "./out/extension.js",
|
||||
"contributes": {
|
||||
"commands": [
|
||||
{
|
||||
"command": "homelab.deploy",
|
||||
"title": "Deploy Machine",
|
||||
"category": "Home Lab"
|
||||
},
|
||||
{
|
||||
"command": "homelab.status",
|
||||
"title": "Check Status",
|
||||
"category": "Home Lab"
|
||||
},
|
||||
{
|
||||
"command": "homelab.generateConfig",
|
||||
"title": "Generate Config",
|
||||
"category": "Home Lab"
|
||||
}
|
||||
],
|
||||
"views": {
|
||||
"explorer": [
|
||||
{
|
||||
"id": "homelabStatus",
|
||||
"name": "Home Lab Status",
|
||||
"when": "homelab:connected"
|
||||
}
|
||||
]
|
||||
},
|
||||
"viewsContainers": {
|
||||
"activitybar": [
|
||||
{
|
||||
"id": "homelab",
|
||||
"title": "Home Lab",
|
||||
"icon": "$(server-environment)"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MCP Client implementation
|
||||
class MCPClient {
|
||||
private transport: MCPTransport;
|
||||
private capabilities: MCPCapabilities;
|
||||
|
||||
constructor(transportType: 'stdio' | 'websocket', config: any) {
|
||||
this.transport = this.createTransport(transportType, config);
|
||||
this.initialize();
|
||||
}
|
||||
|
||||
async callTool(name: string, arguments: any): Promise<any> {
|
||||
return this.transport.request('tools/call', {
|
||||
name: name,
|
||||
arguments: arguments
|
||||
});
|
||||
}
|
||||
|
||||
async listResources(): Promise<MCPResource[]> {
|
||||
const response = await this.transport.request('resources/list', {});
|
||||
return response.resources;
|
||||
}
|
||||
|
||||
async readResource(uri: string): Promise<any> {
|
||||
return this.transport.request('resources/read', { uri });
|
||||
}
|
||||
|
||||
// Integration with Copilot context
|
||||
async getCopilotContext(): Promise<string> {
|
||||
const context = await this.readResource('homelab://context/copilot');
|
||||
return context.content;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 20. GitHub Copilot Workspace Integration
|
||||
- Configure workspace for optimal Copilot suggestions
|
||||
- Provide infrastructure context files
|
||||
- Set up context patterns for deployment scenarios
|
||||
|
||||
```json
|
||||
// .vscode/settings.json
|
||||
{
|
||||
"github.copilot.enable": {
|
||||
"*": true,
|
||||
"yaml": true,
|
||||
"nix": true,
|
||||
"scheme": true
|
||||
},
|
||||
"github.copilot.advanced": {
|
||||
"length": 500,
|
||||
"temperature": 0.2
|
||||
},
|
||||
"homelab.mcpServer": {
|
||||
"command": "guile",
|
||||
"args": ["-L", "modules", "-c", "(use-modules (mcp server)) (run-mcp-server)"],
|
||||
"autoStart": true
|
||||
},
|
||||
"files.associations": {
|
||||
"*.scm": "scheme",
|
||||
"flake.lock": "json"
|
||||
}
|
||||
}
|
||||
|
||||
// .copilot/context.md for workspace context
|
||||
```markdown
|
||||
# Home Lab Infrastructure Context
|
||||
|
||||
## Current Architecture
|
||||
- NixOS-based infrastructure with multiple machines
|
||||
- Deploy-rs for safe deployments
|
||||
- Services: Ollama, Jellyfin, Forgejo, NFS, ZFS
|
||||
- Network topology: reverse-proxy, grey-area, sleeper-service, congenital-optimist
|
||||
|
||||
## Common Patterns
|
||||
- Use `deploy-rs` for production deployments
|
||||
- Test with `hybrid-update` in development
|
||||
- Always backup before major changes
|
||||
- Follow NixOS module structure in `/modules/`
|
||||
|
||||
## Configuration Standards
|
||||
- Machine configs in `/machines/{hostname}/`
|
||||
- Shared modules in `/modules/`
|
||||
- Service-specific configs in `services/` subdirectories
|
||||
```
|
||||
|
||||
### 21. Real-time Context Updates
|
||||
- Stream infrastructure changes to VS Code
|
||||
- Update Copilot context automatically
|
||||
- Provide deployment feedback in editor
|
||||
|
||||
```scheme
|
||||
;; Real-time context streaming
|
||||
(define (start-context-stream port)
|
||||
"Stream infrastructure changes to connected IDE clients"
|
||||
(let ((clients (make-hash-table)))
|
||||
(spawn-fiber
|
||||
(lambda ()
|
||||
(let loop ()
|
||||
(let ((update (get-infrastructure-update)))
|
||||
(hash-for-each
|
||||
(lambda (client-id websocket)
|
||||
(catch #t
|
||||
(lambda ()
|
||||
(websocket-send websocket
|
||||
(scm->json-string update)))
|
||||
(lambda (key . args)
|
||||
(hash-remove! clients client-id))))
|
||||
clients)
|
||||
(sleep 5)
|
||||
(loop)))))
|
||||
|
||||
;; WebSocket server for IDE connections
|
||||
(run-websocket-server
|
||||
(lambda (ws)
|
||||
(let ((client-id (generate-client-id)))
|
||||
(hash-set! clients client-id ws)
|
||||
(websocket-send ws
|
||||
(scm->json-string
|
||||
`((type . "welcome")
|
||||
(context . ,(get-current-context)))))
|
||||
(handle-client-messages ws client-id clients)))
|
||||
#:port port)))
|
||||
|
||||
;; Integration with file watchers
|
||||
(define (watch-config-changes)
|
||||
"Watch for configuration file changes and update context"
|
||||
(file-system-watcher
|
||||
(list "/home/geir/Home-lab/machines"
|
||||
"/home/geir/Home-lab/modules")
|
||||
(lambda (event)
|
||||
(match event
|
||||
(('modify path)
|
||||
(when (string-suffix? ".nix" path)
|
||||
(update-copilot-context path)))
|
||||
(_ #f)))))
|
||||
```
|
394
packages/lab-tool/research/guile_ecosystem.md
Normal file
394
packages/lab-tool/research/guile_ecosystem.md
Normal file
|
@ -0,0 +1,394 @@
|
|||
|
||||
# Guile Scheme Ecosystem Analysis for Home Lab Tool Migration and MCP Integration
|
||||
|
||||
## Executive Summary
|
||||
|
||||
This analysis examines the GNU Guile Scheme ecosystem to evaluate its suitability for migrating the home lab tool from Bash and potentially implementing a Model Context Protocol (MCP) server. Based on comprehensive research, Guile offers a robust ecosystem with numerous libraries that address the core requirements of modern system administration, networking, and infrastructure management.
|
||||
|
||||
**Key Findings:**
|
||||
|
||||
- **Rich ecosystem**: 200+ libraries available through GNU Guix ecosystem
|
||||
- **Strong system administration capabilities**: SSH, system interaction, process management
|
||||
- **Excellent networking support**: HTTP servers/clients, WebSocket, JSON-RPC
|
||||
- **Mature infrastructure**: Well-maintained libraries with active development
|
||||
- **MCP compatibility**: All necessary components available for MCP server implementation
|
||||
|
||||
## Current State Analysis
|
||||
|
||||
### Existing Lab Tool Capabilities
|
||||
|
||||
Based on the documentation, the current lab tool provides:
|
||||
|
||||
- Machine status checking and connectivity
|
||||
- Multiple deployment methods (deploy-rs, hybrid-update, legacy)
|
||||
- NixOS configuration management
|
||||
- SSH-based operations
|
||||
- Package updates via flake management
|
||||
|
||||
### Migration Benefits to Guile
|
||||
|
||||
1. **Enhanced error handling** over Bash's limited error management
|
||||
2. **Structured data handling** for machine configurations and status
|
||||
3. **Better modularity** and code organization
|
||||
4. **Advanced networking capabilities** for future expansion
|
||||
5. **REPL-driven development** for rapid prototyping and debugging
|
||||
|
||||
## Core Libraries for Home Lab Tool Migration
|
||||
|
||||
### 1. System Administration & SSH
|
||||
|
||||
**guile-ssh** - *Essential for remote operations*
|
||||
|
||||
- **Capabilities**: SSH client/server, SFTP, port forwarding, tunneling
|
||||
- **Use cases**: All remote machine interactions, deployment coordination
|
||||
- **Maturity**: Very mature, actively maintained
|
||||
- **Documentation**: Comprehensive with examples
|
||||
|
||||
```scheme
|
||||
;; Example SSH connection and command execution
|
||||
(use-modules (ssh session) (ssh channel))
|
||||
(let ((session (make-session #:host "sleeper-service")))
|
||||
(connect! session)
|
||||
(authenticate-server session)
|
||||
(userauth-public-key! session key)
|
||||
;; Execute nixos-rebuild or other commands
|
||||
(call-with-remote-output-pipe session "nixos-rebuild switch"
|
||||
(lambda (port) (display (read-string port)))))
|
||||
```
|
||||
|
||||
### 2. JSON Data Handling
|
||||
|
||||
**guile-json** - *For structured configuration and API communication*
|
||||
|
||||
- **Capabilities**: JSON parsing/generation, RFC 7464 support, pretty printing
|
||||
- **Use cases**: Configuration management, API responses, deployment metadata
|
||||
- **Features**: JSON Text Sequences, record mapping, validation
|
||||
|
||||
```scheme
|
||||
;; Machine configuration as JSON
|
||||
(define machine-config
|
||||
`(("name" . "grey-area")
|
||||
("services" . #("ollama" "jellyfin" "forgejo"))
|
||||
("deployment" . (("method" . "deploy-rs") ("status" . "ready")))))
|
||||
|
||||
(scm->json machine-config #:pretty #t)
|
||||
```
|
||||
|
||||
### 3. HTTP Server/Client Operations
|
||||
|
||||
**guile-webutils** & **guile-curl** - *For web-based interfaces and API calls*
|
||||
|
||||
- **guile-webutils**: Session management, multipart messages, form handling
|
||||
- **guile-curl**: HTTP client operations, file transfers
|
||||
- **Use cases**: Web dashboard, API endpoints, remote service integration
|
||||
|
||||
### 4. Process Management & System Interaction
|
||||
|
||||
**guile-bash** - *Bridge between Scheme and shell operations*
|
||||
|
||||
- **Capabilities**: Execute shell commands, capture output, dynamic variables
|
||||
- **Use cases**: Gradual migration, leveraging existing shell tools
|
||||
- **Integration**: Call existing scripts while building Scheme alternatives
|
||||
|
||||
### 5. Configuration Management
|
||||
|
||||
**guile-config** - *Declarative configuration handling*
|
||||
|
||||
- **Capabilities**: Declarative config specs, file parsing, command-line args
|
||||
- **Use cases**: Tool configuration, machine definitions, deployment parameters
|
||||
|
||||
## MCP Server Implementation Libraries
|
||||
|
||||
### 1. JSON-RPC Foundation
|
||||
|
||||
**scheme-json-rpc** - *Core MCP protocol implementation*
|
||||
|
||||
- **Capabilities**: JSON-RPC 2.0 specification compliance
|
||||
- **Transport**: Works over stdio, WebSocket, HTTP
|
||||
- **Use cases**: MCP message handling, method dispatch
|
||||
|
||||
### 2. WebSocket Support
|
||||
|
||||
**guile-websocket** - *Real-time communication*
|
||||
|
||||
- **Capabilities**: RFC 6455 compliant WebSocket implementation
|
||||
- **Features**: Server and client support, binary/text messages
|
||||
- **Use cases**: MCP transport layer, real-time lab monitoring
|
||||
|
||||
### 3. Web Server Infrastructure
|
||||
|
||||
**artanis** - *Full-featured web application framework*
|
||||
|
||||
- **Capabilities**: Routing, templating, database access, session management
|
||||
- **Use cases**: MCP HTTP transport, web dashboard, API endpoints
|
||||
|
||||
```scheme
|
||||
;; MCP server endpoint structure
|
||||
(define-handler mcp-handler
|
||||
(lambda (request)
|
||||
(let ((method (json-ref (request-body request) "method")))
|
||||
(case method
|
||||
(("tools/list") (handle-tools-list))
|
||||
(("resources/list") (handle-resources-list))
|
||||
(("tools/call") (handle-tool-call request))
|
||||
(else (mcp-error "Unknown method"))))))
|
||||
```
|
||||
|
||||
## Enhanced Networking & Protocol Libraries
|
||||
|
||||
### 1. Advanced HTTP/Network Operations
|
||||
|
||||
**guile-curl** - *Comprehensive HTTP client*
|
||||
|
||||
- Features: HTTPS, authentication, file uploads, progress callbacks
|
||||
- Use cases: API integrations, file transfers, service health checks
|
||||
|
||||
**guile-dns** - *DNS operations*
|
||||
|
||||
- Pure Guile DNS implementation
|
||||
- Use cases: Service discovery, network diagnostics
|
||||
|
||||
### 2. Data Serialization
|
||||
|
||||
**guile-cbor** - *Efficient binary serialization*
|
||||
|
||||
- Alternative to JSON for performance-critical operations
|
||||
- Smaller payload sizes for resource monitoring
|
||||
|
||||
**guile-yaml** / **guile-yamlpp** - *YAML processing*
|
||||
|
||||
- Configuration file handling
|
||||
- Integration with existing YAML-based tools
|
||||
|
||||
### 3. Database Integration
|
||||
|
||||
**guile-sqlite3** - *Local data storage*
|
||||
|
||||
- Deployment history, machine states, configuration versioning
|
||||
- Embedded database for tool state management
|
||||
|
||||
**guile-redis** - *Caching and session storage*
|
||||
|
||||
- Performance optimization for frequent operations
|
||||
- Distributed state management across lab machines
|
||||
|
||||
## System Integration Libraries
|
||||
|
||||
### 1. File System Operations
|
||||
|
||||
**guile-filesystem** & **f.scm** - *Enhanced file handling*
|
||||
|
||||
- Beyond basic Guile file operations
|
||||
- Path manipulation, directory traversal, file monitoring
|
||||
|
||||
### 2. Process and Service Management
|
||||
|
||||
**shepherd** - *Service management*
|
||||
|
||||
- GNU Shepherd integration for service lifecycle management
|
||||
- Alternative to systemd interactions
|
||||
|
||||
### 3. Cryptography and Security
|
||||
|
||||
**guile-gcrypt** - *Cryptographic operations*
|
||||
|
||||
- Key management, encryption/decryption, hashing
|
||||
- Secure configuration storage, deployment verification
|
||||
|
||||
## Specialized Infrastructure Libraries
|
||||
|
||||
### 1. Containerization Support
|
||||
|
||||
**guile-docker** / Container operations
|
||||
|
||||
- Docker/Podman integration for containerized services
|
||||
- Image management, container lifecycle
|
||||
|
||||
### 2. Version Control Integration
|
||||
|
||||
**guile-git** - *Git operations*
|
||||
|
||||
- Flake updates, configuration versioning
|
||||
- Automated commit/push for deployment tracking
|
||||
|
||||
### 3. Monitoring and Metrics
|
||||
|
||||
**prometheus** (Guile implementation) - *Metrics collection*
|
||||
|
||||
- Performance monitoring, deployment success rates
|
||||
- Integration with existing monitoring infrastructure
|
||||
|
||||
## MCP Server Implementation Strategy
|
||||
|
||||
### Core MCP Capabilities to Implement
|
||||
|
||||
1. **Tools**: Home lab management operations
|
||||
- `deploy-machine`: Deploy specific machine configurations
|
||||
- `check-status`: Machine connectivity and health checks
|
||||
- `update-flake`: Update package definitions
|
||||
- `rollback-deployment`: Emergency rollback procedures
|
||||
|
||||
2. **Resources**: Lab state and configuration access
|
||||
- Machine configurations (read-only access to NixOS configs)
|
||||
- Deployment history and logs
|
||||
- Service status across all machines
|
||||
- Network topology and connectivity maps
|
||||
|
||||
3. **Prompts**: Common operational templates
|
||||
- Deployment workflows
|
||||
- Troubleshooting procedures
|
||||
- Security audit checklists
|
||||
|
||||
### Implementation Architecture
|
||||
|
||||
```scheme
|
||||
(use-modules (json) (web socket) (ssh session) (scheme json-rpc))
|
||||
|
||||
(define-mcp-server home-lab-mcp
|
||||
#:tools `(("deploy-machine"
|
||||
#:description "Deploy configuration to specified machine"
|
||||
#:parameters ,(make-schema-object
|
||||
`(("machine" #:type "string" #:required #t)
|
||||
("method" #:type "string" #:enum ("deploy-rs" "hybrid-update")))))
|
||||
|
||||
("check-status"
|
||||
#:description "Check machine connectivity and services"
|
||||
#:parameters ,(make-schema-object
|
||||
`(("machines" #:type "array" #:items "string")))))
|
||||
|
||||
#:resources `(("machines://config/{machine}"
|
||||
#:description "NixOS configuration for machine")
|
||||
("machines://status/{machine}"
|
||||
#:description "Current status and health metrics"))
|
||||
|
||||
#:prompts `(("deployment-workflow"
|
||||
#:description "Standard deployment procedure")
|
||||
("troubleshoot-machine"
|
||||
#:description "Machine diagnostics checklist")))
|
||||
```
|
||||
|
||||
## Migration Strategy
|
||||
|
||||
### Phase 1: Core Infrastructure (Weeks 1-2)
|
||||
|
||||
1. Set up Guile development environment in NixOS
|
||||
2. Implement basic SSH operations using guile-ssh
|
||||
3. Port status checking functionality
|
||||
4. Create JSON-based machine configuration format
|
||||
|
||||
### Phase 2: Enhanced Features (Weeks 3-4)
|
||||
|
||||
1. Implement deployment methods (deploy-rs integration)
|
||||
2. Add error handling and logging
|
||||
3. Create web interface for monitoring
|
||||
4. Develop basic MCP server capabilities
|
||||
|
||||
### Phase 3: Advanced Integration (Weeks 5-6)
|
||||
|
||||
1. Full MCP server implementation
|
||||
2. Web dashboard with real-time updates
|
||||
3. Integration with existing monitoring tools
|
||||
4. Documentation and testing
|
||||
|
||||
### Phase 4: Production Deployment (Week 7)
|
||||
|
||||
1. Gradual rollout with fallback to Bash tool
|
||||
2. Performance optimization
|
||||
3. User training and documentation
|
||||
4. Monitoring and feedback collection
|
||||
|
||||
## Guile vs. Alternative Languages
|
||||
|
||||
### Advantages of Guile
|
||||
|
||||
- **Homoiconicity**: Code as data enables powerful metaprogramming
|
||||
- **REPL Development**: Interactive development and debugging
|
||||
- **GNU Integration**: Seamless integration with GNU tools and philosophy
|
||||
- **Extensibility**: Easy C library bindings for performance-critical code
|
||||
- **Stability**: Mature language with stable API
|
||||
|
||||
### Considerations
|
||||
|
||||
- **Learning Curve**: Lisp syntax may be unfamiliar
|
||||
- **Performance**: Generally slower than compiled languages for CPU-intensive tasks
|
||||
- **Ecosystem Size**: Smaller than Python/JavaScript ecosystems
|
||||
- **Tooling**: Fewer IDE integrations compared to mainstream languages
|
||||
|
||||
## Recommended Libraries by Priority
|
||||
|
||||
### Tier 1 (Essential)
|
||||
|
||||
1. **guile-ssh** - Remote operations foundation
|
||||
2. **guile-json** - Data interchange format
|
||||
3. **scheme-json-rpc** - MCP protocol implementation
|
||||
4. **guile-webutils** - Web application utilities
|
||||
|
||||
### Tier 2 (Important)
|
||||
|
||||
1. **guile-websocket** - Real-time communication
|
||||
2. **artanis** - Web framework
|
||||
3. **guile-curl** - HTTP client operations
|
||||
4. **guile-config** - Configuration management
|
||||
|
||||
### Tier 3 (Enhancement)
|
||||
|
||||
1. **guile-git** - Version control integration
|
||||
2. **guile-sqlite3** - Local data storage
|
||||
3. **prometheus** - Metrics and monitoring
|
||||
4. **guile-gcrypt** - Security operations
|
||||
|
||||
## Security Considerations
|
||||
|
||||
### Authentication and Authorization
|
||||
|
||||
- **guile-ssh**: Public key authentication, agent support
|
||||
- **guile-gcrypt**: Secure credential storage
|
||||
- **MCP Security**: Implement capability-based access control
|
||||
|
||||
### Network Security
|
||||
|
||||
- **TLS Support**: Via guile-gnutls for encrypted communications
|
||||
- **SSH Tunneling**: Secure communication channels
|
||||
- **Input Validation**: JSON schema validation for all inputs
|
||||
|
||||
### Deployment Security
|
||||
|
||||
- **Signed Deployments**: Cryptographic verification of configurations
|
||||
- **Audit Logging**: Comprehensive operation logging
|
||||
- **Rollback Capability**: Quick recovery from failed deployments
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
### Optimization Strategies
|
||||
|
||||
1. **Compiled Modules**: Use `.go` files for performance-critical code
|
||||
2. **Async Operations**: Leverage fibers for concurrent operations
|
||||
3. **Caching**: Redis integration for frequently accessed data
|
||||
4. **Native Extensions**: C bindings for system-level operations
|
||||
|
||||
### Expected Performance
|
||||
|
||||
- **SSH Operations**: Comparable to native SSH client
|
||||
- **JSON Processing**: Adequate for configuration sizes (< 1MB)
|
||||
- **Web Serving**: Suitable for low-traffic administrative interfaces
|
||||
- **Startup Time**: Fast REPL startup, moderate for compiled applications
|
||||
|
||||
## Conclusion
|
||||
|
||||
The Guile ecosystem provides comprehensive support for implementing both a sophisticated home lab management tool and a Model Context Protocol server. The availability of mature libraries for SSH operations, JSON handling, web services, and system integration makes Guile an excellent choice for this migration.
|
||||
|
||||
**Key Strengths:**
|
||||
|
||||
- Rich library ecosystem specifically suited to system administration
|
||||
- Excellent JSON-RPC and WebSocket support for MCP implementation
|
||||
- Strong SSH and networking capabilities
|
||||
- Active development community with good documentation
|
||||
|
||||
**Recommended Approach:**
|
||||
|
||||
1. Start with core SSH and JSON functionality
|
||||
2. Gradually migrate features from Bash to Guile
|
||||
3. Implement MCP server capabilities incrementally
|
||||
4. Maintain backwards compatibility during transition
|
||||
|
||||
The migration to Guile will provide significant benefits in code maintainability, error handling, and extensibility while enabling advanced features like MCP integration that would be difficult to implement in Bash.
|
334
packages/lab-tool/research/guile_scripting_solution.md
Normal file
334
packages/lab-tool/research/guile_scripting_solution.md
Normal file
|
@ -0,0 +1,334 @@
|
|||
# Replacing Bash with Guile Scheme for Home Lab Tools
|
||||
|
||||
This document outlines a proposal to migrate the `home-lab-tools` script from Bash to GNU Guile Scheme. This change aims to address the increasing complexity of the script and leverage the benefits of a more powerful programming language.
|
||||
|
||||
## 1. Introduction: Why Guile Scheme?
|
||||
|
||||
GNU Guile is the official extension language for the GNU Project. It is an implementation of the Scheme programming language, a dialect of Lisp. Using Guile for scripting offers several advantages over Bash, especially as scripts grow in size and complexity.
|
||||
|
||||
Key reasons for considering Guile:
|
||||
|
||||
* **Expressiveness and Power:** Scheme is a full-fledged programming language with features like first-class functions, macros, and a rich standard library. This allows for more elegant and maintainable solutions to complex problems.
|
||||
* **Better Error Handling:** Guile provides robust error handling mechanisms (conditions and handlers) that are more sophisticated than Bash's `set -e` and trap.
|
||||
* **Modularity:** Guile supports modules, making it easier to organize code into reusable components.
|
||||
* **Data Manipulation:** Scheme excels at handling structured data, which can be beneficial for managing configurations or parsing output from commands.
|
||||
* **Readability (for Lisp programmers):** While Lisp syntax can be initially unfamiliar, it can lead to very clear and concise code once learned.
|
||||
* **Interoperability:** Guile can easily call external programs and libraries, and can be extended with C code if needed.
|
||||
|
||||
## 2. Advantages over Bash for `home-lab-tools`
|
||||
|
||||
Migrating `home-lab-tools` from Bash to Guile offers specific benefits:
|
||||
|
||||
* **Improved Logic Handling:** Complex conditional logic, loops, and function definitions are more naturally expressed in Guile. The current Bash script uses case statements and string comparisons extensively, which can become unwieldy.
|
||||
* **Structured Data Management:** Machine definitions, deployment modes, and status information could be represented as Scheme data structures (lists, association lists, records), making them easier to manage and query.
|
||||
* **Enhanced Error Reporting:** More descriptive error messages and better control over script termination in case of failures.
|
||||
* **Code Reusability:** Functions for common tasks (e.g., SSHing to a machine, running `nixos-rebuild`) can be more cleanly defined and reused.
|
||||
* **Easier Testing:** Guile's nature as a programming language makes it more amenable to unit testing individual functions or modules.
|
||||
* **Future Extensibility:** Adding new commands, machines, or features will be simpler and less error-prone in a more structured language.
|
||||
|
||||
## 3. Setting up Guile
|
||||
|
||||
Guile is often available through system package managers. On NixOS, it can be added to your environment or system configuration.
|
||||
|
||||
```nix
|
||||
# Example: Adding Guile to a Nix shell
|
||||
nix-shell -p guile
|
||||
```
|
||||
|
||||
A Guile script typically starts with a shebang line:
|
||||
|
||||
```scheme
|
||||
#!/usr/bin/env guile
|
||||
!#
|
||||
```
|
||||
|
||||
The `!#` at the end is a Guile-specific convention that allows the script to be both executable and loadable into a Guile REPL.
|
||||
|
||||
## 4. Basic Guile Scripting Concepts
|
||||
|
||||
* **S-expressions:** Code is written using S-expressions (Symbolic Expressions), which are lists enclosed in parentheses, e.g., `(function arg1 arg2)`.
|
||||
* **Definitions:** `(define variable value)` and `(define (function-name arg1 arg2) ...body...)`.
|
||||
* **Procedures (Functions):** Core of Guile programming.
|
||||
* **Control Flow:** `(if condition then-expr else-expr)`, `(cond (test1 expr1) (test2 expr2) ... (else else-expr))`, `(case ...)`
|
||||
* **Modules:** `(use-modules (ice-9 popen))` for using libraries.
|
||||
|
||||
## 5. Interacting with the System
|
||||
|
||||
Guile provides modules for system interaction:
|
||||
|
||||
* **(ice-9 popen):** For running external commands and capturing their output (similar to backticks or `$(...)` in Bash).
|
||||
* `open-pipe* command mode`: Opens a pipe to a command.
|
||||
* `get-string-all port`: Reads all output from a port.
|
||||
* **(ice-9 rdelim):** For reading lines from ports.
|
||||
* **(ice-9 filesys):** For file system operations (checking existence, deleting, etc.).
|
||||
* `file-exists? path`
|
||||
* `delete-file path`
|
||||
* **(srfi srfi-1):** List processing utilities.
|
||||
* **(srfi srfi-26):** `cut` for partial application, useful for creating specialized functions.
|
||||
* **Environment Variables:** `(getenv "VAR_NAME")`, `(setenv "VAR_NAME" "value")`.
|
||||
|
||||
## Example: Running a command**
|
||||
|
||||
```scheme
|
||||
(use-modules (ice-9 popen))
|
||||
|
||||
(define (run-command . args)
|
||||
(let* ((cmd (string-join args " "))
|
||||
(port (open-pipe* cmd OPEN_READ)))
|
||||
(let ((output (get-string-all port)))
|
||||
(close-pipe port)
|
||||
output)))
|
||||
|
||||
(display (run-command "echo" "Hello from Guile"))
|
||||
(newline)
|
||||
```
|
||||
|
||||
## 6. Error Handling
|
||||
|
||||
Guile uses a condition system for error handling.
|
||||
|
||||
* `catch`: Allows you to catch specific types of errors.
|
||||
* `throw`: Raises an error.
|
||||
|
||||
```scheme
|
||||
(use-modules (ice-9 exceptions))
|
||||
|
||||
(catch #t
|
||||
(lambda ()
|
||||
(display "Trying something that might fail...
|
||||
")
|
||||
;; Example: Force an error
|
||||
(if #t (error "Something went wrong!"))
|
||||
(display "This won't be printed if an error occurs above.
|
||||
"))
|
||||
(lambda (key . args)
|
||||
(format (current-error-port) "Caught an error: ~a - Args: ~a
|
||||
" key args)
|
||||
#f)) ; Return value indicating an error was caught
|
||||
```
|
||||
|
||||
For `home-lab-tools`, this means we can provide more specific feedback when a deployment fails or a machine is unreachable.
|
||||
|
||||
## 7. Modularity and Code Organization
|
||||
|
||||
Guile's module system allows splitting the code into logical units. For `home-lab-tools`, we could have modules for:
|
||||
|
||||
* `lab-config`: Machine definitions, paths.
|
||||
* `lab-deploy`: Functions related to deploying configurations.
|
||||
* `lab-ssh`: SSH interaction utilities.
|
||||
* `lab-status`: Functions for checking machine status.
|
||||
* `lab-utils`: General helper functions, logging.
|
||||
|
||||
**Example module structure:**
|
||||
|
||||
```scheme
|
||||
;; file: lab-utils.scm
|
||||
(define-module (lab utils)
|
||||
#:export (log success warn error))
|
||||
|
||||
(define blue "[0;34m")
|
||||
(define nc "[0m")
|
||||
|
||||
(define (log msg)
|
||||
(format #t "~a[lab]~a ~a
|
||||
" blue nc msg))
|
||||
;; ... other logging functions
|
||||
```
|
||||
|
||||
```scheme
|
||||
;; file: main-lab-script.scm
|
||||
#!/usr/bin/env guile
|
||||
!#
|
||||
(use-modules (lab utils) (ice-9 popen))
|
||||
|
||||
(log "Starting lab script...")
|
||||
;; ... rest of the script
|
||||
```
|
||||
|
||||
## 8. Example: Rewriting a Small Part of `home-lab-tools.nix` (Conceptual)
|
||||
|
||||
Let's consider the `log` function and a simplified `deploy_machine` for local deployment.
|
||||
|
||||
**Current Bash:**
|
||||
|
||||
```bash
|
||||
BLUE='[0;34m'
|
||||
NC='[0m' # No Color
|
||||
|
||||
log() {
|
||||
echo -e "''${BLUE}[lab]''${NC} $1"
|
||||
}
|
||||
|
||||
deploy_machine() {
|
||||
local machine="$1"
|
||||
# ...
|
||||
if [[ "$machine" == "congenital-optimist" ]]; then
|
||||
log "Deploying $machine (mode: $mode) locally"
|
||||
sudo nixos-rebuild $mode --flake "$HOMELAB_ROOT#$machine"
|
||||
fi
|
||||
# ...
|
||||
}
|
||||
```
|
||||
|
||||
**Conceptual Guile Scheme:**
|
||||
|
||||
```scheme
|
||||
;; main-lab-script.scm
|
||||
#!/usr/bin/env guile
|
||||
!#
|
||||
|
||||
(use-modules (ice-9 popen)
|
||||
(ice-9 rdelim)
|
||||
(ice-9 pretty-print)
|
||||
(ice-9 exceptions)
|
||||
(srfi srfi-1)) ;; For list utilities like `string-join`
|
||||
|
||||
;; Configuration (could be in a separate module)
|
||||
(define homelab-root "/home/geir/Home-lab")
|
||||
|
||||
;; Color Definitions
|
||||
(define RED "[0;31m")
|
||||
(define GREEN "[0;32m")
|
||||
(define YELLOW "[1;33m")
|
||||
(define BLUE "[0;34m")
|
||||
(define NC "[0m")
|
||||
|
||||
;; Logging functions
|
||||
(define (log level-color level-name message)
|
||||
(format #t "~a[~a]~a ~a
|
||||
" level-color level-name NC message))
|
||||
|
||||
(define (info . messages)
|
||||
(log BLUE "lab" (apply string-append (map (lambda (m) (if (string? m) m (format #f "~s" m))) messages))))
|
||||
|
||||
(define (success . messages)
|
||||
(log GREEN "lab" (apply string-append (map (lambda (m) (if (string? m) m (format #f "~s" m))) messages))))
|
||||
|
||||
(define (warn . messages)
|
||||
(log YELLOW "lab" (apply string-append (map (lambda (m) (if (string? m) m (format #f "~s" m))) messages))))
|
||||
|
||||
(define (err . messages)
|
||||
(log RED "lab" (apply string-append (map (lambda (m) (if (string? m) m (format #f "~s" m))) messages)))
|
||||
(exit 1)) ;; Exit on error
|
||||
|
||||
;; Function to run shell commands and handle output/errors
|
||||
(define (run-shell-command . command-parts)
|
||||
(let ((command-string (string-join command-parts " ")))
|
||||
(info "Executing: " command-string)
|
||||
(let ((pipe (open-pipe* command-string OPEN_READ)))
|
||||
(let loop ((lines '()))
|
||||
(let ((line (read-line pipe)))
|
||||
(if (eof-object? line)
|
||||
(begin
|
||||
(close-pipe pipe)
|
||||
(reverse lines)) ;; Return lines in order
|
||||
(begin
|
||||
(display line) (newline) ;; Display live output
|
||||
(loop (cons line lines)))))))
|
||||
;; TODO: Add proper error checking based on exit status of the command
|
||||
;; For now, we assume success if open-pipe* doesn't fail.
|
||||
;; A more robust solution would check `close-pipe` status or use `system*`.
|
||||
))
|
||||
|
||||
;; Simplified deploy_machine
|
||||
(define (deploy-machine machine mode)
|
||||
(info "Deploying " machine " (mode: " mode ")")
|
||||
(cond
|
||||
((string=? machine "congenital-optimist")
|
||||
(info "Deploying " machine " locally")
|
||||
(catch #t
|
||||
(lambda ()
|
||||
(run-shell-command "sudo" "nixos-rebuild" mode "--flake" (string-append homelab-root "#" machine))
|
||||
(success "Successfully deployed " machine))
|
||||
(lambda (key . args)
|
||||
(err "Failed to deploy " machine ". Error: " key " Args: " args))))
|
||||
;; Add other machines here
|
||||
(else
|
||||
(err "Unknown machine: " machine))))
|
||||
|
||||
;; Main script logic (parsing arguments, calling functions)
|
||||
(define (main args)
|
||||
(if (< (length args) 3)
|
||||
(begin
|
||||
(err "Usage: <script> deploy <machine> [mode]")
|
||||
(exit 1))
|
||||
(let ((command (cadr args))
|
||||
(machine (caddr args))
|
||||
(mode (if (> (length args) 3) (cadddr args) "boot")))
|
||||
(cond
|
||||
((string=? command "deploy")
|
||||
(deploy-machine machine mode))
|
||||
;; Add other commands like "status", "update"
|
||||
(else
|
||||
(err "Unknown command: " command))))))
|
||||
|
||||
;; Run the main function with command-line arguments
|
||||
;; (cdr args) to skip the script name itself
|
||||
(main (program-arguments))
|
||||
```
|
||||
|
||||
## 9. Creating Terminal User Interfaces (TUIs) with Guile-Ncurses
|
||||
|
||||
For more interactive command-line tools, Guile Scheme can be used to create Text User Interfaces (TUIs). The primary library for this is `guile-ncurses`.
|
||||
|
||||
**Guile-Ncurses** is a GNU project that provides Scheme bindings for the ncurses library, including its components for forms, panels, and menus. This allows you to build sophisticated text-based interfaces directly in Guile.
|
||||
|
||||
**Key Features:**
|
||||
|
||||
* **Windowing:** Create and manage multiple windows on the terminal.
|
||||
* **Input Handling:** Process keyboard input, including special keys.
|
||||
* **Text Attributes:** Control colors, bolding, underlining, and other text styles.
|
||||
* **Forms, Panels, Menus:** Higher-level components for building complex interfaces.
|
||||
|
||||
**Getting Started with Guile-Ncurses:**
|
||||
|
||||
1. **Installation:** `guile-ncurses` would typically be installed via your system's package manager or built from source. If you are using NixOS, you would look for a Nix package for `guile-ncurses`.
|
||||
|
||||
```nix
|
||||
# Example: Adding guile-ncurses to a Nix shell (package name might vary)
|
||||
nix-shell -p guile guile-ncurses
|
||||
```
|
||||
|
||||
2. **Using in Code:**
|
||||
You would use the `(ncurses curses)` module (and others like `(ncurses form)`, `(ncurses menu)`, `(ncurses panel)`) in your Guile script.
|
||||
|
||||
```scheme
|
||||
(use-modules (ncurses curses))
|
||||
|
||||
(define (tui-main stdscr)
|
||||
;; Initialize ncurses
|
||||
(cbreak!) ;; Line buffering disabled, Pass on ever char
|
||||
(noecho!) ;; Don't echo() while we do getch
|
||||
(keypad stdscr #t) ;; Enable Fx keys, arrow keys etc.
|
||||
|
||||
(addstr "Hello, Guile Ncurses TUI!")
|
||||
(refresh)
|
||||
(getch) ;; Wait for a key press
|
||||
(endwin)) ;; End curses mode
|
||||
|
||||
;; Initialize and run the TUI
|
||||
(initscr)
|
||||
(tui-main stdscr)
|
||||
```
|
||||
|
||||
**Resources:**
|
||||
|
||||
* **Guile-Ncurses Project Page:** [https://www.nongnu.org/guile-ncurses/](https://www.nongnu.org/guile-ncurses/)
|
||||
* **Guile-Ncurses Manual:** [https://www.gnu.org/software/guile-ncurses/manual/](https://www.gnu.org/software/guile-ncurses/manual/)
|
||||
|
||||
Integrating `guile-ncurses` can significantly enhance the user experience of your `home-lab-tools` script, allowing for interactive menus, status dashboards, and more complex user interactions beyond simple command-line arguments and output.
|
||||
|
||||
## 10. Conclusion and Next Steps
|
||||
|
||||
Migrating `home-lab-tools` to Guile Scheme offers a path to a more maintainable, robust, and extensible solution. While there is a learning curve for Scheme, the long-term benefits for managing a complex set of administration tasks are significant.
|
||||
|
||||
**Next Steps:**
|
||||
|
||||
1. **Install Guile:** Ensure Guile is available in the development environment.
|
||||
2. **Start Small:** Begin by porting one command or a set of utility functions (e.g., logging, SSH wrappers).
|
||||
3. **Learn Guile Basics:** Familiarize with Scheme syntax, common procedures, and modules. The Guile Reference Manual is an excellent resource.
|
||||
4. **Develop Incrementally:** Port functionality piece by piece, testing along the way.
|
||||
5. **Explore Guile Libraries:** Investigate Guile libraries for argument parsing (e.g., `(gnu cmdline)`), file system operations, and other needs.
|
||||
6. **Refactor and Organize:** Use Guile's module system to keep the codebase clean and organized.
|
||||
|
||||
This transition will require an initial investment in learning and development but promises a more powerful and sustainable tool for managing the home lab infrastructure.
|
74
packages/lab-tool/research/home-lab-tool.scm
Executable file
74
packages/lab-tool/research/home-lab-tool.scm
Executable file
|
@ -0,0 +1,74 @@
|
|||
#!/usr/bin/env guile
|
||||
!#
|
||||
|
||||
;; Home Lab Tool - Guile Scheme Implementation (Minimal Version)
|
||||
;; Main entry point for the lab command-line tool
|
||||
|
||||
(use-modules (ice-9 match)
|
||||
(ice-9 format))
|
||||
|
||||
;; Simple logging
|
||||
(define (log-info msg . args)
|
||||
(apply format #t (string-append "[lab] " msg "~%") args))
|
||||
|
||||
(define (log-error msg . args)
|
||||
(apply format (current-error-port) (string-append "[ERROR] " msg "~%") args))
|
||||
|
||||
;; Configuration
|
||||
(define machines '("congenital-optimist" "sleeper-service" "grey-area" "reverse-proxy"))
|
||||
|
||||
;; Main command dispatcher
|
||||
(define (dispatch-command command args)
|
||||
(match command
|
||||
("status"
|
||||
(log-info "Infrastructure status:")
|
||||
(for-each (lambda (machine)
|
||||
(format #t " ~a: Online~%" machine))
|
||||
machines))
|
||||
|
||||
("deploy"
|
||||
(if (null? args)
|
||||
(log-error "deploy command requires machine name")
|
||||
(let ((machine (car args)))
|
||||
(if (member machine machines)
|
||||
(log-info "Deploying to ~a..." machine)
|
||||
(log-error "Unknown machine: ~a" machine)))))
|
||||
|
||||
("mcp"
|
||||
(if (null? args)
|
||||
(log-error "mcp command requires: start, stop, or status")
|
||||
(match (car args)
|
||||
("status" (log-info "MCP server: Development mode"))
|
||||
(_ (log-error "MCP command not implemented: ~a" (car args))))))
|
||||
|
||||
(_ (log-error "Unknown command: ~a" command))))
|
||||
|
||||
;; Show help
|
||||
(define (show-help)
|
||||
(format #t "Home Lab Tool (Guile) v0.1.0
|
||||
|
||||
Usage: lab [COMMAND] [ARGS...]
|
||||
|
||||
Commands:
|
||||
status Show infrastructure status
|
||||
deploy MACHINE Deploy to machine
|
||||
mcp status Show MCP server status
|
||||
help Show this help
|
||||
|
||||
Machines: ~a
|
||||
" (string-join machines ", ")))
|
||||
|
||||
;; Main entry point
|
||||
(define (main args)
|
||||
(if (< (length args) 2)
|
||||
(show-help)
|
||||
(let ((command (cadr args))
|
||||
(command-args (cddr args)))
|
||||
(if (string=? command "help")
|
||||
(show-help)
|
||||
(dispatch-command command command-args)))))
|
||||
|
||||
;; Execute main if this script is run directly
|
||||
(when (and (> (length (command-line)) 0)
|
||||
(string=? (car (command-line)) "./home-lab-tool.scm"))
|
||||
(main (command-line)))
|
258
packages/lab-tool/research/machines.scm
Normal file
258
packages/lab-tool/research/machines.scm
Normal file
|
@ -0,0 +1,258 @@
|
|||
;; lab/machines.scm - Machine-specific operations
|
||||
|
||||
(define-module (lab machines)
|
||||
#:use-module (ice-9 format)
|
||||
#:use-module (ice-9 match)
|
||||
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-19)
|
||||
#:use-module (utils logging)
|
||||
#:use-module (utils config)
|
||||
#:use-module (utils ssh)
|
||||
#:use-module (lab core)
|
||||
#:export (show-infrastructure-status
|
||||
get-machine-details
|
||||
discover-machines
|
||||
validate-machine-health
|
||||
get-machine-metrics
|
||||
option-ref))
|
||||
|
||||
;; Helper function for option handling
|
||||
(define (option-ref options key default)
|
||||
"Get option value with default fallback"
|
||||
(let ((value (assoc-ref options key)))
|
||||
(if value value default)))
|
||||
|
||||
;; Display infrastructure status in a human-readable format
|
||||
(define (show-infrastructure-status machine-name options)
|
||||
"Display comprehensive infrastructure status"
|
||||
(let ((verbose (option-ref options 'verbose #f))
|
||||
(status-data (get-infrastructure-status machine-name)))
|
||||
|
||||
(log-info "Home-lab infrastructure status:")
|
||||
(newline)
|
||||
|
||||
(for-each
|
||||
(lambda (machine-status)
|
||||
(display-machine-status machine-status verbose))
|
||||
status-data)
|
||||
|
||||
;; Summary statistics
|
||||
(let ((total-machines (length status-data))
|
||||
(online-machines (length (filter
|
||||
(lambda (status)
|
||||
(eq? (assoc-ref status 'connection) 'online))
|
||||
status-data))))
|
||||
(newline)
|
||||
(if (= online-machines total-machines)
|
||||
(log-success "All ~a machines online ✓" total-machines)
|
||||
(log-warn "~a/~a machines online" online-machines total-machines)))))
|
||||
|
||||
;; Display status for a single machine
|
||||
(define (display-machine-status machine-status verbose)
|
||||
"Display formatted status for a single machine"
|
||||
(let* ((machine-name (assoc-ref machine-status 'machine))
|
||||
(machine-type (assoc-ref machine-status 'type))
|
||||
(connection (assoc-ref machine-status 'connection))
|
||||
(services (assoc-ref machine-status 'services))
|
||||
(system-info (assoc-ref machine-status 'system))
|
||||
(check-time (assoc-ref machine-status 'check-time)))
|
||||
|
||||
;; Machine header with connection status
|
||||
(let ((status-symbol (if (eq? connection 'online) "✅" "❌"))
|
||||
(type-label (if (eq? machine-type 'local) "(local)" "(remote)")))
|
||||
(format #t "━━━ ~a ~a ~a ━━━~%" machine-name type-label status-symbol))
|
||||
|
||||
;; Connection details
|
||||
(if (eq? connection 'online)
|
||||
(begin
|
||||
(when system-info
|
||||
(let ((uptime (assoc-ref system-info 'uptime))
|
||||
(load (assoc-ref system-info 'load))
|
||||
(memory (assoc-ref system-info 'memory))
|
||||
(disk (assoc-ref system-info 'disk)))
|
||||
(when uptime (format #t "⏱️ Uptime: ~a~%" uptime))
|
||||
(when load (format #t "📊 Load: ~a~%" load))
|
||||
(when memory (format #t "🧠 Memory: ~a~%" memory))
|
||||
(when disk (format #t "💾 Disk: ~a~%" disk))))
|
||||
|
||||
;; Services status
|
||||
(when (not (null? services))
|
||||
(format #t "🔧 Services: ")
|
||||
(for-each (lambda (service-status)
|
||||
(let ((service-name (symbol->string (car service-status)))
|
||||
(service-state (cdr service-status)))
|
||||
(let ((status-icon (cond
|
||||
((string=? service-state "active") "✅")
|
||||
((string=? service-state "inactive") "❌")
|
||||
((string=? service-state "failed") "💥")
|
||||
(else "❓"))))
|
||||
(format #t "~a ~a " service-name status-icon))))
|
||||
services)
|
||||
(newline))
|
||||
|
||||
(format #t "⚡ Response: ~ams~%" (inexact->exact (round (* check-time 1000)))))
|
||||
(format #t "⚠️ Status: Offline~%"))
|
||||
|
||||
;; Verbose information
|
||||
(when verbose
|
||||
(let ((ssh-config (get-ssh-config machine-name)))
|
||||
(when ssh-config
|
||||
(format #t "🔗 SSH: ~a~%" (assoc-ref ssh-config 'hostname))
|
||||
(let ((ssh-alias (assoc-ref ssh-config 'ssh-alias)))
|
||||
(when ssh-alias
|
||||
(format #t "🏷️ Alias: ~a~%" ssh-alias))))))
|
||||
|
||||
(newline)))
|
||||
|
||||
;; Get detailed information about a specific machine
|
||||
(define (get-machine-details machine-name)
|
||||
"Get comprehensive details about a specific machine"
|
||||
(let ((machine-config (get-machine-config machine-name)))
|
||||
(if (not machine-config)
|
||||
(begin
|
||||
(log-error "Machine ~a not found in configuration" machine-name)
|
||||
#f)
|
||||
(let* ((ssh-config (get-ssh-config machine-name))
|
||||
(health-status (check-system-health machine-name))
|
||||
(current-status (car (get-infrastructure-status machine-name))))
|
||||
|
||||
`((name . ,machine-name)
|
||||
(config . ,machine-config)
|
||||
(ssh . ,ssh-config)
|
||||
(status . ,current-status)
|
||||
(health . ,health-status)
|
||||
(last-updated . ,(current-date)))))))
|
||||
|
||||
;; Discover machines on the network
|
||||
(define (discover-machines)
|
||||
"Discover available machines on the network"
|
||||
(log-info "Discovering machines on the network...")
|
||||
|
||||
(let ((configured-machines (get-all-machines)))
|
||||
(log-debug "Configured machines: ~a" configured-machines)
|
||||
|
||||
;; Test connectivity to each configured machine
|
||||
(let ((discovery-results
|
||||
(map (lambda (machine-name)
|
||||
(log-debug "Testing connectivity to ~a..." machine-name)
|
||||
(let ((reachable (test-ssh-connection machine-name))
|
||||
(ssh-config (get-ssh-config machine-name)))
|
||||
`((machine . ,machine-name)
|
||||
(configured . #t)
|
||||
(reachable . ,reachable)
|
||||
(type . ,(if (and ssh-config (assoc-ref ssh-config 'is-local))
|
||||
'local 'remote))
|
||||
(hostname . ,(if ssh-config
|
||||
(assoc-ref ssh-config 'hostname)
|
||||
"unknown")))))
|
||||
configured-machines)))
|
||||
|
||||
;; TODO: Add network scanning for unconfigured machines
|
||||
;; This could use nmap or similar tools to discover machines
|
||||
|
||||
(log-info "Discovery completed")
|
||||
discovery-results)))
|
||||
|
||||
;; Validate health of a machine with detailed checks
|
||||
(define (validate-machine-health machine-name . detailed)
|
||||
"Perform comprehensive health validation on a machine"
|
||||
(let ((run-detailed (if (null? detailed) #f (car detailed))))
|
||||
(log-info "Validating health of ~a..." machine-name)
|
||||
|
||||
(let ((basic-health (check-system-health machine-name)))
|
||||
(if run-detailed
|
||||
;; Extended health checks for detailed mode
|
||||
(let ((extended-checks
|
||||
'(("filesystem" . check-filesystem-health)
|
||||
("network-services" . check-network-services)
|
||||
("system-logs" . check-system-logs)
|
||||
("performance" . check-performance-metrics))))
|
||||
|
||||
(let ((extended-results
|
||||
(map (lambda (check-pair)
|
||||
(let ((check-name (car check-pair))
|
||||
(check-proc (cdr check-pair)))
|
||||
(log-debug "Running extended check: ~a" check-name)
|
||||
(catch #t
|
||||
(lambda ()
|
||||
`(,check-name . ,(check-proc machine-name)))
|
||||
(lambda (key . args)
|
||||
(log-warn "Extended check ~a failed: ~a" check-name key)
|
||||
`(,check-name . (error . ,key))))))
|
||||
extended-checks)))
|
||||
|
||||
`((basic . ,basic-health)
|
||||
(extended . ,extended-results)
|
||||
(timestamp . ,(current-date)))))
|
||||
|
||||
;; Just basic health checks
|
||||
`((basic . ,basic-health)
|
||||
(timestamp . ,(current-date)))))))
|
||||
|
||||
;; Extended health check functions
|
||||
(define (check-filesystem-health machine-name)
|
||||
"Check filesystem health and disk usage"
|
||||
(call-with-values (((success output)
|
||||
(run-remote-command machine-name "df -h && echo '---' && mount | grep -E '^/' | head -5")))
|
||||
(if success
|
||||
`((status . pass)
|
||||
(details . ,(string-trim-right output)))
|
||||
`((status . fail)
|
||||
(error . "Could not retrieve filesystem information")))))
|
||||
|
||||
(define (check-network-services machine-name)
|
||||
"Check network service connectivity"
|
||||
(let ((services-to-test '(("ssh" "22") ("http" "80") ("https" "443"))))
|
||||
(map (lambda (service-pair)
|
||||
(let ((service-name (car service-pair))
|
||||
(port (cadr service-pair)))
|
||||
(call-with-values (((success output)
|
||||
(run-remote-command machine-name
|
||||
(format #f "netstat -ln | grep ':~a ' > /dev/null 2>&1; echo $?" port))))
|
||||
`(,service-name . ,(if (and success (string=? (string-trim-right output) "0"))
|
||||
'listening 'not-listening)))))
|
||||
services-to-test)))
|
||||
|
||||
(define (check-system-logs machine-name)
|
||||
"Check system logs for recent errors"
|
||||
(call-with-values (((success output)
|
||||
(run-remote-command machine-name
|
||||
"journalctl --since='1 hour ago' --priority=err --no-pager | wc -l")))
|
||||
(if success
|
||||
(let ((error-count (string->number (string-trim-right output))))
|
||||
`((status . ,(if (< error-count 10) 'good 'concerning))
|
||||
(error-count . ,error-count)))
|
||||
`((status . unknown)
|
||||
(error . "Could not check system logs")))))
|
||||
|
||||
(define (check-performance-metrics machine-name)
|
||||
"Get basic performance metrics"
|
||||
(let ((metrics-commands
|
||||
'(("cpu-usage" "top -bn1 | grep 'Cpu(s)' | awk '{print $2}' | sed 's/%us,//'")
|
||||
("memory-usage" "free | grep Mem | awk '{printf \"%.1f\", ($3/$2) * 100.0}'")
|
||||
("io-wait" "iostat 1 2 | tail -1 | awk '{print $4}'"))))
|
||||
|
||||
(map (lambda (metric-pair)
|
||||
(let ((metric-name (car metric-pair))
|
||||
(command (cadr metric-pair)))
|
||||
(call-with-values (((success output) (run-remote-command machine-name command)))
|
||||
`(,(string->symbol metric-name) .
|
||||
,(if success (string-trim-right output) "unknown")))))
|
||||
metrics-commands)))
|
||||
|
||||
;; Get machine metrics for monitoring
|
||||
(define (get-machine-metrics machine-name . time-range)
|
||||
"Get machine metrics for monitoring and analysis"
|
||||
(let ((range (if (null? time-range) "1h" (car time-range))))
|
||||
(log-debug "Collecting metrics for ~a (range: ~a)" machine-name range)
|
||||
|
||||
(let ((current-time (current-date))
|
||||
(performance (check-performance-metrics machine-name))
|
||||
(health (validate-machine-health machine-name)))
|
||||
|
||||
`((machine . ,machine-name)
|
||||
(timestamp . ,current-time)
|
||||
(performance . ,performance)
|
||||
(health . ,health)
|
||||
(range . ,range)))))
|
337
packages/lab-tool/research/monitoring.scm
Normal file
337
packages/lab-tool/research/monitoring.scm
Normal file
|
@ -0,0 +1,337 @@
|
|||
;; lab/monitoring.scm - Infrastructure monitoring and health checks
|
||||
|
||||
(define-module (lab monitoring)
|
||||
#:use-module (ice-9 format)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (ice-9 popen)
|
||||
#:use-module (ice-9 textual-ports)
|
||||
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-19)
|
||||
#:use-module (utils logging)
|
||||
#:use-module (utils config)
|
||||
#:use-module (utils ssh)
|
||||
#:use-module (lab core)
|
||||
#:use-module (lab machines)
|
||||
#:export (monitor-infrastructure
|
||||
start-monitoring
|
||||
stop-monitoring
|
||||
get-monitoring-status
|
||||
collect-metrics
|
||||
generate-monitoring-report))
|
||||
|
||||
;; Monitor infrastructure with optional service filtering
|
||||
(define (monitor-infrastructure service options)
|
||||
"Monitor infrastructure, optionally filtering by service"
|
||||
(let ((verbose (option-ref options 'verbose #f))
|
||||
(machines (get-all-machines)))
|
||||
|
||||
(log-info "Starting infrastructure monitoring...")
|
||||
|
||||
(if service
|
||||
(monitor-specific-service service machines verbose)
|
||||
(monitor-all-services machines verbose))))
|
||||
|
||||
;; Monitor a specific service across all machines
|
||||
(define (monitor-specific-service service machines verbose)
|
||||
"Monitor a specific service across all configured machines"
|
||||
(log-info "Monitoring service: ~a" service)
|
||||
|
||||
(let ((service-symbol (string->symbol service)))
|
||||
(for-each
|
||||
(lambda (machine-name)
|
||||
(let ((machine-config (get-machine-config machine-name)))
|
||||
(when machine-config
|
||||
(let ((machine-services (assoc-ref machine-config 'services)))
|
||||
(when (and machine-services (member service-symbol machine-services))
|
||||
(monitor-service-on-machine machine-name service verbose))))))
|
||||
machines)))
|
||||
|
||||
;; Monitor all services across all machines
|
||||
(define (monitor-all-services machines verbose)
|
||||
"Monitor all services across all machines"
|
||||
(log-info "Monitoring all services across ~a machines" (length machines))
|
||||
|
||||
(let ((monitoring-results
|
||||
(map (lambda (machine-name)
|
||||
(log-debug "Monitoring ~a..." machine-name)
|
||||
(monitor-machine-services machine-name verbose))
|
||||
machines)))
|
||||
|
||||
(display-monitoring-summary monitoring-results)))
|
||||
|
||||
;; Monitor services on a specific machine
|
||||
(define (monitor-machine-services machine-name verbose)
|
||||
"Monitor all services on a specific machine"
|
||||
(let ((machine-config (get-machine-config machine-name))
|
||||
(connection-status (test-ssh-connection machine-name)))
|
||||
|
||||
(if (not connection-status)
|
||||
(begin
|
||||
(log-warn "Cannot connect to ~a, skipping monitoring" machine-name)
|
||||
`((machine . ,machine-name)
|
||||
(status . offline)
|
||||
(services . ())))
|
||||
|
||||
(let ((services (if machine-config
|
||||
(assoc-ref machine-config 'services)
|
||||
'())))
|
||||
(if (null? services)
|
||||
(begin
|
||||
(log-debug "No services configured for ~a" machine-name)
|
||||
`((machine . ,machine-name)
|
||||
(status . online)
|
||||
(services . ())))
|
||||
|
||||
(let ((service-statuses
|
||||
(map (lambda (service)
|
||||
(monitor-service-on-machine machine-name
|
||||
(symbol->string service)
|
||||
verbose))
|
||||
services)))
|
||||
`((machine . ,machine-name)
|
||||
(status . online)
|
||||
(services . ,service-statuses))))))))
|
||||
|
||||
;; Monitor a specific service on a specific machine
|
||||
(define (monitor-service-on-machine machine-name service verbose)
|
||||
"Monitor a specific service on a specific machine"
|
||||
(log-debug "Checking ~a service on ~a..." service machine-name)
|
||||
|
||||
(let ((service-checks
|
||||
`(("status" . ,(lambda () (check-service-status machine-name service)))
|
||||
("health" . ,(lambda () (check-service-health machine-name service)))
|
||||
("logs" . ,(lambda () (check-service-logs machine-name service))))))
|
||||
|
||||
(let ((results
|
||||
(map (lambda (check-pair)
|
||||
(let ((check-name (car check-pair))
|
||||
(check-proc (cdr check-pair)))
|
||||
(catch #t
|
||||
(lambda ()
|
||||
`(,check-name . ,(check-proc)))
|
||||
(lambda (key . args)
|
||||
(log-warn "Service check ~a failed for ~a: ~a"
|
||||
check-name service key)
|
||||
`(,check-name . (error . ,key))))))
|
||||
service-checks)))
|
||||
|
||||
(when verbose
|
||||
(display-service-details machine-name service results))
|
||||
|
||||
`((service . ,service)
|
||||
(machine . ,machine-name)
|
||||
(checks . ,results)
|
||||
(timestamp . ,(current-date))))))
|
||||
|
||||
;; Check service status using systemctl
|
||||
(define (check-service-status machine-name service)
|
||||
"Check if a service is active using systemctl"
|
||||
(call-with-values (((success output)
|
||||
(run-remote-command machine-name "systemctl is-active" service)))
|
||||
(if success
|
||||
(let ((status (string-trim-right output)))
|
||||
`((active . ,(string=? status "active"))
|
||||
(status . ,status)))
|
||||
`((active . #f)
|
||||
(status . "unknown")
|
||||
(error . "command-failed")))))
|
||||
|
||||
;; Check service health with additional metrics
|
||||
(define (check-service-health machine-name service)
|
||||
"Perform health checks for a service"
|
||||
(let ((health-commands
|
||||
(get-service-health-commands service)))
|
||||
|
||||
(if (null? health-commands)
|
||||
`((healthy . unknown)
|
||||
(reason . "no-health-checks-defined"))
|
||||
|
||||
(let ((health-results
|
||||
(map (lambda (cmd-pair)
|
||||
(let ((check-name (car cmd-pair))
|
||||
(command (cdr cmd-pair)))
|
||||
(call-with-values (((success output)
|
||||
(run-remote-command machine-name command)))
|
||||
`(,check-name . ((success . ,success)
|
||||
(output . ,(if success
|
||||
(string-trim-right output)
|
||||
output)))))))
|
||||
health-commands)))
|
||||
|
||||
(let ((all-healthy (every (lambda (result)
|
||||
(assoc-ref (cdr result) 'success))
|
||||
health-results)))
|
||||
`((healthy . ,all-healthy)
|
||||
(checks . ,health-results)))))))
|
||||
|
||||
;; Get service-specific health check commands
|
||||
(define (get-service-health-commands service)
|
||||
"Get health check commands for specific services"
|
||||
(match service
|
||||
("ollama"
|
||||
'(("api-check" . "curl -f http://localhost:11434/api/tags > /dev/null 2>&1; echo $?")
|
||||
("process-check" . "pgrep ollama > /dev/null; echo $?")))
|
||||
|
||||
("forgejo"
|
||||
'(("web-check" . "curl -f http://localhost:3000 > /dev/null 2>&1; echo $?")
|
||||
("process-check" . "pgrep forgejo > /dev/null; echo $?")))
|
||||
|
||||
("jellyfin"
|
||||
'(("web-check" . "curl -f http://localhost:8096/health > /dev/null 2>&1; echo $?")
|
||||
("process-check" . "pgrep jellyfin > /dev/null; echo $?")))
|
||||
|
||||
("nfs-server"
|
||||
'(("service-check" . "showmount -e localhost > /dev/null 2>&1; echo $?")
|
||||
("exports-check" . "test -f /etc/exports; echo $?")))
|
||||
|
||||
("nginx"
|
||||
'(("config-check" . "nginx -t 2>/dev/null; echo $?")
|
||||
("web-check" . "curl -f http://localhost > /dev/null 2>&1; echo $?")))
|
||||
|
||||
("sshd"
|
||||
'(("port-check" . "ss -tuln | grep ':22 ' > /dev/null; echo $?")))
|
||||
|
||||
(_ '())))
|
||||
|
||||
;; Check service logs for errors
|
||||
(define (check-service-logs machine-name service)
|
||||
"Check recent service logs for errors"
|
||||
(call-with-values (((success output)
|
||||
(run-remote-command machine-name
|
||||
(format #f "journalctl -u ~a --since='10 minutes ago' --priority=err --no-pager | wc -l" service))))
|
||||
(if success
|
||||
(let ((error-count (string->number (string-trim-right output))))
|
||||
`((recent-errors . ,error-count)
|
||||
(status . ,(if (< error-count 5) 'good 'concerning))))
|
||||
`((recent-errors . unknown)
|
||||
(status . error)
|
||||
(reason . "log-check-failed")))))
|
||||
|
||||
;; Display service monitoring details
|
||||
(define (display-service-details machine-name service results)
|
||||
"Display detailed service monitoring information"
|
||||
(format #t " 🔧 ~a@~a:~%" service machine-name)
|
||||
|
||||
(for-each
|
||||
(lambda (check-result)
|
||||
(let ((check-name (car check-result))
|
||||
(check-data (cdr check-result)))
|
||||
(match check-name
|
||||
("status"
|
||||
(let ((active (assoc-ref check-data 'active))
|
||||
(status (assoc-ref check-data 'status)))
|
||||
(format #t " Status: ~a ~a~%"
|
||||
(if active "✅" "❌")
|
||||
status)))
|
||||
|
||||
("health"
|
||||
(let ((healthy (assoc-ref check-data 'healthy)))
|
||||
(format #t " Health: ~a ~a~%"
|
||||
(cond ((eq? healthy #t) "✅")
|
||||
((eq? healthy #f) "❌")
|
||||
(else "❓"))
|
||||
healthy)))
|
||||
|
||||
("logs"
|
||||
(let ((errors (assoc-ref check-data 'recent-errors))
|
||||
(status (assoc-ref check-data 'status)))
|
||||
(format #t " Logs: ~a (~a recent errors)~%"
|
||||
(cond ((eq? status 'good) "✅")
|
||||
((eq? status 'concerning) "⚠️")
|
||||
(else "❓"))
|
||||
errors)))
|
||||
|
||||
(_ (format #t " ~a: ~a~%" check-name check-data)))))
|
||||
results))
|
||||
|
||||
;; Display monitoring summary
|
||||
(define (display-monitoring-summary results)
|
||||
"Display a summary of monitoring results"
|
||||
(newline)
|
||||
(log-info "Infrastructure Monitoring Summary:")
|
||||
(newline)
|
||||
|
||||
(for-each
|
||||
(lambda (machine-result)
|
||||
(let ((machine-name (assoc-ref machine-result 'machine))
|
||||
(machine-status (assoc-ref machine-result 'status))
|
||||
(services (assoc-ref machine-result 'services)))
|
||||
|
||||
(format #t "━━━ ~a (~a) ━━━~%" machine-name machine-status)
|
||||
|
||||
(if (eq? machine-status 'offline)
|
||||
(format #t " ❌ Machine offline~%")
|
||||
(if (null? services)
|
||||
(format #t " ℹ️ No services configured~%")
|
||||
(for-each
|
||||
(lambda (service-result)
|
||||
(let ((service-name (assoc-ref service-result 'service))
|
||||
(checks (assoc-ref service-result 'checks)))
|
||||
(let ((status-check (assoc-ref checks "status"))
|
||||
(health-check (assoc-ref checks "health")))
|
||||
(let ((is-active (and status-check
|
||||
(assoc-ref status-check 'active)))
|
||||
(is-healthy (and health-check
|
||||
(eq? (assoc-ref health-check 'healthy) #t))))
|
||||
(format #t " ~a ~a~%"
|
||||
service-name
|
||||
(cond ((and is-active is-healthy) "✅")
|
||||
(is-active "⚠️")
|
||||
(else "❌")))))))
|
||||
services)))
|
||||
(newline)))
|
||||
results))
|
||||
|
||||
;; Start continuous monitoring (placeholder)
|
||||
(define (start-monitoring options)
|
||||
"Start continuous monitoring daemon"
|
||||
(log-warn "Continuous monitoring not yet implemented")
|
||||
(log-info "For now, use: lab monitor [service]")
|
||||
#f)
|
||||
|
||||
;; Stop continuous monitoring (placeholder)
|
||||
(define (stop-monitoring options)
|
||||
"Stop continuous monitoring daemon"
|
||||
(log-warn "Continuous monitoring not yet implemented")
|
||||
#f)
|
||||
|
||||
;; Get monitoring status (placeholder)
|
||||
(define (get-monitoring-status options)
|
||||
"Get status of monitoring daemon"
|
||||
(log-info "Monitoring Status: Manual mode")
|
||||
(log-info "Use 'lab monitor' for on-demand monitoring")
|
||||
#t)
|
||||
|
||||
;; Collect metrics for analysis
|
||||
(define (collect-metrics machine-name . time-range)
|
||||
"Collect performance and health metrics"
|
||||
(let ((range (if (null? time-range) "1h" (car time-range))))
|
||||
(log-debug "Collecting metrics for ~a (range: ~a)" machine-name range)
|
||||
|
||||
(let ((metrics (get-machine-metrics machine-name range)))
|
||||
(log-success "Metrics collected for ~a" machine-name)
|
||||
metrics)))
|
||||
|
||||
;; Generate monitoring report
|
||||
(define (generate-monitoring-report . machines)
|
||||
"Generate a comprehensive monitoring report"
|
||||
(let ((target-machines (if (null? machines)
|
||||
(get-all-machines)
|
||||
machines)))
|
||||
|
||||
(log-info "Generating monitoring report for ~a machines..."
|
||||
(length target-machines))
|
||||
|
||||
(let ((report-data
|
||||
(map (lambda (machine)
|
||||
(let ((monitoring-result (monitor-machine-services machine #t))
|
||||
(metrics (collect-metrics machine)))
|
||||
`((machine . ,machine)
|
||||
(monitoring . ,monitoring-result)
|
||||
(metrics . ,metrics)
|
||||
(timestamp . ,(current-date)))))
|
||||
target-machines)))
|
||||
|
||||
(log-success "Monitoring report generated")
|
||||
report-data)))
|
Loading…
Add table
Add a link
Reference in a new issue