grokking simplicity and refactoring
This commit is contained in:
parent
89a7fe100d
commit
fb4361d938
67 changed files with 6275 additions and 56 deletions
|
@ -1,258 +0,0 @@
|
|||
;; lab/machines.scm - Machine-specific operations
|
||||
|
||||
(define-module (lab machines)
|
||||
#:use-module (ice-9 format)
|
||||
#:use-module (ice-9 match)
|
||||
#:use-module (ice-9 call-with-values)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-19)
|
||||
#:use-module (utils logging)
|
||||
#:use-module (utils config)
|
||||
#:use-module (utils ssh)
|
||||
#:use-module (lab core)
|
||||
#:export (show-infrastructure-status
|
||||
get-machine-details
|
||||
discover-machines
|
||||
validate-machine-health
|
||||
get-machine-metrics
|
||||
option-ref))
|
||||
|
||||
;; Helper function for option handling
|
||||
(define (option-ref options key default)
|
||||
"Get option value with default fallback"
|
||||
(let ((value (assoc-ref options key)))
|
||||
(if value value default)))
|
||||
|
||||
;; Display infrastructure status in a human-readable format
|
||||
(define (show-infrastructure-status machine-name options)
|
||||
"Display comprehensive infrastructure status"
|
||||
(let ((verbose (option-ref options 'verbose #f))
|
||||
(status-data (get-infrastructure-status machine-name)))
|
||||
|
||||
(log-info "Home-lab infrastructure status:")
|
||||
(newline)
|
||||
|
||||
(for-each
|
||||
(lambda (machine-status)
|
||||
(display-machine-status machine-status verbose))
|
||||
status-data)
|
||||
|
||||
;; Summary statistics
|
||||
(let ((total-machines (length status-data))
|
||||
(online-machines (length (filter
|
||||
(lambda (status)
|
||||
(eq? (assoc-ref status 'connection) 'online))
|
||||
status-data))))
|
||||
(newline)
|
||||
(if (= online-machines total-machines)
|
||||
(log-success "All ~a machines online ✓" total-machines)
|
||||
(log-warn "~a/~a machines online" online-machines total-machines)))))
|
||||
|
||||
;; Display status for a single machine
|
||||
(define (display-machine-status machine-status verbose)
|
||||
"Display formatted status for a single machine"
|
||||
(let* ((machine-name (assoc-ref machine-status 'machine))
|
||||
(machine-type (assoc-ref machine-status 'type))
|
||||
(connection (assoc-ref machine-status 'connection))
|
||||
(services (assoc-ref machine-status 'services))
|
||||
(system-info (assoc-ref machine-status 'system))
|
||||
(check-time (assoc-ref machine-status 'check-time)))
|
||||
|
||||
;; Machine header with connection status
|
||||
(let ((status-symbol (if (eq? connection 'online) "✅" "❌"))
|
||||
(type-label (if (eq? machine-type 'local) "(local)" "(remote)")))
|
||||
(format #t "━━━ ~a ~a ~a ━━━~%" machine-name type-label status-symbol))
|
||||
|
||||
;; Connection details
|
||||
(if (eq? connection 'online)
|
||||
(begin
|
||||
(when system-info
|
||||
(let ((uptime (assoc-ref system-info 'uptime))
|
||||
(load (assoc-ref system-info 'load))
|
||||
(memory (assoc-ref system-info 'memory))
|
||||
(disk (assoc-ref system-info 'disk)))
|
||||
(when uptime (format #t "⏱️ Uptime: ~a~%" uptime))
|
||||
(when load (format #t "📊 Load: ~a~%" load))
|
||||
(when memory (format #t "🧠 Memory: ~a~%" memory))
|
||||
(when disk (format #t "💾 Disk: ~a~%" disk))))
|
||||
|
||||
;; Services status
|
||||
(when (not (null? services))
|
||||
(format #t "🔧 Services: ")
|
||||
(for-each (lambda (service-status)
|
||||
(let ((service-name (symbol->string (car service-status)))
|
||||
(service-state (cdr service-status)))
|
||||
(let ((status-icon (cond
|
||||
((string=? service-state "active") "✅")
|
||||
((string=? service-state "inactive") "❌")
|
||||
((string=? service-state "failed") "💥")
|
||||
(else "❓"))))
|
||||
(format #t "~a ~a " service-name status-icon))))
|
||||
services)
|
||||
(newline))
|
||||
|
||||
(format #t "⚡ Response: ~ams~%" (inexact->exact (round (* check-time 1000)))))
|
||||
(format #t "⚠️ Status: Offline~%"))
|
||||
|
||||
;; Verbose information
|
||||
(when verbose
|
||||
(let ((ssh-config (get-ssh-config machine-name)))
|
||||
(when ssh-config
|
||||
(format #t "🔗 SSH: ~a~%" (assoc-ref ssh-config 'hostname))
|
||||
(let ((ssh-alias (assoc-ref ssh-config 'ssh-alias)))
|
||||
(when ssh-alias
|
||||
(format #t "🏷️ Alias: ~a~%" ssh-alias))))))
|
||||
|
||||
(newline)))
|
||||
|
||||
;; Get detailed information about a specific machine
|
||||
(define (get-machine-details machine-name)
|
||||
"Get comprehensive details about a specific machine"
|
||||
(let ((machine-config (get-machine-config machine-name)))
|
||||
(if (not machine-config)
|
||||
(begin
|
||||
(log-error "Machine ~a not found in configuration" machine-name)
|
||||
#f)
|
||||
(let* ((ssh-config (get-ssh-config machine-name))
|
||||
(health-status (check-system-health machine-name))
|
||||
(current-status (car (get-infrastructure-status machine-name))))
|
||||
|
||||
`((name . ,machine-name)
|
||||
(config . ,machine-config)
|
||||
(ssh . ,ssh-config)
|
||||
(status . ,current-status)
|
||||
(health . ,health-status)
|
||||
(last-updated . ,(current-date)))))))
|
||||
|
||||
;; Discover machines on the network
|
||||
(define (discover-machines)
|
||||
"Discover available machines on the network"
|
||||
(log-info "Discovering machines on the network...")
|
||||
|
||||
(let ((configured-machines (get-all-machines)))
|
||||
(log-debug "Configured machines: ~a" configured-machines)
|
||||
|
||||
;; Test connectivity to each configured machine
|
||||
(let ((discovery-results
|
||||
(map (lambda (machine-name)
|
||||
(log-debug "Testing connectivity to ~a..." machine-name)
|
||||
(let ((reachable (test-ssh-connection machine-name))
|
||||
(ssh-config (get-ssh-config machine-name)))
|
||||
`((machine . ,machine-name)
|
||||
(configured . #t)
|
||||
(reachable . ,reachable)
|
||||
(type . ,(if (and ssh-config (assoc-ref ssh-config 'is-local))
|
||||
'local 'remote))
|
||||
(hostname . ,(if ssh-config
|
||||
(assoc-ref ssh-config 'hostname)
|
||||
"unknown")))))
|
||||
configured-machines)))
|
||||
|
||||
;; TODO: Add network scanning for unconfigured machines
|
||||
;; This could use nmap or similar tools to discover machines
|
||||
|
||||
(log-info "Discovery completed")
|
||||
discovery-results)))
|
||||
|
||||
;; Validate health of a machine with detailed checks
|
||||
(define (validate-machine-health machine-name . detailed)
|
||||
"Perform comprehensive health validation on a machine"
|
||||
(let ((run-detailed (if (null? detailed) #f (car detailed))))
|
||||
(log-info "Validating health of ~a..." machine-name)
|
||||
|
||||
(let ((basic-health (check-system-health machine-name)))
|
||||
(if run-detailed
|
||||
;; Extended health checks for detailed mode
|
||||
(let ((extended-checks
|
||||
'(("filesystem" . check-filesystem-health)
|
||||
("network-services" . check-network-services)
|
||||
("system-logs" . check-system-logs)
|
||||
("performance" . check-performance-metrics))))
|
||||
|
||||
(let ((extended-results
|
||||
(map (lambda (check-pair)
|
||||
(let ((check-name (car check-pair))
|
||||
(check-proc (cdr check-pair)))
|
||||
(log-debug "Running extended check: ~a" check-name)
|
||||
(catch #t
|
||||
(lambda ()
|
||||
`(,check-name . ,(check-proc machine-name)))
|
||||
(lambda (key . args)
|
||||
(log-warn "Extended check ~a failed: ~a" check-name key)
|
||||
`(,check-name . (error . ,key))))))
|
||||
extended-checks)))
|
||||
|
||||
`((basic . ,basic-health)
|
||||
(extended . ,extended-results)
|
||||
(timestamp . ,(current-date)))))
|
||||
|
||||
;; Just basic health checks
|
||||
`((basic . ,basic-health)
|
||||
(timestamp . ,(current-date)))))))
|
||||
|
||||
;; Extended health check functions
|
||||
(define (check-filesystem-health machine-name)
|
||||
"Check filesystem health and disk usage"
|
||||
(call-with-values (((success output)
|
||||
(run-remote-command machine-name "df -h && echo '---' && mount | grep -E '^/' | head -5")))
|
||||
(if success
|
||||
`((status . pass)
|
||||
(details . ,(string-trim-right output)))
|
||||
`((status . fail)
|
||||
(error . "Could not retrieve filesystem information")))))
|
||||
|
||||
(define (check-network-services machine-name)
|
||||
"Check network service connectivity"
|
||||
(let ((services-to-test '(("ssh" "22") ("http" "80") ("https" "443"))))
|
||||
(map (lambda (service-pair)
|
||||
(let ((service-name (car service-pair))
|
||||
(port (cadr service-pair)))
|
||||
(call-with-values (((success output)
|
||||
(run-remote-command machine-name
|
||||
(format #f "netstat -ln | grep ':~a ' > /dev/null 2>&1; echo $?" port))))
|
||||
`(,service-name . ,(if (and success (string=? (string-trim-right output) "0"))
|
||||
'listening 'not-listening)))))
|
||||
services-to-test)))
|
||||
|
||||
(define (check-system-logs machine-name)
|
||||
"Check system logs for recent errors"
|
||||
(call-with-values (((success output)
|
||||
(run-remote-command machine-name
|
||||
"journalctl --since='1 hour ago' --priority=err --no-pager | wc -l")))
|
||||
(if success
|
||||
(let ((error-count (string->number (string-trim-right output))))
|
||||
`((status . ,(if (< error-count 10) 'good 'concerning))
|
||||
(error-count . ,error-count)))
|
||||
`((status . unknown)
|
||||
(error . "Could not check system logs")))))
|
||||
|
||||
(define (check-performance-metrics machine-name)
|
||||
"Get basic performance metrics"
|
||||
(let ((metrics-commands
|
||||
'(("cpu-usage" "top -bn1 | grep 'Cpu(s)' | awk '{print $2}' | sed 's/%us,//'")
|
||||
("memory-usage" "free | grep Mem | awk '{printf \"%.1f\", ($3/$2) * 100.0}'")
|
||||
("io-wait" "iostat 1 2 | tail -1 | awk '{print $4}'"))))
|
||||
|
||||
(map (lambda (metric-pair)
|
||||
(let ((metric-name (car metric-pair))
|
||||
(command (cadr metric-pair)))
|
||||
(call-with-values (((success output) (run-remote-command machine-name command)))
|
||||
`(,(string->symbol metric-name) .
|
||||
,(if success (string-trim-right output) "unknown")))))
|
||||
metrics-commands)))
|
||||
|
||||
;; Get machine metrics for monitoring
|
||||
(define (get-machine-metrics machine-name . time-range)
|
||||
"Get machine metrics for monitoring and analysis"
|
||||
(let ((range (if (null? time-range) "1h" (car time-range))))
|
||||
(log-debug "Collecting metrics for ~a (range: ~a)" machine-name range)
|
||||
|
||||
(let ((current-time (current-date))
|
||||
(performance (check-performance-metrics machine-name))
|
||||
(health (validate-machine-health machine-name)))
|
||||
|
||||
`((machine . ,machine-name)
|
||||
(timestamp . ,current-time)
|
||||
(performance . ,performance)
|
||||
(health . ,health)
|
||||
(range . ,range)))))
|
Loading…
Add table
Add a link
Reference in a new issue