cleaned up and maybe finished the guile lab tool
This commit is contained in:
parent
4290973048
commit
74142365eb
24 changed files with 895 additions and 20 deletions
75
packages/lab-tool/archive/core/health.scm
Normal file
75
packages/lab-tool/archive/core/health.scm
Normal file
|
@ -0,0 +1,75 @@
|
|||
;; lab/core/health.scm - Health check functionality
|
||||
|
||||
(define-module (lab core health)
|
||||
#:use-module (ice-9 rdelim)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (lab core logging)
|
||||
#:use-module (lab core ssh)
|
||||
#:export (check-system-health
|
||||
check-disk-space
|
||||
check-system-load
|
||||
check-critical-services
|
||||
check-network-connectivity))
|
||||
|
||||
(define (check-system-health machine-name)
|
||||
"Perform comprehensive health check on a machine"
|
||||
(log-info "Performing health check on ~a..." machine-name)
|
||||
|
||||
(let ((health-checks
|
||||
'(("connectivity" . test-ssh-connection)
|
||||
("disk-space" . check-disk-space)
|
||||
("system-load" . check-system-load)
|
||||
("critical-services" . check-critical-services)
|
||||
("network" . check-network-connectivity))))
|
||||
|
||||
(map (lambda (check-pair)
|
||||
(let ((check-name (car check-pair))
|
||||
(check-proc (cdr check-pair)))
|
||||
(log-debug "Running ~a check..." check-name)
|
||||
(catch #t
|
||||
(lambda ()
|
||||
(let ((result (check-proc machine-name)))
|
||||
`(,check-name . ((status . ,(if result 'pass 'fail))
|
||||
(result . ,result)))))
|
||||
(lambda (key . args)
|
||||
(log-warn "Health check ~a failed: ~a" check-name key)
|
||||
`(,check-name . ((status . error)
|
||||
(error . ,key)))))))
|
||||
health-checks)))
|
||||
|
||||
(define (check-disk-space machine-name)
|
||||
"Check if disk space is below critical threshold"
|
||||
(call-with-values
|
||||
(lambda () (run-remote-command machine-name "df / | tail -1 | awk '{print $5}' | sed 's/%//'"))
|
||||
(lambda (success output)
|
||||
(if success
|
||||
(let ((usage (string->number (string-trim-right output))))
|
||||
(< usage 90)) ; Pass if usage < 90%
|
||||
#f))))
|
||||
|
||||
(define (check-system-load machine-name)
|
||||
"Check if system load is reasonable"
|
||||
(call-with-values
|
||||
(lambda () (run-remote-command machine-name "cat /proc/loadavg | cut -d' ' -f1"))
|
||||
(lambda (success output)
|
||||
(if success
|
||||
(let ((load (string->number (string-trim-right output))))
|
||||
(< load 5.0)) ; Pass if load < 5.0
|
||||
#f))))
|
||||
|
||||
(define (check-critical-services machine-name)
|
||||
"Check that critical services are running"
|
||||
(let ((critical-services '("sshd")))
|
||||
(every (lambda (service)
|
||||
(call-with-values
|
||||
(lambda () (run-remote-command machine-name "systemctl is-active" service))
|
||||
(lambda (success output)
|
||||
(and success (string=? (string-trim-right output) "active")))))
|
||||
critical-services)))
|
||||
|
||||
(define (check-network-connectivity machine-name)
|
||||
"Check basic network connectivity"
|
||||
(call-with-values
|
||||
(lambda () (run-remote-command machine-name "ping -c 1 -W 5 8.8.8.8 > /dev/null 2>&1; echo $?"))
|
||||
(lambda (success output)
|
||||
(and success (string=? (string-trim-right output) "0")))))
|
29
packages/lab-tool/archive/core/logging.scm
Normal file
29
packages/lab-tool/archive/core/logging.scm
Normal file
|
@ -0,0 +1,29 @@
|
|||
;; lab/core/logging.scm - Logging functionality
|
||||
|
||||
(define-module (lab core logging)
|
||||
#:use-module (ice-9 format)
|
||||
#:export (log-info
|
||||
log-debug
|
||||
log-success
|
||||
log-error
|
||||
log-warn))
|
||||
|
||||
(define (log-info format-str . args)
|
||||
"Log info message"
|
||||
(apply format #t (string-append "[INFO] " format-str "~%") args))
|
||||
|
||||
(define (log-debug format-str . args)
|
||||
"Log debug message"
|
||||
(apply format #t (string-append "[DEBUG] " format-str "~%") args))
|
||||
|
||||
(define (log-success format-str . args)
|
||||
"Log success message"
|
||||
(apply format #t (string-append "[SUCCESS] " format-str "~%") args))
|
||||
|
||||
(define (log-error format-str . args)
|
||||
"Log error message"
|
||||
(apply format #t (string-append "[ERROR] " format-str "~%") args))
|
||||
|
||||
(define (log-warn format-str . args)
|
||||
"Log warning message"
|
||||
(apply format #t (string-append "[WARN] " format-str "~%") args))
|
24
packages/lab-tool/archive/core/ssh.scm
Normal file
24
packages/lab-tool/archive/core/ssh.scm
Normal file
|
@ -0,0 +1,24 @@
|
|||
;; lab/core/ssh.scm - SSH operations
|
||||
|
||||
(define-module (lab core ssh)
|
||||
#:use-module (ice-9 format)
|
||||
#:use-module (ice-9 popen)
|
||||
#:use-module (ice-9 rdelim)
|
||||
#:use-module (ice-9 textual-ports)
|
||||
#:export (test-ssh-connection
|
||||
run-remote-command))
|
||||
|
||||
(define (test-ssh-connection machine-name)
|
||||
"Test SSH connection to machine"
|
||||
(zero? (system (format #f "ssh -o ConnectTimeout=5 -o BatchMode=yes ~a exit 2>/dev/null" machine-name))))
|
||||
|
||||
(define (run-remote-command machine-name command . args)
|
||||
"Run command on remote machine via SSH"
|
||||
(let* ((full-command (if (null? args)
|
||||
command
|
||||
(string-join (cons command args) " ")))
|
||||
(ssh-command (format #f "ssh ~a '~a'" machine-name full-command))
|
||||
(port (open-input-pipe ssh-command))
|
||||
(output (read-string port))
|
||||
(status (close-pipe port)))
|
||||
(values (zero? status) output)))
|
84
packages/lab-tool/archive/core/status.scm
Normal file
84
packages/lab-tool/archive/core/status.scm
Normal file
|
@ -0,0 +1,84 @@
|
|||
;; lab/core/status.scm - Infrastructure status functionality
|
||||
|
||||
(define-module (lab core status)
|
||||
#:use-module (ice-9 rdelim)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-19)
|
||||
#:use-module (lab core logging)
|
||||
#:use-module (lab core config)
|
||||
#:use-module (lab core ssh)
|
||||
#:export (get-infrastructure-status
|
||||
get-machine-services-status
|
||||
get-machine-system-info))
|
||||
|
||||
(define (get-infrastructure-status . args)
|
||||
"Get status of all machines or specific machine if provided"
|
||||
(let ((target-machine (if (null? args) #f (car args)))
|
||||
(machines (if (null? args)
|
||||
(get-all-machines)
|
||||
(list (car args)))))
|
||||
|
||||
(log-info "Checking infrastructure status...")
|
||||
|
||||
(map (lambda (machine-name)
|
||||
(let ((start-time (current-time)))
|
||||
(log-debug "Checking ~a..." machine-name)
|
||||
|
||||
(let* ((ssh-config (get-ssh-config machine-name))
|
||||
(is-local (and ssh-config (assoc-ref ssh-config 'is-local)))
|
||||
(connection-status (test-ssh-connection machine-name))
|
||||
(services-status (if connection-status
|
||||
(get-machine-services-status machine-name)
|
||||
'()))
|
||||
(system-info (if connection-status
|
||||
(get-machine-system-info machine-name)
|
||||
#f))
|
||||
(elapsed (- (current-time) start-time)))
|
||||
|
||||
`((machine . ,machine-name)
|
||||
(type . ,(if is-local 'local 'remote))
|
||||
(connection . ,(if connection-status 'online 'offline))
|
||||
(services . ,services-status)
|
||||
(system . ,system-info)
|
||||
(check-time . ,elapsed)))))
|
||||
machines)))
|
||||
|
||||
(define (get-machine-services-status machine-name)
|
||||
"Check status of services on a machine"
|
||||
(let ((machine-config (get-machine-config machine-name)))
|
||||
(if machine-config
|
||||
(let ((services (assoc-ref machine-config 'services)))
|
||||
(if services
|
||||
(map (lambda (service)
|
||||
(call-with-values
|
||||
(lambda () (run-remote-command machine-name
|
||||
"systemctl is-active"
|
||||
(symbol->string service)))
|
||||
(lambda (success output)
|
||||
`(,service . ,(if success
|
||||
(string-trim-right output)
|
||||
"unknown")))))
|
||||
services)
|
||||
'()))
|
||||
'())))
|
||||
|
||||
(define (get-machine-system-info machine-name)
|
||||
"Get basic system information from a machine"
|
||||
(let ((info-commands
|
||||
'(("uptime" "uptime -p")
|
||||
("load" "cat /proc/loadavg | cut -d' ' -f1-3")
|
||||
("memory" "free -h | grep Mem | awk '{print $3\"/\"$2}'")
|
||||
("disk" "df -h / | tail -1 | awk '{print $5}'")
|
||||
("kernel" "uname -r"))))
|
||||
|
||||
(fold (lambda (cmd-pair acc)
|
||||
(let ((key (car cmd-pair))
|
||||
(command (cadr cmd-pair)))
|
||||
(call-with-values
|
||||
(lambda () (run-remote-command machine-name command))
|
||||
(lambda (success output)
|
||||
(if success
|
||||
(assoc-set! acc (string->symbol key) (string-trim-right output))
|
||||
acc)))))
|
||||
'()
|
||||
info-commands)))
|
12
packages/lab-tool/archive/core/utils.scm
Normal file
12
packages/lab-tool/archive/core/utils.scm
Normal file
|
@ -0,0 +1,12 @@
|
|||
;; lab/core/utils.scm - Utility functions
|
||||
|
||||
(define-module (lab core utils)
|
||||
#:use-module (ice-9 format)
|
||||
#:export (with-spinner))
|
||||
|
||||
(define (with-spinner message proc)
|
||||
"Execute procedure with spinner (stub implementation)"
|
||||
(display (format #f "~a..." message))
|
||||
(let ((result (proc)))
|
||||
(display " done.\n")
|
||||
result))
|
Loading…
Add table
Add a link
Reference in a new issue