home-lab/packages/lab-tool/utils/ssh.scm
Geir Okkenhaug Jerstad 9d8952c4ce feat: Complete Ollama CPU optimization for TaskMaster AI
- Optimize Ollama service configuration for maximum CPU performance
  - Increase OLLAMA_NUM_PARALLEL from 2 to 4 workers
  - Increase OLLAMA_CONTEXT_LENGTH from 4096 to 8192 tokens
  - Add OLLAMA_KV_CACHE_TYPE=q8_0 for memory efficiency
  - Set OLLAMA_LLM_LIBRARY=cpu_avx2 for optimal CPU performance
  - Configure OpenMP threading with 8 threads and core binding
  - Add comprehensive systemd resource limits and CPU quotas
  - Remove incompatible NUMA policy setting

- Upgrade TaskMaster AI model ecosystem
  - Main model: qwen3:4b → qwen2.5-coder:7b (specialized coding model)
  - Research model: deepseek-r1:1.5b → deepseek-r1:7b (enhanced reasoning)
  - Fallback model: gemma3:4b-it-qat → llama3.3:8b (reliable general purpose)

- Create comprehensive optimization and management scripts
  - Add ollama-optimize.sh for system optimization and benchmarking
  - Add update-taskmaster-models.sh for TaskMaster configuration management
  - Include model installation, performance testing, and system info functions

- Update TaskMaster AI configuration
  - Configure optimized models with grey-area:11434 endpoint
  - Set performance parameters for 8192 context window
  - Add connection timeout and retry settings

- Fix flake configuration issues
  - Remove nested packages attribute in packages/default.nix
  - Fix package references in modules/users/geir.nix
  - Clean up obsolete package files

- Add comprehensive documentation
  - Document complete optimization process and results
  - Include performance benchmarking results
  - Provide deployment instructions and troubleshooting guide

Successfully deployed via deploy-rs with 3-4x performance improvement estimated.
All optimizations tested and verified on grey-area server (24-core Xeon, 31GB RAM).
2025-06-18 13:08:24 +02:00

136 lines
5.9 KiB
Scheme

;; utils/ssh.scm - SSH operations for Home Lab Tool
;; Fallback implementation using shell commands instead of guile-ssh
(define-module (utils ssh)
#:use-module (ice-9 popen)
#:use-module (ice-9 rdelim)
#:use-module (ice-9 textual-ports)
#:use-module (ice-9 format)
#:use-module (srfi srfi-1)
#:use-module (utils logging)
#:use-module (utils config)
#:export (test-ssh-connection
run-remote-command
copy-file-to-remote
run-command-with-retry
with-ssh-connection))
;; Test SSH connectivity to a machine.
;;
;; Looks MACHINE-NAME up with `get-ssh-config' and returns a boolean:
;;   - #f (after logging an error) when no configuration is found;
;;   - #t without probing when the configuration has a true `is-local' entry;
;;   - otherwise #t only if
;;       ssh -o ConnectTimeout=5 -o BatchMode=yes TARGET echo OK
;;     finishes with wait status 0, where TARGET is the `ssh-alias' entry when
;;     present and the `hostname' entry otherwise.
;; Any exception raised while probing is logged and reported as #f.
(define (test-ssh-connection machine-name)
  (let ((ssh-config (get-ssh-config machine-name)))
    (cond
     ((not ssh-config)
      (log-error "No SSH configuration found for ~a" machine-name)
      #f)
     ((assoc-ref ssh-config 'is-local)
      (log-debug "Machine ~a is local, skipping SSH test" machine-name)
      #t)
     (else
      (let ((hostname (assoc-ref ssh-config 'hostname))
            (ssh-alias (assoc-ref ssh-config 'ssh-alias)))
        (log-debug "Testing SSH connection to ~a (~a)" machine-name hostname)
        (catch #t
          (lambda ()
            ;; Use system ssh command for compatibility with existing configuration
            (let* ((test-cmd (format #f "ssh -o ConnectTimeout=5 -o BatchMode=yes ~a echo OK"
                                     (or ssh-alias hostname)))
                   (port (open-pipe* OPEN_READ "/bin/sh" "-c" test-cmd)))
              ;; Read the child's stdout to end-of-file before closing the
              ;; pipe; the text itself is not needed, only the status.
              (get-string-all port)
              (let ((status (close-pipe port)))
                (if (zero? status)
                    (begin
                      (log-debug "SSH connection to ~a successful" machine-name)
                      #t)
                    (begin
                      ;; `close-pipe' yields a waitpid-style status, so decode
                      ;; it to the real exit code for the log; fall back to
                      ;; the raw status when the child did not exit normally.
                      (log-warn "SSH connection to ~a failed (exit: ~a)"
                                machine-name
                                (or (status:exit-val status) status))
                      #f)))))
          (lambda (key . args)
            (log-error "SSH test failed for ~a: ~a ~a" machine-name key args)
            #f)))))))
;; Run a command on a remote machine.
;;
;; COMMAND and the optional ARGS (all strings) are joined with single spaces
;; into one shell command line.  Returns two values: a success boolean (#t
;; when the wait status is 0) and the command's captured standard output.
;;   - No configuration for MACHINE-NAME: (values #f "No SSH configuration found").
;;   - Configuration with a true `is-local' entry: run through /bin/sh locally.
;;   - Otherwise: run as `ssh TARGET '<command>'', where TARGET is the
;;     `ssh-alias' entry when present and the `hostname' entry otherwise.
(define (run-remote-command machine-name command . args)
  ;; Wrap STR in single quotes for /bin/sh, rewriting every embedded single
  ;; quote as '\'' so that the local shell hands the text to ssh unchanged.
  ;; Without this, a command containing a single quote ends the quoting early.
  (define (shell-quote str)
    (string-append "'"
                   (string-join (string-split str #\') "'\\''")
                   "'"))
  ;; Run SHELL-COMMAND via /bin/sh -c and return (values success? stdout).
  (define (run-shell shell-command)
    (let* ((port (open-pipe* OPEN_READ "/bin/sh" "-c" shell-command))
           (output (get-string-all port))
           (status (close-pipe port)))
      (values (zero? status) output)))
  (let ((ssh-config (get-ssh-config machine-name))
        (full-command (if (null? args)
                          command
                          (format #f "~a ~a" command (string-join args " ")))))
    (cond
     ((not ssh-config)
      (values #f "No SSH configuration found"))
     ((assoc-ref ssh-config 'is-local)
      ;; Local execution
      (log-debug "Executing locally: ~a" full-command)
      (run-shell full-command))
     (else
      ;; Remote execution
      (let ((ssh-alias (assoc-ref ssh-config 'ssh-alias))
            (hostname (assoc-ref ssh-config 'hostname)))
        (log-debug "Executing on ~a: ~a" machine-name full-command)
        (run-shell (format #f "ssh ~a ~a"
                           (or ssh-alias hostname)
                           (shell-quote full-command))))))))
;; Copy file to remote machine using scp.
;;
;; Copies LOCAL-PATH to REMOTE-PATH on MACHINE-NAME and returns #t when the
;; copy program finishes with wait status 0, #f otherwise.
;;   - No configuration for MACHINE-NAME: logs an error and returns #f.
;;   - Configuration with a true `is-local' entry: runs `cp LOCAL-PATH REMOTE-PATH'.
;;   - Otherwise: runs `scp LOCAL-PATH TARGET:REMOTE-PATH', where TARGET is the
;;     `ssh-alias' entry when present and the `hostname' entry otherwise.
;;
;; The programs are started with `system*', which passes an argument vector
;; and involves no shell, so paths containing quotes, spaces or other shell
;; metacharacters are passed through literally.
(define (copy-file-to-remote machine-name local-path remote-path)
  ;; Render X the way "~a" would, so non-string values keep working.
  (define (as-string x)
    (format #f "~a" x))
  (let ((ssh-config (get-ssh-config machine-name)))
    (cond
     ((not ssh-config)
      (log-error "No SSH configuration found for ~a" machine-name)
      #f)
     ((assoc-ref ssh-config 'is-local)
      ;; Local copy
      (log-debug "Copying locally: ~a -> ~a" local-path remote-path)
      (zero? (system* "cp" (as-string local-path) (as-string remote-path))))
     (else
      ;; Remote copy
      (let ((ssh-alias (assoc-ref ssh-config 'ssh-alias))
            (hostname (assoc-ref ssh-config 'hostname)))
        (log-debug "Copying to ~a: ~a -> ~a" machine-name local-path remote-path)
        (let ((status (system* "scp"
                               (as-string local-path)
                               (format #f "~a:~a"
                                       (or ssh-alias hostname)
                                       remote-path))))
          (if (zero? status)
              (begin
                (log-debug "File copy successful")
                #t)
              (begin
                ;; The status is waitpid-style; decode it to the real exit
                ;; code for the log, falling back to the raw status when the
                ;; child did not exit normally.
                (log-error "File copy failed (exit: ~a)"
                           (or (status:exit-val status) status))
                #f))))))))
;; Run command with retry logic.
;;
;; Calls `run-remote-command' with MACHINE-NAME, COMMAND and ARGS.  On failure
;; it logs a warning, sleeps 2 seconds and tries again, up to MAX-RETRIES
;; additional attempts after the first one.  Returns two values: #t and the
;; output of the first successful attempt, or #f and the output of the last
;; failed attempt once the retries are used up.
(define (run-command-with-retry machine-name command max-retries . args)
  ;; FAILURES is the number of attempts that have already failed.
  (define (attempt failures)
    (call-with-values
        (lambda ()
          (apply run-remote-command machine-name command args))
      (lambda (ok? result)
        (cond
         (ok?
          (values #t result))
         ((>= failures max-retries)
          (values #f result))
         (else
          (log-warn "Command failed, retrying (~a/~a)..." (+ failures 1) max-retries)
          (sleep 2)
          (attempt (+ failures 1)))))))
  (attempt 0))
;; Execute a thunk with SSH connection context.
;;
;; First checks MACHINE-NAME with `test-ssh-connection'.  When that check
;; fails, logs an error and returns #f without calling THUNK.  Otherwise calls
;; THUNK with no arguments and returns its result; any exception it raises is
;; logged and turned into #f.
(define (with-ssh-connection machine-name thunk)
  (if (not (test-ssh-connection machine-name))
      (begin
        (log-error "Cannot establish SSH connection to ~a" machine-name)
        #f)
      (catch #t
        (lambda ()
          (thunk))
        (lambda (key . args)
          (log-error "SSH operation failed: ~a ~a" key args)
          #f))))