
- Updated lab status command to use admin SSH aliases (admin-sleeper, admin-grey, admin-reverse) - Fixed SSH authentication issues by using correct admin keys - Improved verbose mode to show detailed connection attempts - Updated legacy deployment to use admin aliases for consistency - Now properly connects to sleeper-service and grey-area via admin access - reverse-proxy showing as unreachable due to fail2ban (expected security behavior) Resolves SSH connectivity issues that were blocking task completion assessment.
396 lines
12 KiB
Nix
396 lines
12 KiB
Nix
{
|
||
lib,
|
||
stdenv,
|
||
writeShellScriptBin,
|
||
rsync,
|
||
openssh,
|
||
...
|
||
}:
|
||
writeShellScriptBin "lab" ''
|
||
#!/usr/bin/env bash
|
||
|
||
# Home-lab administration tools
|
||
# Deploy and manage NixOS configurations across home lab infrastructure
|
||
|
||
set -euo pipefail
|
||
|
||
# Configuration
|
||
HOMELAB_ROOT="/home/geir/Home-lab"
|
||
TEMP_CONFIG_DIR="/tmp/home-lab-config"
|
||
|
||
# Color output
|
||
RED='\033[0;31m'
|
||
GREEN='\033[0;32m'
|
||
YELLOW='\033[1;33m'
|
||
BLUE='\033[0;34m'
|
||
NC='\033[0m' # No Color
|
||
|
||
log() {
|
||
echo -e "''${BLUE}[lab]''${NC} $1"
|
||
}
|
||
|
||
success() {
|
||
echo -e "''${GREEN}[lab]''${NC} $1"
|
||
}
|
||
|
||
warn() {
|
||
echo -e "''${YELLOW}[lab]''${NC} $1"
|
||
}
|
||
|
||
error() {
|
||
echo -e "''${RED}[lab]''${NC} $1" >&2
|
||
}
|
||
|
||
# Deployment function
|
||
deploy_machine() {
|
||
local machine="$1"
|
||
local mode="''${2:-boot}" # boot, test, or switch
|
||
|
||
case "$machine" in
|
||
"congenital-optimist")
|
||
# Local deployment - no SSH needed
|
||
log "Deploying $machine (mode: $mode) locally"
|
||
|
||
# Deploy the configuration locally
|
||
log "Running nixos-rebuild $mode locally..."
|
||
if ! sudo nixos-rebuild $mode --flake "$HOMELAB_ROOT#$machine"; then
|
||
error "Failed to deploy configuration to $machine"
|
||
exit 1
|
||
fi
|
||
|
||
success "Successfully deployed $machine"
|
||
return 0
|
||
;;
|
||
"sleeper-service")
|
||
local target_host="admin-sleeper"
|
||
;;
|
||
"grey-area")
|
||
local target_host="admin-grey"
|
||
;;
|
||
"reverse-proxy")
|
||
local target_host="admin-reverse"
|
||
;;
|
||
*)
|
||
error "Unknown machine: $machine"
|
||
error "Available machines: congenital-optimist, sleeper-service, grey-area, reverse-proxy"
|
||
exit 1
|
||
;;
|
||
esac
|
||
|
||
log "Deploying $machine (mode: $mode)"
|
||
|
||
# Sync configuration to target machine
|
||
log "Syncing configuration to $target_host..."
|
||
if ! ${rsync}/bin/rsync -av --delete "$HOMELAB_ROOT/" "$target_host:$TEMP_CONFIG_DIR/"; then
|
||
error "Failed to sync configuration to $machine"
|
||
exit 1
|
||
fi
|
||
|
||
# Deploy the configuration
|
||
log "Running nixos-rebuild $mode on $machine..."
|
||
if ! ${openssh}/bin/ssh "$target_host" "cd $TEMP_CONFIG_DIR && sudo nixos-rebuild $mode --flake .#$machine"; then
|
||
error "Failed to deploy configuration to $machine"
|
||
exit 1
|
||
fi
|
||
|
||
success "Successfully deployed $machine"
|
||
}
|
||
|
||
# Deploy with deploy-rs function
|
||
deploy_rs_machine() {
|
||
local machine="$1"
|
||
local dry_run="''${2:-false}"
|
||
|
||
log "Using deploy-rs for $machine deployment"
|
||
|
||
cd "$HOMELAB_ROOT"
|
||
|
||
if [[ "$dry_run" == "true" ]]; then
|
||
log "Running dry-run deployment..."
|
||
if ! nix run github:serokell/deploy-rs -- ".#$machine" --dry-activate; then
|
||
error "Deploy-rs dry-run failed for $machine"
|
||
return 1
|
||
fi
|
||
success "Deploy-rs dry-run completed for $machine"
|
||
else
|
||
log "Running actual deployment..."
|
||
if ! nix run github:serokell/deploy-rs -- ".#$machine"; then
|
||
error "Deploy-rs deployment failed for $machine"
|
||
return 1
|
||
fi
|
||
success "Deploy-rs deployment completed for $machine"
|
||
fi
|
||
}
|
||
|
||
# Update flake inputs function
|
||
update_flake() {
|
||
log "Updating flake inputs..."
|
||
cd "$HOMELAB_ROOT"
|
||
|
||
if ! nix flake update; then
|
||
error "Failed to update flake inputs"
|
||
return 1
|
||
fi
|
||
|
||
log "Checking updated flake configuration..."
|
||
if ! nix flake check; then
|
||
error "Flake check failed after update"
|
||
return 1
|
||
fi
|
||
|
||
success "Flake inputs updated successfully"
|
||
|
||
# Show what changed
|
||
log "Flake lock changes:"
|
||
git diff --no-index /dev/null flake.lock | grep "+" | head -10 || true
|
||
}
|
||
|
||
# Hybrid update: flake update + deploy-rs deployment
|
||
hybrid_update() {
|
||
local target="''${1:-all}"
|
||
local dry_run="''${2:-false}"
|
||
|
||
log "Starting hybrid update process (target: $target, dry-run: $dry_run)"
|
||
|
||
# Step 1: Update flake inputs
|
||
if ! update_flake; then
|
||
error "Failed to update flake - aborting hybrid update"
|
||
return 1
|
||
fi
|
||
|
||
# Step 2: Deploy with deploy-rs
|
||
if [[ "$target" == "all" ]]; then
|
||
local machines=("sleeper-service" "grey-area" "reverse-proxy" "congenital-optimist")
|
||
local failed_machines=()
|
||
|
||
for machine in "''${machines[@]}"; do
|
||
log "Deploying updated configuration to $machine..."
|
||
if deploy_rs_machine "$machine" "$dry_run"; then
|
||
success "✓ $machine updated successfully"
|
||
else
|
||
error "✗ Failed to update $machine"
|
||
failed_machines+=("$machine")
|
||
fi
|
||
echo ""
|
||
done
|
||
|
||
if [[ ''${#failed_machines[@]} -eq 0 ]]; then
|
||
success "All machines updated successfully with hybrid approach!"
|
||
else
|
||
error "Failed to update: ''${failed_machines[*]}"
|
||
return 1
|
||
fi
|
||
else
|
||
deploy_rs_machine "$target" "$dry_run"
|
||
fi
|
||
}
|
||
|
||
# Update all machines function (legacy method)
|
||
update_all_machines() {
|
||
local mode="''${1:-boot}" # boot, test, or switch
|
||
local machines=("congenital-optimist" "sleeper-service" "grey-area" "reverse-proxy")
|
||
local failed_machines=()
|
||
|
||
log "Starting update of all machines (mode: $mode) - using legacy method"
|
||
|
||
for machine in "''${machines[@]}"; do
|
||
log "Updating $machine..."
|
||
if deploy_machine "$machine" "$mode"; then
|
||
success "✓ $machine updated successfully"
|
||
else
|
||
error "✗ Failed to update $machine"
|
||
failed_machines+=("$machine")
|
||
fi
|
||
echo "" # Add spacing between machines
|
||
done
|
||
|
||
if [[ ''${#failed_machines[@]} -eq 0 ]]; then
|
||
success "All machines updated successfully!"
|
||
else
|
||
error "Failed to update: ''${failed_machines[*]}"
|
||
exit 1
|
||
fi
|
||
}
|
||
|
||
# Show deployment status
|
||
show_status() {
|
||
log "Home-lab infrastructure status:"
|
||
|
||
# Check congenital-optimist (local)
|
||
if /run/current-system/sw/bin/systemctl is-active --quiet tailscaled; then
|
||
success " congenital-optimist: ✓ Online (local)"
|
||
else
|
||
warn " congenital-optimist: ⚠ Tailscale inactive"
|
||
fi
|
||
|
||
# Check if -v (verbose) flag is passed
|
||
local verbose=0
|
||
if [[ "''${1:-}" == "-v" ]]; then
|
||
verbose=1
|
||
fi
|
||
|
||
# Check remote machines
|
||
for machine in sleeper-service grey-area reverse-proxy; do
|
||
# Set admin alias for SSH connection
|
||
local admin_alias
|
||
case "$machine" in
|
||
"sleeper-service")
|
||
admin_alias="admin-sleeper"
|
||
tailscale_hostname="sleeper-service.tail807ea.ts.net"
|
||
;;
|
||
"grey-area")
|
||
admin_alias="admin-grey"
|
||
tailscale_hostname="grey-area.tail807ea.ts.net"
|
||
;;
|
||
"reverse-proxy")
|
||
admin_alias="admin-reverse"
|
||
tailscale_hostname="reverse-proxy.tail807ea.ts.net"
|
||
;;
|
||
esac
|
||
|
||
# Test SSH connectivity with debug info if in verbose mode
|
||
if [[ $verbose -eq 1 ]]; then
|
||
log "Testing SSH connection to $admin_alias (admin alias)..."
|
||
${openssh}/bin/ssh -v -o ConnectTimeout=5 -o BatchMode=yes "$admin_alias" "echo ✓ SSH connection to $admin_alias successful" 2>&1
|
||
|
||
log "Testing SSH connection to sma@$tailscale_hostname (Tailscale direct)..."
|
||
${openssh}/bin/ssh -v -o ConnectTimeout=5 -o BatchMode=yes -i ~/.ssh/id_ed25519_admin "sma@$tailscale_hostname" "echo ✓ SSH connection to $tailscale_hostname successful" 2>&1
|
||
fi
|
||
|
||
# Try admin alias first (should work for all machines)
|
||
if ${openssh}/bin/ssh -o ConnectTimeout=3 -o BatchMode=yes "$admin_alias" "echo OK" >/dev/null 2>&1; then
|
||
success " $machine: ✓ Online (admin access)"
|
||
# Fallback to direct Tailscale connection with admin key
|
||
elif ${openssh}/bin/ssh -o ConnectTimeout=5 -o BatchMode=yes -i ~/.ssh/id_ed25519_admin "sma@$tailscale_hostname" "echo OK" >/dev/null 2>&1; then
|
||
success " $machine: ✓ Online (Tailscale)"
|
||
else
|
||
warn " $machine: ⚠ Unreachable"
|
||
if [[ $verbose -eq 1 ]]; then
|
||
log " ℹ️ Note: Tried both admin alias ($admin_alias) and direct Tailscale connection"
|
||
log " ℹ️ Check if machine is online and SSH service is running"
|
||
fi
|
||
fi
|
||
done
|
||
}
|
||
|
||
# Main command handling
|
||
case "''${1:-}" in
|
||
"deploy")
|
||
if [[ $# -lt 2 ]]; then
|
||
error "Usage: lab deploy <machine> [mode]"
|
||
error "Machines: congenital-optimist, sleeper-service, grey-area, reverse-proxy"
|
||
error "Modes: boot (default), test, switch"
|
||
exit 1
|
||
fi
|
||
|
||
machine="$2"
|
||
mode="''${3:-boot}"
|
||
|
||
if [[ ! "$mode" =~ ^(boot|test|switch)$ ]]; then
|
||
error "Invalid mode: $mode. Use boot, test, or switch"
|
||
exit 1
|
||
fi
|
||
|
||
deploy_machine "$machine" "$mode"
|
||
;;
|
||
|
||
"deploy-rs")
|
||
if [[ $# -lt 2 ]]; then
|
||
error "Usage: lab deploy-rs <machine> [--dry-run]"
|
||
error "Machines: congenital-optimist, sleeper-service, grey-area, reverse-proxy"
|
||
exit 1
|
||
fi
|
||
|
||
machine="$2"
|
||
dry_run="false"
|
||
|
||
if [[ "''${3:-}" == "--dry-run" ]]; then
|
||
dry_run="true"
|
||
fi
|
||
|
||
deploy_rs_machine "$machine" "$dry_run"
|
||
;;
|
||
|
||
"update-flake")
|
||
update_flake
|
||
;;
|
||
|
||
"hybrid-update")
|
||
target="''${2:-all}"
|
||
dry_run="false"
|
||
|
||
if [[ "''${3:-}" == "--dry-run" ]]; then
|
||
dry_run="true"
|
||
fi
|
||
|
||
hybrid_update "$target" "$dry_run"
|
||
;;
|
||
|
||
"status")
|
||
show_status "''${2:-}"
|
||
;;
|
||
|
||
"update")
|
||
mode="''${2:-boot}"
|
||
|
||
if [[ ! "$mode" =~ ^(boot|test|switch)$ ]]; then
|
||
error "Invalid mode: $mode. Use boot, test, or switch"
|
||
exit 1
|
||
fi
|
||
|
||
update_all_machines "$mode"
|
||
;;
|
||
|
||
*)
|
||
echo "Home-lab Management Tool"
|
||
echo ""
|
||
echo "Usage: lab <command> [options]"
|
||
echo ""
|
||
echo "Available commands:"
|
||
echo " deploy <machine> [mode] - Deploy configuration to a machine (legacy method)"
|
||
echo " Machines: congenital-optimist, sleeper-service, grey-area, reverse-proxy"
|
||
echo " Modes: boot (default), test, switch"
|
||
echo " deploy-rs <machine> [opts] - Deploy using deploy-rs (modern method)"
|
||
echo " Options: --dry-run"
|
||
echo " update [mode] - Update all machines (legacy method)"
|
||
echo " Modes: boot (default), test, switch"
|
||
echo " update-flake - Update flake inputs and check configuration"
|
||
echo " hybrid-update [target] [opts] - Update flake + deploy with deploy-rs"
|
||
echo " Target: machine name or 'all' (default)"
|
||
echo " Options: --dry-run"
|
||
echo " status - Check infrastructure connectivity"
|
||
echo ""
|
||
echo "Deployment Methods:"
|
||
echo " Legacy (SSH + rsync): Reliable, tested, slower"
|
||
echo " Deploy-rs: Modern, automatic rollback, parallel deployment"
|
||
echo " Hybrid: Combines flake updates with deploy-rs safety"
|
||
echo ""
|
||
echo "Ollama AI Tools (when available):"
|
||
echo " ollama-cli <command> - Manage Ollama service and models"
|
||
echo " monitor-ollama [opts] - Monitor Ollama service health"
|
||
echo ""
|
||
echo "Examples:"
|
||
echo " # Legacy deployment"
|
||
echo " lab deploy sleeper-service boot # Deploy and set for next boot"
|
||
echo " lab deploy grey-area switch # Deploy and switch immediately"
|
||
echo " lab update boot # Update all machines for next boot"
|
||
echo ""
|
||
echo " # Modern deploy-rs deployment"
|
||
echo " lab deploy-rs sleeper-service # Deploy with automatic rollback"
|
||
echo " lab deploy-rs grey-area --dry-run # Test deployment without applying"
|
||
echo ""
|
||
echo " # Hybrid approach (recommended for updates)"
|
||
echo " lab hybrid-update sleeper-service # Update flake + deploy specific machine"
|
||
echo " lab hybrid-update all --dry-run # Test update all machines"
|
||
echo " lab update-flake # Just update flake inputs"
|
||
echo ""
|
||
echo " # Status and monitoring"
|
||
echo " lab status # Check all machines"
|
||
echo ""
|
||
echo " # Ollama AI tools"
|
||
echo " ollama-cli status # Check Ollama service status"
|
||
echo " ollama-cli models # List installed AI models"
|
||
echo " monitor-ollama --test-inference # Full Ollama health check"
|
||
;;
|
||
esac
|
||
''
|