#!/usr/bin/env bash
#
# Ollama Monitoring Script
# Provides comprehensive monitoring of Ollama service health and performance.
#
# Usage:        see show_help (run with --help).
# Environment:  OLLAMA_HOST, OLLAMA_PORT (see Configuration below).
# Tools used:   systemctl, curl, jq, df, du, journalctl, top, pgrep, bc.

set -euo pipefail

# Configuration (host and port can be overridden through the environment)
OLLAMA_HOST="${OLLAMA_HOST:-127.0.0.1}"
OLLAMA_PORT="${OLLAMA_PORT:-11434}"
readonly OLLAMA_URL="http://${OLLAMA_HOST}:${OLLAMA_PORT}"

# Colors for output
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

# Functions

# Print a section header.
# Arguments: $1 - header text
print_header() {
  printf '%b=== %s ===%b\n' "$BLUE" "$1" "$NC"
}

# Print a success line prefixed with a green check mark.
# Arguments: $1 - message
print_success() {
  printf '%b✓%b %s\n' "$GREEN" "$NC" "$1"
}

# Print a warning line prefixed with a yellow warning sign.
# Arguments: $1 - message
print_warning() {
  printf '%b⚠%b %s\n' "$YELLOW" "$NC" "$1"
}

# Print an error line prefixed with a red cross.
# Arguments: $1 - message
print_error() {
  printf '%b✗%b %s\n' "$RED" "$NC" "$1"
}

# Report whether the "ollama" systemd unit is active, plus its start time
# and current memory usage when systemd exposes them.
# Returns: 0 if the unit is active, 1 otherwise.
check_service_status() {
  local started memory

  print_header "Service Status"

  if ! systemctl is-active --quiet ollama; then
    print_error "Ollama service is not running"
    echo " Try: sudo systemctl start ollama"
    return 1
  fi

  print_success "Ollama service is running"

  # Get service uptime
  started=$(systemctl show ollama --property=ActiveEnterTimestamp --value) || started=""
  if [[ -n "$started" ]]; then
    echo " Started: $started"
  fi

  # Get service memory usage. Only do arithmetic on a plain number: this
  # skips "[not set]" and values too large for signed 64-bit arithmetic.
  memory=$(systemctl show ollama --property=MemoryCurrent --value) || memory=""
  if [[ "$memory" =~ ^[0-9]{1,18}$ ]]; then
    echo " Memory usage: $((memory / 1024 / 1024))MB"
  fi
}

# Probe the HTTP API and print the server version when it can be read.
# Returns: 0 if /api/tags answers, 1 otherwise.
check_api_connectivity() {
  local version

  print_header "API Connectivity"

  if ! curl -s --connect-timeout 5 "$OLLAMA_URL/api/tags" >/dev/null; then
    print_error "API is not responding"
    echo " URL: $OLLAMA_URL"
    return 1
  fi

  print_success "API is responding"

  # Get API version if available
  version=$(curl -s --connect-timeout 5 "$OLLAMA_URL/api/version" 2>/dev/null \
    | jq -r '.version // "unknown"' 2>/dev/null || echo "unknown")
  if [[ "$version" != "unknown" ]]; then
    echo " Version: $version"
  fi
}

# List the models reported by /api/tags.
# Returns: 0 when the list was retrieved (even if empty), 1 otherwise.
check_models() {
  local models_json model_count

  print_header "Installed Models"

  # Test the assignment directly: under `set -e` a separate `$?` check after
  # the assignment would never be reached when curl fails.
  if ! models_json=$(curl -s --connect-timeout 5 "$OLLAMA_URL/api/tags" 2>/dev/null) \
    || [[ -z "$models_json" ]]; then
    print_error "Could not retrieve model list"
    return 1
  fi

  model_count=$(jq '.models | length' <<<"$models_json" 2>/dev/null || echo "0")
  [[ "$model_count" =~ ^[0-9]+$ ]] || model_count=0

  if ((model_count == 0)); then
    print_warning "No models installed"
    echo " Try: ollama pull llama3.3:8b"
    return 0
  fi

  print_success "$model_count models installed"

  # Detailed listing first; fall back to bare names if the size/date fields
  # cannot be formatted. The fallback is best-effort.
  if ! jq -r '.models[]? | " \(.name) (\(.size | . / 1024 / 1024 / 1024 | floor)GB) - Modified: \(.modified_at)"' \
    <<<"$models_json" 2>/dev/null; then
    jq -r '.models[]?.name // "Unknown model"' <<<"$models_json" 2>/dev/null \
      | sed 's/^/ /' || true
  fi
}

# Report the size of the Ollama data directory and the free space on its
# filesystem, warning when less than 10GB is available.
check_disk_space() {
  local ollama_dir="/var/lib/ollama"
  local -r min_free_kb=10485760 # 10GB expressed in 1K blocks
  local usage available available_kb

  print_header "Disk Space"

  if [[ ! -d "$ollama_dir" ]]; then
    print_warning "Ollama data directory not found: $ollama_dir"
    return 0
  fi

  # du exits non-zero on partially unreadable trees but still prints a total;
  # keep whatever it printed and only fall back when there is no output.
  usage=$(du -sh "$ollama_dir" 2>/dev/null | cut -f1) || true
  [[ -n "$usage" ]] || usage="unknown"

  # -P keeps each filesystem on a single line so field 4 is always "Available".
  available=$(df -Ph "$ollama_dir" 2>/dev/null | awk 'NR == 2 {print $4}') || true
  [[ -n "$available" ]] || available="unknown"

  echo " Ollama data usage: $usage"
  echo " Available space: $available"

  # Check if we're running low on space (df -k reports 1K blocks)
  available_kb=$(df -Pk "$ollama_dir" 2>/dev/null | awk 'NR == 2 {print $4}') || true
  [[ "$available_kb" =~ ^[0-9]+$ ]] || available_kb=0

  if ((available_kb < min_free_kb)); then
    print_warning "Low disk space (less than 10GB available)"
  else
    print_success "Sufficient disk space available"
  fi
}

# Report the state of the ollama-model-download systemd unit.
check_model_downloads() {
  local result

  print_header "Model Download Status"

  if systemctl is-active --quiet ollama-model-download; then
    print_warning "Model download in progress"
    echo " Check progress: journalctl -u ollama-model-download -f"
  elif systemctl is-enabled --quiet ollama-model-download; then
    result=$(systemctl show ollama-model-download --property=Result --value) || result=""
    if [[ "$result" == *success* ]]; then
      print_success "Model downloads completed successfully"
    else
      print_warning "Model download service result: $result"
      echo " Check logs: journalctl -u ollama-model-download"
    fi
  else
    print_warning "Model download service not enabled"
  fi
}

# Report the last trigger time and result of the ollama-health-check unit.
check_health_monitoring() {
  local last_run last_run_human result

  print_header "Health Monitoring"

  if ! systemctl is-enabled --quiet ollama-health-check; then
    print_warning "Health monitoring not enabled"
    return 0
  fi

  last_run=$(systemctl show ollama-health-check --property=LastTriggerUSec --value) || last_run=""
  if [[ -n "$last_run" && "$last_run" != "n/a" ]]; then
    if [[ "$last_run" =~ ^[0-9]{1,18}$ ]]; then
      # Raw microseconds since the epoch
      last_run_human=$(date -d "@$((last_run / 1000000))" 2>/dev/null || echo "unknown")
    else
      # systemctl may already render this property as a formatted date;
      # feeding that to arithmetic expansion would be a syntax error.
      last_run_human=$last_run
    fi
    echo " Last health check: $last_run_human"
  fi

  result=$(systemctl show ollama-health-check --property=Result --value) || result=""
  if [[ "$result" == *success* ]]; then
    print_success "Health checks passing"
  else
    print_warning "Health check result: $result"
  fi
}

# Send a one-word prompt to the first listed model and report how long the
# non-streaming response took.
test_inference() {
  local first_model payload response start_time end_time duration preview

  print_header "Inference Test"

  # Get first available model
  first_model=$(curl -s --connect-timeout 5 "$OLLAMA_URL/api/tags" 2>/dev/null \
    | jq -r '.models[0].name // empty' 2>/dev/null) || first_model=""

  if [[ -z "$first_model" ]]; then
    print_warning "No models available for testing"
    return 0
  fi

  echo " Testing with model: $first_model"

  # Let jq build the request body so the model name is always valid JSON.
  payload=$(jq -cn --arg model "$first_model" \
    '{model: $model, prompt: "Hello", stream: false}')

  start_time=$(date +%s.%N)
  response=$(curl -s -X POST "$OLLAMA_URL/api/generate" \
    -H "Content-Type: application/json" \
    -d "$payload" \
    2>/dev/null | jq -r '.response // empty' 2>/dev/null) || response=""
  end_time=$(date +%s.%N)

  if [[ -z "$response" ]]; then
    print_error "Inference test failed"
    echo " Try: ollama run $first_model 'Hello'"
    return 0
  fi

  duration=$(echo "$end_time - $start_time" | bc 2>/dev/null || echo "unknown")

  # Show at most 100 characters, with an ellipsis when text was cut off.
  preview=${response:0:100}
  if ((${#response} > 100)); then
    preview+="..."
  fi

  print_success "Inference test successful"
  echo " Response time: ${duration}s"
  echo " Response: ${preview}"
}

# Print the last 5 journal entries of the ollama unit and, if present, the
# last 5 lines of /var/log/ollama.log.
show_recent_logs() {
  print_header "Recent Logs (last 10 lines)"

  echo "Service logs:"
  # Reading the journal can fail (e.g. insufficient permissions); report it
  # instead of letting pipefail abort the whole run.
  journalctl -u ollama --no-pager -n 5 --output=short-iso 2>/dev/null \
    | sed 's/^/ /' || print_warning "Could not read service logs"

  if [[ -f "/var/log/ollama.log" ]]; then
    echo "Application logs:"
    tail -5 /var/log/ollama.log 2>/dev/null | sed 's/^/ /' \
      || echo " No application logs found"
  fi
}

# Print CPU usage of the Ollama process, cgroup memory figures of the
# ollama.service unit, and the system load average.
show_performance_stats() {
  local -r cgroup_dir="/sys/fs/cgroup/system.slice/ollama.service"
  local pid cpu_usage memory_current memory_max load1 load5 load15

  print_header "Performance Statistics"

  # CPU usage (if available)
  if command -v top >/dev/null; then
    # Ask systemd for the service's main PID first; fall back to the oldest
    # process whose name matches. Never fall back to an unrelated PID.
    pid=$(systemctl show ollama --property=MainPID --value 2>/dev/null) || pid=""
    if [[ ! "$pid" =~ ^[1-9][0-9]*$ ]]; then
      pid=$(pgrep -o ollama 2>/dev/null) || pid=""
    fi

    cpu_usage="unknown"
    if [[ -n "$pid" ]]; then
      cpu_usage=$(top -b -n1 -p "$pid" 2>/dev/null | tail -1 | awk '{print $9}') || true
      [[ -n "$cpu_usage" ]] || cpu_usage="unknown"
    fi
    echo " CPU usage: ${cpu_usage}%"
  fi

  # Memory usage details
  if [[ -f "${cgroup_dir}/memory.current" ]]; then
    memory_current=$(<"${cgroup_dir}/memory.current")
    if [[ "$memory_current" =~ ^[0-9]{1,16}$ ]]; then
      echo " Memory usage: $((memory_current / 1024 / 1024))MB"

      if [[ -f "${cgroup_dir}/memory.max" ]]; then
        memory_max=$(<"${cgroup_dir}/memory.max")
        # The file holds the literal "max" when no limit is set; also guard
        # against a zero limit, which would divide by zero below.
        if [[ "$memory_max" =~ ^[0-9]{1,18}$ ]] && ((memory_max > 0)); then
          echo " Memory limit: $((memory_max / 1024 / 1024))MB ($(((memory_current * 100) / memory_max))% used)"
        fi
      fi
    fi
  fi

  # Load average (first three fields of /proc/loadavg)
  if [[ -f "/proc/loadavg" ]]; then
    read -r load1 load5 load15 _ </proc/loadavg
    echo " System load: $load1 $load5 $load15"
  fi
}

# Help function
show_help() {
  cat <<EOF
Ollama Service Monitor

Usage: $0 [OPTIONS]

Options:
 --test-inference Run a simple inference test
 --show-logs Show recent service logs
 --help Show this help message

Environment variables:
 OLLAMA_HOST Ollama host (default: 127.0.0.1)
 OLLAMA_PORT Ollama port (default: 11434)

Examples:
 $0 # Basic monitoring
 $0 --test-inference # Include inference test
 $0 --show-logs # Include recent logs
 $0 --test-inference --show-logs # Full monitoring
EOF
}

# Main execution
# Arguments: any of --test-inference, --show-logs, --help/-h, in any order.
# Exits 1 when the service or the API is down.
main() {
  local run_inference=false show_logs=false arg

  # Options are accepted in any position.
  for arg in "$@"; do
    case "$arg" in
      --test-inference)
        run_inference=true
        ;;
      --show-logs)
        show_logs=true
        ;;
      --help | -h)
        show_help
        exit 0
        ;;
      *)
        print_warning "Ignoring unknown option: $arg" >&2
        ;;
    esac
  done

  printf '%bOllama Service Monitor%b\n' "$BLUE" "$NC"
  echo "Timestamp: $(date)"
  echo "Host: ${OLLAMA_HOST}:${OLLAMA_PORT}"
  echo

  # Run all checks; the first two are prerequisites for everything else.
  check_service_status || exit 1
  echo

  check_api_connectivity || exit 1
  echo

  check_models
  echo

  check_disk_space
  echo

  check_model_downloads
  echo

  check_health_monitoring
  echo

  show_performance_stats
  echo

  # Only run inference test if requested
  if [[ "$run_inference" == true ]]; then
    test_inference
    echo
  fi

  # Only show logs if requested
  if [[ "$show_logs" == true ]]; then
    show_recent_logs
    echo
  fi

  print_success "Monitoring complete"
}

main "$@"