diff --git a/dotfiles/README.md b/dotfiles/README.md index fa6f187..6309110 100644 --- a/dotfiles/README.md +++ b/dotfiles/README.md @@ -5,9 +5,7 @@ This directory contains per-user configurations and dotfiles for the Home-lab in ## Directory Organization ### `geir/` - Primary user configuration for geir: - - `user.nix` - NixOS user configuration (packages, groups, shell) - `dotfiles/` - Literate programming dotfiles using org-mode - `README.org` - Main literate configuration file @@ -16,9 +14,7 @@ Primary user configuration for geir: - `editors/` - Editor configurations (neovim, vscode) ### Future Users - Additional user directories will follow the same pattern: - - `admin/` - Administrative user for system management - `service/` - Service accounts for automation - `guest/` - Temporary/guest user configurations @@ -26,27 +22,21 @@ Additional user directories will follow the same pattern: ## User Configuration Philosophy ### NixOS Integration - Each user has a `user.nix` file that defines: - - User account settings (shell, groups, home directory) - User-specific packages - System-level user configurations - Integration with home lab services ### Literate Dotfiles - Each user's `dotfiles/README.org` serves as: - - Single source of truth for all user configurations - Self-documenting setup with rationale - Auto-tangling to generate actual dotfiles - Version-controlled configuration history ### Multi-Machine Consistency - User configurations are designed to work across machines: - - congenital-optimist: Full development environment - sleeper-service: Minimal server access - Future machines: Consistent user experience @@ -54,9 +44,7 @@ User configurations are designed to work across machines: ## Dotfiles Structure ### `dotfiles/README.org` - Main literate configuration file containing: - - Shell configuration (zsh, starship, aliases) - Editor configurations (emacs, neovim) - Development tool settings @@ -64,7 +52,6 @@ Main literate configuration file containing: - Machine-specific customizations ### Subdirectories - - `emacs/` - Generated Emacs configuration files - `shell/` - Generated shell configuration files - `editors/` - Generated editor configuration files @@ -72,7 +59,6 @@ Main literate configuration file containing: ## Usage Examples ### Importing User Configuration - ```nix # In machine configuration imports = [ @@ -81,14 +67,12 @@ imports = [ ``` ### Adding New User - 1. Create user directory: `users/newuser/` 2. Copy and adapt `user.nix` template 3. Create `dotfiles/README.org` with user-specific configs 4. Import in machine configurations as needed ### Tangling Dotfiles - ```bash # From user's dotfiles directory cd users/geir/dotfiles @@ -114,4 +98,4 @@ emacs --batch -l org --eval "(org-babel-tangle-file \"README.org\")" - **User Directories**: lowercase (e.g., `geir/`, `admin/`) - **Configuration Files**: descriptive names (e.g., `user.nix`, `README.org`) -- **Generated Files**: follow target application conventions +- **Generated Files**: follow target application conventions \ No newline at end of file diff --git a/modules/sound/disable-auto-rnnoise.nix b/modules/sound/disable-auto-rnnoise.nix deleted file mode 100644 index 1d908d3..0000000 --- a/modules/sound/disable-auto-rnnoise.nix +++ /dev/null @@ -1,25 +0,0 @@ -{ - config, - lib, - pkgs, - ... -}: { - # Optional configuration to disable automatic RNNoise filter - # This can be imported if the automatic filter causes distortion - - services.pipewire = { - extraConfig.pipewire."15-disable-auto-rnnoise" = { - "context.modules" = [ - # Commenting out the automatic RNNoise filter - # Users should use EasyEffects for manual noise suppression instead - # { - # name = "libpipewire-module-filter-chain"; - # args = { - # "node.description" = "Noise Canceling Source"; - # # ... rest of RNNoise config - # }; - # } - ]; - }; - }; -} diff --git a/modules/sound/pipewire.nix b/modules/sound/pipewire.nix index b72de34..ea50a81 100644 --- a/modules/sound/pipewire.nix +++ b/modules/sound/pipewire.nix @@ -24,8 +24,8 @@ "context.properties" = { "default.clock.rate" = 48000; "default.clock.quantum" = 1024; - "default.clock.min-quantum" = 64; - "default.clock.max-quantum" = 8192; + "default.clock.min-quantum" = 32; + "default.clock.max-quantum" = 2048; }; "context.modules" = [ @@ -40,10 +40,10 @@ type = "ladspa"; name = "rnnoise"; plugin = "${pkgs.rnnoise-plugin}/lib/ladspa/librnnoise_ladspa.so"; - label = "noise_suppressor_mono"; + label = "noise_suppressor_stereo"; control = { - "VAD Threshold (%)" = 95.0; - "VAD Grace Period (ms)" = 100; + "VAD Threshold (%)" = 50.0; + "VAD Grace Period (ms)" = 200; "Retroactive VAD Grace (ms)" = 0; }; } @@ -85,9 +85,6 @@ # Validation script (writeShellScriptBin "validate-audio" (builtins.readFile ./validate-audio.sh)) - # Troubleshoot script for voice distortion - (writeShellScriptBin "troubleshoot-voice-distortion" (builtins.readFile ./troubleshoot-voice-distortion.sh)) - # Optional: Professional audio tools # qjackctl # JACK control GUI (for JACK applications) # carla # Audio plugin host diff --git a/modules/sound/troubleshoot-voice-distortion.sh b/modules/sound/troubleshoot-voice-distortion.sh deleted file mode 100755 index ccdedf7..0000000 --- a/modules/sound/troubleshoot-voice-distortion.sh +++ /dev/null @@ -1,322 +0,0 @@ -#!/usr/bin/env bash - -# Voice Distortion Troubleshoot Script -# This script helps diagnose and fix voice distortion issues in PipeWire - -# Use safer error handling - don't exit on all errors -set -uo pipefail - -echo "π€ Voice Distortion Troubleshoot Tool" -echo "====================================" -echo "" - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -CYAN='\033[0;36m' -NC='\033[0m' # No Color - -success() { - echo -e "${GREEN}β $1${NC}" -} - -warning() { - echo -e "${YELLOW}β οΈ $1${NC}" -} - -error() { - echo -e "${RED}β $1${NC}" -} - -info() { - echo -e "${BLUE}βΉοΈ $1${NC}" -} - -highlight() { - echo -e "${CYAN}π§ $1${NC}" -} - -echo "Let's diagnose your voice distortion issue step by step..." -echo "" - -# 1. Check current audio settings -echo "1. Current Audio Configuration" -echo "==============================" - -if command -v wpctl >/dev/null 2>&1; then - echo "Default devices:" - wpctl status | head -20 - echo "" - - # Get default source - DEFAULT_SOURCE=$(wpctl inspect @DEFAULT_AUDIO_SOURCE@ 2>/dev/null | grep "node.name" | head -1 | sed 's/.*"\(.*\)".*/\1/' || echo "unknown") - info "Current default source: $DEFAULT_SOURCE" - - # Check sample rate - CURRENT_RATE=$(pw-metadata -n settings | grep "clock.rate" | awk '{print $3}' || echo "unknown") - info "Current sample rate: $CURRENT_RATE Hz" - - # Check buffer size - CURRENT_QUANTUM=$(pw-metadata -n settings | grep "clock.quantum" | awk '{print $3}' || echo "unknown") - info "Current buffer size: $CURRENT_QUANTUM samples" - -else - error "wpctl not available" -fi - -echo "" - -# 2. Check for common distortion causes -echo "2. Distortion Diagnosis" -echo "======================" - -# Check if using RNNoise filter -if command -v pw-dump >/dev/null 2>&1 && command -v jq >/dev/null 2>&1; then - if pw-dump 2>/dev/null | jq -r '.[] | select(.info.props."node.name" == "rnnoise_source")' 2>/dev/null | grep -q "rnnoise" 2>/dev/null; then - warning "You're using the RNNoise filter chain - this might be causing distortion" - echo " The automatic filter chain can sometimes cause artifacts" - else - info "Not using automatic RNNoise filter" - fi -else - warning "Cannot check RNNoise filter status (pw-dump or jq not available)" -fi - -# Check for high CPU usage -if command -v pw-top >/dev/null 2>&1; then - highlight "Checking PipeWire performance (5 seconds)..." - if timeout 5 pw-top --batch-mode 2>/dev/null | tail -10 2>/dev/null; then - info "Performance check completed" - else - warning "Could not check performance - pw-top failed" - fi -else - info "pw-top not available for performance checking" -fi - -# Check input levels -if command -v wpctl >/dev/null 2>&1; then - echo "" - echo "Current microphone volume levels:" - if wpctl get-volume @DEFAULT_AUDIO_SOURCE@ 2>/dev/null; then - info "Volume check completed" - else - warning "Could not get volume info - no default audio source?" - fi -else - warning "wpctl not available for volume checking" -fi - -echo "" - -# 3. Quick fixes -echo "3. Quick Fixes to Try" -echo "====================" -echo "" - -echo "Choose a solution to try:" -echo "" -echo "A) Disable automatic RNNoise filter (recommended first step)" -echo "B) Lower microphone input gain" -echo "C) Reduce buffer size for lower latency" -echo "D) Use EasyEffects instead of filter chain" -echo "E) Reset to safe audio settings" -echo "F) Test different sample rates" -echo "G) Monitor audio in real-time" -echo "H) All of the above (comprehensive fix)" -echo "" - -read -p "Enter your choice (A-H): " choice - -case $choice in - A|a) - echo "" - highlight "Disabling automatic RNNoise filter..." - if command -v pw-dump >/dev/null 2>&1 && command -v jq >/dev/null 2>&1 && command -v pw-cli >/dev/null 2>&1; then - # Find and remove RNNoise filter nodes - FILTER_IDS=$(pw-dump 2>/dev/null | jq -r '.[] | select(.info.props."node.name" == "rnnoise_source") | .id' 2>/dev/null || echo "") - if [ -n "$FILTER_IDS" ]; then - echo "$FILTER_IDS" | while read -r id; do - if [ -n "$id" ]; then - echo "Removing filter node $id" - pw-cli destroy "$id" 2>/dev/null || warning "Could not remove filter $id" - fi - done - success "RNNoise filter removal attempted" - else - info "No RNNoise filter found to remove" - fi - echo "Try speaking now. If distortion is gone, use EasyEffects for noise suppression instead." - else - warning "Required tools not available (pw-dump, jq, pw-cli)" - echo "Try manually: systemctl --user restart pipewire" - fi - ;; - - B|b) - echo "" - highlight "Lowering microphone input gain to 50%..." - wpctl set-volume @DEFAULT_AUDIO_SOURCE@ 50% - success "Microphone gain reduced to 50%" - echo "Test your voice now. Adjust further if needed with: wpctl set-volume @DEFAULT_AUDIO_SOURCE@ X%" - ;; - - C|c) - echo "" - highlight "Setting lower buffer size for reduced latency..." - pw-metadata -n settings 0 clock.force-quantum 512 - success "Buffer size set to 512 samples" - echo "This should reduce latency but may increase CPU usage" - ;; - - D|d) - echo "" - highlight "Launching EasyEffects for manual noise suppression..." - if command -v easyeffects >/dev/null 2>&1; then - easyeffects & - success "EasyEffects launched" - echo "" - echo "In EasyEffects:" - echo "1. Go to 'Input' tab" - echo "2. Add 'RNNoise' effect" - echo "3. Set 'VAD Threshold' to 95% (very conservative)" - echo "4. Set 'Wet' signal to 50-70% (not 100%)" - echo "5. Disable any other aggressive processing" - else - error "EasyEffects not available" - fi - ;; - - E|e) - echo "" - highlight "Resetting to safe audio settings..." - # Reset quantum - pw-metadata -n settings 0 clock.force-quantum 0 - # Reset rate - pw-metadata -n settings 0 clock.force-rate 0 - # Set reasonable volume - wpctl set-volume @DEFAULT_AUDIO_SOURCE@ 70% - # Restart audio services - systemctl --user restart pipewire pipewire-pulse wireplumber - success "Audio settings reset to defaults" - echo "Wait 5 seconds for services to restart, then test your voice" - ;; - - F|f) - echo "" - highlight "Testing different sample rates..." - echo "Current rate: $(pw-metadata -n settings | grep clock.rate | awk '{print $3}' || echo 'default')" - echo "" - echo "Trying 44100 Hz..." - pw-metadata -n settings 0 clock.force-rate 44100 - sleep 2 - echo "Test your voice now. Press Enter to continue..." - read - echo "Trying 48000 Hz..." - pw-metadata -n settings 0 clock.force-rate 48000 - sleep 2 - echo "Test your voice now. Press Enter to continue..." - read - echo "Back to automatic rate..." - pw-metadata -n settings 0 clock.force-rate 0 - success "Rate testing complete" - ;; - - G|g) - echo "" - highlight "Starting real-time audio monitoring..." - echo "Press Ctrl+C to stop monitoring" - echo "" - if command -v pw-top >/dev/null 2>&1; then - pw-top - else - echo "Monitoring with wpctl status (updating every 2 seconds):" - while true; do - clear - echo "=== PipeWire Status ===" - wpctl status - echo "" - echo "=== Microphone Volume ===" - wpctl get-volume @DEFAULT_AUDIO_SOURCE@ - echo "" - echo "Press Ctrl+C to stop" - sleep 2 - done - fi - ;; - - H|h) - echo "" - highlight "Running comprehensive fix..." - - # Step 1: Disable RNNoise filter - echo "1/6: Disabling automatic RNNoise filter..." - if command -v pw-dump >/dev/null 2>&1 && command -v jq >/dev/null 2>&1; then - FILTER_IDS=$(pw-dump 2>/dev/null | jq -r '.[] | select(.info.props."node.name" == "rnnoise_source") | .id' 2>/dev/null || echo "") - if [ -n "$FILTER_IDS" ]; then - echo "$FILTER_IDS" | while read -r id; do - if [ -n "$id" ]; then - pw-cli destroy "$id" 2>/dev/null || true - fi - done - fi - fi - - # Step 2: Reset audio settings - echo "2/6: Resetting audio settings..." - pw-metadata -n settings 0 clock.force-quantum 0 2>/dev/null || true - pw-metadata -n settings 0 clock.force-rate 0 2>/dev/null || true - - # Step 3: Set conservative volume - echo "3/6: Setting conservative microphone gain..." - wpctl set-volume @DEFAULT_AUDIO_SOURCE@ 60% 2>/dev/null || warning "Could not set volume" - - # Step 4: Restart services - echo "4/6: Restarting audio services..." - systemctl --user restart pipewire pipewire-pulse wireplumber 2>/dev/null || warning "Could not restart services" - - # Step 5: Wait for restart - echo "5/6: Waiting for services to stabilize..." - sleep 5 - - # Step 6: Launch EasyEffects - echo "6/6: Launching EasyEffects for manual control..." - if command -v easyeffects >/dev/null 2>&1; then - easyeffects & - success "Comprehensive fix applied!" - echo "" - echo "Next steps:" - echo "1. Test your voice without any effects first" - echo "2. In EasyEffects, gradually add noise suppression:" - echo " - Start with RNNoise at 50% wet signal" - echo " - Use VAD threshold of 95% or higher" - echo " - Avoid aggressive compression or EQ" - echo "3. If still distorted, try lowering input gain further" - else - warning "EasyEffects not available for manual control" - fi - ;; - - *) - error "Invalid choice" - ;; -esac - -echo "" -echo "π― Additional Tips to Prevent Distortion:" -echo "=========================================" -echo "" -echo "β’ Keep microphone gain below 80% to avoid clipping" -echo "β’ Use RNNoise conservatively (50-70% wet signal, not 100%)" -echo "β’ Check for background applications using audio" -echo "β’ Ensure your microphone hardware supports 48kHz" -echo "β’ Consider using a better quality microphone" -echo "β’ Avoid stacking multiple noise reduction effects" -echo "" - -echo "Run this script again anytime with: troubleshoot-voice-distortion" -echo "" -echo "β Script completed successfully!" -exit 0 diff --git a/research/netdata-home-lab-research.md b/research/netdata-home-lab-research.md deleted file mode 100644 index 2830f61..0000000 --- a/research/netdata-home-lab-research.md +++ /dev/null @@ -1,607 +0,0 @@ -# Netdata Research: Metrics Aggregation for Home Lab - -*Research conducted June 19, 2025* - -## Executive Summary - -Netdata is a highly viable metrics aggregation solution for your home lab infrastructure. It offers real-time monitoring with per-second granularity, minimal resource usage, and excellent scalability through its Parent-Child architecture. The recent addition of a beta MCP (Model Context Protocol) server makes it particularly interesting for integration with AI tooling and your existing workflow. - -## Key Advantages for Home Lab Use - -### 1. **Real-Time Monitoring Excellence** - -- **Per-second metrics collection** - True real-time visibility -- **1-second dashboard latency** - Instant feedback for troubleshooting -- **Zero sampling** - Complete data fidelity -- **800+ integrations** out of the box - -### 2. **Resource Efficiency** - -- **Most energy-efficient monitoring tool** according to University of Amsterdam study -- **40x better storage efficiency** compared to traditional solutions -- **22x faster responses** than alternatives -- **Uses only 15% of resources** compared to similar tools - -### 3. **Perfect Home Lab Architecture** - -- **Zero-configuration deployment** - Auto-discovers services -- **Distributed by design** - No centralized data collection required -- **Edge-based ML** - Anomaly detection runs locally on each node -- **Parent-Child streaming** - Centralize dashboards while keeping data local - -### 4. **Advanced Features** - -- **Built-in ML anomaly detection** - One model per metric, trained locally -- **Pre-configured alerts** - 400+ ready-to-use alert templates -- **Multiple notification channels** - Slack, Discord, email, PagerDuty, etc. -- **Export capabilities** - Prometheus, InfluxDB, Graphite integration - -## Architecture Options for Home Lab - -### Option 1: Standalone Deployment (Simple) - -``` -βββββββββββββββββββ βββββββββββββββββββ βββββββββββββββββββ -β Machine 1 β β Machine 2 β β Machine N β -β (Netdata β β (Netdata β β (Netdata β -β Agent) β β Agent) β β Agent) β -βββββββββββββββββββ βββββββββββββββββββ βββββββββββββββββββ - β β β - βββββββββββββββββββββββΌββββββββββββββββββββββ - β - βββββββββββββββββββ - β Netdata Cloud β - β (Optional) β - βββββββββββββββββββ -``` - -**Benefits:** - -- Simple setup and maintenance -- Each node retains its own data -- No single point of failure -- Perfect for learning and small deployments - -### Option 2: Parent-Child Architecture (Recommended) - -``` - βββββββββββββββββββ - β Netdata Parent β - β (Central Hub) β - β - Dashboards β - β - Long retentionβ - β - Alerts β - βββββββββββββββββββ - β - ββββββββββββββββΌβββββββββββββββ - β β β - βββββββββββββββββββ βββββββββββββββββββ βββββββββββββββββββ - β Netdata Child β β Netdata Child β β Netdata Child β - β (NixOS VMs) β β (Containers) β β (IoT devices) β - β - Thin mode β β - Thin mode β β - Thin mode β - β - Local buffer β β - Local buffer β β - Local buffer β - βββββββββββββββββββ βββββββββββββββββββ βββββββββββββββββββ -``` - -**Benefits:** - -- Centralized dashboards and alerting -- Extended retention on Parent node -- Reduced resource usage on Child nodes -- Better for production-like home lab setups - -### Option 3: High Availability Cluster (Advanced) - -``` - βββββββββββββββββββ βββββββββββββββββββ - β Netdata Parent 1ββββββΊβ Netdata Parent 2β - β (Primary) β β (Backup) β - βββββββββββββββββββ βββββββββββββββββββ - β β - ββββββββββΌββββββββββββββββββββββββΌβββββββββ - β β β β -βββββββββββ βββββββββββ βββββββββββ βββββββββββ -βChild 1 β βChild 2 β βChild 3 β βChild N β -βββββββββββ βββββββββββ βββββββββββ βββββββββββ -``` - -**Benefits:** - -- No single point of failure -- Automatic failover -- Load distribution -- Production-grade reliability - -## Integration with Your NixOS Infrastructure - -### NixOS Configuration - -```nix -# In your NixOS configuration.nix -{ - services.netdata = { - enable = true; - config = { - global = { - "default port" = "19999"; - "memory mode" = "ram"; # For children - # "memory mode" = "save"; # For parents - }; - - # For Parent nodes - streaming = { - enabled = "yes"; - "allow from" = "*"; - "default memory mode" = "ram"; - }; - - # For Child nodes - stream = { - enabled = "yes"; - destination = "parent.yourdomain.local"; - "api key" = "your-api-key"; - }; - }; - }; - - # Open firewall for Netdata - networking.firewall.allowedTCPPorts = [ 19999 ]; -} -``` - -### Deployment Strategy for Your Lab - -1. **Reverse Proxy** (grey-area): Netdata Parent + Nginx reverse proxy -2. **Sleeper Service** (NFS): Netdata Child with storage monitoring -3. **Congenital Optimist**: Netdata Child with system monitoring -4. **VM workloads**: Netdata Children in thin mode - -## MCP Server Integration (Beta Feature) - -Netdata recently introduced an **MCP (Model Context Protocol) server in beta**. This is particularly relevant for your AI-integrated workflow: - -### What It Offers - -- **AI-powered metric analysis** through standardized MCP interface -- **Integration with Claude, ChatGPT, and other LLMs** for intelligent monitoring -- **Natural language queries** about your infrastructure metrics -- **Automated root cause analysis** using AI reasoning -- **Contextual alerting** with AI-generated insights - -### Potential Use Cases - -```bash -# Example MCP interactions (conceptual) -"What's causing high CPU on sleeper-service?" -"Show me network anomalies from the last hour" -"Compare current metrics to last week's baseline" -"Generate a performance report for grey-area" -``` - -### Integration with Your Existing MCP Setup - -Since you're already using MCP servers (TaskMaster, Context7), adding Netdata's MCP server would create a powerful monitoring-AI pipeline: - -``` -Your Infrastructure β Netdata β MCP Server β AI Analysis β Insights -``` - -## Comparison with Alternatives - -### vs. Prometheus + Grafana - -| Feature | Netdata | Prometheus + Grafana | -|---------|---------|---------------------| -| Setup Complexity | Zero-config | Complex setup | -| Real-time Data | 1-second | 15-second minimum | -| Resource Usage | Very low | Higher | -| Built-in ML | Yes | No | -| Dashboards | Auto-generated | Manual creation | -| Storage Efficiency | 40x better | Standard | - -### vs. Zabbix - -| Feature | Netdata | Zabbix | -|---------|---------|---------| -| Agent Overhead | Minimal | Higher | -| Configuration | Auto-discovery | Manual setup | -| Scalability | Horizontal | Vertical | -| Modern UI | Yes | Traditional | -| Cloud Integration | Native | Limited | - -### vs. DataDog/Commercial SaaS - -| Feature | Netdata | Commercial SaaS | -|---------|---------|-----------------| -| Cost | Open Source | Expensive | -| Data Sovereignty | Local | Vendor-hosted | -| Customization | Full control | Limited | -| Lock-in Risk | None | High | - -## Implementation Roadmap - -### Phase 1: Basic Deployment (Week 1) - -1. Deploy Netdata Parent on **grey-area** -2. Install Netdata Children on main nodes -3. Configure basic streaming -4. Set up reverse proxy for external access - -### Phase 2: Integration (Week 2-3) - -1. Configure alerts and notifications -2. Set up Prometheus export for existing tools -3. Integrate with your existing monitoring stack -4. Configure retention policies - -### Phase 3: Advanced Features (Week 4+) - -1. Enable MCP server (beta) -2. Set up high availability if needed -3. Custom dashboard creation -4. Advanced alert tuning - -## Potential Challenges - -### 1. **Learning Curve** - -- New terminology (Parent/Child vs traditional) -- Different approach to metrics storage -- **Mitigation**: Excellent documentation and active community - -### 2. **Beta MCP Server** - -- Still in beta development -- Limited documentation -- **Mitigation**: Conservative adoption, wait for stability - -### 3. **Integration Complexity** - -- May need adaptation of existing monitoring workflows -- **Mitigation**: Gradual migration, parallel running during transition - -## Resource Requirements - -### Minimal Setup (Per Node) - -- **CPU**: 1-2% of a single core -- **RAM**: 20-100MB depending on metrics count -- **Disk**: 100MB for agent + retention data -- **Network**: Minimal bandwidth for streaming - -### Parent Node (Centralized) - -- **CPU**: 2-4 cores for 10-20 children -- **RAM**: 2-4GB for extended retention -- **Disk**: 10-50GB depending on retention period -- **Network**: Higher bandwidth for ingesting streams - -## Recommendations - -### For Your Home Lab: **Strong Yes** - -1. **Start with Parent-Child architecture** on grey-area as Parent -2. **Deploy gradually** - begin with critical nodes -3. **Integrate with existing Prometheus** via export -4. **Monitor MCP server development** for AI integration -5. **Consider as primary monitoring solution** due to superior efficiency - -### Specific Benefits for Your Use Case - -- **Perfect fit for NixOS** - declarative configuration -- **Complements your AI workflow** - MCP integration potential -- **Scales with lab growth** - from single nodes to complex topologies -- **Energy efficient** - important for home lab power consumption -- **Real-time visibility** - excellent for development and testing - -## Next Steps - -1. **Proof of Concept**: Deploy on grey-area as standalone -2. **Evaluate**: Run for 1-2 weeks alongside current monitoring -3. **Expand**: Add children nodes if satisfied -4. **Integrate**: Connect with existing toolchain -5. **MCP Beta**: Request early access to MCP server - -## Conclusion - -Netdata represents a modern, efficient approach to infrastructure monitoring that aligns well with your home lab's goals. Its combination of real-time capabilities, minimal resource usage, and emerging AI integration through MCP makes it an excellent choice for sophisticated home lab environments. The Parent-Child architecture provides enterprise-grade capabilities while maintaining the simplicity needed for home lab management. - -The addition of MCP server support positions Netdata at the forefront of AI-integrated monitoring, making it particularly appealing given your existing investment in MCP-based tooling. - -## References - -- [Netdata GitHub Repository](https://github.com/netdata/netdata) -- [Netdata Documentation](https://learn.netdata.cloud/) -- [University of Amsterdam Energy Efficiency Study](https://www.ivanomalavolta.com/files/papers/ICSOC_2023.pdf) -- [Netdata vs Prometheus Comparison](https://www.netdata.cloud/blog/netdata-vs-prometheus-2025/) -- [Netdata MCP Server Documentation](https://github.com/netdata/netdata/blob/master/docs/mcp.md) (Beta) - -## Netdata API for Custom Web Dashboards - -Netdata provides a comprehensive REST API that makes it perfect for integrating with custom web dashboards. The API is exposed locally on each Netdata agent and can be used to fetch real-time metrics in various formats. - -### API Overview - -**Base URL**: `http://localhost:19999/api/v1/` - -**Primary Endpoints**: -- `/api/v1/data` - Query time-series data -- `/api/v1/charts` - Get available charts -- `/api/v1/allmetrics` - Get all metrics in shell-friendly format -- `/api/v1/badge.svg` - Generate SVG badges - -### Key API Features for Dashboard Integration - -1. **Multiple Output Formats** - - JSON (default) - - CSV - - TSV - - JSONP - - Plain text - - Shell variables - -2. **Real-Time Data Access** - - Per-second granularity - - Live streaming capabilities - - Historical data queries - -3. **Flexible Query Parameters** - - Time range selection - - Data grouping and aggregation - - Dimension filtering - - Custom sampling intervals - -### API Query Examples - -#### Basic Data Query -```bash -# Get CPU system data for the last 60 seconds -curl "http://localhost:19999/api/v1/data?chart=system.cpu&after=-60&dimensions=system" - -# Response format: -{ - "api": 1, - "id": "system.cpu", - "name": "system.cpu", - "update_every": 1, - "first_entry": 1640995200, - "last_entry": 1640995260, - "before": 1640995260, - "after": 1640995200, - "dimension_names": ["guest_nice", "guest", "steal", "softirq", "irq", "system", "user", "nice", "iowait"], - "dimension_ids": ["guest_nice", "guest", "steal", "softirq", "irq", "system", "user", "nice", "iowait"], - "latest_values": [0, 0, 0, 0.502513, 0, 2.512563, 5.025126, 0, 0.502513], - "view_update_every": 1, - "dimensions": 9, - "points": 61, - "format": "json", - "result": { - "data": [ - [1640995201, 0, 0, 0, 0.0025, 0, 0.0125, 0.025, 0, 0.0025], - [1640995202, 0, 0, 0, 0.005, 0, 0.0275, 0.0525, 0, 0.005] - // ... more data points - ] - } -} -``` - -#### Available Charts Discovery -```bash -# Get all available charts -curl "http://localhost:19999/api/v1/charts" - -# Returns JSON with all chart definitions including: -# - Chart IDs and names -# - Available dimensions -# - Update frequencies -# - Chart types and units -``` - -#### Memory Usage Example -```bash -# Get memory usage data with specific grouping -curl "http://localhost:19999/api/v1/data?chart=system.ram&after=-300&points=60&group=average" -``` - -#### Network Interface Metrics -```bash -# Get network traffic for specific interface -curl "http://localhost:19999/api/v1/data?chart=net.eth0&after=-60&dimensions=received,sent" -``` - -#### All Metrics in Shell Format -```bash -# Perfect for scripting and automation -curl "http://localhost:19999/api/v1/allmetrics" - -# Example output: -NETDATA_SYSTEM_CPU_USER=2.5 -NETDATA_SYSTEM_CPU_SYSTEM=1.2 -NETDATA_SYSTEM_RAM_USED=4096 -# ... all metrics as shell variables -``` - -### Advanced Query Parameters - -| Parameter | Description | Example | -|-----------|-------------|---------| -| `chart` | Chart ID to query | `system.cpu` | -| `after` | Start time (unix timestamp or relative) | `-60` (60 seconds ago) | -| `before` | End time (unix timestamp or relative) | `-30` (30 seconds ago) | -| `points` | Number of data points to return | `100` | -| `group` | Grouping method | `average`, `max`, `min`, `sum` | -| `gtime` | Group time in seconds | `60` (1-minute averages) | -| `dimensions` | Specific dimensions to include | `user,system,iowait` | -| `format` | Output format | `json`, `csv`, `jsonp` | -| `options` | Query options | `unaligned`, `percentage` | - -### Web Dashboard Integration Strategies - -#### 1. Direct AJAX Calls -```javascript -// Fetch CPU data for dashboard widget -fetch('http://localhost:19999/api/v1/data?chart=system.cpu&after=-60&points=60') - .then(response => response.json()) - .then(data => { - // Process data for chart library (Chart.js, D3, etc.) - updateCPUChart(data.result.data); - }); -``` - -#### 2. Server-Side Proxy -```javascript -// Proxy through your web server to avoid CORS issues -fetch('/api/netdata/system.cpu?after=-60') - .then(response => response.json()) - .then(data => updateWidget(data)); -``` - -#### 3. Real-Time Updates -```javascript -// Poll for updates every second -setInterval(() => { - fetch('http://localhost:19999/api/v1/data?chart=system.cpu&after=-1&points=1') - .then(response => response.json()) - .then(data => updateRealTimeMetrics(data)); -}, 1000); -``` - -### Custom Dashboard Implementation Example - -```html - - -
-