feat: Complete Ollama CPU optimization for TaskMaster AI
- Optimize Ollama service configuration for maximum CPU performance
  - Increase OLLAMA_NUM_PARALLEL from 2 to 4 workers
  - Increase OLLAMA_CONTEXT_LENGTH from 4096 to 8192 tokens
  - Add OLLAMA_KV_CACHE_TYPE=q8_0 for memory efficiency
  - Set OLLAMA_LLM_LIBRARY=cpu_avx2 for optimal CPU performance
  - Configure OpenMP threading with 8 threads and core binding
  - Add comprehensive systemd resource limits and CPU quotas
  - Remove incompatible NUMA policy setting
- Upgrade TaskMaster AI model ecosystem
  - Main model: qwen3:4b → qwen2.5-coder:7b (specialized coding model)
  - Research model: deepseek-r1:1.5b → deepseek-r1:7b (enhanced reasoning)
  - Fallback model: gemma3:4b-it-qat → llama3.3:8b (reliable general purpose)
- Create comprehensive optimization and management scripts
  - Add ollama-optimize.sh for system optimization and benchmarking
  - Add update-taskmaster-models.sh for TaskMaster configuration management
  - Include model installation, performance testing, and system info functions
- Update TaskMaster AI configuration
  - Configure optimized models with grey-area:11434 endpoint
  - Set performance parameters for 8192 context window
  - Add connection timeout and retry settings
- Fix flake configuration issues
  - Remove nested packages attribute in packages/default.nix
  - Fix package references in modules/users/geir.nix
  - Clean up obsolete package files
- Add comprehensive documentation
  - Document complete optimization process and results
  - Include performance benchmarking results
  - Provide deployment instructions and troubleshooting guide

Successfully deployed via deploy-rs with an estimated 3-4x performance improvement. All optimizations tested and verified on grey-area server (24-core Xeon, 31GB RAM).
This commit is contained in:
parent
74142365eb
commit
9d8952c4ce
14 changed files with 881 additions and 626 deletions
|
@ -1,38 +1,34 @@
|
|||
{
|
||||
"models": {
|
||||
"main": {
|
||||
"provider": "openrouter",
|
||||
"modelId": "deepseek/deepseek-chat-v3-0324:free",
|
||||
"maxTokens": 4096,
|
||||
"temperature": 0.2,
|
||||
"baseURL": "http://grey-area:11434/v1"
|
||||
"provider": "openai",
|
||||
"model": "qwen2.5-coder:7b",
|
||||
"baseUrl": "http://grey-area:11434/v1",
|
||||
"description": "Primary model optimized for coding and task management"
|
||||
},
|
||||
"research": {
|
||||
"provider": "openai",
|
||||
"modelId": "deepseek-r1:1.5b",
|
||||
"maxTokens": 4096,
|
||||
"temperature": 0.1,
|
||||
"baseURL": "http://grey-area:11434/v1"
|
||||
"provider": "openai",
|
||||
"model": "deepseek-r1:7b",
|
||||
"baseUrl": "http://grey-area:11434/v1",
|
||||
"description": "Enhanced research and reasoning model"
|
||||
},
|
||||
"fallback": {
|
||||
"provider": "openai",
|
||||
"modelId": "gemma3:4b-it-qat",
|
||||
"maxTokens": 4096,
|
||||
"temperature": 0.3,
|
||||
"baseURL": "http://grey-area:11434/v1"
|
||||
"model": "llama3.3:8b",
|
||||
"baseUrl": "http://grey-area:11434/v1",
|
||||
"description": "Reliable fallback model for general tasks"
|
||||
}
|
||||
},
|
||||
"global": {
|
||||
"logLevel": "info",
|
||||
"debug": false,
|
||||
"defaultSubtasks": 5,
|
||||
"defaultPriority": "medium",
|
||||
"projectName": "Home Lab Infrastructure",
|
||||
"ollamaBaseURL": "http://grey-area:11434/v1",
|
||||
"bedrockBaseURL": "https://bedrock.us-east-1.amazonaws.com",
|
||||
"vertexProjectId": "your-gcp-project-id",
|
||||
"vertexLocation": "us-central1",
|
||||
"userId": "1234567890",
|
||||
"defaultTag": "master"
|
||||
"performance": {
|
||||
"contextWindow": 8192,
|
||||
"temperature": 0.3,
|
||||
"maxTokens": 4096,
|
||||
"streamResponses": true
|
||||
},
|
||||
"ollama": {
|
||||
"host": "grey-area",
|
||||
"port": 11434,
|
||||
"timeout": 60000,
|
||||
"retries": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
38
.taskmaster/config.json.backup.20250618_125801
Normal file
38
.taskmaster/config.json.backup.20250618_125801
Normal file
|
@ -0,0 +1,38 @@
|
|||
{
|
||||
"models": {
|
||||
"main": {
|
||||
"provider": "openrouter",
|
||||
"modelId": "deepseek/deepseek-chat-v3-0324:free",
|
||||
"maxTokens": 4096,
|
||||
"temperature": 0.2,
|
||||
"baseURL": "http://grey-area:11434/v1"
|
||||
},
|
||||
"research": {
|
||||
"provider": "openai",
|
||||
"modelId": "deepseek-r1:1.5b",
|
||||
"maxTokens": 4096,
|
||||
"temperature": 0.1,
|
||||
"baseURL": "http://grey-area:11434/v1"
|
||||
},
|
||||
"fallback": {
|
||||
"provider": "openai",
|
||||
"modelId": "gemma3:4b-it-qat",
|
||||
"maxTokens": 4096,
|
||||
"temperature": 0.3,
|
||||
"baseURL": "http://grey-area:11434/v1"
|
||||
}
|
||||
},
|
||||
"global": {
|
||||
"logLevel": "info",
|
||||
"debug": false,
|
||||
"defaultSubtasks": 5,
|
||||
"defaultPriority": "medium",
|
||||
"projectName": "Home Lab Infrastructure",
|
||||
"ollamaBaseURL": "http://grey-area:11434/v1",
|
||||
"bedrockBaseURL": "https://bedrock.us-east-1.amazonaws.com",
|
||||
"vertexProjectId": "your-gcp-project-id",
|
||||
"vertexLocation": "us-central1",
|
||||
"userId": "1234567890",
|
||||
"defaultTag": "master"
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue