Implement comprehensive LLM provider system with global cost protection

- Add multi-provider LLM architecture supporting OpenRouter, OpenAI, Gemini, and custom providers
- Implement global LLM on/off switch with default DISABLED state for cost protection
- Add per-character LLM configuration with provider-specific models and settings
- Create a performance-optimized cache for LLM enabled-status checks
- Add API key validation before enabling LLM providers to prevent broken configurations
- Implement audit logging of all LLM enable/disable actions to support cost accountability
- Create comprehensive admin UI with prominent cost warnings and confirmation dialogs
- Add visual indicators in character list for custom AI model configurations
- Build character-specific LLM client system with global fallback mechanism
- Add database schema support for per-character LLM settings
- Implement graceful fallback responses when LLM is globally disabled (see the sketch below)
- Create provider testing and validation system for reliable connections
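
A minimal sketch of how the global kill-switch, the cached enabled-status check, and the graceful fallback listed above could fit together. Python is assumed as the implementation language, and all names here (LLMGate, is_globally_enabled, generate_or_fallback, the "llm_enabled" setting key) are illustrative rather than taken from this commit:

import time

class LLMGate:
    """Illustrative gate around a global 'LLM enabled' flag, with a short-lived cache."""

    def __init__(self, settings_store, cache_ttl: float = 30.0):
        self.settings_store = settings_store  # hypothetical DB- or config-backed settings object
        self.cache_ttl = cache_ttl
        self._cached_enabled = False          # defaults to DISABLED for cost protection
        self._cached_at = 0.0

    def is_globally_enabled(self) -> bool:
        # Serve from cache so the settings store is not hit on every message
        if time.monotonic() - self._cached_at < self.cache_ttl:
            return self._cached_enabled
        self._cached_enabled = bool(self.settings_store.get("llm_enabled", False))
        self._cached_at = time.monotonic()
        return self._cached_enabled

    async def generate_or_fallback(self, client, prompt: str) -> str:
        # Canned response instead of an API call while LLM usage is switched off globally
        if not self.is_globally_enabled():
            return "AI responses are currently disabled by the administrator."
        return await client.generate(prompt)
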
root
2025-07-08 07:35:48 -07:00
parent 004f0325ec
commit 10563900a3
59 changed files with 6686 additions and 791 deletions

@@ -0,0 +1,79 @@
# Example LLM Provider Configuration
# Copy this section to your main fishbowl_config.json under "llm" -> "providers"
llm:
  # Legacy config (still supported for backwards compatibility)
  base_url: "${LLM_BASE_URL:http://localhost:11434}"
  model: "${LLM_MODEL:llama2}"
  timeout: ${LLM_TIMEOUT:300}
  max_tokens: ${LLM_MAX_TOKENS:2000}
  temperature: ${LLM_TEMPERATURE:0.8}

  # New multi-provider configuration
  providers:
    # OpenRouter (supports many models including Claude, GPT, Llama)
    openrouter:
      type: "openrouter"
      enabled: ${OPENROUTER_ENABLED:false}
      priority: 100  # Highest priority
      config:
        api_key: "${OPENROUTER_API_KEY:}"
        base_url: "https://openrouter.ai/api/v1"
        model: "${OPENROUTER_MODEL:anthropic/claude-3-sonnet}"
        timeout: 300
        max_tokens: 2000
        temperature: 0.8
        app_name: "discord-fishbowl"

    # OpenAI
    openai:
      type: "openai"
      enabled: ${OPENAI_ENABLED:false}
      priority: 90
      config:
        api_key: "${OPENAI_API_KEY:}"
        base_url: "https://api.openai.com/v1"
        model: "${OPENAI_MODEL:gpt-4o-mini}"
        timeout: 300
        max_tokens: 2000
        temperature: 0.8

    # Google Gemini
    gemini:
      type: "gemini"
      enabled: ${GEMINI_ENABLED:false}
      priority: 80
      config:
        api_key: "${GEMINI_API_KEY:}"
        base_url: "https://generativelanguage.googleapis.com/v1beta"
        model: "${GEMINI_MODEL:gemini-1.5-flash}"
        timeout: 300
        max_tokens: 2000
        temperature: 0.8

    # Custom/Local (KoboldCPP, Ollama, etc.)
    custom:
      type: "custom"
      enabled: ${CUSTOM_LLM_ENABLED:true}
      priority: 70  # Lower priority - fallback
      config:
        base_url: "${LLM_BASE_URL:http://192.168.1.200:5005/v1}"
        model: "${LLM_MODEL:koboldcpp/Broken-Tutu-24B-Transgression-v2.0.i1-Q4_K_M}"
        api_key: "${LLM_API_KEY:x}"
        timeout: 300
        max_tokens: 2000
        temperature: 0.8
        api_format: "openai"  # or "ollama"

    # Ollama (local models)
    ollama:
      type: "custom"
      enabled: ${OLLAMA_ENABLED:false}
      priority: 60
      config:
        base_url: "http://localhost:11434"
        model: "${OLLAMA_MODEL:llama3}"
        timeout: 300
        max_tokens: 2000
        temperature: 0.8
        api_format: "ollama"
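
The ${VAR:default} placeholders above read an environment variable and fall back to the default after the colon. How the project actually resolves them is not shown in this excerpt; one plausible helper, sketched in Python with hypothetical names, would be:

import os
import re

_PLACEHOLDER = re.compile(r"\$\{([A-Z0-9_]+):([^}]*)\}")

def expand_env(value):
    """Recursively replace ${VAR:default} placeholders in a loaded config tree."""
    if isinstance(value, dict):
        return {key: expand_env(val) for key, val in value.items()}
    if isinstance(value, list):
        return [expand_env(item) for item in value]
    if isinstance(value, str):
        return _PLACEHOLDER.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)
    return value

Note that values such as enabled: ${OPENROUTER_ENABLED:false} still come out of this as strings and would need coercion to booleans or numbers before use.
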
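
To illustrate how the enabled and priority fields could drive provider selection, and how a per-character setting could fall back to the global order, here is a rough Python sketch. The function names and the llm_provider character field are hypothetical, not taken from this commit; it assumes the YAML has already been loaded into a dict and the placeholders resolved to their proper types:

from typing import Any, Dict, Optional

def pick_provider(llm_cfg: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Return the enabled provider entry with the highest priority, or None."""
    providers = llm_cfg.get("providers", {})
    candidates = [
        {"name": name, **entry}
        for name, entry in providers.items()
        if entry.get("enabled")
    ]
    if not candidates:
        return None  # nothing enabled: fall back to the legacy config or disable LLM features
    return max(candidates, key=lambda entry: entry.get("priority", 0))

def pick_provider_for_character(llm_cfg: Dict[str, Any],
                                character_cfg: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """A character-specific provider choice wins if set and enabled; otherwise use the global order."""
    override = (character_cfg or {}).get("llm_provider")
    providers = llm_cfg.get("providers", {})
    if override and providers.get(override, {}).get("enabled"):
        return {"name": override, **providers[override]}
    return pick_provider(llm_cfg)
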