Implement comprehensive LLM provider system with global cost protection

- Add multi-provider LLM architecture supporting OpenRouter, OpenAI, Gemini, and custom providers
- Implement global LLM on/off switch that defaults to DISABLED for cost protection
- Add per-character LLM configuration with provider-specific models and settings
- Create performance-optimized caching system for checks of whether the LLM is enabled
- Add API key validation before enabling LLM providers to prevent broken configurations
- Implement audit logging for all LLM enable/disable actions for cost accountability
- Create comprehensive admin UI with prominent cost warnings and confirmation dialogs
- Add visual indicators in character list for custom AI model configurations
- Build character-specific LLM client system with global fallback mechanism
- Add database schema support for per-character LLM settings
- Implement graceful fallback responses when LLM is globally disabled
- Create provider testing and validation system for reliable connections
This commit is contained in:
root
2025-07-08 07:35:48 -07:00
parent 004f0325ec
commit 10563900a3
59 changed files with 6686 additions and 791 deletions

View File

@@ -20,7 +20,7 @@ from bot.discord_client import FishbowlBot
from bot.message_handler import MessageHandler, CommandHandler
from conversation.engine import ConversationEngine
from conversation.scheduler import ConversationScheduler
from llm.client import llm_client
from llm.multi_provider_client import multi_llm_client, initialize_llm_client
from rag.vector_store import vector_store_manager
from rag.community_knowledge import initialize_community_knowledge_rag
from rag.memory_sharing import MemorySharingManager
@@ -72,12 +72,21 @@ class FishbowlApplication:
await create_tables()
logger.info("Database initialized")
# Check LLM availability (non-blocking)
is_available = await llm_client.check_model_availability()
if not is_available:
logger.warning("LLM model not available at startup. Bot will continue and retry connections.")
# Initialize multi-provider LLM client
logger.info("Initializing multi-provider LLM system...")
await initialize_llm_client()
# Check provider health (non-blocking)
health_status = await multi_llm_client.health_check()
provider_info = multi_llm_client.get_provider_info()
healthy_providers = [name for name, healthy in health_status.items() if healthy]
if healthy_providers:
current_provider = multi_llm_client.get_current_provider()
logger.info(f"LLM providers available: {healthy_providers}")
logger.info(f"Current primary provider: {current_provider}")
else:
logger.info(f"LLM model '{llm_client.model}' is available")
logger.warning("No LLM providers are healthy! Bot will continue and retry connections.")
# Initialize RAG systems
logger.info("Initializing RAG systems...")
@@ -106,6 +115,10 @@ class FishbowlApplication:
# Initialize MCP servers
logger.info("Initializing MCP servers...")
# Initialize self-modification server
self.mcp_servers.append(mcp_server)
logger.info("Self-modification MCP server initialized")
# Initialize file system server
await filesystem_server.initialize(self.vector_store, character_names)
self.mcp_servers.append(filesystem_server)
@@ -248,20 +261,21 @@ class FishbowlApplication:
signal.signal(signal.SIGBREAK, signal_handler)
async def _llm_cleanup_loop(self):
"""Background task to clean up completed LLM requests"""
"""Background task to monitor LLM provider health"""
try:
while not self.shutdown_event.is_set():
await llm_client.cleanup_pending_requests()
pending_count = llm_client.get_pending_count()
# Check provider health periodically
health_status = await multi_llm_client.health_check()
unhealthy_providers = [name for name, healthy in health_status.items() if not healthy]
if pending_count > 0:
logger.debug(f"LLM cleanup: {pending_count} pending background requests")
if unhealthy_providers:
logger.debug(f"Unhealthy LLM providers: {unhealthy_providers}")
# Wait 30 seconds before next cleanup
await asyncio.sleep(30)
# Wait 60 seconds before next health check
await asyncio.sleep(60)
except asyncio.CancelledError:
logger.info("LLM cleanup task cancelled")
logger.info("LLM monitoring task cancelled")
except Exception as e:
logger.error(f"Error in LLM cleanup loop: {e}")