Implement comprehensive LLM provider system with global cost protection

- Add multi-provider LLM architecture supporting OpenRouter, OpenAI, Gemini, and custom providers
- Implement global LLM on/off switch with default DISABLED state for cost protection
- Add per-character LLM configuration with provider-specific models and settings
- Create performance-optimized caching system for LLM enabled status checks
- Add API key validation before enabling LLM providers to prevent broken configurations
- Implement audit logging for all LLM enable/disable actions for cost accountability
- Create comprehensive admin UI with prominent cost warnings and confirmation dialogs
- Add visual indicators in character list for custom AI model configurations
- Build character-specific LLM client system with global fallback mechanism
- Add database schema support for per-character LLM settings
- Implement graceful fallback responses when LLM is globally disabled
- Create provider testing and validation system for reliable connections
2025-07-08 07:35:48 -07:00
parent 004f0325ec
commit 10563900a3
59 changed files with 6686 additions and 791 deletions
--- a/src/main.py
+++ b/src/main.py
@@ -20,7 +20,7 @@ from bot.discord_client import FishbowlBot
 from bot.message_handler import MessageHandler, CommandHandler
 from conversation.engine import ConversationEngine
 from conversation.scheduler import ConversationScheduler
-from llm.client import llm_client
+from llm.multi_provider_client import multi_llm_client, initialize_llm_client
 from rag.vector_store import vector_store_manager
 from rag.community_knowledge import initialize_community_knowledge_rag
 from rag.memory_sharing import MemorySharingManager
@@ -72,12 +72,21 @@ class FishbowlApplication:
            await create_tables()
            logger.info("Database initialized")
            
-            # Check LLM availability (non-blocking)
-            is_available = await llm_client.check_model_availability()
-            if not is_available:
-                logger.warning("LLM model not available at startup. Bot will continue and retry connections.")
+            # Initialize multi-provider LLM client
+            logger.info("Initializing multi-provider LLM system...")
+            await initialize_llm_client()
+            
+            # Check provider health (non-blocking)
+            health_status = await multi_llm_client.health_check()
+            provider_info = multi_llm_client.get_provider_info()
+            
+            healthy_providers = [name for name, healthy in health_status.items() if healthy]
+            if healthy_providers:
+                current_provider = multi_llm_client.get_current_provider()
+                logger.info(f"LLM providers available: {healthy_providers}")
+                logger.info(f"Current primary provider: {current_provider}")
            else:
-                logger.info(f"LLM model '{llm_client.model}' is available")
+                logger.warning("No LLM providers are healthy! Bot will continue and retry connections.")
            
            # Initialize RAG systems
            logger.info("Initializing RAG systems...")
@@ -106,6 +115,10 @@ class FishbowlApplication:
            # Initialize MCP servers
            logger.info("Initializing MCP servers...")
            
+            # Initialize self-modification server
+            self.mcp_servers.append(mcp_server)
+            logger.info("Self-modification MCP server initialized")
+            
            # Initialize file system server
            await filesystem_server.initialize(self.vector_store, character_names)
            self.mcp_servers.append(filesystem_server)
@@ -248,20 +261,21 @@ class FishbowlApplication:
            signal.signal(signal.SIGBREAK, signal_handler)
    
    async def _llm_cleanup_loop(self):
-        """Background task to clean up completed LLM requests"""
+        """Background task to monitor LLM provider health"""
        try:
            while not self.shutdown_event.is_set():
-                await llm_client.cleanup_pending_requests()
-                pending_count = llm_client.get_pending_count()
+                # Check provider health periodically
+                health_status = await multi_llm_client.health_check()
+                unhealthy_providers = [name for name, healthy in health_status.items() if not healthy]
                
-                if pending_count > 0:
-                    logger.debug(f"LLM cleanup: {pending_count} pending background requests")
+                if unhealthy_providers:
+                    logger.debug(f"Unhealthy LLM providers: {unhealthy_providers}")
                
-                # Wait 30 seconds before next cleanup
-                await asyncio.sleep(30)
+                # Wait 60 seconds before next health check
+                await asyncio.sleep(60)
                
        except asyncio.CancelledError:
-            logger.info("LLM cleanup task cancelled")
+            logger.info("LLM monitoring task cancelled")
        except Exception as e:
            logger.error(f"Error in LLM cleanup loop: {e}")