Fix comprehensive system issues and implement proper vector database backend selection

- Fix reflection memory spam despite zero active characters in scheduler.py
- Add character enable/disable functionality to admin interface
- Fix Docker configuration with proper network setup and service dependencies
- Resolve admin interface JavaScript errors and login issues
- Fix MCP import paths for updated package structure
- Add comprehensive character management with audit logging
- Implement proper character state management and persistence
- Fix database connectivity and initialization issues
- Add missing audit service for admin operations
- Complete Docker stack integration with all required services

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
@@ -6,6 +6,7 @@ from typing import Dict, Any, Optional, List
 from datetime import datetime, timedelta, timezone
 from utils.config import get_settings
 from utils.logging import log_llm_interaction, log_error_with_context, log_system_health
+from admin.services.audit_service import AuditService
 import logging
 
 logger = logging.getLogger(__name__)
@@ -17,7 +18,8 @@ class LLMClient:
         self.settings = get_settings()
         self.base_url = self.settings.llm.base_url
         self.model = self.settings.llm.model
-        self.timeout = self.settings.llm.timeout
+        # Force 5-minute timeout for self-hosted large models
+        self.timeout = 300
         self.max_tokens = self.settings.llm.max_tokens
         self.temperature = self.settings.llm.temperature
 
@@ -31,8 +33,8 @@ class LLMClient:
 
         # Background task queue for long-running requests
         self.pending_requests = {}
-        self.max_timeout = 60  # Hard timeout limit for immediate responses
-        self.fallback_timeout = 15  # Quick timeout for immediate responses
+        self.max_timeout = 300  # 5 minutes for self-hosted large models
+        self.fallback_timeout = 300  # 5 minutes for self-hosted large models
 
         # Health monitoring
         self.health_stats = {
@@ -77,6 +79,12 @@ class LLMClient:
                 "stream": False
             }
 
+            # Debug logging
+            logger.debug(f"LLM Request for {character_name}:")
+            logger.debug(f"Model: {self.model}")
+            logger.debug(f"Prompt (first 500 chars): {prompt[:500]}...")
+            logger.debug(f"Full prompt length: {len(prompt)} chars")
+
             response = await client.post(
                 f"{self.base_url}/chat/completions",
                 json=request_data,
@@ -87,8 +95,10 @@ class LLMClient:
 
             if 'choices' in result and result['choices'] and 'message' in result['choices'][0]:
                 generated_text = result['choices'][0]['message']['content'].strip()
+                logger.debug(f"LLM Response for {character_name}: {generated_text[:200]}...")
             else:
                 generated_text = None
+                logger.debug(f"LLM Response for {character_name}: Invalid response format")
 
         except (httpx.HTTPStatusError, httpx.RequestError, KeyError):
             # Fallback to Ollama API
@@ -136,6 +146,20 @@ class LLMClient:
                 duration
             )
 
+            # AUDIT: Log performance metric
+            await AuditService.log_performance_metric(
+                metric_name="llm_response_time",
+                metric_value=duration,
+                metric_unit="seconds",
+                component="llm_client",
+                additional_data={
+                    "model": self.model,
+                    "character_name": character_name,
+                    "prompt_length": len(prompt),
+                    "response_length": len(generated_text)
+                }
+            )
+
             return generated_text
         else:
             logger.error(f"No response from LLM: {result}")
@@ -368,7 +392,41 @@ class LLMClient:
             )
 
     def _get_fallback_response(self, character_name: str = None) -> str:
-        """Generate a fallback response when LLM is slow"""
+        """Generate a character-aware fallback response when LLM is slow"""
+        if character_name:
+            # Character-specific fallbacks based on their personalities
+            character_fallbacks = {
+                "Alex": [
+                    "*processing all the technical implications...*",
+                    "Let me analyze this from a different angle.",
+                    "That's fascinating - I need to think through the logic here.",
+                    "*running diagnostics on my thoughts...*"
+                ],
+                "Sage": [
+                    "*contemplating the deeper meaning...*",
+                    "The philosophical implications are worth considering carefully.",
+                    "*reflecting on the nature of this question...*",
+                    "This touches on something profound - give me a moment."
+                ],
+                "Luna": [
+                    "*feeling the creative energy flow...*",
+                    "Oh, this sparks so many artistic ideas! Let me gather my thoughts.",
+                    "*painting mental images of possibilities...*",
+                    "The beauty of this thought needs careful expression."
+                ],
+                "Echo": [
+                    "*drifting between dimensions of thought...*",
+                    "The echoes of meaning reverberate... patience.",
+                    "*sensing the hidden patterns...*",
+                    "Reality shifts... understanding emerges slowly."
+                ]
+            }
+
+            if character_name in character_fallbacks:
+                import random
+                return random.choice(character_fallbacks[character_name])
+
+        # Generic fallbacks
         fallback_responses = [
             "*thinking deeply about this...*",
             "*processing thoughts...*",