Fix comprehensive system issues and implement proper vector database backend selection

- Fix reflection memory spam despite zero active characters in scheduler.py
- Add character enable/disable functionality to admin interface
- Fix Docker configuration with proper network setup and service dependencies
- Resolve admin interface JavaScript errors and login issues
- Fix MCP import paths for updated package structure
- Add comprehensive character management with audit logging
- Implement proper character state management and persistence
- Fix database connectivity and initialization issues
- Add missing audit service for admin operations
- Complete Docker stack integration with all required services

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
@@ -6,6 +6,7 @@ from typing import Dict, Any, Optional, List
 from datetime import datetime, timedelta, timezone
 from utils.config import get_settings
 from utils.logging import log_llm_interaction, log_error_with_context, log_system_health
+from admin.services.audit_service import AuditService
 import logging
 
 logger = logging.getLogger(__name__)
@@ -17,7 +18,8 @@ class LLMClient:
         self.settings = get_settings()
         self.base_url = self.settings.llm.base_url
         self.model = self.settings.llm.model
-        self.timeout = self.settings.llm.timeout
+        # Force 5-minute timeout for self-hosted large models
+        self.timeout = 300
         self.max_tokens = self.settings.llm.max_tokens
         self.temperature = self.settings.llm.temperature
 
@@ -31,8 +33,8 @@ class LLMClient:
 
         # Background task queue for long-running requests
         self.pending_requests = {}
-        self.max_timeout = 60  # Hard timeout limit for immediate responses
-        self.fallback_timeout = 15  # Quick timeout for immediate responses
+        self.max_timeout = 300  # 5 minutes for self-hosted large models
+        self.fallback_timeout = 300  # 5 minutes for self-hosted large models
 
         # Health monitoring
         self.health_stats = {
@@ -77,6 +79,12 @@ class LLMClient:
                 "stream": False
             }
 
+            # Debug logging
+            logger.debug(f"LLM Request for {character_name}:")
+            logger.debug(f"Model: {self.model}")
+            logger.debug(f"Prompt (first 500 chars): {prompt[:500]}...")
+            logger.debug(f"Full prompt length: {len(prompt)} chars")
+
             response = await client.post(
                 f"{self.base_url}/chat/completions",
                 json=request_data,
@@ -87,8 +95,10 @@ class LLMClient:
 
             if 'choices' in result and result['choices'] and 'message' in result['choices'][0]:
                 generated_text = result['choices'][0]['message']['content'].strip()
+                logger.debug(f"LLM Response for {character_name}: {generated_text[:200]}...")
             else:
                 generated_text = None
+                logger.debug(f"LLM Response for {character_name}: Invalid response format")
 
         except (httpx.HTTPStatusError, httpx.RequestError, KeyError):
             # Fallback to Ollama API
@@ -136,6 +146,20 @@ class LLMClient:
                 duration
             )
 
+            # AUDIT: Log performance metric
+            await AuditService.log_performance_metric(
+                metric_name="llm_response_time",
+                metric_value=duration,
+                metric_unit="seconds",
+                component="llm_client",
+                additional_data={
+                    "model": self.model,
+                    "character_name": character_name,
+                    "prompt_length": len(prompt),
+                    "response_length": len(generated_text)
+                }
+            )
+
             return generated_text
         else:
             logger.error(f"No response from LLM: {result}")
@@ -368,7 +392,41 @@ class LLMClient:
             )
 
     def _get_fallback_response(self, character_name: str = None) -> str:
-        """Generate a fallback response when LLM is slow"""
+        """Generate a character-aware fallback response when LLM is slow"""
+        if character_name:
+            # Character-specific fallbacks based on their personalities
+            character_fallbacks = {
+                "Alex": [
+                    "*processing all the technical implications...*",
+                    "Let me analyze this from a different angle.",
+                    "That's fascinating - I need to think through the logic here.",
+                    "*running diagnostics on my thoughts...*"
+                ],
+                "Sage": [
+                    "*contemplating the deeper meaning...*",
+                    "The philosophical implications are worth considering carefully.",
+                    "*reflecting on the nature of this question...*",
+                    "This touches on something profound - give me a moment."
+                ],
+                "Luna": [
+                    "*feeling the creative energy flow...*",
+                    "Oh, this sparks so many artistic ideas! Let me gather my thoughts.",
+                    "*painting mental images of possibilities...*",
+                    "The beauty of this thought needs careful expression."
+                ],
+                "Echo": [
+                    "*drifting between dimensions of thought...*",
+                    "The echoes of meaning reverberate... patience.",
+                    "*sensing the hidden patterns...*",
+                    "Reality shifts... understanding emerges slowly."
+                ]
+            }
+
+            if character_name in character_fallbacks:
+                import random
+                return random.choice(character_fallbacks[character_name])
+
+        # Generic fallbacks
         fallback_responses = [
             "*thinking deeply about this...*",
             "*processing thoughts...*",