Fix comprehensive system issues and implement proper vector database backend selection

- Fix remaining datetime timezone errors across all database operations
- Implement dynamic vector database backend (Qdrant/ChromaDB) based on install.py configuration
- Add LLM timeout handling with immediate fallback responses for slow self-hosted models
- Use proper install.py configuration (2000 max tokens, 5min timeout, correct LLM endpoint)
- Fix PostgreSQL schema to use timezone-aware columns throughout
- Implement async LLM request handling with background processing
- Add configurable prompt limits and conversation history controls
- Start missing database services (PostgreSQL, Redis) automatically
- Fix environment variable mapping between install.py and application code
- Resolve all timezone-naive vs timezone-aware datetime conflicts

System now properly uses Qdrant vector database as specified in install.py instead of hardcoded ChromaDB.
Characters respond immediately with fallback messages during long LLM processing times.
All database timezone errors resolved with proper timestamptz columns.
This commit is contained in:
root
2025-07-05 21:31:52 -07:00
parent 4c474eeb23
commit 5480219901
38 changed files with 777 additions and 380 deletions

View File

@@ -3,7 +3,7 @@ import httpx
import json
import time
from typing import Dict, Any, Optional, List
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from utils.config import get_settings
from utils.logging import log_llm_interaction, log_error_with_context, log_system_health
import logging
@@ -29,17 +29,23 @@ class LLMClient:
self.cache = {}
self.cache_ttl = 300 # 5 minutes
# Background task queue for long-running requests
self.pending_requests = {}
self.max_timeout = 60 # Hard timeout limit for immediate responses
self.fallback_timeout = 15 # Quick timeout for immediate responses
# Health monitoring
self.health_stats = {
'total_requests': 0,
'successful_requests': 0,
'failed_requests': 0,
'average_response_time': 0,
'last_health_check': datetime.utcnow()
'last_health_check': datetime.now(timezone.utc)
}
async def generate_response(self, prompt: str, character_name: str = None,
max_tokens: int = None, temperature: float = None) -> Optional[str]:
max_tokens: int = None, temperature: float = None,
use_fallback: bool = True) -> Optional[str]:
"""Generate response using LLM"""
try:
# Rate limiting check
@@ -55,8 +61,11 @@ class LLMClient:
start_time = time.time()
# Use shorter timeout for immediate responses, longer for background
effective_timeout = self.fallback_timeout if use_fallback else min(self.timeout, self.max_timeout)
# Try OpenAI-compatible API first (KoboldCPP, etc.)
async with httpx.AsyncClient(timeout=self.timeout) as client:
async with httpx.AsyncClient(timeout=effective_timeout) as client:
try:
# OpenAI-compatible request
request_data = {
@@ -134,9 +143,24 @@ class LLMClient:
return None
except httpx.TimeoutException:
logger.error(f"LLM request timeout for {character_name}")
self._update_stats(False, self.timeout)
return None
if use_fallback:
logger.warning(f"LLM request timeout for {character_name}, using fallback response")
# Queue for background processing if needed
if self.timeout > self.max_timeout:
background_task = asyncio.create_task(self.generate_response(
prompt, character_name, max_tokens, temperature, use_fallback=False
))
request_id = f"{character_name}_{time.time()}"
self.pending_requests[request_id] = background_task
# Return a fallback response immediately
fallback_response = self._get_fallback_response(character_name)
self._update_stats(False, effective_timeout)
return fallback_response
else:
logger.error(f"LLM background request timeout for {character_name}")
self._update_stats(False, effective_timeout)
return None
except httpx.HTTPError as e:
logger.error(f"LLM HTTP error for {character_name}: {e}")
self._update_stats(False, time.time() - start_time)
@@ -231,11 +255,11 @@ class LLMClient:
'response_time': duration,
'model': self.model,
'base_url': self.base_url,
'timestamp': datetime.utcnow().isoformat()
'timestamp': datetime.now(timezone.utc).isoformat()
}
# Update health check time
self.health_stats['last_health_check'] = datetime.utcnow()
self.health_stats['last_health_check'] = datetime.now(timezone.utc)
return health_status
@@ -246,7 +270,7 @@ class LLMClient:
'error': str(e),
'model': self.model,
'base_url': self.base_url,
'timestamp': datetime.utcnow().isoformat()
'timestamp': datetime.now(timezone.utc).isoformat()
}
def get_statistics(self) -> Dict[str, Any]:
@@ -342,6 +366,67 @@ class LLMClient:
self.health_stats['average_response_time'] = (
(current_avg * (total_requests - 1) + duration) / total_requests
)
def _get_fallback_response(self, character_name: str = None) -> str:
"""Generate a fallback response when LLM is slow"""
fallback_responses = [
"*thinking deeply about this...*",
"*processing thoughts...*",
"*contemplating the discussion...*",
"*reflecting on what you've said...*",
"*considering different perspectives...*",
"Hmm, that's an interesting point to consider.",
"I need a moment to think about that.",
"That's worth reflecting on carefully.",
"*taking time to formulate thoughts...*"
]
import random
return random.choice(fallback_responses)
async def generate_response_with_fallback(self, prompt: str, character_name: str = None,
max_tokens: int = None, temperature: float = None) -> str:
"""Generate response with guaranteed fallback if LLM is slow"""
try:
# Try immediate response first
response = await self.generate_response(
prompt, character_name, max_tokens, temperature, use_fallback=True
)
if response:
return response
else:
# Return fallback if no response
return self._get_fallback_response(character_name)
except Exception as e:
log_error_with_context(e, {
"character_name": character_name,
"prompt_length": len(prompt)
})
return self._get_fallback_response(character_name)
async def cleanup_pending_requests(self):
"""Clean up completed background requests"""
completed_requests = []
for request_id, task in self.pending_requests.items():
if task.done():
completed_requests.append(request_id)
try:
result = await task
if result:
logger.info(f"Background LLM request {request_id} completed successfully")
except Exception as e:
logger.error(f"Background LLM request {request_id} failed: {e}")
# Remove completed requests
for request_id in completed_requests:
del self.pending_requests[request_id]
def get_pending_count(self) -> int:
"""Get number of pending background requests"""
return len(self.pending_requests)
class PromptManager:
"""Manages prompt templates and optimization"""