Fix system-wide issues and implement configurable vector database backend selection

- Fix remaining datetime timezone errors across all database operations
- Implement dynamic vector database backend (Qdrant/ChromaDB) based on install.py configuration
- Add LLM timeout handling with immediate fallback responses for slow self-hosted models
- Use the values configured by install.py (2000 max tokens, 5-minute timeout, correct LLM endpoint)
- Fix PostgreSQL schema to use timezone-aware columns throughout
- Implement async LLM request handling with background processing
- Add configurable prompt limits and conversation history controls
- Start missing database services (PostgreSQL, Redis) automatically
- Fix environment variable mapping between install.py and application code
- Resolve all timezone-naive vs timezone-aware datetime conflicts

The system now uses the Qdrant vector database specified in install.py instead of a hardcoded ChromaDB backend.
Characters respond immediately with fallback messages during long LLM processing times.
All database timezone errors are resolved by using timezone-aware timestamptz columns.
This commit is contained in:
root
2025-07-05 21:31:52 -07:00
parent 4c474eeb23
commit 5480219901
38 changed files with 777 additions and 380 deletions

View File

@@ -3,7 +3,7 @@ from discord.ext import commands, tasks
import asyncio
from typing import Optional, Dict, Any
import logging
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from utils.config import get_settings
from utils.logging import log_error_with_context, log_system_health
from database.connection import get_db_session
@@ -36,7 +36,7 @@ class FishbowlBot(commands.Bot):
# Health monitoring
self.health_check_task = None
self.last_heartbeat = datetime.utcnow()
self.last_heartbeat = datetime.now(timezone.utc)
async def setup_hook(self):
"""Called when the bot is starting up"""
@@ -74,7 +74,7 @@ class FishbowlBot(commands.Bot):
await self.conversation_engine.initialize(self)
# Update heartbeat
self.last_heartbeat = datetime.utcnow()
self.last_heartbeat = datetime.now(timezone.utc)
log_system_health("discord_bot", "connected", {
"guild": self.target_guild.name,
@@ -128,7 +128,7 @@ class FishbowlBot(commands.Bot):
async def on_resumed(self):
"""Handle bot reconnection"""
logger.info("Bot reconnected to Discord")
self.last_heartbeat = datetime.utcnow()
self.last_heartbeat = datetime.now(timezone.utc)
log_system_health("discord_bot", "reconnected")
async def send_character_message(self, character_name: str, content: str,
@@ -217,14 +217,14 @@ class FishbowlBot(commands.Bot):
content=content,
discord_message_id=discord_message_id,
response_to_message_id=reply_to_message_id,
timestamp=datetime.utcnow()
timestamp=datetime.now(timezone.utc)
)
session.add(message)
await session.commit()
# Update character's last activity
character.last_active = datetime.utcnow()
character.last_active = datetime.now(timezone.utc)
character.last_message_id = message.id
await session.commit()
@@ -251,25 +251,29 @@ class FishbowlBot(commands.Bot):
"""Periodic health check"""
try:
# Check bot connectivity
if self.is_closed():
if self.is_closed() or not self.user:
log_system_health("discord_bot", "disconnected")
return
# Check heartbeat
time_since_heartbeat = datetime.utcnow() - self.last_heartbeat
time_since_heartbeat = datetime.now(timezone.utc) - self.last_heartbeat
if time_since_heartbeat > timedelta(minutes=10):
log_system_health("discord_bot", "heartbeat_stale", {
"minutes_since_heartbeat": time_since_heartbeat.total_seconds() / 60
})
# Update heartbeat
self.last_heartbeat = datetime.utcnow()
self.last_heartbeat = datetime.now(timezone.utc)
# Log health metrics
uptime_minutes = 0
if self.user and hasattr(self.user, 'created_at') and self.user.created_at:
uptime_minutes = (datetime.now(timezone.utc) - self.user.created_at.replace(tzinfo=timezone.utc)).total_seconds() / 60
log_system_health("discord_bot", "healthy", {
"latency_ms": round(self.latency * 1000, 2),
"guild_count": len(self.guilds),
"uptime_minutes": (datetime.utcnow() - self.user.created_at).total_seconds() / 60
"uptime_minutes": uptime_minutes
})
except Exception as e: