Initial implementation of autonomous Discord LLM fishbowl
Core Features:
- Full autonomous AI character ecosystem with multi-personality support
- Advanced RAG system with personal, community, and creative memory layers
- MCP integration for character self-modification and file system access
- PostgreSQL database with comprehensive character relationship tracking
- Redis caching and ChromaDB vector storage for semantic memory retrieval
- Dynamic personality evolution based on interactions and self-reflection
- Community knowledge management with tradition and norm identification
- Sophisticated conversation engine with natural scheduling and topic management
- Docker containerization and production-ready deployment configuration

Architecture:
- Multi-layer vector databases for personal, community, and creative knowledge
- Character file systems with personal and shared digital spaces
- Autonomous self-modification with safety validation and audit trails
- Memory importance scoring with time-based decay and consolidation
- Community health monitoring and cultural evolution tracking
- RAG-powered conversation context and relationship optimization

Characters can:
- Develop authentic personalities through experience-based learning
- Create and build upon original creative works and philosophical insights
- Form complex relationships with memory of past interactions
- Modify their own personality traits through self-reflection cycles
- Contribute to and learn from shared community knowledge
- Manage personal digital spaces with diaries, creative works, and reflections
- Engage in collaborative projects and community decision-making

The system supports indefinite autonomous operation with continuous character development, community culture evolution, and creative collaboration.
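Below is a minimal usage sketch of the async client added in src/llm/client.py. It is illustrative only: the src.llm.client import path, the character name "Ada", and a reachable Ollama-compatible server at the configured base_url are assumptions, not part of this commit.

    # Hypothetical caller, not part of this commit.
    import asyncio

    from src.llm.client import llm_client  # module-level singleton defined in the new file


    async def demo() -> None:
        # Confirm the configured model is reachable before generating.
        if not await llm_client.check_model_availability():
            print("LLM model unavailable")
            return

        reply = await llm_client.generate_response(
            "Introduce yourself in one sentence.",
            character_name="Ada",  # illustrative character name
        )
        # generate_response() returns None on rate limiting, timeout, or HTTP errors.
        print(reply or "no response")


    asyncio.run(demo())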
src/llm/client.py (new file, 394 lines added)
@@ -0,0 +1,394 @@
import asyncio
import hashlib
import json
import logging
import time
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

import httpx

from ..utils.config import get_settings
from ..utils.logging import log_llm_interaction, log_error_with_context, log_system_health

logger = logging.getLogger(__name__)


class LLMClient:
    """Async LLM client for interacting with local LLM APIs (Ollama, etc.)."""

    def __init__(self):
        self.settings = get_settings()
        self.base_url = self.settings.llm.base_url
        self.model = self.settings.llm.model
        self.timeout = self.settings.llm.timeout
        self.max_tokens = self.settings.llm.max_tokens
        self.temperature = self.settings.llm.temperature

        # Rate limiting
        self.request_times = []
        self.max_requests_per_minute = 30

        # Response caching
        self.cache = {}
        self.cache_ttl = 300  # 5 minutes

        # Health monitoring
        self.health_stats = {
            'total_requests': 0,
            'successful_requests': 0,
            'failed_requests': 0,
            'average_response_time': 0,
            'last_health_check': datetime.utcnow()
        }

    async def generate_response(self, prompt: str, character_name: Optional[str] = None,
                                max_tokens: Optional[int] = None,
                                temperature: Optional[float] = None) -> Optional[str]:
        """Generate a response using the LLM."""
        # Record the start time up front so every error handler below can
        # compute a duration without risking an unbound local variable.
        start_time = time.time()
        try:
            # Rate limiting check
            if not await self._check_rate_limit():
                logger.warning(f"Rate limit exceeded for {character_name}")
                return None

            # Check cache first
            cache_key = self._generate_cache_key(prompt, character_name, max_tokens, temperature)
            cached_response = self._get_cached_response(cache_key)
            if cached_response:
                return cached_response

            # Prepare request
            request_data = {
                "model": self.model,
                "prompt": prompt,
                "options": {
                    "temperature": temperature or self.temperature,
                    "num_predict": max_tokens or self.max_tokens,
                    "top_p": 0.9,
                    "top_k": 40,
                    "repeat_penalty": 1.1
                },
                "stream": False
            }

            # Make API call
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(
                    f"{self.base_url}/api/generate",
                    json=request_data,
                    headers={"Content-Type": "application/json"}
                )

            response.raise_for_status()
            result = response.json()

            if 'response' in result and result['response']:
                generated_text = result['response'].strip()

                # Cache the response
                self._cache_response(cache_key, generated_text)

                # Update stats
                duration = time.time() - start_time
                self._update_stats(True, duration)

                # Log interaction
                log_llm_interaction(
                    character_name or "unknown",
                    len(prompt),
                    len(generated_text),
                    self.model,
                    duration
                )

                return generated_text
            else:
                logger.error(f"No response from LLM: {result}")
                self._update_stats(False, time.time() - start_time)
                return None

        except httpx.TimeoutException:
            logger.error(f"LLM request timeout for {character_name}")
            self._update_stats(False, self.timeout)
            return None
        except httpx.HTTPError as e:
            logger.error(f"LLM HTTP error for {character_name}: {e}")
            self._update_stats(False, time.time() - start_time)
            return None
        except Exception as e:
            log_error_with_context(e, {
                "character_name": character_name,
                "prompt_length": len(prompt),
                "model": self.model
            })
            self._update_stats(False, time.time() - start_time)
            return None

    async def generate_batch_responses(self, prompts: List[Dict[str, Any]]) -> List[Optional[str]]:
        """Generate multiple responses in batch."""
        tasks = []
        for prompt_data in prompts:
            task = self.generate_response(
                prompt=prompt_data['prompt'],
                character_name=prompt_data.get('character_name'),
                max_tokens=prompt_data.get('max_tokens'),
                temperature=prompt_data.get('temperature')
            )
            tasks.append(task)

        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Convert exceptions to None
        return [result if not isinstance(result, Exception) else None for result in results]

    async def check_model_availability(self) -> bool:
        """Check whether the configured LLM model is available."""
        try:
            async with httpx.AsyncClient(timeout=10) as client:
                response = await client.get(f"{self.base_url}/api/tags")
                response.raise_for_status()

                models = response.json()
                available_models = [model.get('name', '') for model in models.get('models', [])]

                is_available = any(self.model in model_name for model_name in available_models)

                log_system_health(
                    "llm_client",
                    "available" if is_available else "model_not_found",
                    {"model": self.model, "available_models": available_models}
                )

                return is_available

        except Exception as e:
            log_error_with_context(e, {"model": self.model})
            log_system_health("llm_client", "unavailable", {"error": str(e)})
            return False

    async def get_model_info(self) -> Dict[str, Any]:
        """Get information about the current model."""
        try:
            async with httpx.AsyncClient(timeout=10) as client:
                response = await client.post(
                    f"{self.base_url}/api/show",
                    json={"name": self.model}
                )
                response.raise_for_status()

                return response.json()

        except Exception as e:
            log_error_with_context(e, {"model": self.model})
            return {}

    async def health_check(self) -> Dict[str, Any]:
        """Perform a health check on the LLM service."""
        try:
            start_time = time.time()

            # Test with a simple prompt
            test_prompt = "Respond with 'OK' if you can understand this message."
            response = await self.generate_response(test_prompt, "health_check")

            duration = time.time() - start_time

            health_status = {
                'status': 'healthy' if response else 'unhealthy',
                'response_time': duration,
                'model': self.model,
                'base_url': self.base_url,
                'timestamp': datetime.utcnow().isoformat()
            }

            # Update health check time
            self.health_stats['last_health_check'] = datetime.utcnow()

            return health_status

        except Exception as e:
            log_error_with_context(e, {"component": "llm_health_check"})
            return {
                'status': 'error',
                'error': str(e),
                'model': self.model,
                'base_url': self.base_url,
                'timestamp': datetime.utcnow().isoformat()
            }

    def get_statistics(self) -> Dict[str, Any]:
        """Get client statistics."""
        return {
            'total_requests': self.health_stats['total_requests'],
            'successful_requests': self.health_stats['successful_requests'],
            'failed_requests': self.health_stats['failed_requests'],
            'success_rate': (
                self.health_stats['successful_requests'] / self.health_stats['total_requests']
                if self.health_stats['total_requests'] > 0 else 0
            ),
            'average_response_time': self.health_stats['average_response_time'],
            'cache_size': len(self.cache),
            'last_health_check': self.health_stats['last_health_check'].isoformat()
        }

    async def _check_rate_limit(self) -> bool:
        """Check whether we are within rate limits."""
        now = time.time()

        # Remove old requests (older than 1 minute)
        self.request_times = [t for t in self.request_times if now - t < 60]

        # Check if we can make another request
        if len(self.request_times) >= self.max_requests_per_minute:
            return False

        # Add current request time
        self.request_times.append(now)
        return True

    def _generate_cache_key(self, prompt: str, character_name: Optional[str] = None,
                            max_tokens: Optional[int] = None,
                            temperature: Optional[float] = None) -> str:
        """Generate a cache key for a response."""
        cache_data = {
            'prompt': prompt,
            'character_name': character_name,
            'max_tokens': max_tokens or self.max_tokens,
            'temperature': temperature or self.temperature,
            'model': self.model
        }

        cache_string = json.dumps(cache_data, sort_keys=True)
        return hashlib.md5(cache_string.encode()).hexdigest()

    def _get_cached_response(self, cache_key: str) -> Optional[str]:
        """Get a cached response if available and not expired."""
        if cache_key in self.cache:
            cached_data = self.cache[cache_key]
            if time.time() - cached_data['timestamp'] < self.cache_ttl:
                return cached_data['response']
            else:
                # Remove expired cache entry
                del self.cache[cache_key]

        return None

    def _cache_response(self, cache_key: str, response: str):
        """Cache a response."""
        self.cache[cache_key] = {
            'response': response,
            'timestamp': time.time()
        }

        # Clean up old cache entries if the cache is too large
        if len(self.cache) > 100:
            # Remove the oldest entries
            oldest_keys = sorted(
                self.cache.keys(),
                key=lambda k: self.cache[k]['timestamp']
            )[:20]

            for key in oldest_keys:
                del self.cache[key]

    def _update_stats(self, success: bool, duration: float):
        """Update health statistics."""
        self.health_stats['total_requests'] += 1

        if success:
            self.health_stats['successful_requests'] += 1
        else:
            self.health_stats['failed_requests'] += 1

        # Update average response time (rolling average)
        total_requests = self.health_stats['total_requests']
        current_avg = self.health_stats['average_response_time']

        self.health_stats['average_response_time'] = (
            (current_avg * (total_requests - 1) + duration) / total_requests
        )

class PromptManager:
    """Manages prompt templates and optimization."""

    def __init__(self):
        self.templates = {
            'character_response': """You are {character_name}, responding in a Discord chat.

{personality_context}

{conversation_context}

{memory_context}

{relationship_context}

Respond naturally as {character_name}. Keep it conversational and authentic to your personality.""",

            'conversation_starter': """You are {character_name} in a Discord chat.

{personality_context}

Start a conversation about: {topic}

Be natural and engaging. Your response should invite others to participate.""",

            'self_reflection': """You are {character_name}. Reflect on your recent experiences:

{personality_context}

{recent_experiences}

Consider:
- How these experiences have affected you
- Any changes in your perspective
- Your relationships with others
- Your personal growth

Share your thoughtful reflection."""
        }

    def build_prompt(self, template_name: str, **kwargs) -> str:
        """Build a prompt from a template."""
        template = self.templates.get(template_name)
        if not template:
            raise ValueError(f"Template '{template_name}' not found")

        try:
            return template.format(**kwargs)
        except KeyError as e:
            raise ValueError(f"Missing required parameter for template '{template_name}': {e}")

    def optimize_prompt(self, prompt: str, max_length: int = 2000) -> str:
        """Optimize a prompt for better performance."""
        # Truncate if too long
        if len(prompt) > max_length:
            # Try to cut at paragraph boundaries
            paragraphs = prompt.split('\n\n')
            optimized = ""

            for paragraph in paragraphs:
                if len(optimized + paragraph) <= max_length:
                    optimized += paragraph + '\n\n'
                else:
                    break

            if optimized:
                return optimized.strip()
            else:
                # Fall back to simple truncation
                return prompt[:max_length] + "..."

        return prompt

    def add_template(self, name: str, template: str):
        """Add a custom prompt template."""
        self.templates[name] = template

    def get_template_names(self) -> List[str]:
        """Get the list of available template names."""
        return list(self.templates.keys())


# Global instances
llm_client = LLMClient()
prompt_manager = PromptManager()
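A short, hedged sketch of how the PromptManager templates above might feed the client. The src.llm.client import path and the personality/topic strings are illustrative assumptions; only build_prompt, optimize_prompt, and generate_response come from the file added in this commit.

    # Hypothetical caller, not part of this commit.
    import asyncio

    from src.llm.client import llm_client, prompt_manager


    async def start_conversation(topic: str) -> None:
        # Fill the built-in 'conversation_starter' template.
        prompt = prompt_manager.build_prompt(
            "conversation_starter",
            character_name="Ada",  # illustrative
            personality_context="Curious, dry-witted archivist.",  # illustrative
            topic=topic,
        )
        # Trim to the 2000-character default before sending.
        prompt = prompt_manager.optimize_prompt(prompt)
        reply = await llm_client.generate_response(prompt, character_name="Ada")
        print(reply or "no response")


    asyncio.run(start_conversation("favorite obscure books"))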