Initial implementation of autonomous Discord LLM fishbowl
Core Features:
- Full autonomous AI character ecosystem with multi-personality support
- Advanced RAG system with personal, community, and creative memory layers
- MCP integration for character self-modification and file system access
- PostgreSQL database with comprehensive character relationship tracking
- Redis caching and ChromaDB vector storage for semantic memory retrieval
- Dynamic personality evolution based on interactions and self-reflection
- Community knowledge management with tradition and norm identification
- Sophisticated conversation engine with natural scheduling and topic management
- Docker containerization and production-ready deployment configuration

Architecture:
- Multi-layer vector databases for personal, community, and creative knowledge
- Character file systems with personal and shared digital spaces
- Autonomous self-modification with safety validation and audit trails
- Memory importance scoring with time-based decay and consolidation
- Community health monitoring and cultural evolution tracking
- RAG-powered conversation context and relationship optimization

Characters can:
- Develop authentic personalities through experience-based learning
- Create and build upon original creative works and philosophical insights
- Form complex relationships with memory of past interactions
- Modify their own personality traits through self-reflection cycles
- Contribute to and learn from shared community knowledge
- Manage personal digital spaces with diaries, creative works, and reflections
- Engage in collaborative projects and community decision-making

The system supports indefinite autonomous operation with continuous character development, community culture evolution, and creative collaboration.
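Below is a minimal usage sketch of the async client added in src/llm/client.py. It is illustrative only: the src.llm.client import path, the character name "Ada", and a reachable Ollama-compatible server at the configured base_url are assumptions, not part of this commit.

    # Hypothetical caller, not part of this commit.
    import asyncio

    from src.llm.client import llm_client  # module-level singleton defined in the new file


    async def demo() -> None:
        # Confirm the configured model is reachable before generating.
        if not await llm_client.check_model_availability():
            print("LLM model unavailable")
            return

        reply = await llm_client.generate_response(
            "Introduce yourself in one sentence.",
            character_name="Ada",  # illustrative character name
        )
        # generate_response() returns None on rate limiting, timeout, or HTTP errors.
        print(reply or "no response")


    asyncio.run(demo())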
src/llm/client.py (new file, 394 lines added)
@@ -0,0 +1,394 @@
import asyncio
import hashlib
import json
import logging
import time
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

import httpx

from ..utils.config import get_settings
from ..utils.logging import log_llm_interaction, log_error_with_context, log_system_health

logger = logging.getLogger(__name__)


class LLMClient:
    """Async LLM client for interacting with local LLM APIs (Ollama, etc.)."""

    def __init__(self):
        self.settings = get_settings()
        self.base_url = self.settings.llm.base_url
        self.model = self.settings.llm.model
        self.timeout = self.settings.llm.timeout
        self.max_tokens = self.settings.llm.max_tokens
        self.temperature = self.settings.llm.temperature

        # Rate limiting
        self.request_times = []
        self.max_requests_per_minute = 30

        # Response caching
        self.cache = {}
        self.cache_ttl = 300  # 5 minutes

        # Health monitoring
        self.health_stats = {
            'total_requests': 0,
            'successful_requests': 0,
            'failed_requests': 0,
            'average_response_time': 0,
            'last_health_check': datetime.utcnow()
        }

    async def generate_response(self, prompt: str, character_name: Optional[str] = None,
                                max_tokens: Optional[int] = None,
                                temperature: Optional[float] = None) -> Optional[str]:
        """Generate a response using the LLM."""
        # Record the start time up front so every error handler below can
        # compute a duration without risking an unbound local variable.
        start_time = time.time()
        try:
            # Rate limiting check
            if not await self._check_rate_limit():
                logger.warning(f"Rate limit exceeded for {character_name}")
                return None

            # Check cache first
            cache_key = self._generate_cache_key(prompt, character_name, max_tokens, temperature)
            cached_response = self._get_cached_response(cache_key)
            if cached_response:
                return cached_response

            # Prepare request
            request_data = {
                "model": self.model,
                "prompt": prompt,
                "options": {
                    "temperature": temperature or self.temperature,
                    "num_predict": max_tokens or self.max_tokens,
                    "top_p": 0.9,
                    "top_k": 40,
                    "repeat_penalty": 1.1
                },
                "stream": False
            }

            # Make API call
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(
                    f"{self.base_url}/api/generate",
                    json=request_data,
                    headers={"Content-Type": "application/json"}
                )

            response.raise_for_status()
            result = response.json()

            if 'response' in result and result['response']:
                generated_text = result['response'].strip()

                # Cache the response
                self._cache_response(cache_key, generated_text)

                # Update stats
                duration = time.time() - start_time
                self._update_stats(True, duration)

                # Log interaction
                log_llm_interaction(
                    character_name or "unknown",
                    len(prompt),
                    len(generated_text),
                    self.model,
                    duration
                )

                return generated_text
            else:
                logger.error(f"No response from LLM: {result}")
                self._update_stats(False, time.time() - start_time)
                return None

        except httpx.TimeoutException:
            logger.error(f"LLM request timeout for {character_name}")
            self._update_stats(False, self.timeout)
            return None
        except httpx.HTTPError as e:
            logger.error(f"LLM HTTP error for {character_name}: {e}")
            self._update_stats(False, time.time() - start_time)
            return None
        except Exception as e:
            log_error_with_context(e, {
                "character_name": character_name,
                "prompt_length": len(prompt),
                "model": self.model
            })
            self._update_stats(False, time.time() - start_time)
            return None

    async def generate_batch_responses(self, prompts: List[Dict[str, Any]]) -> List[Optional[str]]:
        """Generate multiple responses in batch."""
        tasks = []
        for prompt_data in prompts:
            task = self.generate_response(
                prompt=prompt_data['prompt'],
                character_name=prompt_data.get('character_name'),
                max_tokens=prompt_data.get('max_tokens'),
                temperature=prompt_data.get('temperature')
            )
            tasks.append(task)

        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Convert exceptions to None
        return [result if not isinstance(result, Exception) else None for result in results]

    async def check_model_availability(self) -> bool:
        """Check whether the configured LLM model is available."""
        try:
            async with httpx.AsyncClient(timeout=10) as client:
                response = await client.get(f"{self.base_url}/api/tags")
                response.raise_for_status()

                models = response.json()
                available_models = [model.get('name', '') for model in models.get('models', [])]

                is_available = any(self.model in model_name for model_name in available_models)

                log_system_health(
                    "llm_client",
                    "available" if is_available else "model_not_found",
                    {"model": self.model, "available_models": available_models}
                )

                return is_available

        except Exception as e:
            log_error_with_context(e, {"model": self.model})
            log_system_health("llm_client", "unavailable", {"error": str(e)})
            return False

    async def get_model_info(self) -> Dict[str, Any]:
        """Get information about the current model."""
        try:
            async with httpx.AsyncClient(timeout=10) as client:
                response = await client.post(
                    f"{self.base_url}/api/show",
                    json={"name": self.model}
                )
                response.raise_for_status()

                return response.json()

        except Exception as e:
            log_error_with_context(e, {"model": self.model})
            return {}

    async def health_check(self) -> Dict[str, Any]:
        """Perform a health check on the LLM service."""
        try:
            start_time = time.time()

            # Test with a simple prompt
            test_prompt = "Respond with 'OK' if you can understand this message."
            response = await self.generate_response(test_prompt, "health_check")

            duration = time.time() - start_time

            health_status = {
                'status': 'healthy' if response else 'unhealthy',
                'response_time': duration,
                'model': self.model,
                'base_url': self.base_url,
                'timestamp': datetime.utcnow().isoformat()
            }

            # Update health check time
            self.health_stats['last_health_check'] = datetime.utcnow()

            return health_status

        except Exception as e:
            log_error_with_context(e, {"component": "llm_health_check"})
            return {
                'status': 'error',
                'error': str(e),
                'model': self.model,
                'base_url': self.base_url,
                'timestamp': datetime.utcnow().isoformat()
            }

    def get_statistics(self) -> Dict[str, Any]:
        """Get client statistics."""
        return {
            'total_requests': self.health_stats['total_requests'],
            'successful_requests': self.health_stats['successful_requests'],
            'failed_requests': self.health_stats['failed_requests'],
            'success_rate': (
                self.health_stats['successful_requests'] / self.health_stats['total_requests']
                if self.health_stats['total_requests'] > 0 else 0
            ),
            'average_response_time': self.health_stats['average_response_time'],
            'cache_size': len(self.cache),
            'last_health_check': self.health_stats['last_health_check'].isoformat()
        }

    async def _check_rate_limit(self) -> bool:
        """Check whether we are within rate limits."""
        now = time.time()

        # Remove old requests (older than 1 minute)
        self.request_times = [t for t in self.request_times if now - t < 60]

        # Check if we can make another request
        if len(self.request_times) >= self.max_requests_per_minute:
            return False

        # Add current request time
        self.request_times.append(now)
        return True

    def _generate_cache_key(self, prompt: str, character_name: Optional[str] = None,
                            max_tokens: Optional[int] = None,
                            temperature: Optional[float] = None) -> str:
        """Generate a cache key for a response."""
        cache_data = {
            'prompt': prompt,
            'character_name': character_name,
            'max_tokens': max_tokens or self.max_tokens,
            'temperature': temperature or self.temperature,
            'model': self.model
        }

        cache_string = json.dumps(cache_data, sort_keys=True)
        return hashlib.md5(cache_string.encode()).hexdigest()

    def _get_cached_response(self, cache_key: str) -> Optional[str]:
        """Get a cached response if available and not expired."""
        if cache_key in self.cache:
            cached_data = self.cache[cache_key]
            if time.time() - cached_data['timestamp'] < self.cache_ttl:
                return cached_data['response']
            else:
                # Remove expired cache entry
                del self.cache[cache_key]

        return None

    def _cache_response(self, cache_key: str, response: str):
        """Cache a response."""
        self.cache[cache_key] = {
            'response': response,
            'timestamp': time.time()
        }

        # Clean up old cache entries if the cache is too large
        if len(self.cache) > 100:
            # Remove the oldest entries
            oldest_keys = sorted(
                self.cache.keys(),
                key=lambda k: self.cache[k]['timestamp']
            )[:20]

            for key in oldest_keys:
                del self.cache[key]

    def _update_stats(self, success: bool, duration: float):
        """Update health statistics."""
        self.health_stats['total_requests'] += 1

        if success:
            self.health_stats['successful_requests'] += 1
        else:
            self.health_stats['failed_requests'] += 1

        # Update average response time (rolling average)
        total_requests = self.health_stats['total_requests']
        current_avg = self.health_stats['average_response_time']

        self.health_stats['average_response_time'] = (
            (current_avg * (total_requests - 1) + duration) / total_requests
        )

class PromptManager:
    """Manages prompt templates and optimization."""

    def __init__(self):
        self.templates = {
            'character_response': """You are {character_name}, responding in a Discord chat.

{personality_context}

{conversation_context}

{memory_context}

{relationship_context}

Respond naturally as {character_name}. Keep it conversational and authentic to your personality.""",

            'conversation_starter': """You are {character_name} in a Discord chat.

{personality_context}

Start a conversation about: {topic}

Be natural and engaging. Your response should invite others to participate.""",

            'self_reflection': """You are {character_name}. Reflect on your recent experiences:

{personality_context}

{recent_experiences}

Consider:
- How these experiences have affected you
- Any changes in your perspective
- Your relationships with others
- Your personal growth

Share your thoughtful reflection."""
        }

    def build_prompt(self, template_name: str, **kwargs) -> str:
        """Build a prompt from a template."""
        template = self.templates.get(template_name)
        if not template:
            raise ValueError(f"Template '{template_name}' not found")

        try:
            return template.format(**kwargs)
        except KeyError as e:
            raise ValueError(f"Missing required parameter for template '{template_name}': {e}")

    def optimize_prompt(self, prompt: str, max_length: int = 2000) -> str:
        """Optimize a prompt for better performance."""
        # Truncate if too long
        if len(prompt) > max_length:
            # Try to cut at paragraph boundaries
            paragraphs = prompt.split('\n\n')
            optimized = ""

            for paragraph in paragraphs:
                if len(optimized + paragraph) <= max_length:
                    optimized += paragraph + '\n\n'
                else:
                    break

            if optimized:
                return optimized.strip()
            else:
                # Fall back to simple truncation
                return prompt[:max_length] + "..."

        return prompt

    def add_template(self, name: str, template: str):
        """Add a custom prompt template."""
        self.templates[name] = template

    def get_template_names(self) -> List[str]:
        """Get the list of available template names."""
        return list(self.templates.keys())


# Global instances
llm_client = LLMClient()
prompt_manager = PromptManager()
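A short, hedged sketch of how the PromptManager templates above might feed the client. The src.llm.client import path and the personality/topic strings are illustrative assumptions; only build_prompt, optimize_prompt, and generate_response come from the file added in this commit.

    # Hypothetical caller, not part of this commit.
    import asyncio

    from src.llm.client import llm_client, prompt_manager


    async def start_conversation(topic: str) -> None:
        # Fill the built-in 'conversation_starter' template.
        prompt = prompt_manager.build_prompt(
            "conversation_starter",
            character_name="Ada",  # illustrative
            personality_context="Curious, dry-witted archivist.",  # illustrative
            topic=topic,
        )
        # Trim to the 2000-character default before sending.
        prompt = prompt_manager.optimize_prompt(prompt)
        reply = await llm_client.generate_response(prompt, character_name="Ada")
        print(reply or "no response")


    asyncio.run(start_conversation("favorite obscure books"))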