Implement comprehensive LLM provider system with global cost protection

- Add multi-provider LLM architecture supporting OpenRouter, OpenAI, Gemini, and custom providers
- Implement global LLM on/off switch with default DISABLED state for cost protection
- Add per-character LLM configuration with provider-specific models and settings
- Create performance-optimized caching system for LLM enabled status checks
- Add API key validation before enabling LLM providers to prevent broken configurations
- Implement audit logging for all LLM enable/disable actions for cost accountability
- Create comprehensive admin UI with prominent cost warnings and confirmation dialogs
- Add visual indicators in character list for custom AI model configurations
- Build character-specific LLM client system with global fallback mechanism (sketched below)
- Add database schema support for per-character LLM settings
- Implement graceful fallback responses when LLM is globally disabled (sketched below)
- Create provider testing and validation system for reliable connections
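
The global kill switch, the cached enabled-status check, and the disabled-state fallback listed above might fit together roughly as follows. This is a minimal sketch for illustration only: the settings key, cache TTL, and helper names are assumptions, not code from this commit.

import time

_cache = {'enabled': None, 'checked_at': 0.0}
_CACHE_TTL_SECONDS = 30.0  # assumed TTL; avoids re-reading the settings store on every request


def is_llm_globally_enabled(settings: dict) -> bool:
    """Cached check of the global LLM switch; a missing setting means disabled."""
    now = time.monotonic()
    if _cache['enabled'] is None or now - _cache['checked_at'] > _CACHE_TTL_SECONDS:
        # Default to False (disabled) for cost protection when the setting is absent.
        _cache['enabled'] = bool(settings.get('llm_globally_enabled', False))
        _cache['checked_at'] = now
    return _cache['enabled']


def generate_reply(settings: dict, prompt: str) -> str:
    """Return an LLM reply only when the global switch is on, otherwise a canned fallback."""
    if not is_llm_globally_enabled(settings):
        # Graceful fallback keeps conversations working without incurring API costs.
        return "AI responses are currently disabled."
    # ...dispatch to the configured provider here...
    return "(provider call goes here)"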
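
Likewise, per-character provider resolution with a global fallback could look like the sketch below; the config field names, the registry, and the module paths are illustrative assumptions.

from .gemini import GeminiProvider  # assumed module path for the file added below

# Only the Gemini entry is shown; the other providers from this commit would register here too.
PROVIDER_REGISTRY = {
    'gemini': GeminiProvider,
}


def build_llm_client(character: dict, global_config: dict):
    """Prefer the character's own LLM settings; fall back to the global configuration."""
    config = character.get('llm_config') or global_config
    provider_cls = PROVIDER_REGISTRY.get(config.get('provider'))
    if provider_cls is None:
        raise ValueError(f"Unknown LLM provider: {config.get('provider')!r}")
    return provider_cls(config)
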
Author: root
Date: 2025-07-08 07:35:48 -07:00
Parent: 004f0325ec
Commit: 10563900a3
59 changed files with 6686 additions and 791 deletions


@@ -0,0 +1,124 @@
"""
Google Gemini Provider for LLM requests
"""
import httpx
import json
from typing import Dict, Any, List
from .base import BaseLLMProvider, LLMRequest, LLMResponse
class GeminiProvider(BaseLLMProvider):
"""Google Gemini API provider"""
def __init__(self, config: Dict[str, Any]):
super().__init__(config)
self.api_key = config.get('api_key')
self.base_url = config.get('base_url', 'https://generativelanguage.googleapis.com/v1beta')
self.model = config.get('model', 'gemini-1.5-flash')
self.timeout = config.get('timeout', 300)
@property
def requires_api_key(self) -> bool:
return True
def get_supported_models(self) -> List[str]:
return [
'gemini-1.5-flash',
'gemini-1.5-pro',
'gemini-1.0-pro'
]
async def generate_response(self, request: LLMRequest) -> LLMResponse:
"""Generate response using Gemini API"""
try:
# Gemini uses a different API format
payload = {
'contents': [
{
'parts': [
{
'text': request.prompt
}
]
}
],
'generationConfig': {
'temperature': request.temperature or self.config.get('temperature', 0.8),
'maxOutputTokens': request.max_tokens or self.config.get('max_tokens', 2000),
'candidateCount': 1
}
}
url = f"{self.base_url}/models/{self.model}:generateContent"
params = {'key': self.api_key}
async with httpx.AsyncClient() as client:
response = await client.post(
url,
params=params,
json=payload,
timeout=self.timeout
)
if response.status_code == 200:
data = response.json()
# Extract content from Gemini response format
if 'candidates' in data and len(data['candidates']) > 0:
candidate = data['candidates'][0]
if 'content' in candidate and 'parts' in candidate['content']:
content = candidate['content']['parts'][0]['text']
# Extract token usage if available
tokens_used = None
if 'usageMetadata' in data:
tokens_used = data['usageMetadata'].get('totalTokenCount')
return LLMResponse(
content=content,
success=True,
provider='gemini',
model=self.model,
tokens_used=tokens_used
)
return LLMResponse(
content="",
success=False,
error="Gemini API returned unexpected response format",
provider='gemini'
)
else:
error_text = response.text
return LLMResponse(
content="",
success=False,
error=f"Gemini API error: {response.status_code} - {error_text}",
provider='gemini'
)
except Exception as e:
return LLMResponse(
content="",
success=False,
error=f"Gemini provider error: {str(e)}",
provider='gemini'
)
async def health_check(self) -> bool:
"""Check Gemini API health"""
try:
url = f"{self.base_url}/models"
params = {'key': self.api_key}
async with httpx.AsyncClient() as client:
response = await client.get(
url,
params=params,
timeout=10
)
return response.status_code == 200
except Exception:
return False
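
A quick usage sketch for the provider above. It assumes LLMRequest accepts prompt, temperature, and max_tokens keyword arguments (which is how generate_response reads them); the request fields, module paths, and script wrapper are illustrative only.

import asyncio

# Relative imports assume this snippet lives inside the same providers package.
from .base import LLMRequest
from .gemini import GeminiProvider  # assumed module path for this file


async def main():
    provider = GeminiProvider({'api_key': 'YOUR_GEMINI_API_KEY', 'model': 'gemini-1.5-flash'})

    # health_check() hits the models endpoint as a cheap connectivity/key test.
    if not await provider.health_check():
        print("Gemini API unreachable or API key invalid")
        return

    response = await provider.generate_response(
        LLMRequest(prompt="Say hello in one sentence.", temperature=0.7, max_tokens=100)
    )
    print(response.content if response.success else response.error)


asyncio.run(main())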