Implement comprehensive LLM provider system with global cost protection

- Add multi-provider LLM architecture supporting OpenRouter, OpenAI, Gemini, and custom providers
- Implement global LLM on/off switch with default DISABLED state for cost protection
- Add per-character LLM configuration with provider-specific models and settings
- Create performance-optimized caching system for LLM enabled status checks
- Add API key validation before enabling LLM providers to prevent broken configurations
- Implement audit logging for all LLM enable/disable actions for cost accountability
- Create comprehensive admin UI with prominent cost warnings and confirmation dialogs
- Add visual indicators in character list for custom AI model configurations
- Build character-specific LLM client system with global fallback mechanism (sketched below)
- Add database schema support for per-character LLM settings
- Implement graceful fallback responses when LLM is globally disabled (sketched below)
- Create provider testing and validation system for reliable connections
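
The global kill switch, the cached enabled-status check, and the disabled-state fallback listed above might fit together roughly as follows. This is a minimal sketch for illustration only: the settings key, cache TTL, and helper names are assumptions, not code from this commit.

import time

_cache = {'enabled': None, 'checked_at': 0.0}
_CACHE_TTL_SECONDS = 30.0  # assumed TTL; avoids re-reading the settings store on every request


def is_llm_globally_enabled(settings: dict) -> bool:
    """Cached check of the global LLM switch; a missing setting means disabled."""
    now = time.monotonic()
    if _cache['enabled'] is None or now - _cache['checked_at'] > _CACHE_TTL_SECONDS:
        # Default to False (disabled) for cost protection when the setting is absent.
        _cache['enabled'] = bool(settings.get('llm_globally_enabled', False))
        _cache['checked_at'] = now
    return _cache['enabled']


def generate_reply(settings: dict, prompt: str) -> str:
    """Return an LLM reply only when the global switch is on, otherwise a canned fallback."""
    if not is_llm_globally_enabled(settings):
        # Graceful fallback keeps conversations working without incurring API costs.
        return "AI responses are currently disabled."
    # ...dispatch to the configured provider here...
    return "(provider call goes here)"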
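
Likewise, per-character provider resolution with a global fallback could look like the sketch below; the config field names, the registry, and the module paths are illustrative assumptions.

from .gemini import GeminiProvider  # assumed module path for the file added below

# Only the Gemini entry is shown; the other providers from this commit would register here too.
PROVIDER_REGISTRY = {
    'gemini': GeminiProvider,
}


def build_llm_client(character: dict, global_config: dict):
    """Prefer the character's own LLM settings; fall back to the global configuration."""
    config = character.get('llm_config') or global_config
    provider_cls = PROVIDER_REGISTRY.get(config.get('provider'))
    if provider_cls is None:
        raise ValueError(f"Unknown LLM provider: {config.get('provider')!r}")
    return provider_cls(config)
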
Author: root
Date: 2025-07-08 07:35:48 -07:00
Parent: 004f0325ec
Commit: 10563900a3
59 changed files with 6686 additions and 791 deletions


@@ -0,0 +1,124 @@
"""
Google Gemini Provider for LLM requests
"""
import httpx
import json
from typing import Dict, Any, List
from .base import BaseLLMProvider, LLMRequest, LLMResponse
class GeminiProvider(BaseLLMProvider):
"""Google Gemini API provider"""
def __init__(self, config: Dict[str, Any]):
super().__init__(config)
self.api_key = config.get('api_key')
self.base_url = config.get('base_url', 'https://generativelanguage.googleapis.com/v1beta')
self.model = config.get('model', 'gemini-1.5-flash')
self.timeout = config.get('timeout', 300)
@property
def requires_api_key(self) -> bool:
return True
def get_supported_models(self) -> List[str]:
return [
'gemini-1.5-flash',
'gemini-1.5-pro',
'gemini-1.0-pro'
]
async def generate_response(self, request: LLMRequest) -> LLMResponse:
"""Generate response using Gemini API"""
try:
# Gemini uses a different API format
payload = {
'contents': [
{
'parts': [
{
'text': request.prompt
}
]
}
],
'generationConfig': {
'temperature': request.temperature or self.config.get('temperature', 0.8),
'maxOutputTokens': request.max_tokens or self.config.get('max_tokens', 2000),
'candidateCount': 1
}
}
url = f"{self.base_url}/models/{self.model}:generateContent"
params = {'key': self.api_key}
async with httpx.AsyncClient() as client:
response = await client.post(
url,
params=params,
json=payload,
timeout=self.timeout
)
if response.status_code == 200:
data = response.json()
# Extract content from Gemini response format
if 'candidates' in data and len(data['candidates']) > 0:
candidate = data['candidates'][0]
if 'content' in candidate and 'parts' in candidate['content']:
content = candidate['content']['parts'][0]['text']
# Extract token usage if available
tokens_used = None
if 'usageMetadata' in data:
tokens_used = data['usageMetadata'].get('totalTokenCount')
return LLMResponse(
content=content,
success=True,
provider='gemini',
model=self.model,
tokens_used=tokens_used
)
return LLMResponse(
content="",
success=False,
error="Gemini API returned unexpected response format",
provider='gemini'
)
else:
error_text = response.text
return LLMResponse(
content="",
success=False,
error=f"Gemini API error: {response.status_code} - {error_text}",
provider='gemini'
)
except Exception as e:
return LLMResponse(
content="",
success=False,
error=f"Gemini provider error: {str(e)}",
provider='gemini'
)
async def health_check(self) -> bool:
"""Check Gemini API health"""
try:
url = f"{self.base_url}/models"
params = {'key': self.api_key}
async with httpx.AsyncClient() as client:
response = await client.get(
url,
params=params,
timeout=10
)
return response.status_code == 200
except Exception:
return False
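
A quick usage sketch for the provider above. It assumes LLMRequest accepts prompt, temperature, and max_tokens keyword arguments (which is how generate_response reads them); the request fields, module paths, and script wrapper are illustrative only.

import asyncio

# Relative imports assume this snippet lives inside the same providers package.
from .base import LLMRequest
from .gemini import GeminiProvider  # assumed module path for this file


async def main():
    provider = GeminiProvider({'api_key': 'YOUR_GEMINI_API_KEY', 'model': 'gemini-1.5-flash'})

    # health_check() hits the models endpoint as a cheap connectivity/key test.
    if not await provider.health_check():
        print("Gemini API unreachable or API key invalid")
        return

    response = await provider.generate_response(
        LLMRequest(prompt="Say hello in one sentence.", temperature=0.7, max_tokens=100)
    )
    print(response.content if response.success else response.error)


asyncio.run(main())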