Fix Docker startup script and complete application deployment

- Update docker-start.sh to force correct profiles (qdrant, admin)
- Fix PostgreSQL port mapping from 5432 to 15432 across all configs
- Resolve MCP import conflicts by renaming src/mcp to src/mcp_servers
- Fix admin interface StaticFiles mount syntax error (see the sketch after this list)
- Update LLM client to support both Ollama and OpenAI-compatible APIs
- Configure host networking for Discord bot container access
- Correct database connection handling for async context managers (see the sketch after this list)
- Update environment variables and Docker compose configurations
- Add missing production dependencies and Dockerfile improvements
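
The StaticFiles fix itself is not part of the diff shown below. For reference, a minimal sketch of a correct mount, assuming the admin interface is a FastAPI app; the route prefix, directory, and mount name are illustrative, not taken from the repository:

    from fastapi import FastAPI
    from fastapi.staticfiles import StaticFiles

    app = FastAPI()

    # StaticFiles must be instantiated and mounted under a route prefix.
    # "/static", directory="static", and name="static" are placeholder values.
    app.mount("/static", StaticFiles(directory="static"), name="static")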
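Likewise, the database-connection and port-mapping items live outside this file. A minimal sketch of what they imply, assuming asyncpg is the driver and the container's 5432 is remapped to host port 15432; host, credentials, and database name are placeholders:

    import asyncio
    import asyncpg

    async def main() -> None:
        # Pool connections are used via async context managers, and the host
        # port reflects the remapped mapping (container 5432 -> host 15432).
        # Credentials and database name below are placeholders.
        pool = await asyncpg.create_pool(
            host="localhost", port=15432,
            user="postgres", password="postgres", database="app",
        )
        async with pool.acquire() as conn:
            print(await conn.fetchval("SELECT 1"))
        await pool.close()

    if __name__ == "__main__":
        asyncio.run(main())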
@@ -4,8 +4,8 @@ import json
 import time
 from typing import Dict, Any, Optional, List
 from datetime import datetime, timedelta
-from ..utils.config import get_settings
-from ..utils.logging import log_llm_interaction, log_error_with_context, log_system_health
+from utils.config import get_settings
+from utils.logging import log_llm_interaction, log_error_with_context, log_system_health
 import logging
 
 logger = logging.getLogger(__name__)
@@ -55,33 +55,61 @@ class LLMClient:
 
         start_time = time.time()
 
-        # Prepare request
-        request_data = {
-            "model": self.model,
-            "prompt": prompt,
-            "options": {
-                "temperature": temperature or self.temperature,
-                "num_predict": max_tokens or self.max_tokens,
-                "top_p": 0.9,
-                "top_k": 40,
-                "repeat_penalty": 1.1
-            },
-            "stream": False
-        }
-
-        # Make API call
+        # Try OpenAI-compatible API first (KoboldCPP, etc.)
         async with httpx.AsyncClient(timeout=self.timeout) as client:
-            response = await client.post(
-                f"{self.base_url}/api/generate",
-                json=request_data,
-                headers={"Content-Type": "application/json"}
-            )
-            response.raise_for_status()
-            result = response.json()
-
-            if 'response' in result and result['response']:
-                generated_text = result['response'].strip()
+            try:
+                # OpenAI-compatible request
+                request_data = {
+                    "model": self.model,
+                    "messages": [{"role": "user", "content": prompt}],
+                    "temperature": temperature or self.temperature,
+                    "max_tokens": max_tokens or self.max_tokens,
+                    "top_p": 0.9,
+                    "stream": False
+                }
+
+                response = await client.post(
+                    f"{self.base_url}/chat/completions",
+                    json=request_data,
+                    headers={"Content-Type": "application/json"}
+                )
+                response.raise_for_status()
+                result = response.json()
+
+                if 'choices' in result and result['choices'] and 'message' in result['choices'][0]:
+                    generated_text = result['choices'][0]['message']['content'].strip()
+                else:
+                    generated_text = None
+
+            except (httpx.HTTPStatusError, httpx.RequestError, KeyError):
+                # Fallback to Ollama API
+                request_data = {
+                    "model": self.model,
+                    "prompt": prompt,
+                    "options": {
+                        "temperature": temperature or self.temperature,
+                        "num_predict": max_tokens or self.max_tokens,
+                        "top_p": 0.9,
+                        "top_k": 40,
+                        "repeat_penalty": 1.1
+                    },
+                    "stream": False
+                }
+
+                response = await client.post(
+                    f"{self.base_url}/api/generate",
+                    json=request_data,
+                    headers={"Content-Type": "application/json"}
+                )
+                response.raise_for_status()
+                result = response.json()
+
+                if 'response' in result and result['response']:
+                    generated_text = result['response'].strip()
+                else:
+                    generated_text = None
+
             if generated_text:
 
                 # Cache the response
                 self._cache_response(cache_key, generated_text)
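The hunk above gives the client an OpenAI-compatible-first, Ollama-fallback request path. As a standalone illustration of that pattern (not the project's class itself), a minimal sketch; the timeout value, base URL, and model name are placeholders:

    from typing import Optional

    import httpx

    async def generate(base_url: str, model: str, prompt: str) -> Optional[str]:
        async with httpx.AsyncClient(timeout=60) as client:
            try:
                # OpenAI-compatible chat endpoint (KoboldCPP and similar servers)
                r = await client.post(f"{base_url}/chat/completions", json={
                    "model": model,
                    "messages": [{"role": "user", "content": prompt}],
                    "stream": False,
                })
                r.raise_for_status()
                return r.json()["choices"][0]["message"]["content"].strip()
            except (httpx.HTTPStatusError, httpx.RequestError, KeyError, IndexError):
                # Fall back to Ollama's native generate endpoint
                r = await client.post(f"{base_url}/api/generate", json={
                    "model": model,
                    "prompt": prompt,
                    "stream": False,
                })
                r.raise_for_status()
                return r.json().get("response", "").strip() or None

    # Usage (base URL and model name are assumptions):
    #   import asyncio
    #   print(asyncio.run(generate("http://localhost:5001/v1", "my-model", "Hello")))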
@@ -143,11 +171,18 @@ class LLMClient:
         """Check if the LLM model is available"""
         try:
             async with httpx.AsyncClient(timeout=10) as client:
-                response = await client.get(f"{self.base_url}/api/tags")
-                response.raise_for_status()
-
-                models = response.json()
-                available_models = [model.get('name', '') for model in models.get('models', [])]
+                # Try OpenAI-compatible API first (KoboldCPP, etc.)
+                try:
+                    response = await client.get(f"{self.base_url}/models")
+                    response.raise_for_status()
+                    models = response.json()
+                    available_models = [model.get('id', '') for model in models.get('data', [])]
+                except (httpx.HTTPStatusError, httpx.RequestError):
+                    # Fallback to Ollama API
+                    response = await client.get(f"{self.base_url}/api/tags")
+                    response.raise_for_status()
+                    models = response.json()
+                    available_models = [model.get('name', '') for model in models.get('models', [])]
 
                 is_available = any(self.model in model_name for model_name in available_models)
 
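The availability check above follows the same order: the OpenAI-compatible /models listing first, then Ollama's /api/tags. A small standalone probe of the two endpoints, with the base URL as an assumption:

    from typing import List

    import httpx

    async def list_models(base_url: str) -> List[str]:
        async with httpx.AsyncClient(timeout=10) as client:
            try:
                # OpenAI-compatible model listing (KoboldCPP, etc.)
                r = await client.get(f"{base_url}/models")
                r.raise_for_status()
                return [m.get("id", "") for m in r.json().get("data", [])]
            except (httpx.HTTPStatusError, httpx.RequestError):
                # Fall back to Ollama's native listing
                r = await client.get(f"{base_url}/api/tags")
                r.raise_for_status()
                return [m.get("name", "") for m in r.json().get("models", [])]

    # Usage (base URL is an assumption):
    #   import asyncio
    #   print(asyncio.run(list_models("http://localhost:11434")))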