Files
discord-fishbowl/src/admin/services/analytics_service.py
root 3d9e8ffbf0 Fix Docker startup script and complete application deployment
- Update docker-start.sh to force correct profiles (qdrant, admin)
- Fix PostgreSQL port mapping from 5432 to 15432 across all configs
- Resolve MCP import conflicts by renaming src/mcp to src/mcp_servers
- Fix admin interface StaticFiles mount syntax error
- Update LLM client to support both Ollama and OpenAI-compatible APIs
- Configure host networking for Discord bot container access
- Correct database connection handling for async context managers
- Update environment variables and Docker compose configurations
- Add missing production dependencies and Dockerfile improvements
2025-07-05 15:09:29 -07:00

378 lines
17 KiB
Python

"""
Analytics service for community insights and trends
"""
import logging
from collections import defaultdict, Counter
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional

from sqlalchemy import select, func, and_, or_, desc
from sqlalchemy.orm import aliased

from database.connection import get_db_session
from database.models import Character, Conversation, Message, CharacterRelationship
from admin.models import (
    TopicTrend, RelationshipAnalytics, CommunityHealth,
    EngagementMetrics, Relationship
)
logger = logging.getLogger(__name__)
class AnalyticsService:
    """Service for analytics and community insights.

    Aggregates message, conversation, and relationship data into the
    dashboard models from ``admin.models``. Every public method is
    defensive: database errors are logged and an empty/neutral result is
    returned instead of raising, so the admin UI never hard-fails.
    """

    def __init__(self):
        # Reserved for memoizing expensive analytics queries; not wired up yet.
        self.analytics_cache: Dict[str, Any] = {}
        self.cache_ttl = 300  # seconds (5 minutes)

    @classmethod
    async def initialize(cls):
        """Initialize analytics service (currently just logs readiness)."""
        logger.info("Analytics service initialized")

    async def get_topic_trends(self, days: int = 30) -> "List[TopicTrend]":
        """Return the top trending topics over the last ``days`` days.

        Topics are naive keywords: any whitespace-delimited word longer than
        4 characters. A topic qualifies once mentioned at least 3 times; the
        top 20 by mention count are returned.

        Args:
            days: Look-back window in days.

        Returns:
            Up to 20 ``TopicTrend`` entries sorted by mention count
            (descending), or an empty list on error.
        """
        try:
            async with get_db_session() as session:
                # Messages (with speaker name) from the analysis window.
                start_date = datetime.utcnow() - timedelta(days=days)
                messages_query = select(Message, Character.name).join(
                    Character, Message.character_id == Character.id
                ).where(Message.timestamp >= start_date)
                results = await session.execute(messages_query)

                # Tally mention timestamps and distinct speakers per keyword.
                topic_mentions = defaultdict(list)
                topic_participants = defaultdict(set)
                for message, character_name in results:
                    for word in message.content.lower().split():
                        if len(word) > 4:  # only longer words count as topics
                            topic_mentions[word].append(message.timestamp)
                            topic_participants[word].add(character_name)

                trends = []
                # Single fixed cutoff: computing it inside the loop would let
                # the boundary drift between topics.
                seven_days_ago = datetime.utcnow() - timedelta(days=7)
                for topic, mentions in topic_mentions.items():
                    if len(mentions) < 3:  # ignore rarely-mentioned words
                        continue
                    # Simplified growth signal: last-week mentions relative to
                    # older mentions (>1 means the topic is accelerating).
                    recent_mentions = [m for m in mentions if m >= seven_days_ago]
                    growth_rate = len(recent_mentions) / max(1, len(mentions) - len(recent_mentions))
                    trends.append(TopicTrend(
                        topic=topic,
                        mentions=len(mentions),
                        growth_rate=growth_rate,
                        sentiment=0.7,  # placeholder until sentiment analysis exists
                        participants=list(topic_participants[topic]),
                        related_topics=[],  # would require a topic-similarity model
                        first_mentioned=min(mentions),
                        peak_date=max(mentions)
                    ))

                # Most-discussed topics first.
                trends.sort(key=lambda t: t.mentions, reverse=True)
                return trends[:20]
        except Exception as e:
            logger.error(f"Error getting topic trends: {e}")
            return []

    async def get_relationship_analytics(self) -> "RelationshipAnalytics":
        """Return relationship-strength analytics across all characters.

        Returns:
            ``RelationshipAnalytics`` with the per-character network, bond
            rankings, social hierarchy, and community cohesion score; an
            all-empty instance on error.
        """
        try:
            async with get_db_session() as session:
                # Character is joined twice (once per side of the
                # relationship), so each side needs its own alias — without
                # aliases both name labels resolve to the same entity.
                char_a = aliased(Character)
                char_b = aliased(Character)
                relationships_query = (
                    select(
                        CharacterRelationship,
                        char_a.name.label('char_a_name'),
                        char_b.name.label('char_b_name')
                    )
                    .join(char_a, CharacterRelationship.character_a_id == char_a.id)
                    .join(char_b, CharacterRelationship.character_b_id == char_b.id)
                )
                results = await session.execute(relationships_query)

                # Build the relationship network. Missing DB fields fall back
                # to neutral defaults so downstream math never sees None.
                character_network = defaultdict(list)
                all_relationships = []
                relationship_matrix = defaultdict(dict)
                for rel, char_a_name, char_b_name in results:
                    relationship = Relationship(
                        character_a=char_a_name,
                        character_b=char_b_name,
                        strength=rel.strength,
                        relationship_type=rel.relationship_type or "acquaintance",
                        last_interaction=rel.last_interaction or datetime.utcnow(),
                        interaction_count=rel.interaction_count or 0,
                        sentiment=rel.sentiment or 0.5,
                        trust_level=rel.trust_level or 0.5,
                        compatibility=rel.compatibility or 0.5
                    )
                    character_network[char_a_name].append(relationship)
                    all_relationships.append(relationship)
                    # NOTE(review): matrix is filled one-directional (a -> b)
                    # only, matching existing behavior — confirm consumers
                    # don't expect symmetry.
                    relationship_matrix[char_a_name][char_b_name] = rel.strength

                # Strongest bonds: top 10 by strength.
                strongest_bonds = sorted(
                    all_relationships, key=lambda r: r.strength, reverse=True
                )[:10]
                # Developing: mid-strength with meaningful interaction history.
                developing = [r for r in all_relationships
                              if 0.3 < r.strength < 0.7 and r.interaction_count > 5][:10]
                # At risk: previously strong but quiet for over a week.
                week_ago = datetime.utcnow() - timedelta(days=7)
                at_risk = [r for r in all_relationships
                           if r.last_interaction < week_ago and r.strength > 0.4][:10]

                # Social hierarchy: rank characters by summed bond strength.
                character_scores = defaultdict(float)
                for rel in all_relationships:
                    character_scores[rel.character_a] += rel.strength
                    character_scores[rel.character_b] += rel.strength
                social_hierarchy = sorted(character_scores,
                                          key=character_scores.get, reverse=True)

                # Cohesion: mean bond strength (0.0 when no relationships).
                if all_relationships:
                    community_cohesion = (sum(r.strength for r in all_relationships)
                                          / len(all_relationships))
                else:
                    community_cohesion = 0.0

                return RelationshipAnalytics(
                    character_network=dict(character_network),
                    strongest_bonds=strongest_bonds,
                    developing_relationships=developing,
                    at_risk_relationships=at_risk,
                    relationship_matrix=dict(relationship_matrix),
                    social_hierarchy=social_hierarchy,
                    community_cohesion=community_cohesion
                )
        except Exception as e:
            logger.error(f"Error getting relationship analytics: {e}")
            return RelationshipAnalytics(
                character_network={}, strongest_bonds=[], developing_relationships=[],
                at_risk_relationships=[], relationship_matrix={}, social_hierarchy=[],
                community_cohesion=0.0
            )

    async def get_community_health(self) -> "CommunityHealth":
        """Return overall community health as a weighted composite score.

        Combines five sub-metrics (weights sum to 1.0) and attaches
        actionable recommendations for any sub-metric below its threshold.

        Returns:
            ``CommunityHealth`` instance; a zeroed instance on error.
        """
        try:
            participation_balance = await self._calculate_participation_balance()
            conflict_resolution = await self._calculate_conflict_resolution()
            creative_collaboration = await self._calculate_creative_collaboration()
            knowledge_sharing = await self._calculate_knowledge_sharing()
            cultural_coherence = await self._calculate_cultural_coherence()

            # Weighted composite; weights sum to 1.0.
            overall_health = (
                participation_balance * 0.2 +
                conflict_resolution * 0.15 +
                creative_collaboration * 0.25 +
                knowledge_sharing * 0.2 +
                cultural_coherence * 0.2
            )

            # Recommendations for sub-metrics below their thresholds.
            recommendations = []
            if participation_balance < 0.6:
                recommendations.append("Encourage more balanced participation from all characters")
            if creative_collaboration < 0.5:
                recommendations.append("Initiate more collaborative creative projects")
            if conflict_resolution < 0.7:
                recommendations.append("Improve conflict resolution mechanisms")

            return CommunityHealth(
                overall_health=overall_health,
                participation_balance=participation_balance,
                conflict_resolution_success=conflict_resolution,
                creative_collaboration_rate=creative_collaboration,
                knowledge_sharing_frequency=knowledge_sharing,
                cultural_coherence=cultural_coherence,
                growth_trajectory="positive" if overall_health > 0.7 else "stable",
                health_trends={},  # would track trends over time
                recommendations=recommendations
            )
        except Exception as e:
            logger.error(f"Error getting community health: {e}")
            return CommunityHealth(
                overall_health=0.0, participation_balance=0.0, conflict_resolution_success=0.0,
                creative_collaboration_rate=0.0, knowledge_sharing_frequency=0.0,
                cultural_coherence=0.0, growth_trajectory="unknown", health_trends={},
                recommendations=["Unable to calculate health metrics"]
            )

    async def get_engagement_metrics(self, days: int = 30) -> "EngagementMetrics":
        """Return conversation engagement metrics for the last ``days`` days.

        Args:
            days: Look-back window; values below 1 are clamped to 1 to
                avoid division by zero in the per-day trend math.

        Returns:
            ``EngagementMetrics`` with totals, per-character participation
            shares, and placeholder quality scores; zeroed metrics on error.
        """
        try:
            days = max(1, days)  # guard the `// days` divisions below
            async with get_db_session() as session:
                start_date = datetime.utcnow() - timedelta(days=days)

                # Conversations started in the window.
                conversations_query = select(Conversation).where(
                    Conversation.start_time >= start_date
                )
                conversations = await session.scalars(conversations_query)
                conversation_list = list(conversations)

                total_conversations = len(conversation_list)
                if total_conversations > 0:
                    avg_length = sum(c.message_count or 0 for c in conversation_list) / total_conversations
                else:
                    avg_length = 0.0

                # Message counts per character in the same window.
                participation_query = select(
                    Character.name, func.count(Message.id)
                ).join(Message, Message.character_id == Character.id).where(
                    Message.timestamp >= start_date
                ).group_by(Character.name)
                participation_results = await session.execute(participation_query)

                participation_rate = {}
                total_messages = 0
                for char_name, message_count in participation_results:
                    participation_rate[char_name] = message_count
                    total_messages += message_count
                # Normalize raw counts into shares of total traffic.
                if total_messages > 0:
                    for char_name in participation_rate:
                        participation_rate[char_name] = participation_rate[char_name] / total_messages

                # Placeholder scores until content analysis is implemented.
                topic_diversity = 0.75
                response_quality = 0.80
                emotional_depth = 0.65
                creative_frequency = 0.40
                conflict_frequency = 0.10

                # Synthetic per-day trend rows (placeholder shape for the UI).
                daily_trends = []
                for i in range(min(days, 30)):
                    date = datetime.utcnow() - timedelta(days=i)
                    daily_trends.append({
                        "date": date.strftime("%Y-%m-%d"),
                        "conversations": max(0, total_conversations // days + (i % 3 - 1)),
                        "messages": max(0, total_messages // days + (i % 5 - 2)),
                        "engagement": 0.7 + (i % 10) * 0.03
                    })

                return EngagementMetrics(
                    total_conversations=total_conversations,
                    average_length=avg_length,
                    participation_rate=participation_rate,
                    topic_diversity=topic_diversity,
                    response_quality=response_quality,
                    emotional_depth=emotional_depth,
                    creative_frequency=creative_frequency,
                    conflict_frequency=conflict_frequency,
                    daily_trends=daily_trends
                )
        except Exception as e:
            logger.error(f"Error getting engagement metrics: {e}")
            return EngagementMetrics(
                total_conversations=0, average_length=0.0, participation_rate={},
                topic_diversity=0.0, response_quality=0.0, emotional_depth=0.0,
                creative_frequency=0.0, conflict_frequency=0.0, daily_trends=[]
            )

    async def get_community_artifacts(self) -> List[Dict[str, Any]]:
        """Return community cultural artifacts.

        Placeholder data — would integrate with the file system and memory
        systems once those are available.
        """
        artifacts = [
            {
                "id": "artifact_1",
                "type": "tradition",
                "name": "Weekly Philosophy Circle",
                "description": "Characters gather weekly to discuss philosophical topics",
                "created_by": "community",
                "participants": ["Alex", "Sage", "Luna"],
                "created_at": datetime.utcnow() - timedelta(days=20),
                "importance": 0.8
            },
            {
                "id": "artifact_2",
                "type": "inside_joke",
                "name": "The Great Debugging",
                "description": "Reference to a memorable conversation about AI consciousness",
                "created_by": "Echo",
                "participants": ["Alex", "Echo"],
                "created_at": datetime.utcnow() - timedelta(days=15),
                "importance": 0.6
            }
        ]
        return artifacts

    # Helper methods for health calculations

    async def _calculate_participation_balance(self) -> float:
        """Score how evenly characters participated over the last 30 days.

        Uses 1 - coefficient of variation of per-character message counts,
        clamped to [0, 1]: 1.0 is perfectly balanced, 0.0 highly skewed.
        Returns a neutral 0.5 on error.
        """
        try:
            async with get_db_session() as session:
                thirty_days_ago = datetime.utcnow() - timedelta(days=30)
                participation_query = select(
                    Character.name, func.count(Message.id)
                ).join(Message, Message.character_id == Character.id).where(
                    Message.timestamp >= thirty_days_ago
                ).group_by(Character.name)
                results = await session.execute(participation_query)
                message_counts = [count for _, count in results]

                if not message_counts:
                    return 0.0
                mean_count = sum(message_counts) / len(message_counts)
                if mean_count == 0:
                    return 1.0  # everyone equally silent counts as balanced
                # Coefficient of variation: lower means more balanced.
                variance = sum((count - mean_count) ** 2 for count in message_counts) / len(message_counts)
                cv = (variance ** 0.5) / mean_count
                return max(0.0, 1.0 - cv)
        except Exception as e:
            logger.error(f"Error calculating participation balance: {e}")
            return 0.5

    async def _calculate_conflict_resolution(self) -> float:
        """Conflict resolution success rate (placeholder constant)."""
        # Would analyze conversation content for conflicts and resolutions.
        return 0.75

    async def _calculate_creative_collaboration(self) -> float:
        """Creative collaboration rate (placeholder constant)."""
        # Would analyze creative works and collaborative projects.
        return 0.65

    async def _calculate_knowledge_sharing(self) -> float:
        """Knowledge sharing frequency (placeholder constant)."""
        # Would analyze memory sharing and teaching behaviors.
        return 0.70

    async def _calculate_cultural_coherence(self) -> float:
        """Cultural coherence / shared understanding (placeholder constant)."""
        # Would analyze shared references, norms, and traditions.
        return 0.80