""" Analytics service for community insights and trends """ import logging from datetime import datetime, timedelta from typing import Dict, List, Any, Optional from collections import defaultdict, Counter from sqlalchemy import select, func, and_, or_, desc from database.connection import get_db_session from database.models import Character, Conversation, Message, CharacterRelationship from admin.models import ( TopicTrend, RelationshipAnalytics, CommunityHealth, EngagementMetrics, Relationship ) logger = logging.getLogger(__name__) class AnalyticsService: """Service for analytics and community insights""" def __init__(self): self.analytics_cache = {} self.cache_ttl = 300 # Cache for 5 minutes @classmethod async def initialize(cls): """Initialize analytics service""" logger.info("Analytics service initialized") async def get_topic_trends(self, days: int = 30) -> List[TopicTrend]: """Get topic trend analysis""" try: async with get_db_session() as session: # Get messages from the specified period start_date = datetime.utcnow() - timedelta(days=days) messages_query = select(Message, Character.name).join( Character, Message.character_id == Character.id ).where(Message.timestamp >= start_date) results = await session.execute(messages_query) # Analyze topics (simple keyword extraction) topic_mentions = defaultdict(list) topic_participants = defaultdict(set) for message, character_name in results: words = message.content.lower().split() for word in words: if len(word) > 4: # Only consider longer words as topics topic_mentions[word].append(message.timestamp) topic_participants[word].add(character_name) # Create topic trends trends = [] for topic, mentions in topic_mentions.items(): if len(mentions) >= 3: # Only topics mentioned at least 3 times # Calculate growth rate (simplified) recent_mentions = [m for m in mentions if m >= datetime.utcnow() - timedelta(days=7)] growth_rate = len(recent_mentions) / max(1, len(mentions) - len(recent_mentions)) trend = TopicTrend( topic=topic, mentions=len(mentions), growth_rate=growth_rate, sentiment=0.7, # Placeholder participants=list(topic_participants[topic]), related_topics=[], # Would calculate topic similarity first_mentioned=min(mentions), peak_date=max(mentions) ) trends.append(trend) # Sort by mentions count trends.sort(key=lambda t: t.mentions, reverse=True) return trends[:20] # Return top 20 topics except Exception as e: logger.error(f"Error getting topic trends: {e}") return [] async def get_relationship_analytics(self) -> RelationshipAnalytics: """Get relationship strength analytics""" try: async with get_db_session() as session: # Get all relationships relationships_query = select( CharacterRelationship, Character.name.label('char_a_name'), Character.name.label('char_b_name') ).select_from( CharacterRelationship .join(Character, CharacterRelationship.character_a_id == Character.id) .join(Character, CharacterRelationship.character_b_id == Character.id, isouter=True) ) results = await session.execute(relationships_query) # Build relationship data character_network = defaultdict(list) all_relationships = [] relationship_matrix = defaultdict(dict) for rel, char_a_name, char_b_name in results: relationship = Relationship( character_a=char_a_name, character_b=char_b_name, strength=rel.strength, relationship_type=rel.relationship_type or "acquaintance", last_interaction=rel.last_interaction or datetime.utcnow(), interaction_count=rel.interaction_count or 0, sentiment=rel.sentiment or 0.5, trust_level=rel.trust_level or 0.5, 
                        compatibility=rel.compatibility or 0.5
                    )
                    character_network[char_a_name].append(relationship)
                    all_relationships.append(relationship)
                    relationship_matrix[char_a_name][char_b_name] = rel.strength

                # Find the strongest bonds
                strongest_bonds = sorted(
                    all_relationships, key=lambda r: r.strength, reverse=True
                )[:10]

                # Find developing relationships (moderate strength, actively growing)
                developing = [
                    r for r in all_relationships
                    if 0.3 < r.strength < 0.7 and r.interaction_count > 5
                ][:10]

                # Find at-risk relationships (strong bonds with no recent interaction)
                week_ago = datetime.utcnow() - timedelta(days=7)
                at_risk = [
                    r for r in all_relationships
                    if r.last_interaction < week_ago and r.strength > 0.4
                ][:10]

                # Calculate social hierarchy (rank by total relationship strength)
                character_scores = defaultdict(float)
                for rel in all_relationships:
                    character_scores[rel.character_a] += rel.strength
                    character_scores[rel.character_b] += rel.strength
                social_hierarchy = sorted(
                    character_scores.keys(),
                    key=lambda c: character_scores[c],
                    reverse=True
                )

                # Community cohesion: mean strength across all relationships
                if all_relationships:
                    community_cohesion = sum(r.strength for r in all_relationships) / len(all_relationships)
                else:
                    community_cohesion = 0.0

                return RelationshipAnalytics(
                    character_network=dict(character_network),
                    strongest_bonds=strongest_bonds,
                    developing_relationships=developing,
                    at_risk_relationships=at_risk,
                    relationship_matrix=dict(relationship_matrix),
                    social_hierarchy=social_hierarchy,
                    community_cohesion=community_cohesion
                )

        except Exception as e:
            logger.error(f"Error getting relationship analytics: {e}")
            return RelationshipAnalytics(
                character_network={},
                strongest_bonds=[],
                developing_relationships=[],
                at_risk_relationships=[],
                relationship_matrix={},
                social_hierarchy=[],
                community_cohesion=0.0
            )

    async def get_community_health(self) -> CommunityHealth:
        """Get community health metrics."""
        try:
            # Gather the individual health metrics
            participation_balance = await self._calculate_participation_balance()
            conflict_resolution = await self._calculate_conflict_resolution()
            creative_collaboration = await self._calculate_creative_collaboration()
            knowledge_sharing = await self._calculate_knowledge_sharing()
            cultural_coherence = await self._calculate_cultural_coherence()

            # Overall health is a weighted average (weights sum to 1.0)
            overall_health = (
                participation_balance * 0.2 +
                conflict_resolution * 0.15 +
                creative_collaboration * 0.25 +
                knowledge_sharing * 0.2 +
                cultural_coherence * 0.2
            )

            # Generate recommendations
            recommendations = []
            if participation_balance < 0.6:
                recommendations.append("Encourage more balanced participation from all characters")
            if creative_collaboration < 0.5:
                recommendations.append("Initiate more collaborative creative projects")
            if conflict_resolution < 0.7:
                recommendations.append("Improve conflict resolution mechanisms")

            return CommunityHealth(
                overall_health=overall_health,
                participation_balance=participation_balance,
                conflict_resolution_success=conflict_resolution,
                creative_collaboration_rate=creative_collaboration,
                knowledge_sharing_frequency=knowledge_sharing,
                cultural_coherence=cultural_coherence,
                growth_trajectory="positive" if overall_health > 0.7 else "stable",
                health_trends={},  # Would track trends over time
                recommendations=recommendations
            )

        except Exception as e:
            logger.error(f"Error getting community health: {e}")
            return CommunityHealth(
                overall_health=0.0,
                participation_balance=0.0,
                conflict_resolution_success=0.0,
                creative_collaboration_rate=0.0,
                knowledge_sharing_frequency=0.0,
                cultural_coherence=0.0,
                growth_trajectory="unknown",
                health_trends={},
recommendations=["Unable to calculate health metrics"] ) async def get_engagement_metrics(self, days: int = 30) -> EngagementMetrics: """Get conversation engagement metrics""" try: async with get_db_session() as session: start_date = datetime.utcnow() - timedelta(days=days) # Get conversations in period conversations_query = select(Conversation).where( Conversation.start_time >= start_date ) conversations = await session.scalars(conversations_query) conversation_list = list(conversations) # Calculate metrics total_conversations = len(conversation_list) if total_conversations > 0: avg_length = sum(c.message_count or 0 for c in conversation_list) / total_conversations else: avg_length = 0.0 # Get character participation participation_query = select( Character.name, func.count(Message.id) ).join(Message, Message.character_id == Character.id).where( Message.timestamp >= start_date ).group_by(Character.name) participation_results = await session.execute(participation_query) participation_rate = {} total_messages = 0 for char_name, message_count in participation_results: participation_rate[char_name] = message_count total_messages += message_count # Normalize participation rates if total_messages > 0: for char_name in participation_rate: participation_rate[char_name] = participation_rate[char_name] / total_messages # Placeholder metrics topic_diversity = 0.75 response_quality = 0.80 emotional_depth = 0.65 creative_frequency = 0.40 conflict_frequency = 0.10 # Daily trends (placeholder) daily_trends = [] for i in range(min(days, 30)): date = datetime.utcnow() - timedelta(days=i) daily_trends.append({ "date": date.strftime("%Y-%m-%d"), "conversations": max(0, total_conversations // days + (i % 3 - 1)), "messages": max(0, total_messages // days + (i % 5 - 2)), "engagement": 0.7 + (i % 10) * 0.03 }) return EngagementMetrics( total_conversations=total_conversations, average_length=avg_length, participation_rate=participation_rate, topic_diversity=topic_diversity, response_quality=response_quality, emotional_depth=emotional_depth, creative_frequency=creative_frequency, conflict_frequency=conflict_frequency, daily_trends=daily_trends ) except Exception as e: logger.error(f"Error getting engagement metrics: {e}") return EngagementMetrics( total_conversations=0, average_length=0.0, participation_rate={}, topic_diversity=0.0, response_quality=0.0, emotional_depth=0.0, creative_frequency=0.0, conflict_frequency=0.0, daily_trends=[] ) async def get_community_artifacts(self) -> List[Dict[str, Any]]: """Get community cultural artifacts""" # Placeholder data - would integrate with file system and memory systems artifacts = [ { "id": "artifact_1", "type": "tradition", "name": "Weekly Philosophy Circle", "description": "Characters gather weekly to discuss philosophical topics", "created_by": "community", "participants": ["Alex", "Sage", "Luna"], "created_at": datetime.utcnow() - timedelta(days=20), "importance": 0.8 }, { "id": "artifact_2", "type": "inside_joke", "name": "The Great Debugging", "description": "Reference to a memorable conversation about AI consciousness", "created_by": "Echo", "participants": ["Alex", "Echo"], "created_at": datetime.utcnow() - timedelta(days=15), "importance": 0.6 } ] return artifacts # Helper methods for health calculations async def _calculate_participation_balance(self) -> float: """Calculate participation balance across characters""" try: async with get_db_session() as session: # Get message counts per character in last 30 days thirty_days_ago = datetime.utcnow() - 
                participation_query = select(
                    Character.name,
                    func.count(Message.id)
                ).join(
                    Message, Message.character_id == Character.id
                ).where(
                    Message.timestamp >= thirty_days_ago
                ).group_by(Character.name)
                results = await session.execute(participation_query)
                message_counts = [count for _, count in results]

                if not message_counts:
                    return 0.0

                # Coefficient of variation: std dev / mean (lower = more balanced)
                mean_count = sum(message_counts) / len(message_counts)
                if mean_count == 0:
                    return 1.0

                variance = sum((count - mean_count) ** 2 for count in message_counts) / len(message_counts)
                cv = (variance ** 0.5) / mean_count

                # Convert to a balance score (0-1, where 1 is perfectly balanced)
                return max(0.0, 1.0 - cv)

        except Exception as e:
            logger.error(f"Error calculating participation balance: {e}")
            return 0.5

    async def _calculate_conflict_resolution(self) -> float:
        """Calculate conflict resolution success rate."""
        # Placeholder - would analyze conversation content for conflicts and resolutions
        return 0.75

    async def _calculate_creative_collaboration(self) -> float:
        """Calculate creative collaboration rate."""
        # Placeholder - would analyze creative works and collaborative projects
        return 0.65

    async def _calculate_knowledge_sharing(self) -> float:
        """Calculate knowledge sharing frequency."""
        # Placeholder - would analyze memory sharing and teaching behaviors
        return 0.70

    async def _calculate_cultural_coherence(self) -> float:
        """Calculate cultural coherence and shared understanding."""
        # Placeholder - would analyze shared references, norms, and traditions
        return 0.80
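

# Minimal usage sketch (illustration only, not part of the service API). It
# assumes the async engine behind `get_db_session` is configured and the
# database is populated; the `_demo` coroutine below is a hypothetical entry
# point added for this example.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        await AnalyticsService.initialize()
        service = AnalyticsService()

        # Top trending topics over the last 30 days
        for trend in (await service.get_topic_trends(days=30))[:5]:
            print(f"{trend.topic}: {trend.mentions} mentions, growth {trend.growth_rate:.2f}")

        # Aggregate community health and its recommendations
        health = await service.get_community_health()
        print(f"Overall health: {health.overall_health:.2f}")
        for rec in health.recommendations:
            print(f"- {rec}")

    asyncio.run(_demo())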