Files
discord-fishbowl/src/admin/services/analytics_service.py
root 3d9e8ffbf0 Fix Docker startup script and complete application deployment
- Update docker-start.sh to force correct profiles (qdrant, admin)
- Fix PostgreSQL port mapping from 5432 to 15432 across all configs
- Resolve MCP import conflicts by renaming src/mcp to src/mcp_servers
- Fix admin interface StaticFiles mount syntax error
- Update LLM client to support both Ollama and OpenAI-compatible APIs
- Configure host networking for Discord bot container access
- Correct database connection handling for async context managers
- Update environment variables and Docker compose configurations
- Add missing production dependencies and Dockerfile improvements
2025-07-05 15:09:29 -07:00

378 lines
17 KiB
Python

"""
Analytics service for community insights and trends
"""
import logging
from collections import defaultdict, Counter
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional

from sqlalchemy import select, func, and_, or_, desc
from sqlalchemy.orm import aliased

from database.connection import get_db_session
from database.models import Character, Conversation, Message, CharacterRelationship
from admin.models import (
    TopicTrend, RelationshipAnalytics, CommunityHealth,
    EngagementMetrics, Relationship
)
logger = logging.getLogger(__name__)
class AnalyticsService:
    """Service for analytics and community insights.

    Aggregates message, conversation, and relationship data into the
    dashboard models from ``admin.models``. Every public method is
    defensive: database errors are logged and an empty/neutral result is
    returned instead of raising, so the admin UI never hard-fails.
    """

    def __init__(self):
        # Reserved for memoizing expensive analytics queries; not wired up yet.
        self.analytics_cache: Dict[str, Any] = {}
        self.cache_ttl = 300  # seconds (5 minutes)

    @classmethod
    async def initialize(cls):
        """Initialize analytics service (currently just logs readiness)."""
        logger.info("Analytics service initialized")

    async def get_topic_trends(self, days: int = 30) -> "List[TopicTrend]":
        """Return the top trending topics over the last ``days`` days.

        Topics are naive keywords: any whitespace-delimited word longer than
        4 characters. A topic qualifies once mentioned at least 3 times; the
        top 20 by mention count are returned.

        Args:
            days: Look-back window in days.

        Returns:
            Up to 20 ``TopicTrend`` entries sorted by mention count
            (descending), or an empty list on error.
        """
        try:
            async with get_db_session() as session:
                # Messages (with speaker name) from the analysis window.
                start_date = datetime.utcnow() - timedelta(days=days)
                messages_query = select(Message, Character.name).join(
                    Character, Message.character_id == Character.id
                ).where(Message.timestamp >= start_date)
                results = await session.execute(messages_query)

                # Tally mention timestamps and distinct speakers per keyword.
                topic_mentions = defaultdict(list)
                topic_participants = defaultdict(set)
                for message, character_name in results:
                    for word in message.content.lower().split():
                        if len(word) > 4:  # only longer words count as topics
                            topic_mentions[word].append(message.timestamp)
                            topic_participants[word].add(character_name)

                trends = []
                # Single fixed cutoff: computing it inside the loop would let
                # the boundary drift between topics.
                seven_days_ago = datetime.utcnow() - timedelta(days=7)
                for topic, mentions in topic_mentions.items():
                    if len(mentions) < 3:  # ignore rarely-mentioned words
                        continue
                    # Simplified growth signal: last-week mentions relative to
                    # older mentions (>1 means the topic is accelerating).
                    recent_mentions = [m for m in mentions if m >= seven_days_ago]
                    growth_rate = len(recent_mentions) / max(1, len(mentions) - len(recent_mentions))
                    trends.append(TopicTrend(
                        topic=topic,
                        mentions=len(mentions),
                        growth_rate=growth_rate,
                        sentiment=0.7,  # placeholder until sentiment analysis exists
                        participants=list(topic_participants[topic]),
                        related_topics=[],  # would require a topic-similarity model
                        first_mentioned=min(mentions),
                        peak_date=max(mentions)
                    ))

                # Most-discussed topics first.
                trends.sort(key=lambda t: t.mentions, reverse=True)
                return trends[:20]
        except Exception as e:
            logger.error(f"Error getting topic trends: {e}")
            return []

    async def get_relationship_analytics(self) -> "RelationshipAnalytics":
        """Return relationship-strength analytics across all characters.

        Returns:
            ``RelationshipAnalytics`` with the per-character network, bond
            rankings, social hierarchy, and community cohesion score; an
            all-empty instance on error.
        """
        try:
            async with get_db_session() as session:
                # Character is joined twice (once per side of the
                # relationship), so each side needs its own alias — without
                # aliases both name labels resolve to the same entity.
                char_a = aliased(Character)
                char_b = aliased(Character)
                relationships_query = (
                    select(
                        CharacterRelationship,
                        char_a.name.label('char_a_name'),
                        char_b.name.label('char_b_name')
                    )
                    .join(char_a, CharacterRelationship.character_a_id == char_a.id)
                    .join(char_b, CharacterRelationship.character_b_id == char_b.id)
                )
                results = await session.execute(relationships_query)

                # Build the relationship network. Missing DB fields fall back
                # to neutral defaults so downstream math never sees None.
                character_network = defaultdict(list)
                all_relationships = []
                relationship_matrix = defaultdict(dict)
                for rel, char_a_name, char_b_name in results:
                    relationship = Relationship(
                        character_a=char_a_name,
                        character_b=char_b_name,
                        strength=rel.strength,
                        relationship_type=rel.relationship_type or "acquaintance",
                        last_interaction=rel.last_interaction or datetime.utcnow(),
                        interaction_count=rel.interaction_count or 0,
                        sentiment=rel.sentiment or 0.5,
                        trust_level=rel.trust_level or 0.5,
                        compatibility=rel.compatibility or 0.5
                    )
                    character_network[char_a_name].append(relationship)
                    all_relationships.append(relationship)
                    # NOTE(review): matrix is filled one-directional (a -> b)
                    # only, matching existing behavior — confirm consumers
                    # don't expect symmetry.
                    relationship_matrix[char_a_name][char_b_name] = rel.strength

                # Strongest bonds: top 10 by strength.
                strongest_bonds = sorted(
                    all_relationships, key=lambda r: r.strength, reverse=True
                )[:10]
                # Developing: mid-strength with meaningful interaction history.
                developing = [r for r in all_relationships
                              if 0.3 < r.strength < 0.7 and r.interaction_count > 5][:10]
                # At risk: previously strong but quiet for over a week.
                week_ago = datetime.utcnow() - timedelta(days=7)
                at_risk = [r for r in all_relationships
                           if r.last_interaction < week_ago and r.strength > 0.4][:10]

                # Social hierarchy: rank characters by summed bond strength.
                character_scores = defaultdict(float)
                for rel in all_relationships:
                    character_scores[rel.character_a] += rel.strength
                    character_scores[rel.character_b] += rel.strength
                social_hierarchy = sorted(character_scores,
                                          key=character_scores.get, reverse=True)

                # Cohesion: mean bond strength (0.0 when no relationships).
                if all_relationships:
                    community_cohesion = (sum(r.strength for r in all_relationships)
                                          / len(all_relationships))
                else:
                    community_cohesion = 0.0

                return RelationshipAnalytics(
                    character_network=dict(character_network),
                    strongest_bonds=strongest_bonds,
                    developing_relationships=developing,
                    at_risk_relationships=at_risk,
                    relationship_matrix=dict(relationship_matrix),
                    social_hierarchy=social_hierarchy,
                    community_cohesion=community_cohesion
                )
        except Exception as e:
            logger.error(f"Error getting relationship analytics: {e}")
            return RelationshipAnalytics(
                character_network={}, strongest_bonds=[], developing_relationships=[],
                at_risk_relationships=[], relationship_matrix={}, social_hierarchy=[],
                community_cohesion=0.0
            )

    async def get_community_health(self) -> "CommunityHealth":
        """Return overall community health as a weighted composite score.

        Combines five sub-metrics (weights sum to 1.0) and attaches
        actionable recommendations for any sub-metric below its threshold.

        Returns:
            ``CommunityHealth`` instance; a zeroed instance on error.
        """
        try:
            participation_balance = await self._calculate_participation_balance()
            conflict_resolution = await self._calculate_conflict_resolution()
            creative_collaboration = await self._calculate_creative_collaboration()
            knowledge_sharing = await self._calculate_knowledge_sharing()
            cultural_coherence = await self._calculate_cultural_coherence()

            # Weighted composite; weights sum to 1.0.
            overall_health = (
                participation_balance * 0.2 +
                conflict_resolution * 0.15 +
                creative_collaboration * 0.25 +
                knowledge_sharing * 0.2 +
                cultural_coherence * 0.2
            )

            # Recommendations for sub-metrics below their thresholds.
            recommendations = []
            if participation_balance < 0.6:
                recommendations.append("Encourage more balanced participation from all characters")
            if creative_collaboration < 0.5:
                recommendations.append("Initiate more collaborative creative projects")
            if conflict_resolution < 0.7:
                recommendations.append("Improve conflict resolution mechanisms")

            return CommunityHealth(
                overall_health=overall_health,
                participation_balance=participation_balance,
                conflict_resolution_success=conflict_resolution,
                creative_collaboration_rate=creative_collaboration,
                knowledge_sharing_frequency=knowledge_sharing,
                cultural_coherence=cultural_coherence,
                growth_trajectory="positive" if overall_health > 0.7 else "stable",
                health_trends={},  # would track trends over time
                recommendations=recommendations
            )
        except Exception as e:
            logger.error(f"Error getting community health: {e}")
            return CommunityHealth(
                overall_health=0.0, participation_balance=0.0, conflict_resolution_success=0.0,
                creative_collaboration_rate=0.0, knowledge_sharing_frequency=0.0,
                cultural_coherence=0.0, growth_trajectory="unknown", health_trends={},
                recommendations=["Unable to calculate health metrics"]
            )

    async def get_engagement_metrics(self, days: int = 30) -> "EngagementMetrics":
        """Return conversation engagement metrics for the last ``days`` days.

        Args:
            days: Look-back window; values below 1 are clamped to 1 to
                avoid division by zero in the per-day trend math.

        Returns:
            ``EngagementMetrics`` with totals, per-character participation
            shares, and placeholder quality scores; zeroed metrics on error.
        """
        try:
            days = max(1, days)  # guard the `// days` divisions below
            async with get_db_session() as session:
                start_date = datetime.utcnow() - timedelta(days=days)

                # Conversations started in the window.
                conversations_query = select(Conversation).where(
                    Conversation.start_time >= start_date
                )
                conversations = await session.scalars(conversations_query)
                conversation_list = list(conversations)

                total_conversations = len(conversation_list)
                if total_conversations > 0:
                    avg_length = sum(c.message_count or 0 for c in conversation_list) / total_conversations
                else:
                    avg_length = 0.0

                # Message counts per character in the same window.
                participation_query = select(
                    Character.name, func.count(Message.id)
                ).join(Message, Message.character_id == Character.id).where(
                    Message.timestamp >= start_date
                ).group_by(Character.name)
                participation_results = await session.execute(participation_query)

                participation_rate = {}
                total_messages = 0
                for char_name, message_count in participation_results:
                    participation_rate[char_name] = message_count
                    total_messages += message_count
                # Normalize raw counts into shares of total traffic.
                if total_messages > 0:
                    for char_name in participation_rate:
                        participation_rate[char_name] = participation_rate[char_name] / total_messages

                # Placeholder scores until content analysis is implemented.
                topic_diversity = 0.75
                response_quality = 0.80
                emotional_depth = 0.65
                creative_frequency = 0.40
                conflict_frequency = 0.10

                # Synthetic per-day trend rows (placeholder shape for the UI).
                daily_trends = []
                for i in range(min(days, 30)):
                    date = datetime.utcnow() - timedelta(days=i)
                    daily_trends.append({
                        "date": date.strftime("%Y-%m-%d"),
                        "conversations": max(0, total_conversations // days + (i % 3 - 1)),
                        "messages": max(0, total_messages // days + (i % 5 - 2)),
                        "engagement": 0.7 + (i % 10) * 0.03
                    })

                return EngagementMetrics(
                    total_conversations=total_conversations,
                    average_length=avg_length,
                    participation_rate=participation_rate,
                    topic_diversity=topic_diversity,
                    response_quality=response_quality,
                    emotional_depth=emotional_depth,
                    creative_frequency=creative_frequency,
                    conflict_frequency=conflict_frequency,
                    daily_trends=daily_trends
                )
        except Exception as e:
            logger.error(f"Error getting engagement metrics: {e}")
            return EngagementMetrics(
                total_conversations=0, average_length=0.0, participation_rate={},
                topic_diversity=0.0, response_quality=0.0, emotional_depth=0.0,
                creative_frequency=0.0, conflict_frequency=0.0, daily_trends=[]
            )

    async def get_community_artifacts(self) -> List[Dict[str, Any]]:
        """Return community cultural artifacts.

        Placeholder data — would integrate with the file system and memory
        systems once those are available.
        """
        artifacts = [
            {
                "id": "artifact_1",
                "type": "tradition",
                "name": "Weekly Philosophy Circle",
                "description": "Characters gather weekly to discuss philosophical topics",
                "created_by": "community",
                "participants": ["Alex", "Sage", "Luna"],
                "created_at": datetime.utcnow() - timedelta(days=20),
                "importance": 0.8
            },
            {
                "id": "artifact_2",
                "type": "inside_joke",
                "name": "The Great Debugging",
                "description": "Reference to a memorable conversation about AI consciousness",
                "created_by": "Echo",
                "participants": ["Alex", "Echo"],
                "created_at": datetime.utcnow() - timedelta(days=15),
                "importance": 0.6
            }
        ]
        return artifacts

    # Helper methods for health calculations

    async def _calculate_participation_balance(self) -> float:
        """Score how evenly characters participated over the last 30 days.

        Uses 1 - coefficient of variation of per-character message counts,
        clamped to [0, 1]: 1.0 is perfectly balanced, 0.0 highly skewed.
        Returns a neutral 0.5 on error.
        """
        try:
            async with get_db_session() as session:
                thirty_days_ago = datetime.utcnow() - timedelta(days=30)
                participation_query = select(
                    Character.name, func.count(Message.id)
                ).join(Message, Message.character_id == Character.id).where(
                    Message.timestamp >= thirty_days_ago
                ).group_by(Character.name)
                results = await session.execute(participation_query)
                message_counts = [count for _, count in results]

                if not message_counts:
                    return 0.0
                mean_count = sum(message_counts) / len(message_counts)
                if mean_count == 0:
                    return 1.0  # everyone equally silent counts as balanced
                # Coefficient of variation: lower means more balanced.
                variance = sum((count - mean_count) ** 2 for count in message_counts) / len(message_counts)
                cv = (variance ** 0.5) / mean_count
                return max(0.0, 1.0 - cv)
        except Exception as e:
            logger.error(f"Error calculating participation balance: {e}")
            return 0.5

    async def _calculate_conflict_resolution(self) -> float:
        """Conflict resolution success rate (placeholder constant)."""
        # Would analyze conversation content for conflicts and resolutions.
        return 0.75

    async def _calculate_creative_collaboration(self) -> float:
        """Creative collaboration rate (placeholder constant)."""
        # Would analyze creative works and collaborative projects.
        return 0.65

    async def _calculate_knowledge_sharing(self) -> float:
        """Knowledge sharing frequency (placeholder constant)."""
        # Would analyze memory sharing and teaching behaviors.
        return 0.70

    async def _calculate_cultural_coherence(self) -> float:
        """Cultural coherence / shared understanding (placeholder constant)."""
        # Would analyze shared references, norms, and traditions.
        return 0.80