Initial implementation of autonomous Discord LLM fishbowl
Core Features:
- Fully autonomous AI character ecosystem with multi-personality support
- Advanced RAG system with personal, community, and creative memory layers
- MCP integration for character self-modification and file system access
- PostgreSQL database with comprehensive character relationship tracking
- Redis caching and ChromaDB vector storage for semantic memory retrieval
- Dynamic personality evolution based on interactions and self-reflection
- Community knowledge management with tradition and norm identification
- Sophisticated conversation engine with natural scheduling and topic management
- Docker containerization and production-ready deployment configuration

Architecture:
- Multi-layer vector databases for personal, community, and creative knowledge
- Character file systems with personal and shared digital spaces
- Autonomous self-modification with safety validation and audit trails
- Memory importance scoring with time-based decay and consolidation
- Community health monitoring and cultural evolution tracking
- RAG-powered conversation context and relationship optimization

Characters can:
- Develop authentic personalities through experience-based learning
- Create and build upon original creative works and philosophical insights
- Form complex relationships with memory of past interactions
- Modify their own personality traits through self-reflection cycles
- Contribute to and learn from shared community knowledge
- Manage personal digital spaces with diaries, creative works, and reflections
- Engage in collaborative projects and community decision-making

The system supports indefinite autonomous operation with continuous character development, community culture evolution, and creative collaboration.
This commit is contained in:
519
src/rag/vector_store.py
Normal file
519
src/rag/vector_store.py
Normal file
@@ -0,0 +1,519 @@
|
||||
import asyncio
|
||||
import chromadb
|
||||
import numpy as np
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
import json
|
||||
import hashlib
|
||||
from dataclasses import dataclass, asdict
|
||||
from enum import Enum
|
||||
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from ..utils.logging import log_error_with_context, log_character_action
|
||||
from ..utils.config import get_settings
|
||||
import logging
|
||||
|
||||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
||||
|
||||
class MemoryType(Enum):
    """Categories a stored memory can belong to.

    The string values are what gets written into vector-store metadata under
    the ``memory_type`` key and parsed back with ``MemoryType(value)``.
    """

    PERSONAL = "personal"
    RELATIONSHIP = "relationship"
    CREATIVE = "creative"
    COMMUNITY = "community"
    REFLECTION = "reflection"
    EXPERIENCE = "experience"
|
||||
|
||||
@dataclass
class VectorMemory:
    """A single memory record as stored in, or read back from, a vector store."""

    id: str
    content: str
    memory_type: MemoryType
    character_name: str
    timestamp: datetime
    importance: float
    metadata: Dict[str, Any]
    embedding: Optional[List[float]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of the memory.

        The enum is reduced to its string value and the timestamp to an ISO-8601
        string. The embedding is not included, and ``metadata`` is the same dict
        object held by the instance (not a copy).
        """
        serialized: Dict[str, Any] = {}
        serialized["id"] = self.id
        serialized["content"] = self.content
        serialized["memory_type"] = self.memory_type.value
        serialized["character_name"] = self.character_name
        serialized["timestamp"] = self.timestamp.isoformat()
        serialized["importance"] = self.importance
        serialized["metadata"] = self.metadata
        return serialized
|
||||
|
||||
class VectorStoreManager:
    """Manages multi-layer vector databases for character memories.

    Every character gets a personal and a creative ChromaDB collection, and all
    characters share a single community collection. Text is embedded with a
    SentenceTransformer model before it is stored or searched.
    """

    # Width of the zero-vector fallback; matches the MiniLM model loaded in __init__.
    _FALLBACK_EMBEDDING_DIM = 384
    # Jaccard word overlap above which two memories are clustered together.
    _CLUSTER_OVERLAP_THRESHOLD = 0.3
    # Smallest importance change that is worth writing back during decay.
    _MIN_DECAY_DELTA = 0.01

    def __init__(self, data_path: str = "./data/vector_stores"):
        """Create the data directory, load the embedding model and open ChromaDB.

        Args:
            data_path: Directory in which the persistent ChromaDB client keeps its data.
        """
        self.data_path = Path(data_path)
        self.data_path.mkdir(parents=True, exist_ok=True)

        # Initialize embedding model
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

        # Initialize ChromaDB client
        self.chroma_client = chromadb.PersistentClient(path=str(self.data_path))

        # Collection references (filled in by initialize())
        self.personal_collections: Dict[str, chromadb.Collection] = {}
        self.community_collection = None
        self.creative_collections: Dict[str, chromadb.Collection] = {}

        # Memory importance decay
        self.importance_decay_rate = 0.95
        self.consolidation_threshold = 0.8

    async def initialize(self, character_names: List[str]):
        """Get or create the collections for every character plus the community one.

        Args:
            character_names: Characters to create personal/creative collections for.

        Raises:
            Exception: Any error from ChromaDB is logged and re-raised.
        """
        try:
            for character_name in character_names:
                # Personal memory collection
                collection_name = f"personal_{character_name.lower()}"
                self.personal_collections[character_name] = self.chroma_client.get_or_create_collection(
                    name=collection_name,
                    metadata={"type": "personal", "character": character_name}
                )

                # Creative collection
                creative_collection_name = f"creative_{character_name.lower()}"
                self.creative_collections[character_name] = self.chroma_client.get_or_create_collection(
                    name=creative_collection_name,
                    metadata={"type": "creative", "character": character_name}
                )

            # Community collection (shared by all characters)
            self.community_collection = self.chroma_client.get_or_create_collection(
                name="community_knowledge",
                metadata={"type": "community"}
            )

            logger.info(f"Initialized vector stores for {len(character_names)} characters")

        except Exception as e:
            log_error_with_context(e, {"component": "vector_store_init"})
            raise

    async def store_memory(self, memory: "VectorMemory") -> str:
        """Store a memory in the collection that matches its type and owner.

        An embedding and an ID are generated when the memory does not carry them;
        both are written back onto ``memory``.

        Args:
            memory: The memory to persist.

        Returns:
            The ID under which the memory was stored.

        Raises:
            ValueError: If no collection exists for the memory's type/character.
            Exception: Any other failure is logged and re-raised.
        """
        try:
            # Generate embedding
            if not memory.embedding:
                memory.embedding = await self._generate_embedding(memory.content)

            # Generate unique ID if not provided
            if not memory.id:
                memory.id = self._generate_memory_id(memory)

            # Select appropriate collection
            collection = self._get_collection_for_memory(memory)
            if collection is None:
                raise ValueError(f"No collection found for memory type: {memory.memory_type}")

            # Caller metadata plus the fields needed to rebuild a VectorMemory on read
            metadata = memory.metadata.copy()
            metadata.update({
                "character_name": memory.character_name,
                "timestamp": memory.timestamp.isoformat(),
                "importance": memory.importance,
                "memory_type": memory.memory_type.value
            })

            # Store in collection
            collection.add(
                ids=[memory.id],
                embeddings=[memory.embedding],
                documents=[memory.content],
                metadatas=[metadata]
            )

            log_character_action(
                memory.character_name,
                "stored_vector_memory",
                {"memory_type": memory.memory_type.value, "importance": memory.importance}
            )

            return memory.id

        except Exception as e:
            log_error_with_context(e, {
                "character": memory.character_name,
                "memory_type": memory.memory_type.value
            })
            raise

    async def query_memories(self, character_name: str, query: str,
                             memory_types: "Optional[List[MemoryType]]" = None,
                             limit: int = 10, min_importance: float = 0.0) -> "List[VectorMemory]":
        """Query a character's memories using semantic search.

        Args:
            character_name: Character whose collections are searched.
            query: Free-text query to embed and search with.
            memory_types: Types that select which collections to search; defaults
                to personal, relationship, experience and reflection.
            limit: Maximum number of results, per collection and overall.
            min_importance: Results with lower stored importance are dropped.

        Returns:
            Up to ``limit`` memories ordered by 0.7 * similarity + 0.3 * importance.
            An empty list is returned if the query as a whole fails.
        """
        try:
            # Generate query embedding
            query_embedding = await self._generate_embedding(query)

            if not memory_types:
                memory_types = [MemoryType.PERSONAL, MemoryType.RELATIONSHIP,
                                MemoryType.EXPERIENCE, MemoryType.REFLECTION]

            # Several memory types share one backing collection. Query each
            # collection only once, otherwise every hit is returned once per type
            # and the duplicates crowd out other results.
            collections_to_search = []
            seen_collections = set()
            for memory_type in memory_types:
                collection = self._get_collection_for_type(character_name, memory_type)
                if collection is None or id(collection) in seen_collections:
                    continue
                seen_collections.add(id(collection))
                collections_to_search.append((collection, memory_type))

            # Search each collection
            all_results = []
            for collection, memory_type in collections_to_search:
                try:
                    results = collection.query(
                        query_embeddings=[query_embedding],
                        n_results=limit,
                        where={"character_name": character_name} if memory_type != MemoryType.COMMUNITY else None
                    )

                    # Convert results to VectorMemory objects
                    for memory_id, doc, metadata, distance in self._iter_result_rows(results):
                        if metadata.get('importance', 0) < min_importance:
                            continue
                        memory = VectorMemory(
                            id=memory_id,
                            content=doc,
                            memory_type=MemoryType(metadata['memory_type']),
                            character_name=metadata['character_name'],
                            timestamp=datetime.fromisoformat(metadata['timestamp']),
                            importance=metadata['importance'],
                            metadata=metadata
                        )
                        # NOTE(review): 1 - distance is only a true similarity for
                        # cosine-distance collections; confirm the configured metric.
                        memory.metadata['similarity_score'] = 1 - distance
                        all_results.append(memory)

                except Exception as e:
                    # Best effort: one failing collection must not hide the others.
                    logger.warning(f"Error querying collection {memory_type}: {e}")
                    continue

            # Sort by relevance (similarity + importance)
            all_results.sort(
                key=lambda m: m.metadata.get('similarity_score', 0) * 0.7 + m.importance * 0.3,
                reverse=True
            )

            return all_results[:limit]

        except Exception as e:
            log_error_with_context(e, {"character": character_name, "query": query})
            return []

    async def query_community_knowledge(self, query: str, limit: int = 5) -> "List[VectorMemory]":
        """Query the shared community knowledge base.

        Args:
            query: Free-text query to embed and search with.
            limit: Maximum number of results.

        Returns:
            Matching memories, most similar first; an empty list if the community
            collection is not initialised or the query fails.
        """
        try:
            if self.community_collection is None:
                return []

            query_embedding = await self._generate_embedding(query)
            results = self.community_collection.query(
                query_embeddings=[query_embedding],
                n_results=limit
            )

            memories = []
            for memory_id, doc, metadata, distance in self._iter_result_rows(results):
                memory = VectorMemory(
                    id=memory_id,
                    content=doc,
                    memory_type=MemoryType.COMMUNITY,
                    character_name=metadata.get('character_name', 'community'),
                    timestamp=datetime.fromisoformat(metadata['timestamp']),
                    importance=metadata['importance'],
                    metadata=metadata
                )
                memory.metadata['similarity_score'] = 1 - distance
                memories.append(memory)

            return sorted(memories, key=lambda m: m.metadata.get('similarity_score', 0), reverse=True)

        except Exception as e:
            log_error_with_context(e, {"query": query, "component": "community_knowledge"})
            return []

    async def get_creative_knowledge(self, character_name: str, query: str, limit: int = 5) -> "List[VectorMemory]":
        """Query a character's creative knowledge base.

        Args:
            character_name: Character whose creative collection is searched.
            query: Free-text query to embed and search with.
            limit: Maximum number of results.

        Returns:
            Matching memories, most similar first; an empty list if the character
            has no creative collection or the query fails.
        """
        try:
            if character_name not in self.creative_collections:
                return []

            collection = self.creative_collections[character_name]
            query_embedding = await self._generate_embedding(query)
            results = collection.query(
                query_embeddings=[query_embedding],
                n_results=limit
            )

            memories = []
            for memory_id, doc, metadata, distance in self._iter_result_rows(results):
                memory = VectorMemory(
                    id=memory_id,
                    content=doc,
                    memory_type=MemoryType.CREATIVE,
                    character_name=character_name,
                    timestamp=datetime.fromisoformat(metadata['timestamp']),
                    importance=metadata['importance'],
                    metadata=metadata
                )
                memory.metadata['similarity_score'] = 1 - distance
                memories.append(memory)

            return sorted(memories, key=lambda m: m.metadata.get('similarity_score', 0), reverse=True)

        except Exception as e:
            log_error_with_context(e, {"character": character_name, "query": query})
            return []

    async def consolidate_memories(self, character_name: str) -> Dict[str, Any]:
        """Merge clusters of similar personal memories into single memories.

        Args:
            character_name: Character whose personal collection is consolidated.

        Returns:
            ``{"consolidated_count": n}`` where n is the net number of memories
            removed; n is 0 when nothing was done or an error occurred.
        """
        try:
            consolidated_count = 0

            collection = self.personal_collections.get(character_name)
            if collection is None:
                return {"consolidated_count": 0}

            # Get all memories
            all_memories = collection.get(include=['documents', 'metadatas'])

            if len(all_memories['ids']) < 10:  # Not enough memories to consolidate
                return {"consolidated_count": 0}

            # Find similar memory clusters
            clusters = await self._find_similar_clusters(all_memories)

            for cluster in clusters:
                if len(cluster) < 3:  # Only consolidate if 3+ similar memories
                    continue

                consolidated_memory = await self._create_consolidated_memory(cluster, character_name)
                if not consolidated_memory:
                    continue

                # Store the replacement first so a failure never loses the originals.
                await self.store_memory(consolidated_memory)
                collection.delete(ids=[mem['id'] for mem in cluster])

                consolidated_count += len(cluster) - 1

            log_character_action(
                character_name,
                "consolidated_memories",
                {"consolidated_count": consolidated_count}
            )

            return {"consolidated_count": consolidated_count}

        except Exception as e:
            log_error_with_context(e, {"character": character_name})
            return {"consolidated_count": 0}

    async def decay_memory_importance(self, character_name: str):
        """Apply time-based decay to the importance of a character's personal memories.

        Decay is applied only for the days elapsed since it was last applied
        (tracked in the ``last_decay`` metadata key), so calling this repeatedly
        does not compound the decay for the same period. Errors are logged and
        swallowed.

        Args:
            character_name: Character whose personal collection is decayed.
        """
        try:
            collection = self.personal_collections.get(character_name)
            if collection is None:
                return

            # Get all memories
            all_memories = collection.get(include=['metadatas'])

            now = datetime.utcnow()
            updated_ids = []
            updated_metadatas = []
            for memory_id, metadata in zip(all_memories['ids'], all_memories['metadatas']):
                new_metadata = self._decay_metadata(metadata, now)
                if new_metadata is not None:
                    updated_ids.append(memory_id)
                    updated_metadatas.append(new_metadata)

            # Write all changes back in one batched call
            if updated_ids:
                collection.update(ids=updated_ids, metadatas=updated_metadatas)
                logger.info(f"Applied importance decay to {len(updated_ids)} memories for {character_name}")

        except Exception as e:
            log_error_with_context(e, {"character": character_name})

    def _decay_metadata(self, metadata: Dict[str, Any], now: datetime) -> Optional[Dict[str, Any]]:
        """Return decayed metadata for one memory, or None if no update is needed.

        Args:
            metadata: Stored metadata with ``timestamp``, ``importance`` and
                optionally ``last_decay`` (ISO-8601 strings / float).
            now: Current time as a naive UTC datetime.

        Returns:
            A copy of ``metadata`` with ``importance`` decayed and ``last_decay``
            set to ``now``, or None when the change would be insignificant.
        """
        # Measure from the last applied decay, falling back to creation time.
        reference = datetime.fromisoformat(metadata.get('last_decay') or metadata['timestamp'])
        if reference.tzinfo is not None:
            # Normalise aware timestamps to naive UTC so they can be compared with `now`.
            reference = (reference - reference.utcoffset()).replace(tzinfo=None)

        elapsed_days = (now - reference).days
        if elapsed_days <= 0:
            return None

        current_importance = metadata['importance']
        decayed_importance = current_importance * (self.importance_decay_rate ** elapsed_days)

        # Only update if significant change
        if abs(decayed_importance - current_importance) <= self._MIN_DECAY_DELTA:
            return None

        updated = dict(metadata)
        updated['importance'] = decayed_importance
        updated['last_decay'] = now.isoformat()
        return updated

    async def _generate_embedding(self, text: str) -> List[float]:
        """Generate an embedding for ``text`` without blocking the event loop.

        Returns:
            The embedding as a list of floats. If encoding fails the error is
            logged and a zero vector is returned as a fallback.
        """
        try:
            # Encoding is CPU-bound; run it in the default executor.
            loop = asyncio.get_running_loop()
            embedding = await loop.run_in_executor(
                None,
                lambda: self.embedding_model.encode(text).tolist()
            )
            return embedding
        except Exception as e:
            log_error_with_context(e, {"text_length": len(text)})
            # Return zero embedding as fallback
            return [0.0] * self._FALLBACK_EMBEDDING_DIM

    @staticmethod
    def _iter_result_rows(results: Dict[str, Any]):
        """Return an iterator of (id, document, metadata, distance) rows.

        Only the first result set is read, because every query in this class is
        issued with a single query embedding.
        """
        return zip(
            results['ids'][0],
            results['documents'][0],
            results['metadatas'][0],
            results['distances'][0],
        )

    def _get_collection_for_memory(self, memory: "VectorMemory") -> "Optional[chromadb.Collection]":
        """Get the collection a memory belongs in, based on its type and owner."""
        return self._get_collection_for_type(memory.character_name, memory.memory_type)

    def _get_collection_for_type(self, character_name: str, memory_type: "MemoryType") -> "Optional[chromadb.Collection]":
        """Get the collection for a memory type and character.

        Community memories map to the shared collection, creative memories to the
        character's creative collection and every other type to the character's
        personal collection. Returns None when that collection does not exist.
        """
        if memory_type == MemoryType.COMMUNITY:
            return self.community_collection
        if memory_type == MemoryType.CREATIVE:
            return self.creative_collections.get(character_name)
        return self.personal_collections.get(character_name)

    def _generate_memory_id(self, memory: "VectorMemory") -> str:
        """Build an ID from character, type, timestamp (to the second) and a content hash."""
        # MD5 is used only as a short content fingerprint, not for security.
        content_hash = hashlib.md5(memory.content.encode()).hexdigest()[:8]
        timestamp_str = memory.timestamp.strftime("%Y%m%d_%H%M%S")
        return f"{memory.character_name}_{memory.memory_type.value}_{timestamp_str}_{content_hash}"

    async def _find_similar_clusters(self, memories: Dict[str, List]) -> List[List[Dict]]:
        """Find clusters of similar memories for consolidation.

        This is a simplified greedy clustering: each unprocessed memory seeds a
        cluster and absorbs every later unprocessed memory whose word-set overlap
        with the seed exceeds the threshold.

        Args:
            memories: Mapping with parallel ``ids``, ``documents`` and ``metadatas`` lists.

        Returns:
            Clusters with at least two members, each member a dict with ``id``,
            ``content`` and ``metadata`` keys.
        """
        ids = memories['ids']
        documents = memories['documents']
        metadatas = memories['metadatas']

        # Tokenise every document once instead of once per compared pair.
        word_sets = [set((doc or "").lower().split()) for doc in documents]

        clusters = []
        processed = set()

        for i, memory_id in enumerate(ids):
            if memory_id in processed:
                continue

            cluster = [{'id': memory_id, 'content': documents[i], 'metadata': metadatas[i]}]
            processed.add(memory_id)
            seed_words = word_sets[i]

            for j in range(i + 1, len(ids)):
                other_id = ids[j]
                if other_id in processed:
                    continue

                # Simple similarity check based on content overlap
                union = seed_words | word_sets[j]
                overlap = len(seed_words & word_sets[j]) / len(union) if union else 0

                if overlap > self._CLUSTER_OVERLAP_THRESHOLD:
                    cluster.append({'id': other_id, 'content': documents[j], 'metadata': metadatas[j]})
                    processed.add(other_id)

            if len(cluster) > 1:
                clusters.append(cluster)

        return clusters

    async def _create_consolidated_memory(self, cluster: List[Dict], character_name: str) -> "Optional[VectorMemory]":
        """Create one consolidated personal memory from a cluster of similar memories.

        The content joins the first three members, the importance is the cluster
        average and the timestamp is the earliest in the cluster.

        Returns:
            The new memory (with an empty ID, to be generated on store), or None
            if building it failed.
        """
        try:
            # Combine content (limit to first 3)
            contents = [mem['content'] for mem in cluster]
            combined_content = f"Consolidated memory: {' | '.join(contents[:3])}"
            if len(cluster) > 3:
                combined_content += f" | ... and {len(cluster) - 3} more similar memories"

            # Calculate average importance
            avg_importance = sum(mem['metadata']['importance'] for mem in cluster) / len(cluster)

            # Get earliest timestamp
            earliest_timestamp = min(
                datetime.fromisoformat(mem['metadata']['timestamp']) for mem in cluster
            )

            return VectorMemory(
                id="",  # Will be generated
                content=combined_content,
                memory_type=MemoryType.PERSONAL,
                character_name=character_name,
                timestamp=earliest_timestamp,
                importance=avg_importance,
                metadata={
                    "consolidated": True,
                    "original_count": len(cluster),
                    "consolidation_date": datetime.utcnow().isoformat()
                }
            )

        except Exception as e:
            log_error_with_context(e, {"character": character_name, "cluster_size": len(cluster)})
            return None

    def get_store_statistics(self, character_name: str) -> Dict[str, Any]:
        """Get memory counts for a character's vector stores.

        Returns:
            Counts for personal, creative and community memories plus a total of
            the character's own (personal + creative) memories; on failure,
            ``{"error": message}``.
        """
        try:
            stats = {
                "personal_memories": 0,
                "creative_memories": 0,
                "community_memories": 0,
                "total_memories": 0
            }

            # Personal memories
            if character_name in self.personal_collections:
                personal_count = self.personal_collections[character_name].count()
                stats["personal_memories"] = personal_count
                stats["total_memories"] += personal_count

            # Creative memories
            if character_name in self.creative_collections:
                creative_count = self.creative_collections[character_name].count()
                stats["creative_memories"] = creative_count
                stats["total_memories"] += creative_count

            # Community memories (shared, so not part of the character's total)
            if self.community_collection is not None:
                stats["community_memories"] = self.community_collection.count()

            return stats

        except Exception as e:
            log_error_with_context(e, {"character": character_name})
            return {"error": str(e)}
|
||||
|
||||
# Global vector store manager
|
||||
# NOTE: instantiated at import time with the default data path, so importing this
# module runs VectorStoreManager.__init__ (creates the data directory, loads the
# embedding model and opens the ChromaDB client).
vector_store_manager = VectorStoreManager()
|
||||
Reference in New Issue
Block a user