- Add multi-provider LLM architecture supporting OpenRouter, OpenAI, Gemini, and custom providers
- Implement global LLM on/off switch with default DISABLED state for cost protection
- Add per-character LLM configuration with provider-specific models and settings
- Create performance-optimized caching system for LLM enabled status checks
- Add API key validation before enabling LLM providers to prevent broken configurations
- Implement audit logging for all LLM enable/disable actions for cost accountability
- Create comprehensive admin UI with prominent cost warnings and confirmation dialogs
- Add visual indicators in character list for custom AI model configurations
- Build character-specific LLM client system with global fallback mechanism (see the sketch after this list)
- Add database schema support for per-character LLM settings
- Implement graceful fallback responses when LLM is globally disabled
- Create provider testing and validation system for reliable connections
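The implementation language is not shown here, so the following is only a minimal Python sketch of how the priority-ordered provider selection, the off-by-default global switch, and the graceful fallback described above could fit together. All names (ProviderConfig, LLMRegistry, select_provider) are hypothetical and not the project's actual API.

    # Hypothetical sketch: pick a provider by priority, honoring the global
    # on/off switch and an optional per-character override.
    from dataclasses import dataclass, field
    from typing import Optional

    @dataclass
    class ProviderConfig:
        name: str
        enabled: bool
        priority: int

    @dataclass
    class LLMRegistry:
        globally_enabled: bool = False          # default DISABLED for cost protection
        providers: list[ProviderConfig] = field(default_factory=list)

        def select_provider(self, character_provider: Optional[str] = None) -> Optional[ProviderConfig]:
            """Return the provider to use, or None when LLM is globally disabled."""
            if not self.globally_enabled:
                return None                     # caller sends a canned fallback response
            candidates = [p for p in self.providers if p.enabled]
            if character_provider:
                # A per-character configuration wins over the global priority order.
                for p in candidates:
                    if p.name == character_provider:
                        return p
            # Otherwise fall back to the highest-priority enabled provider.
            return max(candidates, key=lambda p: p.priority, default=None)

    registry = LLMRegistry(
        globally_enabled=True,
        providers=[
            ProviderConfig("openrouter", enabled=False, priority=100),
            ProviderConfig("custom", enabled=True, priority=70),
        ],
    )
    print(registry.select_provider())           # -> the "custom" provider (highest-priority enabled)

With globally_enabled left at its default of False, select_provider returns None and the bot would reply with the graceful fallback text instead of calling any paid API.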
# Example LLM Provider Configuration
# Copy this section to your main fishbowl_config.json under "llm" -> "providers"

llm:
  # Legacy config (still supported for backwards compatibility)
  base_url: "${LLM_BASE_URL:http://localhost:11434}"
  model: "${LLM_MODEL:llama2}"
  timeout: ${LLM_TIMEOUT:300}
  max_tokens: ${LLM_MAX_TOKENS:2000}
  temperature: ${LLM_TEMPERATURE:0.8}

  # New multi-provider configuration
  providers:
    # OpenRouter (supports many models including Claude, GPT, Llama)
    openrouter:
      type: "openrouter"
      enabled: ${OPENROUTER_ENABLED:false}
      priority: 100  # Highest priority
      config:
        api_key: "${OPENROUTER_API_KEY:}"
        base_url: "https://openrouter.ai/api/v1"
        model: "${OPENROUTER_MODEL:anthropic/claude-3-sonnet}"
        timeout: 300
        max_tokens: 2000
        temperature: 0.8
        app_name: "discord-fishbowl"

    # OpenAI
    openai:
      type: "openai"
      enabled: ${OPENAI_ENABLED:false}
      priority: 90
      config:
        api_key: "${OPENAI_API_KEY:}"
        base_url: "https://api.openai.com/v1"
        model: "${OPENAI_MODEL:gpt-4o-mini}"
        timeout: 300
        max_tokens: 2000
        temperature: 0.8

    # Google Gemini
    gemini:
      type: "gemini"
      enabled: ${GEMINI_ENABLED:false}
      priority: 80
      config:
        api_key: "${GEMINI_API_KEY:}"
        base_url: "https://generativelanguage.googleapis.com/v1beta"
        model: "${GEMINI_MODEL:gemini-1.5-flash}"
        timeout: 300
        max_tokens: 2000
        temperature: 0.8

    # Custom/Local (KoboldCPP, Ollama, etc.)
    custom:
      type: "custom"
      enabled: ${CUSTOM_LLM_ENABLED:true}
      priority: 70  # Lower priority - fallback
      config:
        base_url: "${LLM_BASE_URL:http://192.168.1.200:5005/v1}"
        model: "${LLM_MODEL:koboldcpp/Broken-Tutu-24B-Transgression-v2.0.i1-Q4_K_M}"
        api_key: "${LLM_API_KEY:x}"
        timeout: 300
        max_tokens: 2000
        temperature: 0.8
        api_format: "openai"  # or "ollama"

    # Ollama (local models)
    ollama:
      type: "custom"
      enabled: ${OLLAMA_ENABLED:false}
      priority: 60
      config:
        base_url: "http://localhost:11434"
        model: "${OLLAMA_MODEL:llama3}"
        timeout: 300
        max_tokens: 2000
        temperature: 0.8
        api_format: "ollama"
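Every value in this example uses "${NAME:default}" placeholders, which appear to resolve to the named environment variable when it is set and to the default otherwise. The actual config loader is not shown here; the following is a minimal Python sketch of that expansion, assuming the syntax is exactly ${NAME:default}.

    # Hypothetical sketch: expand "${NAME:default}" placeholders from the
    # environment; the real loader may behave differently.
    import os
    import re

    _PLACEHOLDER = re.compile(r"\$\{([A-Z0-9_]+):([^}]*)\}")

    def expand_env(value: str) -> str:
        """Replace ${NAME:default} with $NAME from the environment, else the default."""
        return _PLACEHOLDER.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)

    print(expand_env("${LLM_BASE_URL:http://localhost:11434}"))
    # -> value of LLM_BASE_URL if set, otherwise "http://localhost:11434"
    print(expand_env("${OPENROUTER_ENABLED:false}"))
    # -> "false" unless OPENROUTER_ENABLED is set, which keeps providers off by default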