LLM Configuration
This page provides comprehensive documentation for configuring Large Language Models (LLMs) in PraisonAI, including retry mechanisms, timeout settings, custom headers, and advanced optimization options.Core LLM Configuration
Basic Setup
from praisonaiagents import Agent
agent = Agent(
name="Assistant",
llm="gpt-4o",
llm={
"temperature": 0.7,
"max_tokens": 4000,
"timeout": 60,
"api_key": "your-api-key"
}
)
Provider-Specific Configuration
# OpenAI Configuration
openai_config = {
"model": "gpt-4o",
"api_key": "sk-...",
"organization": "org-...",
"base_url": "https://api.openai.com/v1",
"timeout": 60,
"max_retries": 3,
"temperature": 0.7,
"max_tokens": 4000,
"presence_penalty": 0.1,
"frequency_penalty": 0.1
}
# Anthropic Configuration
anthropic_config = {
"model": "claude-3-sonnet-20240229",
"api_key": "sk-ant-...",
"base_url": "https://api.anthropic.com",
"timeout": 90,
"max_retries": 3,
"temperature": 0.7,
"max_tokens": 4000,
"anthropic_version": "2023-06-01"
}
# Custom/Local LLM Configuration
custom_config = {
"model": "custom-model",
"base_url": "http://localhost:8000",
"timeout": 120,
"headers": {
"Authorization": "Bearer custom-token"
}
}
Retry Logic Configuration
Basic Retry Settings
retry_config = {
"max_retries": 3,
"retry_delay": 2.0, # seconds
"retry_multiplier": 2.0, # exponential backoff multiplier
"max_retry_delay": 30.0, # maximum delay between retries
"retry_on_status": [429, 500, 502, 503, 504], # HTTP status codes
"retry_on_errors": [
"RateLimitError",
"APIConnectionError",
"Timeout",
"ServiceUnavailableError"
]
}
Advanced Retry Logic
advanced_retry_config = {
"retry_strategy": "exponential_backoff_with_jitter",
"max_retries": 5,
"base_delay": 1.0,
"max_delay": 60.0,
"jitter": 0.1, # 10% randomization
# Error-specific retry behavior
"error_retry_config": {
"RateLimitError": {
"max_retries": 10,
"base_delay": 5.0,
"respect_retry_after": True
},
"APIConnectionError": {
"max_retries": 3,
"base_delay": 2.0,
"increase_timeout": True
},
"InsufficientQuotaError": {
"max_retries": 0, # Don't retry
"fallback_model": "gpt-3.5-turbo"
}
},
# Circuit breaker configuration
"circuit_breaker": {
"enabled": True,
"failure_threshold": 5,
"recovery_timeout": 60,
"half_open_requests": 2
}
}
Custom Retry Logic Implementation
def custom_retry_handler(error, attempt, config):
"""Custom retry logic for specific scenarios"""
if isinstance(error, RateLimitError):
# Extract retry-after header if available
retry_after = error.response.headers.get('retry-after', 60)
return min(retry_after, config['max_delay'])
elif isinstance(error, ModelOverloadedError):
# Switch to a different model
config['fallback_model'] = "gpt-3.5-turbo"
return config['base_delay'] * (2 ** attempt)
else:
# Default exponential backoff
return min(
config['base_delay'] * (config['retry_multiplier'] ** attempt),
config['max_delay']
)
llm_config = {
"retry_handler": custom_retry_handler,
"max_retries": 5
}
Timeout Configuration
Timeout Settings
timeout_config = {
# Basic timeout
"timeout": 60, # seconds
# Detailed timeout configuration
"timeout_config": {
"connect": 5.0, # Connection timeout
"read": 60.0, # Read timeout
"write": 10.0, # Write timeout
"pool": 5.0 # Connection pool timeout
},
# Dynamic timeout based on request
"dynamic_timeout": {
"base": 30,
"per_token": 0.01, # Additional time per token
"min": 10,
"max": 300
},
# Timeout retry behavior
"timeout_retry": {
"increase_factor": 1.5, # Increase timeout on retry
"max_timeout": 300
}
}
Request-Specific Timeouts
# Configure timeouts based on operation type
operation_timeouts = {
"completion": {
"timeout": 60,
"dynamic": True,
"factors": {
"max_tokens": 0.01,
"temperature": 1.2 # Higher temperature = more time
}
},
"embedding": {
"timeout": 30,
"batch_factor": 0.1 # Per item in batch
},
"chat": {
"timeout": 90,
"message_factor": 5 # Per message in history
}
}
Custom Headers Configuration
Basic Headers
headers_config = {
"headers": {
"Authorization": "Bearer your-api-key",
"Content-Type": "application/json",
"User-Agent": "PraisonAI/1.0",
"X-Custom-Header": "custom-value"
}
}
Dynamic Headers
import uuid
def generate_headers(request_type, model, **kwargs):
"""Generate headers dynamically based on request"""
headers = {
"User-Agent": f"PraisonAI/1.0 ({request_type})",
"X-Model": model,
"X-Request-ID": str(uuid.uuid4()),
"X-Client-Version": "1.0.0"
}
# Add authentication
if api_key := kwargs.get('api_key'):
headers["Authorization"] = f"Bearer {api_key}"
# Add custom headers for specific providers
if "anthropic" in model:
headers["anthropic-version"] = "2023-06-01"
elif "openai" in model:
headers["OpenAI-Beta"] = "assistants=v1"
return headers
llm_config = {
"headers_generator": generate_headers,
"static_headers": {
"X-Environment": "production"
}
}
Provider-Specific Headers
import time
# OpenAI specific headers
openai_headers = {
"OpenAI-Organization": "org-xxx",
"OpenAI-Beta": "assistants=v1",
"X-Request-ID": "unique-request-id"
}
# Anthropic specific headers
anthropic_headers = {
"anthropic-version": "2023-06-01",
"X-Request-Source": "praisonai"
}
# Custom authentication headers
custom_auth_headers = {
"X-API-Key": "your-api-key",
"X-API-Secret": "your-secret",
"X-Timestamp": str(int(time.time())),
"X-Signature": "generated-signature"
}
Advanced LLM Configuration
Load Balancing
load_balancing_config = {
"strategy": "round_robin", # or "least_latency", "weighted"
"endpoints": [
{
"url": "https://api.openai.com/v1",
"weight": 0.6,
"models": ["gpt-4o", "gpt-3.5-turbo"]
},
{
"url": "https://api.anthropic.com",
"weight": 0.4,
"models": ["claude-3-sonnet"]
}
],
"health_check": {
"enabled": True,
"interval": 60,
"timeout": 5,
"failure_threshold": 3
}
}
Model Fallback Configuration
fallback_config = {
"primary_model": "gpt-4o",
"fallback_chain": [
{
"model": "gpt-4-turbo",
"condition": "rate_limit",
"max_attempts": 2
},
{
"model": "gpt-3.5-turbo",
"condition": "any_error",
"temperature_adjustment": -0.2 # More deterministic
},
{
"model": "claude-3-sonnet",
"condition": "repeated_failure",
"provider_switch": True
}
],
"fallback_strategy": "progressive", # or "immediate"
"preserve_context": True
}
Request Optimization
optimization_config = {
# Request batching
"batching": {
"enabled": True,
"max_batch_size": 10,
"batch_timeout": 0.1, # seconds
"dynamic_batching": True
},
# Response streaming
"streaming": {
"enabled": True,
"chunk_size": 100,
"buffer_size": 1000,
"timeout_per_chunk": 30
},
# Caching
"cache": {
"enabled": True,
"ttl": 3600,
"max_size": 1000,
"key_strategy": "semantic", # or "exact"
"similarity_threshold": 0.95
},
# Token optimization
"token_optimization": {
"compress_prompts": True,
"remove_redundancy": True,
"dynamic_max_tokens": True,
"reserve_completion_tokens": 500
}
}
Rate Limiting Configuration
rate_limit_config = {
"rate_limits": {
"requests_per_minute": 60,
"tokens_per_minute": 90000,
"requests_per_day": 10000
},
"rate_limit_strategy": "adaptive", # or "fixed", "burst"
"burst_config": {
"burst_size": 10,
"refill_rate": 1.0 # per second
},
"quota_management": {
"track_usage": True,
"warn_at_percentage": 80,
"hard_limit_behavior": "queue" # or "reject", "fallback"
}
}
Complete Configuration Example
from praisonaiagents import Agent
# Comprehensive LLM configuration
agent = Agent(
name="ProductionAgent",
llm="gpt-4o",
llm={
# Model settings
"temperature": 0.7,
"max_tokens": 4000,
"top_p": 0.9,
"presence_penalty": 0.1,
"frequency_penalty": 0.1,
# Timeout configuration
"timeout": 60,
"timeout_config": {
"connect": 5,
"read": 60,
"dynamic": True
},
# Retry configuration
"max_retries": 5,
"retry_delay": 2.0,
"retry_multiplier": 2.0,
"retry_on_status": [429, 500, 502, 503],
# Headers
"headers": {
"User-Agent": "PraisonAI/1.0",
"X-Request-Source": "production"
},
# Advanced features
"streaming": True,
"cache_enabled": True,
"fallback_models": ["gpt-3.5-turbo"],
# Rate limiting
"rate_limit_config": {
"requests_per_minute": 60,
"adaptive": True
}
}
)
Environment Variables
# Basic LLM settings
export OPENAI_API_KEY="sk-..."
export OPENAI_MODEL="gpt-4o"
export OPENAI_TEMPERATURE="0.7"
# Timeout settings
export PRAISONAI_LLM_TIMEOUT="60"
export PRAISONAI_LLM_CONNECT_TIMEOUT="5"
export PRAISONAI_LLM_READ_TIMEOUT="60"
# Retry settings
export PRAISONAI_LLM_MAX_RETRIES="3"
export PRAISONAI_LLM_RETRY_DELAY="2"
export PRAISONAI_LLM_RETRY_MULTIPLIER="2"
# Headers
export PRAISONAI_LLM_USER_AGENT="PraisonAI/1.0"
export PRAISONAI_LLM_CUSTOM_HEADERS='{"X-Custom": "value"}'
# Advanced settings
export PRAISONAI_LLM_STREAMING="true"
export PRAISONAI_LLM_CACHE_ENABLED="true"
export PRAISONAI_LLM_RATE_LIMIT="60"
Monitoring and Debugging
monitoring_config = {
"logging": {
"log_requests": True,
"log_responses": True,
"log_level": "INFO",
"sanitize_keys": ["api_key", "authorization"]
},
"metrics": {
"track_latency": True,
"track_tokens": True,
"track_costs": True,
"export_interval": 60
},
"debugging": {
"capture_raw_responses": False,
"validate_responses": True,
"break_on_error": False
}
}
See Also
- Model Configuration - Supported models and providers
- Agent Configuration - Agent-level LLM settings
- Best Practices - LLM configuration guidelines

