LLM Configuration

This page provides comprehensive documentation for configuring Large Language Models (LLMs) in PraisonAI, including retry mechanisms, timeout settings, custom headers, and advanced optimization options.

Core LLM Configuration

Basic Setup

from praisonaiagents import Agent

# The model name and its options are passed together in one `llm` mapping.
# Supplying `llm` twice (once as a string, once as a dict) is rejected by
# Python with "SyntaxError: keyword argument repeated".
agent = Agent(
    name="Assistant",
    llm={
        "model": "gpt-4o",
        "temperature": 0.7,
        "max_tokens": 4000,
        "timeout": 60,
        "api_key": "your-api-key"
    }
)

Provider-Specific Configuration

# OpenAI: credentials, endpoint, retry/timeout and sampling options
openai_config = dict(
    model="gpt-4o",
    api_key="sk-...",
    organization="org-...",
    base_url="https://api.openai.com/v1",
    timeout=60,
    max_retries=3,
    temperature=0.7,
    max_tokens=4000,
    presence_penalty=0.1,
    frequency_penalty=0.1,
)

# Anthropic: same layout, plus the API version string
anthropic_config = dict(
    model="claude-3-sonnet-20240229",
    api_key="sk-ant-...",
    base_url="https://api.anthropic.com",
    timeout=90,
    max_retries=3,
    temperature=0.7,
    max_tokens=4000,
    anthropic_version="2023-06-01",
)

# Custom/local LLM: endpoint URL with a bearer token supplied as a header
custom_config = dict(
    model="custom-model",
    base_url="http://localhost:8000",
    timeout=120,
    headers={"Authorization": "Bearer custom-token"},
)

Retry Logic Configuration

Basic Retry Settings

retry_config = dict(
    max_retries=3,
    retry_delay=2.0,          # seconds
    retry_multiplier=2.0,     # exponential backoff multiplier
    max_retry_delay=30.0,     # maximum delay between retries
    # HTTP status codes that trigger a retry
    retry_on_status=[429, 500, 502, 503, 504],
    # Error names that trigger a retry
    retry_on_errors=[
        "RateLimitError",
        "APIConnectionError",
        "Timeout",
        "ServiceUnavailableError",
    ],
)

Advanced Retry Logic

advanced_retry_config = dict(
    retry_strategy="exponential_backoff_with_jitter",
    max_retries=5,
    base_delay=1.0,
    max_delay=60.0,
    jitter=0.1,  # 10% randomization

    # Error-specific retry behavior, keyed by error name
    error_retry_config=dict(
        RateLimitError=dict(
            max_retries=10,
            base_delay=5.0,
            respect_retry_after=True,
        ),
        APIConnectionError=dict(
            max_retries=3,
            base_delay=2.0,
            increase_timeout=True,
        ),
        # max_retries=0 means: don't retry
        InsufficientQuotaError=dict(
            max_retries=0,
            fallback_model="gpt-3.5-turbo",
        ),
    ),

    # Circuit breaker configuration (for custom integrations)
    # Note: Tool circuit breakers are automatic - see /features/tool-circuit-breaker
    circuit_breaker=dict(
        failure_threshold=5,
        recovery_timeout=60.0,
        success_threshold=2,
        timeout=30.0,
        graceful_degradation=True,
    ),
)

Custom Retry Logic Implementation

def custom_retry_handler(error, attempt, config):
    """Return how many seconds to wait before the next retry.

    Errors are recognised by class name (including base-class names), so the
    handler does not need any SDK-specific exception class to be imported.

    Args:
        error: The exception raised by the failed request.
        attempt: Retry attempt counter, used as the backoff exponent.
        config: Mutable mapping of retry settings. ``base_delay``,
            ``max_delay`` and ``retry_multiplier`` are read, defaulting to
            1.0, 60.0 and 2.0 when absent. ``fallback_model`` is written
            when the error is a ``ModelOverloadedError``.

    Returns:
        The delay in seconds, never larger than ``max_delay``.
    """
    base_delay = config.get('base_delay', 1.0)
    max_delay = config.get('max_delay', 60.0)
    error_names = {cls.__name__ for cls in type(error).__mro__}

    if 'RateLimitError' in error_names:
        # Extract retry-after header if available; the error may carry no
        # response object at all.
        response = getattr(error, 'response', None)
        headers = getattr(response, 'headers', None) or {}
        try:
            # Header values arrive as strings and must be converted before
            # they can be compared with a numeric delay.
            retry_after = float(headers.get('retry-after', 60))
        except (TypeError, ValueError):
            # Retry-After may also be an HTTP-date; use the default wait.
            retry_after = 60.0
        return min(max(0.0, retry_after), max_delay)

    if 'ModelOverloadedError' in error_names:
        # Switch to a different model
        config['fallback_model'] = "gpt-3.5-turbo"
        # Capped like every other branch so the wait cannot grow unbounded.
        return min(base_delay * (2 ** attempt), max_delay)

    # Default exponential backoff
    multiplier = config.get('retry_multiplier', 2.0)
    return min(base_delay * (multiplier ** attempt), max_delay)

llm_config = {
    "retry_handler": custom_retry_handler,
    "max_retries": 5
}

Timeout Configuration

Timeout Settings

timeout_config = dict(
    # Basic timeout (seconds)
    timeout=60,

    # Detailed timeout configuration, one value per phase
    timeout_config=dict(
        connect=5.0,   # Connection timeout
        read=60.0,     # Read timeout
        write=10.0,    # Write timeout
        pool=5.0,      # Connection pool timeout
    ),

    # Dynamic timeout based on request
    dynamic_timeout=dict(
        base=30,
        per_token=0.01,  # Additional time per token
        min=10,
        max=300,
    ),

    # Timeout retry behavior
    timeout_retry=dict(
        increase_factor=1.5,  # Increase timeout on retry
        max_timeout=300,
    ),
)

Request-Specific Timeouts

# Timeout settings keyed by operation type
operation_timeouts = dict(
    completion=dict(
        timeout=60,
        dynamic=True,
        factors=dict(
            max_tokens=0.01,
            temperature=1.2,  # Higher temperature = more time
        ),
    ),
    embedding=dict(
        timeout=30,
        batch_factor=0.1,  # Per item in batch
    ),
    chat=dict(
        timeout=90,
        message_factor=5,  # Per message in history
    ),
)

Custom Headers Configuration

Basic Headers

# Static headers, grouped under the single "headers" key
headers_config = dict(
    headers=dict((
        ("Authorization", "Bearer your-api-key"),
        ("Content-Type", "application/json"),
        ("User-Agent", "PraisonAI/1.0"),
        ("X-Custom-Header", "custom-value"),
    )),
)

Dynamic Headers

import uuid

def generate_headers(request_type, model, **kwargs):
    """Generate headers dynamically based on request.

    Args:
        request_type: Label embedded in the ``User-Agent`` value.
        model: Model identifier. It is echoed in ``X-Model`` and used to
            pick provider-specific headers. Matching is case-insensitive
            and accepts provider-qualified names (containing "anthropic"
            or "openai") as well as bare names starting with "claude" or
            "gpt-", such as "claude-3-sonnet-20240229" or "gpt-4o".
        **kwargs: Only ``api_key`` is read; a truthy value adds a bearer
            ``Authorization`` header.

    Returns:
        A dict of header names to values, with a freshly generated
        ``X-Request-ID`` on every call.
    """
    headers = {
        "User-Agent": f"PraisonAI/1.0 ({request_type})",
        "X-Model": model,
        "X-Request-ID": str(uuid.uuid4()),
        "X-Client-Version": "1.0.0"
    }

    # Add authentication
    if api_key := kwargs.get('api_key'):
        headers["Authorization"] = f"Bearer {api_key}"

    # Add custom headers for specific providers. A plain substring test for
    # the provider name misses bare model names, so those are matched by
    # their prefix as well.
    model_name = model.lower()
    if "anthropic" in model_name or model_name.startswith("claude"):
        headers["anthropic-version"] = "2023-06-01"
    elif "openai" in model_name or model_name.startswith("gpt-"):
        headers["OpenAI-Beta"] = "assistants=v1"

    return headers

llm_config = {
    "headers_generator": generate_headers,
    "static_headers": {
        "X-Environment": "production"
    }
}

Provider-Specific Headers

import time

# Headers for OpenAI requests
openai_headers = dict((
    ("OpenAI-Organization", "org-xxx"),
    ("OpenAI-Beta", "assistants=v1"),
    ("X-Request-ID", "unique-request-id"),
))

# Headers for Anthropic requests
anthropic_headers = dict((
    ("anthropic-version", "2023-06-01"),
    ("X-Request-Source", "praisonai"),
))

# Headers for a custom authentication scheme; the timestamp is the current
# Unix time in whole seconds, captured when this statement runs
custom_auth_headers = dict((
    ("X-API-Key", "your-api-key"),
    ("X-API-Secret", "your-secret"),
    ("X-Timestamp", f"{int(time.time())}"),
    ("X-Signature", "generated-signature"),
))

Advanced LLM Configuration

Load Balancing

load_balancing_config = dict(
    strategy="round_robin",  # or "least_latency", "weighted"
    # One entry per endpoint: URL, weight, and the models listed for it
    endpoints=[
        dict(
            url="https://api.openai.com/v1",
            weight=0.6,
            models=["gpt-4o", "gpt-3.5-turbo"],
        ),
        dict(
            url="https://api.anthropic.com",
            weight=0.4,
            models=["claude-3-sonnet"],
        ),
    ],
    health_check=dict(
        enabled=True,
        interval=60,
        timeout=5,
        failure_threshold=3,
    ),
)

Model Fallback Configuration

fallback_config = dict(
    primary_model="gpt-4o",
    # Ordered list of alternatives, each with the condition attached to it
    fallback_chain=[
        dict(
            model="gpt-4-turbo",
            condition="rate_limit",
            max_attempts=2,
        ),
        dict(
            model="gpt-3.5-turbo",
            condition="any_error",
            temperature_adjustment=-0.2,  # More deterministic
        ),
        dict(
            model="claude-3-sonnet",
            condition="repeated_failure",
            provider_switch=True,
        ),
    ],
    fallback_strategy="progressive",  # or "immediate"
    preserve_context=True,
)

Request Optimization

optimization_config = dict(
    # Request batching
    batching=dict(
        enabled=True,
        max_batch_size=10,
        batch_timeout=0.1,  # seconds
        dynamic_batching=True,
    ),

    # Response streaming
    streaming=dict(
        enabled=True,
        chunk_size=100,
        buffer_size=1000,
        timeout_per_chunk=30,
    ),

    # Caching
    cache=dict(
        enabled=True,
        ttl=3600,
        max_size=1000,
        key_strategy="semantic",  # or "exact"
        similarity_threshold=0.95,
    ),

    # Token optimization
    token_optimization=dict(
        compress_prompts=True,
        remove_redundancy=True,
        dynamic_max_tokens=True,
        reserve_completion_tokens=500,
    ),
)

Rate Limiting Configuration

rate_limit_config = dict(
    # Request and token ceilings per time window
    rate_limits=dict(
        requests_per_minute=60,
        tokens_per_minute=90000,
        requests_per_day=10000,
    ),
    rate_limit_strategy="adaptive",  # or "fixed", "burst"
    burst_config=dict(
        burst_size=10,
        refill_rate=1.0,  # per second
    ),
    quota_management=dict(
        track_usage=True,
        warn_at_percentage=80,
        hard_limit_behavior="queue",  # or "reject", "fallback"
    ),
)

Complete Configuration Example

from praisonaiagents import Agent

# Comprehensive LLM configuration.
# The model name sits inside the single `llm` mapping: passing `llm` twice
# (once as a string, once as a dict) is rejected by Python with
# "SyntaxError: keyword argument repeated".
agent = Agent(
    name="ProductionAgent",
    llm={
        # Model settings
        "model": "gpt-4o",
        "temperature": 0.7,
        "max_tokens": 4000,
        "top_p": 0.9,
        "presence_penalty": 0.1,
        "frequency_penalty": 0.1,

        # Timeout configuration
        "timeout": 60,
        "timeout_config": {
            "connect": 5,
            "read": 60,
            "dynamic": True
        },

        # Retry configuration
        "max_retries": 5,
        "retry_delay": 2.0,
        "retry_multiplier": 2.0,
        "retry_on_status": [429, 500, 502, 503],

        # Headers
        "headers": {
            "User-Agent": "PraisonAI/1.0",
            "X-Request-Source": "production"
        },

        # Advanced features
        "streaming": True,
        "cache_enabled": True,
        "fallback_models": ["gpt-3.5-turbo"],

        # Rate limiting
        "rate_limit_config": {
            "requests_per_minute": 60,
            "adaptive": True
        }
    }
)

Environment Variables

# Basic LLM settings
export OPENAI_API_KEY='sk-...' \
       OPENAI_MODEL='gpt-4o' \
       OPENAI_TEMPERATURE='0.7'

# Timeout settings
export PRAISONAI_LLM_TIMEOUT='60' \
       PRAISONAI_LLM_CONNECT_TIMEOUT='5' \
       PRAISONAI_LLM_READ_TIMEOUT='60'

# Retry settings
export PRAISONAI_LLM_MAX_RETRIES='3' \
       PRAISONAI_LLM_RETRY_DELAY='2' \
       PRAISONAI_LLM_RETRY_MULTIPLIER='2'

# Headers (the custom headers value is a JSON object, single-quoted so the
# inner double quotes reach the variable unchanged)
export PRAISONAI_LLM_USER_AGENT='PraisonAI/1.0' \
       PRAISONAI_LLM_CUSTOM_HEADERS='{"X-Custom": "value"}'

# Advanced settings
export PRAISONAI_LLM_STREAMING='true' \
       PRAISONAI_LLM_CACHE_ENABLED='true' \
       PRAISONAI_LLM_RATE_LIMIT='60'

Monitoring and Debugging

monitoring_config = dict(
    # Request/response logging, with the key names listed for sanitizing
    logging=dict(
        log_requests=True,
        log_responses=True,
        log_level="INFO",
        sanitize_keys=["api_key", "authorization"],
    ),
    # Which measurements to track and the export interval
    metrics=dict(
        track_latency=True,
        track_tokens=True,
        track_costs=True,
        export_interval=60,
    ),
    # Debugging switches
    debugging=dict(
        capture_raw_responses=False,
        validate_responses=True,
        break_on_error=False,
    ),
)

Getting Started

Core Concepts

Guides

Features

Models

Databases

Observability

Memory

Knowledge

RAG

Persistence

Tools

Other Features

Developers

Configuration

Best Practices

Getting Started (No Code)

Documentation Index

LLM Configuration

Core LLM Configuration

Basic Setup

Provider-Specific Configuration

Retry Logic Configuration

Basic Retry Settings

Advanced Retry Logic

Custom Retry Logic Implementation

Timeout Configuration

Timeout Settings

Request-Specific Timeouts

Custom Headers Configuration

Basic Headers

Dynamic Headers

Provider-Specific Headers

Advanced LLM Configuration

Load Balancing

Model Fallback Configuration

Request Optimization

Rate Limiting Configuration

Complete Configuration Example

Environment Variables

Monitoring and Debugging

See Also