RAG Module
The RAG module provides a thin orchestration layer over Knowledge for retrieval-augmented generation.
Core Classes
RAG
The main pipeline class that orchestrates retrieval and generation.
from praisonaiagents.rag import RAG, RAGConfig
rag = RAG(
knowledge=knowledge, # Required: Knowledge instance
llm=None, # Optional: LLM instance
config=RAGConfig(), # Optional: Configuration
reranker=None, # Optional: Reranker instance
context_builder=None, # Optional: Custom context builder
citation_formatter=None, # Optional: Custom citation formatter
)
Methods
query(question, **kwargs) -> RAGResult
Execute a RAG query and return result with citations.
result = rag.query("What is the main finding?")
print(result.answer)
print(result.citations)
aquery(question, **kwargs) -> RAGResult
Async version of query.
result = await rag.aquery("What is the conclusion?")
stream(question, **kwargs) -> Iterator[str]
Stream response tokens.
for chunk in rag.stream("Summarize the document"):
print(chunk, end="", flush=True)
astream(question, **kwargs) -> AsyncIterator[str]
Async streaming.
async for chunk in rag.astream("Explain the methodology"):
print(chunk, end="", flush=True)
get_citations(question, **kwargs) -> List[Citation]
Get citations without generating an answer.
citations = rag.get_citations("What sources mention X?")
for c in citations:
print(f"[{c.id}] {c.source}: {c.text[:100]}")
RAGConfig
Configuration for the RAG pipeline.
from praisonaiagents.rag import RAGConfig, RetrievalStrategy
config = RAGConfig(
top_k=5, # Chunks to retrieve
min_score=0.0, # Minimum relevance score
max_context_tokens=4000, # Context token limit
include_citations=True, # Include citations
retrieval_strategy=RetrievalStrategy.BASIC, # Retrieval strategy
rerank=False, # Enable reranking
rerank_top_k=3, # Results after rerank
template="...", # Prompt template
system_prompt=None, # System prompt
stream=False, # Stream by default
)
Retrieval Strategies
from praisonaiagents.rag import RetrievalStrategy
RetrievalStrategy.BASIC # Simple vector search
RetrievalStrategy.FUSION # Reciprocal rank fusion
RetrievalStrategy.HYBRID # Dense + sparse retrieval
RAGResult
Result from a RAG query.
@dataclass
class RAGResult:
answer: str # Generated answer
citations: List[Citation] # Source citations
context_used: str # Context passed to LLM
query: str # Original query
metadata: Dict[str, Any] # Timing, stats, etc.
Properties and Methods
has_citations - Boolean indicating if citations exist
format_answer_with_citations() - Format answer with source references
Citation
Source citation for RAG answers.
@dataclass
class Citation:
id: str # Citation ID (e.g., "1")
source: str # Source document
text: str # Text snippet
score: float # Relevance score
doc_id: Optional[str] # Document identifier
chunk_id: Optional[str] # Chunk identifier
offset: Optional[int] # Character offset
metadata: Dict[str, Any] # Additional metadata
Protocols
The RAG module uses protocols for extensibility.
ContextBuilderProtocol
Custom context assembly logic.
from typing import Any, Dict, List, Union
from praisonaiagents.knowledge.models import SearchResultItem
from praisonaiagents.rag.protocols import ContextBuilderProtocol
ResultItem = Union[Dict[str, Any], SearchResultItem]
class MyContextBuilder:
def build(
self,
results: List[ResultItem],
max_tokens: int = 4000,
deduplicate: bool = True,
) -> str:
# Custom context building logic
return assembled_context
CitationFormatterProtocol
Custom citation formatting.
from praisonaiagents.rag.protocols import CitationFormatterProtocol
class MyCitationFormatter:
def format(
self,
results: List[Dict[str, Any]],
start_id: int = 1,
) -> List[Citation]:
# Custom citation formatting
return citations
Context Utilities
Helper functions for context building that accept both dict and SearchResultItem formats.
Dict results
SearchResultItem results
from praisonaiagents.rag import build_context, deduplicate_chunks
# Traditional dict format
results = [
{"text": "First content", "metadata": {"filename": "a.pdf"}},
{"text": "Second content", "metadata": {"filename": "b.pdf"}},
]
# Build context from dict results
context, used_results = build_context(
results=results,
max_tokens=4000,
deduplicate=True,
include_source=True,
)
# Remove duplicate chunks
unique = deduplicate_chunks(results)
from praisonaiagents.rag import build_context, deduplicate_chunks
from praisonaiagents.knowledge.models import SearchResultItem
# SearchResultItem format
results = [
SearchResultItem(text="First content", source="a.pdf", filename="a.pdf"),
SearchResultItem(text="Second content", source="b.pdf", filename="b.pdf"),
]
# Mixed dict + object input also works
results.append({"text": "Third content", "metadata": {"filename": "c.pdf"}})
# Build context from SearchResultItem objects
context, used_results = build_context(
results=results,
max_tokens=2000,
include_source=True,
)
unique = deduplicate_chunks(results)
build_context and deduplicate_chunks accept a mix of dict results and SearchResultItem objects. When include_source=True, the label is taken from metadata["filename"] / metadata["source"] first, falling back to the top-level filename / source attribute on the item, and finally to Source N.
The context utilities support two input formats with automatic fallback for metadata lookups:
| Lookup | 1st choice | 2nd choice | Fallback |
|---|---|---|---|
| source | metadata["source"] | item.source (object) / item["source"] (dict) | "" |
| filename | metadata["filename"] | item.filename / item["filename"] | "" |
| text | item.text / item["text"] | item.memory / item["memory"] | "" (item skipped in build_context) |
Integration with Knowledge
RAG uses Knowledge for all retrieval operations:
from praisonaiagents import Knowledge
from praisonaiagents.rag import RAG
# Knowledge handles indexing
knowledge = Knowledge()
knowledge.add("documents/")
# RAG handles answering
rag = RAG(knowledge=knowledge)
result = rag.query("What is discussed?")
Error Handling
from praisonaiagents.rag import RAG
rag = RAG(knowledge=knowledge)
try:
result = rag.query("Question")
except Exception as e:
print(f"RAG error: {e}")
Best Practices
- Batch indexing: Add multiple documents at once
- Tune top_k: Start with 5, adjust based on quality
- Use min_score: Filter low-relevance results
- Enable reranking: For higher precision (costs latency)
- Stream responses: Better UX for long answers