RAG Starter Kit
A minimal but complete Retrieval-Augmented Generation setup with ChromaDB, OpenAI embeddings, and a query interface. From zero to RAG in 5 minutes.
Description
Build a working RAG system that can ingest documents and answer questions using retrieved context. Uses ChromaDB for vector storage and OpenAI for embeddings and generation.
Requirements
pip install chromadb openai tiktoken rich
# Optional, for PDF ingestion: pip install pymupdf
Project Structure
rag-demo/
├── ingest.py # Load documents into vector DB
├── query.py # Query the RAG system
├── rag.py # Core RAG logic
└── documents/ # Your source documents
rag.py - Core Module
"""
RAG Core - Embeddings, storage, and retrieval
"""
import os
import uuid
from typing import List, Optional

import chromadb
from chromadb.utils import embedding_functions
from openai import OpenAI
# ════════════════════════════════════════════════════════════════════════════
# Configuration
# ════════════════════════════════════════════════════════════════════════════
CHROMA_PATH = "./chroma_db"
COLLECTION_NAME = "documents"
EMBEDDING_MODEL = "text-embedding-3-small"
LLM_MODEL = "gpt-4o-mini"
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50
# ════════════════════════════════════════════════════════════════════════════
# Chunking
# ════════════════════════════════════════════════════════════════════════════
def chunk_text(text: str, chunk_size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> List[str]:
    """Split text into overlapping character chunks.

    Args:
        text: Raw text to split.
        chunk_size: Maximum characters per chunk.
        overlap: Characters shared between consecutive chunks
            (preserves context across chunk boundaries).

    Returns:
        Stripped, non-empty chunks in document order.

    Raises:
        ValueError: If chunk_size <= 0 or overlap >= chunk_size
            (the window would never advance, looping forever).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap >= chunk_size:
        # start = end - overlap would not move forward -> infinite loop
        raise ValueError("overlap must be smaller than chunk_size")
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end].strip())
        start = end - overlap
    return [c for c in chunks if c]  # drop chunks that stripped to ""
# ════════════════════════════════════════════════════════════════════════════
# Vector Store
# ════════════════════════════════════════════════════════════════════════════
def get_collection():
    """Return the persistent ChromaDB collection, creating it on first use.

    Embeddings are computed via OpenAI's embedding API; the HNSW index
    is configured for cosine distance.
    """
    embedder = embedding_functions.OpenAIEmbeddingFunction(
        api_key=os.getenv("OPENAI_API_KEY"),
        model_name=EMBEDDING_MODEL,
    )
    store = chromadb.PersistentClient(path=CHROMA_PATH)
    return store.get_or_create_collection(
        name=COLLECTION_NAME,
        embedding_function=embedder,
        metadata={"hnsw:space": "cosine"},
    )
def add_documents(texts: List[str], metadatas: Optional[List[dict]] = None) -> int:
    """Add text chunks to the vector store.

    Args:
        texts: Chunk contents to embed and index.
        metadatas: Optional per-chunk metadata dicts (parallel to texts).
            Defaults to empty dicts.

    Returns:
        Number of chunks added (0 for empty input).
    """
    if not texts:
        return 0  # avoid a pointless collection round-trip on empty input
    collection = get_collection()
    # uuid4-based IDs: the previous count-based scheme (doc_<count+i>)
    # collides after deletions or when the same corpus is re-ingested.
    ids = [f"doc_{uuid.uuid4().hex}" for _ in texts]
    collection.add(
        documents=texts,
        metadatas=metadatas or [{}] * len(texts),
        ids=ids,
    )
    return len(texts)
def search(query: str, n_results: int = 3) -> List[dict]:
    """Return the chunks most similar to `query`.

    Each hit is a dict with keys "content", "metadata", and "distance"
    (cosine distance; smaller is more similar).
    """
    raw = get_collection().query(
        query_texts=[query],
        n_results=n_results,
        include=["documents", "metadatas", "distances"],
    )
    hits = []
    triples = zip(raw["documents"][0], raw["metadatas"][0], raw["distances"][0])
    for content, metadata, distance in triples:
        hits.append({"content": content, "metadata": metadata, "distance": distance})
    return hits
# ════════════════════════════════════════════════════════════════════════════
# Generation
# ════════════════════════════════════════════════════════════════════════════
def generate_answer(query: str, context: List[dict]) -> str:
    """Ask the LLM to answer `query` using only the retrieved chunks.

    Each chunk is prefixed with its source filename so the model can
    cite where information came from.
    """
    sections = [
        f"[Source: {c['metadata'].get('source', 'unknown')}]\n{c['content']}"
        for c in context
    ]
    context_str = "\n\n---\n\n".join(sections)
    system_prompt = """You are a helpful assistant that answers questions based on the provided context.
Rules:
- Only use information from the context to answer
- If the context doesn't contain the answer, say so
- Cite sources when possible
- Be concise but complete"""
    user_prompt = f"""Context:
{context_str}
Question: {query}
Answer:"""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    # Low temperature keeps the answer close to the retrieved context.
    completion = OpenAI().chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.2,
    )
    return completion.choices[0].message.content
# ════════════════════════════════════════════════════════════════════════════
# RAG Pipeline
# ════════════════════════════════════════════════════════════════════════════
def query_rag(question: str, n_results: int = 3) -> dict:
    """Run the full RAG pipeline: retrieve context, then generate.

    Returns a dict with "question", "answer", and "sources" (the
    retrieved hits, as produced by search()).
    """
    hits = search(question, n_results=n_results)
    return {
        "question": question,
        "answer": generate_answer(question, hits),
        "sources": hits,
    }
ingest.py - Document Loader
#!/usr/bin/env python3
"""
Ingest documents into the RAG system.
Usage: python ingest.py documents/
"""
import sys
from pathlib import Path
from rich.console import Console
from rich.progress import track
from rag import chunk_text, add_documents
console = Console()
def load_file(path: Path) -> str:
    """Load plain text from a file.

    Supports .txt/.md (read as UTF-8) and .pdf (via PyMuPDF, imported
    lazily so the dependency is optional). Unsupported extensions are
    skipped with a warning.

    Returns:
        The file's text, or "" for unsupported file types.
    """
    suffix = path.suffix.lower()
    if suffix in (".txt", ".md"):
        return path.read_text(encoding="utf-8")
    if suffix == ".pdf":
        # pip install pymupdf
        import fitz
        # Context manager closes the document; the original leaked the
        # file handle by never calling doc.close().
        with fitz.open(path) as doc:
            return "\n".join(page.get_text() for page in doc)
    console.print(f"[yellow]Skipping unsupported: {path}[/]")
    return ""
def ingest_directory(dir_path: str):
    """Chunk and index every file under `dir_path` (recursive).

    Files that load_file() cannot read contribute nothing; a progress
    bar and a final chunk count are printed to the console.
    """
    root = Path(dir_path)
    if not root.exists():
        console.print(f"[red]Directory not found: {dir_path}[/]")
        return
    files = [entry for entry in root.glob("**/*") if entry.is_file()]
    console.print(f"[cyan]Found {len(files)} files[/]")
    total_chunks = 0
    for file in track(files, description="Processing..."):
        text = load_file(file)
        if not text:
            continue
        pieces = chunk_text(text)
        # Tag every chunk with its source filename for later citation.
        tags = [{"source": str(file.name)} for _ in pieces]
        total_chunks += add_documents(pieces, tags)
    console.print(f"[green]✓ Ingested {total_chunks} chunks[/]")
if __name__ == "__main__":
    # CLI entry point: a single directory argument is required.
    args = sys.argv[1:]
    if not args:
        console.print("[yellow]Usage: python ingest.py <directory>[/]")
        sys.exit(1)
    ingest_directory(args[0])
query.py - Interactive Query
#!/usr/bin/env python3
"""
Query the RAG system.
Usage: python query.py "What is...?"
"""
import sys
from rich.console import Console
from rich.panel import Panel
from rich.markdown import Markdown
from rag import query_rag
console = Console()
def main():
    """Entry point: answer a one-shot question from argv, or run a REPL."""
    args = sys.argv[1:]
    if args:
        # One-shot mode: the whole command line is the question.
        display_result(query_rag(" ".join(args)))
        return
    # Interactive mode: prompt until quit/EOF/Ctrl-C.
    console.print("[cyan]RAG Query Interface[/] (type 'quit' to exit)\n")
    while True:
        try:
            question = console.input("[bold green]❯[/] ")
            if question.lower() in ("quit", "exit", "q"):
                break
            if not question.strip():
                continue
            display_result(query_rag(question))
        except KeyboardInterrupt:
            break
def display_result(result: dict):
    """Pretty-print a query_rag() result: answer panel plus source list."""
    console.print()
    answer_panel = Panel(
        Markdown(result["answer"]),
        title="[bold green]Answer[/]",
        border_style="green",
    )
    console.print(answer_panel)
    console.print("\n[dim]Sources:[/]")
    for src in result["sources"]:
        relevance = 1 - src["distance"]  # cosine distance -> similarity
        name = src["metadata"].get("source", "unknown")
        console.print(f" [cyan]•[/] {name} [dim](relevance: {relevance:.1%})[/]")
    console.print()
# Run the CLI only when executed directly, not when imported.
if __name__ == "__main__":
    main()
Usage
# 1. Set your API key
export OPENAI_API_KEY=sk-...
# 2. Add some documents
mkdir documents
echo "Paris is the capital of France..." > documents/france.txt
# 3. Ingest documents
python ingest.py documents/
# 4. Query
python query.py "What is the capital of France?"
# Or interactive mode
python query.py
Enhancements
- Hybrid search: Combine vector search with BM25 keyword search
- Reranking: Use a cross-encoder to rerank retrieved chunks
- Streaming: Stream the LLM response for better UX
- Citations: Add inline citations to the generated answer
- Evaluation: Use RAGAS or similar to evaluate retrieval quality