RAG — February 8, 2026

RAG Starter Kit

A minimal but complete Retrieval-Augmented Generation setup with ChromaDB, OpenAI embeddings, and a query interface. From zero to RAG in 5 minutes.

rag · chromadb · embeddings · vector-database · retrieval

Description

Build a working RAG system that can ingest documents and answer questions using retrieved context. Uses ChromaDB for vector storage and OpenAI for embeddings and generation.

Requirements

pip install chromadb openai tiktoken rich

Project Structure

rag-demo/
├── ingest.py      # Load documents into vector DB
├── query.py       # Query the RAG system
├── rag.py         # Core RAG logic
└── documents/     # Your source documents

rag.py - Core Module

"""
RAG Core - Embeddings, storage, and retrieval
"""

import os
import uuid
from typing import List, Optional

import chromadb
from chromadb.utils import embedding_functions
from openai import OpenAI

# ════════════════════════════════════════════════════════════════════════════
# Configuration
# ════════════════════════════════════════════════════════════════════════════

CHROMA_PATH = "./chroma_db"
COLLECTION_NAME = "documents"
EMBEDDING_MODEL = "text-embedding-3-small"
LLM_MODEL = "gpt-4o-mini"
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50

# ════════════════════════════════════════════════════════════════════════════
# Chunking
# ════════════════════════════════════════════════════════════════════════════

def chunk_text(text: str, chunk_size: Optional[int] = None, overlap: Optional[int] = None) -> List[str]:
    """Split text into overlapping chunks.

    Args:
        chunk_size: max characters per chunk (defaults to CHUNK_SIZE).
        overlap: characters shared between consecutive chunks (defaults to
            CHUNK_OVERLAP); must be smaller than chunk_size.

    Returns:
        Non-empty, stripped chunks covering the whole text.

    Raises:
        ValueError: if overlap >= chunk_size (the window would never advance).
    """
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    if overlap is None:
        overlap = CHUNK_OVERLAP
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end].strip())
        if end >= len(text):
            # The tail is fully consumed. Without this break, stepping back by
            # `overlap` could re-enter the loop and emit a tiny chunk that
            # duplicates the end of the previous one.
            break
        start = end - overlap
    return [c for c in chunks if c]  # Remove empty chunks

# ════════════════════════════════════════════════════════════════════════════
# Vector Store
# ════════════════════════════════════════════════════════════════════════════

def get_collection():
    """Open the persistent ChromaDB collection, creating it on first use.

    Embeddings are computed server-side by OpenAI via the configured
    embedding function; similarity uses cosine distance.
    """
    embed_fn = embedding_functions.OpenAIEmbeddingFunction(
        api_key=os.getenv("OPENAI_API_KEY"),
        model_name=EMBEDDING_MODEL,
    )

    client = chromadb.PersistentClient(path=CHROMA_PATH)
    collection = client.get_or_create_collection(
        name=COLLECTION_NAME,
        embedding_function=embed_fn,
        metadata={"hnsw:space": "cosine"},  # cosine distance for the HNSW index
    )
    return collection

def add_documents(texts: List[str], metadatas: Optional[List[dict]] = None) -> int:
    """Add pre-chunked documents to the vector store.

    Args:
        texts: chunk strings to embed and index.
        metadatas: optional per-chunk metadata dicts, parallel to `texts`.

    Returns:
        The number of chunks added.
    """
    if not texts:
        return 0  # nothing to index; avoids an empty add() call

    collection = get_collection()

    # Random ids instead of count-based ones: after any deletion the count
    # shrinks, so "doc_{count + i}" can collide with an existing id and
    # silently overwrite that document.
    ids = [f"doc_{uuid.uuid4().hex}" for _ in texts]

    collection.add(
        documents=texts,
        metadatas=metadatas or [{}] * len(texts),
        ids=ids,
    )

    return len(texts)

def search(query: str, n_results: int = 3) -> List[dict]:
    """Return the top `n_results` chunks most similar to `query`.

    Each hit is a dict with keys "content", "metadata", and "distance"
    (cosine distance; smaller is more similar).
    """
    collection = get_collection()

    raw = collection.query(
        query_texts=[query],
        n_results=n_results,
        include=["documents", "metadatas", "distances"],
    )

    # Chroma returns one parallel list per queried text; we sent one query,
    # so index [0] everywhere.
    hits = []
    for doc, meta, dist in zip(raw["documents"][0], raw["metadatas"][0], raw["distances"][0]):
        hits.append({"content": doc, "metadata": meta, "distance": dist})
    return hits

# ════════════════════════════════════════════════════════════════════════════
# Generation
# ════════════════════════════════════════════════════════════════════════════

def generate_answer(query: str, context: List[dict]) -> str:
    """Ask the LLM to answer `query` using only the retrieved `context` chunks."""
    # Concatenate the retrieved chunks, each prefixed with its source name.
    sections = []
    for item in context:
        source = item['metadata'].get('source', 'unknown')
        sections.append(f"[Source: {source}]\n{item['content']}")
    context_str = "\n\n---\n\n".join(sections)

    system_prompt = """You are a helpful assistant that answers questions based on the provided context.
    
Rules:
- Only use information from the context to answer
- If the context doesn't contain the answer, say so
- Cite sources when possible
- Be concise but complete"""

    user_prompt = f"""Context:
{context_str}

Question: {query}

Answer:"""

    client = OpenAI()
    completion = client.chat.completions.create(
        model=LLM_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.2,  # low temperature: stay close to the provided context
    )

    return completion.choices[0].message.content

# ════════════════════════════════════════════════════════════════════════════
# RAG Pipeline
# ════════════════════════════════════════════════════════════════════════════

def query_rag(question: str, n_results: int = 3) -> dict:
    """Run the full RAG pipeline: retrieve context, then generate an answer.

    Returns a dict with the original question, the generated answer, and
    the retrieved source chunks.
    """
    sources = search(question, n_results=n_results)
    answer = generate_answer(question, sources)

    return {
        "question": question,
        "answer": answer,
        "sources": sources,
    }

ingest.py - Document Loader

#!/usr/bin/env python3
"""
Ingest documents into the RAG system.
Usage: python ingest.py documents/
"""

import sys
from pathlib import Path
from rich.console import Console
from rich.progress import track
from rag import chunk_text, add_documents

console = Console()

def load_file(path: Path) -> str:
    """Return the text content of `path`, or "" for unsupported file types."""
    ext = path.suffix.lower()

    if ext in (".txt", ".md"):
        return path.read_text(encoding="utf-8")

    if ext == ".pdf":
        import fitz  # pip install pymupdf
        pdf = fitz.open(path)
        return "\n".join(page.get_text() for page in pdf)

    console.print(f"[yellow]Skipping unsupported: {path}[/]")
    return ""

def ingest_directory(dir_path: str):
    """Chunk and index every readable file found beneath `dir_path`."""
    root = Path(dir_path)

    if not root.exists():
        console.print(f"[red]Directory not found: {dir_path}[/]")
        return

    files = [p for p in root.glob("**/*") if p.is_file()]
    console.print(f"[cyan]Found {len(files)} files[/]")

    total_chunks = 0
    for doc_path in track(files, description="Processing..."):
        text = load_file(doc_path)
        if not text:
            continue  # unsupported or empty file

        pieces = chunk_text(text)
        # Tag every chunk with its originating file name for citations.
        meta = [{"source": str(doc_path.name)} for _ in pieces]
        total_chunks += add_documents(pieces, meta)

    console.print(f"[green]✓ Ingested {total_chunks} chunks[/]")

if __name__ == "__main__":
    # Require a directory argument; exit non-zero so shell scripts can detect misuse.
    if len(sys.argv) < 2:
        console.print("[yellow]Usage: python ingest.py <directory>[/]")
        sys.exit(1)
    
    ingest_directory(sys.argv[1])

query.py - Interactive Query

#!/usr/bin/env python3
"""
Query the RAG system.
Usage: python query.py "What is...?"
"""

import sys
from rich.console import Console
from rich.panel import Panel
from rich.markdown import Markdown
from rag import query_rag

console = Console()

def main():
    """CLI entry point: answer argv as one question, or run an interactive loop."""
    if len(sys.argv) >= 2:
        # One-shot mode: treat all arguments as a single question.
        question = " ".join(sys.argv[1:])
        result = query_rag(question)
        display_result(result)
        return

    # Interactive REPL; Ctrl-C anywhere in an iteration exits cleanly.
    console.print("[cyan]RAG Query Interface[/] (type 'quit' to exit)\n")
    while True:
        try:
            question = console.input("[bold green]❯[/] ")
            if question.lower() in ["quit", "exit", "q"]:
                break
            if not question.strip():
                continue

            display_result(query_rag(question))
        except KeyboardInterrupt:
            break

def display_result(result: dict):
    """Render the RAG answer panel and its source list to the terminal."""
    console.print()
    answer_panel = Panel(
        Markdown(result["answer"]),
        title="[bold green]Answer[/]",
        border_style="green",
    )
    console.print(answer_panel)

    console.print("\n[dim]Sources:[/]")
    for source in result["sources"]:
        # Cosine distance in [0, 2]; 1 - distance reads as a rough similarity.
        relevance = 1 - source["distance"]
        name = source['metadata'].get('source', 'unknown')
        console.print(f"  [cyan]•[/] {name} [dim](relevance: {relevance:.1%})[/]")
    console.print()

if __name__ == "__main__":
    # Run the CLI only when executed directly, not when imported.
    main()

Usage

# 1. Set your API key
export OPENAI_API_KEY=sk-...

# 2. Add some documents
mkdir documents
echo "Paris is the capital of France..." > documents/france.txt

# 3. Ingest documents
python ingest.py documents/

# 4. Query
python query.py "What is the capital of France?"

# Or interactive mode
python query.py

Enhancements

  • Hybrid search: Combine vector search with BM25 keyword search
  • Reranking: Use a cross-encoder to rerank retrieved chunks
  • Streaming: Stream the LLM response for better UX
  • Citations: Add inline citations to the generated answer
  • Evaluation: Use RAGAS or similar to evaluate retrieval quality