End-to-End RAG Pipeline
Complete walkthrough — create a collection, ingest documents, retrieve context, and build a prompt for an LLM.
This guide builds a complete RAG pipeline from scratch using PostgreSQL, pgvector, and the OpenAI embedder.
1. Set up the engine
package main
import (
	"context"
	"database/sql"
	"fmt"
	"log"
	"os"

	"github.com/jackc/pgx/v5/pgxpool"
	"github.com/uptrace/bun"
	"github.com/uptrace/bun/dialect/pgdialect"
	"github.com/uptrace/bun/driver/pgdriver"
	"github.com/xraph/weave"
	"github.com/xraph/weave/assembler"
	"github.com/xraph/weave/collection"
	"github.com/xraph/weave/embedder"
	"github.com/xraph/weave/engine"
	"github.com/xraph/weave/retriever"
	pgstore "github.com/xraph/weave/store/postgres"
	pgvec "github.com/xraph/weave/vectorstore/pgvector"
)
func main() {
ctx := context.Background()
// PostgreSQL metadata store
sqldb := sql.OpenDB(pgdriver.NewConnector(
pgdriver.WithDSN(os.Getenv("DATABASE_URL")),
))
db := bun.NewDB(sqldb, pgdialect.New())
metaStore := pgstore.New(db)
// pgvector store
pool, err := pgxpool.New(ctx, os.Getenv("DATABASE_URL"))
if err != nil {
log.Fatal(err)
}
vecStore := pgvec.New(pool)
// OpenAI embedder
emb := embedder.NewOpenAI(
embedder.WithOpenAIKey(os.Getenv("OPENAI_API_KEY")),
)
// Build engine
eng, err := engine.New(
engine.WithStore(metaStore),
engine.WithVectorStore(vecStore),
engine.WithEmbedder(emb),
)
if err != nil {
log.Fatal(err)
}
// Run migrations
if err := metaStore.Migrate(ctx); err != nil {
log.Fatal("migrate:", err)
}
if err := vecStore.Migrate(ctx); err != nil {
log.Fatal("migrate vectors:", err)
}
// Set tenant scope
ctx = weave.WithTenant(ctx, "acme-corp")
ctx = weave.WithApp(ctx, "support-bot")
run(ctx, eng)
}2. Create a collection
func run(ctx context.Context, eng *engine.Engine) {
col := &collection.Collection{
Name: "product-docs",
EmbeddingModel: "text-embedding-3-small",
EmbeddingDims: 1536,
ChunkStrategy: "recursive",
ChunkSize: 512,
ChunkOverlap: 50,
}
if err := eng.CreateCollection(ctx, col); err != nil {
log.Fatal("create collection:", err)
}
fmt.Println("created collection:", col.ID)3. Ingest documents
documents := []struct{ title, content string }{
{
title: "Return Policy",
content: "Our return policy allows returns within 30 days of purchase. Items must be in original condition with all tags attached. Refunds are processed within 5-7 business days.",
},
{
title: "Shipping Policy",
content: "Standard shipping takes 5-7 business days. Expedited shipping takes 2-3 business days. We ship to all 50 US states and internationally to 45 countries.",
},
{
title: "Warranty Information",
content: "All products come with a 1-year limited warranty. The warranty covers manufacturing defects but not accidental damage. Contact support@acme.com to file a warranty claim.",
},
}
for _, doc := range documents {
result, err := eng.Ingest(ctx, &engine.IngestInput{
CollectionID: col.ID,
Title: doc.title,
Content: doc.content,
})
if err != nil {
log.Printf("ingest %q: %v", doc.title, err)
continue
}
fmt.Printf("ingested %q: %d chunks\n", doc.title, result.ChunkCount)
}4. Retrieve relevant context
userQuery := "How long do I have to return a product?"
results, err := eng.Retrieve(ctx, userQuery,
engine.WithCollection(col.ID),
engine.WithTopK(5),
engine.WithMinScore(0.7),
)
if err != nil {
log.Fatal("retrieve:", err)
}
fmt.Printf("retrieved %d chunks\n", len(results))
for _, r := range results {
fmt.Printf(" [%.2f] %s\n", r.Score, r.Chunk.Content[:60])
}5. Assemble context and call LLM
// Convert to retriever.Result for the assembler
retResults := make([]retriever.Result, len(results))
for i, r := range results {
retResults[i] = retriever.Result{Chunk: r.Chunk, Score: r.Score}
}
// Assemble with 3000-token budget
a := assembler.New(assembler.WithMaxTokens(3000))
assembled, err := a.Assemble(ctx, retResults)
if err != nil {
log.Fatal("assemble:", err)
}
// Build the final prompt
prompt := fmt.Sprintf(`You are a helpful customer support assistant.
Context:
%s
Question: %s
Answer based only on the context above.`, assembled.Context, userQuery)
fmt.Println("--- PROMPT ---")
fmt.Println(prompt)
// Pass prompt to your LLM of choice
}6. Cross-collection search
For applications with multiple collections (e.g. per-product-line docs), use HybridSearch:
// col1ID and col2ID are placeholders for the IDs of collections you have
// already created; id.CollectionID needs the corresponding Weave package
// imported (not shown in this snippet).
results, err := eng.HybridSearch(ctx, userQuery,
	&engine.HybridSearchParams{
		Collections: []id.CollectionID{col1ID, col2ID},
		TopK:        10,
		MinScore:    0.7,
	},
)
if err != nil {
	// Check the error as every other call in this guide does.
	log.Fatalf("hybrid search: %v", err)
}

Results are merged and re-ranked by score before being returned.
Complete code
The full runnable example is in examples/rag-pipeline/main.go in the Weave repository.