Wire the Pipeline

Connect search, build_prompt, generate_answer, and print inside chat_loop

💻

Writing code and entering commands is only available on desktop. Open this page on a larger screen to complete this chapter.

Connecting the pieces

All the functions you need already exist. chat_loop just needs to call them in sequence for each question.

Step	Function	Input	Output
1	`search`	`client`, `question`, `chunks`, `embeddings`	`top_chunks` — the most relevant chunk dicts (the top 3 by default)
2	`build_prompt`	`question`, `top_chunks`	`prompt` — the full text sent to the model
3	`generate_answer`	`client`, `prompt`	`answer` — the model's response text
4	`print`	`f"Assistant: {answer}"`	Displayed to the user

Each function takes the output of the previous one, forming a pipeline from raw question to displayed answer.

Instructions

After the `if not question: continue` guard, call `search(client, question, chunks, embeddings)` and assign the result to `top_chunks`.
Call build_prompt(question, top_chunks) and assign the result to prompt.
Call generate_answer(client, prompt) and assign the result to answer.
Print f"Assistant: {answer}".

← Previous Chapter Next Chapter →

import json
import os
import sys
import time
import numpy as np
from dotenv import load_dotenv
from google import genai
from google.genai import types
from files import index_folder

def create_client():
    """Create the Gemini API client used by the rest of the script.

    ``load_dotenv()`` runs first, then the key is read from the
    ``GEMINI_API_KEY`` environment variable. If the variable is unset,
    ``None`` is passed through to ``genai.Client`` unchanged.

    Returns:
        The constructed ``genai.Client`` instance.
    """
    load_dotenv()
    return genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))

def embed_text(client, text):
    """Embed a single piece of document text.

    Args:
        client: Gemini client exposing ``models.embed_content``.
        text: The text to embed.

    Returns:
        The ``values`` of the first embedding in the API response.
    """
    # Stored chunks are embedded with the document task type; queries use
    # a different task type in ``search``.
    document_config = types.EmbedContentConfig(task_type="RETRIEVAL_DOCUMENT")
    response = client.models.embed_content(
        model="gemini-embedding-001",
        contents=text,
        config=document_config,
    )
    first_embedding = response.embeddings[0]
    return first_embedding.values

def embed_all_chunks(client, texts):
    """Embed every text, pausing between batches to respect the rate limit.

    Texts are processed in groups of 90. After each group except the last,
    a notice is printed and the function sleeps for 60 seconds.

    Args:
        client: Gemini client forwarded to ``embed_text``.
        texts: Sequence of strings to embed.

    Returns:
        A list with one embedding per input text, in input order.
    """
    batch_size = 90
    total = len(texts)
    vectors = []
    for start in range(0, total, batch_size):
        stop = start + batch_size
        vectors.extend(embed_text(client, text) for text in texts[start:stop])
        # Only wait when another batch is still to come.
        if stop < total:
            print("Rate limit pause — waiting 60 seconds...")
            time.sleep(60)
    return vectors

def cosine_similarity(vec_a, vec_b):
    """Return the cosine similarity between two vectors.

    Args:
        vec_a: First vector (any sequence accepted by ``np.dot``).
        vec_b: Second vector, same length as ``vec_a``.

    Returns:
        ``dot(vec_a, vec_b) / (|vec_a| * |vec_b|)``, or ``0.0`` when either
        vector has zero magnitude.
    """
    # Compute the dot product first so mismatched lengths still raise.
    dot = np.dot(vec_a, vec_b)
    norm = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    # A zero-magnitude vector has no direction; dividing would yield NaN,
    # which makes score-based sorting unreliable. Treat it as "no similarity".
    if norm == 0:
        return 0.0
    return dot / norm

def search(client, query, chunks, embeddings, top_k=3):
    """Return the chunks whose embeddings are most similar to the query.

    Args:
        client: Gemini client exposing ``models.embed_content``.
        query: The user's question text.
        chunks: Chunk objects, paired by position with ``embeddings``.
        embeddings: One embedding per chunk.
        top_k: Maximum number of chunks to return (default 3).

    Returns:
        Up to ``top_k`` chunks, ordered from highest to lowest cosine
        similarity with the query embedding.
    """
    query_config = types.EmbedContentConfig(task_type="RETRIEVAL_QUERY")
    response = client.models.embed_content(
        model="gemini-embedding-001",
        contents=query,
        config=query_config,
    )
    query_vector = response.embeddings[0].values
    # Sort on the score alone so chunk objects are never compared.
    ranked = sorted(
        (
            (cosine_similarity(query_vector, emb), chunk)
            for emb, chunk in zip(embeddings, chunks)
        ),
        key=lambda pair: pair[0],
        reverse=True,
    )
    return [chunk for _score, chunk in ranked[:top_k]]

def build_prompt(question, context_chunks):
    """Assemble the text sent to the model for one question.

    Args:
        question: The user's question.
        context_chunks: Iterable of dicts, each with a ``"text"`` entry.

    Returns:
        A prompt containing the instructions, the chunk texts separated by
        blank lines, and the question.
    """
    chunk_texts = [chunk["text"] for chunk in context_chunks]
    context = "\n\n".join(chunk_texts)
    return f"You are a helpful assistant. Answer the question using only the context below.\nIf the answer is not in the context, say \"I don't know.\"\n\nContext:\n{context}\n\nQuestion:\n{question}"

def generate_answer(client, prompt):
    """Ask the ``gemini-2.5-flash`` model to answer a prompt.

    Args:
        client: Gemini client exposing ``models.generate_content``.
        prompt: The full prompt text.

    Returns:
        The ``text`` attribute of the model response.
    """
    models = client.models
    reply = models.generate_content(model="gemini-2.5-flash", contents=prompt)
    return reply.text

def save_embeddings(chunks, embeddings, cache_path):
    """Write chunks and their embeddings to a JSON cache file.

    The file holds a single JSON object with the keys ``"chunks"`` and
    ``"embeddings"``. An existing file at ``cache_path`` is overwritten.

    Args:
        chunks: JSON-serialisable chunk records.
        embeddings: JSON-serialisable embeddings, one per chunk.
        cache_path: Destination file path.
    """
    serialized = json.dumps({"chunks": chunks, "embeddings": embeddings})
    with open(cache_path, "w") as cache_file:
        cache_file.write(serialized)

def load_embeddings(cache_path):
    """Read a cache file previously written by ``save_embeddings``.

    Args:
        cache_path: Path of the JSON cache file.

    Returns:
        ``None`` when no file exists at ``cache_path``; otherwise a
        ``(chunks, embeddings)`` tuple taken from the file's ``"chunks"``
        and ``"embeddings"`` entries.
    """
    if os.path.exists(cache_path):
        with open(cache_path) as cache_file:
            cached = json.load(cache_file)
        return cached["chunks"], cached["embeddings"]
    return None

def chat_loop(client, chunks, embeddings):
    """Run the interactive question/answer loop.

    For each non-empty line typed by the user, the question flows through
    the pipeline: ``search`` picks the most relevant chunks, ``build_prompt``
    turns them and the question into a prompt, ``generate_answer`` sends the
    prompt to the model, and the reply is printed.

    The loop has no exit condition of its own; it ends only when an
    exception propagates (for example from ``input``).

    Args:
        client: Gemini client passed to ``search`` and ``generate_answer``.
        chunks: Indexed chunk dicts, paired by position with ``embeddings``.
        embeddings: One embedding per chunk.
    """
    print("Assistant ready. Type your question, or /help for commands.\n")
    while True:
        question = input("You: ").strip()
        # Blank input: prompt again without calling the API.
        if not question:
            continue
        # Step 1: retrieve the chunks most relevant to the question.
        top_chunks = search(client, question, chunks, embeddings)
        # Step 2: combine the question and retrieved context into a prompt.
        prompt = build_prompt(question, top_chunks)
        # Step 3: ask the model for an answer.
        answer = generate_answer(client, prompt)
        # Step 4: show the answer to the user.
        print(f"Assistant: {answer}")

def main():
    """Command-line entry point: load or build the embedding cache.

    Expects the folder to index as the first command-line argument; prints
    a usage line and exits with status 1 when it is missing. The cache file
    sits next to the folder, named after it with a ``.cache.json`` suffix.
    When a cache exists it is loaded; otherwise the folder is indexed, every
    chunk is embedded, and the result is saved to the cache.
    """
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python app.py <folder>")
        sys.exit(1)

    folder = argv[1]
    # Drop trailing path separators so the cache name derives from the
    # folder name itself.
    cache_path = folder.rstrip("/\\") + ".cache.json"
    client = create_client()

    cached = load_embeddings(cache_path)
    if not cached:
        print(f"Indexing {folder}...")
        chunks = index_folder(folder)
        texts = [chunk["text"] for chunk in chunks]
        file_count = len({chunk["source"] for chunk in chunks})
        print(f"Indexed {len(chunks)} chunks from {file_count} files.")
        embeddings = embed_all_chunks(client, texts)
        save_embeddings(chunks, embeddings, cache_path)
        print(f"Cache saved to {cache_path}")
    else:
        chunks, embeddings = cached
        print(f"Loaded cache from {cache_path}")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

Wire the Pipeline

Connecting the pieces

Instructions

Interactive Code Editor