Call Gemini to Generate an Answer
Build a RAG App — Chat with Your PDFs › Generate Answers and Finish the App › Call Gemini to Generate an Answer
Exit
Call Gemini to Generate an Answer
Send the prompt to Gemini Flash and return the response text
💻
Writing code and entering commands is only available on desktop. Open this page on a larger screen to complete this chapter.
The generation API call
Use client.models.generate_content() with your model name and prompt.
response = client.models.generate_content(
model="gemini-2.5-flash",
contents=prompt
)
return response.text

gemini-2.5-flash is fast and available on the free tier. It is well-suited for document Q&A tasks.
Instructions
Complete the generate_answer function. The starter code provides the signature.
- Create a variable named response. Assign it client.models.generate_content(model="gemini-2.5-flash", contents=prompt).
- Return response.text.
import os
import time
import numpy as np
import pypdf
from dotenv import load_dotenv
from google import genai
from google.genai import types
def extract_text(pdf_path):
    """Extract the text of every page in the PDF at *pdf_path*.

    Pages are joined with a single newline between them.
    """
    reader = pypdf.PdfReader(pdf_path)
    return "\n".join(page.extract_text() for page in reader.pages)
def chunk_text(text, chunk_size=500, overlap=100):
    """Split *text* into overlapping character chunks.

    Args:
        text: Source string to split.
        chunk_size: Maximum characters per chunk.
        overlap: Characters shared between consecutive chunks.

    Returns:
        List of chunk strings; an empty list for empty text.

    Raises:
        ValueError: If overlap >= chunk_size. Previously a larger overlap
            made the range step negative and silently returned [], and an
            equal overlap raised an opaque "range() arg 3" error.
    """
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    step = chunk_size - overlap
    return [text[i : i + chunk_size] for i in range(0, len(text), step)]
def preview_chunks(chunks):
    """Print a short summary of *chunks*: the count and the first chunk.

    Handles an empty list gracefully instead of raising IndexError
    on chunks[0].
    """
    print(f"Total chunks: {len(chunks)}")
    if chunks:
        print(f"First chunk:\n{chunks[0]}")
    else:
        print("No chunks to preview.")
def create_client():
    """Build a Gemini API client using GEMINI_API_KEY from the environment.

    Loads a local .env file first (if present) so the key can live there.
    """
    load_dotenv()
    return genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
def embed_text(client, text):
    """Embed a single document chunk and return its embedding vector."""
    document_config = types.EmbedContentConfig(task_type="RETRIEVAL_DOCUMENT")
    result = client.models.embed_content(
        model="gemini-embedding-001",
        contents=text,
        config=document_config,
    )
    return result.embeddings[0].values
def embed_all_chunks(client, chunks):
    """Embed every chunk, pausing 60s between batches to respect rate limits.

    Returns the embedding vectors in the same order as *chunks*.
    """
    BATCH_SIZE = 90
    vectors = []
    total = len(chunks)
    for start in range(0, total, BATCH_SIZE):
        for chunk in chunks[start : start + BATCH_SIZE]:
            vectors.append(embed_text(client, chunk))
        # Skip the pause after the final batch.
        if start + BATCH_SIZE < total:
            print("Rate limit pause — waiting 60 seconds...")
            time.sleep(60)
    return vectors
def cosine_similarity(vec_a, vec_b):
    """Return the cosine similarity between two vectors.

    Returns 0.0 when either vector has zero norm — the original divided
    by zero in that case (a NaN/inf or a ZeroDivisionError depending on
    input types).
    """
    dot = np.dot(vec_a, vec_b)
    norm = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if norm == 0:
        return 0.0
    return dot / norm
def search(client, query, chunks, embeddings, top_k=3):
    """Return the *top_k* chunks most similar to *query* by cosine similarity."""
    query_result = client.models.embed_content(
        model="gemini-embedding-001",
        contents=query,
        config=types.EmbedContentConfig(task_type="RETRIEVAL_QUERY"),
    )
    query_vector = query_result.embeddings[0].values
    ranked = sorted(
        zip(embeddings, chunks),
        key=lambda pair: cosine_similarity(query_vector, pair[0]),
        reverse=True,
    )
    return [chunk for _, chunk in ranked[:top_k]]
def test_search(client, pdf_path, question):
    """End-to-end check: extract, chunk, embed, then print the top matches."""
    chunks = chunk_text(extract_text(pdf_path))
    embeddings = embed_all_chunks(client, chunks)
    top_matches = search(client, question, chunks, embeddings)
    for rank, chunk in enumerate(top_matches, start=1):
        print(f"Result {rank}:\n{chunk}\n")
def build_prompt(question, context_chunks):
    """Assemble a grounded Q&A prompt from the retrieved context chunks."""
    joined_context = "\n\n".join(context_chunks)
    instruction = (
        "You are a helpful assistant. Answer the question using only the context below.\n"
        "If the answer is not in the context, say \"I don't know.\""
    )
    return f"{instruction}\n\nContext:\n{joined_context}\n\nQuestion:\n{question}"
def generate_answer(client, prompt):
    """Send *prompt* to Gemini and return the generated answer text.

    Args:
        client: A genai.Client instance (see create_client).
        prompt: Full prompt string (see build_prompt).

    Returns:
        The model's response text.

    Note: the original body contained only TODO comments, which is a
    syntax error in Python (a def needs at least one statement).
    """
    # gemini-2.5-flash: fast, free-tier friendly, well-suited to document Q&A.
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt,
    )
    return response.text
Interactive Code Editor
Sign in to write and run code, track your progress, and unlock all chapters.
Sign In to Start Coding