Attempts to improve retrieval quality; step 11 changed
!pip install google-generativeai scikit-learn numpy
# ================================
# STEP 1: IMPORTS + API SETUP
# ================================
import google.generativeai as genai
#FOR GOOGLE COLAB
from google.colab import userdata
#FOR OTHER ENVIRONS
#import os
import numpy as np
from sklearn.cluster import KMeans
from collections import defaultdict
# 🔑 Replace with your API key
# For non-Colab environments: read the key from an environment variable
# instead (this also needs the commented-out `import os` near the imports).
#api_key = os.environ.get("GEMINI_API_KEY")
# For Colab: fetch the key through google.colab's userdata helper.
api_key = userdata.get("GEMINI_API_KEY")
# Optional fail-fast check, currently disabled:
#if not api_key:
# raise ValueError("API key not found. Please set GEMINI_API_KEY in Colab Secrets.")
# Register the key with the google.generativeai client used by every later call.
genai.configure(api_key=api_key)
# ================================
# STEP 2: DATA SET
# ================================
# Toy knowledge base: one short Kubernetes fact per entry. Each entry is
# embedded individually in Step 3 and grouped into clusters in Steps 4-5.
documents = [
    "CrashLoopBackOff occurs when a container repeatedly crashes after starting.",
    "A container may crash due to missing environment variables.",
    "Incorrect command or entrypoint can cause container startup failure.",
    "Application errors inside the container often lead to restarts.",
    "OOMKilled happens when a container exceeds its memory limit.",
    "ImagePullBackOff occurs when Kubernetes cannot pull the container image.",
    "Incorrect image name or tag can cause image pull failures.",
    "Private registries require imagePullSecrets for authentication.",
    "kubectl logs retrieves logs from a running container.",
    "kubectl describe pod shows events and state transitions.",
    "Pods remain pending if no node satisfies resource requests.",
    "Node affinity restricts pods to specific nodes.",
    "Taints prevent pods from being scheduled on certain nodes.",
    "Tolerations allow pods to be scheduled on tainted nodes.",
    "Liveness probes determine if a container should be restarted.",
    "Readiness probes determine if a pod can receive traffic.",
    "A failing readiness probe removes the pod from service endpoints.",
    "ClusterIP services expose applications within the cluster.",
    "NodePort services expose applications on node IPs.",
    "PersistentVolumes provide storage independent of pods.",
    "PersistentVolumeClaims request storage resources.",
    "ConfigMaps store non-sensitive configuration data.",
    "Secrets store sensitive data like passwords and tokens.",
    "Deployments manage replica sets and pod updates.",
    "Horizontal Pod Autoscaler scales based on CPU or metrics.",
    "Pods stuck in Terminating state may have finalizers blocking deletion.",
    "RBAC misconfiguration can block access to resources.",
]
# ================================
# STEP 3: GENERATE EMBEDDINGS
# ================================
def get_embedding(text):
    """Embed ``text`` with the Gemini embedding model.

    Args:
        text: The string to embed.

    Returns:
        The value stored under the ``"embedding"`` key of the
        ``genai.embed_content`` response.
    """
    # The older "models/embedding-001" model is deprecated and now errors,
    # so the Gemini embedding model is requested instead.
    result = genai.embed_content(model="models/gemini-embedding-001", content=text)
    return result["embedding"]
print("Generating embeddings...")
# One embedding request per document; the resulting vectors are stacked into
# a single NumPy array so KMeans can consume them directly.
embeddings = np.array([get_embedding(text) for text in documents])
print(f"Embeddings shape: {embeddings.shape}")
# ================================
# STEP 4: CLUSTER USING KMEANS
# ================================
NUM_CLUSTERS = 6 # 🔧 Tune this later
# An integer random_state makes the centroid initialisation deterministic.
kmeans = KMeans(n_clusters=NUM_CLUSTERS, random_state=42)
# fit_predict fits the model and returns one cluster label per embedding row.
labels = kmeans.fit_predict(embeddings)
print("Clustering complete.")
# ================================
# STEP 5: GROUP DOCUMENTS BY CLUSTER
# ================================
# Bucket every document under the cluster label KMeans assigned to it.
# labels holds one entry per document, in the same order as documents.
clustered_docs = defaultdict(list)
for position, cluster_label in enumerate(labels):
    clustered_docs[cluster_label].append(documents[position])
# ================================
# STEP 6: BUILD SMART CHUNKS
# ================================
# One chunk per cluster: a "Cluster <id>" heading, a blank line, then one
# "- <document>" bullet per member. strip() drops the trailing newline.
smart_chunks = [
    (f"Cluster {group_id}\n\n" + "".join(f"- {member}\n" for member in members)).strip()
    for group_id, members in clustered_docs.items()
]
# ================================
# STEP 7: ADD METADATA (FINAL DATA PREPARED)
# ================================
# smart_chunks was produced by iterating clustered_docs in order, so zipping
# the two pairs every chunk with the KMeans label it was built from.
# Storing the enumerate index as "cluster" would mislabel chunks, because the
# labels are not encountered in the order 0, 1, 2, ... (the first chunk built
# can just as well be "Cluster 4").
prepared_data = [
    {
        "id": f"chunk_{i}",
        # int() gives a plain Python int regardless of the label's numeric type.
        "cluster": int(cluster_id),
        "text": chunk,
    }
    for i, (cluster_id, chunk) in enumerate(zip(clustered_docs, smart_chunks))
]
# ================================
# STEP 8: INSPECT RESULTS
# ================================
# Dump every prepared chunk followed by a 60-character separator line.
for record in prepared_data:
    print(record["text"], "=" * 60, sep="\n")
# ================================
# STEP 9: DEBUG CLUSTER QUALITY
# ================================
# Print each cluster's id, size and members so the grouping can be checked by eye.
for cluster_id, docs in clustered_docs.items():
    # The heading marker had been corrupted by a bad character-set round trip;
    # it is restored to the small blue diamond emoji.
    print(f"\n🔹 Cluster {cluster_id} ({len(docs)} items)")
    for d in docs:
        print("-", d)
# ================================
# STEP 10: EMBED CHUNKS
# ================================
def get_embedding(text):
    """Return the Gemini embedding for ``text``.

    This repeats the Step 3 definition: it calls ``genai.embed_content`` with
    the "models/gemini-embedding-001" model and returns the response's
    ``"embedding"`` entry.
    """
    return genai.embed_content(
        model="models/gemini-embedding-001",
        content=text,
    )["embedding"]
print("Generating chunk embeddings...")
for record in prepared_data:
    # Store the vector on the chunk record itself so id, cluster, text and
    # embedding travel together.
    record["embedding"] = get_embedding(record["text"])
print("Chunk embeddings added to prepared_data.")
# ================================
# STEP 11: RETRIEVAL ENGINE
# ================================
def normalize(vec):
    """Scale ``vec`` to unit Euclidean length.

    Args:
        vec: A NumPy array or any array-like of numbers (lists are accepted).

    Returns:
        A float NumPy array equal to ``vec`` divided by its norm. If the norm
        is zero, an all-zero array of the same shape is returned instead of
        dividing by zero, which would fill the result with NaN.
    """
    arr = np.asarray(vec, dtype=float)
    length = np.linalg.norm(arr)
    if length == 0:
        # A zero vector has no direction; return zeros rather than NaN.
        return np.zeros_like(arr)
    return arr / length
def cosine_similarity(a, b):
    """Return the cosine similarity of two vectors.

    Args:
        a: First vector (NumPy array or array-like of numbers).
        b: Second vector, with the same length as ``a``.

    Returns:
        The dot product of ``a`` and ``b`` after each has been scaled to unit
        length. If either vector has zero norm the similarity is undefined,
        and ``0.0`` is returned instead of NaN.
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a == 0 or norm_b == 0:
        # Avoid 0/0: a NaN score would poison sorting and threshold checks.
        return 0.0
    return np.dot(a / norm_a, b / norm_b)
def retrieve(query, top_k=3, initial_k=10, min_score=0.5):
    """Return the chunks in ``prepared_data`` that best match ``query``.

    Pipeline: embed the query, score every chunk by cosine similarity, keep
    the ``initial_k`` most similar chunks as candidates, add a small
    keyword-overlap bonus to each, drop candidates whose boosted score is
    below ``min_score``, and finally return up to ``top_k`` results with at
    most one chunk per ``"cluster"`` value.

    Args:
        query: Natural-language question.
        top_k: Maximum number of results; values <= 0 yield an empty list.
        initial_k: Number of top-similarity candidates passed to re-ranking.
        min_score: Minimum boosted score a candidate needs to be returned.

    Returns:
        A list of ``(score, item)`` tuples in descending score order, where
        ``score`` is cosine similarity plus the keyword bonus and ``item`` is
        the matching ``prepared_data`` entry.
    """
    if top_k <= 0:
        return []

    def tokenize(text):
        # Lowercase and split on every non-alphanumeric character, so
        # punctuation never sticks to a word ("crashing?" -> "crashing").
        cleaned = "".join(ch if ch.isalnum() else " " for ch in text.lower())
        return set(cleaned.split())

    # Function words carry no topical signal. With plain substring matching
    # they also hit inside unrelated words ("is" in "missing"/"satisfies"),
    # which boosted irrelevant chunks above relevant ones.
    stopwords = {
        "a", "an", "the", "is", "are", "am", "was", "were", "be", "been",
        "do", "does", "did", "i", "my", "me", "you", "your", "we", "our",
        "it", "its", "in", "on", "of", "to", "for", "at", "by", "with",
        "from", "and", "or", "not", "how", "why", "what", "when", "where",
        "which", "who", "can", "this", "that",
    }
    keywords = tokenize(query) - stopwords

    # Step 1: query embedding (cosine_similarity handles the normalisation).
    query_embedding = get_embedding(query)

    # Steps 2-3: score every chunk and sort by similarity, best first.
    scored = sorted(
        (
            (cosine_similarity(query_embedding, item["embedding"]), item)
            for item in prepared_data
        ),
        key=lambda pair: pair[0],
        reverse=True,
    )

    # Step 4: keep a wider candidate pool than top_k (recall boost).
    candidates = scored[:initial_k]

    # Step 5: re-rank with +0.05 per distinct query keyword that appears as a
    # whole word in the chunk text.
    reranked = sorted(
        (
            (sim + 0.05 * len(keywords & tokenize(item["text"])), item)
            for sim, item in candidates
        ),
        key=lambda pair: pair[0],
        reverse=True,
    )

    # Steps 6-7: apply the threshold to the boosted score, then keep at most
    # one chunk per cluster until top_k results are collected.
    selected = []
    seen_clusters = set()
    for score, item in reranked:
        if not score >= min_score:  # also rejects NaN scores
            continue
        if item["cluster"] in seen_clusters:
            continue
        selected.append((score, item))
        seen_clusters.add(item["cluster"])
        if len(selected) == top_k:
            break
    return selected
# def cosine_similarity(a, b):
# a = np.array(a)
# b = np.array(b)
# return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
# def retrieve(query, top_k=3):
# # Step 1: Embed query
# query_embedding = get_embedding(query)
# scores = []
# # Step 2: Compare with all chunks
# for item in prepared_data:
# chunk_embedding = item["embedding"]
# sim = cosine_similarity(query_embedding, chunk_embedding)
# scores.append((sim, item))
# # Step 3: Sort by similarity (highest first)
# scores = sorted(scores, key=lambda x: x[0], reverse=True)
# # Step 4: Pick top-k
# top_results = scores[:top_k]
# return top_results
# Smoke test of the retrieval engine with a single question.
query = "Why is my pod crashing?"
results = retrieve(query, top_k=3)
for score, item in results:
    print(f"Score: {score:.4f}", item["text"], "=" * 60, sep="\n")
# ================================
# STEP 12: BUILD PROMPT
# ================================
def build_prompt(query, retrieved_chunks):
    """Assemble the grounded-answer prompt sent to the language model.

    Args:
        query: The user's question.
        retrieved_chunks: Iterable of ``(score, item)`` pairs; only each
            item's ``"text"`` is used, joined with blank lines as context.

    Returns:
        The prompt string: instructions, the context, the question and a
        trailing "Answer:" cue.
    """
    context = "\n\n".join(chunk["text"] for _score, chunk in retrieved_chunks)
    return (
        "\n"
        "You are a Kubernetes expert.\n"
        "Answer ONLY using the provided context.\n"
        'If the answer is not in the context, say "I don\'t know".\n'
        "Context:\n"
        f"{context}\n"
        "Question:\n"
        f"{query}\n"
        "Answer:\n"
    )
# ================================
# STEP 13: GENERATE ANSWER METHOD
# ================================
# def generate_answer(prompt):
# response = genai.generate_content(
# model="gemini-1.5-flash",
# contents=prompt
# )
# return response.text
def generate_answer(prompt):
    """Send ``prompt`` to the "gemini-3-flash-preview" model and return the reply text.

    Args:
        prompt: The full prompt string.

    Returns:
        The ``text`` attribute of the ``generate_content`` response.
    """
    reply = genai.GenerativeModel("gemini-3-flash-preview").generate_content(prompt)
    return reply.text
# ================================
# STEP 14: FINAL RAG PIPELINE EXECUTION (NOTE: THIS EXCLUDES THE INITIAL INGESTION PIPELINE UP TO STEP 11)
# ================================
def rag_pipeline(query, top_k=3):
    """Answer ``query`` using retrieval-augmented generation.

    Retrieves up to ``top_k`` chunks, builds a prompt from them and asks the
    model for an answer.

    Args:
        query: The user's question.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        A ``(answer, retrieved_chunks)`` tuple: the model's reply and the
        ``(score, item)`` pairs that were used as context.
    """
    chunks = retrieve(query, top_k=top_k)
    return generate_answer(build_prompt(query, chunks)), chunks
# ================================
# STEP 15: TEST IT
# ================================
# Run the full pipeline once and show the answer with its supporting chunks.
query = "Why is my pod crashing?"
answer, sources = rag_pipeline(query)
print("ANSWER:\n", answer, "\n\nSOURCES:\n", sep="\n")
for score, item in sources:
    print(f"Score: {score:.4f}", item["text"], "=" * 60, sep="\n")
# ================================
# STEP 16: TEST RETRIEVAL
# ================================
# Retrieval-only check: print the top three chunks for several sample questions.
test_queries = [
    "Why is my pod crashing?",
    "How to debug Kubernetes logs?",
    "What causes OOMKilled?",
    "How do services work in Kubernetes?",
    "Why is my container restarting repeatedly?",
]
for query in test_queries:
    print(f"\n{'=' * 80}")
    print(f"QUERY: {query}\n")
    results = retrieve(query, top_k=3)
    for score, item in results:
        print(f"Score: {score:.4f}", item["text"], "-" * 40, sep="\n")
====================================================================================
STEP 9 OUTPUT
====================================================================================
Generating embeddings...
Embeddings shape: (27, 3072)
Clustering complete.
Cluster 4
- CrashLoopBackOff occurs when a container repeatedly crashes after starting.
- ImagePullBackOff occurs when Kubernetes cannot pull the container image.
- kubectl logs retrieves logs from a running container.
- kubectl describe pod shows events and state transitions.
============================================================
Cluster 2
- A container may crash due to missing environment variables.
- Incorrect command or entrypoint can cause container startup failure.
- Application errors inside the container often lead to restarts.
- OOMKilled happens when a container exceeds its memory limit.
- Liveness probes determine if a container should be restarted.
============================================================
Cluster 5
- Incorrect image name or tag can cause image pull failures.
- Private registries require imagePullSecrets for authentication.
- ConfigMaps store non-sensitive configuration data.
- Secrets store sensitive data like passwords and tokens.
============================================================
Cluster 0
- Pods remain pending if no node satisfies resource requests.
- Node affinity restricts pods to specific nodes.
- Taints prevent pods from being scheduled on certain nodes.
- Tolerations allow pods to be scheduled on tainted nodes.
- Readiness probes determine if a pod can receive traffic.
- A failing readiness probe removes the pod from service endpoints.
- Horizontal Pod Autoscaler scales based on CPU or metrics.
- Pods stuck in Terminating state may have finalizers blocking deletion.
- RBAC misconfiguration can block access to resources.
============================================================
Cluster 1
- ClusterIP services expose applications within the cluster.
- NodePort services expose applications on node IPs.
============================================================
Cluster 3
- PersistentVolumes provide storage independent of pods.
- PersistentVolumeClaims request storage resources.
- Deployments manage replica sets and pod updates.
============================================================
🔹 Cluster 4 (4 items)
- CrashLoopBackOff occurs when a container repeatedly crashes after starting.
- ImagePullBackOff occurs when Kubernetes cannot pull the container image.
- kubectl logs retrieves logs from a running container.
- kubectl describe pod shows events and state transitions.
🔹 Cluster 2 (5 items)
- A container may crash due to missing environment variables.
- Incorrect command or entrypoint can cause container startup failure.
- Application errors inside the container often lead to restarts.
- OOMKilled happens when a container exceeds its memory limit.
- Liveness probes determine if a container should be restarted.
🔹 Cluster 5 (4 items)
- Incorrect image name or tag can cause image pull failures.
- Private registries require imagePullSecrets for authentication.
- ConfigMaps store non-sensitive configuration data.
- Secrets store sensitive data like passwords and tokens.
🔹 Cluster 0 (9 items)
- Pods remain pending if no node satisfies resource requests.
- Node affinity restricts pods to specific nodes.
- Taints prevent pods from being scheduled on certain nodes.
- Tolerations allow pods to be scheduled on tainted nodes.
- Readiness probes determine if a pod can receive traffic.
- A failing readiness probe removes the pod from service endpoints.
- Horizontal Pod Autoscaler scales based on CPU or metrics.
- Pods stuck in Terminating state may have finalizers blocking deletion.
- RBAC misconfiguration can block access to resources.
🔹 Cluster 1 (2 items)
- ClusterIP services expose applications within the cluster.
- NodePort services expose applications on node IPs.
🔹 Cluster 3 (3 items)
- PersistentVolumes provide storage independent of pods.
- PersistentVolumeClaims request storage resources.
- Deployments manage replica sets and pod updates.
====================================================================================
=====================================================================================
TESTING OUTPUT BEFORE RETRIEVAL IMPROVEMENTS
=====================================================================================
================================================================================
QUERY: Why is my pod crashing?
Score: 0.6821
Cluster 2
- A container may crash due to missing environment variables.
- Incorrect command or entrypoint can cause container startup failure.
- Application errors inside the container often lead to restarts.
- OOMKilled happens when a container exceeds its memory limit.
- Liveness probes determine if a container should be restarted.
----------------------------------------
Score: 0.6681
Cluster 4
- CrashLoopBackOff occurs when a container repeatedly crashes after starting.
- ImagePullBackOff occurs when Kubernetes cannot pull the container image.
- kubectl logs retrieves logs from a running container.
- kubectl describe pod shows events and state transitions.
----------------------------------------
Score: 0.6377
Cluster 0
- Pods remain pending if no node satisfies resource requests.
- Node affinity restricts pods to specific nodes.
- Taints prevent pods from being scheduled on certain nodes.
- Tolerations allow pods to be scheduled on tainted nodes.
- Readiness probes determine if a pod can receive traffic.
- A failing readiness probe removes the pod from service endpoints.
- Horizontal Pod Autoscaler scales based on CPU or metrics.
- Pods stuck in Terminating state may have finalizers blocking deletion.
- RBAC misconfiguration can block access to resources.
----------------------------------------
================================================================================
QUERY: How to debug Kubernetes logs?
Score: 0.7323
Cluster 4
- CrashLoopBackOff occurs when a container repeatedly crashes after starting.
- ImagePullBackOff occurs when Kubernetes cannot pull the container image.
- kubectl logs retrieves logs from a running container.
- kubectl describe pod shows events and state transitions.
----------------------------------------
Score: 0.6242
Cluster 2
- A container may crash due to missing environment variables.
- Incorrect command or entrypoint can cause container startup failure.
- Application errors inside the container often lead to restarts.
- OOMKilled happens when a container exceeds its memory limit.
- Liveness probes determine if a container should be restarted.
----------------------------------------
Score: 0.6214
Cluster 0
- Pods remain pending if no node satisfies resource requests.
- Node affinity restricts pods to specific nodes.
- Taints prevent pods from being scheduled on certain nodes.
- Tolerations allow pods to be scheduled on tainted nodes.
- Readiness probes determine if a pod can receive traffic.
- A failing readiness probe removes the pod from service endpoints.
- Horizontal Pod Autoscaler scales based on CPU or metrics.
- Pods stuck in Terminating state may have finalizers blocking deletion.
- RBAC misconfiguration can block access to resources.
----------------------------------------
================================================================================
QUERY: What causes OOMKilled?
Score: 0.7013
Cluster 2
- A container may crash due to missing environment variables.
- Incorrect command or entrypoint can cause container startup failure.
- Application errors inside the container often lead to restarts.
- OOMKilled happens when a container exceeds its memory limit.
- Liveness probes determine if a container should be restarted.
----------------------------------------
Score: 0.5555
Cluster 0
- Pods remain pending if no node satisfies resource requests.
- Node affinity restricts pods to specific nodes.
- Taints prevent pods from being scheduled on certain nodes.
- Tolerations allow pods to be scheduled on tainted nodes.
- Readiness probes determine if a pod can receive traffic.
- A failing readiness probe removes the pod from service endpoints.
- Horizontal Pod Autoscaler scales based on CPU or metrics.
- Pods stuck in Terminating state may have finalizers blocking deletion.
- RBAC misconfiguration can block access to resources.
----------------------------------------
Score: 0.5520
Cluster 4
- CrashLoopBackOff occurs when a container repeatedly crashes after starting.
- ImagePullBackOff occurs when Kubernetes cannot pull the container image.
- kubectl logs retrieves logs from a running container.
- kubectl describe pod shows events and state transitions.
----------------------------------------
================================================================================
QUERY: How do services work in Kubernetes?
Score: 0.7298
Cluster 1
- ClusterIP services expose applications within the cluster.
- NodePort services expose applications on node IPs.
----------------------------------------
Score: 0.6497
Cluster 3
- PersistentVolumes provide storage independent of pods.
- PersistentVolumeClaims request storage resources.
- Deployments manage replica sets and pod updates.
----------------------------------------
Score: 0.6413
Cluster 0
- Pods remain pending if no node satisfies resource requests.
- Node affinity restricts pods to specific nodes.
- Taints prevent pods from being scheduled on certain nodes.
- Tolerations allow pods to be scheduled on tainted nodes.
- Readiness probes determine if a pod can receive traffic.
- A failing readiness probe removes the pod from service endpoints.
- Horizontal Pod Autoscaler scales based on CPU or metrics.
- Pods stuck in Terminating state may have finalizers blocking deletion.
- RBAC misconfiguration can block access to resources.
----------------------------------------
================================================================================
QUERY: Why is my container restarting repeatedly?
Score: 0.7256
Cluster 2
- A container may crash due to missing environment variables.
- Incorrect command or entrypoint can cause container startup failure.
- Application errors inside the container often lead to restarts.
- OOMKilled happens when a container exceeds its memory limit.
- Liveness probes determine if a container should be restarted.
----------------------------------------
Score: 0.6306
Cluster 4
- CrashLoopBackOff occurs when a container repeatedly crashes after starting.
- ImagePullBackOff occurs when Kubernetes cannot pull the container image.
- kubectl logs retrieves logs from a running container.
- kubectl describe pod shows events and state transitions.
----------------------------------------
Score: 0.5741
Cluster 5
- Incorrect image name or tag can cause image pull failures.
- Private registries require imagePullSecrets for authentication.
- ConfigMaps store non-sensitive configuration data.
- Secrets store sensitive data like passwords and tokens.
----------------------------------------
=====================================================================================
=====================================================================================
TESTING OUTPUT AFTER RETRIEVAL IMPROVEMENTS
=====================================================================================
================================================================================
QUERY: Why is my pod crashing?
Score: 0.7377
Cluster 0
- Pods remain pending if no node satisfies resource requests.
- Node affinity restricts pods to specific nodes.
- Taints prevent pods from being scheduled on certain nodes.
- Tolerations allow pods to be scheduled on tainted nodes.
- Readiness probes determine if a pod can receive traffic.
- A failing readiness probe removes the pod from service endpoints.
- Horizontal Pod Autoscaler scales based on CPU or metrics.
- Pods stuck in Terminating state may have finalizers blocking deletion.
- RBAC misconfiguration can block access to resources.
----------------------------------------
Score: 0.7321
Cluster 2
- A container may crash due to missing environment variables.
- Incorrect command or entrypoint can cause container startup failure.
- Application errors inside the container often lead to restarts.
- OOMKilled happens when a container exceeds its memory limit.
- Liveness probes determine if a container should be restarted.
----------------------------------------
Score: 0.7181
Cluster 4
- CrashLoopBackOff occurs when a container repeatedly crashes after starting.
- ImagePullBackOff occurs when Kubernetes cannot pull the container image.
- kubectl logs retrieves logs from a running container.
- kubectl describe pod shows events and state transitions.
----------------------------------------
================================================================================
QUERY: How to debug Kubernetes logs?
Score: 0.8323
Cluster 4
- CrashLoopBackOff occurs when a container repeatedly crashes after starting.
- ImagePullBackOff occurs when Kubernetes cannot pull the container image.
- kubectl logs retrieves logs from a running container.
- kubectl describe pod shows events and state transitions.
----------------------------------------
Score: 0.6742
Cluster 2
- A container may crash due to missing environment variables.
- Incorrect command or entrypoint can cause container startup failure.
- Application errors inside the container often lead to restarts.
- OOMKilled happens when a container exceeds its memory limit.
- Liveness probes determine if a container should be restarted.
----------------------------------------
Score: 0.6714
Cluster 0
- Pods remain pending if no node satisfies resource requests.
- Node affinity restricts pods to specific nodes.
- Taints prevent pods from being scheduled on certain nodes.
- Tolerations allow pods to be scheduled on tainted nodes.
- Readiness probes determine if a pod can receive traffic.
- A failing readiness probe removes the pod from service endpoints.
- Horizontal Pod Autoscaler scales based on CPU or metrics.
- Pods stuck in Terminating state may have finalizers blocking deletion.
- RBAC misconfiguration can block access to resources.
----------------------------------------
================================================================================
QUERY: What causes OOMKilled?
Score: 0.7013
Cluster 2
- A container may crash due to missing environment variables.
- Incorrect command or entrypoint can cause container startup failure.
- Application errors inside the container often lead to restarts.
- OOMKilled happens when a container exceeds its memory limit.
- Liveness probes determine if a container should be restarted.
----------------------------------------
Score: 0.5555
Cluster 0
- Pods remain pending if no node satisfies resource requests.
- Node affinity restricts pods to specific nodes.
- Taints prevent pods from being scheduled on certain nodes.
- Tolerations allow pods to be scheduled on tainted nodes.
- Readiness probes determine if a pod can receive traffic.
- A failing readiness probe removes the pod from service endpoints.
- Horizontal Pod Autoscaler scales based on CPU or metrics.
- Pods stuck in Terminating state may have finalizers blocking deletion.
- RBAC misconfiguration can block access to resources.
----------------------------------------
Score: 0.5520
Cluster 4
- CrashLoopBackOff occurs when a container repeatedly crashes after starting.
- ImagePullBackOff occurs when Kubernetes cannot pull the container image.
- kubectl logs retrieves logs from a running container.
- kubectl describe pod shows events and state transitions.
----------------------------------------
================================================================================
QUERY: How do services work in Kubernetes?
Score: 0.8298
Cluster 1
- ClusterIP services expose applications within the cluster.
- NodePort services expose applications on node IPs.
----------------------------------------
Score: 0.7312
Cluster 4
- CrashLoopBackOff occurs when a container repeatedly crashes after starting.
- ImagePullBackOff occurs when Kubernetes cannot pull the container image.
- kubectl logs retrieves logs from a running container.
- kubectl describe pod shows events and state transitions.
----------------------------------------
Score: 0.6997
Cluster 3
- PersistentVolumes provide storage independent of pods.
- PersistentVolumeClaims request storage resources.
- Deployments manage replica sets and pod updates.
----------------------------------------
================================================================================
QUERY: Why is my container restarting repeatedly?
Score: 0.8256
Cluster 2
- A container may crash due to missing environment variables.
- Incorrect command or entrypoint can cause container startup failure.
- Application errors inside the container often lead to restarts.
- OOMKilled happens when a container exceeds its memory limit.
- Liveness probes determine if a container should be restarted.
----------------------------------------
Score: 0.6806
Cluster 4
- CrashLoopBackOff occurs when a container repeatedly crashes after starting.
- ImagePullBackOff occurs when Kubernetes cannot pull the container image.
- kubectl logs retrieves logs from a running container.
- kubectl describe pod shows events and state transitions.
----------------------------------------
Score: 0.6241
Cluster 5
- Incorrect image name or tag can cause image pull failures.
- Private registries require imagePullSecrets for authentication.
- ConfigMaps store non-sensitive configuration data.
- Secrets store sensitive data like passwords and tokens.
----------------------------------------
=====================================================================================
No comments:
Post a Comment