Tuesday, April 28, 2026

AGENTIC AI 002: Agent to Parse Text and Provide Answers

# Notebook shell escape (IPython/Colab syntax, not plain Python): installs the
# packages listed on the line below with pip.
!pip install langchain langchain-core langchain-community langchain-google-genai faiss-cpu langchain-text-splitters


from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate

from langchain_core.output_parsers import StrOutputParser


import os
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

from google.colab import userdata
# --- Setup ---
# Read the secret named 'geminiapikey' from Colab's userdata store and export
# it as the GOOGLE_API_KEY environment variable.
# NOTE(review): presumably the Gemini embedding and chat clients created
# further down pick the key up from this variable — confirm against the
# langchain-google-genai documentation.
os.environ["GOOGLE_API_KEY"] = userdata.get('geminiapikey')


# Raw HR policy statements that make up the knowledge base; one policy
# topic per string. Long entries are split with implicit string
# concatenation purely for readability.
documents = [
    (
        "Our company offers 21 days of paid annual leave. "
        "Employees may work from home up to three days a week with manager approval. "
        "Health insurance covers the employee, spouse, and two children."
    ),
    "Standard working hours are 9:00–18:00 with a 1-hour break; flexible start between 8:00–10:00 if 8 hours are met.",
    "The company observes 12 public holidays per calendar year, announced each December.",
    "Employees receive 12 paid sick days per year; a doctor’s note is required after 2+ consecutive days.",
    "Parental leave: 16 weeks paid maternity and 4 weeks paid paternity; additional unpaid leave per local law.",
    "Up to 7 unused annual leave days may carry over; cash encashment is not offered.",
    "Overtime must be pre-approved and is compensated at 1.5× or as time off in lieu within 60 days.",
    "Hybrid work: up to 3 WFH days/week with manager approval; availability required during core hours 10:00–16:00.",
    "Company provides a laptop; eligible roles receive a ₹2,000/month home-office stipend.",
    (
        "Expenses must be submitted within 30 days via the expense portal; "
        "economy airfare and mid-tier hotels; "
        "receipts required for amounts over ₹1,000."
    ),
    "Medical insurance covers employee, spouse, and up to two children; optional parental cover at employee cost.",
    "Payroll runs on the last business day of the month; reimbursements paid with the next payroll.",
    "Notice period is 30 days for voluntary resignation; the company may provide pay in lieu of notice.",
    "The company enforces a zero-tolerance policy for harassment or discrimination; violations may lead to termination.",
    "Employees must follow the IT acceptable-use policy; confidential data cannot be shared outside approved tools.",
]

# --- Convert to docs ---
# Wrap every raw policy string in a Document so it can be split and indexed.
docs = [Document(page_content=policy_text) for policy_text in documents]

# --- Split ---
# Break the documents into chunks using chunk_size=200 and chunk_overlap=30.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=30,
    chunk_size=200,
)
chunks = splitter.split_documents(docs)

# --- Embeddings ---
# An earlier revision of this script used model="models/embedding-001".
embeddings = GoogleGenerativeAIEmbeddings(
    model="gemini-embedding-2-preview",
)

# --- Vector store ---
# Build a FAISS store from the chunks and expose it as a retriever
# configured with k=6.
vectorstore = FAISS.from_documents(chunks, embeddings)
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 6},
)

# --- LLM ---
# An earlier revision of this script used model="gemini-1.5-flash".
llm = ChatGoogleGenerativeAI(
    temperature=0,
    model="gemini-3-flash-preview",
)

# --- Prompt ---
# Prompt template with two placeholders: {context} and {question}.
# The rules in the template tell the model to answer only from the supplied
# context, to reply "Not mentioned" when the answer is absent, and to respond
# respectfully in complete sentences.
prompt = ChatPromptTemplate.from_template("""
You are an HR assistant. Answer ONLY from the context.

Rules:
- Do not use outside knowledge
- If answer is not found, say "Not mentioned"
- Answer in a respectful way
- Answer in complete sentences

Context:
{context}

Question:
{question}
""")

# --- Helper ---
def format_docs(docs):
    """Merge the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in their original order, with a blank
        line between consecutive entries. An empty input gives ``""``.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# --- RAG Chain (LCEL) ---
# The leading dict feeds the prompt's two inputs from the single value the
# chain is invoked with:
#   "context"  -> retriever output passed through format_docs
#   "question" -> the invoked value itself, unchanged
# The filled prompt then goes to the LLM, and StrOutputParser handles the
# model output.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": lambda question: question,
    }
    | prompt
    | llm
    | StrOutputParser()
)

# --- Query ---
# Invoke the chain with one sample question and print whatever it returns.
response = rag_chain.invoke("What is the sick leave policy?")

print(response)


No comments:

Post a Comment

What is Pydantic

Pydantic is a data validation and settings management library for Python. ...