!pip install langchain langchain-core langchain-community langchain-google-genai faiss-cpu langchain-text-splitters
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import os
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from google.colab import userdata
# --- Setup ---
# Copy the Gemini API key from the Colab secret named "geminiapikey" into the
# GOOGLE_API_KEY environment variable (presumably where the Google GenAI
# clients created later in this script look for credentials -- verify).
os.environ["GOOGLE_API_KEY"] = userdata.get("geminiapikey")
# Sample HR policy statements; each string is one source document for the
# retrieval pipeline built below.
documents = [
    (
        "Our company offers 21 days of paid annual leave. "
        "Employees may work from home up to three days a week with manager approval. "
        "Health insurance covers the employee, spouse, and two children."
    ),
    (
        "Standard working hours are 9:00–18:00 with a 1-hour break; "
        "flexible start between 8:00–10:00 if 8 hours are met."
    ),
    "The company observes 12 public holidays per calendar year, announced each December.",
    (
        "Employees receive 12 paid sick days per year; "
        "a doctor’s note is required after 2+ consecutive days."
    ),
    (
        "Parental leave: 16 weeks paid maternity and 4 weeks paid paternity; "
        "additional unpaid leave per local law."
    ),
    "Up to 7 unused annual leave days may carry over; cash encashment is not offered.",
    (
        "Overtime must be pre-approved and is compensated at 1.5× "
        "or as time off in lieu within 60 days."
    ),
    (
        "Hybrid work: up to 3 WFH days/week with manager approval; "
        "availability required during core hours 10:00–16:00."
    ),
    "Company provides a laptop; eligible roles receive a ₹2,000/month home-office stipend.",
    (
        "Expenses must be submitted within 30 days via the expense portal; "
        "economy airfare and mid-tier hotels; "
        "receipts required for amounts over ₹1,000."
    ),
    (
        "Medical insurance covers employee, spouse, and up to two children; "
        "optional parental cover at employee cost."
    ),
    (
        "Payroll runs on the last business day of the month; "
        "reimbursements paid with the next payroll."
    ),
    (
        "Notice period is 30 days for voluntary resignation; "
        "the company may provide pay in lieu of notice."
    ),
    (
        "The company enforces a zero-tolerance policy for harassment or discrimination; "
        "violations may lead to termination."
    ),
    (
        "Employees must follow the IT acceptable-use policy; "
        "confidential data cannot be shared outside approved tools."
    ),
]
# --- Convert to docs ---
# Wrap every raw policy string in a Document so the splitter can consume it.
docs = [Document(page_content=text) for text in documents]

# --- Split ---
# Chunk limit of 200 with an overlap of 30 between neighbouring chunks
# (units come from the splitter's length function -- presumably characters;
# verify against the splitter documentation).
splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=30,
)
chunks = splitter.split_documents(docs)
# --- Embeddings ---
# NOTE: an earlier revision used model="models/embedding-001".
embeddings = GoogleGenerativeAIEmbeddings(
    model="gemini-embedding-2-preview",
)

# --- Vector store ---
# Build a FAISS index from the chunks using the embedding model above, then
# expose it as a retriever configured with k=6 (six results requested per
# query).
vectorstore = FAISS.from_documents(documents=chunks, embedding=embeddings)
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 6},
)
# --- LLM ---
# Chat model that generates the final answer.
# NOTE: an earlier revision used model="gemini-1.5-flash".
llm = ChatGoogleGenerativeAI(
    model="gemini-3-flash-preview",
    temperature=0,
)
# --- Prompt ---
# Template with two placeholders: {context} (the retrieved policy text) and
# {question} (the user's query). The template body is kept flush-left on
# purpose: everything between the triple quotes, including leading
# whitespace, is part of the prompt text.
prompt = ChatPromptTemplate.from_template(
    """
You are an HR assistant. Answer ONLY from the context.
Rules:
- Do not use outside knowledge
- If answer is not found, say "Not mentioned"
- Answer in a respectful way
- Answer in complete sentences
Context:
{context}
Question:
{question}
"""
)
# --- Helper ---
def format_docs(docs) -> str:
    """Join the text of the given documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string
            attribute (in this script, the documents produced by the
            retriever).

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    return "\n\n".join(doc.page_content for doc in docs)
# --- RAG Chain (LCEL) ---
# The chain input (the question string) feeds both keys of the mapping:
#   "context"  -> retriever output for the question, merged by format_docs
#   "question" -> the question itself, passed through unchanged
# The mapping fills the prompt, the prompt goes to the LLM, and the model's
# reply is parsed into a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": lambda query: query}
    | prompt
    | llm
    | StrOutputParser()
)

# --- Query ---
# Run one sample question through the chain and show the answer.
response = rag_chain.invoke("What is the sick leave policy?")
print(response)
No comments:
Post a Comment