Chat with PDF | Chatbot Bio-query use case
In Visual Studio Code, create a directory named usecase3 and create a file search.py in it.
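pip install chatpdf
The code below also imports LangChain, the OpenAI integration, FAISS, the PDF loader and Streamlit, so install these as well if they are missing:
pip install langchain langchain-community langchain-openai faiss-cpu pypdf streamlit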
Create a file named search.py
# Load the PDF and split it into document chunks, then show what was produced.
from langchain.document_loaders import PyPDFLoader

PDF_PATH = "test.pdf"

loader = PyPDFLoader(PDF_PATH)
pages_content = loader.load_and_split()

# Print the number of chunks followed by the chunks themselves.
chunk_count = len(pages_content)
print(chunk_count, pages_content)
The output is the number of chunks followed by the chunks themselves, each with its text and metadata such as the file name and page number.
Now type the code below into search.py and run it:
# search.py (step 2): build a FAISS index from the PDF and ask one question.
#
# NOTE(review): OpenAIEmbeddings() and ChatOpenAI() are created without
# arguments, so credentials presumably come from the environment
# (e.g. OPENAI_API_KEY) -- confirm before running.
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Load the PDF and split it into document chunks.
loader = PyPDFLoader("test.pdf")
pages_content = loader.load_and_split()
# print(len(pages_content), pages_content)

# Embed the chunks with OpenAI embeddings and index them in FAISS.
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(pages_content, embeddings)
db.save_local("faiss_index")  # to save local copy

# Reload the index that was just saved. allow_dangerous_deserialization=True
# opts in to reading the pickled part of the index, so only load index files
# that you created yourself.
nwdb = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

query = "are there any educational qualification"
docs = nwdb.similarity_search(query)  # retrieve the chunks most similar to the query
# print(docs)

llm = ChatOpenAI()  # create chatbot
qa_chain = RetrievalQA.from_chain_type(llm, retriever=nwdb.as_retriever())

# invoke() replaces the deprecated direct call qa_chain({...}); the question is
# taken from `query` instead of repeating the literal.
res = qa_chain.invoke({"query": query})
print(res)
# search.py (step 3): reuse the saved FAISS index and build the QA chain.
#
# NOTE(review): OpenAIEmbeddings() and ChatOpenAI() are created without
# arguments, so credentials presumably come from the environment
# (e.g. OPENAI_API_KEY) -- confirm before running.
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# The PDF only needs to be loaded when the index is (re)built:
# loader = PyPDFLoader("test.pdf")
# pages_content = loader.load_and_split()
# print(len(pages_content), pages_content)

embeddings = OpenAIEmbeddings()

# The two lines below are commented out to avoid creating the vector store again.
# db = FAISS.from_documents(pages_content, embeddings)
# db.save_local("faiss_index")  # to save local vector store copy

# The faiss_index directory holds two files: index.faiss and index.pkl.
# allow_dangerous_deserialization=True opts in to reading the pickled part of
# the index, so only load index files that you created yourself.
nwdb = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

llm = ChatOpenAI()  # create chatbot
qa_chain = RetrievalQA.from_chain_type(llm, retriever=nwdb.as_retriever())

if __name__ == "__main__":
    # Sample query, run only when this file is executed directly. Keeping it
    # behind the guard means importing this module (as app.py does) no longer
    # issues an embeddings request and an LLM request whose results are unused.
    query = "are there any educational qualification"
    docs = nwdb.similarity_search(query)  # retrieve the chunks most similar to the query
    # print(docs)

    # invoke() replaces the deprecated direct call qa_chain({...}).
    res = qa_chain.invoke({"query": query})
    # print(res)
# Addition for the chatbot: app.py imports ask() from this module.
def ask(user_query: str) -> str:
    """Answer a question using the module-level ``qa_chain``.

    Args:
        user_query: The question text, passed to the chain under the
            "query" key.

    Returns:
        The value stored under the "result" key of the chain's output.
    """
    # invoke() replaces the deprecated direct call qa_chain({...}).
    res = qa_chain.invoke({"query": user_query})
    return res["result"]
# app.py - Streamlit chat front end for the ask() helper in search.py.
import streamlit as st

from search import ask

# Greeting bubble. The role name was misspelled "assitant", which did not match
# the "assistant" role used for the answers below.
with st.chat_message("assistant"):
    st.write("Hello! May I helpYou ?")

prompt = st.chat_input("Type question")
if prompt:
    # Echo the user's question, then render the answer returned by ask().
    st.chat_message("user").markdown(prompt)
    response = ask(prompt)
    st.chat_message("assistant").markdown(response)
Save the file and run it with `streamlit run app.py`; the Streamlit app opens in a browser tab.
Now type "Educational qualification in bullet points" in the chat input box
and then press the send button. You should see a screen like the one shown below:
Wow! We have built a chatbot that answers questions from my test.pdf.
This is the simplest way to develop a custom-made chatbot with OpenAI, embeddings, LangChain and Streamlit. This is just the beginning. You can change the code in app.py by referring to the Streamlit documentation to change your front end, and enjoy!!!😊😊😊😊😊😊😊😊
Please use your own PDF and change the queries to suit its contents.🎈
No comments:
Post a Comment