conda install -c conda-forge chromadb
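If you prefer pip over conda, the equivalent install (assuming a standard Python environment) is:

pip install chromadb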

3.2 Code Implementation

# Import the required libraries
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (ChatPromptTemplate,
                                    SystemMessagePromptTemplate,
                                    HumanMessagePromptTemplate)
#
import chainlit as cl
import PyPDF2
from io import BytesIO
from getpass import getpass
#
import os
from configparser import ConfigParser
env_config = ConfigParser()

# Read the OpenAI API key from a local config file
def read_config(parser: ConfigParser, location: str) -> None:
    assert parser.read(location), f"Could not read config {location}"
#
CONFIG_FILE = os.path.join("./env", "env.conf")
read_config(env_config, CONFIG_FILE)
api_key = env_config.get("openai", "api_key").strip()

#
os.environ["OPENAI_API_KEY"] = api_key

# Chunking the text
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
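# Toy illustration of the splitter's behavior (not part of the app): a long
# space-separated string is cut into chunks of at most 1,000 characters, each
# overlapping its neighbor by roughly 100 characters. Uncomment to try:
# sample_chunks = text_splitter.split_text("word " * 500)  # ~2,500 characters
# print(len(sample_chunks), [len(c) for c in sample_chunks])  # e.g. 3 chunks, each <= 1000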
#
# System prompt template
system_template = """Use the following pieces of context to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source of the document from which you got your answer.

Begin!
----------------
{summaries}"""
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)
chain_type_kwargs = {"prompt": prompt}
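# To preview what the chain will send to the model, you can render the
# template with placeholder values (at runtime {summaries} is filled with the
# retrieved chunks and {question} with the user's message). Uncomment to try:
# print(prompt.format(summaries="<retrieved chunks>", question="What is this document about?"))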

# Decorator to react to the user websocket connection event
@cl.on_chat_start
async def init():
    files = None

    # Wait for the user to upload a PDF file
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a PDF file to begin!",
            accept=["application/pdf"],
        ).send()

    file = files[0]

    msg = cl.Message(content=f"Processing {file.name}...")
    await msg.send()

    # Read the PDF file
    pdf_stream = BytesIO(file.content)
    pdf = PyPDF2.PdfReader(pdf_stream)
    pdf_text = ""
    for page in pdf.pages:
        pdf_text += page.extract_text()

    # Split the text into chunks
    texts = text_splitter.split_text(pdf_text)

    # Create metadata for each chunk
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

    # Create a Chroma vector store
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    docsearch = await cl.make_async(Chroma.from_texts)(
        texts, embeddings, metadatas=metadatas
    )

    # Create a chain that uses the Chroma vector store
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        ChatOpenAI(temperature=0, openai_api_key=os.environ["OPENAI_API_KEY"]),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
    )

    # Save the metadata and texts in the user session
    cl.user_session.set("metadatas", metadatas)
    cl.user_session.set("texts", texts)

    # Let the user know that the system is ready
    msg.content = f"{file.name} processed. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", chain)

# React to messages coming from the UI
@cl.on_message
async def process_response(res):
    chain = cl.user_session.get("chain")  # type: RetrievalQAWithSourcesChain
    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    cb.answer_reached = True
    res = await chain.acall(res, callbacks=[cb])
    print(f"response: {res}")
    answer = res["answer"]
    sources = res["sources"].strip()
    source_elements = []

    # Get the metadata and texts from the user session
    metadatas = cl.user_session.get("metadatas")
    all_sources = [m["source"] for m in metadatas]
    texts = cl.user_session.get("texts")

    if sources:
        found_sources = []

        # Add the sources to the message
        for source in sources.split(","):
            source_name = source.strip().replace(".", "")
            # Get the index of the source
            try:
                index = all_sources.index(source_name)
            except ValueError:
                continue
            text = texts[index]
            found_sources.append(source_name)
            # Create the text element referenced in the message
            source_elements.append(cl.Text(content=text, name=source_name))

        if found_sources:
            answer += f"\nSources: {', '.join(found_sources)}"
        else:
            answer += "\nNo sources found"

    if cb.has_streamed_final_answer:
        cb.final_stream.elements = source_elements
        await cb.final_stream.update()
    else:
        await cl.Message(content=answer, elements=source_elements).send()

3.3 Running the Application

chainlit run <name of the python script>
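For example, assuming the script above was saved as app.py (any filename works), the command below serves the UI at http://localhost:8000; the optional -w flag auto-reloads the app when the file changes:

chainlit run app.py -w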

3.4 Chainlit UI

Click a returned source reference to see the exact document content that was cited.

We can also change the settings.

References:

[1] https://medium.aiplanet.com/building-llm-application-for-document-question-answering-using-chainlit-d15d10469069

Article reposted from the WeChat public account @ArronAI.
