python

[GPT][문서 검색] Chain

으누아빠 2024. 4. 15. 18:53
반응형

 

import streamlit as st
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import CacheBackedEmbeddings, OpenAIEmbeddings
from langchain.storage import LocalFileStore
from langchain.vectorstores.faiss import FAISS
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

# Set the browser tab title and icon for this app.
st.set_page_config(page_title="DocumentGPT", page_icon="📃")

# Chat model used to answer questions, created once at module level
# with temperature 0.1.
chat = ChatOpenAI(temperature=0.1)

# Cache the function below so the same upload is not re-embedded on every
# script rerun. cache_resource is used instead of cache_data because the
# returned retriever wraps a FAISS index and an embeddings client, which are
# not reliably picklable (cache_data must serialize the return value).
@st.cache_resource(show_spinner="Embedding file...")
def embed_file(file):
    """Save an uploaded file, split and embed it, and return a retriever.

    Args:
        file: The uploaded file object; it must provide ``read()`` returning
            bytes and a ``name`` attribute.

    Returns:
        A retriever backed by a FAISS vector store built from the file's
        text chunks.

    Raises:
        OSError: If the cache directory or the file copy cannot be written.
    """
    import os

    file_content = file.read()

    # Keep only the final path component so an upload name containing
    # directory parts cannot write outside the cache directory.
    safe_name = os.path.basename(file.name)

    # Make sure the target directory exists; open() below does not create it.
    files_dir = "./.cache/files"
    os.makedirs(files_dir, exist_ok=True)
    file_path = f"{files_dir}/{safe_name}"

    # Save a copy of the upload on disk so the loader can read it by path.
    with open(file_path, "wb") as f:
        f.write(file_content)

    # Create the file loader.
    loader = UnstructuredFileLoader(file_path)

    # Split the text on newlines into chunks of 600 with an overlap of 100.
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=600,
        chunk_overlap=100,
    )
    docs = loader.load_and_split(text_splitter=splitter)

    embeddings = OpenAIEmbeddings()

    # Wrap the embeddings with a cache stored in a per-file local directory.
    cache_dir = LocalFileStore(f"./.cache/embeddings/{safe_name}")
    cache_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

    # Store the embedded documents in a vector store.
    vectorstore = FAISS.from_documents(docs, cache_embeddings)

    # Expose the vector store as a retriever.
    retriever = vectorstore.as_retriever()

    return retriever

# Display a message and optionally store it in session_state.
def send_message(message, role, save=True):
    """Render one chat message under the given role.

    When ``save`` is true, the message and its role are also appended to
    ``st.session_state["messages"]``.
    """
    bubble = st.chat_message(role)
    with bubble:
        st.markdown(message)

    if not save:
        return

    entry = {"message": message, "role": role}
    st.session_state["messages"].append(entry)

# Display the previously stored messages.
def paint_history():
    """Re-render every message kept in ``st.session_state["messages"]``.

    Messages are drawn with ``save=False`` so they are not appended to the
    history a second time.
    """
    for entry in st.session_state["messages"]:
        send_message(entry["message"], entry["role"], save=False)

def format_docs(docs):
    """Return the ``page_content`` of each document, separated by blank lines."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# Chat prompt with two messages: a system message that tells the model to
# answer only from the supplied {context}, and a human message that carries
# the user's {question}. The system text is a whitespace-sensitive literal,
# so its indentation is part of what is sent to the model.
prompt = ChatPromptTemplate.from_messages([
        (
            "system",
            """
            Answer the question using ONLY the following context. If you don't know the answer just say you don't know. DON'T make anything up.

            Context: {context}
            """,
        ),
        ("human", "{question}"),
])

# Page heading.
st.title("DocumentGPT")

# Introductory text rendered as Markdown below the heading.
st.markdown("""
Welcome!

Use this chatbot to ask questions to an AI about your files!

Upload your files on the sidebar.
""")

# Add the file upload widget in the sidebar; only pdf, txt and docx are
# accepted. `file` is read by the top-level code that follows.
with st.sidebar:
    file = st.file_uploader(
        "Upload a .txt .pdf or .docx file",
        type=["pdf", "txt", "docx"],
    )

# Main flow: with no uploaded file the stored chat history is reset;
# otherwise the file is embedded and a chat over its contents is shown.
if not file:
    st.session_state["messages"] = []
else:
    retriever = embed_file(file)

    # Greeting is shown on every rerun but never stored in the history.
    send_message("I'm ready! Ask away!", "ai", save=False)
    paint_history()

    message = st.chat_input("Ask anything about your file...")
    if message:
        send_message(message, "human")

        # The input map feeds the prompt: "context" comes from the retriever
        # piped through format_docs, "question" is the raw user input.
        chain_inputs = {
            "context": retriever | RunnableLambda(format_docs),
            "question": RunnablePassthrough(),
        }
        chain = chain_inputs | prompt | chat

        response = chain.invoke(message)
        send_message(response.content, "ai")

모델(model)과 연결하여 업로드한 문서에서 데이터를 검색하는 코드

'python' 카테고리의 다른 글

[GPT][PRIVATEGPT]HuggingFaceHub  (0) 2024.04.16
[GPT][문서검색] Streaming  (0) 2024.04.15
[GPT][문서검색] Chat History  (0) 2024.04.15
[GPT][문서검색] Uploading Documents  (0) 2024.04.15
[GPT] streamlit - Chat Messages  (0) 2024.04.08