[GPT][SITEGPT] Map Re Rank Chain

python

[GPT][SITEGPT] Map Re Rank Chain

으누아빠 2024. 5. 13. 18:06

from langchain.document_loaders import SitemapLoader

from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.vectorstores.faiss import FAISS

from langchain.embeddings import OpenAIEmbeddings

from langchain.chat_models import ChatOpenAI

from langchain.prompts import ChatPromptTemplate

from bs4 import BeautifulSoup

import streamlit as st

import asyncio

import sys

# Shared chat model for both the per-document answering step and the final
# answer-selection step; a low temperature keeps the output close to the context.
llm = ChatOpenAI(temperature=0.1)

# Prompt for the per-document step: the model must answer {question} using
# only {context}, then rate its own answer from 0 to 5. Two worked examples in
# the template show the expected "Answer: ... / Score: N" layout.
answers_prompt = ChatPromptTemplate.from_template(

"""

Using ONLY the following context answer the user's question. If you can't just say you don't know, don't make anything up.

Then, give a score to the answer between 0 and 5.

If the answer answers the user question the score should be high, else it should be low.

Make sure to always include the answer's score even if it's 0.

Context: {context}

Examples:

Question: How far away is the moon?

Answer: The moon is 384,400 km away.

Score: 5

Question: How far away is the sun?

Answer: I don't know

Score: 0

Your turn!

Question: {question}

"""

)

def get_answers(inputs):
    """Answer the question once per retrieved document (the "map" step).

    Args:
        inputs: Mapping with two keys:
            "docs": iterable of documents exposing ``page_content`` and
                ``metadata["source"]``.
            "question": the user's question.

    Returns:
        dict with the unchanged "question" and an "answers" list. Each entry
        holds the model's reply text under "answer" (the prompt asks the model
        to include a 0-5 score in it) and the document's "source".
    """
    docs = inputs["docs"]
    question = inputs["question"]
    # Build the chain once; it is reused for every document.
    answers_chain = answers_prompt | llm

    answers = []
    for doc in docs:
        response = answers_chain.invoke(
            {"question": question, "context": doc.page_content}
        )
        answers.append(
            {
                "answer": response.content,
                "source": doc.metadata["source"],
            }
        )

    return {
        "question": question,
        "answers": answers,
    }

def choose_answer(inputs):
    """Ask the model to pick the final answer from the per-document answers.

    Args:
        inputs: Mapping with "answers" (list of dicts carrying "answer" and
            "source") and "question".

    Returns:
        Whatever ``choose_prompt | llm`` returns for the condensed answers
        (the caller reads its ``content`` attribute).
    """
    candidates = inputs["answers"]
    user_question = inputs["question"]
    pipeline = choose_prompt | llm

    # One text block per candidate: the answer followed by its source.
    blocks = [
        f"{answer['answer']}\nSource:{answer['source']}\n"
        for answer in candidates
    ]
    payload = {
        "question": user_question,
        "answers": "\n\n".join(blocks),
    }
    return pipeline.invoke(payload)

# Prompt for the selection step: the system message carries the per-document
# answers ({answers}) and tells the model to prefer the highest-scored ones and
# to cite sources unchanged; the human message is the raw {question}.
choose_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """

Use ONLY the following pre-existing answers to answer the user's question.

Use the answers that have the highest score (more helpful) and favor the most recent ones.

Cite sources and return the sources of the answers as they are, do not change them.

Answers: {answers}

""",
        ),
        ("human", "{question}"),
    ]
)

def parse_page(soup: BeautifulSoup):
    """Return the page text with the header and footer elements removed.

    The first ``<header>`` and ``<footer>`` found are dropped from ``soup``
    (in place). In the remaining text, newlines and non-breaking spaces become
    plain spaces and the "CloseSearch Submit Blog" boilerplate is removed.
    """
    # Look both elements up before removing either one.
    chrome = [soup.find(tag_name) for tag_name in ("header", "footer")]
    for element in chrome:
        if element:
            element.decompose()

    text = str(soup.get_text())
    substitutions = (
        ("\n", " "),
        ("\xa0", " "),
        ("CloseSearch Submit Blog", ""),
    )
    for old, new in substitutions:
        text = text.replace(old, new)
    return text

@st.cache_data(show_spinner="Loading website...")
def load_website(url):
    """Load a sitemap, split its pages into chunks and return a retriever.

    Pages are parsed with ``parse_page``, split with a tiktoken-based
    recursive splitter (chunk size 1000, overlap 200), embedded with
    ``OpenAIEmbeddings`` and indexed in FAISS. The function is wrapped in
    ``st.cache_data``, which shows "Loading website..." while it runs.

    Args:
        url: Sitemap URL handed to ``SitemapLoader``.

    Returns:
        The retriever produced by the FAISS store's ``as_retriever()``.
    """
    chunker = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1000, chunk_overlap=200
    )
    sitemap_loader = SitemapLoader(url, parsing_function=parse_page)
    # Throttle crawling to two requests per second.
    sitemap_loader.requests_per_second = 2
    chunks = sitemap_loader.load_and_split(text_splitter=chunker)
    index = FAISS.from_documents(chunks, OpenAIEmbeddings())
    return index.as_retriever()

# Set the browser tab title and icon for the Streamlit page.
st.set_page_config(

page_title="SiteGPT",

page_icon="🖥️",

)

# Render the page heading and short usage instructions as Markdown.
st.markdown(

"""

# SiteGPT

Ask questions about the content of a website.

Start by writing the URL of the website on the sidebar.

"""

)

# On Windows, switch asyncio to the selector event-loop policy; other
# platforms keep their default policy.
# NOTE(review): presumably needed for the loader's async page fetching on
# Windows; confirm before removing.
if "win32" in sys.platform:
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

# Sidebar: the user enters the sitemap URL of the site to query.
with st.sidebar:
    url = st.text_input(
        "Write down a URL",
        placeholder="https://example.com",
    )

if url:
    if ".xml" not in url:
        # Only sitemap URLs are accepted; anything without ".xml" is rejected.
        with st.sidebar:
            st.error("Please write down a Sitemap URL.")
    else:
        retriever = load_website(url)
        query = st.text_input("Ask a question to the website.")
        if query:
            # Retrieve documents for the query, answer per document, then
            # let the model choose the final answer from those candidates.
            chain = (
                {
                    "docs": retriever,
                    "question": RunnablePassthrough(),
                }
                | RunnableLambda(get_answers)
                | RunnableLambda(choose_answer)
            )
            result = chain.invoke(query)
            # Escape dollar signs so Streamlit's Markdown renderer does not
            # treat "$...$" as LaTeX. "\\$" is the valid spelling of the
            # backslash-dollar text that the invalid "\$" escape used to give.
            st.markdown(result.content.replace("$", "\\$"))

Map re-rank

문서마다 각각 답변을 생성하고, 각 답변에 점수를 부여함
그중 가장 높은 점수를 받은 답변을 최종 답변으로 채택함

문서가 구분되어 있고 높은 유사성을 요구할 때 유용

answers_prompt 에는 답변과 점수를 어떤 형식으로 출력해야 하는지 보여 주는 예시(Question / Answer / Score)가 포함되어 있음

'python' 카테고리의 다른 글

[GPT][MEETINGGPT] Cutting The Audio (0)	2024.05.14
[GPT][MEETINGGPT] Audio Extraction (0)	2024.05.14
[GPT][SITEGPT] Parsing Function (0)	2024.05.13
[GPT][SITEGPT] SitemapLoader (0)	2024.05.13
[GPT][SITEGPT] AsyncChromiumLoader (0)	2024.05.02

현재글[GPT][SITEGPT] Map Re Rank Chain

250x250

시사, Flutter, IOS, 진도, 중도, 보수, 배포, LCEL, ES6, ChatPromptTemplate, 유튜브, RAG, javascript, chain, 안드로이드, 진보, mongodb, ConversationSummaryBufferMemory, 중보, MessagesPlaceholder,

Today :
Yesterday :

일	월	화	수	목	금	토
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30	31

OUR + YOUR SPACE