mirror of
https://github.com/langchain-ai/langchain-benchmarks.git
synced 2026-07-01 22:34:02 -04:00
Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 7a86824af7 | |||
| b1f40351b9 |
@@ -0,0 +1,4 @@
|
||||
# Benchmarking on LangChain Docs
|
||||
|
||||
|
||||
Evaluating various approaches for RAG on LangChain docs.
|
||||
@@ -0,0 +1,29 @@
|
||||
from run_evals import main
|
||||
|
||||
# Each entry is one evaluation run. Its keys are forwarded verbatim as keyword
# arguments to `run_evals.main` by the loop at the bottom of this script.
# Entries that are commented out are disabled runs kept for reference.
experiments = [
    # {
    #     # "server_url": "http://localhost:1983/openai-functions-agent",
    #     "model": "openai-functions-agent",
    #     "project_name": "openai-functions-agent",
    # },
    {
        # "server_url": "http://localhost:1983/anthropic_chat",
        "model": "anthropic-chat",
        "project_name": "anthropic-chat",
    },
    # {
    #     "model": "chat",
    #     # "server_url": "http://localhost:1983/chat",
    #     "project_name": "chat",
    # },
    # Not worth our time: it's so bad and slow.
    # {
    #     # "server_url": "http://localhost:1983/anthropic_iterative_search",
    #     "model": "anthropic-iterative-search",
    #     "max_concurrency": 2,
    #     "project_name": "anthropic-iterative-search",
    # },
]

# Run every enabled experiment against the same dataset, one after another.
for experiment in experiments:
    main(**experiment, dataset_name="Chat Langchain Pub")
|
||||
@@ -0,0 +1,42 @@
|
||||
from fastapi import FastAPI
|
||||
from langserve import add_routes
|
||||
from chat_langchain.chain import chain, anthropic_chain
|
||||
from anthropic_iterative_search.chain import chain as anthropic_agent_chain
|
||||
from openai_functions_agent import agent_executor as openai_functions_agent_chain
|
||||
|
||||
|
||||
# Single FastAPI application on which every chain below is mounted.
app = FastAPI()

# Edit this to add the chain you want to add
# NOTE(review): three paths below use underscores while the last one uses
# hyphens; clients must use the exact spelling registered here.
add_routes(
    app,
    chain,
    path="/chat",
    # include_callback_events=True,  # TODO: Include when fixed
)

add_routes(
    app,
    anthropic_chain,
    path="/anthropic_chat",
    # include_callback_events=True,  # TODO: Include when fixed
)

add_routes(
    app,
    anthropic_agent_chain,
    path="/anthropic_iterative_search",
    # include_callback_events=True,  # TODO: Include when fixed
)

add_routes(app, openai_functions_agent_chain, path="/openai-functions-agent")
|
||||
|
||||
|
||||
def run_server(port: int = 1983) -> None:
    """Serve the module-level ``app`` with uvicorn.

    Args:
        port: TCP port to listen on.
    """
    # uvicorn is imported only when the server is actually started.
    import uvicorn

    # Host "0.0.0.0" makes the server listen on all IPv4 interfaces.
    uvicorn.run(app, host="0.0.0.0", port=port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_server()
|
||||
@@ -0,0 +1,18 @@
|
||||
from langchain.schema.runnable import RunnableLambda
|
||||
from langserve import add_routes
|
||||
from fastapi import FastAPI
|
||||
import uuid
|
||||
|
||||
def foo(uid: uuid.UUID) -> str:
    """Return a short sentence that embeds the given UUID."""
    message = f"The id is {uid}"
    return message
|
||||
|
||||
chain = RunnableLambda(foo)
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
add_routes(app, chain)
|
||||
|
||||
if __name__ == "__main__":
    # Start the server only when this file is executed as a script. Without
    # the guard, merely importing the module would block on a running server.
    import uvicorn

    uvicorn.run(app, port=8122)
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
|
||||
# anthropic-iterative-search
|
||||
|
||||
This template will create a virtual research assistant with the ability to search Wikipedia to find answers to your questions.
|
||||
|
||||
It is heavily inspired by [this notebook](https://github.com/anthropics/anthropic-cookbook/blob/main/long_context/wikipedia-search-cookbook.ipynb).
|
||||
|
||||
## Environment Setup
|
||||
|
||||
Set the `ANTHROPIC_API_KEY` environment variable to access the Anthropic models.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this package, you should first have the LangChain CLI installed:
|
||||
|
||||
```shell
|
||||
pip install -U "langchain-cli[serve]"
|
||||
```
|
||||
|
||||
To create a new LangChain project and install this as the only package, you can do:
|
||||
|
||||
```shell
|
||||
langchain app new my-app --package anthropic-iterative-search
|
||||
```
|
||||
|
||||
If you want to add this to an existing project, you can just run:
|
||||
|
||||
```shell
|
||||
langchain app add anthropic-iterative-search
|
||||
```
|
||||
|
||||
And add the following code to your `server.py` file:
|
||||
```python
|
||||
from anthropic_iterative_search import chain as anthropic_iterative_search_chain
|
||||
|
||||
add_routes(app, anthropic_iterative_search_chain, path="/anthropic-iterative-search")
|
||||
```
|
||||
|
||||
(Optional) Let's now configure LangSmith.
|
||||
LangSmith will help us trace, monitor and debug LangChain applications.
|
||||
LangSmith is currently in private beta; you can sign up [here](https://smith.langchain.com/).
|
||||
If you don't have access, you can skip this section
|
||||
|
||||
|
||||
```shell
|
||||
export LANGCHAIN_TRACING_V2=true
|
||||
export LANGCHAIN_API_KEY=<your-api-key>
|
||||
export LANGCHAIN_PROJECT=<your-project> # if not specified, defaults to "default"
|
||||
```
|
||||
|
||||
If you are inside this directory, then you can spin up a LangServe instance directly by:
|
||||
|
||||
```shell
|
||||
langchain serve
|
||||
```
|
||||
|
||||
This will start the FastAPI app with a server running locally at
|
||||
[http://localhost:8000](http://localhost:8000)
|
||||
|
||||
We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs)
|
||||
We can access the playground at [http://127.0.0.1:8000/anthropic-iterative-search/playground](http://127.0.0.1:8000/anthropic-iterative-search/playground)
|
||||
|
||||
We can access the template from code with:
|
||||
|
||||
```python
|
||||
from langserve.client import RemoteRunnable
|
||||
|
||||
runnable = RemoteRunnable("http://localhost:8000/anthropic-iterative-search")
|
||||
```
|
||||
+11
@@ -0,0 +1,11 @@
|
||||
from langchain.schema.runnable import ConfigurableField
|
||||
|
||||
from .chain import chain
|
||||
from .retriever_agent import executor
|
||||
|
||||
# Make the exported runnable switchable through the configurable field
# "chain": the default key "response" maps to `chain` itself.
final_chain = chain.configurable_alternatives(
    ConfigurableField(id="chain"),
    default_key="response",
    # This adds a new option, with name `retrieve`, that is equal to `executor`
    retrieve=executor,
)
|
||||
+16
@@ -0,0 +1,16 @@
|
||||
def _format_docs(docs):
    """Render ``docs`` as numbered ``<item>`` blocks joined by newlines.

    Numbering starts at 1. Each document is interpolated as-is between
    ``<page_content>`` tags. An empty input yields an empty string.
    """
    items = []
    for position, doc in enumerate(docs, start=1):
        items.append(
            f'<item index="{position}">\n<page_content>\n{doc}\n</page_content>\n</item>'
        )
    return "\n".join(items)
|
||||
|
||||
|
||||
def format_agent_scratchpad(intermediate_steps):
    """Rebuild the agent's running transcript from its previous steps.

    For every ``(action, observation)`` pair the action's ``log`` is emitted,
    followed by a closing ``</search_query>`` tag and the formatted
    observation. With no steps the result is an empty string.
    """
    pieces = []
    for action, observation in intermediate_steps:
        pieces.extend((action.log, "</search_query>", _format_docs(observation)))
    return "".join(pieces)
|
||||
+29
@@ -0,0 +1,29 @@
|
||||
from langchain.chat_models import ChatAnthropic
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain.pydantic_v1 import BaseModel
|
||||
from langchain.schema.output_parser import StrOutputParser
|
||||
from langchain.schema.runnable import RunnableLambda
|
||||
|
||||
from .prompts import answer_prompt
|
||||
from .retriever_agent import executor
|
||||
|
||||
# Prompt that asks the model to answer `query` from the supplied `information`.
prompt = ChatPromptTemplate.from_template(answer_prompt)

model = ChatAnthropic(model="claude-2", temperature=0, max_tokens_to_sample=1000)

# Pipeline: rename the incoming "question" to "query", run the retrieval agent
# (`executor`) and keep its "output" as "information", then fill the prompt,
# call the model and return the reply as a plain string.
chain = (
    RunnableLambda(lambda x: {"query": x["question"]})
    | {"query": lambda x: x["query"], "information": executor | (lambda x: x["output"])}
    | prompt
    | model
    | StrOutputParser()
)

# Add typing for the inputs to be used in the playground


class Inputs(BaseModel):
    # The user's question; the first pipeline step reads this key.
    question: str


# Rebind `chain` so that it declares `Inputs` as its input type.
chain = chain.with_types(input_type=Inputs)
|
||||
+37
@@ -0,0 +1,37 @@
|
||||
import re
|
||||
|
||||
from langchain.schema.agent import AgentAction, AgentFinish
|
||||
|
||||
from .agent_scratchpad import _format_docs
|
||||
|
||||
|
||||
def extract_between_tags(tag: str, string: str, strip: bool = True) -> "str | None":
    """Return the text enclosed by the single ``<tag>...</tag>`` pair in ``string``.

    Args:
        tag: Tag name to look for, without angle brackets.
        string: Text to search. Matching spans newlines.
        strip: When true, surrounding whitespace is removed from the result.

    Returns:
        The enclosed text, or ``None`` when no such tag pair is present.

    Raises:
        ValueError: If more than one tag pair is found.
    """
    # Raw f-string: the whitespace class must reach the regex engine untouched.
    # In a normal string literal "\s" is an invalid escape sequence that newer
    # Python versions warn about.
    matches = re.findall(rf"<{tag}\s?>(.+?)</{tag}\s?>", string, re.DOTALL)
    if not matches:
        return None
    if len(matches) != 1:
        raise ValueError(
            f"Expected exactly one <{tag}> block, found {len(matches)}."
        )
    match = matches[0]
    return match.strip() if strip else match
|
||||
|
||||
|
||||
def parse_output(outputs):
    """Turn one model completion into the agent's next step.

    ``outputs`` supplies ``"partial_completion"`` (the model text) and
    ``"intermediate_steps"`` (earlier ``(action, observation)`` pairs). If the
    completion, with a closing ``</search_query>`` appended, contains a search
    query, an ``AgentAction`` for the ``search`` tool is returned. Otherwise
    an ``AgentFinish`` is returned carrying all observed docs and the full
    transcript.
    """
    completion = outputs["partial_completion"]
    previous_steps = outputs["intermediate_steps"]
    query = extract_between_tags("search_query", completion + "</search_query>")

    # A query was issued: ask the executor to run the search tool next.
    if query is not None:
        return AgentAction(tool="search", tool_input=query, log=completion)

    # No query: collect every observation and the transcript so far.
    collected_docs = []
    transcript_parts = []
    for action, observation in previous_steps:
        collected_docs.extend(observation)
        transcript_parts.append(action.log)
        transcript_parts.append("</search_query>" + _format_docs(observation))
    transcript_parts.append(completion)
    return AgentFinish(
        {"docs": collected_docs, "output": "".join(transcript_parts)},
        log=completion,
    )
|
||||
+7
@@ -0,0 +1,7 @@
|
||||
retrieval_prompt = """{retriever_description} Before beginning to research the user's question, first think for a moment inside <scratchpad> tags about what information is necessary for a well-informed answer. If the user's question is complex, you may need to decompose the query into multiple subqueries and execute them individually. Sometimes the search engine will return empty search results, or the search results may not contain the information you need. In such cases, feel free to try again with a different query.
|
||||
|
||||
After each call to the Search Engine Tool, reflect briefly inside <search_quality></search_quality> tags about whether you now have enough information to answer, or whether more information is needed. If you have all the relevant information, write it in <information></information> tags, WITHOUT actually answering the question. Otherwise, issue a new search.
|
||||
|
||||
Here is the user's question: <question>{query}</question> Remind yourself to make short queries in your scratchpad as you plan out your strategy.""" # noqa: E501
|
||||
|
||||
answer_prompt = "Here is a user query: <query>{query}</query>. Here is some relevant information: <information>{information}</information>. Please answer the question using the relevant information." # noqa: E501
|
||||
+17
@@ -0,0 +1,17 @@
|
||||
from langchain.tools import tool
|
||||
from langchain_docs_retriever.retriever import get_retriever
|
||||
|
||||
# This is used to tell the model how to best use the retriever.
|
||||
|
||||
retriever_description = """You will be asked a question by a human user. You have access to the following tool to help answer the question. <tool_description> Search Engine Tool * The search engine will exclusively search over the LangChain documentation for pages similar to your query. It returns for each page its title and full page content. Use this tool if you want to get up-to-date and comprehensive information on a topic to help answer queries. Queries should be as atomic as possible -- they only need to address one part of the user's question. For example, if the user's query is "what is the color of a basketball?", your search query should be "basketball". Here's another example: if the user's question is "Who created the first neural network?", your first query should be "neural network". As you can see, these queries are quite short. Think keywords, not phrases. * At any time, you can make a call to the search engine using the following syntax: <search_query>query_word</search_query>. * You'll then get results back in <search_result> tags.</tool_description>""" # noqa: E501
|
||||
|
||||
retriever = get_retriever()
|
||||
|
||||
# This should be the same as the function name below
|
||||
RETRIEVER_TOOL_NAME = "search"
|
||||
|
||||
|
||||
# NOTE(review): the signature and docstring are left untouched on purpose;
# the `@tool` decorator presumably derives the tool's name and description
# from them — confirm before editing either.
@tool
def search(query, callbacks = None):
    """Search the LangChain docs with the retriever."""
    # Delegate to the module-level `retriever`, forwarding `callbacks`.
    return retriever.get_relevant_documents(query, callbacks=callbacks)
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
from langchain.agents import AgentExecutor
|
||||
from langchain.chat_models import ChatAnthropic
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain.schema.output_parser import StrOutputParser
|
||||
from langchain.schema.runnable import RunnableMap, RunnablePassthrough
|
||||
|
||||
from .agent_scratchpad import format_agent_scratchpad
|
||||
from .output_parser import parse_output
|
||||
from .prompts import retrieval_prompt
|
||||
from .retriever import retriever_description, search
|
||||
|
||||
# Two-message prompt: the user turn carries the retrieval instructions, and
# the "ai" turn is filled with the agent's transcript so far.
prompt = ChatPromptTemplate.from_messages(
    [
        ("user", retrieval_prompt),
        ("ai", "{agent_scratchpad}"),
    ]
)
# Pre-fill the tool description so only the remaining variables are needed at call time.
prompt = prompt.partial(retriever_description=retriever_description)

model = ChatAnthropic(model="claude-2", temperature=0, max_tokens_to_sample=1000)

# One model call: build the scratchpad from the previous steps, render the
# prompt, and generate text with "</search_query>" bound as a stop sequence.
chain = (
    RunnablePassthrough.assign(
        agent_scratchpad=lambda x: format_agent_scratchpad(x["intermediate_steps"])
    )
    | prompt
    | model.bind(stop_sequences=["</search_query>"])
    | StrOutputParser()
)

# Pair the completion with the untouched intermediate steps and let
# `parse_output` decide between another search and finishing.
agent_chain = (
    RunnableMap(
        {
            "partial_completion": chain,
            "intermediate_steps": lambda x: x["intermediate_steps"],
        }
    )
    | parse_output
)

# Executor that runs `agent_chain` with `search` as its only tool.
executor = AgentExecutor(agent=agent_chain, tools=[search])
|
||||
@@ -0,0 +1,12 @@
|
||||
from anthropic_iterative_search import final_chain
|
||||
|
||||
# Manual smoke test: run one question through the "retrieve" alternative of
# `final_chain` and print whatever it returns.
if __name__ == "__main__":
    query = (
        "Which movie came out first: Oppenheimer, or "
        "Are You There God It's Me Margaret?"
    )
    print(
        final_chain.with_config(configurable={"chain": "retrieve"}).invoke(
            {"query": query}
        )
    )
|
||||
+1488
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,22 @@
|
||||
[tool.poetry]
|
||||
name = "anthropic-iterative-search"
|
||||
version = "0.0.1"
|
||||
description = ""
|
||||
authors = []
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8.1,<4.0"
|
||||
langchain = ">=0.0.325"
|
||||
anthropic = "^0.5.0"
|
||||
wikipedia = "^1.4.0"
|
||||
|
||||
[tool.langserve]
|
||||
export_module = "anthropic_iterative_search"
|
||||
export_attr = "final_chain"
|
||||
|
||||
[build-system]
|
||||
requires = [
|
||||
"poetry-core",
|
||||
]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 LangChain, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -0,0 +1,66 @@
|
||||
# chat-langchain
|
||||
|
||||
TODO: What does this package do
|
||||
|
||||
## Environment Setup
|
||||
|
||||
TODO: What environment variables need to be set (if any)
|
||||
|
||||
## Usage
|
||||
|
||||
To use this package, you should first have the LangChain CLI installed:
|
||||
|
||||
```shell
|
||||
pip install -U "langchain-cli[serve]"
|
||||
```
|
||||
|
||||
To create a new LangChain project and install this as the only package, you can do:
|
||||
|
||||
```shell
|
||||
langchain app new my-app --package chat-langchain
|
||||
```
|
||||
|
||||
If you want to add this to an existing project, you can just run:
|
||||
|
||||
```shell
|
||||
langchain app add chat-langchain
|
||||
```
|
||||
|
||||
And add the following code to your `server.py` file:
|
||||
```python
|
||||
from chat_langchain import chain as chat_langchain_chain
|
||||
|
||||
add_routes(app, chat_langchain_chain, path="/chat-langchain")
|
||||
```
|
||||
|
||||
(Optional) Let's now configure LangSmith.
|
||||
LangSmith will help us trace, monitor and debug LangChain applications.
|
||||
LangSmith is currently in private beta; you can sign up [here](https://smith.langchain.com/).
|
||||
If you don't have access, you can skip this section
|
||||
|
||||
|
||||
```shell
|
||||
export LANGCHAIN_TRACING_V2=true
|
||||
export LANGCHAIN_API_KEY=<your-api-key>
|
||||
export LANGCHAIN_PROJECT=<your-project> # if not specified, defaults to "default"
|
||||
```
|
||||
|
||||
If you are inside this directory, then you can spin up a LangServe instance directly by:
|
||||
|
||||
```shell
|
||||
langchain serve
|
||||
```
|
||||
|
||||
This will start the FastAPI app with a server running locally at
|
||||
[http://localhost:8000](http://localhost:8000)
|
||||
|
||||
We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs)
|
||||
We can access the playground at [http://127.0.0.1:8000/chat-langchain/playground](http://127.0.0.1:8000/chat-langchain/playground)
|
||||
|
||||
We can access the template from code with:
|
||||
|
||||
```python
|
||||
from langserve.client import RemoteRunnable
|
||||
|
||||
runnable = RemoteRunnable("http://localhost:8000/chat-langchain")
|
||||
```
|
||||
@@ -0,0 +1,3 @@
|
||||
from chat_langchain.chain import chain
|
||||
|
||||
__all__ = ["chain"]
|
||||
@@ -0,0 +1,172 @@
|
||||
"""Chat langchain 'engine'."""
|
||||
from operator import itemgetter
|
||||
from typing import Dict, List, Optional, Sequence
|
||||
|
||||
from langchain.chat_models import ChatAnthropic, ChatOpenAI
|
||||
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
|
||||
from langchain.schema import Document
|
||||
from langchain.schema.language_model import BaseLanguageModel
|
||||
from langchain.schema.messages import AIMessage, HumanMessage
|
||||
from langchain.schema.output_parser import StrOutputParser
|
||||
from langchain.schema.retriever import BaseRetriever
|
||||
from langchain.schema.runnable import (
|
||||
ConfigurableField,
|
||||
Runnable,
|
||||
RunnableBranch,
|
||||
RunnableLambda,
|
||||
RunnableMap,
|
||||
)
|
||||
from langchain_docs_retriever.retriever import get_retriever
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
RESPONSE_TEMPLATE = """\
|
||||
You are an expert programmer and problem-solver, tasked with answering any question \
|
||||
about Langchain.
|
||||
|
||||
Generate a comprehensive and informative answer of 80 words or less for the \
|
||||
given question based solely on the provided search results (URL and content). You must \
|
||||
only use information from the provided search results. Use an unbiased and \
|
||||
journalistic tone. Combine search results together into a coherent answer. Do not \
|
||||
repeat text. Cite search results using [${{number}}] notation. Only cite the most \
|
||||
relevant results that answer the question accurately. Place these citations at the end \
|
||||
of the sentence or paragraph that reference them - do not put them all at the end. If \
|
||||
different results refer to different entities within the same name, write separate \
|
||||
answers for each entity.
|
||||
|
||||
You should use bullet points in your answer for readability. Put citations where they apply
|
||||
rather than putting them all at the end.
|
||||
|
||||
If there is nothing in the context relevant to the question at hand, just say "Hmm, \
|
||||
I'm not sure." Don't try to make up an answer.
|
||||
|
||||
Anything between the following `context` html blocks is retrieved from a knowledge \
|
||||
bank, not part of the conversation with the user.
|
||||
|
||||
<context>
|
||||
{context}
|
||||
<context/>
|
||||
|
||||
REMEMBER: If there is no relevant information within the context, just say "Hmm, I'm \
|
||||
not sure." Don't try to make up an answer. Anything between the preceding 'context' \
|
||||
html blocks is retrieved from a knowledge bank, not part of the conversation with the \
|
||||
user.\
|
||||
"""
|
||||
|
||||
REPHRASE_TEMPLATE = """\
|
||||
Given the following conversation and a follow up question, rephrase the follow up \
|
||||
question to be a standalone question.
|
||||
|
||||
Chat History:
|
||||
{chat_history}
|
||||
Follow Up Input: {question}
|
||||
Standalone Question:"""
|
||||
|
||||
|
||||
# Request payload for the chat chain; used below as the chains' declared input type.
class ChatRequest(BaseModel):
    # The user's current question.
    question: str
    # Earlier conversation turns, if any. `serialize_history` reads the
    # "human" and "ai" keys of each entry.
    chat_history: Optional[List[Dict[str, str]]]
|
||||
|
||||
|
||||
def create_retriever_chain(
    llm: BaseLanguageModel, retriever: BaseRetriever
) -> Runnable:
    """Build the document-lookup runnable, routed on chat history.

    When the input has a truthy ``chat_history``, the follow-up question is
    first condensed into a standalone question by ``llm`` and that text is
    sent to ``retriever``. Otherwise the ``question`` value is sent to
    ``retriever`` directly.
    """
    rephrase_prompt = PromptTemplate.from_template(REPHRASE_TEMPLATE)
    standalone_question = (rephrase_prompt | llm | StrOutputParser()).with_config(
        run_name="CondenseQuestion",
    )

    # Branch taken when there is prior conversation to fold into the query.
    has_history = RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
        run_name="HasChatHistoryCheck"
    )
    retrieve_with_history = (standalone_question | retriever).with_config(
        run_name="RetrievalChainWithHistory"
    )

    # Default branch: query the retriever with the raw question.
    pick_question = RunnableLambda(itemgetter("question")).with_config(
        run_name="Itemgetter:question"
    )
    retrieve_without_history = (pick_question | retriever).with_config(
        run_name="RetrievalChainWithNoHistory"
    )

    router = RunnableBranch(
        (has_history, retrieve_with_history),
        retrieve_without_history,
    )
    return router.with_config(run_name="RouteDependingOnChatHistory")
|
||||
|
||||
|
||||
def format_docs(docs: Sequence[Document]) -> str:
    """Wrap each document's ``page_content`` in a ``<doc>`` tag, one per line.

    The ``id`` attribute is the document's zero-based position in ``docs``.
    An empty sequence yields an empty string.
    """
    return "\n".join(
        f"<doc id='{index}'>{document.page_content}</doc>"
        for index, document in enumerate(docs)
    )
|
||||
|
||||
|
||||
def serialize_history(request: ChatRequest):
    """Convert the request's ``chat_history`` entries into message objects.

    ``request`` is accessed with ``.get``, i.e. like a mapping. For every
    entry, a non-``None`` ``"human"`` value becomes a ``HumanMessage`` and a
    non-``None`` ``"ai"`` value becomes an ``AIMessage``, human first. A
    missing or empty history yields an empty list.
    """
    role_to_message = (("human", HumanMessage), ("ai", AIMessage))
    history = request.get("chat_history") or []
    return [
        message_cls(content=entry[role])
        for entry in history
        for role, message_cls in role_to_message
        if entry.get(role) is not None
    ]
|
||||
|
||||
|
||||
def create_chain(
    llm: BaseLanguageModel,
    retriever: BaseRetriever,
) -> Runnable:
    """Assemble the question-answering chain for the given model and retriever.

    The returned runnable reads ``question`` and ``chat_history`` from its
    input, retrieves and formats documents as ``context``, renders the
    response prompt, calls ``llm`` and parses the result to a string.
    """
    # Document lookup; the same `llm` is reused for question condensing.
    retriever_chain = create_retriever_chain(
        llm,
        retriever,
    ).with_config(run_name="FindDocs")
    # Adds "context" (the formatted docs) while passing the other two keys through.
    _context = RunnableMap(
        {
            "context": retriever_chain | format_docs,
            "question": itemgetter("question"),
            "chat_history": itemgetter("chat_history"),
        }
    ).with_config(run_name="RetrieveDocs")
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", RESPONSE_TEMPLATE),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )

    response_synthesizer = (prompt | llm | StrOutputParser()).with_config(
        run_name="GenerateResponse",
    )
    # First step normalises the raw input: the history entries are converted
    # to message objects before retrieval and prompting see them.
    return (
        {
            "question": RunnableLambda(itemgetter("question")).with_config(
                run_name="Itemgetter:question"
            ),
            "chat_history": RunnableLambda(serialize_history).with_config(
                run_name="SerializeHistory"
            ),
        }
        | _context
        | response_synthesizer
    )
|
||||
|
||||
|
||||
# Default chain: OpenAI chat model with deterministic sampling settings.
llm = ChatOpenAI(
    model="gpt-3.5-turbo-16k",
    streaming=True,
    temperature=0,
)
# One retriever instance is shared by both chains below.
retriever = get_retriever()
chain = create_chain(
    llm,
    retriever,
)
chain = chain.with_types(input_type=ChatRequest)

# Same pipeline backed by an Anthropic model.
# NOTE(review): temperature=1 here versus temperature=0 for the OpenAI model
# above — confirm this difference is intentional.
anthropic_chain = create_chain(
    ChatAnthropic(model="claude-2", temperature=1),
    retriever,
)
anthropic_chain = anthropic_chain.with_types(input_type=ChatRequest)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,34 @@
|
||||
[tool.poetry]
|
||||
name = "chat-langchain"
|
||||
version = "0.0.1"
|
||||
description = ""
|
||||
authors = []
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
openai = "^0.28.1"
|
||||
python = "^3.10"
|
||||
fastapi = "^0.104.1"
|
||||
pydantic = "1.10"
|
||||
langchain = "^0.0.327"
|
||||
uvicorn = "^0.23.2"
|
||||
beautifulsoup4 = "^4.12.2"
|
||||
tiktoken = "^0.4.0"
|
||||
weaviate-client = "^3.23.2"
|
||||
psycopg2 = "^2.9.7"
|
||||
lxml = "^4.9.3"
|
||||
langserve = {extras = ["server"], version = "^0.0.21"}
|
||||
anthropic = "^0.5.0"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
langchain-cli = ">=0.0.4"
|
||||
fastapi = "^0.104.0"
|
||||
sse-starlette = "^1.6.5"
|
||||
|
||||
[tool.langserve]
|
||||
export_module = "chat_langchain"
|
||||
export_attr = "chain"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
+34
@@ -0,0 +1,34 @@
|
||||
import os
|
||||
|
||||
import weaviate
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.schema.retriever import BaseRetriever
|
||||
from langchain.vectorstores import Weaviate
|
||||
from langchain_docs_retriever.voyage import VoyageEmbeddings
|
||||
|
||||
WEAVIATE_DOCS_INDEX_NAME = "LangChain_agent_docs"
|
||||
WEAVIATE_URL = os.environ["WEAVIATE_URL"]
|
||||
WEAVIATE_API_KEY = os.environ["WEAVIATE_API_KEY"]
|
||||
|
||||
|
||||
def get_embeddings_model() -> Embeddings:
    """Choose the embedding backend from the environment.

    ``VoyageEmbeddings`` is used only when both ``VOYAGE_AI_URL`` and
    ``VOYAGE_AI_MODEL`` are set to non-empty values; otherwise
    ``OpenAIEmbeddings`` with a chunk size of 200 is returned.
    """
    voyage_configured = all(
        os.environ.get(name) for name in ("VOYAGE_AI_URL", "VOYAGE_AI_MODEL")
    )
    if not voyage_configured:
        return OpenAIEmbeddings(chunk_size=200)
    return VoyageEmbeddings()
|
||||
|
||||
|
||||
def get_retriever() -> BaseRetriever:
    """Create a retriever over the Weaviate docs index.

    Connects to ``WEAVIATE_URL`` with API-key authentication, wraps the
    ``WEAVIATE_DOCS_INDEX_NAME`` index in a vector store that uses the
    configured embedding model, and returns a retriever with ``k=6``.
    """
    credentials = weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY)
    client = weaviate.Client(url=WEAVIATE_URL, auth_client_secret=credentials)

    vector_store = Weaviate(
        client=client,
        index_name=WEAVIATE_DOCS_INDEX_NAME,
        text_key="text",
        embedding=get_embeddings_model(),
        by_text=False,
        attributes=["source", "title"],
    )
    return vector_store.as_retriever(search_kwargs={"k": 6})
|
||||
+45
@@ -0,0 +1,45 @@
|
||||
from typing import List, Optional
|
||||
|
||||
import requests
|
||||
import json
|
||||
|
||||
from langchain.schema.embeddings import Embeddings
|
||||
from langchain.utils.iter import batch_iterate
|
||||
from langchain.utils import get_from_env
|
||||
|
||||
BATCH_SIZE = 6
|
||||
|
||||
|
||||
class VoyageEmbeddings(Embeddings):
    """Voyage AI embedding model wrapper.

    Texts are posted to the configured HTTP endpoint in batches, and the
    returned vectors are collected in the order the service returns them.
    """

    def __init__(
        self,
        url: Optional[str] = None,
        model: Optional[str] = None,
        batch_size: int = BATCH_SIZE,
        timeout: Optional[float] = None,
    ) -> None:
        """Configure the endpoint, model name and batching.

        Args:
            url: Endpoint URL; falls back to the ``VOYAGE_AI_URL``
                environment variable.
            model: Model name; falls back to the ``VOYAGE_AI_MODEL``
                environment variable.
            batch_size: Maximum number of texts sent per request.
            timeout: Seconds to wait for each HTTP request. ``None`` (the
                default) keeps the previous behaviour of waiting without
                a limit.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        # A non-positive batch size cannot produce meaningful batches.
        if batch_size < 1:
            raise ValueError(f"batch_size must be at least 1, got {batch_size}")
        self.url = url or get_from_env("url", "VOYAGE_AI_URL")
        self.model = model or get_from_env("model", "VOYAGE_AI_MODEL")
        self.batch_size = batch_size
        self.timeout = timeout

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed search docs.

        Args:
            texts: Texts to embed; an empty list yields an empty result.

        Returns:
            One embedding per entry of the service's ``data`` response,
            accumulated batch by batch.

        Raises:
            requests.HTTPError: If the service answers with a status code
                other than 200. The response is attached to the exception.
        """
        embeddings: List[List[float]] = []
        for batch in batch_iterate(self.batch_size, texts):
            data = json.dumps({"input": batch, "model": self.model})
            response = requests.post(
                self.url,
                headers={"Content-Type": "application/json"},
                data=data,
                timeout=self.timeout,
            )
            if response.status_code != 200:
                # Attach the response so callers can inspect it when handling the error.
                raise requests.HTTPError(
                    f"Received status code {response.status_code} and response "
                    f"{response.text}",
                    response=response,
                )
            response_data = response.json()["data"]
            embeddings.extend(x["embedding"] for x in response_data)
        return embeddings

    def embed_query(self, text: str) -> List[float]:
        """Embed query text."""
        return self.embed_documents([text])[0]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,30 @@
|
||||
[tool.poetry]
|
||||
name = "langchain-docs-retriever"
|
||||
version = "0.0.1"
|
||||
description = ""
|
||||
authors = []
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
fastapi = "^0.104.1"
|
||||
pydantic = "1.10"
|
||||
langchain = "^0.0.327"
|
||||
uvicorn = "^0.23.2"
|
||||
weaviate-client = "^3.23.2"
|
||||
psycopg2 = "^2.9.7"
|
||||
lxml = "^4.9.3"
|
||||
langserve = {extras = ["server"], version = "^0.0.21"}
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
langchain-cli = ">=0.0.4"
|
||||
fastapi = "^0.104.0"
|
||||
sse-starlette = "^1.6.5"
|
||||
|
||||
[tool.langserve]
|
||||
export_module = "chat_langchain"
|
||||
export_attr = "chain"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 LangChain, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -0,0 +1,72 @@
|
||||
|
||||
# openai-functions-agent
|
||||
|
||||
This template creates an agent that uses OpenAI function calling to communicate its decisions on what actions to take.
|
||||
|
||||
This example creates an agent that can optionally look up information on the internet using Tavily's search engine.
|
||||
|
||||
## Environment Setup
|
||||
|
||||
The following environment variables need to be set:
|
||||
|
||||
Set the `OPENAI_API_KEY` environment variable to access the OpenAI models.
|
||||
|
||||
Set the `TAVILY_API_KEY` environment variable to access Tavily.
|
||||
|
||||
## Usage
|
||||
|
||||
To use this package, you should first have the LangChain CLI installed:
|
||||
|
||||
```shell
|
||||
pip install -U "langchain-cli[serve]"
|
||||
```
|
||||
|
||||
To create a new LangChain project and install this as the only package, you can do:
|
||||
|
||||
```shell
|
||||
langchain app new my-app --package openai-functions-agent
|
||||
```
|
||||
|
||||
If you want to add this to an existing project, you can just run:
|
||||
|
||||
```shell
|
||||
langchain app add openai-functions-agent
|
||||
```
|
||||
|
||||
And add the following code to your `server.py` file:
|
||||
```python
|
||||
from openai_functions_agent import agent_executor as openai_functions_agent_chain
|
||||
|
||||
add_routes(app, openai_functions_agent_chain, path="/openai-functions-agent")
|
||||
```
|
||||
|
||||
(Optional) Let's now configure LangSmith.
|
||||
LangSmith will help us trace, monitor and debug LangChain applications.
|
||||
LangSmith is currently in private beta; you can sign up [here](https://smith.langchain.com/).
|
||||
If you don't have access, you can skip this section
|
||||
|
||||
```shell
|
||||
export LANGCHAIN_TRACING_V2=true
|
||||
export LANGCHAIN_API_KEY=<your-api-key>
|
||||
export LANGCHAIN_PROJECT=<your-project> # if not specified, defaults to "default"
|
||||
```
|
||||
|
||||
If you are inside this directory, then you can spin up a LangServe instance directly by:
|
||||
|
||||
```shell
|
||||
langchain serve
|
||||
```
|
||||
|
||||
This will start the FastAPI app with a server running locally at
|
||||
[http://localhost:8000](http://localhost:8000)
|
||||
|
||||
We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs)
|
||||
We can access the playground at [http://127.0.0.1:8000/openai-functions-agent/playground](http://127.0.0.1:8000/openai-functions-agent/playground)
|
||||
|
||||
We can access the template from code with:
|
||||
|
||||
```python
|
||||
from langserve.client import RemoteRunnable
|
||||
|
||||
runnable = RemoteRunnable("http://localhost:8000/openai-functions-agent")
|
||||
```
|
||||
@@ -0,0 +1,5 @@
|
||||
from openai_functions_agent.agent import agent_executor
|
||||
|
||||
if __name__ == "__main__":
    question = "who won the womens world cup in 2023?"
    # The `agent_executor` exported by `openai_functions_agent.agent` is the
    # version wrapped with `mapper`, which reads the "question" key and supplies
    # an empty chat history itself. Passing the raw agent payload
    # ({"input": ..., "chat_history": ...}) fails with KeyError: 'question'.
    print(agent_executor.invoke({"question": question}))
|
||||
+3
@@ -0,0 +1,3 @@
|
||||
from openai_functions_agent.agent import agent_executor
|
||||
|
||||
# Public API of the package: only the agent executor is re-exported.
__all__ = ["agent_executor"]
|
||||
+85
@@ -0,0 +1,85 @@
|
||||
from typing import List, Tuple
|
||||
|
||||
from langchain.agents import AgentExecutor
|
||||
from langchain.agents.format_scratchpad import format_to_openai_functions
|
||||
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain.schema.messages import AIMessage, HumanMessage
|
||||
from langchain.tools import tool
|
||||
from langchain.tools.render import format_tool_to_openai_function
|
||||
from langchain_docs_retriever.retriever import get_retriever
|
||||
|
||||
# This is used to tell the model how to best use the retriever.
|
||||
|
||||
|
||||
# Retriever created once at import time; the `search` tool queries it.
_RETRIEVER = get_retriever()
|
||||
|
||||
|
||||
@tool
def search(query, callbacks=None):
    """Search the LangChain docs with the retriever."""
    # The docstring is left verbatim: the `@tool` decorator is documented to
    # use it as the tool description, so its text is runtime behavior.
    # `callbacks` is forwarded unchanged to the retriever call.
    documents = _RETRIEVER.get_relevant_documents(query, callbacks=callbacks)
    return documents
|
||||
|
||||
|
||||
# Tools exposed to the agent; the same list is handed to the AgentExecutor.
tools = [search]

llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)

# Adjacent literals are concatenated into one single-line system message.
assistant_system_message = (
    "You are a helpful assistant tasked with answering technical questions about LangChain. "
    "Use tools (only if necessary) to best answer the users questions. "
    "Do not make up information if you cannot find the answer using your tools."
)

# Prompt layout: system message, prior turns, the new user input, then the
# agent's scratchpad messages.
_prompt_messages = [
    ("system", assistant_system_message),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
]
prompt = ChatPromptTemplate.from_messages(_prompt_messages)

# Bind every tool to the model as an OpenAI function definition.
_openai_functions = [format_tool_to_openai_function(t) for t in tools]
llm_with_tools = llm.bind(functions=_openai_functions)
|
||||
|
||||
|
||||
def _format_chat_history(chat_history: List[Tuple[str, str]]):
|
||||
buffer = []
|
||||
for human, ai in chat_history:
|
||||
buffer.append(HumanMessage(content=human))
|
||||
buffer.append(AIMessage(content=ai))
|
||||
return buffer
|
||||
|
||||
|
||||
# Maps the executor payload onto the three variables the prompt expects.
_prompt_inputs = {
    "input": lambda x: x["input"],
    "chat_history": lambda x: _format_chat_history(x["chat_history"]),
    "agent_scratchpad": lambda x: format_to_openai_functions(x["intermediate_steps"]),
}

# Pipeline: build prompt variables -> render prompt -> call the tool-bound
# model -> parse the reply with the OpenAI-functions output parser.
agent = _prompt_inputs | prompt | llm_with_tools | OpenAIFunctionsAgentOutputParser()
|
||||
|
||||
|
||||
class AgentInput(BaseModel):
    # Input type attached to the raw agent executor via `with_types`.
    input: str
    # Required list of (human, ai) turns. The "widget" entry is extra field
    # metadata -- presumably a UI hint for the LangServe playground; confirm.
    chat_history: List[Tuple[str, str]] = Field(
        ...,
        extra={"widget": {"type": "chat"}},
    )
|
||||
|
||||
|
||||
# Executor over the agent and its tools, typed with the AgentInput schema.
_raw_executor = AgentExecutor(agent=agent, tools=tools, verbose=False)
agent_executor = _raw_executor.with_types(input_type=AgentInput)
|
||||
|
||||
|
||||
class ChainInput(BaseModel):
    # Input type of the question-only wrapper around the agent executor.
    question: str
|
||||
|
||||
|
||||
def mapper(input: dict):
    """Convert a ``{"question": ...}`` payload into the agent's input dict.

    The question becomes ``"input"`` and a new empty list is supplied as
    ``"chat_history"``. Raises ``KeyError`` if ``"question"`` is missing.
    """
    question = input["question"]
    return dict(input=question, chat_history=[])
|
||||
|
||||
|
||||
# Rebinds `agent_executor` to a question-in / answer-out chain: `mapper` builds
# the agent payload, the typed executor runs, and only "output" is returned.
_question_pipeline = mapper | agent_executor | (lambda x: x["output"])
agent_executor = _question_pipeline.with_types(input_type=ChainInput)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,24 @@
|
||||
[tool.poetry]
|
||||
name = "openai-functions-agent"
|
||||
version = "0.1.0"
|
||||
description = ""
|
||||
authors = [
|
||||
"Lance Martin <lance@langchain.dev>",
|
||||
]
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8.1,<4.0"
|
||||
langchain = ">=0.0.325"
|
||||
openai = ">=0.5.0"
|
||||
tavily-python = "^0.1.9"
|
||||
|
||||
[tool.langserve]
|
||||
export_module = "openai_functions_agent"
|
||||
export_attr = "agent_executor"
|
||||
|
||||
[build-system]
|
||||
requires = [
|
||||
"poetry-core",
|
||||
]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
Generated
+2222
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,25 @@
|
||||
[tool.poetry]
|
||||
name = "langservehub-template"
|
||||
version = "0.1.0"
|
||||
description = ""
|
||||
authors = ["Your Name <you@example.com>"]
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.11"
|
||||
sse-starlette = "^1.6.5"
|
||||
tomli-w = "^1.0.0"
|
||||
uvicorn = "^0.23.2"
|
||||
fastapi = "^0.104"
|
||||
langserve = ">=0.0.16"
|
||||
chat-langchain = {path = "packages/chat-langchain"}
|
||||
langchain-docs-retriever = {path = "packages/langchain-docs-retriever", develop = true}
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
uvicorn = "^0.23.2"
|
||||
pygithub = "^2.1.1"
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
@@ -0,0 +1,85 @@
|
||||
import argparse
|
||||
from functools import partial
|
||||
from typing import Optional
|
||||
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.smith import RunEvalConfig
|
||||
from langsmith import Client
|
||||
from chat_langchain.chain import chain, anthropic_chain
|
||||
from anthropic_iterative_search.chain import chain as anthropic_agent_chain
|
||||
from openai_functions_agent import agent_executor as openai_functions_agent_chain
|
||||
|
||||
import uuid
|
||||
|
||||
# Module-level LangSmith client; `main` uses it to call `run_on_dataset`.
ls_client = Client()
|
||||
|
||||
|
||||
def create_runnable(model: str):
    """Return the runnable registered under ``model``.

    Raises:
        KeyError: if ``model`` is not one of the registered names.
    """
    registry = {
        "chat": chain,
        "anthropic-chat": anthropic_chain,
        "anthropic-iterative-search": anthropic_agent_chain,
        "openai-functions-agent": openai_functions_agent_chain,
    }
    runnable = registry[model]
    return runnable
|
||||
|
||||
|
||||
def get_eval_config():
    """Build the evaluation config used for every experiment.

    Two evaluators are configured: a labeled score-string evaluator graded by
    GPT-4 against the accuracy rubric (scores normalized by 10), and an
    embedding-distance evaluator.
    """
    rubric = """
Score 1: The answer is incorrect and unrelated to the question or reference document.
Score 3: The answer shows slight relevance to the question or reference document but is largely incorrect.
Score 5: The answer is partially correct but has significant errors or omissions.
Score 7: The answer is mostly correct with minor errors or omissions, and aligns with the reference document.
Score 10: The answer is correct, complete, and perfectly aligns with the reference document.

If the reference answer contains multiple alternatives, the predicted answer must only match one of the alternatives to be considered correct.
If the predicted answer contains additional helpful and accurate information that is not present in the reference answer, it should still be considered correct.
"""
    criteria = {"accuracy": rubric}

    grader = ChatOpenAI(model="gpt-4", temperature=0.0)
    accuracy_evaluator = RunEvalConfig.LabeledScoreString(
        criteria=criteria, llm=grader, normalize_by=10.0
    )
    # Mainly to compare with the above
    # Suspected to be less reliable.
    distance_evaluator = RunEvalConfig.EmbeddingDistance()
    return RunEvalConfig(evaluators=[accuracy_evaluator, distance_evaluator])
|
||||
|
||||
|
||||
def main(
    # server_url: str,
    model: str,
    dataset_name: str,
    max_concurrency: int = 5,
    project_name: Optional[str] = None,
):
    """Evaluate one registered runnable against a LangSmith dataset.

    Args:
        model: Key understood by ``create_runnable``.
        dataset_name: Dataset name passed to ``run_on_dataset``.
        max_concurrency: Forwarded as ``concurrency_level``.
        project_name: Optional project-name prefix. Four random hex characters
            are appended to it; ``None`` is forwarded unchanged.
    """
    evaluation = get_eval_config()
    if project_name is None:
        run_project = None
    else:
        run_project = project_name + uuid.uuid4().hex[:4]
    ls_client.run_on_dataset(
        dataset_name=dataset_name,
        # A factory is passed rather than an instance; `create_runnable` is
        # called with `model` whenever the runner needs the chain.
        llm_or_chain_factory=partial(create_runnable, model),
        evaluation=evaluation,
        concurrency_level=max_concurrency,
        project_name=run_project,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # `main` selects a chain by its registry key (its `server_url` parameter is
    # commented out), so the first argument is a model name, not a URL.
    # `--url` is kept as an alias so existing invocations still parse.
    parser.add_argument(
        "--model",
        "--url",
        dest="model",
        type=str,
        required=True,
        help="Name of the runnable to evaluate, as understood by create_runnable.",
    )
    parser.add_argument("--dataset-name", type=str, default="Chat Langchain Pub")
    # `type` must be a callable converter; `Optional[str]` cannot be called, so
    # any supplied value was rejected. Plain `str` with default None is correct.
    parser.add_argument("--project-name", type=str, default=None)
    parser.add_argument("--max-concurrency", type=int, default=5)
    args = parser.parse_args()
    main(
        model=args.model,
        dataset_name=args.dataset_name,
        max_concurrency=args.max_concurrency,
        project_name=args.project_name,
    )
|
||||
Reference in New Issue
Block a user