Initial commit

This commit is contained in:
jacoblee93
2023-10-03 20:02:35 -07:00
commit 5227ebf036
31 changed files with 7617 additions and 0 deletions
+2
View File
@@ -0,0 +1,2 @@
nextjs/
assets/
+34
View File
@@ -0,0 +1,34 @@
# Deploys the backend to Fly first, then the frontend to Vercel, on every push
# to `main`. The frontend job waits for the backend job via `needs`.
name: Deploy Production
on:
  push:
    branches:
      - main
jobs:
  deploy-backend:
    name: Deploy Backend to Fly
    environment: Production
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: superfly/flyctl-actions/setup-flyctl@master
      - run: flyctl deploy --wait-timeout 600
        env:
          FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
  deploy-frontend:
    name: Deploy Frontend to Vercel
    runs-on: ubuntu-latest
    environment: Production
    needs: deploy-backend
    env:
      VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }}
      VERCEL_PROJECT_ID: ${{ secrets.VERCEL_PROJECT_ID }}
    steps:
      # Same checkout major version as the backend job so both jobs behave alike.
      - uses: actions/checkout@v3
      - name: Install Vercel CLI
        run: npm install --global vercel@latest
      - name: Pull Vercel Environment Information
        run: vercel pull --yes --environment=production --token=${{ secrets.VERCEL_TOKEN }}
      - name: Build Project Artifacts
        run: vercel build --prod --token=${{ secrets.VERCEL_TOKEN }}
      - name: Deploy Project Artifacts to Vercel
        run: vercel deploy --prebuilt --prod --token=${{ secrets.VERCEL_TOKEN }}
+145
View File
@@ -0,0 +1,145 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# JetBrains
.idea
*.db
.DS_Store
vectorstore.pkl
langchain.readthedocs.io/
.vercel
bin/
pyvenv.cfg
node_modules/
.envrc
+15
View File
@@ -0,0 +1,15 @@
# Backend image: installs the Python dependencies with Poetry and serves the
# FastAPI app defined in main.py with uvicorn.
FROM python:3.11-buster
RUN pip install poetry==1.5.1
# Disable Poetry's own virtualenv creation for the installs below.
RUN poetry config virtualenvs.create false
# Copy only the dependency manifests before the first install.
COPY ./pyproject.toml ./poetry.lock* ./
# First install runs before any source file is copied (note --no-root).
RUN poetry install --no-interaction --no-ansi --no-root --no-directory
COPY ./*.py ./
# Second install runs with the Python sources present.
RUN poetry install --no-interaction --no-ansi
# Port 8080 matches internal_port in fly.toml.
CMD exec uvicorn main:app --host 0.0.0.0 --port 8080
+21
View File
@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 Harrison Chase
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+8
View File
@@ -0,0 +1,8 @@
# Run the API locally on port 8080 with uvicorn's --reload flag.
.PHONY: start
start:
	uvicorn main:app --reload --port 8080
# Format the Python sources with black and isort.
.PHONY: format
format:
	black .
	isort .
+2
View File
@@ -0,0 +1,2 @@
# Modify this Procfile to fit your needs
web: uvicorn main:app --host 0.0.0.0 --port 8080
+55
View File
@@ -0,0 +1,55 @@
# 🦜️🌐 WebLangChain
This repo is an example of performing retrieval using the entire internet as a document store.
**Try it live:** [weblangchain.vercel.app](https://weblangchain.vercel.app)
## ✅ Running locally
By default, WebLangChain uses [Tavily](https://tavily.com) to fetch content from webpages. You can get an API key [by signing up](https://tavily.com/).
If you'd like to swap in a different base retriever (e.g. if you want to use your own data source), you can modify the `get_base_retriever()` method in `main.py`.
1. Install backend dependencies: `poetry install`.
1. Make sure to set your environment variables to configure the application:
```
export OPENAI_API_KEY=
export TAVILY_API_KEY=
# for tracing
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
export LANGCHAIN_API_KEY=
export LANGCHAIN_PROJECT=
```
1. Start the Python backend with `poetry run make start`.
1. Install frontend dependencies by running `cd nextjs`, then `yarn`.
1. Run the frontend with `yarn dev`.
1. Open [localhost:3000](http://localhost:3000) in your browser.
## ⚙️ How it works
The general retrieval flow looks like this:
1. Pull in raw content related to the user's initial query using a retriever that wraps Tavily's Search API.
- For subsequent conversation turns, we also rephrase the original query into a "standalone query" free of references to previous chat history.
2. Because the size of the raw documents usually exceeds the maximum context window size of the model, we perform additional [contextual compression steps](https://python.langchain.com/docs/modules/data_connection/retrievers/contextual_compression/) to filter what we pass to the model.
- First, we split retrieved documents using a [text splitter](https://python.langchain.com/docs/modules/data_connection/document_transformers/).
- Then we use an [embeddings filter](https://python.langchain.com/docs/modules/data_connection/retrievers/contextual_compression/#embeddingsfilter) to remove any chunks that do not meet a similarity threshold with the initial query.
3. The retrieved context, the chat history, and the original question are passed to the LLM as context for the final generation.
Here's a LangSmith trace illustrating the above:
https://smith.langchain.com/public/f4493d9c-218b-404a-a890-31c15c56fff3/r
It's built using:
- [Tavily](https://tavily.com) as a retriever
- [LangChain](https://github.com/langchain-ai/langchain/) for orchestration
- [LangServe](https://github.com/langchain-ai/langserve) to directly expose LangChain runnables as endpoints
- [FastAPI](https://fastapi.tiangolo.com/)
- [Next.js](https://nextjs.org) for the frontend
## 🚀 Deployment
The live version is hosted on [Fly.dev](https://fly.dev) and [Vercel](https://vercel.com).
The backend Python logic is found in `main.py`, and the frontend Next.js app is under `nextjs/`.
+21
View File
@@ -0,0 +1,21 @@
# fly.toml app configuration file generated for weblangchain on 2023-10-03T08:36:06-07:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#
app = "weblangchain"
primary_region = "lax"

# Build the image from the Dockerfile in this repository.
[build]
  dockerfile = "Dockerfile"

[env]
  PORT = "8080"

[http_service]
  # Same port that uvicorn binds in the Dockerfile CMD.
  internal_port = 8080
  force_https = true
  auto_stop_machines = true
  auto_start_machines = true
  min_machines_running = 2
  processes = ["app"]
+366
View File
@@ -0,0 +1,366 @@
"""Main entrypoint for the app."""
import asyncio
import json
import os
from operator import itemgetter
from typing import AsyncIterator, Dict, List, Optional, Sequence
import langsmith
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from langchain.callbacks.tracers.log_stream import RunLogPatch
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import (ChatPromptTemplate, MessagesPlaceholder,
PromptTemplate)
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import (
DocumentCompressorPipeline, EmbeddingsFilter)
from langchain.schema import Document
from langchain.schema.document import Document
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import AIMessage, HumanMessage
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.retriever import BaseRetriever
from langchain.schema.runnable import (Runnable, RunnableBranch,
RunnableLambda, RunnableMap)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langserve import add_routes
from langsmith import Client
from typing_extensions import TypedDict
# System prompt for the answer-generation step (see `create_chain`).
# `{context}` is replaced with the formatted search-result chunks; the doubled
# braces around `number` are escaped so they render as literal braces.
# The context block is closed with a proper `</context>` tag so the
# "between the `context` html blocks" instructions refer to a well-formed pair.
RESPONSE_TEMPLATE = """\
You are an expert researcher and writer, tasked with answering any question.
Generate a comprehensive and informative, yet concise answer of 250 words or less for the \
given question based solely on the provided search results (URL and content). You must \
only use information from the provided search results. Use an unbiased and \
journalistic tone. Combine search results together into a coherent answer. Do not \
repeat text. Cite search results using [${{number}}] notation. Only cite the most \
relevant results that answer the question accurately. Place these citations at the end \
of the sentence or paragraph that reference them - do not put them all at the end. If \
different results refer to different entities within the same name, write separate \
answers for each entity. If you want to cite multiple results for the same sentence, \
format it as `[${{number1}}] [${{number2}}]`. However, you should NEVER do this with the \
same number - if you want to cite `number1` multiple times for a sentence, only do \
`[${{number1}}]` not `[${{number1}}] [${{number1}}]`
You should use bullet points in your answer for readability. Put citations where they apply \
rather than putting them all at the end.
If there is nothing in the context relevant to the question at hand, just say "Hmm, \
I'm not sure." Don't try to make up an answer.
Anything between the following `context` html blocks is retrieved from a knowledge \
bank, not part of the conversation with the user.
<context>
{context}
</context>
REMEMBER: If there is no relevant information within the context, just say "Hmm, I'm \
not sure." Don't try to make up an answer. Anything between the preceding 'context' \
html blocks is retrieved from a knowledge bank, not part of the conversation with the \
user.\
"""
# Prompt used by `create_retriever_chain` to turn a follow-up question plus the
# chat history into a standalone question before retrieval.
REPHRASE_TEMPLATE = """\
Given the following conversation and a follow up question, rephrase the follow up \
question to be a standalone question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone Question:"""
# LangSmith client shared by the feedback and trace endpoints in this module.
client = Client()
app = FastAPI()
# NOTE(review): CORS is fully open here (any origin, method and header, with
# credentials allowed) — confirm this is intended for the production deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
    expose_headers=["*"],
)
import os
from enum import Enum
from typing import Any, Dict, List, Optional
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.schema import Document
from langchain.schema.retriever import BaseRetriever
class SearchDepth(Enum):
    """Search depth options; the member value is what gets sent to Tavily's `search_depth`."""

    BASIC = "basic"
    ADVANCED = "advanced"
class TavilySearchAPIRetriever(BaseRetriever):
    """Tavily Search API retriever.

    Runs a Tavily search for the query and converts each result into a
    `Document` whose metadata carries the result's title, source URL, any
    extra result fields, and the response-level image list.
    """

    # Number of documents to return (including the generated answer, if any).
    k: int = 10
    # When True, Tavily's generated answer is prepended as an extra document.
    include_generated_answer: bool = False
    # When True, `raw_content` is used as the page content instead of `content`.
    include_raw_content: bool = False
    include_images: bool = False
    search_depth: SearchDepth = SearchDepth.BASIC
    include_domains: Optional[List[str]] = None
    exclude_domains: Optional[List[str]] = None
    # Extra keyword arguments forwarded verbatim to `tavily.search`.
    kwargs: Optional[Dict[str, Any]] = {}
    # Falls back to the TAVILY_API_KEY environment variable when unset.
    api_key: Optional[str] = None

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search Tavily for `query` and return the results as documents.

        Raises:
            ValueError: if the `tavily` package is not installed.
            KeyError: if no `api_key` is set and TAVILY_API_KEY is missing
                from the environment.
        """
        try:
            from tavily import Client
        except ImportError:
            raise ValueError(
                "Tavily python package not found. "
                "Please install it with `pip install tavily-python`."
            )
        tavily = Client(api_key=self.api_key or os.environ["TAVILY_API_KEY"])
        # Reserve one slot for the generated answer so at most `k` docs come back.
        max_results = self.k if not self.include_generated_answer else self.k - 1
        response = tavily.search(
            query=query,
            max_results=max_results,
            search_depth=self.search_depth.value,
            include_answer=self.include_generated_answer,
            include_domains=self.include_domains,
            exclude_domains=self.exclude_domains,
            include_raw_content=self.include_raw_content,
            include_images=self.include_images,
            # `kwargs` is Optional, so guard against an explicit None.
            **(self.kwargs or {}),
        )
        content_key = "raw_content" if self.include_raw_content else "content"
        docs = [
            Document(
                # Guard against a missing or null field so page_content is a str.
                page_content=result.get(content_key) or "",
                metadata={
                    "title": result.get("title", ""),
                    "source": result.get("url", ""),
                    **{
                        k: v
                        for k, v in result.items()
                        if k not in ("content", "title", "url", "raw_content")
                    },
                    "images": response.get("images"),
                },
            )
            # A response without "results" yields no documents instead of a TypeError.
            for result in response.get("results") or []
        ]
        if self.include_generated_answer:
            docs = [
                Document(
                    page_content=response.get("answer", ""),
                    metadata={
                        "title": "Suggested Answer",
                        "source": "https://tavily.com/",
                    },
                ),
                *docs,
            ]
        return docs
class ChatRequest(TypedDict):
    """Input schema registered for the `/chat` route."""

    # The user's current question.
    question: str
    # Prior turns; `serialize_history` reads the "human" and "ai" keys of each entry.
    chat_history: Optional[List[Dict[str, str]]]
    # conversation_id: Optional[str]
def get_base_retriever():
    """Build the Tavily retriever that supplies raw web content to the chain."""
    retriever_options = {
        "k": 6,
        "include_raw_content": True,
        "include_images": True,
    }
    return TavilySearchAPIRetriever(**retriever_options)
def _get_retriever():
    """Wrap the base retriever with a split-then-filter compression pipeline."""
    # Stage 1 splits documents into chunks; stage 2 drops chunks whose embedding
    # similarity to the query is below the threshold.
    chunker = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=20)
    similarity_filter = EmbeddingsFilter(
        embeddings=OpenAIEmbeddings(), similarity_threshold=0.8
    )
    compressor = DocumentCompressorPipeline(
        transformers=[chunker, similarity_filter]
    )
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=get_base_retriever()
    )
    return compression_retriever.with_config(run_name="GetRelevantDocumentChunks")
def create_retriever_chain(
    llm: BaseLanguageModel, retriever: BaseRetriever
) -> Runnable:
    """Build the retrieval step, rephrasing the question first when history exists.

    With a non-empty `chat_history` the question is condensed into a standalone
    question by the LLM before being sent to the retriever; otherwise the raw
    `question` value goes to the retriever directly.
    """
    rephrase_prompt = PromptTemplate.from_template(REPHRASE_TEMPLATE)
    rephrase_step = (rephrase_prompt | llm | StrOutputParser()).with_config(
        run_name="CondenseQuestion",
    )
    with_history = (rephrase_step | retriever).with_config(
        run_name="RetrievalChainWithHistory"
    )

    has_history = RunnableLambda(
        lambda payload: bool(payload.get("chat_history"))
    ).with_config(run_name="HasHistoryCheck")

    pick_question = RunnableLambda(itemgetter("question")).with_config(
        run_name="Itemgetter:question"
    )
    without_history = (pick_question | retriever).with_config(
        run_name="RetrievalChainWithNoHistory"
    )

    # The last positional argument is the default branch.
    return RunnableBranch((has_history, with_history), without_history)
def serialize_history(request: ChatRequest):
    """Convert the request's chat history into LangChain message objects.

    Each history entry may carry a "human" and/or an "ai" key; every key that
    is present with a non-None value becomes a `HumanMessage` / `AIMessage`,
    in that order. A missing or empty `chat_history` yields an empty list.
    """
    # `.get` tolerates requests that omit the optional key entirely.
    chat_history = request.get("chat_history") or []
    converted_chat_history = []
    for message in chat_history:
        if message.get("human") is not None:
            converted_chat_history.append(HumanMessage(content=message["human"]))
        if message.get("ai") is not None:
            converted_chat_history.append(AIMessage(content=message["ai"]))
    return converted_chat_history
def format_docs(docs: Sequence[Document]) -> str:
    """Render documents as newline-separated `<doc id='N'>…</doc>` tags.

    `N` is the zero-based position of the document in `docs`.
    """
    return "\n".join(
        f"<doc id='{position}'>{document.page_content}</doc>"
        for position, document in enumerate(docs)
    )
def create_chain(
    llm: BaseLanguageModel,
    retriever: BaseRetriever,
) -> Runnable:
    """Assemble the full question-answering chain.

    Stages, in order: normalise the request (question + serialized history),
    retrieve and format context while passing question/history through, then
    generate the answer with the response prompt.
    """

    def _pick(key: str) -> Runnable:
        # Fresh pass-through step that extracts `key` from the input mapping.
        return RunnableLambda(itemgetter(key)).with_config(
            run_name=f"Itemgetter:{key}"
        )

    format_step = RunnableLambda(format_docs).with_config(
        run_name="FormatDocumentChunks"
    )
    retriever_chain = create_retriever_chain(llm, retriever) | format_step

    normalise_request = RunnableMap(
        {
            "question": _pick("question"),
            "chat_history": RunnableLambda(serialize_history).with_config(
                run_name="SerializeHistory"
            ),
        }
    )
    gather_context = RunnableMap(
        {
            "context": retriever_chain.with_config(run_name="RetrievalChain"),
            "question": _pick("question"),
            "chat_history": _pick("chat_history"),
        }
    )

    answer_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", RESPONSE_TEMPLATE),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )
    generate_answer = (answer_prompt | llm | StrOutputParser()).with_config(
        run_name="GenerateResponse",
    )

    return normalise_request | gather_context | generate_answer
# Module-level wiring: the model, retriever and chain are built once at import
# time and the chain is registered on the app under the /chat path.
llm = ChatOpenAI(
    model="gpt-3.5-turbo-16k",
    # model="gpt-4",
    streaming=True,
    temperature=0,
)
retriever = _get_retriever()
chain = create_chain(llm, retriever)
add_routes(app, chain, path="/chat", input_type=ChatRequest)
@app.post("/feedback")
async def send_feedback(request: Request):
    """Create LangSmith feedback from the JSON body.

    The body must contain `run_id`; `key` defaults to "user_score". Every field
    of the body is forwarded as a keyword argument to `client.create_feedback`.
    The outcome is reported through the `code` field of the returned JSON.
    """
    payload = await request.json()
    if payload.get("run_id") is None:
        return {
            "result": "No LangSmith run ID provided",
            "code": 400,
        }
    feedback_kwargs = dict(payload)
    feedback_kwargs["key"] = payload.get("key", "user_score")
    client.create_feedback(**feedback_kwargs)
    return {"result": "posted feedback successfully", "code": 200}
@app.patch("/feedback")
async def update_feedback(request: Request):
    """Update the score and comment of an existing LangSmith feedback entry.

    The body must contain `feedback_id`; `score` and `comment` are optional and
    passed through as given. The outcome is reported through the `code` field
    of the returned JSON.
    """
    payload = await request.json()
    target_id = payload.get("feedback_id")
    if target_id is None:
        return {
            "result": "No feedback ID provided",
            "code": 400,
        }
    changes = {
        "score": payload.get("score"),
        "comment": payload.get("comment"),
    }
    client.update_feedback(target_id, **changes)
    return {"result": "patched feedback successfully", "code": 200}
# TODO: Update when async API is available
async def _arun(func, *args, **kwargs):
return await asyncio.get_running_loop().run_in_executor(None, func, *args, **kwargs)
async def aget_trace_url(run_id: str) -> str:
    """Return a public share link for the LangSmith run `run_id`.

    Reading the run is attempted up to five times, with an exponentially
    growing delay (1s, 2s, 4s, 8s) between failed attempts. Afterwards the
    existing share link is returned if the run is already shared; otherwise
    the run is shared and the new link returned.
    """
    max_attempts = 5
    for attempt in range(max_attempts):
        try:
            await _arun(client.read_run, run_id)
            break
        except langsmith.utils.LangSmithError:
            # Back off before retrying; no point sleeping after the last attempt.
            if attempt < max_attempts - 1:
                await asyncio.sleep(2**attempt)
    if await _arun(client.run_is_shared, run_id):
        return await _arun(client.read_run_shared_link, run_id)
    return await _arun(client.share_run, run_id)
@app.post("/get_trace")
async def get_trace(request: Request):
    """Return the share URL for the run named by `run_id` in the JSON body.

    When `run_id` is missing, a JSON object with `code` 400 is returned instead
    of a URL.
    """
    payload = await request.json()
    requested_run = payload.get("run_id")
    if requested_run is not None:
        return await aget_trace_url(requested_run)
    return {
        "result": "No LangSmith run ID provided",
        "code": 400,
    }
# Start uvicorn on port 8080 when this module is executed as a script.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8080)
+12
View File
@@ -0,0 +1,12 @@
## For JS backend:
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
# LANGCHAIN_API_KEY="YOUR_LANGSMITH_KEY"
# LANGCHAIN_PROJECT="YOUR_PROJECT_NAME"
# NEXT_PUBLIC_API_BASE_URL="http://localhost:3000/api"
# OPENAI_API_KEY="YOUR_OPENAI_API_KEY"
# WEAVIATE_HOST="YOUR_WEAVIATE_HOST"
# WEAVIATE_API_KEY="YOUR_WEAVIATE_API_KEY"
# WEAVIATE_INDEX_NAME="YOUR_WEAVIATE_INDEX_NAME"
+3
View File
@@ -0,0 +1,3 @@
{
"extends": "next/core-web-vitals"
}
+37
View File
@@ -0,0 +1,37 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# local env files
.env*.local
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts
.yarn/
+398
View File
@@ -0,0 +1,398 @@
import { toast } from "react-toastify";
import "react-toastify/dist/ReactToastify.css";
import { emojisplosion } from "emojisplosion";
import { useState, useRef } from "react";
import { SourceBubble, Source } from "./SourceBubble";
import {
VStack,
Flex,
Heading,
HStack,
Box,
Button,
Divider,
Spacer,
} from "@chakra-ui/react";
import { SearchIcon, InfoOutlineIcon } from "@chakra-ui/icons";
import { InlineCitation } from "./InlineCitation";
import { v4 as uuidv4 } from "uuid";
/** A single chat entry rendered by `ChatMessageBubble`. */
export type Message = {
  id: string;
  createdAt?: Date;
  /** Bubble text; assistant content is injected as HTML by `createAnswerElements`. */
  content: string;
  role: "system" | "user" | "assistant" | "function";
  /** Run ID sent to the `/feedback` and `/get_trace` endpoints. */
  runId?: string;
  /** Sources listed above the answer and referenced by inline citations. */
  sources?: Source[];
  name?: string;
  function_call?: { name: string };
};
/** Feedback record kept in component state after a successful `/feedback` call. */
export interface Feedback {
  feedback_id: string;
  run_id: string;
  key: string;
  score: number;
  comment?: string;
}
/**
 * Deduplicates sources by URL, keeping the first occurrence of each.
 *
 * @param sources - Sources in the order they were retrieved.
 * @returns `filtered`, the unique sources in first-seen order, and `indexMap`,
 *   which maps every original index to the position of its URL in `filtered`.
 */
const filterSources = (sources: Source[]) => {
  const filtered: Source[] = [];
  const indexMap = new Map<number, number>();
  // url -> position of that url inside `filtered`
  const positionByUrl = new Map<string, number>();
  for (const [originalIndex, source] of sources.entries()) {
    let position = positionByUrl.get(source.url);
    if (position === undefined) {
      position = filtered.length;
      positionByUrl.set(source.url, position);
      filtered.push(source);
    }
    indexMap.set(originalIndex, position);
  }
  return { filtered, indexMap };
};
/**
 * Splits the rendered answer into React elements, replacing citation markers
 * such as `[1]` or `[^1^]` with `InlineCitation` components.
 *
 * - A marker whose number has no entry in `sourceIndexMap` is left in the text.
 * - A marker that resolves to the same source as the citation immediately
 *   before it (no text in between) is dropped instead of rendered twice.
 *
 * NOTE(review): the text segments are injected with `dangerouslySetInnerHTML`,
 * so `content` must already be trusted/sanitized HTML — confirm at the caller.
 *
 * @param content - Answer HTML containing citation markers.
 * @param filteredSources - Deduplicated sources the citations point at.
 * @param sourceIndexMap - Maps a cited number to an index in `filteredSources`.
 * @param highlighedSourceLinkStates - Per-source highlight flags.
 * @param setHighlightedSourceLinkStates - Setter for the highlight flags.
 * @returns The elements to render, in document order.
 */
const createAnswerElements = (
  content: string,
  filteredSources: Source[],
  sourceIndexMap: Map<number, number>,
  highlighedSourceLinkStates: boolean[],
  setHighlightedSourceLinkStates: React.Dispatch<
    React.SetStateAction<boolean[]>
  >
) => {
  const matches = Array.from(content.matchAll(/\[\^?(\d+)\^?\]/g));
  const elements: JSX.Element[] = [];
  let prevCitationEndIndex = 0;
  let adjacentCitations: number[] = [];
  matches.forEach((match) => {
    const matchStart = match.index;
    const sourceNum = parseInt(match[1], 10);
    const resolvedNum = sourceIndexMap.get(sourceNum);
    // Text between this marker and the previous citation ends the adjacent run.
    if (prevCitationEndIndex !== matchStart) {
      adjacentCitations = [];
    }
    // Unknown citation numbers are skipped explicitly rather than through a
    // magic out-of-range index, so they stay part of the surrounding text.
    if (
      matchStart === undefined ||
      resolvedNum === undefined ||
      resolvedNum >= filteredSources.length
    ) {
      return;
    }
    if (!adjacentCitations.includes(resolvedNum)) {
      elements.push(
        <span
          key={`content:${prevCitationEndIndex}`}
          dangerouslySetInnerHTML={{
            __html: content.slice(prevCitationEndIndex, matchStart),
          }}
        ></span>
      );
      elements.push(
        <span key={`span:${prevCitationEndIndex}`}>
          <InlineCitation
            key={`citation:${prevCitationEndIndex}`}
            source={filteredSources[resolvedNum]}
            sourceNumber={resolvedNum}
            highlighted={highlighedSourceLinkStates[resolvedNum]}
            onMouseEnter={() =>
              setHighlightedSourceLinkStates(
                filteredSources.map((_, i) => i === resolvedNum)
              )
            }
            onMouseLeave={() =>
              setHighlightedSourceLinkStates(filteredSources.map(() => false))
            }
          />
        </span>
      );
      adjacentCitations.push(resolvedNum);
    }
    prevCitationEndIndex = matchStart + match[0].length;
  });
  // Remaining text after the last rendered citation.
  elements.push(
    <span
      key={`content:${prevCitationEndIndex}`}
      dangerouslySetInnerHTML={{ __html: content.slice(prevCitationEndIndex) }}
    ></span>
  );
  return elements;
};
/**
 * Renders one chat message. User messages are shown as a heading; assistant
 * messages show their sources, the answer with inline citations, optional
 * images, and (for the most recent completed message) feedback and trace
 * buttons.
 *
 * @param props.message - The message to render.
 * @param props.isMostRecent - Whether this is the latest message in the chat.
 * @param props.messageCompleted - Whether streaming of the message has finished.
 * @param props.apiBaseUrl - Base URL for the `/feedback` and `/get_trace` calls.
 */
export function ChatMessageBubble(props: {
  message: Message;
  aiEmoji?: string;
  isMostRecent: boolean;
  messageCompleted: boolean;
  apiBaseUrl: string;
}) {
  const { role, content, runId } = props.message;
  const isUser = role === "user";
  const [isLoading, setIsLoading] = useState(false);
  const [traceIsLoading, setTraceIsLoading] = useState(false);
  const [feedback, setFeedback] = useState<Feedback | null>(null);
  const [comment, setComment] = useState("");
  // Only the setter is used; the stored value is never read in this component.
  const [, setFeedbackColor] = useState("");
  const upButtonRef = useRef<HTMLButtonElement>(null);
  const downButtonRef = useRef<HTMLButtonElement>(null);

  /** Sums offsetTop/offsetLeft up the offsetParent chain of `element`. */
  const cumulativeOffset = (element: HTMLElement | null) => {
    let top = 0;
    let left = 0;
    let current = element;
    while (current) {
      top += current.offsetTop || 0;
      left += current.offsetLeft || 0;
      current = current.offsetParent as HTMLElement | null;
    }
    return { top, left };
  };

  /** Errors may be any thrown value; extract something displayable. */
  const describeError = (e: unknown) =>
    e instanceof Error ? e.message : String(e);

  /**
   * Posts new feedback (or patches the existing one) for this message's run.
   * Does nothing when there is no run ID or a request is already in flight.
   */
  const sendFeedback = async (score: number, key: string) => {
    if (runId === undefined || isLoading) {
      return;
    }
    setIsLoading(true);
    const feedbackId = feedback?.feedback_id ?? uuidv4();
    try {
      const response = await fetch(props.apiBaseUrl + "/feedback", {
        method: feedback?.feedback_id ? "PATCH" : "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          score,
          run_id: runId,
          key,
          feedback_id: feedbackId,
          comment,
        }),
      });
      const data = await response.json();
      if (data.code === 200) {
        setFeedback({ run_id: runId, score, key, feedback_id: feedbackId });
        // NOTE(review): the click handlers below also call animateButton, so a
        // successful submission animates twice — confirm this is intended.
        animateButton(score === 1 ? "upButton" : "downButton");
        if (comment) {
          setComment("");
        }
      }
    } catch (e: unknown) {
      console.error("Error:", e);
      toast.error(describeError(e));
    } finally {
      // Always release the in-flight flag, even if a handler above throws.
      setIsLoading(false);
    }
  };

  /** Fetches the share URL of this message's trace and opens it in a new tab. */
  const viewTrace = async () => {
    setTraceIsLoading(true);
    try {
      const response = await fetch(props.apiBaseUrl + "/get_trace", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          run_id: runId,
        }),
      });
      const data: unknown = await response.json();
      // Success is a bare URL string; anything else (e.g. the `{ code: 400 }`
      // error object) is reported once through the catch block below.
      if (typeof data !== "string") {
        throw new Error("Unable to view trace");
      }
      const url = data.replace(/['"]+/g, "");
      window.open(url, "_blank");
    } catch (e: unknown) {
      console.error("Error:", e);
      toast.error(describeError(e));
    } finally {
      setTraceIsLoading(false);
    }
  };

  const sources = props.message.sources ?? [];
  const { filtered: filteredSources, indexMap: sourceIndexMap } =
    filterSources(sources);
  // Use an array of highlighted states as a state since React
  // complains when creating states in a loop
  const [highlighedSourceLinkStates, setHighlightedSourceLinkStates] = useState(
    filteredSources.map(() => false)
  );
  const answerElements =
    role === "assistant"
      ? createAnswerElements(
          content,
          filteredSources,
          sourceIndexMap,
          highlighedSourceLinkStates,
          setHighlightedSourceLinkStates
        )
      : [];
  // Images are taken from the first source's `images` field.
  const imageUrls = filteredSources[0]?.images ?? [];
  const imageElements = imageUrls.map((imageUrl) => (
    <img
      key={`image:${imageUrl}`}
      src={imageUrl}
      alt=""
      className="block h-full mr-2"
    ></img>
  ));

  /** Pings the given feedback button and fires an emoji burst from its centre. */
  const animateButton = (buttonId: string) => {
    let button: HTMLButtonElement | null;
    if (buttonId === "upButton") {
      button = upButtonRef.current;
    } else if (buttonId === "downButton") {
      button = downButtonRef.current;
    } else {
      return;
    }
    if (!button) return;
    const resolvedButton = button;
    resolvedButton.classList.add("animate-ping");
    setTimeout(() => {
      resolvedButton.classList.remove("animate-ping");
    }, 500);
    emojisplosion({
      emojiCount: 10,
      uniqueness: 1,
      position() {
        const offset = cumulativeOffset(resolvedButton);
        return {
          x: offset.left + resolvedButton.clientWidth / 2,
          y: offset.top + resolvedButton.clientHeight / 2,
        };
      },
      emojis: buttonId === "upButton" ? ["👍"] : ["👎"],
    });
  };

  return (
    <VStack align="start" spacing={5} pb={5}>
      {!isUser && filteredSources.length > 0 && (
        <>
          <Flex direction={"column"} width={"100%"}>
            <VStack spacing={"5px"} align={"start"} width={"100%"}>
              <Heading
                fontSize="lg"
                fontWeight={"medium"}
                mb={1}
                color={"blue.100"}
                paddingBottom={"12px"}
                className="flex items-center"
              >
                <SearchIcon className="mr-1"/>Sources
              </Heading>
              <HStack spacing={"10px"} maxWidth={"100%"} overflow={"auto"}>
                {filteredSources.map((source, index) => (
                  <Box key={index} alignSelf={"stretch"} width={60}>
                    <SourceBubble
                      source={source}
                      highlighted={highlighedSourceLinkStates[index]}
                      index={index}
                      onMouseEnter={() =>
                        setHighlightedSourceLinkStates(
                          filteredSources.map((_, i) => i === index)
                        )
                      }
                      onMouseLeave={() =>
                        setHighlightedSourceLinkStates(
                          filteredSources.map(() => false)
                        )
                      }
                    />
                  </Box>
                ))}
              </HStack>
            </VStack>
          </Flex>
          <Heading size="lg" fontWeight="medium" color="blue.100" className="flex items-center">
            <InfoOutlineIcon className="mr-1" /> Answer
          </Heading>
        </>
      )}
      {isUser ? (
        <Heading size="lg" fontWeight="medium" color="white">
          {content}
        </Heading>
      ) : (
        <>
          <Box className="whitespace-pre-wrap" color="white">
            {answerElements}
          </Box>
          {(imageUrls.length && props.messageCompleted) ? (
            <Flex className="w-full max-w-full flex h-[196px] overflow-auto">
              {imageElements}
            </Flex>
          ) : ""}
        </>
      )}
      {props.message.role !== "user" &&
        props.isMostRecent &&
        props.messageCompleted && (
          <HStack spacing={2}>
            <Button
              ref={upButtonRef}
              size="sm"
              variant="outline"
              colorScheme={feedback === null ? "green" : "gray"}
              onClick={() => {
                if (feedback === null && props.message.runId) {
                  void sendFeedback(1, "user_score");
                  animateButton("upButton");
                  setFeedbackColor("border-4 border-green-300");
                } else {
                  toast.error("You have already provided your feedback.");
                }
              }}
            >
              👍
            </Button>
            <Button
              ref={downButtonRef}
              size="sm"
              variant="outline"
              colorScheme={feedback === null ? "red" : "gray"}
              onClick={() => {
                if (feedback === null && props.message.runId) {
                  void sendFeedback(0, "user_score");
                  animateButton("downButton");
                  setFeedbackColor("border-4 border-red-300");
                } else {
                  toast.error("You have already provided your feedback.");
                }
              }}
            >
              👎
            </Button>
            <Spacer />
            <Button
              size="sm"
              variant="outline"
              colorScheme={runId === null ? "blue" : "gray"}
              onClick={(e) => {
                e.preventDefault();
                void viewTrace();
              }}
              isLoading={traceIsLoading}
              loadingText="🔄"
            >
              🛠🔗
            </Button>
          </HStack>
        )}
      {!isUser && <Divider mt={4} mb={4} />}
    </VStack>
  );
}
+267
View File
@@ -0,0 +1,267 @@
"use client";
import React, { useRef, useState } from "react";
import { v4 as uuidv4 } from "uuid";
import { EmptyState } from "../components/EmptyState";
import { ChatMessageBubble, Message } from "../components/ChatMessageBubble";
import { marked } from "marked";
import { Renderer } from "marked";
import { fetchEventSource } from '@microsoft/fetch-event-source';
import hljs from "highlight.js";
import "highlight.js/styles/gradient-dark.css";
import "react-toastify/dist/ReactToastify.css";
import {
Heading,
Flex,
IconButton,
Input,
InputGroup,
InputRightElement,
Spinner,
} from "@chakra-ui/react";
import { ArrowUpIcon } from "@chakra-ui/icons";
import { Source } from "./SourceBubble";
export function ChatWindow(props: {
apiBaseUrl: string;
placeholder?: string;
titleText?: string;
}) {
const conversationId = uuidv4();
const messageContainerRef = useRef<HTMLDivElement | null>(null);
const [messages, setMessages] = useState<Array<Message>>([]);
const [input, setInput] = useState("");
const [isLoading, setIsLoading] = useState(false);
const [chatHistory, setChatHistory] = useState<
{ human: string; ai: string }[]
>([]);
const { apiBaseUrl, titleText } = props;
/**
 * Sends a question to the backend and streams the answer into the chat.
 *
 * The user's message is appended to `messages` right away. The question and
 * the current `chatHistory` are POSTed to `${apiBaseUrl}/chat/stream_log`, and
 * the response is consumed as server-sent events. Every event carries a list
 * of ops (each with `path`, `op` and `value`) from which the streamed answer
 * text, the cited sources and the run id are collected. After each event the
 * accumulated answer is rendered with `marked` and written into one assistant
 * message. On the "end" event the exchange is appended to `chatHistory` and
 * the loading flag is cleared.
 *
 * If the request fails, the messages created by this call are removed, the
 * input box is restored to the submitted text, and the error is rethrown.
 *
 * Does nothing while a previous request is still loading or when the text to
 * send is empty.
 *
 * @param message Text to send; defaults to the current value of the input box.
 */
const sendMessage = async (message?: string) => {
  if (messageContainerRef.current) {
    messageContainerRef.current.classList.add("grow");
  }
  if (isLoading) {
    return;
  }
  const messageValue = message ?? input;
  if (messageValue === "") return;
  // Both ids are fixed up front so the messages can be located by id later,
  // both when streaming updates arrive and when a failure has to be rolled back.
  const userMessageId = Math.random().toString();
  const assistantMessageId = Math.random().toString();
  setInput("");
  setMessages((prevMessages) => [
    ...prevMessages,
    { id: userMessageId, content: messageValue, role: "user" },
  ]);
  setIsLoading(true);
  let accumulatedMessage = "";
  let runId: string | undefined = undefined;
  let sources: Source[] | undefined = undefined;
  // Custom markdown renderer: paragraphs and lists become plain text with
  // newlines and bullets, code blocks become highlighted <pre><code> markup.
  const renderer = new Renderer();
  renderer.paragraph = (text) => {
    return text + "\n";
  };
  renderer.list = (text) => {
    return `${text}\n\n`;
  };
  renderer.listitem = (text) => {
    return `\n• ${text}`;
  };
  renderer.code = (code, language) => {
    // Unknown or missing languages are highlighted as plaintext.
    const validLanguage =
      language && hljs.getLanguage(language) ? language : "plaintext";
    const highlightedCode = hljs.highlight(code, {
      language: validLanguage,
    }).value;
    // Only the validated language name is written into the class attribute;
    // the raw info string comes from generated text and must not reach the
    // markup unescaped.
    return `<pre class="highlight bg-gray-700" style="padding: 5px; border-radius: 5px; overflow: auto; overflow-wrap: anywhere; white-space: pre-wrap; max-width: 100%; display: block; line-height: 1.2"><code class="${validLanguage}" style="color: #d6e2ef; font-size: 12px; ">${highlightedCode}</code></pre>`;
  };
  marked.setOptions({ renderer });
  try {
    await fetchEventSource(apiBaseUrl + "/chat/stream_log", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Accept": "text/event-stream",
      },
      body: JSON.stringify({
        input: {
          question: messageValue,
          chat_history: chatHistory,
        },
        config: {
          metadata: {
            conversation_id: conversationId,
          }
        },
        include_names: ["GetRelevantDocumentChunks"],
      }),
      onerror(e) {
        // Rethrow so the failure surfaces in the catch block below.
        throw e;
      },
      onmessage(msg) {
        if (msg.event === "end") {
          setChatHistory((prevChatHistory) => [
            ...prevChatHistory,
            { human: messageValue, ai: accumulatedMessage },
          ]);
          setIsLoading(false);
          return;
        }
        if (!msg.data) {
          return;
        }
        const chunk = JSON.parse(msg.data);
        for (const op of chunk?.ops ?? []) {
          if (op.path === "/logs/0/final_output" && Array.isArray(op.value?.documents)) {
            sources = op.value.documents.map((doc: {page_content: string, metadata: Record<string, unknown>}) => ({
              url: doc.metadata.source,
              title: doc.metadata.title,
              images: doc.metadata.images,
            }));
          } else if (op.path === "/streamed_output/-") {
            accumulatedMessage = accumulatedMessage + op.value;
          } else if (!op.path && op.op === "replace") {
            runId = op.value.id;
          }
        }
        const parsedResult = marked.parse(accumulatedMessage);
        // The updater is kept pure: it neither writes to outer variables nor
        // mutates objects from the previous state, so it gives the same result
        // if React invokes it more than once.
        setMessages((prevMessages) => {
          const alreadyAdded = prevMessages.some(
            (m) => m.id === assistantMessageId
          );
          if (!alreadyAdded) {
            return [
              ...prevMessages,
              {
                id: assistantMessageId,
                content: parsedResult.trim(),
                runId: runId,
                sources: sources,
                role: "assistant",
              },
            ];
          }
          return prevMessages.map((m) =>
            m.id === assistantMessageId
              ? {
                  ...m,
                  content: parsedResult.trim(),
                  runId: runId,
                  sources: sources,
                }
              : m
          );
        });
      }
    });
  } catch (e) {
    // Roll back everything this call added. The text goes back into the input
    // box, so leaving the user's message in the list would duplicate it on retry.
    setMessages((prevMessages) =>
      prevMessages.filter(
        (m) => m.id !== userMessageId && m.id !== assistantMessageId
      )
    );
    setIsLoading(false);
    setInput(messageValue);
    throw e;
  }
};
/** Submits a preset question directly, without going through the input box. */
const sendInitialQuestion = async (question: string): Promise<void> => {
  await sendMessage(question);
};
return (
<div className={"flex flex-col items-center p-8 rounded grow max-h-full h-full" + (messages.length === 0 ? " justify-center mb-32" : "")}>
{messages.length > 0 && (
<Flex direction={"column"} alignItems={"center"} paddingBottom={"20px"}>
<Heading fontSize="2xl" fontWeight={"medium"} mb={1} color={"white"}>
{titleText}
</Heading>
<Heading fontSize="md" fontWeight={"normal"} mb={1} color={"white"}>
Powered by <a target="_blank" href="https://tavily.com" className="text-sky-400">Tavily</a>
</Heading>
<Heading fontSize="lg" fontWeight={"normal"} mb={1} color={"white"}>We appreciate feedback!</Heading>
</Flex>
)}
<div
className="flex flex-col-reverse w-full mb-2 overflow-auto"
ref={messageContainerRef}
>
{messages.length > 0 ? (
[...messages]
.reverse()
.map((m, index) => (
<ChatMessageBubble
key={m.id}
message={{ ...m }}
aiEmoji="🦜"
apiBaseUrl={apiBaseUrl}
isMostRecent={index === 0}
messageCompleted={!isLoading}
></ChatMessageBubble>
))
) : (
<EmptyState onChoice={sendInitialQuestion} />
)}
</div>
<InputGroup size="md" alignItems={"center"}>
<Input
value={input}
height={"55px"}
rounded={"full"}
type={"text"}
placeholder="Ask anything..."
textColor={"white"}
borderColor={"rgb(58, 58, 61)"}
onSubmit={(e) => {
e.preventDefault();
sendMessage();
}}
onChange={(e) => setInput(e.target.value)}
onKeyDown={(e) => {
if (e.key === "Enter" && !e.shiftKey) {
e.preventDefault();
sendMessage();
}
}}
/>
<InputRightElement h="full" paddingRight={"15px"}>
<IconButton
colorScheme="blue"
rounded={"full"}
aria-label="Send"
icon={isLoading ? <Spinner /> : <ArrowUpIcon />}
type="submit"
onClick={(e) => {
e.preventDefault();
sendMessage();
}}
/>
</InputRightElement>
</InputGroup>
{messages.length === 0 ? (<div className="w-full text-center flex flex-col">
<div className="flex grow justify-center w-full mt-4">
<div onMouseUp={(e) => sendInitialQuestion((e.target as HTMLDivElement).innerText)} className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500">
what is langchain?
</div>
<div onMouseUp={(e) => sendInitialQuestion((e.target as HTMLDivElement).innerText)} className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500">
history of mesopotamia
</div>
<div onMouseUp={(e) => sendInitialQuestion((e.target as HTMLDivElement).innerText)} className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500">
how to build a discord bot
</div>
<div onMouseUp={(e) => sendInitialQuestion((e.target as HTMLDivElement).innerText)} className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500">
leonardo dicaprio girlfriend
</div>
</div>
<div className="flex grow justify-center w-full mt-4">
<div onMouseUp={(e) => sendInitialQuestion((e.target as HTMLDivElement).innerText)} className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500">
fun gift ideas for software engineers
</div>
<div onMouseUp={(e) => sendInitialQuestion((e.target as HTMLDivElement).innerText)} className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500">
how does a prism separate light
</div>
<div onMouseUp={(e) => sendInitialQuestion((e.target as HTMLDivElement).innerText)} className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500">
what bear is best
</div>
</div>
</div>) : ""}
</div>
);
}
+15
View File
@@ -0,0 +1,15 @@
import { Heading } from "@chakra-ui/react";
/**
 * Static landing header: the app title, a "Powered by Tavily" link and a
 * short prompt inviting the user to ask a question.
 *
 * @param props.onChoice Callback for a chosen question. It is part of the
 *   component's props but is not invoked by anything rendered here.
 */
export function EmptyState(props: {
  onChoice: (question: string) => any
}) {
  return (
    <div className="rounded flex flex-col items-center max-w-full md:p-8">
      <Heading fontSize="3xl" fontWeight={"medium"} mb={1} color={"white"}>WebLangChain 🦜🔗</Heading>
      <Heading fontSize="md" fontWeight={"normal"} mb={1} color={"white"}>
        {/* rel keeps the page opened in the new tab from getting a handle on this window. */}
        Powered by <a target="_blank" rel="noopener noreferrer" href="https://tavily.com" className="text-sky-400">Tavily</a>
      </Heading>
      <Heading fontSize="xl" fontWeight={"normal"} mb={1} color={"white"} marginTop={"10px"} textAlign={"center"}>Ask me anything about anything!{" "}</Heading>
    </div>
  );
}
+21
View File
@@ -0,0 +1,21 @@
import { Source } from "./SourceBubble";
/**
 * Small numbered link to a cited source, opened in a new tab.
 *
 * @param props.source Source whose `url` is the link target.
 * @param props.sourceNumber Number displayed inside the link.
 * @param props.highlighted Selects the lighter background when true.
 * @param props.onMouseEnter Called when the pointer enters the link.
 * @param props.onMouseLeave Called when the pointer leaves the link.
 */
export function InlineCitation(props: {
  source: Source;
  sourceNumber: number;
  highlighted: boolean;
  onMouseEnter: () => any;
  onMouseLeave: () => any;
}) {
  const { source, sourceNumber, highlighted, onMouseEnter, onMouseLeave } = props;
  return (
    // The target is an external page, so rel stops it from reaching back into
    // this window and from receiving the referrer.
    <a href={source.url}
      target="_blank"
      rel="noopener noreferrer"
      className={`relative bottom-1.5 text-xs border rounded px-1 ${highlighted ? "bg-stone-500" : "bg-stone-700"}`}
      onMouseEnter={onMouseEnter}
      onMouseLeave={onMouseLeave}
    >
      {sourceNumber}
    </a>
  );
}
+67
View File
@@ -0,0 +1,67 @@
import 'react-toastify/dist/ReactToastify.css';
import { emojisplosion } from "emojisplosion";
/** A document cited alongside an answer. */
export type Source = {
/** Address of the document; SourceBubble links to it and shows its hostname. */
url: string;
/** Title text displayed in the source bubble. */
title: string;
/** Images associated with the document (presumably URLs — TODO confirm); not read by SourceBubble. */
images: string[];
};
/**
 * Returns the host part of `url` for display, without a leading "www.".
 * Falls back to the raw string when `url` is not a parseable absolute URL, so
 * a malformed source cannot throw while rendering.
 */
function displayHostname(url: string): string {
  try {
    return new URL(url).hostname.replace(/^www\./, "");
  } catch {
    return url;
  }
}

/**
 * Card linking to a cited source: shows the title (clamped to four lines),
 * the source's hostname and its index in brackets. Opens in a new tab.
 *
 * @param props.source Source to display and link to.
 * @param props.highlighted Selects the lighter background when true.
 * @param props.index Number shown in brackets after the hostname.
 * @param props.onMouseEnter Called when the pointer enters the card.
 * @param props.onMouseLeave Called when the pointer leaves the card.
 */
export function SourceBubble(props: {
  source: Source;
  highlighted: boolean;
  index: number;
  onMouseEnter: () => any;
  onMouseLeave: () => any;
}) {
  const hostname = displayHostname(props.source.url);
  return (
    // The target is an external page, so rel stops it from reaching back into
    // this window and from receiving the referrer.
    <a href={props.source.url}
      target="_blank"
      rel="noopener noreferrer"
      onMouseEnter={props.onMouseEnter}
      onMouseLeave={props.onMouseLeave}>
      <div className={`${props.highlighted ? "bg-stone-500" : "bg-stone-700"} rounded p-4 text-white h-full text-xs flex flex-col mb-4`}>
        <div className="line-clamp-4">{props.source.title}</div>
        <div className="text-white mt-auto">
          {hostname} [{props.index}]
        </div>
      </div>
    </a>
  );
}
+29
View File
@@ -0,0 +1,29 @@
/* Tailwind's base, components and utilities layers. */
@tailwind base;
@tailwind components;
@tailwind utilities;
/* Page defaults: light text on a dark background. */
body {
color: #f8f8f8;
background: #131318;
}
/* Text inside inputs and textareas is black. */
body input,
body textarea {
color: black;
}
/* Links get a bottom border while hovered. */
a:hover {
border-bottom: 1px solid;
}
/* Vertical spacing between paragraphs. */
p {
margin: 8px 0;
}
/* Code text is orange. */
code {
color: #ffa500;
}
/* Padding around every list item. */
li {
padding: 4px;
}
+27
View File
@@ -0,0 +1,27 @@
import './globals.css'
import type { Metadata } from 'next'
import { Inter } from 'next/font/google'
// Inter font restricted to the latin subset; its class name is applied to <body> in RootLayout.
const inter = Inter({ subsets: ['latin'] })
/** Title and description of the app, exported as Next.js `Metadata`. */
export const metadata: Metadata = {
title: 'WebLangChain',
description: 'Chatbot that answers queries by doing research and citing sources',
}
/**
 * Document shell for every page: a full-height <html>/<body> using the Inter
 * font, with the page content placed inside a full-height flex column.
 *
 * @param props.children Page content rendered inside the column.
 */
export default function RootLayout(props: { children: React.ReactNode }) {
  const bodyClassName = `${inter.className} h-full`;
  return (
    <html lang="en" className="h-full">
      <body className={bodyClassName}>
        <div className="flex flex-col h-full md:p-8 bg-zinc-900">
          {props.children}
        </div>
      </body>
    </html>
  )
}
+19
View File
@@ -0,0 +1,19 @@
'use client';
import { ChatWindow } from "../app/components/ChatWindow";
import { ToastContainer } from "react-toastify";
import { ChakraProvider } from '@chakra-ui/react'
/**
 * Home page: renders the chat window inside the Chakra provider, next to the
 * toast container. The backend address is read from
 * NEXT_PUBLIC_API_BASE_URL and defaults to http://localhost:8080.
 */
export default function Home() {
  const apiBaseUrl =
    process.env.NEXT_PUBLIC_API_BASE_URL ?? "http://localhost:8080";
  return (
    <ChakraProvider>
      <ToastContainer />
      <ChatWindow
        apiBaseUrl={apiBaseUrl}
        titleText="WebLangChain 🦜🔗"
        placeholder="Ask anything..."
      />
    </ChakraProvider>
  );
}
+4
View File
@@ -0,0 +1,4 @@
// No custom Next.js options are set; the empty object is exported as the config.
/** @type {import('next').NextConfig} */
const nextConfig = {}
module.exports = nextConfig
+41
View File
@@ -0,0 +1,41 @@
{
"name": "weblangchain",
"version": "0.1.0",
"private": true,
"packageManager": "yarn@1.22.19",
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"@chakra-ui/icons": "^2.1.0",
"@chakra-ui/react": "^2.8.1",
"@emotion/react": "^11.11.1",
"@emotion/styled": "^11.11.0",
"@microsoft/fetch-event-source": "^2.0.1",
"@types/marked": "^5.0.1",
"@types/node": "20.4.9",
"@types/react": "18.2.20",
"@types/react-dom": "18.2.7",
"autoprefixer": "10.4.14",
"emojisplosion": "^2.6.1",
"eslint": "8.46.0",
"eslint-config-next": "13.4.13",
"framer-motion": "^10.16.4",
"graphql": "^16.8.1",
"highlight.js": "^11.8.0",
"langchain": "^0.0.155",
"langsmith": "^0.0.41",
"marked": "^7.0.2",
"next": "13.4.13",
"postcss": "8.4.27",
"react": "18.2.0",
"react-dom": "18.2.0",
"react-toastify": "^9.1.3",
"tailwindcss": "3.3.3",
"typescript": "5.1.6",
"weaviate-ts-client": "^1.5.0"
}
}
+6
View File
@@ -0,0 +1,6 @@
// PostCSS configuration: enables the tailwindcss and autoprefixer plugins with their default options.
module.exports = {
plugins: {
tailwindcss: {},
autoprefixer: {},
},
}
Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

+20
View File
@@ -0,0 +1,20 @@
import type { Config } from 'tailwindcss'
/** Tailwind CSS configuration. */
const config: Config = {
// Glob patterns of the source files listed as Tailwind content.
content: [
'./pages/**/*.{js,ts,jsx,tsx,mdx}',
'./components/**/*.{js,ts,jsx,tsx,mdx}',
'./app/**/*.{js,ts,jsx,tsx,mdx}',
],
theme: {
extend: {
// Two extra background-image entries: a radial and a conic gradient.
backgroundImage: {
'gradient-radial': 'radial-gradient(var(--tw-gradient-stops))',
'gradient-conic':
'conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))',
},
},
},
plugins: [],
}
export default config
+28
View File
@@ -0,0 +1,28 @@
{
"compilerOptions": {
"target": "es5",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"forceConsistentCasingInFileNames": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules"]
}
+4290
View File
File diff suppressed because it is too large Load Diff
Generated
+1630
View File
File diff suppressed because it is too large Load Diff
+27
View File
@@ -0,0 +1,27 @@
[tool.poetry]
name = "weblangchain"
version = "0.1.0"
description = ""
authors = ["SN <6432132+samnoyes@users.noreply.github.com>"]
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.10"
openai = "^0.28.0"
fastapi = "^0.103.1"
pydantic = "1.10"
langchain = "^0.0.306"
uvicorn = "^0.23.2"
tiktoken = "^0.4.0"
tavily-python = "^0.1.9"
langserve = "^0.0.3"
sse-starlette = "^1.6.5"
[tool.poetry.group.dev.dependencies]
black = "^23.9.1"
isort = "^5.12.0"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
+7
View File
@@ -0,0 +1,7 @@
{
"git": {
"deploymentEnabled": {
"main": false
}
}
}