Add Kay press release retriever, bump dep, fix citation bug (#18)

* Add Kay press release retriever, bump dep, fix citation bug

* Format

* Move to PyPI dep
This commit is contained in:
Jacob Lee
2023-10-19 08:12:24 -07:00
committed by GitHub
parent 21f1224fa9
commit ed93fe4e35
5 changed files with 35 additions and 23 deletions
+12 -5
View File
@@ -32,9 +32,6 @@ from langchain.schema.runnable import (ConfigurableField, Runnable,
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Backup
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.pydantic_v1 import BaseModel
from langserve import add_routes
from langsmith import Client
from typing_extensions import TypedDict
@@ -97,7 +94,7 @@ app.add_middleware(
)
class ChatRequest(BaseModel):
class ChatRequest(TypedDict):
question: str
chat_history: Optional[List[Dict[str, str]]]
@@ -180,11 +177,20 @@ def get_retriever():
base_kay_retriever = KayAiRetriever.create(
dataset_id="company",
data_types=["10-K", "10-Q"],
num_contexts=3,
num_contexts=6,
)
kay_retriever = ContextualCompressionRetriever(
base_compressor=pipeline_compressor, base_retriever=base_kay_retriever
)
base_kay_press_release_retriever = KayAiRetriever.create(
dataset_id="company",
data_types=["PressRelease"],
num_contexts=6,
)
kay_press_release_retriever = ContextualCompressionRetriever(
base_compressor=pipeline_compressor,
base_retriever=base_kay_press_release_retriever,
)
return tavily_retriever.configurable_alternatives(
# This gives this field an id
# When configuring the end runnable, we can then use this id to configure this field
@@ -193,6 +199,7 @@ def get_retriever():
google=google_retriever,
you=you_retriever,
kay=kay_retriever,
kay_press_release=kay_press_release_retriever,
).with_config(run_name="FinalSourceRetriever")
+5 -1
View File
@@ -72,7 +72,11 @@ const createAnswerElements = (
matches.forEach((match) => {
const sourceNum = parseInt(match[1], 10);
const resolvedNum = sourceIndexMap.get(sourceNum) ?? 10;
if (prevCitationEndIndex + 1 !== match.index) {
// Allow for one space between adjacent citations
if (
prevCitationEndIndex !== match.index &&
prevCitationEndIndex + 1 !== match.index
) {
adjacentCitations = [];
}
if (match.index !== null && resolvedNum < filteredSources.length) {
+9 -2
View File
@@ -26,7 +26,7 @@ import { ArrowUpIcon } from "@chakra-ui/icons";
import { Source } from "./SourceBubble";
import { DefaultQuestion } from "./DefaultQuestion";
type RetrieverName = "tavily" | "kay" | "you" | "google";
type RetrieverName = "tavily" | "kay" | "you" | "google" | "kay_press_release";
export function ChatWindow(props: {
apiBaseUrl: string;
@@ -209,6 +209,12 @@ export function ChatWindow(props: {
"Which companies reported data breaches?",
"What were the biggest strategy changes made by Roku in 2023?",
],
kay_press_release: [
"How is the healthcare industry adopting generative AI tools?",
"What were the major technological advancements in the renewable energy sector in 2023?",
"What happened to Intel's acquisition of Tower Semiconductor?",
"What were the biggest strategy changes made by Roku in 2023?",
],
};
const sendInitialQuestion = async (question: string) => {
@@ -249,11 +255,12 @@ export function ChatWindow(props: {
>
<option value="tavily">Tavily</option>
<option value="kay">Kay.ai SEC Filings</option>
<option value="kay_press_release">Kay.ai Press Releases</option>
<option value="you">You.com</option>
<option value="google">Google</option>
</Select>
<span className="shrink-0 ml-2 mr-2">and</span>
<Select onChange={(e) => setLlm(e.target.value)} minWidth={"186px"}>
<Select onChange={(e) => setLlm(e.target.value)} minWidth={"212px"}>
<option value="openai">GPT-3.5-Turbo</option>
<option value="anthropic">Claude-2</option>
</Select>
Generated
+8 -14
View File
@@ -989,32 +989,26 @@ text-helpers = ["chardet (>=5.1.0,<6.0.0)"]
[[package]]
name = "langserve"
version = "0.0.9"
version = "0.0.11"
description = ""
category = "main"
optional = false
python-versions = "^3.8.1"
files = []
develop = false
python-versions = ">=3.8.1,<4.0.0"
files = [
{file = "langserve-0.0.11-py3-none-any.whl", hash = "sha256:878f8ce94db4abab7a3f5d2d6c5cd58f3a9f0b8c8223f6bc4e4ee8e57a56b3f1"},
{file = "langserve-0.0.11.tar.gz", hash = "sha256:2f8d493540154c5808c5e368f009a0f916951c267bd39ec34faabe9006bd791e"},
]
[package.dependencies]
fastapi = {version = ">=0.90.1", optional = true}
httpx = ">=0.23.0"
langchain = ">=0.0.316"
pydantic = "^1"
sse-starlette = {version = "^1.3.0", optional = true}
pydantic = ">=1,<2"
[package.extras]
all = ["fastapi (>=0.90.1)", "httpx-sse (>=0.3.1)", "sse-starlette (>=1.3.0,<2.0.0)"]
client = ["httpx-sse (>=0.3.1)"]
server = ["fastapi (>=0.90.1)", "sse-starlette (>=1.3.0,<2.0.0)"]
[package.source]
type = "git"
url = "https://github.com/langchain-ai/langserve"
reference = "nc/playground"
resolved_reference = "62106399dc925976dd3a82ace8436df2368aee20"
[[package]]
name = "langsmith"
version = "0.0.46"
@@ -2075,4 +2069,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "af8a37686fbbbb1738980223fc8fad4a2d46239f6b30f5fbf36754852c1a65b5"
content-hash = "0c1dcd26778b8fd83a6f512030fcdcabd8d80caf6d6609c7a978e7591143aa40"
+1 -1
View File
@@ -14,7 +14,7 @@ langchain = "^0.0.316"
uvicorn = "^0.23.2"
tiktoken = "^0.4.0"
tavily-python = "^0.1.9"
langserve = {git = "https://github.com/langchain-ai/langserve", rev = "nc/playground", extras = ["server"]}
langserve = "^0.0.11"
sse-starlette = "^1.6.5"
google-api-python-client = "^2.102.0"
html2text = "^2020.1.16"