mirror of
https://github.com/run-llama/LlamaIndexTS.git
synced 2026-07-01 22:14:03 -04:00
Compare commits
18 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| dcce45d524 | |||
| 9b1e6c861f | |||
| bbc5d17421 | |||
| ef4f235fc2 | |||
| f6b6a3d83e | |||
| b86213d8dd | |||
| 1f8e78531c | |||
| 17249a30ef | |||
| 9bc2de491e | |||
| 8077ad47de | |||
| 33fb5b4495 | |||
| c221764198 | |||
| e1ca0ad2fd | |||
| f7ac1913b9 | |||
| 03efd103cc | |||
| 31f6219f96 | |||
| 3c7c25d2f2 | |||
| 645a64b3bb |
@@ -0,0 +1,3 @@
|
||||
# Rename this file to `.env.local` to use environment variables locally with `next dev`
|
||||
# https://nextjs.org/docs/pages/building-your-application/configuring/environment-variables
|
||||
MY_HOST="example.com"
|
||||
@@ -0,0 +1,30 @@
|
||||
This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Next.js](https://nextjs.org/) bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama).
|
||||
|
||||
## Getting Started
|
||||
|
||||
First, install the dependencies:
|
||||
|
||||
```
|
||||
npm install
|
||||
```
|
||||
|
||||
Second, run the development server:
|
||||
|
||||
```
|
||||
npm run dev
|
||||
```
|
||||
|
||||
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
|
||||
|
||||
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
|
||||
|
||||
This project uses [`next/font`](https://nextjs.org/docs/basic-features/font-optimization) to automatically optimize and load Inter, a custom Google Font.
|
||||
|
||||
## Learn More
|
||||
|
||||
To learn more about LlamaIndex, take a look at the following resources:
|
||||
|
||||
- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
|
||||
- [LlamaIndexTS Documentation](https://ts.llamaindex.ai) - learn about LlamaIndex (Typescript features).
|
||||
|
||||
You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!
|
||||
@@ -0,0 +1,4 @@
|
||||
export const STORAGE_DIR = "./data";
|
||||
export const STORAGE_CACHE_DIR = "./cache";
|
||||
export const CHUNK_SIZE = 512;
|
||||
export const CHUNK_OVERLAP = 20;
|
||||
@@ -0,0 +1,53 @@
|
||||
import {
|
||||
serviceContextFromDefaults,
|
||||
SimpleDirectoryReader,
|
||||
storageContextFromDefaults,
|
||||
VectorStoreIndex,
|
||||
} from "llamaindex";
|
||||
|
||||
import * as dotenv from "dotenv";
|
||||
|
||||
import {
|
||||
CHUNK_OVERLAP,
|
||||
CHUNK_SIZE,
|
||||
STORAGE_CACHE_DIR,
|
||||
STORAGE_DIR,
|
||||
} from "./constants.mjs";
|
||||
|
||||
// Load environment variables from local .env file
|
||||
dotenv.config();
|
||||
|
||||
async function getRuntime(func) {
|
||||
const start = Date.now();
|
||||
await func();
|
||||
const end = Date.now();
|
||||
return end - start;
|
||||
}
|
||||
|
||||
async function generateDatasource(serviceContext) {
|
||||
console.log(`Generating storage context...`);
|
||||
// Split documents, create embeddings and store them in the storage context
|
||||
const ms = await getRuntime(async () => {
|
||||
const storageContext = await storageContextFromDefaults({
|
||||
persistDir: STORAGE_CACHE_DIR,
|
||||
});
|
||||
const documents = await new SimpleDirectoryReader().loadData({
|
||||
directoryPath: STORAGE_DIR,
|
||||
});
|
||||
await VectorStoreIndex.fromDocuments(documents, {
|
||||
storageContext,
|
||||
serviceContext,
|
||||
});
|
||||
});
|
||||
console.log(`Storage context successfully generated in ${ms / 1000}s.`);
|
||||
}
|
||||
|
||||
(async () => {
|
||||
const serviceContext = serviceContextFromDefaults({
|
||||
chunkSize: CHUNK_SIZE,
|
||||
chunkOverlap: CHUNK_OVERLAP,
|
||||
});
|
||||
|
||||
await generateDatasource(serviceContext);
|
||||
console.log("Finished generating storage.");
|
||||
})();
|
||||
@@ -0,0 +1,59 @@
|
||||
import {
|
||||
ContextChatEngine,
|
||||
LLM,
|
||||
SimpleDocumentStore,
|
||||
VectorStoreIndex,
|
||||
genericFileSystem,
|
||||
serviceContextFromDefaults,
|
||||
storageContextFromDefaults,
|
||||
} from "llamaindex";
|
||||
import { CHUNK_OVERLAP, CHUNK_SIZE, STORAGE_CACHE_DIR } from "./constants.mjs";
|
||||
|
||||
async function getDataSource(llm: LLM) {
|
||||
const fs = genericFileSystem;
|
||||
await fs.writeFile(
|
||||
`${STORAGE_CACHE_DIR}/doc_store.json`,
|
||||
JSON.stringify(await import("../../../../cache/doc_store.json")),
|
||||
);
|
||||
await fs.writeFile(
|
||||
`${STORAGE_CACHE_DIR}/index_store.json`,
|
||||
JSON.stringify(await import("../../../../cache/index_store.json")),
|
||||
);
|
||||
await fs.writeFile(
|
||||
`${STORAGE_CACHE_DIR}/vector_store.json`,
|
||||
JSON.stringify(await import("../../../../cache/vector_store.json")),
|
||||
);
|
||||
|
||||
const serviceContext = serviceContextFromDefaults({
|
||||
llm,
|
||||
chunkSize: CHUNK_SIZE,
|
||||
chunkOverlap: CHUNK_OVERLAP,
|
||||
});
|
||||
let storageContext = await storageContextFromDefaults({
|
||||
persistDir: `${STORAGE_CACHE_DIR}`,
|
||||
});
|
||||
|
||||
const numberOfDocs = Object.keys(
|
||||
(storageContext.docStore as SimpleDocumentStore).toDict(),
|
||||
).length;
|
||||
if (numberOfDocs === 0) {
|
||||
throw new Error(
|
||||
`StorageContext is empty - call 'npm run generate' to generate the storage first`,
|
||||
);
|
||||
}
|
||||
return await VectorStoreIndex.init({
|
||||
storageContext,
|
||||
serviceContext,
|
||||
});
|
||||
}
|
||||
|
||||
export async function createChatEngine(llm: LLM) {
|
||||
const index = await getDataSource(llm);
|
||||
const retriever = index.asRetriever();
|
||||
retriever.similarityTopK = 5;
|
||||
|
||||
return new ContextChatEngine({
|
||||
chatModel: llm,
|
||||
retriever,
|
||||
});
|
||||
}
|
||||
@@ -0,0 +1,66 @@
|
||||
import {
|
||||
JSONValue,
|
||||
createCallbacksTransformer,
|
||||
createStreamDataTransformer,
|
||||
experimental_StreamData,
|
||||
trimStartOfStreamHelper,
|
||||
type AIStreamCallbacksAndOptions,
|
||||
} from "ai";
|
||||
|
||||
type ParserOptions = {
|
||||
image_url?: string;
|
||||
};
|
||||
|
||||
function createParser(
|
||||
res: AsyncGenerator<any>,
|
||||
data: experimental_StreamData,
|
||||
opts?: ParserOptions,
|
||||
) {
|
||||
const trimStartOfStream = trimStartOfStreamHelper();
|
||||
return new ReadableStream<string>({
|
||||
start() {
|
||||
// if image_url is provided, send it via the data stream
|
||||
if (opts?.image_url) {
|
||||
const message: JSONValue = {
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: opts.image_url,
|
||||
},
|
||||
};
|
||||
data.append(message);
|
||||
} else {
|
||||
data.append({}); // send an empty image response for the user's message
|
||||
}
|
||||
},
|
||||
async pull(controller): Promise<void> {
|
||||
const { value, done } = await res.next();
|
||||
if (done) {
|
||||
controller.close();
|
||||
data.append({}); // send an empty image response for the assistant's message
|
||||
data.close();
|
||||
return;
|
||||
}
|
||||
|
||||
const text = trimStartOfStream(value ?? "");
|
||||
if (text) {
|
||||
controller.enqueue(text);
|
||||
}
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
export function LlamaIndexStream(
|
||||
res: AsyncGenerator<any>,
|
||||
opts?: {
|
||||
callbacks?: AIStreamCallbacksAndOptions;
|
||||
parserOptions?: ParserOptions;
|
||||
},
|
||||
): { stream: ReadableStream; data: experimental_StreamData } {
|
||||
const data = new experimental_StreamData();
|
||||
return {
|
||||
stream: createParser(res, data, opts?.parserOptions)
|
||||
.pipeThrough(createCallbacksTransformer(opts?.callbacks))
|
||||
.pipeThrough(createStreamDataTransformer(true)),
|
||||
data,
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,82 @@
|
||||
import { Message, StreamingTextResponse } from "ai";
|
||||
import { ChatMessage, MessageContent, OpenAI } from "llamaindex";
|
||||
import { NextRequest, NextResponse } from "next/server";
|
||||
import { createChatEngine } from "./engine";
|
||||
import { LlamaIndexStream } from "./llamaindex-stream";
|
||||
|
||||
export const runtime = "edge";
|
||||
export const dynamic = "force-dynamic";
|
||||
|
||||
const getLastMessageContent = (
|
||||
textMessage: string,
|
||||
imageUrl: string | undefined,
|
||||
): MessageContent => {
|
||||
if (!imageUrl) return textMessage;
|
||||
return [
|
||||
{
|
||||
type: "text",
|
||||
text: textMessage,
|
||||
},
|
||||
{
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: imageUrl,
|
||||
},
|
||||
},
|
||||
];
|
||||
};
|
||||
|
||||
export async function POST(request: NextRequest) {
|
||||
try {
|
||||
const body = await request.json();
|
||||
const { messages, data }: { messages: Message[]; data: any } = body;
|
||||
const lastMessage = messages.pop();
|
||||
if (!messages || !lastMessage || lastMessage.role !== "user") {
|
||||
return NextResponse.json(
|
||||
{
|
||||
error:
|
||||
"messages are required in the request body and the last message must be from the user",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
|
||||
const llm = new OpenAI({
|
||||
model: (process.env.MODEL as any) ?? "gpt-3.5-turbo",
|
||||
maxTokens: 512,
|
||||
});
|
||||
|
||||
const chatEngine = await createChatEngine(llm);
|
||||
|
||||
const lastMessageContent = getLastMessageContent(
|
||||
lastMessage.content,
|
||||
data?.imageUrl,
|
||||
);
|
||||
|
||||
const response = await chatEngine.chat(
|
||||
lastMessageContent as MessageContent,
|
||||
messages as ChatMessage[],
|
||||
true,
|
||||
);
|
||||
|
||||
// Transform the response into a readable stream
|
||||
const { stream, data: streamData } = LlamaIndexStream(response, {
|
||||
parserOptions: {
|
||||
image_url: data?.imageUrl,
|
||||
},
|
||||
});
|
||||
|
||||
// Return a StreamingTextResponse, which can be consumed by the client
|
||||
return new StreamingTextResponse(stream, {}, streamData);
|
||||
} catch (error) {
|
||||
console.error("[LlamaIndex]", error);
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: (error as Error).message,
|
||||
},
|
||||
{
|
||||
status: 500,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
"use client";
|
||||
|
||||
import { useChat } from "ai/react";
|
||||
import { useMemo } from "react";
|
||||
import { insertDataIntoMessages } from "./transform";
|
||||
import { ChatInput, ChatMessages } from "./ui/chat";
|
||||
|
||||
export default function ChatSection() {
|
||||
const {
|
||||
messages,
|
||||
input,
|
||||
isLoading,
|
||||
handleSubmit,
|
||||
handleInputChange,
|
||||
reload,
|
||||
stop,
|
||||
data,
|
||||
} = useChat({
|
||||
api: process.env.NEXT_PUBLIC_CHAT_API,
|
||||
headers: {
|
||||
"Content-Type": "application/json", // using JSON because of vercel/ai 2.2.26
|
||||
},
|
||||
});
|
||||
|
||||
const transformedMessages = useMemo(() => {
|
||||
return insertDataIntoMessages(messages, data);
|
||||
}, [messages, data]);
|
||||
|
||||
return (
|
||||
<div className="space-y-4 max-w-5xl w-full">
|
||||
<ChatMessages
|
||||
messages={transformedMessages}
|
||||
isLoading={isLoading}
|
||||
reload={reload}
|
||||
stop={stop}
|
||||
/>
|
||||
<ChatInput
|
||||
input={input}
|
||||
handleSubmit={handleSubmit}
|
||||
handleInputChange={handleInputChange}
|
||||
isLoading={isLoading}
|
||||
multiModal={process.env.NEXT_PUBLIC_MODEL === "gpt-4-vision-preview"}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
import Image from "next/image";
|
||||
|
||||
export default function Header() {
|
||||
return (
|
||||
<div className="z-10 max-w-5xl w-full items-center justify-between font-mono text-sm lg:flex">
|
||||
<p className="fixed left-0 top-0 flex w-full justify-center border-b border-gray-300 bg-gradient-to-b from-zinc-200 pb-6 pt-8 backdrop-blur-2xl dark:border-neutral-800 dark:bg-zinc-800/30 dark:from-inherit lg:static lg:w-auto lg:rounded-xl lg:border lg:bg-gray-200 lg:p-4 lg:dark:bg-zinc-800/30">
|
||||
Get started by editing
|
||||
<code className="font-mono font-bold">app/page.tsx</code>
|
||||
</p>
|
||||
<div className="fixed bottom-0 left-0 flex h-48 w-full items-end justify-center bg-gradient-to-t from-white via-white dark:from-black dark:via-black lg:static lg:h-auto lg:w-auto lg:bg-none">
|
||||
<a
|
||||
href="https://www.llamaindex.ai/"
|
||||
className="flex items-center justify-center font-nunito text-lg font-bold gap-2"
|
||||
>
|
||||
<span>Built by LlamaIndex</span>
|
||||
<Image
|
||||
className="rounded-xl"
|
||||
src="/llama.png"
|
||||
alt="Llama Logo"
|
||||
width={40}
|
||||
height={40}
|
||||
priority
|
||||
/>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
import { JSONValue, Message } from "ai";
|
||||
|
||||
export const isValidMessageData = (rawData: JSONValue | undefined) => {
|
||||
if (!rawData || typeof rawData !== "object") return false;
|
||||
if (Object.keys(rawData).length === 0) return false;
|
||||
return true;
|
||||
};
|
||||
|
||||
export const insertDataIntoMessages = (
|
||||
messages: Message[],
|
||||
data: JSONValue[] | undefined,
|
||||
) => {
|
||||
if (!data) return messages;
|
||||
messages.forEach((message, i) => {
|
||||
const rawData = data[i];
|
||||
if (isValidMessageData(rawData)) message.data = rawData;
|
||||
});
|
||||
return messages;
|
||||
};
|
||||
@@ -0,0 +1,34 @@
|
||||
"use client";
|
||||
|
||||
import Image from "next/image";
|
||||
import { Message } from "./chat-messages";
|
||||
|
||||
export default function ChatAvatar(message: Message) {
|
||||
if (message.role === "user") {
|
||||
return (
|
||||
<div className="flex h-8 w-8 shrink-0 select-none items-center justify-center rounded-md border shadow bg-background">
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
viewBox="0 0 256 256"
|
||||
fill="currentColor"
|
||||
className="h-4 w-4"
|
||||
>
|
||||
<path d="M230.92 212c-15.23-26.33-38.7-45.21-66.09-54.16a72 72 0 1 0-73.66 0c-27.39 8.94-50.86 27.82-66.09 54.16a8 8 0 1 0 13.85 8c18.84-32.56 52.14-52 89.07-52s70.23 19.44 89.07 52a8 8 0 1 0 13.85-8ZM72 96a56 56 0 1 1 56 56 56.06 56.06 0 0 1-56-56Z"></path>
|
||||
</svg>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex h-8 w-8 shrink-0 select-none items-center justify-center rounded-md border bg-black text-white">
|
||||
<Image
|
||||
className="rounded-md"
|
||||
src="/llama.png"
|
||||
alt="Llama Logo"
|
||||
width={24}
|
||||
height={24}
|
||||
priority
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
"use client";
|
||||
|
||||
export interface ChatInputProps {
|
||||
/** The current value of the input */
|
||||
input?: string;
|
||||
/** An input/textarea-ready onChange handler to control the value of the input */
|
||||
handleInputChange?: (
|
||||
e:
|
||||
| React.ChangeEvent<HTMLInputElement>
|
||||
| React.ChangeEvent<HTMLTextAreaElement>,
|
||||
) => void;
|
||||
/** Form submission handler to automatically reset input and append a user message */
|
||||
handleSubmit: (e: React.FormEvent<HTMLFormElement>) => void;
|
||||
isLoading: boolean;
|
||||
multiModal?: boolean;
|
||||
}
|
||||
|
||||
export default function ChatInput(props: ChatInputProps) {
|
||||
return (
|
||||
<>
|
||||
<form
|
||||
onSubmit={props.handleSubmit}
|
||||
className="flex items-start justify-between w-full max-w-5xl p-4 bg-white rounded-xl shadow-xl gap-4"
|
||||
>
|
||||
<input
|
||||
autoFocus
|
||||
name="message"
|
||||
placeholder="Type a message"
|
||||
className="w-full p-4 rounded-xl shadow-inner flex-1"
|
||||
value={props.input}
|
||||
onChange={props.handleInputChange}
|
||||
/>
|
||||
<button
|
||||
disabled={props.isLoading}
|
||||
type="submit"
|
||||
className="p-4 text-white rounded-xl shadow-xl bg-gradient-to-r from-cyan-500 to-sky-500 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
Send message
|
||||
</button>
|
||||
</form>
|
||||
</>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
"use client";
|
||||
|
||||
import ChatAvatar from "./chat-avatar";
|
||||
import { Message } from "./chat-messages";
|
||||
|
||||
export default function ChatItem(message: Message) {
|
||||
return (
|
||||
<div className="flex items-start gap-4 pt-5">
|
||||
<ChatAvatar {...message} />
|
||||
<p className="break-words">{message.content}</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
"use client";
|
||||
|
||||
import { useEffect, useRef } from "react";
|
||||
import ChatItem from "./chat-item";
|
||||
|
||||
export interface Message {
|
||||
id: string;
|
||||
content: string;
|
||||
role: string;
|
||||
}
|
||||
|
||||
export default function ChatMessages({
|
||||
messages,
|
||||
isLoading,
|
||||
reload,
|
||||
stop,
|
||||
}: {
|
||||
messages: Message[];
|
||||
isLoading?: boolean;
|
||||
stop?: () => void;
|
||||
reload?: () => void;
|
||||
}) {
|
||||
const scrollableChatContainerRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
const scrollToBottom = () => {
|
||||
if (scrollableChatContainerRef.current) {
|
||||
scrollableChatContainerRef.current.scrollTop =
|
||||
scrollableChatContainerRef.current.scrollHeight;
|
||||
}
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
scrollToBottom();
|
||||
}, [messages.length]);
|
||||
|
||||
return (
|
||||
<div className="w-full max-w-5xl p-4 bg-white rounded-xl shadow-xl">
|
||||
<div
|
||||
className="flex flex-col gap-5 divide-y h-[50vh] overflow-auto"
|
||||
ref={scrollableChatContainerRef}
|
||||
>
|
||||
{messages.map((m: Message) => (
|
||||
<ChatItem key={m.id} {...m} />
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
import ChatInput from "./chat-input";
|
||||
import ChatMessages from "./chat-messages";
|
||||
|
||||
export type { ChatInputProps } from "./chat-input";
|
||||
export type { Message } from "./chat-messages";
|
||||
export { ChatInput, ChatMessages };
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 15 KiB |
@@ -0,0 +1,94 @@
|
||||
@tailwind base;
|
||||
@tailwind components;
|
||||
@tailwind utilities;
|
||||
|
||||
@layer base {
|
||||
:root {
|
||||
--background: 0 0% 100%;
|
||||
--foreground: 222.2 47.4% 11.2%;
|
||||
|
||||
--muted: 210 40% 96.1%;
|
||||
--muted-foreground: 215.4 16.3% 46.9%;
|
||||
|
||||
--popover: 0 0% 100%;
|
||||
--popover-foreground: 222.2 47.4% 11.2%;
|
||||
|
||||
--border: 214.3 31.8% 91.4%;
|
||||
--input: 214.3 31.8% 91.4%;
|
||||
|
||||
--card: 0 0% 100%;
|
||||
--card-foreground: 222.2 47.4% 11.2%;
|
||||
|
||||
--primary: 222.2 47.4% 11.2%;
|
||||
--primary-foreground: 210 40% 98%;
|
||||
|
||||
--secondary: 210 40% 96.1%;
|
||||
--secondary-foreground: 222.2 47.4% 11.2%;
|
||||
|
||||
--accent: 210 40% 96.1%;
|
||||
--accent-foreground: 222.2 47.4% 11.2%;
|
||||
|
||||
--destructive: 0 100% 50%;
|
||||
--destructive-foreground: 210 40% 98%;
|
||||
|
||||
--ring: 215 20.2% 65.1%;
|
||||
|
||||
--radius: 0.5rem;
|
||||
}
|
||||
|
||||
.dark {
|
||||
--background: 224 71% 4%;
|
||||
--foreground: 213 31% 91%;
|
||||
|
||||
--muted: 223 47% 11%;
|
||||
--muted-foreground: 215.4 16.3% 56.9%;
|
||||
|
||||
--accent: 216 34% 17%;
|
||||
--accent-foreground: 210 40% 98%;
|
||||
|
||||
--popover: 224 71% 4%;
|
||||
--popover-foreground: 215 20.2% 65.1%;
|
||||
|
||||
--border: 216 34% 17%;
|
||||
--input: 216 34% 17%;
|
||||
|
||||
--card: 224 71% 4%;
|
||||
--card-foreground: 213 31% 91%;
|
||||
|
||||
--primary: 210 40% 98%;
|
||||
--primary-foreground: 222.2 47.4% 1.2%;
|
||||
|
||||
--secondary: 222.2 47.4% 11.2%;
|
||||
--secondary-foreground: 210 40% 98%;
|
||||
|
||||
--destructive: 0 63% 31%;
|
||||
--destructive-foreground: 210 40% 98%;
|
||||
|
||||
--ring: 216 34% 17%;
|
||||
|
||||
--radius: 0.5rem;
|
||||
}
|
||||
}
|
||||
|
||||
@layer base {
|
||||
* {
|
||||
@apply border-border;
|
||||
}
|
||||
body {
|
||||
@apply bg-background text-foreground;
|
||||
font-feature-settings:
|
||||
"rlig" 1,
|
||||
"calt" 1;
|
||||
}
|
||||
.background-gradient {
|
||||
background-color: #fff;
|
||||
background-image: radial-gradient(
|
||||
at 21% 11%,
|
||||
rgba(186, 186, 233, 0.53) 0,
|
||||
transparent 50%
|
||||
),
|
||||
radial-gradient(at 85% 0, hsla(46, 57%, 78%, 0.52) 0, transparent 50%),
|
||||
radial-gradient(at 91% 36%, rgba(194, 213, 255, 0.68) 0, transparent 50%),
|
||||
radial-gradient(at 8% 40%, rgba(251, 218, 239, 0.46) 0, transparent 50%);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
import type { Metadata } from "next";
|
||||
import { Inter } from "next/font/google";
|
||||
import "./globals.css";
|
||||
|
||||
const inter = Inter({ subsets: ["latin"] });
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: "Create Llama App",
|
||||
description: "Generated by create-llama",
|
||||
};
|
||||
|
||||
export default function RootLayout({
|
||||
children,
|
||||
}: {
|
||||
children: React.ReactNode;
|
||||
}) {
|
||||
return (
|
||||
<html lang="en">
|
||||
<body className={inter.className}>{children}</body>
|
||||
</html>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
import Header from "@/app/components/header";
|
||||
import ChatSection from "./components/chat-section";
|
||||
|
||||
export default function Home() {
|
||||
return (
|
||||
<main className="flex min-h-screen flex-col items-center gap-10 p-24 background-gradient">
|
||||
<Header />
|
||||
<ChatSection />
|
||||
</main>
|
||||
);
|
||||
}
|
||||
Binary file not shown.
Vendored
+5
@@ -0,0 +1,5 @@
|
||||
/// <reference types="next" />
|
||||
/// <reference types="next/image-types/global" />
|
||||
|
||||
// NOTE: This file should not be edited
|
||||
// see https://nextjs.org/docs/basic-features/typescript for more information.
|
||||
@@ -0,0 +1,4 @@
|
||||
/** @type {import('next').NextConfig} */
|
||||
const nextConfig = {};
|
||||
|
||||
module.exports = nextConfig;
|
||||
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"name": "demo-edge-runtime",
|
||||
"version": "0.1.0",
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
"start": "next start",
|
||||
"lint": "next lint",
|
||||
"generate": "node app/api/chat/engine/generate.mjs"
|
||||
},
|
||||
"dependencies": {
|
||||
"ai": "^2.2.27",
|
||||
"dotenv": "^16.3.1",
|
||||
"llamaindex": "0.0.46",
|
||||
"next": "^14.0.4",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"supports-color": "^9.4.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.10.3",
|
||||
"@types/react": "^18.2.42",
|
||||
"@types/react-dom": "^18.2.17",
|
||||
"autoprefixer": "^10.4.16",
|
||||
"eslint": "^8.55.0",
|
||||
"eslint-config-next": "^14.0.3",
|
||||
"postcss": "^8.4.32",
|
||||
"tailwindcss": "^3.3.6",
|
||||
"typescript": "^5.3.2",
|
||||
"cross-env": "^7.0.3"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
module.exports = {
|
||||
plugins: {
|
||||
tailwindcss: {},
|
||||
autoprefixer: {},
|
||||
},
|
||||
};
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 36 KiB |
@@ -0,0 +1,78 @@
|
||||
import type { Config } from "tailwindcss";
|
||||
import { fontFamily } from "tailwindcss/defaultTheme";
|
||||
|
||||
const config: Config = {
|
||||
darkMode: ["class"],
|
||||
content: ["app/**/*.{ts,tsx}", "components/**/*.{ts,tsx}"],
|
||||
theme: {
|
||||
container: {
|
||||
center: true,
|
||||
padding: "2rem",
|
||||
screens: {
|
||||
"2xl": "1400px",
|
||||
},
|
||||
},
|
||||
extend: {
|
||||
colors: {
|
||||
border: "hsl(var(--border))",
|
||||
input: "hsl(var(--input))",
|
||||
ring: "hsl(var(--ring))",
|
||||
background: "hsl(var(--background))",
|
||||
foreground: "hsl(var(--foreground))",
|
||||
primary: {
|
||||
DEFAULT: "hsl(var(--primary))",
|
||||
foreground: "hsl(var(--primary-foreground))",
|
||||
},
|
||||
secondary: {
|
||||
DEFAULT: "hsl(var(--secondary))",
|
||||
foreground: "hsl(var(--secondary-foreground))",
|
||||
},
|
||||
destructive: {
|
||||
DEFAULT: "hsl(var(--destructive) / <alpha-value>)",
|
||||
foreground: "hsl(var(--destructive-foreground) / <alpha-value>)",
|
||||
},
|
||||
muted: {
|
||||
DEFAULT: "hsl(var(--muted))",
|
||||
foreground: "hsl(var(--muted-foreground))",
|
||||
},
|
||||
accent: {
|
||||
DEFAULT: "hsl(var(--accent))",
|
||||
foreground: "hsl(var(--accent-foreground))",
|
||||
},
|
||||
popover: {
|
||||
DEFAULT: "hsl(var(--popover))",
|
||||
foreground: "hsl(var(--popover-foreground))",
|
||||
},
|
||||
card: {
|
||||
DEFAULT: "hsl(var(--card))",
|
||||
foreground: "hsl(var(--card-foreground))",
|
||||
},
|
||||
},
|
||||
borderRadius: {
|
||||
xl: `calc(var(--radius) + 4px)`,
|
||||
lg: `var(--radius)`,
|
||||
md: `calc(var(--radius) - 2px)`,
|
||||
sm: "calc(var(--radius) - 4px)",
|
||||
},
|
||||
fontFamily: {
|
||||
sans: ["var(--font-sans)", ...fontFamily.sans],
|
||||
},
|
||||
keyframes: {
|
||||
"accordion-down": {
|
||||
from: { height: "0" },
|
||||
to: { height: "var(--radix-accordion-content-height)" },
|
||||
},
|
||||
"accordion-up": {
|
||||
from: { height: "var(--radix-accordion-content-height)" },
|
||||
to: { height: "0" },
|
||||
},
|
||||
},
|
||||
animation: {
|
||||
"accordion-down": "accordion-down 0.2s ease-out",
|
||||
"accordion-up": "accordion-up 0.2s ease-out",
|
||||
},
|
||||
},
|
||||
},
|
||||
plugins: [],
|
||||
};
|
||||
export default config;
|
||||
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "es5",
|
||||
"lib": ["dom", "dom.iterable", "esnext"],
|
||||
"allowJs": true,
|
||||
"skipLibCheck": true,
|
||||
"strict": true,
|
||||
"noEmit": true,
|
||||
"esModuleInterop": true,
|
||||
"module": "esnext",
|
||||
"moduleResolution": "bundler",
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"jsx": "preserve",
|
||||
"incremental": true,
|
||||
"plugins": [
|
||||
{
|
||||
"name": "next"
|
||||
}
|
||||
],
|
||||
"paths": {
|
||||
"@/*": ["./*"]
|
||||
},
|
||||
"forceConsistentCasingInFileNames": true
|
||||
},
|
||||
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
+148
-2
@@ -4,6 +4,7 @@
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.9.1",
|
||||
"@aws-crypto/sha256-browser": "^5.2.0",
|
||||
"@datastax/astra-db-ts": "^0.1.2",
|
||||
"@mistralai/mistralai": "^0.0.7",
|
||||
"@notionhq/client": "^2.2.14",
|
||||
@@ -35,7 +36,8 @@
|
||||
"@types/node": "^18.19.6",
|
||||
"@types/papaparse": "^5.3.14",
|
||||
"@types/pg": "^8.10.9",
|
||||
"bunchee": "^4.3.3",
|
||||
"bunchee": "^4.4.0",
|
||||
"madge": "^6.1.0",
|
||||
"node-stdlib-browser": "^1.2.0",
|
||||
"typescript": "^5.3.3"
|
||||
},
|
||||
@@ -47,9 +49,152 @@
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/index.d.mts",
|
||||
"edge-light": "./dist/index.edge-light.mjs",
|
||||
"import": "./dist/index.mjs",
|
||||
"require": "./dist/index.js"
|
||||
},
|
||||
"./storage/FileSystem": {
|
||||
"types": "./dist/storage/FileSystem.d.mts",
|
||||
"edge-light": "./dist/storage/FileSystem.edge-light.mjs",
|
||||
"import": "./dist/storage/FileSystem.mjs",
|
||||
"require": "./dist/storage/FileSystem.js"
|
||||
},
|
||||
"./ChatEngine": {
|
||||
"types": "./dist/ChatEngine.d.mts",
|
||||
"import": "./dist/ChatEngine.mjs",
|
||||
"require": "./dist/ChatEngine.js"
|
||||
},
|
||||
"./ChatHistory": {
|
||||
"types": "./dist/ChatHistory.d.mts",
|
||||
"import": "./dist/ChatHistory.mjs",
|
||||
"require": "./dist/ChatHistory.js"
|
||||
},
|
||||
"./constants": {
|
||||
"types": "./dist/constants.d.mts",
|
||||
"import": "./dist/constants.mjs",
|
||||
"require": "./dist/constants.js"
|
||||
},
|
||||
"./GlobalsHelper": {
|
||||
"types": "./dist/GlobalsHelper.d.mts",
|
||||
"import": "./dist/GlobalsHelper.mjs",
|
||||
"require": "./dist/GlobalsHelper.js"
|
||||
},
|
||||
"./Node": {
|
||||
"types": "./dist/Node.d.mts",
|
||||
"import": "./dist/Node.mjs",
|
||||
"require": "./dist/Node.js"
|
||||
},
|
||||
"./OutputParser": {
|
||||
"types": "./dist/OutputParser.d.mts",
|
||||
"import": "./dist/OutputParser.mjs",
|
||||
"require": "./dist/OutputParser.js"
|
||||
},
|
||||
"./Prompt": {
|
||||
"types": "./dist/Prompt.d.mts",
|
||||
"import": "./dist/Prompt.mjs",
|
||||
"require": "./dist/Prompt.js"
|
||||
},
|
||||
"./PromptHelper": {
|
||||
"types": "./dist/PromptHelper.d.mts",
|
||||
"import": "./dist/PromptHelper.mjs",
|
||||
"require": "./dist/PromptHelper.js"
|
||||
},
|
||||
"./QueryEngine": {
|
||||
"types": "./dist/QueryEngine.d.mts",
|
||||
"import": "./dist/QueryEngine.mjs",
|
||||
"require": "./dist/QueryEngine.js"
|
||||
},
|
||||
"./QuestionGenerator": {
|
||||
"types": "./dist/QuestionGenerator.d.mts",
|
||||
"import": "./dist/QuestionGenerator.mjs",
|
||||
"require": "./dist/QuestionGenerator.js"
|
||||
},
|
||||
"./Response": {
|
||||
"types": "./dist/Response.d.mts",
|
||||
"import": "./dist/Response.mjs",
|
||||
"require": "./dist/Response.js"
|
||||
},
|
||||
"./Retriever": {
|
||||
"types": "./dist/Retriever.d.mts",
|
||||
"import": "./dist/Retriever.mjs",
|
||||
"require": "./dist/Retriever.js"
|
||||
},
|
||||
"./ServiceContext": {
|
||||
"types": "./dist/ServiceContext.d.mts",
|
||||
"import": "./dist/ServiceContext.mjs",
|
||||
"require": "./dist/ServiceContext.js"
|
||||
},
|
||||
"./TextSplitter": {
|
||||
"types": "./dist/TextSplitter.d.mts",
|
||||
"import": "./dist/TextSplitter.mjs",
|
||||
"require": "./dist/TextSplitter.js"
|
||||
},
|
||||
"./Tool": {
|
||||
"types": "./dist/Tool.d.mts",
|
||||
"import": "./dist/Tool.mjs",
|
||||
"require": "./dist/Tool.js"
|
||||
},
|
||||
"./readers/AssemblyAI": {
|
||||
"types": "./dist/readers/AssemblyAI.d.mts",
|
||||
"import": "./dist/readers/AssemblyAI.mjs",
|
||||
"require": "./dist/readers/AssemblyAI.js"
|
||||
},
|
||||
"./readers/base": {
|
||||
"types": "./dist/readers/base.d.mts",
|
||||
"import": "./dist/readers/base.mjs",
|
||||
"require": "./dist/readers/base.js"
|
||||
},
|
||||
"./readers/CSVReader": {
|
||||
"types": "./dist/readers/CSVReader.d.mts",
|
||||
"import": "./dist/readers/CSVReader.mjs",
|
||||
"require": "./dist/readers/CSVReader.js"
|
||||
},
|
||||
"./readers/DocxReader": {
|
||||
"types": "./dist/readers/DocxReader.d.mts",
|
||||
"import": "./dist/readers/DocxReader.mjs",
|
||||
"require": "./dist/readers/DocxReader.js"
|
||||
},
|
||||
"./readers/HTMLReader": {
|
||||
"types": "./dist/readers/HTMLReader.d.mts",
|
||||
"import": "./dist/readers/HTMLReader.mjs",
|
||||
"require": "./dist/readers/HTMLReader.js"
|
||||
},
|
||||
"./readers/ImageReader": {
|
||||
"types": "./dist/readers/ImageReader.d.mts",
|
||||
"import": "./dist/readers/ImageReader.mjs",
|
||||
"require": "./dist/readers/ImageReader.js"
|
||||
},
|
||||
"./readers/MarkdownReader": {
|
||||
"types": "./dist/readers/MarkdownReader.d.mts",
|
||||
"import": "./dist/readers/MarkdownReader.mjs",
|
||||
"require": "./dist/readers/MarkdownReader.js"
|
||||
},
|
||||
"./readers/NotionReader": {
|
||||
"types": "./dist/readers/NotionReader.d.mts",
|
||||
"import": "./dist/readers/NotionReader.mjs",
|
||||
"require": "./dist/readers/NotionReader.js"
|
||||
},
|
||||
"./readers/PDFReader": {
|
||||
"types": "./dist/readers/PDFReader.d.mts",
|
||||
"import": "./dist/readers/PDFReader.mjs",
|
||||
"require": "./dist/readers/PDFReader.js"
|
||||
},
|
||||
"./readers/SimpleDirectoryReader": {
|
||||
"types": "./dist/readers/SimpleDirectoryReader.d.mts",
|
||||
"import": "./dist/readers/SimpleDirectoryReader.mjs",
|
||||
"require": "./dist/readers/SimpleDirectoryReader.js"
|
||||
},
|
||||
"./readers/SimpleMongoReader": {
|
||||
"types": "./dist/readers/SimpleMongoReader.d.mts",
|
||||
"import": "./dist/readers/SimpleMongoReader.mjs",
|
||||
"require": "./dist/readers/SimpleMongoReader.js"
|
||||
},
|
||||
"./environments": {
|
||||
"types": "./dist/environments.d.mts",
|
||||
"edge-light": "./dist/environments.edge-light.mjs",
|
||||
"import": "./dist/environments.mjs",
|
||||
"require": "./dist/environments.js"
|
||||
},
|
||||
"./examples/*": "./examples/*"
|
||||
},
|
||||
"files": [
|
||||
@@ -68,6 +213,7 @@
|
||||
"lint": "eslint .",
|
||||
"test": "jest",
|
||||
"build": "bunchee",
|
||||
"dev": "bunchee -w"
|
||||
"dev": "bunchee -w",
|
||||
"check:circular": "madge --circular ./src/index.ts"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { ChatHistory } from "./ChatHistory";
|
||||
import { NodeWithScore, TextNode } from "./Node";
|
||||
import {
|
||||
@@ -13,7 +12,9 @@ import { Response } from "./Response";
|
||||
import { BaseRetriever } from "./Retriever";
|
||||
import { ServiceContext, serviceContextFromDefaults } from "./ServiceContext";
|
||||
import { Event } from "./callbacks/CallbackManager";
|
||||
import { ChatMessage, LLM, OpenAI } from "./llm";
|
||||
import { randomUUID } from "./environments";
|
||||
import { ChatMessage, LLM } from "./llm";
|
||||
import { OpenAI } from "./llm/openai";
|
||||
import { BaseNodePostprocessor } from "./postprocessors";
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import { ChatMessage, LLM, MessageType, OpenAI } from "./llm/LLM";
|
||||
import {
|
||||
SummaryPrompt,
|
||||
defaultSummaryPrompt,
|
||||
messagesToHistoryStr,
|
||||
SummaryPrompt,
|
||||
} from "./Prompt";
|
||||
import { ChatMessage, LLM, MessageType } from "./llm/LLM";
|
||||
import { OpenAI } from "./llm/openai";
|
||||
|
||||
/**
|
||||
* A ChatHistory is used to keep the state of back and forth chat messages
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { encodingForModel } from "js-tiktoken";
|
||||
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { Event, EventTag, EventType } from "./callbacks/CallbackManager";
|
||||
import { randomUUID } from "./environments";
|
||||
|
||||
export enum Tokenizers {
|
||||
CL100K_BASE = "cl100k_base",
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import _ from "lodash";
|
||||
import { createHash, randomUUID } from "node:crypto";
|
||||
import path from "node:path";
|
||||
import { createSHA256, randomUUID } from "./environments";
|
||||
|
||||
export enum NodeRelationship {
|
||||
SOURCE = "SOURCE",
|
||||
@@ -191,7 +190,7 @@ export class TextNode<T extends Metadata = Metadata> extends BaseNode<T> {
|
||||
* @returns
|
||||
*/
|
||||
generateHash() {
|
||||
const hashFunction = createHash("sha256");
|
||||
const hashFunction = createSHA256();
|
||||
hashFunction.update(`type=${this.getType()}`);
|
||||
hashFunction.update(
|
||||
`startCharIdx=${this.startCharIdx} endCharIdx=${this.endCharIdx}`,
|
||||
@@ -321,9 +320,12 @@ export class ImageNode<T extends Metadata = Metadata> extends TextNode<T> {
|
||||
}
|
||||
|
||||
getUrl(): URL {
|
||||
// id_ stores the relative path, convert it to the URL of the file
|
||||
const absPath = path.resolve(this.id_);
|
||||
return new URL(`file://${absPath}`);
|
||||
if (typeof this.image === "string") {
|
||||
return new URL(this.image);
|
||||
} else if (this.image instanceof Blob) {
|
||||
return new URL(URL.createObjectURL(this.image));
|
||||
}
|
||||
throw new Error("Invalid image type");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
import { SubQuestion } from "./QuestionGenerator";
|
||||
export interface SubQuestion {
|
||||
subQuestion: string;
|
||||
toolName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* An OutputParser is used to extract structured data from the raw output of the LLM.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { ChatMessage } from "./llm/LLM";
|
||||
import { SubQuestion } from "./QuestionGenerator";
|
||||
import { ToolMetadata } from "./Tool";
|
||||
import { SubQuestion } from './OutputParser'
|
||||
|
||||
/**
|
||||
* A SimplePrompt is a function that takes a dictionary of inputs and returns a string.
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { NodeWithScore, TextNode } from "./Node";
|
||||
import {
|
||||
BaseQuestionGenerator,
|
||||
LLMQuestionGenerator,
|
||||
SubQuestion,
|
||||
} from "./QuestionGenerator";
|
||||
import { Response } from "./Response";
|
||||
import { BaseRetriever } from "./Retriever";
|
||||
import { ServiceContext, serviceContextFromDefaults } from "./ServiceContext";
|
||||
import { QueryEngineTool, ToolMetadata } from "./Tool";
|
||||
import { Event } from "./callbacks/CallbackManager";
|
||||
import { randomUUID } from "./environments";
|
||||
import { BaseNodePostprocessor } from "./postprocessors";
|
||||
import {
|
||||
BaseSynthesizer,
|
||||
CompactAndRefine,
|
||||
ResponseSynthesizer,
|
||||
} from "./synthesizers";
|
||||
import { SubQuestion } from './OutputParser'
|
||||
|
||||
/**
|
||||
* A query engine is a question answerer that can use one or more steps.
|
||||
|
||||
@@ -1,20 +1,17 @@
|
||||
import {
|
||||
BaseOutputParser,
|
||||
StructuredOutput,
|
||||
SubQuestionOutputParser,
|
||||
} from "./OutputParser";
|
||||
SubQuestion,
|
||||
SubQuestionOutputParser
|
||||
} from './OutputParser'
|
||||
import {
|
||||
SubQuestionPrompt,
|
||||
buildToolsText,
|
||||
defaultSubQuestionPrompt,
|
||||
} from "./Prompt";
|
||||
import { ToolMetadata } from "./Tool";
|
||||
import { LLM, OpenAI } from "./llm/LLM";
|
||||
|
||||
export interface SubQuestion {
|
||||
subQuestion: string;
|
||||
toolName: string;
|
||||
}
|
||||
import { LLM } from "./llm/LLM";
|
||||
import { OpenAI } from "./llm/openai";
|
||||
|
||||
/**
|
||||
* QuestionGenerators generate new questions for the LLM using tools and a user query.
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import { PromptHelper } from "./PromptHelper";
|
||||
import { CallbackManager } from "./callbacks/CallbackManager";
|
||||
import { BaseEmbedding, OpenAIEmbedding } from "./embeddings";
|
||||
import { LLM, OpenAI } from "./llm";
|
||||
import { LLM } from "./llm";
|
||||
import { OpenAI } from "./llm/openai";
|
||||
import { NodeParser, SimpleNodeParser } from "./nodeParsers";
|
||||
import { PromptHelper } from "./PromptHelper";
|
||||
|
||||
/**
|
||||
* The ServiceContext is a collection of components that are used in different parts of the application.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { EOL } from "node:os";
|
||||
// GitHub translated
|
||||
import { globalsHelper } from "./GlobalsHelper";
|
||||
import { DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE } from "./constants";
|
||||
import { EOL } from "./environments";
|
||||
|
||||
class TextSplit {
|
||||
textChunk: string;
|
||||
|
||||
@@ -1,14 +1,4 @@
|
||||
import { similarity } from "./utils";
|
||||
|
||||
/**
|
||||
* Similarity type
|
||||
* Default is cosine similarity. Dot product and negative Euclidean distance are also supported.
|
||||
*/
|
||||
export enum SimilarityType {
|
||||
DEFAULT = "cosine",
|
||||
DOT_PRODUCT = "dot_product",
|
||||
EUCLIDEAN = "euclidean",
|
||||
}
|
||||
import { similarity, SimilarityType } from "./utils";
|
||||
|
||||
export abstract class BaseEmbedding {
|
||||
similarity(
|
||||
|
||||
@@ -1,8 +1,18 @@
|
||||
import _ from "lodash";
|
||||
import { ImageType } from "../Node";
|
||||
import { DEFAULT_SIMILARITY_TOP_K } from "../constants";
|
||||
import { DEFAULT_FS, VectorStoreQueryMode } from "../storage";
|
||||
import { SimilarityType } from "./types";
|
||||
import { genericFileSystem } from "../storage/FileSystem";
|
||||
import { VectorStoreQueryMode } from "../storage/vectorStore/types";
|
||||
|
||||
/**
|
||||
* Similarity type
|
||||
* Default is cosine similarity. Dot product and negative Euclidean distance are also supported.
|
||||
*/
|
||||
export enum SimilarityType {
|
||||
DEFAULT = "cosine",
|
||||
DOT_PRODUCT = "dot_product",
|
||||
EUCLIDEAN = "euclidean",
|
||||
}
|
||||
|
||||
/**
|
||||
* The similarity between two embeddings.
|
||||
@@ -186,7 +196,7 @@ export function getTopKMMREmbeddings(
|
||||
}
|
||||
|
||||
async function blobToDataUrl(input: Blob) {
|
||||
const { fileTypeFromBuffer } = await import("file-type");
|
||||
const { fileTypeFromBuffer } = await import("../environments");
|
||||
const buffer = Buffer.from(await input.arrayBuffer());
|
||||
const type = await fileTypeFromBuffer(buffer);
|
||||
if (!type) {
|
||||
@@ -241,7 +251,7 @@ export async function imageToDataUrl(input: ImageType): Promise<string> {
|
||||
_.isString(input)
|
||||
) {
|
||||
// string or file URL
|
||||
const fs = DEFAULT_FS;
|
||||
const fs = genericFileSystem;
|
||||
const dataBuffer = await fs.readFile(
|
||||
input instanceof URL ? input.pathname : input,
|
||||
);
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
import { Sha256 } from "@aws-crypto/sha256-browser";
|
||||
|
||||
export const randomUUID = () => {
|
||||
return globalThis.crypto.randomUUID();
|
||||
};
|
||||
|
||||
export function dirname(path: string) {
|
||||
return path.split("/").at(-2)!;
|
||||
}
|
||||
|
||||
export function join(...paths: string[]) {
|
||||
return paths.join("/");
|
||||
}
|
||||
|
||||
export function fileTypeFromBuffer(buffer: Buffer) {
|
||||
if (buffer[0] === 0xff && buffer[1] === 0xd8 && buffer[2] === 0xff) {
|
||||
return { ext: "jpg", mime: "image/jpeg" };
|
||||
} else if (
|
||||
buffer[0] === 0x89 &&
|
||||
buffer[1] === 0x50 &&
|
||||
buffer[2] === 0x4e &&
|
||||
buffer[3] === 0x47
|
||||
) {
|
||||
return { ext: "png", mime: "image/png" };
|
||||
} else if (
|
||||
buffer[0] === 0x47 &&
|
||||
buffer[1] === 0x49 &&
|
||||
buffer[2] === 0x46 &&
|
||||
buffer[3] === 0x38
|
||||
) {
|
||||
return { ext: "gif", mime: "image/gif" };
|
||||
} else if (
|
||||
buffer[0] === 0x42 &&
|
||||
buffer[1] === 0x4d &&
|
||||
buffer[2] === 0x46 &&
|
||||
buffer[3] === 0x38
|
||||
) {
|
||||
return { ext: "bmp", mime: "image/bmp" };
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
export const createSHA256 = () => {
|
||||
return new Sha256();
|
||||
};
|
||||
|
||||
// on edge runtime, there is always pretend to be a linux system
|
||||
export const EOL = "\n";
|
||||
@@ -0,0 +1,17 @@
|
||||
import { createHash, randomUUID } from "node:crypto";
|
||||
import { EOL } from "node:os";
|
||||
|
||||
export { randomUUID };
|
||||
|
||||
export { dirname, join } from "node:path";
|
||||
|
||||
export const createSHA256 = () => {
|
||||
return createHash("sha256");
|
||||
};
|
||||
|
||||
export async function fileTypeFromBuffer(buffer: Buffer) {
|
||||
const { fileTypeFromBuffer } = await import("file-type")
|
||||
return fileTypeFromBuffer(buffer);
|
||||
}
|
||||
|
||||
export { EOL };
|
||||
@@ -0,0 +1,35 @@
|
||||
export * from "./ChatEngine";
|
||||
export * from "./ChatHistory";
|
||||
export * from "./GlobalsHelper";
|
||||
export * from "./Node";
|
||||
export * from "./OutputParser";
|
||||
export * from "./Prompt";
|
||||
export * from "./PromptHelper";
|
||||
export * from "./QueryEngine";
|
||||
export * from "./QuestionGenerator";
|
||||
export * from "./Response";
|
||||
export * from "./Retriever";
|
||||
export * from "./ServiceContext";
|
||||
export * from "./TextSplitter";
|
||||
export * from "./Tool";
|
||||
export * from "./callbacks/CallbackManager";
|
||||
export * from "./constants";
|
||||
export * from "./embeddings";
|
||||
export * from "./indices";
|
||||
// export * from "./llm";
|
||||
export * from "./llm/openai";
|
||||
export * from "./nodeParsers";
|
||||
export * from "./postprocessors";
|
||||
// export * from "./readers/AssemblyAI";
|
||||
// export * from "./readers/CSVReader";
|
||||
// export * from "./readers/DocxReader";
|
||||
// export * from "./readers/HTMLReader";
|
||||
// export * from "./readers/MarkdownReader";
|
||||
// export * from "./readers/NotionReader";
|
||||
// export * from "./readers/PDFReader";
|
||||
// export * from "./readers/SimpleDirectoryReader";
|
||||
// export * from "./readers/SimpleMongoReader";
|
||||
// export * from "./readers/base";
|
||||
export * from "./storage/FileSystem";
|
||||
export * from "./storage/StorageContext";
|
||||
export * from "./synthesizers";
|
||||
@@ -29,5 +29,22 @@ export * from "./readers/PDFReader";
|
||||
export * from "./readers/SimpleDirectoryReader";
|
||||
export * from "./readers/SimpleMongoReader";
|
||||
export * from "./readers/base";
|
||||
export * from "./storage";
|
||||
// #region storage
|
||||
export * from "./storage/FileSystem";
|
||||
export * from "./storage/StorageContext";
|
||||
export * from "./storage/constants";
|
||||
export { SimpleDocumentStore } from "./storage/docStore/SimpleDocumentStore";
|
||||
export * from "./storage/docStore/types";
|
||||
export { SimpleIndexStore } from "./storage/indexStore/SimpleIndexStore";
|
||||
export * from "./storage/indexStore/types";
|
||||
export { SimpleKVStore } from "./storage/kvStore/SimpleKVStore";
|
||||
export * from "./storage/kvStore/types";
|
||||
export { AstraDBVectorStore } from "./storage/vectorStore/AstraDBVectorStore";
|
||||
export { ChromaVectorStore } from "./storage/vectorStore/ChromaVectorStore";
|
||||
export { MongoDBAtlasVectorSearch } from "./storage/vectorStore/MongoDBAtlasVectorStore";
|
||||
export { PGVectorStore } from "./storage/vectorStore/PGVectorStore";
|
||||
export { PineconeVectorStore } from "./storage/vectorStore/PineconeVectorStore";
|
||||
export { SimpleVectorStore } from "./storage/vectorStore/SimpleVectorStore";
|
||||
export * from "./storage/vectorStore/types";
|
||||
// #endregion
|
||||
export * from "./synthesizers";
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { BaseNode, Document, jsonToNode } from "../Node";
|
||||
import { BaseQueryEngine } from "../QueryEngine";
|
||||
import { BaseRetriever } from "../Retriever";
|
||||
import { ServiceContext } from "../ServiceContext";
|
||||
import { randomUUID } from "../environments";
|
||||
import { StorageContext } from "../storage/StorageContext";
|
||||
import { BaseDocumentStore } from "../storage/docStore/types";
|
||||
import { BaseIndexStore } from "../storage/indexStore/types";
|
||||
|
||||
@@ -8,10 +8,10 @@ import {
|
||||
} from "../../ServiceContext";
|
||||
import { BaseNodePostprocessor } from "../../postprocessors";
|
||||
import {
|
||||
BaseDocumentStore,
|
||||
StorageContext,
|
||||
storageContextFromDefaults,
|
||||
} from "../../storage";
|
||||
} from "../../storage/StorageContext";
|
||||
import { BaseDocumentStore } from "../../storage/docStore/types";
|
||||
import { BaseSynthesizer } from "../../synthesizers";
|
||||
import {
|
||||
BaseIndex,
|
||||
|
||||
@@ -8,11 +8,10 @@ import {
|
||||
} from "../../ServiceContext";
|
||||
import { BaseNodePostprocessor } from "../../postprocessors";
|
||||
import {
|
||||
BaseDocumentStore,
|
||||
RefDocInfo,
|
||||
StorageContext,
|
||||
storageContextFromDefaults,
|
||||
} from "../../storage";
|
||||
} from "../../storage/StorageContext";
|
||||
import { BaseDocumentStore, RefDocInfo } from "../../storage/docStore/types";
|
||||
import {
|
||||
BaseSynthesizer,
|
||||
CompactAndRefine,
|
||||
|
||||
@@ -19,11 +19,11 @@ import {
|
||||
} from "../../embeddings";
|
||||
import { BaseNodePostprocessor } from "../../postprocessors";
|
||||
import {
|
||||
BaseIndexStore,
|
||||
StorageContext,
|
||||
VectorStore,
|
||||
storageContextFromDefaults,
|
||||
} from "../../storage";
|
||||
} from "../../storage/StorageContext";
|
||||
import { BaseIndexStore } from "../../storage/indexStore/types";
|
||||
import { VectorStore } from "../../storage/vectorStore/types";
|
||||
import { BaseSynthesizer } from "../../synthesizers";
|
||||
import {
|
||||
BaseIndex,
|
||||
|
||||
@@ -1,30 +1,12 @@
|
||||
import OpenAILLM, { ClientOptions as OpenAIClientOptions } from "openai";
|
||||
import {
|
||||
AnthropicStreamToken,
|
||||
CallbackManager,
|
||||
Event,
|
||||
EventType,
|
||||
OpenAIStreamToken,
|
||||
StreamCallbackResponse,
|
||||
} from "../callbacks/CallbackManager";
|
||||
|
||||
import { ChatCompletionMessageParam } from "openai/resources";
|
||||
import { LLMOptions } from "portkey-ai";
|
||||
import { Tokenizers, globalsHelper } from "../GlobalsHelper";
|
||||
import {
|
||||
ANTHROPIC_AI_PROMPT,
|
||||
ANTHROPIC_HUMAN_PROMPT,
|
||||
AnthropicSession,
|
||||
getAnthropicSession,
|
||||
} from "./anthropic";
|
||||
import {
|
||||
AzureOpenAIConfig,
|
||||
getAzureBaseUrl,
|
||||
getAzureConfigFromEnv,
|
||||
getAzureModel,
|
||||
shouldUseAzure,
|
||||
} from "./azure";
|
||||
import { OpenAISession, getOpenAISession } from "./openai";
|
||||
import { Tokenizers } from "../GlobalsHelper";
|
||||
import { PortkeySession, getPortkeySession } from "./portkey";
|
||||
import { ReplicateSession } from "./replicate";
|
||||
|
||||
@@ -189,238 +171,6 @@ export const ALL_AVAILABLE_OPENAI_MODELS = {
|
||||
...GPT35_MODELS,
|
||||
};
|
||||
|
||||
/**
|
||||
* OpenAI LLM implementation
|
||||
*/
|
||||
export class OpenAI extends BaseLLM {
|
||||
// Per completion OpenAI params
|
||||
model: keyof typeof ALL_AVAILABLE_OPENAI_MODELS | string;
|
||||
temperature: number;
|
||||
topP: number;
|
||||
maxTokens?: number;
|
||||
additionalChatOptions?: Omit<
|
||||
Partial<OpenAILLM.Chat.ChatCompletionCreateParams>,
|
||||
"max_tokens" | "messages" | "model" | "temperature" | "top_p" | "stream"
|
||||
>;
|
||||
|
||||
// OpenAI session params
|
||||
apiKey?: string = undefined;
|
||||
maxRetries: number;
|
||||
timeout?: number;
|
||||
session: OpenAISession;
|
||||
additionalSessionOptions?: Omit<
|
||||
Partial<OpenAIClientOptions>,
|
||||
"apiKey" | "maxRetries" | "timeout"
|
||||
>;
|
||||
|
||||
callbackManager?: CallbackManager;
|
||||
|
||||
constructor(
|
||||
init?: Partial<OpenAI> & {
|
||||
azure?: AzureOpenAIConfig;
|
||||
},
|
||||
) {
|
||||
super();
|
||||
this.model = init?.model ?? "gpt-3.5-turbo";
|
||||
this.temperature = init?.temperature ?? 0.1;
|
||||
this.topP = init?.topP ?? 1;
|
||||
this.maxTokens = init?.maxTokens ?? undefined;
|
||||
|
||||
this.maxRetries = init?.maxRetries ?? 10;
|
||||
this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds
|
||||
this.additionalChatOptions = init?.additionalChatOptions;
|
||||
this.additionalSessionOptions = init?.additionalSessionOptions;
|
||||
|
||||
if (init?.azure || shouldUseAzure()) {
|
||||
const azureConfig = getAzureConfigFromEnv({
|
||||
...init?.azure,
|
||||
model: getAzureModel(this.model),
|
||||
});
|
||||
|
||||
if (!azureConfig.apiKey) {
|
||||
throw new Error(
|
||||
"Azure API key is required for OpenAI Azure models. Please set the AZURE_OPENAI_KEY environment variable.",
|
||||
);
|
||||
}
|
||||
|
||||
this.apiKey = azureConfig.apiKey;
|
||||
this.session =
|
||||
init?.session ??
|
||||
getOpenAISession({
|
||||
azure: true,
|
||||
apiKey: this.apiKey,
|
||||
baseURL: getAzureBaseUrl(azureConfig),
|
||||
maxRetries: this.maxRetries,
|
||||
timeout: this.timeout,
|
||||
defaultQuery: { "api-version": azureConfig.apiVersion },
|
||||
...this.additionalSessionOptions,
|
||||
});
|
||||
} else {
|
||||
this.apiKey = init?.apiKey ?? undefined;
|
||||
this.session =
|
||||
init?.session ??
|
||||
getOpenAISession({
|
||||
apiKey: this.apiKey,
|
||||
maxRetries: this.maxRetries,
|
||||
timeout: this.timeout,
|
||||
...this.additionalSessionOptions,
|
||||
});
|
||||
}
|
||||
|
||||
this.callbackManager = init?.callbackManager;
|
||||
}
|
||||
|
||||
get metadata() {
|
||||
const contextWindow =
|
||||
ALL_AVAILABLE_OPENAI_MODELS[
|
||||
this.model as keyof typeof ALL_AVAILABLE_OPENAI_MODELS
|
||||
]?.contextWindow ?? 1024;
|
||||
return {
|
||||
model: this.model,
|
||||
temperature: this.temperature,
|
||||
topP: this.topP,
|
||||
maxTokens: this.maxTokens,
|
||||
contextWindow,
|
||||
tokenizer: Tokenizers.CL100K_BASE,
|
||||
};
|
||||
}
|
||||
|
||||
tokens(messages: ChatMessage[]): number {
|
||||
// for latest OpenAI models, see https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
|
||||
const tokenizer = globalsHelper.tokenizer(this.metadata.tokenizer);
|
||||
const tokensPerMessage = 3;
|
||||
let numTokens = 0;
|
||||
for (const message of messages) {
|
||||
numTokens += tokensPerMessage;
|
||||
for (const value of Object.values(message)) {
|
||||
numTokens += tokenizer(value).length;
|
||||
}
|
||||
}
|
||||
numTokens += 3; // every reply is primed with <|im_start|>assistant<|im_sep|>
|
||||
return numTokens;
|
||||
}
|
||||
|
||||
mapMessageType(
|
||||
messageType: MessageType,
|
||||
): "user" | "assistant" | "system" | "function" {
|
||||
switch (messageType) {
|
||||
case "user":
|
||||
return "user";
|
||||
case "assistant":
|
||||
return "assistant";
|
||||
case "system":
|
||||
return "system";
|
||||
case "function":
|
||||
return "function";
|
||||
default:
|
||||
return "user";
|
||||
}
|
||||
}
|
||||
|
||||
chat(
|
||||
params: LLMChatParamsStreaming,
|
||||
): Promise<AsyncIterable<ChatResponseChunk>>;
|
||||
chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
|
||||
async chat(
|
||||
params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
|
||||
): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
|
||||
const { messages, parentEvent, stream } = params;
|
||||
const baseRequestParams: OpenAILLM.Chat.ChatCompletionCreateParams = {
|
||||
model: this.model,
|
||||
temperature: this.temperature,
|
||||
max_tokens: this.maxTokens,
|
||||
messages: messages.map(
|
||||
(message) =>
|
||||
({
|
||||
role: this.mapMessageType(message.role),
|
||||
content: message.content,
|
||||
}) as ChatCompletionMessageParam,
|
||||
),
|
||||
top_p: this.topP,
|
||||
...this.additionalChatOptions,
|
||||
};
|
||||
// Streaming
|
||||
if (stream) {
|
||||
return this.streamChat(params);
|
||||
}
|
||||
// Non-streaming
|
||||
const response = await this.session.openai.chat.completions.create({
|
||||
...baseRequestParams,
|
||||
stream: false,
|
||||
});
|
||||
|
||||
const content = response.choices[0].message?.content ?? "";
|
||||
return {
|
||||
message: { content, role: response.choices[0].message.role },
|
||||
};
|
||||
}
|
||||
|
||||
protected async *streamChat({
|
||||
messages,
|
||||
parentEvent,
|
||||
}: LLMChatParamsStreaming): AsyncIterable<ChatResponseChunk> {
|
||||
const baseRequestParams: OpenAILLM.Chat.ChatCompletionCreateParams = {
|
||||
model: this.model,
|
||||
temperature: this.temperature,
|
||||
max_tokens: this.maxTokens,
|
||||
messages: messages.map(
|
||||
(message) =>
|
||||
({
|
||||
role: this.mapMessageType(message.role),
|
||||
content: message.content,
|
||||
}) as ChatCompletionMessageParam,
|
||||
),
|
||||
top_p: this.topP,
|
||||
...this.additionalChatOptions,
|
||||
};
|
||||
|
||||
//Now let's wrap our stream in a callback
|
||||
const onLLMStream = this.callbackManager?.onLLMStream
|
||||
? this.callbackManager.onLLMStream
|
||||
: () => {};
|
||||
|
||||
const chunk_stream: AsyncIterable<OpenAIStreamToken> =
|
||||
await this.session.openai.chat.completions.create({
|
||||
...baseRequestParams,
|
||||
stream: true,
|
||||
});
|
||||
|
||||
const event: Event = parentEvent
|
||||
? parentEvent
|
||||
: {
|
||||
id: "unspecified",
|
||||
type: "llmPredict" as EventType,
|
||||
};
|
||||
|
||||
//Indices
|
||||
var idx_counter: number = 0;
|
||||
for await (const part of chunk_stream) {
|
||||
if (!part.choices.length) continue;
|
||||
|
||||
//Increment
|
||||
part.choices[0].index = idx_counter;
|
||||
const is_done: boolean =
|
||||
part.choices[0].finish_reason === "stop" ? true : false;
|
||||
//onLLMStream Callback
|
||||
|
||||
const stream_callback: StreamCallbackResponse = {
|
||||
event: event,
|
||||
index: idx_counter,
|
||||
isDone: is_done,
|
||||
token: part,
|
||||
};
|
||||
onLLMStream(stream_callback);
|
||||
|
||||
idx_counter++;
|
||||
|
||||
yield {
|
||||
delta: part.choices[0].delta.content ?? "",
|
||||
};
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
export const ALL_AVAILABLE_LLAMADEUCE_MODELS = {
|
||||
"Llama-2-70b-chat-old": {
|
||||
contextWindow: 4096,
|
||||
@@ -696,133 +446,6 @@ export const ALL_AVAILABLE_ANTHROPIC_MODELS = {
|
||||
"claude-instant-1": { contextWindow: 100000 },
|
||||
};
|
||||
|
||||
/**
|
||||
* Anthropic LLM implementation
|
||||
*/
|
||||
|
||||
export class Anthropic extends BaseLLM {
|
||||
// Per completion Anthropic params
|
||||
model: keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS;
|
||||
temperature: number;
|
||||
topP: number;
|
||||
maxTokens?: number;
|
||||
|
||||
// Anthropic session params
|
||||
apiKey?: string = undefined;
|
||||
maxRetries: number;
|
||||
timeout?: number;
|
||||
session: AnthropicSession;
|
||||
|
||||
callbackManager?: CallbackManager;
|
||||
|
||||
constructor(init?: Partial<Anthropic>) {
|
||||
super();
|
||||
this.model = init?.model ?? "claude-2";
|
||||
this.temperature = init?.temperature ?? 0.1;
|
||||
this.topP = init?.topP ?? 0.999; // Per Ben Mann
|
||||
this.maxTokens = init?.maxTokens ?? undefined;
|
||||
|
||||
this.apiKey = init?.apiKey ?? undefined;
|
||||
this.maxRetries = init?.maxRetries ?? 10;
|
||||
this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds
|
||||
this.session =
|
||||
init?.session ??
|
||||
getAnthropicSession({
|
||||
apiKey: this.apiKey,
|
||||
maxRetries: this.maxRetries,
|
||||
timeout: this.timeout,
|
||||
});
|
||||
|
||||
this.callbackManager = init?.callbackManager;
|
||||
}
|
||||
|
||||
tokens(messages: ChatMessage[]): number {
|
||||
throw new Error("Method not implemented.");
|
||||
}
|
||||
|
||||
get metadata() {
|
||||
return {
|
||||
model: this.model,
|
||||
temperature: this.temperature,
|
||||
topP: this.topP,
|
||||
maxTokens: this.maxTokens,
|
||||
contextWindow: ALL_AVAILABLE_ANTHROPIC_MODELS[this.model].contextWindow,
|
||||
tokenizer: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
mapMessagesToPrompt(messages: ChatMessage[]) {
|
||||
return (
|
||||
messages.reduce((acc, message) => {
|
||||
return (
|
||||
acc +
|
||||
`${
|
||||
message.role === "system"
|
||||
? ""
|
||||
: message.role === "assistant"
|
||||
? ANTHROPIC_AI_PROMPT + " "
|
||||
: ANTHROPIC_HUMAN_PROMPT + " "
|
||||
}${message.content.trim()}`
|
||||
);
|
||||
}, "") + ANTHROPIC_AI_PROMPT
|
||||
);
|
||||
}
|
||||
|
||||
chat(
|
||||
params: LLMChatParamsStreaming,
|
||||
): Promise<AsyncIterable<ChatResponseChunk>>;
|
||||
chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
|
||||
async chat(
|
||||
params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
|
||||
): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
|
||||
const { messages, parentEvent, stream } = params;
|
||||
//Streaming
|
||||
if (stream) {
|
||||
return this.streamChat(messages, parentEvent);
|
||||
}
|
||||
|
||||
//Non-streaming
|
||||
const response = await this.session.anthropic.completions.create({
|
||||
model: this.model,
|
||||
prompt: this.mapMessagesToPrompt(messages),
|
||||
max_tokens_to_sample: this.maxTokens ?? 100000,
|
||||
temperature: this.temperature,
|
||||
top_p: this.topP,
|
||||
});
|
||||
|
||||
return {
|
||||
message: { content: response.completion.trimStart(), role: "assistant" },
|
||||
//^ We're trimming the start because Anthropic often starts with a space in the response
|
||||
// That space will be re-added when we generate the next prompt.
|
||||
};
|
||||
}
|
||||
|
||||
protected async *streamChat(
|
||||
messages: ChatMessage[],
|
||||
parentEvent?: Event | undefined,
|
||||
): AsyncIterable<ChatResponseChunk> {
|
||||
// AsyncIterable<AnthropicStreamToken>
|
||||
const stream: AsyncIterable<AnthropicStreamToken> =
|
||||
await this.session.anthropic.completions.create({
|
||||
model: this.model,
|
||||
prompt: this.mapMessagesToPrompt(messages),
|
||||
max_tokens_to_sample: this.maxTokens ?? 100000,
|
||||
temperature: this.temperature,
|
||||
top_p: this.topP,
|
||||
stream: true,
|
||||
});
|
||||
|
||||
var idx_counter: number = 0;
|
||||
for await (const part of stream) {
|
||||
//TODO: LLM Stream Callback, pending re-work.
|
||||
|
||||
idx_counter++;
|
||||
yield { delta: part.completion };
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
export class Portkey extends BaseLLM {
|
||||
apiKey?: string = undefined;
|
||||
baseURL?: string = undefined;
|
||||
|
||||
@@ -1,12 +1,26 @@
|
||||
import Anthropic, {
|
||||
import AnthropicBase, {
|
||||
AI_PROMPT,
|
||||
ClientOptions,
|
||||
HUMAN_PROMPT,
|
||||
} from "@anthropic-ai/sdk";
|
||||
import _ from "lodash";
|
||||
import {
|
||||
AnthropicStreamToken,
|
||||
CallbackManager,
|
||||
Event,
|
||||
} from "../callbacks/CallbackManager";
|
||||
import {
|
||||
ALL_AVAILABLE_ANTHROPIC_MODELS,
|
||||
BaseLLM,
|
||||
ChatMessage,
|
||||
ChatResponse,
|
||||
ChatResponseChunk,
|
||||
LLMChatParamsNonStreaming,
|
||||
LLMChatParamsStreaming,
|
||||
} from "./LLM";
|
||||
|
||||
export class AnthropicSession {
|
||||
anthropic: Anthropic;
|
||||
anthropic: AnthropicBase;
|
||||
|
||||
constructor(options: ClientOptions = {}) {
|
||||
if (!options.apiKey) {
|
||||
@@ -19,7 +33,7 @@ export class AnthropicSession {
|
||||
throw new Error("Set Anthropic Key in ANTHROPIC_API_KEY env variable"); // Overriding Anthropic package's error message
|
||||
}
|
||||
|
||||
this.anthropic = new Anthropic(options);
|
||||
this.anthropic = new AnthropicBase(options);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,3 +66,130 @@ export function getAnthropicSession(options: ClientOptions = {}) {
|
||||
|
||||
export const ANTHROPIC_HUMAN_PROMPT = HUMAN_PROMPT;
|
||||
export const ANTHROPIC_AI_PROMPT = AI_PROMPT;
|
||||
|
||||
/**
|
||||
* Anthropic LLM implementation
|
||||
*/
|
||||
|
||||
export class Anthropic extends BaseLLM {
|
||||
// Per completion Anthropic params
|
||||
model: keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS;
|
||||
temperature: number;
|
||||
topP: number;
|
||||
maxTokens?: number;
|
||||
|
||||
// Anthropic session params
|
||||
apiKey?: string = undefined;
|
||||
maxRetries: number;
|
||||
timeout?: number;
|
||||
session: AnthropicSession;
|
||||
|
||||
callbackManager?: CallbackManager;
|
||||
|
||||
constructor(init?: Partial<Anthropic>) {
|
||||
super();
|
||||
this.model = init?.model ?? "claude-2";
|
||||
this.temperature = init?.temperature ?? 0.1;
|
||||
this.topP = init?.topP ?? 0.999; // Per Ben Mann
|
||||
this.maxTokens = init?.maxTokens ?? undefined;
|
||||
|
||||
this.apiKey = init?.apiKey ?? undefined;
|
||||
this.maxRetries = init?.maxRetries ?? 10;
|
||||
this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds
|
||||
this.session =
|
||||
init?.session ??
|
||||
getAnthropicSession({
|
||||
apiKey: this.apiKey,
|
||||
maxRetries: this.maxRetries,
|
||||
timeout: this.timeout,
|
||||
});
|
||||
|
||||
this.callbackManager = init?.callbackManager;
|
||||
}
|
||||
|
||||
tokens(messages: ChatMessage[]): number {
|
||||
throw new Error("Method not implemented.");
|
||||
}
|
||||
|
||||
get metadata() {
|
||||
return {
|
||||
model: this.model,
|
||||
temperature: this.temperature,
|
||||
topP: this.topP,
|
||||
maxTokens: this.maxTokens,
|
||||
contextWindow: ALL_AVAILABLE_ANTHROPIC_MODELS[this.model].contextWindow,
|
||||
tokenizer: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
mapMessagesToPrompt(messages: ChatMessage[]) {
|
||||
return (
|
||||
messages.reduce((acc, message) => {
|
||||
return (
|
||||
acc +
|
||||
`${
|
||||
message.role === "system"
|
||||
? ""
|
||||
: message.role === "assistant"
|
||||
? ANTHROPIC_AI_PROMPT + " "
|
||||
: ANTHROPIC_HUMAN_PROMPT + " "
|
||||
}${message.content.trim()}`
|
||||
);
|
||||
}, "") + ANTHROPIC_AI_PROMPT
|
||||
);
|
||||
}
|
||||
|
||||
chat(
|
||||
params: LLMChatParamsStreaming,
|
||||
): Promise<AsyncIterable<ChatResponseChunk>>;
|
||||
chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
|
||||
async chat(
|
||||
params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
|
||||
): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
|
||||
const { messages, parentEvent, stream } = params;
|
||||
//Streaming
|
||||
if (stream) {
|
||||
return this.streamChat(messages, parentEvent);
|
||||
}
|
||||
|
||||
//Non-streaming
|
||||
const response = await this.session.anthropic.completions.create({
|
||||
model: this.model,
|
||||
prompt: this.mapMessagesToPrompt(messages),
|
||||
max_tokens_to_sample: this.maxTokens ?? 100000,
|
||||
temperature: this.temperature,
|
||||
top_p: this.topP,
|
||||
});
|
||||
|
||||
return {
|
||||
message: { content: response.completion.trimStart(), role: "assistant" },
|
||||
//^ We're trimming the start because Anthropic often starts with a space in the response
|
||||
// That space will be re-added when we generate the next prompt.
|
||||
};
|
||||
}
|
||||
|
||||
protected async *streamChat(
|
||||
messages: ChatMessage[],
|
||||
parentEvent?: Event | undefined,
|
||||
): AsyncIterable<ChatResponseChunk> {
|
||||
// AsyncIterable<AnthropicStreamToken>
|
||||
const stream: AsyncIterable<AnthropicStreamToken> =
|
||||
await this.session.anthropic.completions.create({
|
||||
model: this.model,
|
||||
prompt: this.mapMessagesToPrompt(messages),
|
||||
max_tokens_to_sample: this.maxTokens ?? 100000,
|
||||
temperature: this.temperature,
|
||||
top_p: this.topP,
|
||||
stream: true,
|
||||
});
|
||||
|
||||
var idx_counter: number = 0;
|
||||
for await (const part of stream) {
|
||||
//TODO: LLM Stream Callback, pending re-work.
|
||||
|
||||
idx_counter++;
|
||||
yield { delta: part.completion };
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { ok } from "node:assert";
|
||||
import { CallbackManager, Event } from "../callbacks/CallbackManager";
|
||||
import { BaseEmbedding } from "../embeddings";
|
||||
import { assertExists } from "../utils";
|
||||
import {
|
||||
ChatMessage,
|
||||
ChatResponse,
|
||||
@@ -101,8 +101,8 @@ export class Ollama extends BaseEmbedding implements LLM {
|
||||
};
|
||||
} else {
|
||||
const stream = response.body;
|
||||
ok(stream, "stream is null");
|
||||
ok(stream instanceof ReadableStream, "stream is not readable");
|
||||
assertExists(stream, "stream is null");
|
||||
assertExists(stream instanceof ReadableStream, "stream is not readable");
|
||||
return this.streamChat(stream, messageAccessor, parentEvent);
|
||||
}
|
||||
}
|
||||
@@ -172,8 +172,8 @@ export class Ollama extends BaseEmbedding implements LLM {
|
||||
};
|
||||
} else {
|
||||
const stream = response.body;
|
||||
ok(stream, "stream is null");
|
||||
ok(stream instanceof ReadableStream, "stream is not readable");
|
||||
assertExists(stream, "stream is null");
|
||||
assertExists(stream instanceof ReadableStream, "stream is not readable");
|
||||
return this.streamChat(stream, completionAccessor, parentEvent);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,16 +1,42 @@
|
||||
import _ from "lodash";
|
||||
import OpenAI, { ClientOptions } from "openai";
|
||||
import OpenAILLM, { ClientOptions as OpenAIClientOptions } from "openai";
|
||||
import { ChatCompletionMessageParam } from "openai/resources";
|
||||
import { Tokenizers, globalsHelper } from "../GlobalsHelper";
|
||||
import {
|
||||
CallbackManager,
|
||||
Event,
|
||||
EventType,
|
||||
OpenAIStreamToken,
|
||||
StreamCallbackResponse,
|
||||
} from "../callbacks/CallbackManager";
|
||||
import {
|
||||
ALL_AVAILABLE_OPENAI_MODELS,
|
||||
BaseLLM,
|
||||
ChatMessage,
|
||||
ChatResponse,
|
||||
ChatResponseChunk,
|
||||
LLMChatParamsNonStreaming,
|
||||
LLMChatParamsStreaming,
|
||||
MessageType,
|
||||
} from "./LLM";
|
||||
import {
|
||||
AzureOpenAIConfig,
|
||||
getAzureBaseUrl,
|
||||
getAzureConfigFromEnv,
|
||||
getAzureModel,
|
||||
shouldUseAzure,
|
||||
} from "./azure";
|
||||
|
||||
export class AzureOpenAI extends OpenAI {
|
||||
export class AzureOpenAI extends OpenAILLM {
|
||||
protected override authHeaders() {
|
||||
return { "api-key": this.apiKey };
|
||||
}
|
||||
}
|
||||
|
||||
export class OpenAISession {
|
||||
openai: OpenAI;
|
||||
openai: OpenAILLM;
|
||||
|
||||
constructor(options: ClientOptions & { azure?: boolean } = {}) {
|
||||
constructor(options: OpenAIClientOptions & { azure?: boolean } = {}) {
|
||||
if (!options.apiKey) {
|
||||
if (typeof process !== undefined) {
|
||||
options.apiKey = process.env.OPENAI_API_KEY;
|
||||
@@ -24,7 +50,7 @@ export class OpenAISession {
|
||||
if (options.azure) {
|
||||
this.openai = new AzureOpenAI(options);
|
||||
} else {
|
||||
this.openai = new OpenAI({
|
||||
this.openai = new OpenAILLM({
|
||||
...options,
|
||||
// defaultHeaders: { "OpenAI-Beta": "assistants=v1" },
|
||||
});
|
||||
@@ -35,8 +61,10 @@ export class OpenAISession {
|
||||
// I'm not 100% sure this is necessary vs. just starting a new session
|
||||
// every time we make a call. They say they try to reuse connections
|
||||
// so in theory this is more efficient, but we should test it in the future.
|
||||
let defaultOpenAISession: { session: OpenAISession; options: ClientOptions }[] =
|
||||
[];
|
||||
let defaultOpenAISession: {
|
||||
session: OpenAISession;
|
||||
options: OpenAIClientOptions;
|
||||
}[] = [];
|
||||
|
||||
/**
|
||||
* Get a session for the OpenAI API. If one already exists with the same options,
|
||||
@@ -45,7 +73,7 @@ let defaultOpenAISession: { session: OpenAISession; options: ClientOptions }[] =
|
||||
* @returns
|
||||
*/
|
||||
export function getOpenAISession(
|
||||
options: ClientOptions & { azure?: boolean } = {},
|
||||
options: OpenAIClientOptions & { azure?: boolean } = {},
|
||||
) {
|
||||
let session = defaultOpenAISession.find((session) => {
|
||||
return _.isEqual(session.options, options);
|
||||
@@ -58,3 +86,235 @@ export function getOpenAISession(
|
||||
|
||||
return session;
|
||||
}
|
||||
|
||||
/**
|
||||
* OpenAI LLM implementation
|
||||
*/
|
||||
export class OpenAI extends BaseLLM {
|
||||
// Per completion OpenAI params
|
||||
model: keyof typeof ALL_AVAILABLE_OPENAI_MODELS | string;
|
||||
temperature: number;
|
||||
topP: number;
|
||||
maxTokens?: number;
|
||||
additionalChatOptions?: Omit<
|
||||
Partial<OpenAILLM.Chat.ChatCompletionCreateParams>,
|
||||
"max_tokens" | "messages" | "model" | "temperature" | "top_p" | "stream"
|
||||
>;
|
||||
|
||||
// OpenAI session params
|
||||
apiKey?: string = undefined;
|
||||
maxRetries: number;
|
||||
timeout?: number;
|
||||
session: OpenAISession;
|
||||
additionalSessionOptions?: Omit<
|
||||
Partial<OpenAIClientOptions>,
|
||||
"apiKey" | "maxRetries" | "timeout"
|
||||
>;
|
||||
|
||||
callbackManager?: CallbackManager;
|
||||
|
||||
constructor(
|
||||
init?: Partial<OpenAI> & {
|
||||
azure?: AzureOpenAIConfig;
|
||||
},
|
||||
) {
|
||||
super();
|
||||
this.model = init?.model ?? "gpt-3.5-turbo";
|
||||
this.temperature = init?.temperature ?? 0.1;
|
||||
this.topP = init?.topP ?? 1;
|
||||
this.maxTokens = init?.maxTokens ?? undefined;
|
||||
|
||||
this.maxRetries = init?.maxRetries ?? 10;
|
||||
this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds
|
||||
this.additionalChatOptions = init?.additionalChatOptions;
|
||||
this.additionalSessionOptions = init?.additionalSessionOptions;
|
||||
|
||||
if (init?.azure || shouldUseAzure()) {
|
||||
const azureConfig = getAzureConfigFromEnv({
|
||||
...init?.azure,
|
||||
model: getAzureModel(this.model),
|
||||
});
|
||||
|
||||
if (!azureConfig.apiKey) {
|
||||
throw new Error(
|
||||
"Azure API key is required for OpenAI Azure models. Please set the AZURE_OPENAI_KEY environment variable.",
|
||||
);
|
||||
}
|
||||
|
||||
this.apiKey = azureConfig.apiKey;
|
||||
this.session =
|
||||
init?.session ??
|
||||
getOpenAISession({
|
||||
azure: true,
|
||||
apiKey: this.apiKey,
|
||||
baseURL: getAzureBaseUrl(azureConfig),
|
||||
maxRetries: this.maxRetries,
|
||||
timeout: this.timeout,
|
||||
defaultQuery: { "api-version": azureConfig.apiVersion },
|
||||
...this.additionalSessionOptions,
|
||||
});
|
||||
} else {
|
||||
this.apiKey = init?.apiKey ?? undefined;
|
||||
this.session =
|
||||
init?.session ??
|
||||
getOpenAISession({
|
||||
apiKey: this.apiKey,
|
||||
maxRetries: this.maxRetries,
|
||||
timeout: this.timeout,
|
||||
...this.additionalSessionOptions,
|
||||
});
|
||||
}
|
||||
|
||||
this.callbackManager = init?.callbackManager;
|
||||
}
|
||||
|
||||
get metadata() {
|
||||
const contextWindow =
|
||||
ALL_AVAILABLE_OPENAI_MODELS[
|
||||
this.model as keyof typeof ALL_AVAILABLE_OPENAI_MODELS
|
||||
]?.contextWindow ?? 1024;
|
||||
return {
|
||||
model: this.model,
|
||||
temperature: this.temperature,
|
||||
topP: this.topP,
|
||||
maxTokens: this.maxTokens,
|
||||
contextWindow,
|
||||
tokenizer: Tokenizers.CL100K_BASE,
|
||||
};
|
||||
}
|
||||
|
||||
tokens(messages: ChatMessage[]): number {
|
||||
// for latest OpenAI models, see https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
|
||||
const tokenizer = globalsHelper.tokenizer(this.metadata.tokenizer);
|
||||
const tokensPerMessage = 3;
|
||||
let numTokens = 0;
|
||||
for (const message of messages) {
|
||||
numTokens += tokensPerMessage;
|
||||
for (const value of Object.values(message)) {
|
||||
numTokens += tokenizer(value).length;
|
||||
}
|
||||
}
|
||||
numTokens += 3; // every reply is primed with <|im_start|>assistant<|im_sep|>
|
||||
return numTokens;
|
||||
}
|
||||
|
||||
mapMessageType(
|
||||
messageType: MessageType,
|
||||
): "user" | "assistant" | "system" | "function" {
|
||||
switch (messageType) {
|
||||
case "user":
|
||||
return "user";
|
||||
case "assistant":
|
||||
return "assistant";
|
||||
case "system":
|
||||
return "system";
|
||||
case "function":
|
||||
return "function";
|
||||
default:
|
||||
return "user";
|
||||
}
|
||||
}
|
||||
|
||||
chat(
|
||||
params: LLMChatParamsStreaming,
|
||||
): Promise<AsyncIterable<ChatResponseChunk>>;
|
||||
chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
|
||||
async chat(
|
||||
params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
|
||||
): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
|
||||
const { messages, parentEvent, stream } = params;
|
||||
const baseRequestParams: OpenAILLM.Chat.ChatCompletionCreateParams = {
|
||||
model: this.model,
|
||||
temperature: this.temperature,
|
||||
max_tokens: this.maxTokens,
|
||||
messages: messages.map(
|
||||
(message) =>
|
||||
({
|
||||
role: this.mapMessageType(message.role),
|
||||
content: message.content,
|
||||
}) as ChatCompletionMessageParam,
|
||||
),
|
||||
top_p: this.topP,
|
||||
...this.additionalChatOptions,
|
||||
};
|
||||
// Streaming
|
||||
if (stream) {
|
||||
return this.streamChat(params);
|
||||
}
|
||||
// Non-streaming
|
||||
const response = await this.session.openai.chat.completions.create({
|
||||
...baseRequestParams,
|
||||
stream: false,
|
||||
});
|
||||
|
||||
const content = response.choices[0].message?.content ?? "";
|
||||
return {
|
||||
message: { content, role: response.choices[0].message.role },
|
||||
};
|
||||
}
|
||||
|
||||
protected async *streamChat({
|
||||
messages,
|
||||
parentEvent,
|
||||
}: LLMChatParamsStreaming): AsyncIterable<ChatResponseChunk> {
|
||||
const baseRequestParams: OpenAILLM.Chat.ChatCompletionCreateParams = {
|
||||
model: this.model,
|
||||
temperature: this.temperature,
|
||||
max_tokens: this.maxTokens,
|
||||
messages: messages.map(
|
||||
(message) =>
|
||||
({
|
||||
role: this.mapMessageType(message.role),
|
||||
content: message.content,
|
||||
}) as ChatCompletionMessageParam,
|
||||
),
|
||||
top_p: this.topP,
|
||||
...this.additionalChatOptions,
|
||||
};
|
||||
|
||||
//Now let's wrap our stream in a callback
|
||||
const onLLMStream = this.callbackManager?.onLLMStream
|
||||
? this.callbackManager.onLLMStream
|
||||
: () => {};
|
||||
|
||||
const chunk_stream: AsyncIterable<OpenAIStreamToken> =
|
||||
await this.session.openai.chat.completions.create({
|
||||
...baseRequestParams,
|
||||
stream: true,
|
||||
});
|
||||
|
||||
const event: Event = parentEvent
|
||||
? parentEvent
|
||||
: {
|
||||
id: "unspecified",
|
||||
type: "llmPredict" as EventType,
|
||||
};
|
||||
|
||||
//Indices
|
||||
var idx_counter: number = 0;
|
||||
for await (const part of chunk_stream) {
|
||||
if (!part.choices.length) continue;
|
||||
|
||||
//Increment
|
||||
part.choices[0].index = idx_counter;
|
||||
const is_done: boolean =
|
||||
part.choices[0].finish_reason === "stop" ? true : false;
|
||||
//onLLMStream Callback
|
||||
|
||||
const stream_callback: StreamCallbackResponse = {
|
||||
event: event,
|
||||
index: idx_counter,
|
||||
isDone: is_done,
|
||||
token: part,
|
||||
};
|
||||
onLLMStream(stream_callback);
|
||||
|
||||
idx_counter++;
|
||||
|
||||
yield {
|
||||
delta: part.choices[0].delta.content ?? "",
|
||||
};
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,5 +28,3 @@ export function getReplicateSession(replicateKey: string | null = null) {
|
||||
|
||||
return defaultReplicateSession;
|
||||
}
|
||||
|
||||
export * from "openai";
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { OpenAI } from "./LLM";
|
||||
import { OpenAI } from "./openai";
|
||||
|
||||
export class TogetherLLM extends OpenAI {
|
||||
constructor(init?: Partial<OpenAI>) {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import Papa, { ParseConfig } from "papaparse";
|
||||
import { Document } from "../Node";
|
||||
import { DEFAULT_FS, GenericFileSystem } from "../storage/FileSystem";
|
||||
import { genericFileSystem, GenericFileSystem } from "../storage/FileSystem";
|
||||
import { BaseReader } from "./base";
|
||||
|
||||
/**
|
||||
@@ -40,7 +40,7 @@ export class PapaCSVReader implements BaseReader {
|
||||
*/
|
||||
async loadData(
|
||||
file: string,
|
||||
fs: GenericFileSystem = DEFAULT_FS,
|
||||
fs: GenericFileSystem = genericFileSystem,
|
||||
): Promise<Document[]> {
|
||||
const fileContent: string = await fs.readFile(file, "utf-8");
|
||||
const result = Papa.parse(fileContent, this.papaConfig);
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
import mammoth from "mammoth";
|
||||
import { Document } from "../Node";
|
||||
import { GenericFileSystem } from "../storage/FileSystem";
|
||||
import { DEFAULT_FS } from "../storage/constants";
|
||||
import { genericFileSystem, GenericFileSystem } from "../storage/FileSystem";
|
||||
import { BaseReader } from "./base";
|
||||
|
||||
export class DocxReader implements BaseReader {
|
||||
/** DocxParser */
|
||||
async loadData(
|
||||
file: string,
|
||||
fs: GenericFileSystem = DEFAULT_FS,
|
||||
fs: GenericFileSystem = genericFileSystem,
|
||||
): Promise<Document[]> {
|
||||
const dataBuffer = (await fs.readFile(file)) as any;
|
||||
const { value } = await mammoth.extractRawText({ buffer: dataBuffer });
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import { Document } from "../Node";
|
||||
import { DEFAULT_FS } from "../storage/constants";
|
||||
import { GenericFileSystem } from "../storage/FileSystem";
|
||||
import { genericFileSystem, GenericFileSystem } from "../storage/FileSystem";
|
||||
import { BaseReader } from "./base";
|
||||
|
||||
/**
|
||||
@@ -20,7 +19,7 @@ export class HTMLReader implements BaseReader {
|
||||
*/
|
||||
async loadData(
|
||||
file: string,
|
||||
fs: GenericFileSystem = DEFAULT_FS,
|
||||
fs: GenericFileSystem = genericFileSystem,
|
||||
): Promise<Document[]> {
|
||||
const dataBuffer = await fs.readFile(file, "utf-8");
|
||||
const htmlOptions = this.getOptions();
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import { Document, ImageDocument } from "../Node";
|
||||
import { DEFAULT_FS } from "../storage/constants";
|
||||
import { GenericFileSystem } from "../storage/FileSystem";
|
||||
import { GenericFileSystem, genericFileSystem } from "../storage/FileSystem";
|
||||
import { BaseReader } from "./base";
|
||||
|
||||
/**
|
||||
@@ -16,7 +15,7 @@ export class ImageReader implements BaseReader {
|
||||
*/
|
||||
async loadData(
|
||||
file: string,
|
||||
fs: GenericFileSystem = DEFAULT_FS,
|
||||
fs: GenericFileSystem = genericFileSystem,
|
||||
): Promise<Document[]> {
|
||||
const dataBuffer = await fs.readFile(file);
|
||||
const blob = new Blob([dataBuffer]);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { Document } from "../Node";
|
||||
import { DEFAULT_FS, GenericFileSystem } from "../storage";
|
||||
import { GenericFileSystem, genericFileSystem } from "../storage/FileSystem";
|
||||
import { BaseReader } from "./base";
|
||||
|
||||
type MarkdownTuple = [string | null, string];
|
||||
@@ -89,7 +89,7 @@ export class MarkdownReader implements BaseReader {
|
||||
|
||||
async loadData(
|
||||
file: string,
|
||||
fs: GenericFileSystem = DEFAULT_FS,
|
||||
fs: GenericFileSystem = genericFileSystem,
|
||||
): Promise<Document[]> {
|
||||
const content = await fs.readFile(file, { encoding: "utf-8" });
|
||||
const tups = this.parseTups(content);
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import { Document } from "../Node";
|
||||
import { GenericFileSystem } from "../storage/FileSystem";
|
||||
import { DEFAULT_FS } from "../storage/constants";
|
||||
import { genericFileSystem, GenericFileSystem } from "../storage/FileSystem";
|
||||
import { BaseReader } from "./base";
|
||||
|
||||
/**
|
||||
@@ -9,7 +8,7 @@ import { BaseReader } from "./base";
|
||||
export class PDFReader implements BaseReader {
|
||||
async loadData(
|
||||
file: string,
|
||||
fs: GenericFileSystem = DEFAULT_FS,
|
||||
fs: GenericFileSystem = genericFileSystem,
|
||||
): Promise<Document[]> {
|
||||
const content = (await fs.readFile(file)) as any;
|
||||
if (!(content instanceof Buffer)) {
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import _ from "lodash";
|
||||
import { Document } from "../Node";
|
||||
import { CompleteFileSystem, walk } from "../storage/FileSystem";
|
||||
import { DEFAULT_FS } from "../storage/constants";
|
||||
import {
|
||||
CompleteFileSystem,
|
||||
genericFileSystem,
|
||||
walk,
|
||||
} from "../storage/FileSystem";
|
||||
import { PapaCSVReader } from "./CSVReader";
|
||||
import { DocxReader } from "./DocxReader";
|
||||
import { HTMLReader } from "./HTMLReader";
|
||||
@@ -28,7 +31,7 @@ enum ReaderStatus {
|
||||
export class TextFileReader implements BaseReader {
|
||||
async loadData(
|
||||
file: string,
|
||||
fs: CompleteFileSystem = DEFAULT_FS as CompleteFileSystem,
|
||||
fs: CompleteFileSystem = genericFileSystem as CompleteFileSystem,
|
||||
): Promise<Document[]> {
|
||||
const dataBuffer = await fs.readFile(file, "utf-8");
|
||||
return [new Document({ text: dataBuffer, id_: file })];
|
||||
@@ -66,7 +69,7 @@ export class SimpleDirectoryReader implements BaseReader {
|
||||
|
||||
async loadData({
|
||||
directoryPath,
|
||||
fs = DEFAULT_FS as CompleteFileSystem,
|
||||
fs = genericFileSystem,
|
||||
defaultReader = new TextFileReader(),
|
||||
fileExtToReader = FILE_EXT_TO_READER,
|
||||
}: SimpleDirectoryReaderLoadDataProps): Promise<Document[]> {
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
import _ from "lodash";
|
||||
|
||||
/**
|
||||
* A filesystem interface that is meant to be compatible with
|
||||
* the 'fs' module from Node.js.
|
||||
* Allows for the use of similar inteface implementation on browsers.
|
||||
*/
|
||||
export interface GenericFileSystem {
|
||||
writeFile(path: string, content: string, options?: any): Promise<void>;
|
||||
readFile(path: string, options?: any): Promise<string>;
|
||||
access(path: string): Promise<void>;
|
||||
mkdir(path: string, options?: any): Promise<void>;
|
||||
}
|
||||
|
||||
export interface WalkableFileSystem {
|
||||
readdir(path: string): Promise<string[]>;
|
||||
stat(path: string): Promise<any>;
|
||||
}
|
||||
|
||||
/**
|
||||
* A filesystem implementation that stores files in memory.
|
||||
*/
|
||||
export class InMemoryFileSystem implements GenericFileSystem {
|
||||
private files: Record<string, any> = {};
|
||||
|
||||
async writeFile(path: string, content: string, options?: any): Promise<void> {
|
||||
this.files[path] = content;
|
||||
}
|
||||
|
||||
async readFile(path: string, options?: any): Promise<string> {
|
||||
if (!(path in this.files)) {
|
||||
throw new Error(`File ${path} does not exist`);
|
||||
}
|
||||
return this.files[path];
|
||||
}
|
||||
|
||||
async access(path: string): Promise<void> {
|
||||
if (!(path in this.files)) {
|
||||
throw new Error(`File ${path} does not exist`);
|
||||
}
|
||||
}
|
||||
|
||||
async mkdir(path: string, options?: any): Promise<void> {
|
||||
this.files[path] = _.get(this.files, path, null);
|
||||
}
|
||||
}
|
||||
|
||||
export type CompleteFileSystem = GenericFileSystem & WalkableFileSystem;
|
||||
|
||||
// FS utility functions
|
||||
|
||||
/**
|
||||
* Checks if a file exists.
|
||||
* Analogous to the os.path.exists function from Python.
|
||||
* @param fs The filesystem to use.
|
||||
* @param path The path to the file to check.
|
||||
* @returns A promise that resolves to true if the file exists, false otherwise.
|
||||
*/
|
||||
export async function exists(
|
||||
fs: GenericFileSystem,
|
||||
path: string,
|
||||
): Promise<boolean> {
|
||||
try {
|
||||
await fs.access(path);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively traverses a directory and yields all the paths to the files in it.
|
||||
* @param fs The filesystem to use.
|
||||
* @param dirPath The path to the directory to traverse.
|
||||
*/
|
||||
export async function* walk(
|
||||
fs: WalkableFileSystem,
|
||||
dirPath: string,
|
||||
): AsyncIterable<string> {
|
||||
if (fs instanceof InMemoryFileSystem) {
|
||||
throw new Error(
|
||||
"The InMemoryFileSystem does not support directory traversal.",
|
||||
);
|
||||
}
|
||||
|
||||
const entries = await fs.readdir(dirPath);
|
||||
for (const entry of entries) {
|
||||
const fullPath = `${dirPath}/${entry}`;
|
||||
const stats = await fs.stat(fullPath);
|
||||
if (stats.isDirectory()) {
|
||||
yield* walk(fs, fullPath);
|
||||
} else {
|
||||
yield fullPath;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
import { InMemoryFileSystem, type GenericFileSystem } from "./FileSystem.core";
|
||||
|
||||
export * from "./FileSystem.core";
|
||||
|
||||
export const genericFileSystem: GenericFileSystem = new InMemoryFileSystem();
|
||||
@@ -1,111 +1,25 @@
|
||||
import _ from "lodash";
|
||||
/**
|
||||
* A filesystem interface that is meant to be compatible with
|
||||
* the 'fs' module from Node.js.
|
||||
* Allows for the use of similar inteface implementation on
|
||||
* browsers.
|
||||
*/
|
||||
import nodeFS from "node:fs/promises";
|
||||
import { CompleteFileSystem } from "./FileSystem.core";
|
||||
|
||||
export interface GenericFileSystem {
|
||||
writeFile(path: string, content: string, options?: any): Promise<void>;
|
||||
readFile(path: string, options?: any): Promise<string>;
|
||||
access(path: string): Promise<void>;
|
||||
mkdir(path: string, options?: any): Promise<void>;
|
||||
}
|
||||
export * from "./FileSystem.core";
|
||||
|
||||
export interface WalkableFileSystem {
|
||||
readdir(path: string): Promise<string[]>;
|
||||
stat(path: string): Promise<any>;
|
||||
}
|
||||
|
||||
/**
|
||||
* A filesystem implementation that stores files in memory.
|
||||
*/
|
||||
export class InMemoryFileSystem implements GenericFileSystem {
|
||||
private files: Record<string, any> = {};
|
||||
|
||||
async writeFile(path: string, content: string, options?: any): Promise<void> {
|
||||
this.files[path] = _.cloneDeep(content);
|
||||
}
|
||||
|
||||
async readFile(path: string, options?: any): Promise<string> {
|
||||
if (!(path in this.files)) {
|
||||
throw new Error(`File ${path} does not exist`);
|
||||
}
|
||||
return _.cloneDeep(this.files[path]);
|
||||
}
|
||||
|
||||
async access(path: string): Promise<void> {
|
||||
if (!(path in this.files)) {
|
||||
throw new Error(`File ${path} does not exist`);
|
||||
}
|
||||
}
|
||||
|
||||
async mkdir(path: string, options?: any): Promise<void> {
|
||||
this.files[path] = _.get(this.files, path, null);
|
||||
}
|
||||
}
|
||||
|
||||
export type CompleteFileSystem = GenericFileSystem & WalkableFileSystem;
|
||||
|
||||
export function getNodeFS(): CompleteFileSystem {
|
||||
const fs = require("fs/promises");
|
||||
return fs;
|
||||
}
|
||||
|
||||
let fs = null;
|
||||
try {
|
||||
fs = getNodeFS();
|
||||
} catch (e) {
|
||||
fs = new InMemoryFileSystem();
|
||||
}
|
||||
export const DEFAULT_FS: GenericFileSystem | CompleteFileSystem =
|
||||
fs as GenericFileSystem;
|
||||
|
||||
// FS utility functions
|
||||
|
||||
/**
|
||||
* Checks if a file exists.
|
||||
* Analogous to the os.path.exists function from Python.
|
||||
* @param fs The filesystem to use.
|
||||
* @param path The path to the file to check.
|
||||
* @returns A promise that resolves to true if the file exists, false otherwise.
|
||||
*/
|
||||
export async function exists(
|
||||
fs: GenericFileSystem,
|
||||
path: string,
|
||||
): Promise<boolean> {
|
||||
try {
|
||||
await fs.access(path);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively traverses a directory and yields all the paths to the files in it.
|
||||
* @param fs The filesystem to use.
|
||||
* @param dirPath The path to the directory to traverse.
|
||||
*/
|
||||
export async function* walk(
|
||||
fs: WalkableFileSystem,
|
||||
dirPath: string,
|
||||
): AsyncIterable<string> {
|
||||
if (fs instanceof InMemoryFileSystem) {
|
||||
throw new Error(
|
||||
"The InMemoryFileSystem does not support directory traversal.",
|
||||
);
|
||||
}
|
||||
|
||||
const entries = await fs.readdir(dirPath);
|
||||
for (const entry of entries) {
|
||||
const fullPath = `${dirPath}/${entry}`;
|
||||
const stats = await fs.stat(fullPath);
|
||||
if (stats.isDirectory()) {
|
||||
yield* walk(fs, fullPath);
|
||||
} else {
|
||||
yield fullPath;
|
||||
}
|
||||
}
|
||||
}
|
||||
export const genericFileSystem: CompleteFileSystem = {
|
||||
readdir(path) {
|
||||
return nodeFS.readdir(path);
|
||||
},
|
||||
stat(path) {
|
||||
return nodeFS.stat(path);
|
||||
},
|
||||
access(path) {
|
||||
return nodeFS.access(path);
|
||||
},
|
||||
async mkdir(path, options) {
|
||||
await nodeFS.mkdir(path, options);
|
||||
},
|
||||
readFile(path, options) {
|
||||
return nodeFS.readFile(path, options) as any;
|
||||
},
|
||||
writeFile(path, content, options) {
|
||||
return nodeFS.writeFile(path, content, options);
|
||||
},
|
||||
};
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
import path from "path";
|
||||
import { GenericFileSystem } from "./FileSystem";
|
||||
import {
|
||||
DEFAULT_FS,
|
||||
DEFAULT_IMAGE_VECTOR_NAMESPACE,
|
||||
DEFAULT_NAMESPACE,
|
||||
} from "./constants";
|
||||
import { join } from "../environments";
|
||||
import { GenericFileSystem, genericFileSystem } from "./FileSystem";
|
||||
import { DEFAULT_IMAGE_VECTOR_NAMESPACE, DEFAULT_NAMESPACE } from "./constants";
|
||||
import { SimpleDocumentStore } from "./docStore/SimpleDocumentStore";
|
||||
import { BaseDocumentStore } from "./docStore/types";
|
||||
import { SimpleIndexStore } from "./indexStore/SimpleIndexStore";
|
||||
@@ -44,7 +40,7 @@ export async function storageContextFromDefaults({
|
||||
vectorStore = vectorStore || new SimpleVectorStore();
|
||||
imageVectorStore = storeImages ? new SimpleVectorStore() : imageVectorStore;
|
||||
} else {
|
||||
fs = fs || DEFAULT_FS;
|
||||
fs = fs || genericFileSystem;
|
||||
docStore =
|
||||
docStore ||
|
||||
(await SimpleDocumentStore.fromPersistDir(
|
||||
@@ -58,7 +54,7 @@ export async function storageContextFromDefaults({
|
||||
vectorStore || (await SimpleVectorStore.fromPersistDir(persistDir, fs));
|
||||
imageVectorStore = storeImages
|
||||
? await SimpleVectorStore.fromPersistDir(
|
||||
path.join(persistDir, DEFAULT_IMAGE_VECTOR_NAMESPACE),
|
||||
join(persistDir, DEFAULT_IMAGE_VECTOR_NAMESPACE),
|
||||
fs,
|
||||
)
|
||||
: imageVectorStore;
|
||||
|
||||
@@ -6,4 +6,3 @@ export const DEFAULT_VECTOR_STORE_PERSIST_FILENAME = "vector_store.json";
|
||||
export const DEFAULT_GRAPH_STORE_PERSIST_FILENAME = "graph_store.json";
|
||||
export const DEFAULT_NAMESPACE = "docstore";
|
||||
export const DEFAULT_IMAGE_VECTOR_NAMESPACE = "images";
|
||||
export { DEFAULT_FS } from "./FileSystem";
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import _ from "lodash";
|
||||
import path from "path";
|
||||
import { GenericFileSystem } from "../FileSystem";
|
||||
import { join } from "../../environments";
|
||||
import { GenericFileSystem, genericFileSystem } from "../FileSystem";
|
||||
import {
|
||||
DEFAULT_DOC_STORE_PERSIST_FILENAME,
|
||||
DEFAULT_FS,
|
||||
DEFAULT_NAMESPACE,
|
||||
DEFAULT_PERSIST_DIR,
|
||||
} from "../constants";
|
||||
@@ -28,10 +27,7 @@ export class SimpleDocumentStore extends KVDocumentStore {
|
||||
namespace?: string,
|
||||
fsModule?: GenericFileSystem,
|
||||
): Promise<SimpleDocumentStore> {
|
||||
const persistPath = path.join(
|
||||
persistDir,
|
||||
DEFAULT_DOC_STORE_PERSIST_FILENAME,
|
||||
);
|
||||
const persistPath = join(persistDir, DEFAULT_DOC_STORE_PERSIST_FILENAME);
|
||||
return await SimpleDocumentStore.fromPersistPath(
|
||||
persistPath,
|
||||
namespace,
|
||||
@@ -44,19 +40,19 @@ export class SimpleDocumentStore extends KVDocumentStore {
|
||||
namespace?: string,
|
||||
fs?: GenericFileSystem,
|
||||
): Promise<SimpleDocumentStore> {
|
||||
fs = fs || DEFAULT_FS;
|
||||
fs = fs || genericFileSystem;
|
||||
const simpleKVStore = await SimpleKVStore.fromPersistPath(persistPath, fs);
|
||||
return new SimpleDocumentStore(simpleKVStore, namespace);
|
||||
}
|
||||
|
||||
async persist(
|
||||
persistPath: string = path.join(
|
||||
persistPath: string = join(
|
||||
DEFAULT_PERSIST_DIR,
|
||||
DEFAULT_DOC_STORE_PERSIST_FILENAME,
|
||||
),
|
||||
fs?: GenericFileSystem,
|
||||
): Promise<void> {
|
||||
fs = fs || DEFAULT_FS;
|
||||
fs = fs || genericFileSystem;
|
||||
if (
|
||||
_.isObject(this.kvStore) &&
|
||||
this.kvStore instanceof BaseInMemoryKVStore
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
export * from "./FileSystem";
|
||||
export * from "./StorageContext";
|
||||
export * from "./constants";
|
||||
export { SimpleDocumentStore } from "./docStore/SimpleDocumentStore";
|
||||
export * from "./docStore/types";
|
||||
export { SimpleIndexStore } from "./indexStore/SimpleIndexStore";
|
||||
export * from "./indexStore/types";
|
||||
export { SimpleKVStore } from "./kvStore/SimpleKVStore";
|
||||
export * from "./kvStore/types";
|
||||
export { AstraDBVectorStore } from "./vectorStore/AstraDBVectorStore";
|
||||
export { ChromaVectorStore } from "./vectorStore/ChromaVectorStore";
|
||||
export { MongoDBAtlasVectorSearch } from "./vectorStore/MongoDBAtlasVectorStore";
|
||||
export { PGVectorStore } from "./vectorStore/PGVectorStore";
|
||||
export { PineconeVectorStore } from "./vectorStore/PineconeVectorStore";
|
||||
export { SimpleVectorStore } from "./vectorStore/SimpleVectorStore";
|
||||
export * from "./vectorStore/types";
|
||||
@@ -1,7 +1,6 @@
|
||||
import path from "path";
|
||||
import { GenericFileSystem } from "../FileSystem";
|
||||
import { join } from "../../environments";
|
||||
import { GenericFileSystem, genericFileSystem } from "../FileSystem";
|
||||
import {
|
||||
DEFAULT_FS,
|
||||
DEFAULT_INDEX_STORE_PERSIST_FILENAME,
|
||||
DEFAULT_PERSIST_DIR,
|
||||
} from "../constants";
|
||||
@@ -20,18 +19,15 @@ export class SimpleIndexStore extends KVIndexStore {
|
||||
|
||||
static async fromPersistDir(
|
||||
persistDir: string = DEFAULT_PERSIST_DIR,
|
||||
fs: GenericFileSystem = DEFAULT_FS,
|
||||
fs: GenericFileSystem = genericFileSystem,
|
||||
): Promise<SimpleIndexStore> {
|
||||
const persistPath = path.join(
|
||||
persistDir,
|
||||
DEFAULT_INDEX_STORE_PERSIST_FILENAME,
|
||||
);
|
||||
const persistPath = join(persistDir, DEFAULT_INDEX_STORE_PERSIST_FILENAME);
|
||||
return this.fromPersistPath(persistPath, fs);
|
||||
}
|
||||
|
||||
static async fromPersistPath(
|
||||
persistPath: string,
|
||||
fs: GenericFileSystem = DEFAULT_FS,
|
||||
fs: GenericFileSystem = genericFileSystem,
|
||||
): Promise<SimpleIndexStore> {
|
||||
let simpleKVStore = await SimpleKVStore.fromPersistPath(persistPath, fs);
|
||||
return new SimpleIndexStore(simpleKVStore);
|
||||
@@ -39,7 +35,7 @@ export class SimpleIndexStore extends KVIndexStore {
|
||||
|
||||
async persist(
|
||||
persistPath: string = DEFAULT_PERSIST_DIR,
|
||||
fs: GenericFileSystem = DEFAULT_FS,
|
||||
fs: GenericFileSystem = genericFileSystem,
|
||||
): Promise<void> {
|
||||
await this.kvStore.persist(persistPath, fs);
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import _ from "lodash";
|
||||
import path from "path";
|
||||
import { GenericFileSystem, exists } from "../FileSystem";
|
||||
import { DEFAULT_COLLECTION, DEFAULT_FS } from "../constants";
|
||||
import { dirname } from "../../environments";
|
||||
import { GenericFileSystem, exists, genericFileSystem } from "../FileSystem";
|
||||
import { DEFAULT_COLLECTION } from "../constants";
|
||||
import { BaseKVStore } from "./types";
|
||||
|
||||
export type DataType = Record<string, Record<string, any>>;
|
||||
@@ -61,9 +61,9 @@ export class SimpleKVStore extends BaseKVStore {
|
||||
}
|
||||
|
||||
async persist(persistPath: string, fs?: GenericFileSystem): Promise<void> {
|
||||
fs = fs || DEFAULT_FS;
|
||||
fs = fs || genericFileSystem;
|
||||
// TODO: decide on a way to polyfill path
|
||||
let dirPath = path.dirname(persistPath);
|
||||
let dirPath = dirname(persistPath);
|
||||
if (!(await exists(fs, dirPath))) {
|
||||
await fs.mkdir(dirPath);
|
||||
}
|
||||
@@ -74,8 +74,8 @@ export class SimpleKVStore extends BaseKVStore {
|
||||
persistPath: string,
|
||||
fs?: GenericFileSystem,
|
||||
): Promise<SimpleKVStore> {
|
||||
fs = fs || DEFAULT_FS;
|
||||
let dirPath = path.dirname(persistPath);
|
||||
fs = fs || genericFileSystem;
|
||||
let dirPath = dirname(persistPath);
|
||||
if (!(await exists(fs, dirPath))) {
|
||||
await fs.mkdir(dirPath);
|
||||
}
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
import _ from "lodash";
|
||||
import path from "path";
|
||||
import { BaseNode } from "../../Node";
|
||||
import {
|
||||
getTopKEmbeddings,
|
||||
getTopKEmbeddingsLearner,
|
||||
getTopKMMREmbeddings,
|
||||
} from "../../embeddings";
|
||||
import { GenericFileSystem, exists } from "../FileSystem";
|
||||
import { DEFAULT_FS, DEFAULT_PERSIST_DIR } from "../constants";
|
||||
import { dirname } from "../../environments";
|
||||
import { GenericFileSystem, exists, genericFileSystem } from "../FileSystem";
|
||||
import { DEFAULT_PERSIST_DIR } from "../constants";
|
||||
import {
|
||||
VectorStore,
|
||||
VectorStoreQuery,
|
||||
@@ -31,17 +31,17 @@ class SimpleVectorStoreData {
|
||||
export class SimpleVectorStore implements VectorStore {
|
||||
storesText: boolean = false;
|
||||
private data: SimpleVectorStoreData = new SimpleVectorStoreData();
|
||||
private fs: GenericFileSystem = DEFAULT_FS;
|
||||
private fs: GenericFileSystem = genericFileSystem;
|
||||
private persistPath: string | undefined;
|
||||
|
||||
constructor(data?: SimpleVectorStoreData, fs?: GenericFileSystem) {
|
||||
this.data = data || new SimpleVectorStoreData();
|
||||
this.fs = fs || DEFAULT_FS;
|
||||
this.fs = fs || genericFileSystem;
|
||||
}
|
||||
|
||||
static async fromPersistDir(
|
||||
persistDir: string = DEFAULT_PERSIST_DIR,
|
||||
fs: GenericFileSystem = DEFAULT_FS,
|
||||
fs: GenericFileSystem = genericFileSystem,
|
||||
): Promise<SimpleVectorStore> {
|
||||
let persistPath = `${persistDir}/vector_store.json`;
|
||||
return await SimpleVectorStore.fromPersistPath(persistPath, fs);
|
||||
@@ -148,7 +148,7 @@ export class SimpleVectorStore implements VectorStore {
|
||||
fs?: GenericFileSystem,
|
||||
): Promise<void> {
|
||||
fs = fs || this.fs;
|
||||
let dirPath = path.dirname(persistPath);
|
||||
let dirPath = dirname(persistPath);
|
||||
if (!(await exists(fs, dirPath))) {
|
||||
await fs.mkdir(dirPath);
|
||||
}
|
||||
@@ -160,9 +160,9 @@ export class SimpleVectorStore implements VectorStore {
|
||||
persistPath: string,
|
||||
fs?: GenericFileSystem,
|
||||
): Promise<SimpleVectorStore> {
|
||||
fs = fs || DEFAULT_FS;
|
||||
fs = fs || genericFileSystem;
|
||||
|
||||
let dirPath = path.dirname(persistPath);
|
||||
let dirPath = dirname(persistPath);
|
||||
if (!(await exists(fs, dirPath))) {
|
||||
await fs.mkdir(dirPath, { recursive: true });
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ import {
|
||||
import { OpenAIEmbedding } from "../embeddings";
|
||||
import { SummaryIndex } from "../indices/summary";
|
||||
import { VectorStoreIndex } from "../indices/vectorStore/VectorStoreIndex";
|
||||
import { OpenAI } from "../llm/LLM";
|
||||
import { OpenAI } from "../llm/openai";
|
||||
import { ResponseSynthesizer, SimpleResponseBuilder } from "../synthesizers";
|
||||
import { mockEmbeddingModel, mockLlmGeneration } from "./utility/mockOpenAI";
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import os from "os";
|
||||
import path from "path";
|
||||
import nodeFS from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import {
|
||||
GenericFileSystem,
|
||||
InMemoryFileSystem,
|
||||
exists,
|
||||
getNodeFS,
|
||||
walk,
|
||||
} from "../storage/FileSystem";
|
||||
|
||||
@@ -16,8 +16,6 @@ type FileSystemUnderTest = {
|
||||
tempDir: string;
|
||||
};
|
||||
|
||||
const nodeFS = getNodeFS() as GenericFileSystem & any;
|
||||
|
||||
describe.each<FileSystemUnderTest>([
|
||||
{
|
||||
name: "InMemoryFileSystem",
|
||||
@@ -102,14 +100,13 @@ describe.each<FileSystemUnderTest>([
|
||||
});
|
||||
|
||||
describe("Test walk for Node.js fs", () => {
|
||||
const fs = getNodeFS();
|
||||
let tempDir: string;
|
||||
|
||||
beforeAll(async () => {
|
||||
tempDir = await nodeFS.mkdtemp(path.join(os.tmpdir(), "jest-"));
|
||||
await fs.writeFile(`${tempDir}/test.txt`, "Hello, world!");
|
||||
await fs.mkdir(`${tempDir}/subDir`);
|
||||
await fs.writeFile(`${tempDir}/subDir/test2.txt`, "Hello, again!");
|
||||
await nodeFS.writeFile(`${tempDir}/test.txt`, "Hello, world!");
|
||||
await nodeFS.mkdir(`${tempDir}/subDir`);
|
||||
await nodeFS.writeFile(`${tempDir}/subDir/test2.txt`, "Hello, again!");
|
||||
});
|
||||
|
||||
it("walks directory", async () => {
|
||||
@@ -119,7 +116,7 @@ describe("Test walk for Node.js fs", () => {
|
||||
]);
|
||||
|
||||
const actualFiles = new Set<string>();
|
||||
for await (let file of walk(fs, tempDir)) {
|
||||
for await (let file of walk(nodeFS, tempDir)) {
|
||||
expect(file).toBeTruthy();
|
||||
actualFiles.add(file);
|
||||
}
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import { CallbackManager } from "../../callbacks/CallbackManager";
|
||||
import { OpenAIEmbedding } from "../../embeddings";
|
||||
import { globalsHelper } from "../../GlobalsHelper";
|
||||
import { LLMChatParamsBase, OpenAI } from "../../llm/LLM";
|
||||
import { LLMChatParamsBase } from "../../llm/LLM";
|
||||
import { OpenAI } from "../../llm/openai";
|
||||
|
||||
export function mockLlmGeneration({
|
||||
languageModel,
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
export function assertExists<T>(
|
||||
val: T | undefined | null,
|
||||
message = "Value unexpectedly not set",
|
||||
): asserts val is T {
|
||||
if (val === undefined || val === null) {
|
||||
throw new Error(message);
|
||||
}
|
||||
}
|
||||
Generated
+1998
-443
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,5 @@
|
||||
packages:
|
||||
- "apps/*"
|
||||
- "packages/*"
|
||||
- "packages/core/e2e/*"
|
||||
- "examples/"
|
||||
|
||||
+3
-1
@@ -7,7 +7,9 @@
|
||||
"outputs": ["dist/**", "build/**"]
|
||||
},
|
||||
"lint": {},
|
||||
"test": {},
|
||||
"test": {
|
||||
"dependsOn": ["^build"]
|
||||
},
|
||||
"dev": {
|
||||
"cache": false,
|
||||
"persistent": true
|
||||
|
||||
Reference in New Issue
Block a user