Compare commits

...

33 Commits

Author SHA1 Message Date
github-actions[bot] 7a2485cca2 Release 0.11.12 (#2050)
Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
2025-07-02 11:41:55 +07:00
Marcus Schiesser 1329186a23 docs: clarify how to run docs 2025-07-02 11:33:48 +07:00
dependabot[bot] 5d6e7384f5 chore(deps-dev): bump @modelcontextprotocol/server-filesystem from 2025.3.28 to 2025.7.1 (#2055) 2025-07-02 11:26:18 +07:00
allen f2dfd305fb implement bm25 retriever (#2045)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-07-02 11:22:47 +07:00
Huu Le 3cd8a573df feat: update interpreter to always upload all files in the configured directory (#2057) 2025-07-02 10:57:04 +07:00
Laurie Voss 09c6077f6e Import path for llamaparsereader (#2056) 2025-07-01 16:51:25 -07:00
Logan 14cc65b4e3 add google analytics (#2053)
Co-authored-by: Alex Yang <himself65@outlook.com>
2025-07-01 11:18:14 -07:00
Marcus Schiesser c544d8f67c docs: review and update memory doc 2025-07-01 15:10:43 +07:00
Huu Le d578889e21 feat: new memory api (#2028)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-07-01 09:30:49 +07:00
Marcus Schiesser 9f745d1941 chore: revert to wrong opus change 2025-07-01 09:07:46 +07:00
Alex Yang f292e94dcd fix: change default claude model (#2052) 2025-06-30 15:19:40 -07:00
Marcus Schiesser 0fcc92f632 fix: sentence splitter must not trim whitespaces (#2046) 2025-06-30 17:32:04 +07:00
Marcus Schiesser 515a8b9111 fix: error logging for fromPersistPath (#2049) 2025-06-30 13:41:13 +07:00
github-actions[bot] 7e8efc6284 Release @llamaindex/tools@0.1.2 (#2048) 2025-06-30 11:40:54 +07:00
Wassim Chegham 0fcf65126d chore: export type MCPClientOptions (#2047)
Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>
2025-06-28 10:55:07 +07:00
github-actions[bot] a50acf634c Release 0.11.11 (#2044)
Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
2025-06-27 14:51:09 +07:00
Thuc Pham 7039e1a214 chore: migrate to @google/genai SDK (#2038)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-06-27 12:09:26 +07:00
github-actions[bot] 785d010cd3 Release 0.11.10 (#2037)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
2025-06-26 14:29:33 +07:00
Marcus Schiesser b878032131 fix release step 2025-06-26 14:18:56 +07:00
Marcus Schiesser f7ec293a0f chore: Update workflow-core (#2042) 2025-06-26 14:03:03 +07:00
jerinthomascarmel 49a5e0a8cf feat(readers): add ExcelReader for parsing Excel files (run-llama#1959) (#2033)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
Co-authored-by: leehuwuj <leehuwuj@gmail.com>
2025-06-26 11:15:19 +07:00
Logan 118924799a Rename llama-flow -> workflows in docs (#2040) 2025-06-25 15:52:04 -07:00
allen ec8f673dae support filter to supabase vector search (#2036) 2025-06-25 16:17:54 +07:00
github-actions[bot] 85039a5360 Release @llamaindex/tools@0.1.0 (#2034) 2025-06-24 12:32:24 +07:00
Marcus Schiesser d7305edb53 fix changesets 2025-06-24 12:26:09 +07:00
Huu Le 096bf2bda1 feat: Add support for StreamableHTTP MCP Client (#2032) 2025-06-24 11:40:34 +07:00
jerinthomascarmel c5846bd7dc feat(readers): add XMLReader for parsing XML files (#1846) (#2031)
Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>
2025-06-24 10:46:32 +07:00
github-actions[bot] 97bbce6e13 Release 0.11.9 (#2023)
Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
2025-06-20 12:28:01 +07:00
Marcus Schiesser 62699b7497 chore: improve performance of sentence splitter (#2030) 2025-06-20 12:16:24 +07:00
Broda Noel a89e187796 Add extraAbbreviations on sentence-splitter (#2029)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-06-20 11:27:06 +07:00
ANKIT VARSHNEY d8ac8d385d feat: add openai realtime api (#2006)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-06-20 10:22:04 +07:00
Marcus Schiesser a6cef9c6be chore: no core in examples (#2024) 2025-06-18 09:39:32 +07:00
Broda Noel c5b2691302 Add more Acronyms on SentenceSplitter (#2022)
Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>
2025-06-17 10:43:36 +07:00
279 changed files with 10494 additions and 3022 deletions
+1 -1
View File
@@ -25,7 +25,7 @@ Make sure you have Node.js LTS (Long-term Support) installed. You can check your
```shell
node -v
# v20.x.x
# v22.x.x
```
### Use pnpm
+60
View File
@@ -1,5 +1,65 @@
# @llamaindex/doc
## 0.2.32
### Patch Changes
- Updated dependencies [d578889]
- Updated dependencies [0fcc92f]
- Updated dependencies [515a8b9]
- @llamaindex/core@0.6.13
- llamaindex@0.11.12
- @llamaindex/cloud@4.0.17
- @llamaindex/node-parser@2.0.13
- @llamaindex/openai@0.4.7
- @llamaindex/readers@3.1.12
- @llamaindex/workflow@1.1.13
## 0.2.31
### Patch Changes
- Updated dependencies [7039e1a]
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
- @llamaindex/core@0.6.12
- @llamaindex/cloud@4.0.16
- @llamaindex/node-parser@2.0.12
- @llamaindex/openai@0.4.6
- @llamaindex/readers@3.1.11
- @llamaindex/workflow@1.1.12
## 0.2.30
### Patch Changes
- Updated dependencies [f7ec293]
- @llamaindex/workflow@1.1.11
- llamaindex@0.11.10
## 0.2.29
### Patch Changes
- Updated dependencies [c5846bd]
- @llamaindex/readers@3.1.10
## 0.2.28
### Patch Changes
- Updated dependencies [a89e187]
- Updated dependencies [62699b7]
- Updated dependencies [c5b2691]
- Updated dependencies [d8ac8d3]
- @llamaindex/core@0.6.11
- @llamaindex/openai@0.4.5
- @llamaindex/cloud@4.0.15
- llamaindex@0.11.9
- @llamaindex/node-parser@2.0.11
- @llamaindex/readers@3.1.9
- @llamaindex/workflow@1.1.10
## 0.2.27
### Patch Changes
+1 -1
View File
@@ -111,7 +111,7 @@ Key build process:
**Content Sources:**
- Local MDX files in `src/content/docs/`
- External docs from `@llama-flow/docs` package
- External docs from `@llamaindex/workflow-docs` package
- Generated API docs from TypeScript source
### Development Notes
+2
View File
@@ -3,6 +3,8 @@
This is a Next.js application generated with
[Create Fumadocs](https://github.com/fuma-nama/fumadocs).
> Note: Before running the development server, make sure to build the whole project first, see [CONTRIBUTING.md](../../CONTRIBUTING.md) for more details.
Run development server:
```bash
+2 -2
View File
@@ -23,8 +23,8 @@ const config = {
permanent: true,
},
{
source: "/docs/llamaflow/:path*.mdx",
destination: "/docs/llamaflow/:path*",
source: "/docs/workflows/:path*.mdx",
destination: "/docs/workflows/:path*",
permanent: true,
},
];
+4 -3
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/doc",
"version": "0.2.27",
"version": "0.2.32",
"private": true,
"scripts": {
"postinstall": "fumadocs-mdx",
@@ -15,7 +15,6 @@
"dependencies": {
"@huggingface/transformers": "^3.5.0",
"@icons-pack/react-simple-icons": "^10.1.0",
"@llama-flow/docs": "0.0.8",
"@llamaindex/chat-ui-docs": "^0.0.5",
"@llamaindex/cloud": "workspace:*",
"@llamaindex/core": "workspace:*",
@@ -23,8 +22,10 @@
"@llamaindex/openai": "workspace:*",
"@llamaindex/readers": "workspace:*",
"@llamaindex/workflow": "workspace:*",
"@llamaindex/workflow-docs": "0.1.1",
"@mdx-js/mdx": "^3.1.0",
"@monaco-editor/react": "^4.7.0",
"@next/third-parties": "^15.3.4",
"@number-flow/react": "^0.3.4",
"@radix-ui/react-dialog": "^1.1.2",
"@radix-ui/react-icons": "^1.3.2",
@@ -69,7 +70,7 @@
"twoslash": "^0.3.1",
"use-stick-to-bottom": "^1.0.42",
"web-tree-sitter": "^0.24.4",
"zod": "^3.23.8"
"zod": "^3.25.67"
},
"devDependencies": {
"@next/env": "^15.3.0",
+1 -1
View File
@@ -13,7 +13,7 @@ const INTERNAL_LINK_REGEX = /(?:(?:\]\(|\bhref=["'])\/docs\/([^")]+))/g;
// This captures relative links like [text](./path) or ![alt](../images/image.png)
const RELATIVE_LINK_REGEX = /(?:\]\()(?:\s*)(?:\.\.?)\//g;
const ALLOWED_LINKS = ["/docs/llamaflow", "/docs/chat-ui"];
const ALLOWED_LINKS = ["/docs/workflows", "/docs/chat-ui"];
interface LinkValidationResult {
file: string;
+2 -2
View File
@@ -11,9 +11,9 @@ import remarkMath from "remark-math";
export const docs = defineDocs({
dir: [
"./src/content/docs",
"./node_modules/@llama-flow/docs",
"./node_modules/@llamaindex/workflow-docs",
"./node_modules/@llamaindex/chat-ui-docs",
// NOTE: When adding external docs (like chat-ui or llama-flow above),
// NOTE: When adding external docs (like chat-ui or workflow-docs above),
// make sure to also update:
// 1. scripts/validate-links.mts - add to ALLOWED_LINKS array
// 2. next.config.mjs - add redirect for .mdx files
+2
View File
@@ -1,5 +1,6 @@
import { AIProvider } from "@/actions";
import { TooltipProvider } from "@/components/ui/tooltip";
import { GoogleAnalytics } from "@next/third-parties/google";
import { RootProvider } from "fumadocs-ui/provider";
import { Inter } from "next/font/google";
import type { ReactNode } from "react";
@@ -39,6 +40,7 @@ export default function Layout({ children }: { children: ReactNode }) {
</AIProvider>
</TooltipProvider>
</body>
<GoogleAnalytics gaId="G-NB9B8LW9W5" />
</html>
);
}
@@ -74,12 +74,21 @@ const server = mcp({
args: ["-y", "@modelcontextprotocol/server-filesystem", "."],
verbose: true,
});
// or by SSE
// or by StreamableHTTP transport
const server = mcp({
url: "http://localhost:8000/mcp",
verbose: true,
});
// if your MCP server is not using StreamableHTTP transport, you can also use SSE transport
// by setting useSSETransport to true.
// See: https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse-deprecated
const server = mcp({
url: "http://localhost:8000/mcp",
useSSETransport: true,
verbose: true,
});
// 3. Get tools from MCP server
const tools = await server.tools();
@@ -9,10 +9,13 @@ Workflows are designed to be flexible and can be used to build agents, RAG flows
To use workflows install this package:
```package-install
npm i @llamaindex/workflow
npm i @llamaindex/workflow-core
```
This package is a stable, production-ready version of our [llama-flow](/docs/llamaflow) project.
This contains the core functionality for the workflow system. You can read more about the core concepts in the [workflow-core](/docs/workflows) section.
While you can still reference the llama-flow documentation for detailed information about the underlying concepts, we recommend using the `@llamaindex/workflow` package for all new projects to ensure stability and long-term availability.
In contrast, the `@llamaindex/workflow` package contains more utilities, such as prebuilt agents.
```package-install
npm i @llamaindex/workflow
```
@@ -0,0 +1,182 @@
---
title: Memory
description: Manage conversation history and context with agents
---
## Concept
Memory is a core component of agentic systems. It allows you to store and retrieve information from the past.
In LlamaIndexTS, you can create memory by using the `createMemory` function. This function will return a `Memory` object, which you can then use to store and retrieve information.
As the agent runs, it will make calls to `add()` to store information, and `get()` to retrieve information.
## Usage
A `Memory` object has both short-term memory (i.e. a FIFO queue of messages) and optionally long-term memory (i.e. extracting information over time).
`get()` always returns all messages stored in the memory. The longer the agent runs, the more likely this is to exceed the agent's context window. To avoid this, the agent uses the `getLLM` method to get the last X messages that fit into the context window.
### Configuring Memory for an Agent
Here we're creating a memory with a static block (read more about [memory blocks](#long-term-memory)) that contains some information about the user.
```ts twoslash
import { openai } from "@llamaindex/openai";
import { agent } from "@llamaindex/workflow";
import { createMemory, staticBlock } from "llamaindex";
const llm = openai({ model: "gpt-4.1-mini" });
// Create memory with predefined context
const memory = createMemory({
memoryBlocks: [
staticBlock({
content:
"The user is a software engineer who loves TypeScript and LlamaIndex.",
}),
],
});
// Create an agent with the memory
const workflow = agent({
name: "assistant",
llm,
memory,
});
const result = await workflow.run("What is my name?");
console.log("Response:", result.data.result);
```
### Using Vercel format
You can also add messages in Vercel format directly to the memory:
```ts
await memory.add({
id: "1",
createdAt: new Date(),
role: "user",
content: "Hello!",
options: {
parts: [
{
type: "file",
data: "base64...",
mimeType: "image/png",
},
],
},
});
```
If you call `get`, messages are usually retrieved in the LlamaIndexTS format (type `ChatMessage`). If you specify the `type` parameter using `get`, you can return the messages in different formats. E.g.: using `type: "vercel"`, you can return the messages in Vercel format:
```ts
const messages = await memory.get({ type: "vercel" });
console.log(messages);
```
## Customizing Memory
### Short-Term Memory
The `Memory` object will store all the messages that are added to the `Memory` object. Unless you call `clear()`, no messages are removed from the memory. This is the short-term memory (usually you will store the memory of one user session there) which is augmented by the long-term memory.
Calling `getLLM` will retrieve messages from long-term memory and ensure that the given `tokenLimit` is not reached. These are the messages that you will send to the LLM.
For initialization, you call `createMemory` with the following options:
- `tokenLimit`: Maximum tokens for memory retrieval using `getLLM` (default: 30000).
- `shortTermTokenLimitRatio`: Ratio of tokens for short-term vs long-term memory (default: 0.7)
- `customAdapters`: Custom message adapters for different message formats. LlamaIndex (`ChatMessageAdapter`) and Vercel (`VercelMessageAdapter`) are built-in adapters.
- `memoryBlocks`: Memory blocks for long-term storage, see [Long-Term Memory](#long-term-memory)
Example:
```ts
const memory = createMemory({
tokenLimit: 40000,
shortTermTokenLimitRatio: 0.5,
});
```
### Long-Term Memory
Long-term memory is represented as `Memory Block` objects. These objects contain information that is from previous user sessions or from the beginning of the current conversation. When memory is retrieved (by calling `getLLM`), the short-term and long-term memories are merged together within the given `tokenLimit`.
Currently, there are two predefined memory blocks:
- `staticBlock`: A memory block that stores a static piece of information.
- `factExtractionBlock`: A memory block that extracts facts from the chat history.
This sounds a bit complicated, but it's actually quite simple. Let's look at an example:
```ts
import { createMemory, factExtractionBlock, staticBlock } from "llamaindex";
const memoryBlocks= [
staticBlock({
id: "core_info",
content: "My name is Logan, and I live in Saskatoon. I work at LlamaIndex.",
}),
factExtractionBlock({
id: "user-extracted_info",
priority: 1,
llm: llm,
maxFacts: 50,
}),
];
```
Here, we've set up two memory blocks:
- `core_info`: A static memory block that stores some core information about the user. This information will always be inserted into the memory. The type used is `MessageContent` to support multi-modal content.
- `user-extracted_info`: An extracted memory block that will extract information from the chat history. Here we've passed in the `llm` to use to extract facts from the chat history, and set the `maxFacts` to 50. If the number of extracted facts exceeds this limit, the facts will be automatically summarized and reduced to leave room for new information.
You'll also notice that we've set the `priority` for the `factExtractionBlock` block. This is used to determine the handling when the memory blocks content (i.e. long-term memory) + short-term memory exceeds the token limit on the `Memory` object.
- `priority=0`: This block will always be kept in memory (`staticBlocks` always have priority 0.)
- `priority=1, 2, 3, etc`: This determines the order in which memory blocks are truncated when the memory exceeds the token limit, to help the overall short-term memory + long-term memory content be less than or equal to the `tokenLimit`.
Now, let's pass these blocks into the `createMemory` function:
```ts
const memory = createMemory({
tokenLimit: 40000,
memoryBlocks: memoryBlocks,
});
```
When memory is retrieved (using `getLLM`), the short-term and long-term memories are merged together. The `Memory` object will ensure that the short-term memory + long-term memory content is less than or equal to the `tokenLimit`. If it is longer, messages are retrieved in the following order:
1. StaticMemoryBlock (information always included)
2. LongTermMemoryBlock (depending on priority)
3. ShortTermMemoryBlock
4. Transient messages
The amount of short-term memory included is specified by the `shortTermTokenLimitRatio`. If it's set to `0.7`, 70% of the `tokenLimit` is used for short-term memory (not including the static memory block).
## Persistence with Snapshots
Save and restore memory state:
```ts twoslash
import { createMemory, loadMemory } from "llamaindex";
const memory = createMemory();
// Add some messages
await memory.add({ role: "user", content: "Hello!" });
// Create snapshot
const snapshot = memory.snapshot();
// Later, restore from the snapshot
const restoredMemory = loadMemory(snapshot);
```
## Examples
Want to learn more about the Memory class? Check out our example code on [GitHub](https://github.com/run-llama/LlamaIndexTS/tree/main/examples/agents/memory).
@@ -1,4 +1,11 @@
{
"title": "Data",
"pages": ["index", "readers", "data_index", "ingestion_pipeline", "stores"]
"pages": [
"index",
"memory",
"readers",
"data_index",
"ingestion_pipeline",
"stores"
]
}
@@ -28,11 +28,12 @@ embedding vector(1536)
);
```
-- Create a function for similarity search
-- Create a function for similarity search with filtering support
```sql
create function match_documents (
query_embedding vector(1536),
match_count int
match_count int,
filter jsonb DEFAULT '{}'
) returns table (
id uuid,
content text,
@@ -52,6 +53,7 @@ metadata,
embedding,
1 - (embedding <=> query_embedding) as similarity
from documents
where metadata @> filter
order by embedding <=> query_embedding
limit match_count;
end;
@@ -96,6 +98,7 @@ const index = await VectorStoreIndex.fromDocuments(documents, {
```ts
const queryEngine = index.asQueryEngine();
// Basic query without filters
const response = await queryEngine.query({
query: "What is in the document?",
});
@@ -104,6 +107,32 @@ const response = await queryEngine.query({
console.log(response.toString());
```
## Query with filters
You can filter documents based on metadata when querying:
```ts
import { FilterOperator, MetadataFilters } from "llamaindex";
// Create a filter for documents with author = "Jane Smith"
const filters: MetadataFilters = {
filters: [
{
key: "author",
value: "Jane Smith",
operator: FilterOperator.EQ,
},
],
};
// Query with filters
const filteredResponse = await vectorStore.query({
queryEmbedding: embedModel.getQueryEmbedding("What is vector search?"),
similarityTopK: 5,
filters,
});
```
## Full code
```ts
@@ -11,58 +11,130 @@ npm i llamaindex @llamaindex/google
## Usage
```ts
import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
import { Settings } from "llamaindex";
Settings.llm = new Gemini({
model: GEMINI_MODEL.GEMINI_PRO,
});
```
## Usage with Proxy
```ts
import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
import { Settings } from "llamaindex";
Settings.llm = new Gemini({
model: GEMINI_MODEL.GEMINI_PRO,
requestOptions: {
baseUrl: <YOUR_PROXY_URL> // optional, but useful for custom endpoints
}
Settings.llm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH,
});
```
### Usage with Vertex AI
To use Gemini via Vertex AI you can use `GeminiVertexSession`.
GeminiVertexSession accepts the env variables: `GOOGLE_VERTEX_LOCATION` and `GOOGLE_VERTEX_PROJECT`
To use Gemini via Vertex AI, you can specify the vertex configuration:
```ts
import { Gemini, GEMINI_MODEL, GeminiVertexSession } from "@llamaindex/google";
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
const gemini = new Gemini({
model: GEMINI_MODEL.GEMINI_PRO,
session: new GeminiVertexSession({
location: "us-central1", // optional if provided by GOOGLE_VERTEX_LOCATION env variable
project: "project1", // optional if provided by GOOGLE_VERTEX_PROJECT env variable
googleAuthOptions: {...}, // optional, but useful for production. It accepts all values from `GoogleAuthOptions`
}),
const llm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH,
vertex: {
project: "your-cloud-project", // required for Vertex AI
location: "us-central1", // required for Vertex AI
},
});
```
[GoogleAuthOptions](https://github.com/googleapis/google-auth-library-nodejs/blob/main/src/auth/googleauth.ts)
To authenticate for local development:
```bash
npm i @google-cloud/vertexai
gcloud auth application-default login
```
To authenticate for production you'll have to use a [service account](https://cloud.google.com/docs/authentication/). `googleAuthOptions` has `credentials` which might be useful for you.
## Multimodal Usage
Gemini supports multimodal inputs including text, images, audio, and video:
```ts
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
import fs from "fs";
const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
const result = await llm.chat({
messages: [
{
role: "user",
content: [
{
type: "text",
text: "What's in this image?",
},
{
type: "image",
data: fs.readFileSync("./image.jpg").toString("base64"),
mimeType: "image/jpeg",
},
],
},
],
});
```
## Tool Calling
Gemini supports function calling with tools:
```ts
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
import { tool } from "llamaindex";
import { z } from "zod";
const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
const result = await llm.chat({
messages: [
{
content: "What's the weather in Tokyo?",
role: "user",
},
],
tools: [
tool({
name: "weather",
description: "Get the weather",
parameters: z.object({
location: z.string().describe("The location to get the weather for"),
}),
execute: ({ location }) => {
return `The weather in ${location} is sunny and hot`;
},
}),
],
});
```
## Live API (Real-time Conversations)
For real-time audio/video conversations, you can use the [Gemini Live API](https://ai.google.dev/gemini-api/docs/live).
The Live API is running directly in the frontend. That's why you have to generate an ephemeral key first on the server side and pass it to the frontend.
To use the Live API, make sure to pass `apiVersion: "v1alpha"` to the `httpOptions`.
```ts
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
// Server-side: Generate ephemeral key
const serverLlm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
httpOptions: { apiVersion: "v1alpha" },
});
const ephemeralKey = await serverLlm.live.getEphemeralKey();
// Client-side: Use ephemeral key for Live API
const llm = gemini({
apiKey: ephemeralKey,
model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
voiceName: "Zephyr",
httpOptions: { apiVersion: "v1alpha" },
});
const session = await llm.live.connect();
```
## Load and index documents
For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
@@ -90,11 +162,11 @@ const results = await queryEngine.query({
## Full Example
```ts
import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
import { Document, VectorStoreIndex, Settings } from "llamaindex";
Settings.llm = new Gemini({
model: GEMINI_MODEL.GEMINI_PRO,
Settings.llm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH,
});
async function main() {
@@ -104,9 +176,7 @@ async function main() {
const index = await VectorStoreIndex.fromDocuments([document]);
// Create a query engine
const queryEngine = index.asQueryEngine({
retriever,
});
const queryEngine = index.asQueryEngine();
const query = "What is the meaning of life?";
@@ -378,3 +378,186 @@ async function main() {
## API Reference
- [OpenAI](/docs/api/classes/OpenAI)
# OpenAI Live LLM
The OpenAI Live LLM integration in LlamaIndex provides real-time chat capabilities with support for audio streaming and tool calling.
## Basic Usage
```typescript
import { openai } from "@llamaindex/openai";
import { tool, ModalityType } from "llamaindex";
// Get the ephemeral key on the server
const serverllm = openai({
apiKey: "your-api-key",
model: "gpt-4o-realtime-preview-2025-06-03",
});
// Get an ephemeral key
// Usually this code is run on the server and the ephemeral key is passed to the
// client - the ephemeral key can be securely used on the client side
const ephemeralKey = await serverllm.live.getEphemeralKey();
// Create a client-side LLM instance with the ephemeral key
const llm = openai({
apiKey: ephemeralKey,
model: "gpt-4o-realtime-preview-2025-06-03"
});
// Create a live session
const session = await llm.live.connect({
systemInstruction: "You are a helpful assistant.",
});
// Send a message
session.sendMessage({
content: "Hello!",
role: "user",
});
```
## Tool Integration
Tools are handled server-side, making it simple to pass them to the live session:
```typescript
// Define your tools
const weatherTool = tool({
name: "weather",
description: "Get the weather for a location",
parameters: z.object({
location: z.string().describe("The location to get weather for"),
}),
execute: async ({ location }) => {
return `The weather in ${location} is sunny`;
},
});
// Create session with tools
const session = await llm.live.connect({
systemInstruction: "You are a helpful assistant.",
tools: [weatherTool],
});
```
## Audio Support
For audio capabilities:
```typescript
// Get microphone access
const userStream = await navigator.mediaDevices.getUserMedia({
audio: true,
});
// Create session with audio
const session = await llm.live.connect({
audioConfig: {
stream: userStream,
onTrack: (remoteStream) => {
// Handle incoming audio
audioElement.srcObject = remoteStream;
},
},
});
```
## Event Handling
Listen to events from the session:
```typescript
for await (const event of session.streamEvents()) {
if (liveEvents.open.include(event)) {
// Connection established
console.log("Connected!");
} else if (liveEvents.text.include(event)) {
// Received text response
console.log("Assistant:", event.text);
}
}
```
## Capabilities
The OpenAI Live LLM supports:
- Real-time text chat
- Audio streaming (if configured)
- Tool calling (server-side execution)
- Ephemeral key generation for secure sessions
## API Reference
### LiveLLM Methods
// Get an ephemeral key
// Usually this code is run on the server and the ephemeral key is passed to the
// client - the ephemeral key can be securely used on the client side
#### `connect(config?: LiveConnectConfig)`
Creates a new live session.
```typescript
interface LiveConnectConfig {
systemInstruction?: string;
tools?: BaseTool[];
audioConfig?: AudioConfig;
responseModality?: ModalityType[];
}
```
#### `getEphemeralKey()`
Gets a temporary key for the session.
### LiveLLMSession Methods
#### `sendMessage(message: ChatMessage)`
Sends a message to the assistant.
```typescript
interface ChatMessage {
content: string | MessageContentDetail[];
role: "user" | "assistant";
}
```
#### `disconnect()`
Closes the session and cleans up resources.
## Error Handling
```typescript
try {
const session = await llm.live.connect();
} catch (error) {
if (error instanceof Error) {
console.error("Connection failed:", error.message);
}
}
```
## Best Practices
1. **Tool Definition**
- Keep tool implementations server-side
- Use clear descriptions for tools
- Handle tool errors gracefully
2. **Session Management**
- Always disconnect sessions when done
- Clean up audio resources
- Handle reconnection scenarios
3. **Security**
- Use ephemeral keys for sessions
- Validate tool inputs
- Secure API key handling
@@ -11,6 +11,7 @@ A retriever in LlamaIndex is what is used to fetch `Node`s from an index using a
- [KeywordTableLLMRetriever](/docs/api/classes/KeywordTableLLMRetriever) uses an LLM to extract keywords from the query and retrieve relevant nodes based on keyword matches.
- [KeywordTableSimpleRetriever](/docs/api/classes/KeywordTableSimpleRetriever) uses a basic frequency-based approach to extract keywords and retrieve nodes.
- [KeywordTableRAKERetriever](/docs/api/classes/KeywordTableRAKERetriever) uses the RAKE (Rapid Automatic Keyword Extraction) algorithm to extract keywords from the query, focusing on co-occurrence and context for keyword-based retrieval.
- [Bm25Retriever](/docs/api/classes/Bm25Retriever) uses the BM25 ranking algorithm to score nodes by how well their terms match the query and retrieves the highest-scoring nodes.
```typescript
const retriever = vectorIndex.asRetriever({
+1 -1
View File
@@ -1,3 +1,3 @@
{
"pages": ["llamaindex", "api", "llamaflow", "chat-ui"]
"pages": ["llamaindex", "api", "workflows", "chat-ui"]
}
+1 -1
View File
@@ -4,7 +4,7 @@
"tasks": {
"build": {
"inputs": [
"node_modules/@llama-flow/docs/**",
"node_modules/@llamaindex/workflow-docs/**",
"node_modules/@llamaindex/chat-ui-docs/**",
"src/**/*.ts",
"src/**/*.tsx",
@@ -1,5 +1,31 @@
# @llamaindex/cloudflare-worker-agent-test
## 0.0.173
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
## 0.0.172
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
## 0.0.171
### Patch Changes
- llamaindex@0.11.10
## 0.0.170
### Patch Changes
- llamaindex@0.11.9
## 0.0.169
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloudflare-worker-agent-test",
"version": "0.0.169",
"version": "0.0.173",
"type": "module",
"private": true,
"scripts": {
@@ -1,5 +1,23 @@
# @llamaindex/llama-parse-browser-test
## 0.0.72
### Patch Changes
- @llamaindex/cloud@4.0.17
## 0.0.71
### Patch Changes
- @llamaindex/cloud@4.0.16
## 0.0.70
### Patch Changes
- @llamaindex/cloud@4.0.15
## 0.0.69
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/llama-parse-browser-test",
"private": true,
"version": "0.0.69",
"version": "0.0.72",
"type": "module",
"scripts": {
"dev": "vite",
+26
View File
@@ -1,5 +1,31 @@
# @llamaindex/next-agent-test
## 0.1.173
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
## 0.1.172
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
## 0.1.171
### Patch Changes
- llamaindex@0.11.10
## 0.1.170
### Patch Changes
- llamaindex@0.11.9
## 0.1.169
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-agent-test",
"version": "0.1.169",
"version": "0.1.173",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,31 @@
# test-edge-runtime
## 0.1.172
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
## 0.1.171
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
## 0.1.170
### Patch Changes
- llamaindex@0.11.10
## 0.1.169
### Patch Changes
- llamaindex@0.11.9
## 0.1.168
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/nextjs-edge-runtime-test",
"version": "0.1.168",
"version": "0.1.172",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,44 @@
# @llamaindex/next-node-runtime
## 0.1.41
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
- @llamaindex/huggingface@0.1.17
- @llamaindex/readers@3.1.12
## 0.1.40
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
- @llamaindex/huggingface@0.1.16
- @llamaindex/readers@3.1.11
## 0.1.39
### Patch Changes
- llamaindex@0.11.10
## 0.1.38
### Patch Changes
- Updated dependencies [c5846bd]
- @llamaindex/readers@3.1.10
## 0.1.37
### Patch Changes
- llamaindex@0.11.9
- @llamaindex/huggingface@0.1.15
- @llamaindex/readers@3.1.9
## 0.1.36
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-node-runtime-test",
"version": "0.1.36",
"version": "0.1.41",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,31 @@
# vite-import-llamaindex
## 0.0.39
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
## 0.0.38
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
## 0.0.37
### Patch Changes
- llamaindex@0.11.10
## 0.0.36
### Patch Changes
- llamaindex@0.11.9
## 0.0.35
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "vite-import-llamaindex",
"private": true,
"version": "0.0.35",
"version": "0.0.39",
"type": "module",
"scripts": {
"build": "vite build",
@@ -1 +1,9 @@
{"root":["./src/main.ts","./vite.config.ts"],"version":"5.7.3"}
{
"root": [
"./src/main.ts",
"./vite.config.ts",
"./tsconfig.json"
],
"errors": true,
"version": "5.7.3"
}
@@ -1,5 +1,31 @@
# @llamaindex/waku-query-engine-test
## 0.0.173
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
## 0.0.172
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
## 0.0.171
### Patch Changes
- llamaindex@0.11.10
## 0.0.170
### Patch Changes
- llamaindex@0.11.9
## 0.0.169
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/waku-query-engine-test",
"version": "0.0.169",
"version": "0.0.173",
"type": "module",
"private": true,
"scripts": {
+1 -1
View File
@@ -10,7 +10,7 @@ import { mockLLMEvent } from "./utils.js";
let llm: LLM;
beforeEach(async () => {
Settings.llm = new Anthropic({
model: "claude-3-opus",
model: "claude-3.5-sonnet",
});
llm = Settings.llm;
});
+1 -1
View File
@@ -7,7 +7,7 @@
"dependencies": {
"@llamaindex/workflow": "1.1.1",
"llamaindex": "0.10.5",
"zod": "^3.23.8"
"zod": "^3.25.67"
},
"devDependencies": {
"tsx": "^4.19.1",
+1 -1
View File
@@ -27,6 +27,6 @@
"pg": "^8.12.0",
"pgvector": "0.2.0",
"tsx": "^4.19.3",
"zod": "^3.24.2"
"zod": "^3.25.67"
}
}
+171
View File
@@ -1,5 +1,176 @@
# examples
## 0.3.26
### Patch Changes
- Updated dependencies [d578889]
- Updated dependencies [0fcc92f]
- Updated dependencies [515a8b9]
- Updated dependencies [3cd8a57]
- Updated dependencies [f2dfd30]
- @llamaindex/core@0.6.13
- llamaindex@0.11.12
- @llamaindex/tools@0.1.3
- @llamaindex/bm25-retriever@0.0.2
- @llamaindex/cloud@4.0.17
- @llamaindex/node-parser@2.0.13
- @llamaindex/anthropic@0.3.15
- @llamaindex/assemblyai@0.1.12
- @llamaindex/clip@0.0.63
- @llamaindex/cohere@0.0.27
- @llamaindex/deepinfra@0.0.63
- @llamaindex/discord@0.1.12
- @llamaindex/google@0.3.12
- @llamaindex/huggingface@0.1.17
- @llamaindex/jinaai@0.0.23
- @llamaindex/mistral@0.1.13
- @llamaindex/mixedbread@0.0.27
- @llamaindex/notion@0.1.12
- @llamaindex/ollama@0.1.13
- @llamaindex/openai@0.4.7
- @llamaindex/perplexity@0.0.20
- @llamaindex/portkey-ai@0.0.55
- @llamaindex/replicate@0.0.55
- @llamaindex/astra@0.0.27
- @llamaindex/azure@0.1.24
- @llamaindex/chroma@0.0.27
- @llamaindex/elastic-search@0.1.13
- @llamaindex/firestore@1.0.20
- @llamaindex/milvus@0.1.22
- @llamaindex/mongodb@0.0.28
- @llamaindex/pinecone@0.1.13
- @llamaindex/postgres@0.0.56
- @llamaindex/qdrant@0.1.23
- @llamaindex/supabase@0.1.13
- @llamaindex/upstash@0.0.27
- @llamaindex/weaviate@0.0.28
- @llamaindex/vercel@0.1.13
- @llamaindex/voyage-ai@1.0.19
- @llamaindex/readers@3.1.12
- @llamaindex/workflow@1.1.13
- @llamaindex/deepseek@0.0.23
- @llamaindex/fireworks@0.0.23
- @llamaindex/groq@0.0.78
- @llamaindex/together@0.0.23
- @llamaindex/vllm@0.0.49
- @llamaindex/xai@0.0.10
## 0.3.25
### Patch Changes
- Updated dependencies [7039e1a]
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
- @llamaindex/core@0.6.12
- @llamaindex/google@0.3.11
- @llamaindex/cloud@4.0.16
- @llamaindex/node-parser@2.0.12
- @llamaindex/anthropic@0.3.14
- @llamaindex/assemblyai@0.1.11
- @llamaindex/clip@0.0.62
- @llamaindex/cohere@0.0.26
- @llamaindex/deepinfra@0.0.62
- @llamaindex/discord@0.1.11
- @llamaindex/huggingface@0.1.16
- @llamaindex/jinaai@0.0.22
- @llamaindex/mistral@0.1.12
- @llamaindex/mixedbread@0.0.26
- @llamaindex/notion@0.1.11
- @llamaindex/ollama@0.1.12
- @llamaindex/openai@0.4.6
- @llamaindex/perplexity@0.0.19
- @llamaindex/portkey-ai@0.0.54
- @llamaindex/replicate@0.0.54
- @llamaindex/astra@0.0.26
- @llamaindex/azure@0.1.23
- @llamaindex/chroma@0.0.26
- @llamaindex/elastic-search@0.1.12
- @llamaindex/firestore@1.0.19
- @llamaindex/milvus@0.1.21
- @llamaindex/mongodb@0.0.27
- @llamaindex/pinecone@0.1.12
- @llamaindex/postgres@0.0.55
- @llamaindex/qdrant@0.1.22
- @llamaindex/supabase@0.1.12
- @llamaindex/upstash@0.0.26
- @llamaindex/weaviate@0.0.27
- @llamaindex/vercel@0.1.12
- @llamaindex/voyage-ai@1.0.18
- @llamaindex/readers@3.1.11
- @llamaindex/tools@0.1.1
- @llamaindex/workflow@1.1.12
- @llamaindex/deepseek@0.0.22
- @llamaindex/fireworks@0.0.22
- @llamaindex/groq@0.0.77
- @llamaindex/together@0.0.22
- @llamaindex/vllm@0.0.48
- @llamaindex/xai@0.0.9
## 0.3.24
### Patch Changes
- Updated dependencies [096bf2b]
- Updated dependencies [c5846bd]
- @llamaindex/tools@0.1.0
- @llamaindex/readers@3.1.10
## 0.3.23
### Patch Changes
- Updated dependencies [a89e187]
- Updated dependencies [62699b7]
- Updated dependencies [c5b2691]
- Updated dependencies [d8ac8d3]
- @llamaindex/core@0.6.11
- @llamaindex/google@0.3.10
- @llamaindex/openai@0.4.5
- @llamaindex/cloud@4.0.15
- llamaindex@0.11.9
- @llamaindex/node-parser@2.0.11
- @llamaindex/anthropic@0.3.13
- @llamaindex/assemblyai@0.1.10
- @llamaindex/clip@0.0.61
- @llamaindex/cohere@0.0.25
- @llamaindex/deepinfra@0.0.61
- @llamaindex/discord@0.1.10
- @llamaindex/huggingface@0.1.15
- @llamaindex/jinaai@0.0.21
- @llamaindex/mistral@0.1.11
- @llamaindex/mixedbread@0.0.25
- @llamaindex/notion@0.1.10
- @llamaindex/ollama@0.1.11
- @llamaindex/perplexity@0.0.18
- @llamaindex/portkey-ai@0.0.53
- @llamaindex/replicate@0.0.53
- @llamaindex/astra@0.0.25
- @llamaindex/azure@0.1.22
- @llamaindex/chroma@0.0.25
- @llamaindex/elastic-search@0.1.11
- @llamaindex/firestore@1.0.18
- @llamaindex/milvus@0.1.20
- @llamaindex/mongodb@0.0.26
- @llamaindex/pinecone@0.1.11
- @llamaindex/postgres@0.0.54
- @llamaindex/qdrant@0.1.21
- @llamaindex/supabase@0.1.10
- @llamaindex/upstash@0.0.25
- @llamaindex/weaviate@0.0.26
- @llamaindex/vercel@0.1.11
- @llamaindex/voyage-ai@1.0.17
- @llamaindex/readers@3.1.9
- @llamaindex/tools@0.0.17
- @llamaindex/workflow@1.1.10
- @llamaindex/deepseek@0.0.21
- @llamaindex/fireworks@0.0.21
- @llamaindex/groq@0.0.76
- @llamaindex/together@0.0.21
- @llamaindex/vllm@0.0.47
- @llamaindex/xai@0.0.8
## 0.3.22
### Patch Changes
+1 -1
View File
@@ -1,4 +1,3 @@
import { tool } from "@llamaindex/core/tools";
import { openai } from "@llamaindex/openai";
import {
agent,
@@ -7,6 +6,7 @@ import {
multiAgent,
} from "@llamaindex/workflow";
import fs from "fs";
import { tool } from "llamaindex";
import os from "os";
import { z } from "zod";
+12 -3
View File
@@ -6,15 +6,24 @@ async function main() {
// Create an MCP server for filesystem tools
const server = mcp({
command: "npx",
args: ["-y", "@modelcontextprotocol/server-filesystem", "."],
args: ["-y", "@modelcontextprotocol/server-filesystem@latest", "."],
verbose: true,
});
// You can also connect to the MCP server using SSE
// See: https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse
//
// You can also connect to a remote MCP server using:
// 1. StreamableHTTP transport (recommended)
// See: https://modelcontextprotocol.io/docs/concepts/transports#streamable-http
// const server = mcp({
// url: "http://localhost:8000/mcp",
// verbose: true,
// });
// 2. Or using SSE transport (will be deprecated soon)
// See: https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse-deprecated
// const server = mcp({
// url: "http://localhost:8000/mcp",
// useSSETransport: true,
// verbose: true,
// });
try {
// Create an agent that uses the MCP tools
+36
View File
@@ -0,0 +1,36 @@
import { openai } from "@llamaindex/openai";
import { agent } from "@llamaindex/workflow";
import { createMemory, staticBlock } from "llamaindex";
/**
 * Simple example: Agent with Predefined Memory.
 *
 * Builds a memory containing one static block describing the user, hands it
 * to an agent workflow, runs two prompts and logs each response.
 */
async function simpleAgentMemoryExample() {
console.log("=== Simple Agent Memory Example ===");
// The static block only describes the user's profession and interests;
// it does not contain a name.
const memory = createMemory({
memoryBlocks: [
staticBlock({
content:
"The user is a software engineer who loves TypeScript and LlamaIndex.",
}),
],
});
// Create agent workflow
const workflow = agent({
name: "assistant",
llm: openai({ model: "gpt-4.1-nano" }),
memory,
});
// Test - the first run introduces the name John; the second run asks for it
// back, which presumably relies on the shared `memory` carrying the
// conversation between runs (NOTE(review): confirm against the agent API).
console.log("\n--- Testing Memory Context ---");
const result = await workflow.run("Hi, my name is John. Do you know me?");
console.log("Assistant Response:", result.data.result);
const result2 = await workflow.run("What is my name?");
console.log("Assistant Response:", result2.data.result);
}
// Run the example
simpleAgentMemoryExample().catch(console.error);
+58
View File
@@ -0,0 +1,58 @@
import { openai } from "@llamaindex/openai";
import { createMemory } from "llamaindex";
/**
 * Example: Basic Memory Usage with Factory.
 *
 * Stores three chat messages in a memory created with a 30-token limit, then
 * prints what `getLLM()` returns without an LLM argument and what it returns
 * when an OpenAI LLM instance is passed in.
 */
async function basicMemoryExample() {
  console.log("\n=== Example: Basic Memory Usage with Factory ===");

  const memory = createMemory({ tokenLimit: 30 });

  // Add messages to memory
  await memory.add({
    role: "user",
    content: "Hi, my name is John and I'm a software engineer.",
  });
  await memory.add({
    role: "assistant",
    content: "Hello John! Nice to meet you. How can I help you today?",
  });
  await memory.add({
    role: "user",
    content: "I love working with TypeScript and React.",
  });

  // Not every message is returned here because the token limit is set to 30.
  const limitedMessages = await memory.getLLM();
  console.log(
    `\nLLM messages (${limitedMessages.length} messages) limited by a small token limit:`,
  );
  for (const [position, message] of limitedMessages.entries()) {
    console.log(`${position + 1}. ${message.role}: ${message.content}`);
  }

  // When an LLM instance is passed to getLLM, the limit becomes that LLM's
  // window size instead of the token limit configured above.
  const llm = openai({ model: "gpt-4.1-mini" });
  const windowedMessages = await memory.getLLM(llm);

  // Now all messages are returned because the model's window is much larger.
  console.log(
    `\nLLM messages with LLM (${windowedMessages.length} messages) limited by LLM window size:`,
  );
  for (const [position, message] of windowedMessages.entries()) {
    console.log(`${position + 1}. ${message.role}: ${message.content}`);
  }
}
/**
 * Entry point: prints a banner, runs the basic memory example and logs any
 * error it rejects with instead of letting it propagate.
 */
async function main() {
  console.log("🧠 Basic Memory Factory Examples");
  console.log("===============================");

  await basicMemoryExample().catch((error) => {
    console.error("Error running basic memory examples:", error);
  });
}
main().catch(console.error);
+101
View File
@@ -0,0 +1,101 @@
import { openai } from "@llamaindex/openai";
import { createMemory, factExtractionBlock } from "llamaindex";
// Configure OpenAI
const llm = openai({ model: "gpt-4.1-mini" });
/**
 * Example: Memory with Fact Extraction.
 *
 * Creates a memory with a 100-token limit and a single long-term
 * fact-extraction block, feeds it an eight-message conversation one message
 * at a time, then prints the messages returned by `getLLM(llm)`.
 */
async function factExtractionMemoryExample() {
  console.log("\n=== Memory with Fact Extraction ===");

  // Memory configured with one fact extraction block
  const factBlock = factExtractionBlock({
    id: "user-facts",
    priority: 5,
    llm: llm,
    maxFacts: 10,
    isLongTerm: true,
  });
  const memory = createMemory([], {
    tokenLimit: 100,
    shortTermTokenLimitRatio: 0.7, // 70% for short-term, 30% for long-term
    memoryBlocks: [factBlock],
  });

  // A scripted conversation in which the user states several facts
  const dialogue = [
    {
      role: "user",
      content: "Hi, I'm Sarah and I work as a data scientist at Google.",
    },
    {
      role: "assistant",
      content:
        "Hello Sarah! It's great to meet you. Data science at Google must be exciting!",
    },
    {
      role: "user",
      content:
        "Yes, I specialize in machine learning and natural language processing.",
    },
    {
      role: "assistant",
      content: "That's impressive! ML and NLP are fascinating fields.",
    },
    {
      role: "user",
      content:
        "I have a PhD in Computer Science from Stanford, and I love hiking on weekends.",
    },
    {
      role: "assistant",
      content:
        "Wow, Stanford PhD! And hiking is a great way to unwind from tech work.",
    },
    {
      role: "user",
      content: "I also have two cats named Whiskers and Mittens.",
    },
    {
      role: "assistant",
      content:
        "Cats make wonderful companions! Whiskers and Mittens are cute names.",
    },
  ];

  // Messages are added strictly one after another, in conversation order
  console.log("Adding conversation to memory...");
  for (const entry of dialogue) {
    await memory.add(entry);
  }

  // Read the memory back - extracted facts should be part of the result
  const recalled = await memory.getLLM(llm);
  console.log("\nMessages with extracted facts:");
  for (const [position, message] of recalled.entries()) {
    console.log(
      `${position + 1}. ${message.role ?? "unknown"}: ${message.content}`,
    );
  }

  // Sample output recorded by the example's author:
  // Messages with extracted facts:
  // 1. assistant: Cats make wonderful companions! Whiskers and Mittens are cute names.
  // 2. user: I also have two cats named Whiskers and Mittens.
  // 3. assistant: Wow, Stanford PhD! And hiking is a great way to unwind from tech work.
  // 4. memory: Sarah works as a data scientist at Google
  // Sarah specializes in machine learning and natural language processing
  // Sarah has a PhD in Computer Science from Stanford
  // Sarah enjoys hiking on weekends
}
/**
 * Entry point: prints a banner, runs the fact extraction example and logs any
 * error it rejects with instead of letting it propagate.
 */
async function main() {
  console.log("🧠 Fact Extraction Memory Example");
  console.log("=================================");

  await factExtractionMemoryExample().catch((error) => {
    console.error("Error running fact extraction memory example:", error);
  });
}
main().catch(console.error);
+62
View File
@@ -0,0 +1,62 @@
import { openai } from "@llamaindex/openai";
import { createMemory, staticBlock } from "llamaindex";
// Configure OpenAI
const llm = openai({ model: "gpt-4.1-mini" });
/**
 * Example: Memory with Static Blocks.
 *
 * Creates a memory with a 30-token limit and one static block, adds three
 * chat messages, then prints the messages returned by `getLLM(llm)`.
 */
async function staticMemoryBlockExample() {
  console.log("\n=== Memory with Static Blocks ===");
  console.log("- Memory always include static block");
  console.log("- Memory cut off the messages within token limit\n");

  // Static block holding a fixed description of the user
  const profileBlock = staticBlock({
    content:
      "The user's name is John and he is a software engineer who loves TypeScript and LlamaIndex.",
  });

  const memory = createMemory([], {
    // A small token limit which is not enough for the whole conversation below
    tokenLimit: 30,
    memoryBlocks: [profileBlock],
  });

  // Add some messages to the memory
  await memory.add({
    role: "user",
    content: "What do you know about me?",
  });
  await memory.add({
    role: "assistant",
    content:
      "Based on our conversation, I know you're John, a software engineer who enjoys working with TypeScript and LlamaIndex!",
  });
  await memory.add({
    role: "user",
    content: "Which language does LlamaIndex support?",
  });

  // Get messages. Per the sample output below, the static block is always
  // included and only the last chat message fits within the token limit.
  const recalled = await memory.getLLM(llm);
  for (const [position, message] of recalled.entries()) {
    console.log(`${position + 1}. ${message.role}: ${message.content}`);
  }

  // Messages with static block:
  // 1. user: The user's name is John and he is a software engineer who loves TypeScript and LlamaIndex.
  // 2. user: Which language does LlamaIndex support?
}
/**
 * Entry point: runs the static memory block example and logs any error it
 * rejects with instead of letting it propagate.
 */
async function main() {
  await staticMemoryBlockExample().catch((error) => {
    console.error("Error running static memory blocks example:", error);
  });
}
main().catch(console.error);
+1 -1
View File
@@ -1,6 +1,6 @@
import { Settings } from "@llamaindex/core/global";
import { openai } from "@llamaindex/openai";
import { agentHandler, createWorkflow, zodEvent } from "@llamaindex/workflow";
import { Settings } from "llamaindex";
import { z } from "zod";
// Create LLM instance
+72
View File
@@ -0,0 +1,72 @@
<?xml version="1.0" encoding="UTF-8"?>
<company name="MidSizeCorp" founded="2008">
<division name="Engineering" head="Dana White">
<department name="Frontend" lead="Alex Kim">
<team name="Web">
<employee id="E01">
<name>Jordan Lee</name>
<role>Lead Developer</role>
<projects>
<project code="PRJ101" status="active">
<title>User Portal</title>
<deadline>2025-08-01</deadline>
<tasks>
<task id="T1011">
<description>Implement login page</description>
<due>2025-05-10</due>
</task>
<task id="T1012">
<description>Design dashboard</description>
<due>2025-05-20</due>
</task>
</tasks>
</project>
</projects>
</employee>
<employee id="E02">
<name>Riley Chen</name>
<role>UI Designer</role>
</employee>
</team>
<team name="Mobile">
<employee id="E03">
<name>Sam Patel</name>
<role>iOS Developer</role>
</employee>
</team>
</department>
<department name="Backend" lead="Morgan Reed">
<team name="API">
<employee id="E04">
<name>Taylor Jones</name>
<role>API Engineer</role>
</employee>
</team>
<team name="Database">
<employee id="E05">
<name>Casey Nguyen</name>
<role>DB Administrator</role>
</employee>
</team>
</department>
</division>
<division name="Marketing" head="Pat Morgan">
<department name="Digital" lead="Alex Rivera">
<team name="Content">
<employee id="M01">
<name>Charlie Brooks</name>
<role>Content Strategist</role>
</employee>
</team>
</department>
</division>
<headquarters location="Chicago, USA">
<address>
<street>789 Lake Shore Drive</street>
<city>Chicago</city>
<zip>60601</zip>
</address>
</headquarters>
</company>
Binary file not shown.
+1 -1
View File
@@ -59,7 +59,7 @@ async function main() {
const anthropic = new Anthropic({
apiKey: process.env.ANTHROPIC_API_KEY,
model: "claude-3-opus",
model: "claude-3.5-sonnet",
});
// Create an ReActAgent with the function tools
@@ -61,7 +61,7 @@ async function main() {
// Create an OpenAIAgent with the function tools
const agent = new ReActAgent({
llm: new Anthropic({
model: "claude-3-opus",
model: "claude-3.5-sonnet",
}),
tools: [functionTool, functionTool2],
});
@@ -1,5 +1,5 @@
import { Anthropic } from "@llamaindex/anthropic";
import { ChatMemoryBuffer, SimpleChatEngine } from "llamaindex";
import { createMemory, SimpleChatEngine } from "llamaindex";
import { stdin as input, stdout as output } from "node:process";
import readline from "node:readline/promises";
@@ -9,14 +9,12 @@ import readline from "node:readline/promises";
model: "claude-3-7-sonnet",
});
// chatHistory will store all the messages in the conversation
const chatHistory = new ChatMemoryBuffer({
chatHistory: [
{
content: "You want to talk in rhymes.",
role: "system",
},
],
});
const chatHistory = createMemory([
{
content: "You want to talk in rhymes.",
role: "system",
},
]);
const chatEngine = new SimpleChatEngine({
llm,
memory: chatHistory,
+56 -8
View File
@@ -1,14 +1,16 @@
import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
import fs from "fs";
import { tool } from "llamaindex";
import { z } from "zod";
(async () => {
if (!process.env.GOOGLE_API_KEY) {
throw new Error("Please set the GOOGLE_API_KEY environment variable.");
}
const gemini = new Gemini({
model: GEMINI_MODEL.GEMINI_PRO_1_5,
});
const result = await gemini.chat({
const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
// normal chat
const result = await llm.chat({
messages: [
{ content: "You want to talk in rhymes.", role: "system" },
{
@@ -18,10 +20,10 @@ import fs from "fs";
},
],
});
console.log(result);
console.log("\n normal chat: \n", result);
// chat with file
const resultWithFile = await gemini.chat({
const resultWithFile = await llm.chat({
messages: [
{
role: "user",
@@ -39,6 +41,52 @@ import fs from "fs";
},
],
});
console.log("\n chat with file: \n", resultWithFile);
console.log(resultWithFile);
// chat with image base64
// The image is read from disk and sent inline as a base64 string. The file is
// a .jpg, so it is declared as image/jpeg (it was previously labelled
// image/png, which does not match the file extension).
const resultWithImageFile = await llm.chat({
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "What's in this image?",
        },
        {
          type: "image",
          data: fs
            .readFileSync("./multimodal/data/60.jpg")
            .toString("base64"),
          mimeType: "image/jpeg",
        },
      ],
    },
  ],
});
console.log("\n chat with image base64: \n", resultWithImageFile);
// chat with tool
const resultWithTool = await llm.chat({
messages: [
{
content: "What's the weather in Tokyo?",
role: "user",
},
],
tools: [
tool({
name: "weather",
description: "Get the weather",
parameters: z.object({
location: z.string().describe("The location to get the weather for"),
}),
execute: ({ location }) => {
console.log("weather", location);
return `The weather in ${location} is sunny and hot`;
},
}),
],
});
console.log("\n chat with tool: \n", resultWithTool.message.options); // should have toolCall
})();
+8 -5
View File
@@ -1,11 +1,14 @@
import { Gemini, GEMINI_MODEL, GeminiVertexSession } from "@llamaindex/google";
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
(async () => {
const gemini = new Gemini({
model: GEMINI_MODEL.GEMINI_PRO,
session: new GeminiVertexSession(),
const llm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH,
vertex: {
project: "your-cloud-project", // update to your cloud project
location: "us-central1",
},
});
const result = await gemini.chat({
const result = await llm.chat({
messages: [
{ content: "You want to talk in rhymes.", role: "system" },
{
+10
View File
@@ -16,9 +16,19 @@ async function main() {
console.log("🚀 Initializing Gemini Live API example...");
// Server-side (token creation):
const serverllm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to generate ephemeral key
});
const ephemeralKey = await serverllm.live.getEphemeralKey();
// Client-side (Live API connection):
const llm = gemini({
apiKey: ephemeralKey, // use ephemeral key for client-side
model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
voiceName: "Zephyr",
httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to init client with ephemeral key
});
console.log("📡 Connecting to Gemini Live session...");
+12 -5
View File
@@ -3,8 +3,18 @@ import { liveEvents } from "llamaindex";
import { saveWavFile } from "./util";
async function main() {
const llm = gemini({
// Server-side (token creation):
const serverllm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to generate ephemeral key
});
const ephemeralKey = await serverllm.live.getEphemeralKey();
// Client-side (Live API connection):
const llm = gemini({
apiKey: ephemeralKey, // use ephemeral key for client-side
model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to init client with ephemeral key
});
const session = await llm.live.connect();
@@ -23,10 +33,7 @@ async function main() {
content: "Say something about you for 10 seconds",
role: "user",
});
} else if (
liveEvents.audio.include(event) &&
typeof event.data === "string"
) {
} else if (liveEvents.audio.include(event)) {
const chunk = Buffer.from(event.data, "base64");
audioChunks.push(chunk);
console.log(`Received audio chunk: ${chunk.length} bytes`);
+1 -2
View File
@@ -1,6 +1,5 @@
import { ModalityType } from "@llamaindex/core/schema";
import { tool } from "@llamaindex/core/tools";
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
import { ModalityType, tool } from "llamaindex";
import { liveEvents } from "llamaindex";
import { z } from "zod";
@@ -0,0 +1,24 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
@@ -0,0 +1,54 @@
# OpenAI Realtime Chat with LlamaIndex
This is a demo application showcasing real-time audio and text chat capabilities using OpenAI's GPT-4 with voice through LlamaIndex. The application demonstrates bidirectional audio communication and text chat with an AI assistant.
## Features
- Real-time voice communication with GPT-4
- Text-based chat interface
- WebRTC-based audio streaming
- Bidirectional communication (both text and voice)
- React + TypeScript implementation
## Prerequisites
- Node.js (v18 or higher)
- OpenAI API key with access to GPT-4 voice models
- Modern browser with WebRTC support
## Getting Started
1. Install dependencies:
```bash
pnpm install
```
2. Start the development server:
```bash
pnpm run dev
```
## Usage
The application provides a simple interface where you can:
- Start/Stop a chat session
- Speak to the AI assistant through your microphone
- Receive audio responses from the assistant
- See text transcripts of the conversation
## Technical Details
This project uses:
- LlamaIndex for AI interaction management
- WebRTC for real-time audio streaming
- React for the UI
- Vite for development and building
- TypeScript for type safety
@@ -0,0 +1,28 @@
import js from "@eslint/js";
import reactHooks from "eslint-plugin-react-hooks";
import reactRefresh from "eslint-plugin-react-refresh";
import globals from "globals";
import tseslint from "typescript-eslint";
// ESLint flat configuration for this example, assembled with the
// typescript-eslint `config` helper.
export default tseslint.config(
// Skip the "dist" directory.
{ ignores: ["dist"] },
{
// Start from the recommended rule sets of @eslint/js and typescript-eslint.
extends: [js.configs.recommended, ...tseslint.configs.recommended],
// This entry applies to TypeScript sources only (.ts and .tsx).
files: ["**/*.{ts,tsx}"],
languageOptions: {
ecmaVersion: 2020,
// Declares the browser globals provided by the `globals` package.
globals: globals.browser,
},
plugins: {
"react-hooks": reactHooks,
"react-refresh": reactRefresh,
},
rules: {
// Enable the react-hooks plugin's recommended rules.
...reactHooks.configs.recommended.rules,
// Reported as a warning only; constant exports are allowed.
"react-refresh/only-export-components": [
"warn",
{ allowConstantExport: true },
],
},
},
);
@@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Vite + React + TS</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>
@@ -0,0 +1,29 @@
{
"name": "open-ai-realtime",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc -b && vite build",
"lint": "eslint .",
"preview": "vite preview"
},
"dependencies": {
"react": "^19.1.0",
"react-dom": "^19.1.0"
},
"devDependencies": {
"@eslint/js": "^9.25.0",
"@types/react": "^19.1.2",
"@types/react-dom": "^19.1.2",
"@vitejs/plugin-react": "^4.5.2",
"eslint": "^9.25.0",
"eslint-plugin-react-hooks": "^5.2.0",
"eslint-plugin-react-refresh": "^0.4.19",
"globals": "^16.0.0",
"typescript": "~5.8.3",
"typescript-eslint": "^8.30.1",
"vite": "^6.3.5"
}
}
@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="31.88" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 257"><defs><linearGradient id="IconifyId1813088fe1fbc01fb466" x1="-.828%" x2="57.636%" y1="7.652%" y2="78.411%"><stop offset="0%" stop-color="#41D1FF"></stop><stop offset="100%" stop-color="#BD34FE"></stop></linearGradient><linearGradient id="IconifyId1813088fe1fbc01fb467" x1="43.376%" x2="50.316%" y1="2.242%" y2="89.03%"><stop offset="0%" stop-color="#FFEA83"></stop><stop offset="8.333%" stop-color="#FFDD35"></stop><stop offset="100%" stop-color="#FFA800"></stop></linearGradient></defs><path fill="url(#IconifyId1813088fe1fbc01fb466)" d="M255.153 37.938L134.897 252.976c-2.483 4.44-8.862 4.466-11.382.048L.875 37.958c-2.746-4.814 1.371-10.646 6.827-9.67l120.385 21.517a6.537 6.537 0 0 0 2.322-.004l117.867-21.483c5.438-.991 9.574 4.796 6.877 9.62Z"></path><path fill="url(#IconifyId1813088fe1fbc01fb467)" d="M185.432.063L96.44 17.501a3.268 3.268 0 0 0-2.634 3.014l-5.474 92.456a3.268 3.268 0 0 0 3.997 3.378l24.777-5.718c2.318-.535 4.413 1.507 3.936 3.838l-7.361 36.047c-.495 2.426 1.782 4.5 4.151 3.78l15.304-4.649c2.372-.72 4.652 1.36 4.15 3.788l-11.698 56.621c-.732 3.542 3.979 5.473 5.943 2.437l1.313-2.028l72.516-144.72c1.215-2.423-.88-5.186-3.54-4.672l-25.505 4.922c-2.396.462-4.435-1.77-3.759-4.114l16.646-57.705c.677-2.35-1.37-4.583-3.769-4.113Z"></path></svg>

After

Width:  |  Height:  |  Size: 1.5 KiB

@@ -0,0 +1,183 @@
import { openai } from "@llamaindex/openai";
import { liveEvents, LiveLLMSession, ModalityType } from "llamaindex";
import { useEffect, useRef, useState } from "react";
/**
 * Microphone icon rendered as an inline SVG.
 *
 * The two path elements are drawn in both states; the two line elements are
 * added only while `isConnected` is true.
 */
const MicIcon = ({ isConnected }: { isConnected: boolean }) => (
  <svg
    xmlns="http://www.w3.org/2000/svg"
    viewBox="0 0 24 24"
    fill="currentColor"
    strokeWidth="2"
    strokeLinecap="round"
    strokeLinejoin="round"
  >
    {/* Shared by the connected and disconnected states */}
    <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z" />
    <path d="M19 10v2a7 7 0 0 1-14 0v-2" />
    {/* Extra lines shown only while connected */}
    {isConnected && (
      <>
        <line x1="12" y1="19" x2="12" y2="23" />
        <line x1="8" y1="23" x2="16" y2="23" />
      </>
    )}
  </svg>
);
/**
 * Renders three "wave" elements inside a "wave-animation" wrapper, each with
 * an animation delay 0.2s larger than the previous one (0s, 0.2s, 0.4s).
 */
const WaveAnimation = () => {
  const waves = Array.from({ length: 3 }, (_, waveIndex) => (
    <div
      key={waveIndex}
      className="wave"
      style={{ animationDelay: `${waveIndex * 0.2}s` }}
    />
  ));

  return <div className="wave-animation">{waves}</div>;
};
/**
 * Voice chat component.
 *
 * A single button starts or stops a live session. Starting a session:
 *  1. requests microphone access,
 *  2. asks for an OpenAI API key via `prompt`,
 *  3. exchanges it for an ephemeral key and connects a live session with
 *     text and audio response modalities,
 *  4. appends incoming text events to the visible message list.
 *
 * Remote audio is played through a hidden `<audio>` element.
 */
export const AudioChat = () => {
  const [isConnected, setIsConnected] = useState(false);
  const [messages, setMessages] = useState<
    Array<{ role: string; content: string }>
  >([]);
  const [status, setStatus] = useState<string>("");
  const audioRef = useRef<HTMLAudioElement>(null);
  const sessionRef = useRef<LiveLLMSession | null>(null);
  const [stream, setStream] = useState<MediaStream | null>(null);
  const messagesEndRef = useRef<HTMLDivElement>(null);

  const scrollToBottom = () => {
    messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
  };

  // Keep the newest message in view.
  useEffect(() => {
    scrollToBottom();
  }, [messages]);

  // Stop the microphone tracks when the stream is replaced or the component
  // unmounts.
  useEffect(() => {
    return () => {
      if (stream) {
        stream.getTracks().forEach((track) => track.stop());
      }
    };
  }, [stream]);

  /**
   * Acquires the microphone, connects the live session and consumes its
   * events until the event stream ends. On any failure the microphone is
   * released again and an error status is shown.
   */
  const startChat = async () => {
    // Tracked outside the try block so the catch handler can release the
    // microphone if a later step fails (e.g. the API key prompt is cancelled).
    let acquiredStream: MediaStream | null = null;

    try {
      setStatus("Initializing microphone...");
      const userStream = await navigator.mediaDevices.getUserMedia({
        audio: true,
      });
      acquiredStream = userStream;
      setStream(userStream);

      setStatus("Connecting to AI...");
      const apiKey = prompt("Please enter your OpenAI API key:");
      if (!apiKey) {
        throw new Error("API key is required");
      }

      // move this call to the server side for security reasons
      // Do not store the API key in the frontend!
      const serverllm = openai({
        apiKey: apiKey,
        model: "gpt-4o-realtime-preview-2025-06-03",
      });
      const tempKey = await serverllm.live.getEphemeralKey();

      const llm = openai({
        apiKey: tempKey,
        model: "gpt-4o-realtime-preview-2025-06-03",
      });
      const session = await llm.live.connect({
        systemInstruction: "You are a helpful assistant who speaks naturally.",
        responseModality: [ModalityType.TEXT, ModalityType.AUDIO],
        audioConfig: {
          stream: userStream,
          // Route the remote audio into the hidden <audio> element.
          onTrack: (remoteStream) => {
            if (audioRef.current && remoteStream) {
              audioRef.current.srcObject = remoteStream;
              audioRef.current.play().catch(console.error);
            }
          },
        },
      });

      sessionRef.current = session;
      setIsConnected(true);
      setStatus("Connected! Listening...");

      for await (const event of session.streamEvents()) {
        if (liveEvents.open.include(event)) {
          // Show the greeting locally and send the same text to the session.
          setMessages((prev) => [
            ...prev,
            {
              role: "user",
              content: "Hello, I'm ready to chat!",
            },
          ]);
          session.sendMessage({
            content: "Hello, I'm ready to chat!",
            role: "user",
          });
        } else if (liveEvents.text.include(event)) {
          setMessages((prev) => [
            ...prev,
            {
              role: "assistant",
              content: event.text,
            },
          ]);
        }
      }
    } catch (error) {
      console.error("Error starting chat:", error);
      // Release the microphone explicitly: the state updates made in this
      // call can be batched together, in which case the cleanup effect above
      // never observes the stream and would leave its tracks running.
      acquiredStream?.getTracks().forEach((track) => track.stop());
      setStream(null);
      setStatus("Error connecting. Please try again.");
      setIsConnected(false);
    }
  };

  /**
   * Disconnects the live session, stops the microphone, detaches the remote
   * audio and resets the UI state.
   */
  const stopChat = async () => {
    setStatus("Disconnecting...");
    if (sessionRef.current) {
      await sessionRef.current.disconnect();
      sessionRef.current = null;
    }
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
      setStream(null);
    }
    if (audioRef.current) {
      audioRef.current.srcObject = null;
    }
    setIsConnected(false);
    setStatus("");
  };

  return (
    <div className="audio-chat-container">
      <h1>AI Voice Chat</h1>
      <div className="messages-container">
        {messages.map((msg, idx) => (
          <div key={idx} className={`message ${msg.role}`}>
            {msg.content}
          </div>
        ))}
        {/* Scroll anchor used by scrollToBottom */}
        <div ref={messagesEndRef} />
      </div>
      <div className="controls">
        {status && <div className="status-indicator">{status}</div>}
        <button
          className={`mic-button ${isConnected ? "connected" : ""}`}
          onClick={isConnected ? stopChat : startChat}
          title={isConnected ? "Stop Chat" : "Start Chat"}
        >
          <MicIcon isConnected={isConnected} />
          {isConnected && <WaveAnimation />}
        </button>
        <audio ref={audioRef} style={{ display: "none" }} />
      </div>
    </div>
  );
};
@@ -0,0 +1,322 @@
/* Theme custom properties. Several of them (background, text, chat and
   gradient colors) are read through var() by the rules that follow. */
:root {
--primary-color: #646cff;
--secondary-color: #535bf2;
--background-dark: #1a1a1a;
--chat-bg: #242424;
--text-primary: #ffffff;
--text-secondary: #888888;
--success-color: #4caf50;
--error-color: #f44336;
/* Start and end stops of the linear gradients used for accents. */
--gradient-start: #4776e6;
--gradient-end: #8e54e9;
}
body {
background-color: var(--background-dark);
color: var(--text-primary);
margin: 0;
min-height: 100vh;
font-family:
-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu,
Cantarell, "Open Sans", "Helvetica Neue", sans-serif;
}
#root {
max-width: 1280px;
height: 100vh;
margin: 0 auto;
padding: 2rem;
display: flex;
align-items: center;
justify-content: center;
}
.logo {
height: 6em;
padding: 1.5em;
will-change: filter;
transition: filter 300ms;
}
.logo:hover {
filter: drop-shadow(0 0 2em #646cffaa);
}
.logo.react:hover {
filter: drop-shadow(0 0 2em #61dafbaa);
}
@keyframes logo-spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
@media (prefers-reduced-motion: no-preference) {
a:nth-of-type(2) .logo {
animation: logo-spin infinite 20s linear;
}
}
.card {
padding: 2em;
}
.read-the-docs {
color: #888;
}
.audio-chat-container {
display: flex;
flex-direction: column;
gap: 2rem;
width: 100%;
max-width: 800px;
height: 80vh;
margin: 0 auto;
padding: 2rem;
background: var(--chat-bg);
border-radius: 24px;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
position: relative;
overflow: hidden;
}
.audio-chat-container::before {
content: "";
position: absolute;
top: 0;
left: 0;
right: 0;
height: 4px;
background: linear-gradient(
to right,
var(--gradient-start),
var(--gradient-end)
);
}
.audio-chat-container h1 {
font-size: 2.5rem;
margin: 0;
background: linear-gradient(
to right,
var(--gradient-start),
var(--gradient-end)
);
-webkit-background-clip: text;
background-clip: text;
color: transparent;
text-align: center;
}
.messages-container {
display: flex;
flex-direction: column;
gap: 1rem;
flex: 1;
overflow-y: auto;
padding: 1rem;
border-radius: 16px;
background: rgba(255, 255, 255, 0.05);
backdrop-filter: blur(10px);
margin: 1rem 0;
}
.message {
padding: 1rem 1.5rem;
border-radius: 16px;
max-width: 80%;
text-align: left;
animation: messageSlide 0.3s ease-out;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
@keyframes messageSlide {
from {
opacity: 0;
transform: translateY(20px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
.message.user {
background: linear-gradient(
135deg,
var(--gradient-start),
var(--gradient-end)
);
align-self: flex-end;
margin-left: 20%;
color: white;
}
.message.assistant {
background: rgba(255, 255, 255, 0.1);
align-self: flex-start;
margin-right: 20%;
border: 1px solid rgba(255, 255, 255, 0.1);
}
.controls {
display: flex;
justify-content: center;
align-items: center;
padding: 2rem;
position: relative;
}
.mic-button {
width: 80px;
height: 80px;
border-radius: 50%;
border: none;
background: linear-gradient(
135deg,
var(--gradient-start),
var(--gradient-end)
);
color: white;
cursor: pointer;
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
display: flex;
align-items: center;
justify-content: center;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
position: relative;
overflow: hidden;
}
.mic-button::before {
content: "";
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
background: linear-gradient(
135deg,
rgba(255, 255, 255, 0.1),
rgba(255, 255, 255, 0)
);
border-radius: 50%;
transition: transform 0.3s ease;
}
.mic-button:hover {
transform: scale(1.05);
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.3);
}
.mic-button:hover::before {
transform: translateY(-100%);
}
.mic-button.connected {
background: var(--error-color);
animation: pulseError 2s infinite;
}
.mic-button svg {
width: 32px;
height: 32px;
filter: drop-shadow(0 2px 4px rgba(0, 0, 0, 0.2));
transition: transform 0.3s ease;
}
.mic-button:hover svg {
transform: scale(1.1);
}
@keyframes pulseError {
0% {
box-shadow: 0 0 0 0 rgba(244, 67, 54, 0.4);
}
70% {
box-shadow: 0 0 0 20px rgba(244, 67, 54, 0);
}
100% {
box-shadow: 0 0 0 0 rgba(244, 67, 54, 0);
}
}
/* Status indicator */
.status-indicator {
position: absolute;
top: -30px;
left: 50%;
transform: translateX(-50%);
font-size: 0.9rem;
color: var(--text-secondary);
opacity: 0;
transition: opacity 0.3s ease;
}
.controls:hover .status-indicator {
opacity: 1;
}
/* Scrollbar styling */
.messages-container::-webkit-scrollbar {
width: 8px;
}
.messages-container::-webkit-scrollbar-track {
background: rgba(255, 255, 255, 0.05);
border-radius: 4px;
}
.messages-container::-webkit-scrollbar-thumb {
background: linear-gradient(var(--gradient-start), var(--gradient-end));
border-radius: 4px;
}
.messages-container::-webkit-scrollbar-thumb:hover {
background: linear-gradient(var(--gradient-end), var(--gradient-start));
}
/* Wave Animation */
.wave-animation {
position: absolute;
bottom: -15px;
left: 50%;
transform: translateX(-50%);
display: flex;
gap: 4px;
}
.wave {
width: 4px;
height: 15px;
background: currentColor;
border-radius: 2px;
animation: wave 0.5s ease-in-out infinite;
}
@keyframes wave {
0%,
100% {
transform: scaleY(0.5);
}
50% {
transform: scaleY(1.5);
}
}
/* Loading state */
.mic-button.loading {
animation: rotate 1s linear infinite;
}
@keyframes rotate {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
@@ -0,0 +1,10 @@
import { StrictMode } from "react";
import { createRoot } from "react-dom/client";
import { AudioChat } from "./audio-chat.tsx";
import "./index.css";
// Resolve the mount point explicitly instead of using a non-null assertion, so
// a missing #root element produces a descriptive error rather than an opaque
// failure inside createRoot.
const rootElement = document.getElementById("root");
if (!rootElement) {
  throw new Error('Unable to mount the app: element with id "root" was not found');
}
createRoot(rootElement).render(
  <StrictMode>
    <AudioChat />
  </StrictMode>,
);
@@ -0,0 +1 @@
/// <reference types="vite/client" />
@@ -0,0 +1,26 @@
{
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
"target": "ES2020",
"useDefineForClassFields": true,
"lib": ["ES2020", "DOM", "DOM.Iterable"],
"module": "ESNext",
"skipLibCheck": true,
/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"moduleDetection": "force",
"noEmit": true,
"jsx": "react-jsx",
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedSideEffectImports": true
},
"include": ["src"]
}
@@ -0,0 +1,4 @@
{
"files": [],
"references": [{ "path": "./tsconfig.app.json" }]
}
@@ -0,0 +1,7 @@
import react from "@vitejs/plugin-react";
import { defineConfig } from "vite";
// https://vite.dev/config/
export default defineConfig({
plugins: [react()],
});
+48 -47
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/examples",
"version": "0.3.22",
"version": "0.3.26",
"private": true,
"scripts": {
"lint": "eslint .",
@@ -11,51 +11,52 @@
"@azure/cosmos": "^4.1.1",
"@azure/identity": "^4.4.1",
"@azure/search-documents": "^12.1.0",
"@llamaindex/anthropic": "^0.3.12",
"@llamaindex/assemblyai": "^0.1.9",
"@llamaindex/astra": "^0.0.24",
"@llamaindex/azure": "^0.1.21",
"@llamaindex/chroma": "^0.0.24",
"@llamaindex/clip": "^0.0.60",
"@llamaindex/cloud": "^4.0.14",
"@llamaindex/cohere": "^0.0.24",
"@llamaindex/core": "^0.6.10",
"@llamaindex/deepinfra": "^0.0.60",
"@llamaindex/deepseek": "^0.0.20",
"@llamaindex/discord": "^0.1.9",
"@llamaindex/elastic-search": "^0.1.10",
"@llamaindex/anthropic": "^0.3.15",
"@llamaindex/assemblyai": "^0.1.12",
"@llamaindex/astra": "^0.0.27",
"@llamaindex/azure": "^0.1.24",
"@llamaindex/bm25-retriever": "^0.0.2",
"@llamaindex/chroma": "^0.0.27",
"@llamaindex/clip": "^0.0.63",
"@llamaindex/cloud": "^4.0.17",
"@llamaindex/cohere": "^0.0.27",
"@llamaindex/core": "^0.6.13",
"@llamaindex/deepinfra": "^0.0.63",
"@llamaindex/deepseek": "^0.0.23",
"@llamaindex/discord": "^0.1.12",
"@llamaindex/elastic-search": "^0.1.13",
"@llamaindex/env": "^0.1.30",
"@llamaindex/firestore": "^1.0.17",
"@llamaindex/fireworks": "^0.0.20",
"@llamaindex/google": "^0.3.9",
"@llamaindex/groq": "^0.0.75",
"@llamaindex/huggingface": "^0.1.14",
"@llamaindex/jinaai": "^0.0.20",
"@llamaindex/milvus": "^0.1.19",
"@llamaindex/mistral": "^0.1.10",
"@llamaindex/mixedbread": "^0.0.24",
"@llamaindex/mongodb": "^0.0.25",
"@llamaindex/node-parser": "^2.0.10",
"@llamaindex/notion": "^0.1.9",
"@llamaindex/ollama": "^0.1.10",
"@llamaindex/openai": "^0.4.4",
"@llamaindex/perplexity": "^0.0.17",
"@llamaindex/pinecone": "^0.1.10",
"@llamaindex/portkey-ai": "^0.0.52",
"@llamaindex/postgres": "^0.0.53",
"@llamaindex/qdrant": "^0.1.20",
"@llamaindex/readers": "^3.1.8",
"@llamaindex/replicate": "^0.0.52",
"@llamaindex/supabase": "^0.1.9",
"@llamaindex/together": "^0.0.20",
"@llamaindex/tools": "^0.0.16",
"@llamaindex/upstash": "^0.0.24",
"@llamaindex/vercel": "^0.1.10",
"@llamaindex/vllm": "^0.0.46",
"@llamaindex/voyage-ai": "^1.0.16",
"@llamaindex/weaviate": "^0.0.25",
"@llamaindex/workflow": "^1.1.9",
"@llamaindex/xai": "workspace:^0.0.7",
"@llamaindex/firestore": "^1.0.20",
"@llamaindex/fireworks": "^0.0.23",
"@llamaindex/google": "^0.3.12",
"@llamaindex/groq": "^0.0.78",
"@llamaindex/huggingface": "^0.1.17",
"@llamaindex/jinaai": "^0.0.23",
"@llamaindex/milvus": "^0.1.22",
"@llamaindex/mistral": "^0.1.13",
"@llamaindex/mixedbread": "^0.0.27",
"@llamaindex/mongodb": "^0.0.28",
"@llamaindex/node-parser": "^2.0.13",
"@llamaindex/notion": "^0.1.12",
"@llamaindex/ollama": "^0.1.13",
"@llamaindex/openai": "^0.4.7",
"@llamaindex/perplexity": "^0.0.20",
"@llamaindex/pinecone": "^0.1.13",
"@llamaindex/portkey-ai": "^0.0.55",
"@llamaindex/postgres": "^0.0.56",
"@llamaindex/qdrant": "^0.1.23",
"@llamaindex/readers": "^3.1.12",
"@llamaindex/replicate": "^0.0.55",
"@llamaindex/supabase": "^0.1.13",
"@llamaindex/together": "^0.0.23",
"@llamaindex/tools": "^0.1.3",
"@llamaindex/upstash": "^0.0.27",
"@llamaindex/vercel": "^0.1.13",
"@llamaindex/vllm": "^0.0.49",
"@llamaindex/voyage-ai": "^1.0.19",
"@llamaindex/weaviate": "^0.0.28",
"@llamaindex/workflow": "^1.1.13",
"@llamaindex/xai": "workspace:^0.0.10",
"@notionhq/client": "^2.2.15",
"@pinecone-database/pinecone": "^4.0.0",
"@vercel/postgres": "^0.10.0",
@@ -64,11 +65,11 @@
"commander": "^12.1.0",
"dotenv": "^16.4.5",
"js-tiktoken": "^1.0.14",
"llamaindex": "^0.11.8",
"llamaindex": "^0.11.12",
"mongodb": "6.7.0",
"postgres": "^3.4.4",
"wikipedia": "^2.1.2",
"zod": "^3.23.8"
"zod": "^3.25.67"
},
"devDependencies": {
"@types/node": "^22.9.0",
+8 -13
View File
@@ -2,11 +2,7 @@ import { stdin as input, stdout as output } from "node:process";
import readline from "node:readline/promises";
import { OpenAI } from "@llamaindex/openai";
import {
ChatSummaryMemoryBuffer,
Settings,
SimpleChatEngine,
} from "llamaindex";
import { createMemory, Settings, SimpleChatEngine } from "llamaindex";
if (process.env.NODE_ENV === "development") {
Settings.callbackManager.on("llm-end", (event) => {
@@ -15,10 +11,13 @@ if (process.env.NODE_ENV === "development") {
}
async function main() {
// Set maxTokens to 75% of the context window size of 4096
// This will trigger the summarizer once the chat history reaches 25% of the context window size (1024 tokens)
const llm = new OpenAI({ model: "gpt-3.5-turbo", maxTokens: 4096 * 0.75 });
const chatHistory = new ChatSummaryMemoryBuffer({ llm });
const llm = new OpenAI({ model: "gpt-3.5-turbo" });
const chatHistory = createMemory([
{
content: "You are a helpful assistant.",
role: "system",
},
]);
const chatEngine = new SimpleChatEngine({ llm });
const rl = readline.createInterface({ input, output });
@@ -29,10 +28,6 @@ async function main() {
chatHistory,
stream: true,
});
if (chatHistory.getLastSummary()) {
// Print the summary of the conversation so far that is produced by the SummaryChatHistory
console.log(`Summary: ${chatHistory.getLastSummary()?.content}`);
}
for await (const chunk of stream) {
process.stdout.write(chunk.response);
}
+5
View File
@@ -1,3 +1,4 @@
import { OpenAIEmbedding } from "@llamaindex/openai";
import {
Document,
SentenceSplitter,
@@ -7,6 +8,10 @@ import {
import { OldSentenceSplitter } from "./old-sentence-splitter";
export const STORAGE_DIR = "./data";
Settings.embedModel = new OpenAIEmbedding({
model: "text-embedding-3-small",
});
// Update node parser
(async () => {
// generate a document with a very long sentence (9000 words long)
+4 -1
View File
@@ -15,11 +15,14 @@
"start:llamaparse-json": "node --import tsx ./src/llamaparse-json.ts",
"start:discord": "node --import tsx ./src/discord.ts",
"start:json": "node --import tsx ./src/json.ts",
"start:obsidian": "node --import tsx ./src/obsidian.ts"
"start:obsidian": "node --import tsx ./src/obsidian.ts",
"start:xml": "node --import tsx ./src/xml.ts",
"start:excel": "node --import tsx ./src/excel.ts"
},
"dependencies": {
"@llamaindex/cloud": "workspace:* || ^2.0.24",
"@llamaindex/readers": "workspace:* || ^1.0.25",
"@llamaindex/excel": "workspace:*",
"llamaindex": "workspace:* || ^0.8.37"
},
"devDependencies": {
+20
View File
@@ -0,0 +1,20 @@
import { ExcelReader } from "@llamaindex/excel";
/**
 * Loads the sample Excel sheet with `ExcelReader` and prints the text of every
 * document it produces, each followed by a separator line.
 */
async function main() {
  // Configure the Excel reader
  const excelReader = new ExcelReader({
    sheetSpecifier: 0,
    concatRows: true,
    fieldSeparator: ",",
    keyValueSeparator: ":",
  });

  // Load the spreadsheet and dump each resulting document
  const loadedDocs = await excelReader.loadData(
    "../data/sample_excel_sheet.xls",
  );
  loadedDocs.forEach((entry) => {
    console.log(entry.text);
    console.log("----");
  });
}
main().catch(console.error);
+1 -1
View File
@@ -1,4 +1,4 @@
import { LlamaParseReader } from "@llamaindex/cloud";
import { LlamaParseReader } from "@llamaindex/cloud/reader";
import { openai, OpenAIEmbedding } from "@llamaindex/openai";
import { Settings, VectorStoreIndex } from "llamaindex";
@@ -1,4 +1,4 @@
import { LlamaParseReader } from "@llamaindex/cloud";
import { LlamaParseReader } from "@llamaindex/cloud/reader";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import { VectorStoreIndex } from "llamaindex";
+16
View File
@@ -0,0 +1,16 @@
import { XMLReader } from "@llamaindex/readers/xml";
/**
 * Loads the sample XML file with `XMLReader` and prints the text of every
 * document it produces, each followed by a separator line.
 */
async function main() {
  // Configure the XML reader
  const xmlReader = new XMLReader({
    splitLevel: 2,
  });

  // Load the XML file and dump each resulting document
  const loadedDocs = await xmlReader.loadData("../data/company.xml");
  loadedDocs.forEach((entry) => {
    console.log(entry.text);
    console.log("----");
  });
}
main().catch(console.error);
+14
View File
@@ -0,0 +1,14 @@
# BM25 Retriever
In this guide, we introduce a BM25 retriever that searches documents using the BM25 method. BM25 (Best Matching 25) is a ranking function that extends TF-IDF by accounting for term-frequency saturation and document length. It ranks documents based on how often the query terms occur in each document and how rare those terms are across the corpus.
## Setup
1. `cd` into the `examples` directory
2. Run `npm i`
## Example
```bash
npx tsx ./retrievers/bm25/example.ts
```
+33
View File
@@ -0,0 +1,33 @@
import { Bm25Retriever } from "@llamaindex/bm25-retriever";
import { OpenAIEmbedding } from "@llamaindex/openai";
import { PDFReader } from "@llamaindex/readers/pdf";
import { MetadataMode, Settings, VectorStoreIndex } from "llamaindex";
Settings.embedModel = new OpenAIEmbedding();
/**
 * Indexes a PDF into a `VectorStoreIndex`, then queries the index's document
 * store with a `Bm25Retriever` and prints the score and text of each hit.
 */
async function main() {
  // Read the PDF into documents
  const pdfReader = new PDFReader();
  const pdfDocs = await pdfReader.loadData("./data/brk-2022.pdf");

  // Build the vector index (splits the text and embeds it); its docStore
  // is what the BM25 retriever searches.
  const vectorIndex = await VectorStoreIndex.fromDocuments(pdfDocs);
  const bm25 = new Bm25Retriever({
    docStore: vectorIndex.docStore,
    topK: 3,
  });

  // Run the query
  const hits = await bm25.retrieve({
    query: "What mistakes did Warren E. Buffett make?",
  });

  // Print every retrieved node
  for (const hit of hits) {
    console.log(`Score: ${hit.score}`);
    console.log(`Text: ${hit.node.getContent(MetadataMode.NONE)}`);
  }
}
main().catch(console.error);
+1 -8
View File
@@ -1,8 +1,4 @@
import {
GEMINI_EMBEDDING_MODEL,
GeminiEmbedding,
GeminiSession,
} from "@llamaindex/google";
import { GEMINI_EMBEDDING_MODEL, GeminiEmbedding } from "@llamaindex/google";
import { QdrantVectorStore } from "@llamaindex/qdrant";
import {
Document,
@@ -12,9 +8,6 @@ import {
const embedding = new GeminiEmbedding({
model: GEMINI_EMBEDDING_MODEL.EMBEDDING_001,
session: new GeminiSession({
apiKey: process.env.GEMINI_API_KEY,
}),
});
async function main() {
+7 -1
View File
@@ -18,5 +18,11 @@
"module": "commonjs"
}
},
"include": ["./**/*.ts"]
"include": ["./**/*.ts"],
"exclude": [
"node_modules",
"dist",
"models/openai/live/browser/open-ai-realtime",
"**/browser/**"
]
}
+26
View File
@@ -1,5 +1,31 @@
# @llamaindex/autotool
## 8.0.12
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
## 8.0.11
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
## 8.0.10
### Patch Changes
- llamaindex@0.11.10
## 8.0.9
### Patch Changes
- llamaindex@0.11.9
## 8.0.8
### Patch Changes
@@ -1,5 +1,35 @@
# @llamaindex/autotool-01-node-example
## 0.0.120
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
- @llamaindex/autotool@8.0.12
## 0.0.119
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
- @llamaindex/autotool@8.0.11
## 0.0.118
### Patch Changes
- llamaindex@0.11.10
- @llamaindex/autotool@8.0.10
## 0.0.117
### Patch Changes
- llamaindex@0.11.9
- @llamaindex/autotool@8.0.9
## 0.0.116
### Patch Changes
@@ -13,5 +13,5 @@
"scripts": {
"start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
},
"version": "0.0.116"
"version": "0.0.120"
}
+1 -1
View File
@@ -6,7 +6,7 @@
"url": "git+https://github.com/run-llama/LlamaIndexTS.git",
"directory": "packages/autotool"
},
"version": "8.0.8",
"version": "8.0.12",
"description": "auto transpile your JS function to LLM Agent compatible",
"files": [
"dist",
+27
View File
@@ -1,5 +1,32 @@
# @llamaindex/cloud
## 4.0.17
### Patch Changes
- Updated dependencies [d578889]
- Updated dependencies [0fcc92f]
- Updated dependencies [515a8b9]
- @llamaindex/core@0.6.13
## 4.0.16
### Patch Changes
- Updated dependencies [7039e1a]
- Updated dependencies [7039e1a]
- @llamaindex/core@0.6.12
## 4.0.15
### Patch Changes
- Updated dependencies [a89e187]
- Updated dependencies [62699b7]
- Updated dependencies [c5b2691]
- Updated dependencies [d8ac8d3]
- @llamaindex/core@0.6.11
## 4.0.14
### Patch Changes
+2 -2
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloud",
"version": "4.0.14",
"version": "4.0.17",
"type": "module",
"license": "MIT",
"scripts": {
@@ -79,6 +79,6 @@
},
"dependencies": {
"p-retry": "^6.2.1",
"zod": "^3.25.7"
"zod": "^3.25.67"
}
}
+24
View File
@@ -1,5 +1,29 @@
# @llamaindex/core
## 0.6.13
### Patch Changes
- d578889: Add new memory API
- 0fcc92f: Fix: split sentences must not trim whitespaces
- 515a8b9: Fix: logging for fromPersistPath
## 0.6.12
### Patch Changes
- 7039e1a: Internal cleanup of base64 encoding
- 7039e1a: chore: migrate to @google/genai SDK
## 0.6.11
### Patch Changes
- a89e187: Feat: added custom abbreviations to sentence splitter
- 62699b7: Improve performance of sentence splitter
- c5b2691: Add more Acronyms on SentenceSplitter
- d8ac8d3: Feat: add support for openai realtime API
## 0.6.10
### Patch Changes
+3 -3
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/core",
"type": "module",
"version": "0.6.10",
"version": "0.6.13",
"description": "LlamaIndex Core Module",
"exports": {
"./agent": {
@@ -312,7 +312,7 @@
"@llamaindex/env": "workspace:*",
"@types/node": "^22.9.0",
"magic-bytes.js": "^1.10.0",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.3"
"zod": "^3.25.67",
"zod-to-json-schema": "^3.24.6"
}
}
+2
View File
@@ -152,6 +152,7 @@ export type AgentParamsBase<
/**
* Worker will schedule tasks and handle the task execution
* @deprecated Use agent instead.
*/
export abstract class AgentWorker<
AI extends LLM,
@@ -250,6 +251,7 @@ export abstract class AgentWorker<
/**
* Runner will manage the task execution and provide a high-level API for the user
* @deprecated Use agent instead.
*/
export abstract class AgentRunner<
AI extends LLM,
+3
View File
@@ -62,6 +62,9 @@ export class LLMAgentWorker extends AgentWorker<LLM> {
taskHandler = AgentRunner.defaultTaskHandler;
}
/**
* @deprecated Use agent instead.
*/
export class LLMAgent extends AgentRunner<LLM> {
constructor(params: LLMAgentParams<LLM>) {
validateAgentParams(params);
+2 -4
View File
@@ -1,5 +1,5 @@
import type { ChatMessage, MessageContent } from "../llms";
import type { BaseMemory } from "../memory";
import type { Memory } from "../memory";
import { EngineResponse } from "../schema";
export interface BaseChatEngineParams<
@@ -9,9 +9,7 @@ export interface BaseChatEngineParams<
/**
* Optional chat history if you want to customize the chat history.
*/
chatHistory?:
| ChatMessage<AdditionalMessageOptions>[]
| BaseMemory<AdditionalMessageOptions>;
chatHistory?: ChatMessage<AdditionalMessageOptions>[] | Memory;
}
export interface StreamingChatEngineParams<
@@ -1,7 +1,7 @@
import { wrapEventCaller } from "../decorator";
import { Settings } from "../global";
import type { ChatMessage, LLM, MessageContent, MessageType } from "../llms";
import { BaseMemory, ChatMemoryBuffer } from "../memory";
import { Memory, createMemory } from "../memory";
import type { BaseNodePostprocessor } from "../postprocessor";
import {
type ContextSystemPrompt,
@@ -23,7 +23,7 @@ import type { ContextGenerator } from "./type";
export type ContextChatEngineOptions = {
retriever: BaseRetriever;
chatModel?: LLM | undefined;
chatHistory?: ChatMessage[] | undefined;
chatHistory?: ChatMessage[] | Memory | undefined;
contextSystemPrompt?: ContextSystemPrompt | undefined;
nodePostprocessors?: BaseNodePostprocessor[] | undefined;
systemPrompt?: string | undefined;
@@ -37,18 +37,21 @@ export type ContextChatEngineOptions = {
*/
export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
chatModel: LLM;
memory: BaseMemory;
memory: Memory;
contextGenerator: ContextGenerator & PromptMixin;
systemPrompt?: string | undefined;
get chatHistory() {
return this.memory.getMessages();
return this.memory.getLLM();
}
constructor(init: ContextChatEngineOptions) {
super();
this.chatModel = init.chatModel ?? Settings.llm;
this.memory = new ChatMemoryBuffer({ chatHistory: init?.chatHistory });
this.memory =
init?.chatHistory instanceof Memory
? init.chatHistory
: createMemory(init?.chatHistory ?? []);
this.contextGenerator = new DefaultContextGenerator({
retriever: init.retriever,
contextSystemPrompt: init?.contextSystemPrompt,
@@ -87,12 +90,9 @@ export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
const { message, stream } = params;
const chatHistory = params.chatHistory
? new ChatMemoryBuffer({
chatHistory:
params.chatHistory instanceof BaseMemory
? await params.chatHistory.getMessages()
: params.chatHistory,
})
? params.chatHistory instanceof Memory
? params.chatHistory
: createMemory(params.chatHistory)
: this.memory;
const requestMessages = await this.prepareRequestMessages(
message,
@@ -110,7 +110,7 @@ export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
initialValue: "",
reducer: (accumulator, part) => (accumulator += part.delta),
finished: (accumulator) => {
chatHistory.put({ content: accumulator, role: "assistant" });
void chatHistory.add({ content: accumulator, role: "assistant" });
},
}),
(r) => EngineResponse.fromChatResponseChunk(r, requestMessages.nodes),
@@ -120,26 +120,26 @@ export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
messages: requestMessages.messages,
additionalChatOptions: params.chatOptions as object,
});
chatHistory.put(response.message);
await chatHistory.add(response.message);
return EngineResponse.fromChatResponse(response, requestMessages.nodes);
}
reset() {
this.memory.reset();
async reset() {
await this.memory.clear();
}
private async prepareRequestMessages(
message: MessageContent,
chatHistory: BaseMemory,
chatHistory: Memory,
) {
chatHistory.put({
await chatHistory.add({
content: message,
role: "user",
});
const textOnly = extractText(message);
const context = await this.contextGenerator.generate(textOnly);
const systemMessage = this.prependSystemPrompt(context.message);
const messages = await chatHistory.getMessages([systemMessage]);
const messages = await chatHistory.getLLM(this.chatModel, [systemMessage]);
return { nodes: context.nodes, messages };
}
@@ -1,5 +1,5 @@
import type { LLM } from "../llms";
import { BaseMemory, ChatMemoryBuffer } from "../memory";
import { createMemory, Memory } from "../memory";
import { EngineResponse } from "../schema";
import { streamConverter, streamReducer } from "../utils";
import type {
@@ -16,20 +16,16 @@ import { Settings } from "../global";
*/
export class SimpleChatEngine implements BaseChatEngine {
memory: BaseMemory;
memory: Memory;
llm: LLM;
get chatHistory() {
return this.memory.getMessages();
return this.memory.getLLM();
}
constructor(init?: Partial<SimpleChatEngine>) {
this.llm = init?.llm ?? Settings.llm;
this.memory =
init?.memory ??
new ChatMemoryBuffer({
llm: this.llm,
});
this.memory = init?.memory ?? createMemory();
}
chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
@@ -43,19 +39,15 @@ export class SimpleChatEngine implements BaseChatEngine {
const { message, stream } = params;
const chatHistory = params.chatHistory
? new ChatMemoryBuffer({
llm: this.llm,
chatHistory:
params.chatHistory instanceof BaseMemory
? await params.chatHistory.getMessages()
: params.chatHistory,
})
? params.chatHistory instanceof Memory
? params.chatHistory
: createMemory(params.chatHistory)
: this.memory;
chatHistory.put({ content: message, role: "user" });
await chatHistory.add({ content: message, role: "user" });
if (stream) {
const stream = await this.llm.chat({
messages: await chatHistory.getMessages(),
messages: await chatHistory.getLLM(this.llm),
stream: true,
});
return streamConverter(
@@ -64,7 +56,7 @@ export class SimpleChatEngine implements BaseChatEngine {
initialValue: "",
reducer: (accumulator, part) => accumulator + part.delta,
finished: (accumulator) => {
chatHistory.put({ content: accumulator, role: "assistant" });
void chatHistory.add({ content: accumulator, role: "assistant" });
},
}),
EngineResponse.fromChatResponseChunk,
@@ -73,13 +65,13 @@ export class SimpleChatEngine implements BaseChatEngine {
const response = await this.llm.chat({
stream: false,
messages: await chatHistory.getMessages(),
messages: await chatHistory.getLLM(this.llm),
});
chatHistory.put(response.message);
await chatHistory.add(response.message);
return EngineResponse.fromChatResponse(response);
}
reset() {
this.memory.reset();
async reset() {
await this.memory.clear();
}
}
+2 -1
View File
@@ -1,4 +1,5 @@
import { extractText, streamConverter } from "../utils";
import { extractText } from "../utils/llms";
import { streamConverter } from "../utils/stream";
import type {
ChatResponse,
ChatResponseChunk,
+4 -1
View File
@@ -1,6 +1,9 @@
export { BaseLLM, ToolCallLLM } from "./base";
export { LiveLLM, LiveLLMSession, liveEvents, type LiveEvent } from "./live";
export { LiveLLM, LiveLLMCapability, LiveLLMSession } from "./live/live";
export { liveEvents, type LiveEvent } from "./live/live-types";
export type { MessageSender } from "./live/sender";
export type {
AudioConfig,
BaseTool,
BaseToolWithCall,
ChatMessage,
@@ -1,9 +1,7 @@
import type {
ChatMessage,
LiveConnectConfig,
MessageContentAudioDetail,
MessageContentTextDetail,
} from "./type";
} from "../type";
export type OpenEvent = { type: "open" };
@@ -63,45 +61,3 @@ export const liveEvents = {
e.type === "turnComplete",
},
};
export abstract class LiveLLMSession {
protected eventQueue: LiveEvent[] = [];
protected eventResolvers: ((value: LiveEvent) => void)[] = [];
protected closed = false;
abstract sendMessage(message: ChatMessage): void;
async *streamEvents(): AsyncIterable<LiveEvent> {
while (true) {
const event = await this.nextEvent();
if (event === undefined) {
break;
}
yield event;
}
}
abstract disconnect(): Promise<void>;
protected async nextEvent(): Promise<LiveEvent | undefined> {
if (this.eventQueue.length) {
return Promise.resolve(this.eventQueue.shift());
}
return new Promise((resolve) => {
this.eventResolvers.push(resolve);
});
}
//Uses an async queue to send events to the client
// if the consumer is waiting for an event, it will be resolved immediately
// otherwise, the event will be queued up and sent when the consumer is ready
pushEventToQueue(event: LiveEvent) {
if (this.eventResolvers.length) {
//resolving the promise with the event
this.eventResolvers.shift()!(event);
} else {
this.eventQueue.push(event);
}
}
}
export abstract class LiveLLM {
abstract connect(config?: LiveConnectConfig): Promise<LiveLLMSession>;
}
+124
View File
@@ -0,0 +1,124 @@
import type {
ChatMessage,
LiveConnectConfig,
MessageContentAudioDetail,
MessageContentDetail,
MessageContentImageDataDetail,
MessageContentVideoDetail,
} from "../type";
import type { LiveEvent } from "./live-types";
import type { MessageSender } from "./sender";
/**
 * Optional features a `LiveLLM` implementation can advertise through its
 * `capabilities` set and that callers can query with `hasCapability`.
 */
export enum LiveLLMCapability {
// NOTE(review): presumably signals that `getEphemeralKey()` yields a usable key — confirm against implementations.
EPHEMERAL_KEY = "ephemeral_key",
// NOTE(review): presumably signals that `LiveConnectConfig.audioConfig` is honoured — confirm against implementations.
AUDIO_CONFIG = "audio_config",
}
/**
 * Base class for a live LLM session.
 *
 * Outgoing chat messages are routed, part by part, to the session's
 * `MessageSender`. Incoming events are handed to consumers through a small
 * async queue: an event is delivered straight to a waiting consumer when there
 * is one, otherwise it is buffered until the next read.
 */
export abstract class LiveLLMSession {
  /** Events that arrived while no consumer was waiting, oldest first. */
  protected eventQueue: LiveEvent[] = [];
  /** Resolve callbacks of consumers waiting for the next event, oldest first. */
  protected eventResolvers: ((value: LiveEvent) => void)[] = [];
  /** Not read or written by this base class. */
  closed = false;

  /** Transport used to deliver outgoing message parts. */
  abstract get messageSender(): MessageSender;

  /**
   * Sends a chat message. Non-array content is sent as a single text message;
   * array content is dispatched part by part according to each part's type.
   */
  sendMessage(message: ChatMessage) {
    const { content, role } = message;
    if (Array.isArray(content)) {
      for (const part of content) {
        this.processMessage(part, role);
      }
      return;
    }
    this.messageSender.sendTextMessage(content, role);
  }

  /**
   * Forwards one content part to the matching sender method. Audio, image and
   * video parts are dropped when the sender does not implement the
   * corresponding optional method; parts of any other type are ignored.
   */
  private processMessage(message: MessageContentDetail, role?: string) {
    switch (message.type) {
      case "text":
        this.messageSender.sendTextMessage(message.text, role);
        break;
      case "audio":
        if (this.messageSender.sendAudioMessage) {
          this.messageSender.sendAudioMessage(message, role);
        }
        break;
      case "image":
        if (this.messageSender.sendImageMessage) {
          this.messageSender.sendImageMessage(message, role);
        }
        break;
      case "video":
        if (this.messageSender.sendVideoMessage) {
          this.messageSender.sendVideoMessage(message, role);
        }
        break;
      default:
        break;
    }
  }

  /**
   * Yields events as they become available and stops once `nextEvent`
   * produces `undefined`.
   */
  async *streamEvents(): AsyncIterable<LiveEvent> {
    for (
      let upcoming = await this.nextEvent();
      upcoming !== undefined;
      upcoming = await this.nextEvent()
    ) {
      yield upcoming;
    }
  }

  abstract disconnect(): Promise<void>;

  /**
   * Returns the oldest buffered event, or a promise that settles with the next
   * event pushed via `pushEventToQueue` when nothing is buffered.
   */
  protected async nextEvent(): Promise<LiveEvent | undefined> {
    if (this.eventQueue.length === 0) {
      return new Promise((resolve) => {
        this.eventResolvers.push(resolve);
      });
    }
    return Promise.resolve(this.eventQueue.shift());
  }

  // Async queue hand-off: a waiting consumer receives the event immediately;
  // without one, the event is buffered until the consumer asks for it.
  pushEventToQueue(event: LiveEvent) {
    const waitingConsumer = this.eventResolvers.shift();
    if (waitingConsumer) {
      waitingConsumer(event);
    } else {
      this.eventQueue.push(event);
    }
  }
}
/**
 * Base class for LLMs that support live sessions. Implementations create
 * sessions via `connect` and advertise optional features through
 * `capabilities`.
 */
export abstract class LiveLLM {
/**
* Set of capabilities supported by this implementation.
* Override in subclasses as needed.
*/
capabilities: Set<LiveLLMCapability> = new Set();
/** Opens a live session, optionally configured by `config`. */
abstract connect(config?: LiveConnectConfig): Promise<LiveLLMSession>;
// Resolves to a key string or `undefined`.
// NOTE(review): presumably only meaningful when EPHEMERAL_KEY is advertised — confirm.
abstract getEphemeralKey(): Promise<string | undefined>;
/** Returns true when `capability` is present in `capabilities`. */
hasCapability(capability: LiveLLMCapability): boolean {
return this.capabilities.has(capability);
}
}
+15
View File
@@ -0,0 +1,15 @@
import type {
MessageContentAudioDetail,
MessageContentImageDataDetail,
MessageContentVideoDetail,
} from "../type";
/**
 * Transport used to send message parts of different modalities.
 * Only text is mandatory; audio, image and video senders are optional, so
 * callers must check that a method exists before invoking it.
 */
export interface MessageSender {
/** Sends a plain text message, optionally tagged with the sender's role. */
sendTextMessage(message: string, role?: string): void;
/** Optional: sends an audio content part. */
sendAudioMessage?(content: MessageContentAudioDetail, role?: string): void;
/** Optional: sends an image content part. */
sendImageMessage?(
content: MessageContentImageDataDetail,
role?: string,
): void;
/** Optional: sends a video content part. */
sendVideoMessage?(content: MessageContentVideoDetail, role?: string): void;
}
+6
View File
@@ -290,8 +290,14 @@ export type ToolOutput = {
isError: boolean;
};
/** Audio-related options that can be passed via `LiveConnectConfig.audioConfig`. */
export interface AudioConfig {
// NOTE(review): presumably the local input stream (e.g. microphone) handed to the session — confirm against implementations.
stream?: MediaStream;
// Callback receiving a MediaStream or null.
// NOTE(review): presumably invoked with the remote audio stream, and null when it goes away — confirm.
onTrack?: (track: MediaStream | null) => void;
}
/** Optional settings accepted by `LiveLLM.connect`. All fields may be omitted. */
export interface LiveConnectConfig {
// Tools made available to the session.
tools?: BaseTool[];
// NOTE(review): presumably the modalities the model should respond with — confirm.
responseModality?: ModalityType[];
// NOTE(review): presumably a system prompt for the session — confirm.
systemInstruction?: string;
// Audio options; see AudioConfig.
audioConfig?: AudioConfig;
}
+7
View File
@@ -0,0 +1,7 @@
import type { MemoryMessage } from "../types";
/**
 * Converts between an external message representation `T` and the memory's
 * `MemoryMessage` format.
 */
export interface MessageAdapter<T, TMessageOptions extends object = object> {
/** Converts a memory message into the external representation `T`. */
fromMemory(message: MemoryMessage<TMessageOptions>): T;
/** Converts an external message of type `T` into a memory message. */
toMemory(message: T): MemoryMessage<TMessageOptions>;
/** Type guard: reports whether an arbitrary value is a message of type `T`. */
isCompatible(message: unknown): message is T;
}

Some files were not shown because too many files have changed in this diff Show More