Release 0.11.12 (#2050 )

Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
docs: clarify how to run docs
2026-07-01 22:14:03 -04:00 · 2025-07-02 11:41:55 +07:00 · 2025-07-02 11:33:48 +07:00 · 2025-07-02 11:26:18 +07:00 · 2025-07-02 11:22:47 +07:00 · 2025-07-02 10:57:04 +07:00
290 changed files with 12748 additions and 3183 deletions
@@ -25,7 +25,7 @@ Make sure you have Node.js LTS (Long-term Support) installed. You can check your

 ```shell
 node -v
-# v20.x.x
+# v22.x.x
 ```

 ### Use pnpm
@@ -1,5 +1,98 @@
 # @llamaindex/doc

+## 0.2.32
+
+### Patch Changes
+
+- Updated dependencies [d578889]
+- Updated dependencies [0fcc92f]
+- Updated dependencies [515a8b9]
+  - @llamaindex/core@0.6.13
+  - llamaindex@0.11.12
+  - @llamaindex/cloud@4.0.17
+  - @llamaindex/node-parser@2.0.13
+  - @llamaindex/openai@0.4.7
+  - @llamaindex/readers@3.1.12
+  - @llamaindex/workflow@1.1.13
+
+## 0.2.31
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+  - @llamaindex/core@0.6.12
+  - @llamaindex/cloud@4.0.16
+  - @llamaindex/node-parser@2.0.12
+  - @llamaindex/openai@0.4.6
+  - @llamaindex/readers@3.1.11
+  - @llamaindex/workflow@1.1.12
+
+## 0.2.30
+
+### Patch Changes
+
+- Updated dependencies [f7ec293]
+  - @llamaindex/workflow@1.1.11
+  - llamaindex@0.11.10
+
+## 0.2.29
+
+### Patch Changes
+
+- Updated dependencies [c5846bd]
+  - @llamaindex/readers@3.1.10
+
+## 0.2.28
+
+### Patch Changes
+
+- Updated dependencies [a89e187]
+- Updated dependencies [62699b7]
+- Updated dependencies [c5b2691]
+- Updated dependencies [d8ac8d3]
+  - @llamaindex/core@0.6.11
+  - @llamaindex/openai@0.4.5
+  - @llamaindex/cloud@4.0.15
+  - llamaindex@0.11.9
+  - @llamaindex/node-parser@2.0.11
+  - @llamaindex/readers@3.1.9
+  - @llamaindex/workflow@1.1.10
+
+## 0.2.27
+
+### Patch Changes
+
+- 8a51c16: Add natural language agent page
+- Updated dependencies [8a51c16]
+- Updated dependencies [1b5af14]
+  - @llamaindex/workflow@1.1.9
+  - @llamaindex/core@0.6.10
+  - llamaindex@0.11.8
+  - @llamaindex/cloud@4.0.14
+  - @llamaindex/node-parser@2.0.10
+  - @llamaindex/openai@0.4.4
+  - @llamaindex/readers@3.1.8
+
+## 0.2.26
+
+### Patch Changes
+
+- a4d394f: fix: correct SimpleDirectoryReader import path in documentation example
+- Updated dependencies [dbd857f]
+- Updated dependencies [3c857f4]
+  - @llamaindex/workflow@1.1.8
+  - llamaindex@0.11.7
+
+## 0.2.25
+
+### Patch Changes
+
+- Updated dependencies [40161fe]
+  - @llamaindex/workflow@1.1.7
+  - llamaindex@0.11.6
+
 ## 0.2.24

 ### Patch Changes
@@ -111,7 +111,7 @@ Key build process:
 **Content Sources:**

 - Local MDX files in `src/content/docs/`
- External docs from `@llama-flow/docs` package
+- External docs from `@llamaindex/workflow-docs` package
 - Generated API docs from TypeScript source

 ### Development Notes
@@ -3,6 +3,8 @@
 This is a Next.js application generated with
 [Create Fumadocs](https://github.com/fuma-nama/fumadocs).

+> Note: Before running the development server, make sure to build the whole project first, see [CONTRIBUTING.md](../../CONTRIBUTING.md) for more details.
+
 Run development server:

 ```bash
@@ -15,6 +15,20 @@ const config = {
    "twoslash",
    "typescript",
  ],
+  async redirects() {
+    return [
+      {
+        source: "/docs/chat-ui/:path*.mdx",
+        destination: "/docs/chat-ui/:path*",
+        permanent: true,
+      },
+      {
+        source: "/docs/workflows/:path*.mdx",
+        destination: "/docs/workflows/:path*",
+        permanent: true,
+      },
+    ];
+  },
  turbopack: {
    resolveAlias: {
      fs: { browser: "./fallback.js" },
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/doc",
-  "version": "0.2.24",
+  "version": "0.2.32",
  "private": true,
  "scripts": {
    "postinstall": "fumadocs-mdx",
@@ -15,16 +15,17 @@
  "dependencies": {
    "@huggingface/transformers": "^3.5.0",
    "@icons-pack/react-simple-icons": "^10.1.0",
-    "@llama-flow/docs": "0.0.8",
-    "@llamaindex/chat-ui-docs": "0.0.3",
+    "@llamaindex/chat-ui-docs": "^0.0.5",
    "@llamaindex/cloud": "workspace:*",
    "@llamaindex/core": "workspace:*",
    "@llamaindex/node-parser": "workspace:*",
    "@llamaindex/openai": "workspace:*",
    "@llamaindex/readers": "workspace:*",
    "@llamaindex/workflow": "workspace:*",
+    "@llamaindex/workflow-docs": "0.1.1",
    "@mdx-js/mdx": "^3.1.0",
    "@monaco-editor/react": "^4.7.0",
+    "@next/third-parties": "^15.3.4",
    "@number-flow/react": "^0.3.4",
    "@radix-ui/react-dialog": "^1.1.2",
    "@radix-ui/react-icons": "^1.3.2",
@@ -39,13 +40,13 @@
    "clsx": "2.1.1",
    "foxact": "^0.2.41",
    "framer-motion": "^11.11.17",
-    "fumadocs-core": "^15.2.7",
+    "fumadocs-core": "^15.5.0",
    "fumadocs-docgen": "^2.0.0",
-    "fumadocs-mdx": "^11.6.0",
-    "fumadocs-openapi": "^8.0.1",
-    "fumadocs-twoslash": "^3.1.1",
-    "fumadocs-typescript": "^4.0.2",
-    "fumadocs-ui": "^15.2.7",
+    "fumadocs-mdx": "^11.6.6",
+    "fumadocs-openapi": "^9.0.5",
+    "fumadocs-twoslash": "^3.1.3",
+    "fumadocs-typescript": "^4.0.5",
+    "fumadocs-ui": "^15.5.0",
    "hast-util-to-jsx-runtime": "^2.3.2",
    "llamaindex": "workspace:*",
    "lucide-react": "^0.460.0",
@@ -69,7 +70,7 @@
    "twoslash": "^0.3.1",
    "use-stick-to-bottom": "^1.0.42",
    "web-tree-sitter": "^0.24.4",
-    "zod": "^3.23.8"
+    "zod": "^3.25.67"
  },
  "devDependencies": {
    "@next/env": "^15.3.0",
@@ -13,7 +13,7 @@ const INTERNAL_LINK_REGEX = /(?:(?:\]\(|\bhref=["'])\/docs\/([^")]+))/g;
 // This captures relative links like [text](./path) or ![alt](../images/image.png)
 const RELATIVE_LINK_REGEX = /(?:\]\()(?:\s*)(?:\.\.?)\//g;

-const ALLOWED_LINKS = ["/docs/llamaflow", "/docs/chat-ui"];
+const ALLOWED_LINKS = ["/docs/workflows", "/docs/chat-ui"];

 interface LinkValidationResult {
  file: string;
@@ -11,8 +11,13 @@ import remarkMath from "remark-math";
 export const docs = defineDocs({
  dir: [
    "./src/content/docs",
-    "./node_modules/@llama-flow/docs",
+    "./node_modules/@llamaindex/workflow-docs",
    "./node_modules/@llamaindex/chat-ui-docs",
+    // NOTE: When adding external docs (like chat-ui or workflow-docs above),
+    // make sure to also update:
+    // 1. scripts/validate-links.mts - add to ALLOWED_LINKS array
+    // 2. next.config.mjs - add redirect for .mdx files
+    // 3. src/content/docs/meta.json - add to pages array
  ],
  docs: {
    async: true,
@@ -113,7 +113,8 @@ export default function HomePage() {
          description="Truly powerful retrieval-augmented generation applications use agentic techniques, and LlamaIndex.TS makes it easy to build them."
        >
          <CodeBlock
-            code={`import { SimpleDirectoryReader, VectorStoreIndex } from "llamaindex";
+            code={`import { VectorStoreIndex } from "llamaindex";
+import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
 import { openai } from "@llamaindex/openai";
 import { agent } from "@llamaindex/workflow";

@@ -1,5 +1,6 @@
 import { AIProvider } from "@/actions";
 import { TooltipProvider } from "@/components/ui/tooltip";
+import { GoogleAnalytics } from "@next/third-parties/google";
 import { RootProvider } from "fumadocs-ui/provider";
 import { Inter } from "next/font/google";
 import type { ReactNode } from "react";
@@ -39,6 +40,7 @@ export default function Layout({ children }: { children: ReactNode }) {
          </AIProvider>
        </TooltipProvider>
      </body>
+      <GoogleAnalytics gaId="G-NB9B8LW9W5" />
    </html>
  );
 }
@@ -1,4 +1,4 @@
 {
  "title": "Agents",
-  "pages": ["tool", "agent_workflow", "workflows"]
+  "pages": ["tool", "agent_workflow", "workflows", "natural_language_workflow"]
 }
@@ -0,0 +1,103 @@
+---
+title: Define workflows using natural language  
+---
+
+When working with Workflows, you have to write code to handle an event in the workflow. 
+Often, the logic of the handler is simple enough that it can be expressed in natural language and executed by an LLM.  
+Besides the instructions, we just need the expected result event of the step, possible tool calls and optionally other events that can be emitted.  
+
+## Usage
+
+Let's take an example of a workflow that generates a joke, gets a critique for it, and then improves it.
+
+### Define the events
+
+First, we define the events for our workflow. We need one for writing the joke, one for critiquing it, and one for the final result:
+
+```typescript
+import { z } from "zod";
+import { zodEvent } from "@llamaindex/workflow";
+
+const writeJokeSchema = z.object({
+  description: z
+    .string()
+    .describe("The topic to write a joke or describe the joke to improve."),
+  writtenJoke: z.optional(z.string()).describe("The written joke."),
+  retriedTimes: z
+    .number()
+    .default(0)
+    .describe(
+      "The retried times for writing the joke. Always increase this from the input retriedTimes.",
+    ),
+});
+
+const critiqueSchema = z.object({
+  joke: z.string().describe("The joke to critique"),
+  retriedTimes: z.number().describe("The retried times for writing the joke."),
+});
+
+const finalResultSchema = z.object({
+  joke: z.string().describe("The joke to critique"),
+  critique: z.string().describe("The critique of the joke"),
+});
+
+const writeJokeEvent = zodEvent(writeJokeSchema, {
+  debugLabel: "writeJokeEvent",
+}); 
+const critiqueEvent = zodEvent(critiqueSchema, {
+  debugLabel: "critiqueEvent",
+}); 
+const finalResultEvent = zodEvent(finalResultSchema, {
+  debugLabel: "finalResultEvent",
+}); 
+```
+
+Note that for natural language workflows, the events need to be created with the `zodEvent` function, passing the zod schema as an argument. The agent needs the schema of the event data to correctly generate events.  
+Also, we need a `debugLabel` so the LLM can identify the event to emit in the workflow.
+
+### Define the workflow
+
+As usual you first create the workflow:
+
+```typescript
+import { agentHandler, createWorkflow } from "@llamaindex/workflow";
+
+const jokeFlow = createWorkflow();
+```
+
+Then you need to handle the events. For the handlers, instead of code, you're now going to use natural language by calling the `agentHandler` function.
+
+It only requires two parameters:
+- `instructions`: A prompt to guide the agent how to handle the steps.
+- `results`: The output events that the agent should return after handling the step.
+
+With that, only a small amount of code is needed to handle each step:
+
+```typescript
+jokeFlow.handle(
+  [writeJokeEvent],
+  agentHandler({
+    instructions: `You are a joke writer. You are given a topic and you need to write a joke about it.`,
+    results: [critiqueEvent],
+  }),
+);
+
+jokeFlow.handle(
+  [critiqueEvent],
+  agentHandler({
+    instructions: `
+You are given a joke and you need to critique it. Follow the following guidelines:
+1. You have maximum 3 times to improve the joke.
+2. If the joke is not good, increase the retriedTimes, describe how to improve the joke and send a writeJokeEvent.
+3. If the joke is good, trigger the finalResultEvent event.
+`,
+    results: [writeJokeEvent, finalResultEvent],
+  }),
+);
+```
+
+For advanced usage, you can add more functionality to `agentHandler` by using these parameters:
+- `events`: A list of additional events that the agent can emit to the workflow. E.g., your agent can emit a `uiEvent` to update the UI during the execution.
+- `tools`: A list of tools that the agent can use to handle the step. E.g., your agent can use a `search` tool to search the web.
+
+You can find more code examples in the [examples](https://github.com/run-llama/LlamaIndexTS/tree/main/examples/agents/natural) folder.
@@ -74,12 +74,21 @@ const server = mcp({
  args: ["-y", "@modelcontextprotocol/server-filesystem", "."],
  verbose: true,
 });
-// or by SSE
+// or by StreamableHTTP transport
 const server = mcp({
  url: "http://localhost:8000/mcp",
  verbose: true,
 });

+// if your MCP server is not using StreamableHTTP transport, you can also use SSE transport
+// by setting useSSETransport to true.
+// See: https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse-deprecated
+const server = mcp({
+  url: "http://localhost:8000/mcp",
+  useSSETransport: true,
+  verbose: true,
+});
+
 // 3. Get tools from MCP server
 const tools = await server.tools();

@@ -9,10 +9,13 @@ Workflows are designed to be flexible and can be used to build agents, RAG flows
 To use workflows install this package:

 ```package-install
-npm i @llamaindex/workflow
+npm i @llamaindex/workflow-core
 ```

-This package is a stable, production-ready version of our [llama-flow](/docs/llamaflow) project. 
+This contains the core functionality for the workflow system. You can read more about the core concepts in the [workflow-core](/docs/workflows) section.

-While you can still reference the llama-flow documentation for detailed information about the underlying concepts, we recommend using the `@llamaindex/workflow` package for all new projects to ensure stability and long-term availability.
+In contrast, the `@llamaindex/workflow` package contains more utilities, such as prebuilt agents.

+```package-install
+npm i @llamaindex/workflow
+```
@@ -0,0 +1,182 @@
+---
+title: Memory
+description: Manage conversation history and context with agents
+---
+
+## Concept
+
+Memory is a core component of agentic systems. It allows you to store and retrieve information from the past.
+
+In LlamaIndexTS, you can create memory by using the `createMemory` function. This function will return a `Memory` object, which you can then use to store and retrieve information.
+
+As the agent runs, it will make calls to `add()` to store information, and `get()` to retrieve information. 
+
+## Usage
+
+A `Memory` object has both short-term memory (i.e. a FIFO queue of messages) and optionally long-term memory (i.e. extracting information over time).
+
+`get()` always returns all messages stored in the memory. The longer the agent runs, the more likely it is that these messages exceed the context window of the LLM. To avoid this, the agent uses the `getLLM` method to get the last X messages that fit into the context window.
+
+### Configuring Memory for an Agent
+
+Here we're creating a memory with a static block (read more about [memory blocks](#long-term-memory)) that contains some information about the user.
+
+```ts twoslash
+import { openai } from "@llamaindex/openai";
+import { agent } from "@llamaindex/workflow";
+import { createMemory, staticBlock } from "llamaindex";
+
+const llm = openai({ model: "gpt-4.1-mini" });
+
+// Create memory with predefined context
+const memory = createMemory({
+  memoryBlocks: [
+    staticBlock({
+      content:
+        "The user is a software engineer who loves TypeScript and LlamaIndex.",
+    }),
+  ],
+});
+
+// Create an agent with the memory
+const workflow = agent({
+  name: "assistant",
+  llm,
+  memory,
+});
+
+const result = await workflow.run("What is my name?");
+console.log("Response:", result.data.result);
+```
+
+### Using Vercel format
+
+You can also add messages in Vercel format directly to the memory:
+
+```ts
+await memory.add({
+  id: "1",
+  createdAt: new Date(),
+  role: "user",
+  content: "Hello!",
+  options: {
+    parts: [
+      {
+        type: "file",
+        data: "base64...",
+        mimeType: "image/png",
+      },
+    ],
+  },
+});
+```
+
+If you call `get`, messages are usually retrieved in the LlamaIndexTS format (type `ChatMessage`). If you specify the `type` parameter using `get`, you can return the messages in different formats. E.g.: using `type: "vercel"`, you can return the messages in Vercel format:
+
+```ts
+const messages = await memory.get({ type: "vercel" });
+console.log(messages);
+```
+
+## Customizing Memory
+
+### Short-Term Memory
+
+The `Memory` object will store all the messages that are added to the `Memory` object. Unless you call `clear()`, no messages are removed from the memory. This is the short-term memory (usually you will store the memory of one user session there) which is augmented by the long-term memory.
+
+Calling `getLLM` will retrieve messages from long-term memory and ensure that the given `tokenLimit` is not exceeded. These are the messages that you will send to the LLM.
+
+For initialization, you call `createMemory` with the following options:
+
+- `tokenLimit`: Maximum tokens for memory retrieval using `getLLM` (default: 30000).
+- `shortTermTokenLimitRatio`: Ratio of tokens for short-term vs long-term memory (default: 0.7)
+- `customAdapters`: Custom message adapters for different message formats. LlamaIndex (`ChatMessageAdapter`) and Vercel (`VercelMessageAdapter`) are built-in adapters.
+- `memoryBlocks`: Memory blocks for long-term storage, see [Long-Term Memory](#long-term-memory)
+
+Example:
+
+```ts
+const memory = createMemory({
+  tokenLimit: 40000,
+  shortTermTokenLimitRatio: 0.5,
+});
+```
+
+### Long-Term Memory
+
+Long-term memory is represented as `Memory Block` objects. These objects contain information that is from previous user sessions or from the beginning of the current conversation. When memory is retrieved (by calling `getLLM`), the short-term and long-term memories are merged together within the given `tokenLimit`.
+
+Currently, there are two predefined memory blocks:
+
+- `staticBlock`: A memory block that stores a static piece of information.
+- `factExtractionBlock`: A memory block that extracts facts from the chat history.
+
+This sounds a bit complicated, but it's actually quite simple. Let's look at an example:
+
+```ts
+import { createMemory, factExtractionBlock, staticBlock } from "llamaindex";
+
+const memoryBlocks= [
+  staticBlock({
+    id: "core_info",
+    content: "My name is Logan, and I live in Saskatoon. I work at LlamaIndex.",
+  }),
+  factExtractionBlock({
+    id: "user-extracted_info",
+    priority: 1,
+    llm: llm,
+    maxFacts: 50,
+  }),
+];
+```
+
+Here, we've set up two memory blocks:
+
+- `core_info`: A static memory block that stores some core information about the user. This information will always be inserted into the memory. The type used is `MessageContent` to support multi-modal content.
+- `user-extracted_info`: A fact extraction memory block that will extract information from the chat history. Here we've passed in the `llm` to use to extract facts from the chat history, and set the `maxFacts` to 50. If the number of extracted facts exceeds this limit, the facts will be automatically summarized and reduced to leave room for new information.
+
+You'll also notice that we've set the `priority` for the `factExtractionBlock` block. This is used to determine the handling when the memory blocks content (i.e. long-term memory) + short-term memory exceeds the token limit on the `Memory` object.
+
+- `priority=0`: This block will always be kept in memory (`staticBlocks` always have priority 0.)
+- `priority=1, 2, 3, etc`: This determines the order in which memory blocks are truncated when the memory exceeds the token limit, to help the overall short-term memory + long-term memory content be less than or equal to the `tokenLimit`.
+
+Now, let's pass these blocks into the `createMemory` function:
+
+```ts
+const memory = createMemory({
+  tokenLimit: 40000,
+  memoryBlocks: memoryBlocks,
+});
+```
+
+When memory is retrieved (using `getLLM`), the short-term and long-term memories are merged together. The `Memory` object will ensure that the short-term memory + long-term memory content is less than or equal to the `tokenLimit`. If it is longer, messages are retrieved in the following order:
+
+1. StaticMemoryBlock (information always included)
+2. LongTermMemoryBlock (depending on priority)
+3. ShortTermMemoryBlock 
+4. Transient messages
+
+The amount of short-term memory included is specified by the `shortTermTokenLimitRatio`. If it's set to `0.7`, 70% of the `tokenLimit` is used for short-term memory (not including the static memory block).
+
+## Persistence with Snapshots
+
+Save and restore memory state:
+
+```ts twoslash
+import { createMemory, loadMemory } from "llamaindex";
+
+const memory = createMemory();
+
+// Add some messages
+await memory.add({ role: "user", content: "Hello!" });
+
+// Create snapshot
+const snapshot = memory.snapshot();
+
+// Later, restore from the snapshot
+const restoredMemory = loadMemory(snapshot);
+```
+
+## Examples
+
+Want to learn more about the Memory class? Check out our example code on [GitHub](https://github.com/run-llama/LlamaIndexTS/tree/main/examples/agents/memory).
@@ -1,4 +1,11 @@
 {
  "title": "Data",
-  "pages": ["index", "readers", "data_index", "ingestion_pipeline", "stores"]
+  "pages": [
+    "index",
+    "memory",
+    "readers",
+    "data_index",
+    "ingestion_pipeline",
+    "stores"
+  ]
 }
@@ -28,11 +28,12 @@ embedding vector(1536)
 );
 ```

-- Create a function for similarity search
+- Create a function for similarity search with filtering support
 ```sql
 create function match_documents (
 query_embedding vector(1536),
-match_count int
+match_count int,
+filter jsonb DEFAULT '{}'
 ) returns table (
 id uuid,
 content text,
@@ -52,6 +53,7 @@ metadata,
 embedding,
 1 - (embedding <=> query_embedding) as similarity
 from documents
+where metadata @> filter
 order by embedding <=> query_embedding
 limit match_count;
 end;
@@ -96,6 +98,7 @@ const index = await VectorStoreIndex.fromDocuments(documents, {
 ```ts
 const queryEngine = index.asQueryEngine();

+// Basic query without filters
 const response = await queryEngine.query({
  query: "What is in the document?",
 });
@@ -104,6 +107,32 @@ const response = await queryEngine.query({
 console.log(response.toString());
 ```

+## Query with filters
+
+You can filter documents based on metadata when querying:
+
+```ts
+import { FilterOperator, MetadataFilters } from "llamaindex";
+
+// Create a filter for documents with author = "Jane Smith"
+const filters: MetadataFilters = {
+  filters: [
+    {
+      key: "author",
+      value: "Jane Smith",
+      operator: FilterOperator.EQ,
+    },
+  ],
+};
+
+// Query with filters
+const filteredResponse = await vectorStore.query({
+  queryEmbedding: embedModel.getQueryEmbedding("What is vector search?"),
+  similarityTopK: 5,
+  filters,
+});
+```
+
 ## Full code

 ```ts
@@ -11,58 +11,130 @@ npm i llamaindex @llamaindex/google
 ## Usage

 ```ts
-import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
 import { Settings } from "llamaindex";

-Settings.llm = new Gemini({
-  model: GEMINI_MODEL.GEMINI_PRO,
-});
-```
-
-## Usage with Proxy
-
-```ts
-import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
-import { Settings } from "llamaindex";
-
-Settings.llm = new Gemini({
-  model: GEMINI_MODEL.GEMINI_PRO,
-  requestOptions: {
-    baseUrl: <YOUR_PROXY_URL>   // optional, but useful for custom endpoints
-  }
+Settings.llm = gemini({
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH,
 });
 ```

 ### Usage with Vertex AI

-To use Gemini via Vertex AI you can use `GeminiVertexSession`.
-
-GeminiVertexSession accepts the env variables: `GOOGLE_VERTEX_LOCATION` and `GOOGLE_VERTEX_PROJECT`
+To use Gemini via Vertex AI, you can specify the vertex configuration:

 ```ts
-import { Gemini, GEMINI_MODEL, GeminiVertexSession } from "@llamaindex/google";
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";

-const gemini = new Gemini({
-  model: GEMINI_MODEL.GEMINI_PRO,
-  session: new GeminiVertexSession({
-    location: "us-central1",      // optional if provided by GOOGLE_VERTEX_LOCATION env variable
-    project: "project1",          // optional if provided by GOOGLE_VERTEX_PROJECT env variable
-    googleAuthOptions: {...},     // optional, but useful for production. It accepts all values from `GoogleAuthOptions`
-  }),
+const llm = gemini({
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH,
+  vertex: {
+    project: "your-cloud-project",    // required for Vertex AI
+    location: "us-central1",          // required for Vertex AI
+  },
 });
 ```

-[GoogleAuthOptions](https://github.com/googleapis/google-auth-library-nodejs/blob/main/src/auth/googleauth.ts)
-
 To authenticate for local development:

 ```bash
-npm i @google-cloud/vertexai
 gcloud auth application-default login
 ```

 To authenticate for production you'll have to use a [service account](https://cloud.google.com/docs/authentication/). `googleAuthOptions` has `credentials` which might be useful for you.

+## Multimodal Usage
+
+Gemini supports multimodal inputs including text, images, audio, and video:
+
+```ts
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
+import fs from "fs";
+
+const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
+
+const result = await llm.chat({
+  messages: [
+    {
+      role: "user",
+      content: [
+        {
+          type: "text",
+          text: "What's in this image?",
+        },
+        {
+          type: "image",
+          data: fs.readFileSync("./image.jpg").toString("base64"),
+          mimeType: "image/jpeg",
+        },
+      ],
+    },
+  ],
+});
+```
+
+## Tool Calling
+
+Gemini supports function calling with tools:
+
+```ts
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
+import { tool } from "llamaindex";
+import { z } from "zod";
+
+const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
+
+const result = await llm.chat({
+  messages: [
+    {
+      content: "What's the weather in Tokyo?",
+      role: "user",
+    },
+  ],
+  tools: [
+    tool({
+      name: "weather",
+      description: "Get the weather",
+      parameters: z.object({
+        location: z.string().describe("The location to get the weather for"),
+      }),
+      execute: ({ location }) => {
+        return `The weather in ${location} is sunny and hot`;
+      },
+    }),
+  ],
+});
+```
+
+## Live API (Real-time Conversations)
+
+You can have real-time audio/video conversations using the [Gemini Live API](https://ai.google.dev/gemini-api/docs/live).
+
+The Live API is running directly in the frontend. That's why you have to generate an ephemeral key first on the server side and pass it to the frontend.
+
+To use the Live API, make sure to pass `apiVersion: "v1alpha"` to the `httpOptions`.
+
+```ts
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
+
+// Server-side: Generate ephemeral key
+const serverLlm = gemini({
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
+  httpOptions: { apiVersion: "v1alpha" },
+});
+const ephemeralKey = await serverLlm.live.getEphemeralKey();
+
+// Client-side: Use ephemeral key for Live API
+const llm = gemini({
+  apiKey: ephemeralKey,
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
+  voiceName: "Zephyr",
+  httpOptions: { apiVersion: "v1alpha" },
+});
+
+const session = await llm.live.connect();
+```
+
 ## Load and index documents

 For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
@@ -90,11 +162,11 @@ const results = await queryEngine.query({
 ## Full Example

 ```ts
-import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
 import { Document, VectorStoreIndex, Settings } from "llamaindex";

-Settings.llm = new Gemini({
-  model: GEMINI_MODEL.GEMINI_PRO,
+Settings.llm = gemini({
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH,
 });

 async function main() {
@@ -104,9 +176,7 @@ async function main() {
  const index = await VectorStoreIndex.fromDocuments([document]);

  // Create a query engine
-  const queryEngine = index.asQueryEngine({
-    retriever,
-  });
+  const queryEngine = index.asQueryEngine();

  const query = "What is the meaning of life?";

@@ -378,3 +378,186 @@ async function main() {
 ## API Reference

 - [OpenAI](/docs/api/classes/OpenAI)
+
+
+# OpenAI Live LLM
+
+The OpenAI Live LLM integration in LlamaIndex provides real-time chat capabilities with support for audio streaming and tool calling.
+
+## Basic Usage
+
+```typescript
+import { openai } from "@llamaindex/openai";
+import { tool, ModalityType } from "llamaindex";
+
+// Create the server-side LLM that will issue the ephemeral key
+const serverllm = openai({
+  apiKey: "your-api-key", 
+  model: "gpt-4o-realtime-preview-2025-06-03",
+});
+
+// Get an ephemeral key 
+// Usually this code is run on the server and the ephemeral key is passed to the
+// client - the ephemeral key can be securely used on the client side
+const ephemeralKey = await serverllm.live.getEphemeralKey();
+
+// Create a client-side LLM instance with the ephemeral key
+const llm = openai({
+  apiKey: ephemeralKey,
+  model: "gpt-4o-realtime-preview-2025-06-03"
+});
+
+// Create a live session
+const session = await llm.live.connect({
+  systemInstruction: "You are a helpful assistant.",
+});
+
+// Send a message
+session.sendMessage({
+  content: "Hello!",
+  role: "user",
+});
+```
+
+## Tool Integration
+
+Tools are handled server-side, making it simple to pass them to the live session:
+
+```typescript
+// Define your tools
+const weatherTool = tool({
+  name: "weather",
+  description: "Get the weather for a location",
+  parameters: z.object({
+    location: z.string().describe("The location to get weather for"),
+  }),
+  execute: async ({ location }) => {
+    return `The weather in ${location} is sunny`;
+  },
+});
+
+// Create session with tools
+const session = await llm.live.connect({
+  systemInstruction: "You are a helpful assistant.",
+  tools: [weatherTool],
+});
+```
+
+## Audio Support
+
+For audio capabilities:
+
+```typescript
+// Get microphone access
+const userStream = await navigator.mediaDevices.getUserMedia({
+  audio: true,
+});
+
+// Create session with audio
+const session = await llm.live.connect({
+  audioConfig: {
+    stream: userStream,
+    onTrack: (remoteStream) => {
+      // Handle incoming audio
+      audioElement.srcObject = remoteStream;
+    },
+  },
+});
+```
+
+## Event Handling
+
+Listen to events from the session:
+
+```typescript
+for await (const event of session.streamEvents()) {
+  if (liveEvents.open.include(event)) {
+    // Connection established
+    console.log("Connected!");
+  } else if (liveEvents.text.include(event)) {
+    // Received text response
+    console.log("Assistant:", event.text);
+  }
+}
+```
+
+## Capabilities
+
+The OpenAI Live LLM supports:
+
+- Real-time text chat
+- Audio streaming (if configured)
+- Tool calling (server-side execution)
+- Ephemeral key generation for secure sessions
+
+## API Reference
+
+### LiveLLM Methods
+// Get an ephemeral key 
+// Usually this code is run on the server and the ephemeral key is passed to the
+// client - the ephemeral key can be securely used on the client side
+
+#### `connect(config?: LiveConnectConfig)`
+
+Creates a new live session.
+
+```typescript
+interface LiveConnectConfig {
+  systemInstruction?: string;
+  tools?: BaseTool[];
+  audioConfig?: AudioConfig;
+  responseModality?: ModalityType[];
+}
+```
+
+#### `getEphemeralKey()`
+
+Gets a temporary key for the session.
+
+### LiveLLMSession Methods
+
+#### `sendMessage(message: ChatMessage)`
+
+Sends a message to the assistant.
+
+```typescript
+interface ChatMessage {
+  content: string | MessageContentDetail[];
+  role: "user" | "assistant";
+}
+```
+
+#### `disconnect()`
+
+Closes the session and cleans up resources.
+
+## Error Handling
+
+```typescript
+try {
+  const session = await llm.live.connect();
+} catch (error) {
+  if (error instanceof Error) {
+    console.error("Connection failed:", error.message);
+  }
+}
+```
+
+## Best Practices
+
+1. **Tool Definition**
+
+   - Keep tool implementations server-side
+   - Use clear descriptions for tools
+   - Handle tool errors gracefully
+
+2. **Session Management**
+
+   - Always disconnect sessions when done
+   - Clean up audio resources
+   - Handle reconnection scenarios
+
+3. **Security**
+   - Use ephemeral keys for sessions
+   - Validate tool inputs
+   - Secure API key handling
@@ -11,6 +11,7 @@ A retriever in LlamaIndex is what is used to fetch `Node`s from an index using a
 - [KeywordTableLLMRetriever](/docs/api/classes/KeywordTableLLMRetriever) uses an LLM to extract keywords from the query and retrieve relevant nodes based on keyword matches.
 - [KeywordTableSimpleRetriever](/docs/api/classes/KeywordTableSimpleRetriever) uses a basic frequency-based approach to extract keywords and retrieve nodes.
 - [KeywordTableRAKERetriever](/docs/api/classes/KeywordTableRAKERetriever) uses the RAKE (Rapid Automatic Keyword Extraction) algorithm to extract keywords from the query, focusing on co-occurrence and context for keyword-based retrieval.
+- [Bm25Retriever](/docs/api/classes/Bm25Retriever) uses the BM25 ranking algorithm to score nodes against the terms of the query and retrieve the most relevant nodes.

 ```typescript
 const retriever = vectorIndex.asRetriever({
@@ -1,3 +1,3 @@
 {
-  "pages": ["llamaindex", "api", "llamaflow", "chat-ui"]
+  "pages": ["llamaindex", "api", "workflows", "chat-ui"]
 }
@@ -4,7 +4,7 @@
  "tasks": {
    "build": {
      "inputs": [
-        "node_modules/@llama-flow/docs/**",
+        "node_modules/@llamaindex/workflow-docs/**",
        "node_modules/@llamaindex/chat-ui-docs/**",
        "src/**/*.ts",
        "src/**/*.tsx",
@@ -1,5 +1,50 @@
 # @llamaindex/cloudflare-worker-agent-test

+## 0.0.173
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+
+## 0.0.172
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+
+## 0.0.171
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
+## 0.0.170
+
+### Patch Changes
+
+- llamaindex@0.11.9
+
+## 0.0.169
+
+### Patch Changes
+
+- llamaindex@0.11.8
+
+## 0.0.168
+
+### Patch Changes
+
+- Updated dependencies [3c857f4]
+  - llamaindex@0.11.7
+
+## 0.0.167
+
+### Patch Changes
+
+- llamaindex@0.11.6
+
 ## 0.0.166

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloudflare-worker-agent-test",
-  "version": "0.0.166",
+  "version": "0.0.173",
  "type": "module",
  "private": true,
  "scripts": {
@@ -1,5 +1,29 @@
 # @llamaindex/llama-parse-browser-test

+## 0.0.72
+
+### Patch Changes
+
+- @llamaindex/cloud@4.0.17
+
+## 0.0.71
+
+### Patch Changes
+
+- @llamaindex/cloud@4.0.16
+
+## 0.0.70
+
+### Patch Changes
+
+- @llamaindex/cloud@4.0.15
+
+## 0.0.69
+
+### Patch Changes
+
+- @llamaindex/cloud@4.0.14
+
 ## 0.0.68

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/llama-parse-browser-test",
  "private": true,
-  "version": "0.0.68",
+  "version": "0.0.72",
  "type": "module",
  "scripts": {
    "dev": "vite",
@@ -1,5 +1,50 @@
 # @llamaindex/next-agent-test

+## 0.1.173
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+
+## 0.1.172
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+
+## 0.1.171
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
+## 0.1.170
+
+### Patch Changes
+
+- llamaindex@0.11.9
+
+## 0.1.169
+
+### Patch Changes
+
+- llamaindex@0.11.8
+
+## 0.1.168
+
+### Patch Changes
+
+- Updated dependencies [3c857f4]
+  - llamaindex@0.11.7
+
+## 0.1.167
+
+### Patch Changes
+
+- llamaindex@0.11.6
+
 ## 0.1.166

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-agent-test",
-  "version": "0.1.166",
+  "version": "0.1.173",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,50 @@
 # test-edge-runtime

+## 0.1.172
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+
+## 0.1.171
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+
+## 0.1.170
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
+## 0.1.169
+
+### Patch Changes
+
+- llamaindex@0.11.9
+
+## 0.1.168
+
+### Patch Changes
+
+- llamaindex@0.11.8
+
+## 0.1.167
+
+### Patch Changes
+
+- Updated dependencies [3c857f4]
+  - llamaindex@0.11.7
+
+## 0.1.166
+
+### Patch Changes
+
+- llamaindex@0.11.6
+
 ## 0.1.165

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/nextjs-edge-runtime-test",
-  "version": "0.1.165",
+  "version": "0.1.172",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,65 @@
 # @llamaindex/next-node-runtime

+## 0.1.41
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+  - @llamaindex/huggingface@0.1.17
+  - @llamaindex/readers@3.1.12
+
+## 0.1.40
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+  - @llamaindex/huggingface@0.1.16
+  - @llamaindex/readers@3.1.11
+
+## 0.1.39
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
+## 0.1.38
+
+### Patch Changes
+
+- Updated dependencies [c5846bd]
+  - @llamaindex/readers@3.1.10
+
+## 0.1.37
+
+### Patch Changes
+
+- llamaindex@0.11.9
+- @llamaindex/huggingface@0.1.15
+- @llamaindex/readers@3.1.9
+
+## 0.1.36
+
+### Patch Changes
+
+- llamaindex@0.11.8
+- @llamaindex/huggingface@0.1.14
+- @llamaindex/readers@3.1.8
+
+## 0.1.35
+
+### Patch Changes
+
+- Updated dependencies [3c857f4]
+  - llamaindex@0.11.7
+
+## 0.1.34
+
+### Patch Changes
+
+- llamaindex@0.11.6
+
 ## 0.1.33

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-node-runtime-test",
-  "version": "0.1.33",
+  "version": "0.1.41",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,50 @@
 # vite-import-llamaindex

+## 0.0.39
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+
+## 0.0.38
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+
+## 0.0.37
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
+## 0.0.36
+
+### Patch Changes
+
+- llamaindex@0.11.9
+
+## 0.0.35
+
+### Patch Changes
+
+- llamaindex@0.11.8
+
+## 0.0.34
+
+### Patch Changes
+
+- Updated dependencies [3c857f4]
+  - llamaindex@0.11.7
+
+## 0.0.33
+
+### Patch Changes
+
+- llamaindex@0.11.6
+
 ## 0.0.32

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "vite-import-llamaindex",
  "private": true,
-  "version": "0.0.32",
+  "version": "0.0.39",
  "type": "module",
  "scripts": {
    "build": "vite build",
@@ -1 +1,9 @@
-{"root":["./src/main.ts","./vite.config.ts"],"version":"5.7.3"}
+{
+  "root": [
+    "./src/main.ts",
+    "./vite.config.ts",
+    "./tsconfig.json"
+  ],
+  "errors": true,
+  "version": "5.7.3"
+}
@@ -1,5 +1,50 @@
 # @llamaindex/waku-query-engine-test

+## 0.0.173
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+
+## 0.0.172
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+
+## 0.0.171
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
+## 0.0.170
+
+### Patch Changes
+
+- llamaindex@0.11.9
+
+## 0.0.169
+
+### Patch Changes
+
+- llamaindex@0.11.8
+
+## 0.0.168
+
+### Patch Changes
+
+- Updated dependencies [3c857f4]
+  - llamaindex@0.11.7
+
+## 0.0.167
+
+### Patch Changes
+
+- llamaindex@0.11.6
+
 ## 0.0.166

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/waku-query-engine-test",
-  "version": "0.0.166",
+  "version": "0.0.173",
  "type": "module",
  "private": true,
  "scripts": {
@@ -10,7 +10,7 @@ import { mockLLMEvent } from "./utils.js";
 let llm: LLM;
 beforeEach(async () => {
  Settings.llm = new Anthropic({
-    model: "claude-3-opus",
+    model: "claude-3.5-sonnet",
  });
  llm = Settings.llm;
 });
@@ -7,7 +7,7 @@
  "dependencies": {
    "@llamaindex/workflow": "1.1.1",
    "llamaindex": "0.10.5",
-    "zod": "^3.23.8"
+    "zod": "^3.25.67"
  },
  "devDependencies": {
    "tsx": "^4.19.1",
@@ -27,6 +27,6 @@
    "pg": "^8.12.0",
    "pgvector": "0.2.0",
    "tsx": "^4.19.3",
-    "zod": "^3.24.2"
+    "zod": "^3.25.67"
  }
 }
@@ -1,5 +1,245 @@
 # examples

+## 0.3.26
+
+### Patch Changes
+
+- Updated dependencies [d578889]
+- Updated dependencies [0fcc92f]
+- Updated dependencies [515a8b9]
+- Updated dependencies [3cd8a57]
+- Updated dependencies [f2dfd30]
+  - @llamaindex/core@0.6.13
+  - llamaindex@0.11.12
+  - @llamaindex/tools@0.1.3
+  - @llamaindex/bm25-retriever@0.0.2
+  - @llamaindex/cloud@4.0.17
+  - @llamaindex/node-parser@2.0.13
+  - @llamaindex/anthropic@0.3.15
+  - @llamaindex/assemblyai@0.1.12
+  - @llamaindex/clip@0.0.63
+  - @llamaindex/cohere@0.0.27
+  - @llamaindex/deepinfra@0.0.63
+  - @llamaindex/discord@0.1.12
+  - @llamaindex/google@0.3.12
+  - @llamaindex/huggingface@0.1.17
+  - @llamaindex/jinaai@0.0.23
+  - @llamaindex/mistral@0.1.13
+  - @llamaindex/mixedbread@0.0.27
+  - @llamaindex/notion@0.1.12
+  - @llamaindex/ollama@0.1.13
+  - @llamaindex/openai@0.4.7
+  - @llamaindex/perplexity@0.0.20
+  - @llamaindex/portkey-ai@0.0.55
+  - @llamaindex/replicate@0.0.55
+  - @llamaindex/astra@0.0.27
+  - @llamaindex/azure@0.1.24
+  - @llamaindex/chroma@0.0.27
+  - @llamaindex/elastic-search@0.1.13
+  - @llamaindex/firestore@1.0.20
+  - @llamaindex/milvus@0.1.22
+  - @llamaindex/mongodb@0.0.28
+  - @llamaindex/pinecone@0.1.13
+  - @llamaindex/postgres@0.0.56
+  - @llamaindex/qdrant@0.1.23
+  - @llamaindex/supabase@0.1.13
+  - @llamaindex/upstash@0.0.27
+  - @llamaindex/weaviate@0.0.28
+  - @llamaindex/vercel@0.1.13
+  - @llamaindex/voyage-ai@1.0.19
+  - @llamaindex/readers@3.1.12
+  - @llamaindex/workflow@1.1.13
+  - @llamaindex/deepseek@0.0.23
+  - @llamaindex/fireworks@0.0.23
+  - @llamaindex/groq@0.0.78
+  - @llamaindex/together@0.0.23
+  - @llamaindex/vllm@0.0.49
+  - @llamaindex/xai@0.0.10
+
+## 0.3.25
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+  - @llamaindex/core@0.6.12
+  - @llamaindex/google@0.3.11
+  - @llamaindex/cloud@4.0.16
+  - @llamaindex/node-parser@2.0.12
+  - @llamaindex/anthropic@0.3.14
+  - @llamaindex/assemblyai@0.1.11
+  - @llamaindex/clip@0.0.62
+  - @llamaindex/cohere@0.0.26
+  - @llamaindex/deepinfra@0.0.62
+  - @llamaindex/discord@0.1.11
+  - @llamaindex/huggingface@0.1.16
+  - @llamaindex/jinaai@0.0.22
+  - @llamaindex/mistral@0.1.12
+  - @llamaindex/mixedbread@0.0.26
+  - @llamaindex/notion@0.1.11
+  - @llamaindex/ollama@0.1.12
+  - @llamaindex/openai@0.4.6
+  - @llamaindex/perplexity@0.0.19
+  - @llamaindex/portkey-ai@0.0.54
+  - @llamaindex/replicate@0.0.54
+  - @llamaindex/astra@0.0.26
+  - @llamaindex/azure@0.1.23
+  - @llamaindex/chroma@0.0.26
+  - @llamaindex/elastic-search@0.1.12
+  - @llamaindex/firestore@1.0.19
+  - @llamaindex/milvus@0.1.21
+  - @llamaindex/mongodb@0.0.27
+  - @llamaindex/pinecone@0.1.12
+  - @llamaindex/postgres@0.0.55
+  - @llamaindex/qdrant@0.1.22
+  - @llamaindex/supabase@0.1.12
+  - @llamaindex/upstash@0.0.26
+  - @llamaindex/weaviate@0.0.27
+  - @llamaindex/vercel@0.1.12
+  - @llamaindex/voyage-ai@1.0.18
+  - @llamaindex/readers@3.1.11
+  - @llamaindex/tools@0.1.1
+  - @llamaindex/workflow@1.1.12
+  - @llamaindex/deepseek@0.0.22
+  - @llamaindex/fireworks@0.0.22
+  - @llamaindex/groq@0.0.77
+  - @llamaindex/together@0.0.22
+  - @llamaindex/vllm@0.0.48
+  - @llamaindex/xai@0.0.9
+
+## 0.3.24
+
+### Patch Changes
+
+- Updated dependencies [096bf2b]
+- Updated dependencies [c5846bd]
+  - @llamaindex/tools@0.1.0
+  - @llamaindex/readers@3.1.10
+
+## 0.3.23
+
+### Patch Changes
+
+- Updated dependencies [a89e187]
+- Updated dependencies [62699b7]
+- Updated dependencies [c5b2691]
+- Updated dependencies [d8ac8d3]
+  - @llamaindex/core@0.6.11
+  - @llamaindex/google@0.3.10
+  - @llamaindex/openai@0.4.5
+  - @llamaindex/cloud@4.0.15
+  - llamaindex@0.11.9
+  - @llamaindex/node-parser@2.0.11
+  - @llamaindex/anthropic@0.3.13
+  - @llamaindex/assemblyai@0.1.10
+  - @llamaindex/clip@0.0.61
+  - @llamaindex/cohere@0.0.25
+  - @llamaindex/deepinfra@0.0.61
+  - @llamaindex/discord@0.1.10
+  - @llamaindex/huggingface@0.1.15
+  - @llamaindex/jinaai@0.0.21
+  - @llamaindex/mistral@0.1.11
+  - @llamaindex/mixedbread@0.0.25
+  - @llamaindex/notion@0.1.10
+  - @llamaindex/ollama@0.1.11
+  - @llamaindex/perplexity@0.0.18
+  - @llamaindex/portkey-ai@0.0.53
+  - @llamaindex/replicate@0.0.53
+  - @llamaindex/astra@0.0.25
+  - @llamaindex/azure@0.1.22
+  - @llamaindex/chroma@0.0.25
+  - @llamaindex/elastic-search@0.1.11
+  - @llamaindex/firestore@1.0.18
+  - @llamaindex/milvus@0.1.20
+  - @llamaindex/mongodb@0.0.26
+  - @llamaindex/pinecone@0.1.11
+  - @llamaindex/postgres@0.0.54
+  - @llamaindex/qdrant@0.1.21
+  - @llamaindex/supabase@0.1.10
+  - @llamaindex/upstash@0.0.25
+  - @llamaindex/weaviate@0.0.26
+  - @llamaindex/vercel@0.1.11
+  - @llamaindex/voyage-ai@1.0.17
+  - @llamaindex/readers@3.1.9
+  - @llamaindex/tools@0.0.17
+  - @llamaindex/workflow@1.1.10
+  - @llamaindex/deepseek@0.0.21
+  - @llamaindex/fireworks@0.0.21
+  - @llamaindex/groq@0.0.76
+  - @llamaindex/together@0.0.21
+  - @llamaindex/vllm@0.0.47
+  - @llamaindex/xai@0.0.8
+
+## 0.3.22
+
+### Patch Changes
+
+- Updated dependencies [8a51c16]
+- Updated dependencies [1b5af14]
+  - @llamaindex/workflow@1.1.9
+  - @llamaindex/core@0.6.10
+  - llamaindex@0.11.8
+  - @llamaindex/cloud@4.0.14
+  - @llamaindex/node-parser@2.0.10
+  - @llamaindex/anthropic@0.3.12
+  - @llamaindex/assemblyai@0.1.9
+  - @llamaindex/clip@0.0.60
+  - @llamaindex/cohere@0.0.24
+  - @llamaindex/deepinfra@0.0.60
+  - @llamaindex/discord@0.1.9
+  - @llamaindex/google@0.3.9
+  - @llamaindex/huggingface@0.1.14
+  - @llamaindex/jinaai@0.0.20
+  - @llamaindex/mistral@0.1.10
+  - @llamaindex/mixedbread@0.0.24
+  - @llamaindex/notion@0.1.9
+  - @llamaindex/ollama@0.1.10
+  - @llamaindex/openai@0.4.4
+  - @llamaindex/perplexity@0.0.17
+  - @llamaindex/portkey-ai@0.0.52
+  - @llamaindex/replicate@0.0.52
+  - @llamaindex/astra@0.0.24
+  - @llamaindex/azure@0.1.21
+  - @llamaindex/chroma@0.0.24
+  - @llamaindex/elastic-search@0.1.10
+  - @llamaindex/firestore@1.0.17
+  - @llamaindex/milvus@0.1.19
+  - @llamaindex/mongodb@0.0.25
+  - @llamaindex/pinecone@0.1.10
+  - @llamaindex/postgres@0.0.53
+  - @llamaindex/qdrant@0.1.20
+  - @llamaindex/supabase@0.1.9
+  - @llamaindex/upstash@0.0.24
+  - @llamaindex/weaviate@0.0.25
+  - @llamaindex/vercel@0.1.10
+  - @llamaindex/voyage-ai@1.0.16
+  - @llamaindex/readers@3.1.8
+  - @llamaindex/tools@0.0.16
+  - @llamaindex/deepseek@0.0.20
+  - @llamaindex/fireworks@0.0.20
+  - @llamaindex/groq@0.0.75
+  - @llamaindex/together@0.0.20
+  - @llamaindex/vllm@0.0.46
+  - @llamaindex/xai@0.0.7
+
+## 0.3.21
+
+### Patch Changes
+
+- Updated dependencies [dbd857f]
+- Updated dependencies [3c857f4]
+  - @llamaindex/workflow@1.1.8
+  - llamaindex@0.11.7
+  - @llamaindex/tools@0.0.15
+
+## 0.3.20
+
+### Patch Changes
+
+- Updated dependencies [e7484ef]
+  - @llamaindex/weaviate@0.0.24
+
 ## 0.3.19

 ### Patch Changes
@@ -1,4 +1,3 @@
-import { tool } from "@llamaindex/core/tools";
 import { openai } from "@llamaindex/openai";
 import {
  agent,
@@ -7,6 +6,7 @@ import {
  multiAgent,
 } from "@llamaindex/workflow";
 import fs from "fs";
+import { tool } from "llamaindex";
 import os from "os";
 import { z } from "zod";

@@ -6,15 +6,24 @@ async function main() {
  // Create an MCP server for filesystem tools
  const server = mcp({
    command: "npx",
-    args: ["-y", "@modelcontextprotocol/server-filesystem", "."],
+    args: ["-y", "@modelcontextprotocol/server-filesystem@latest", "."],
    verbose: true,
  });
-  // You can also connect to the MCP server using SSE
-  // See: https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse
+  //
+  // You can also connect to a remote MCP server using:
+  // 1. StreamableHTTP transport (recommended)
+  // See: https://modelcontextprotocol.io/docs/concepts/transports#streamable-http
  // const server = mcp({
  //   url: "http://localhost:8000/mcp",
  //   verbose: true,
  // });
+  // 2. Or use the SSE transport (will be deprecated soon)
+  // See: https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse-deprecated
+  // const server = mcp({
+  //   url: "http://localhost:8000/mcp",
+  //   useSSETransport: true,
+  //   verbose: true,
+  // });

  try {
    // Create an agent that uses the MCP tools
@@ -0,0 +1,36 @@
+import { openai } from "@llamaindex/openai";
+import { agent } from "@llamaindex/workflow";
+import { createMemory, staticBlock } from "llamaindex";
+
+// Simple example: Agent with Predefined Memory
+/**
+ * Runs an agent whose memory is created with one static block describing the
+ * user, sends two prompts in sequence and logs the result of each run.
+ */
+async function simpleAgentMemoryExample() {
+  console.log("=== Simple Agent Memory Example ===");
+
+  // Static block handed to the memory when it is created
+  const userProfile = staticBlock({
+    content:
+      "The user is a software engineer who loves TypeScript and LlamaIndex.",
+  });
+  const memory = createMemory({ memoryBlocks: [userProfile] });
+
+  // Agent workflow that uses the memory above
+  const llm = openai({ model: "gpt-4.1-nano" });
+  const workflow = agent({ name: "assistant", llm, memory });
+
+  // First run: the user introduces himself as John. The static block only
+  // describes the user's job and interests; the name comes from this message.
+  console.log("\n--- Testing Memory Context ---");
+  const firstReply = await workflow.run("Hi, my name is John. Do you know me?");
+  console.log("Assistant Response:", firstReply.data.result);
+
+  // Second run: asks for the name given in the first run
+  const secondReply = await workflow.run("What is my name?");
+  console.log("Assistant Response:", secondReply.data.result);
+}
+
+// Run the example
+simpleAgentMemoryExample().catch(console.error);
@@ -0,0 +1,58 @@
+import { openai } from "@llamaindex/openai";
+import { createMemory } from "llamaindex";
+
+// Example: Basic Memory Usage with Factory
+/**
+ * Adds three chat messages to a memory created with a token limit of 30, then
+ * prints what `getLLM()` returns when called without and with an LLM instance.
+ */
+async function basicMemoryExample() {
+  console.log("\n=== Example: Basic Memory Usage with Factory ===");
+
+  const memory = createMemory({ tokenLimit: 30 });
+
+  // Fill the memory with a short conversation
+  await memory.add({
+    role: "user",
+    content: "Hi, my name is John and I'm a software engineer.",
+  });
+  await memory.add({
+    role: "assistant",
+    content: "Hello John! Nice to meet you. How can I help you today?",
+  });
+  await memory.add({
+    role: "user",
+    content: "I love working with TypeScript and React.",
+  });
+
+  // With the token limit set to 30, not all messages are expected to be returned
+  const limitedMessages = await memory.getLLM();
+  console.log(
+    `\nLLM messages (${limitedMessages.length} messages) limited by a small token limit:`,
+  );
+  for (const [idx, msg] of limitedMessages.entries()) {
+    console.log(`${idx + 1}. ${msg.role}: ${msg.content}`);
+  }
+
+  // When an LLM is passed to getLLM, the window size of that LLM is used as the limit instead
+  const llm = openai({ model: "gpt-4.1-mini" });
+  const windowMessages = await memory.getLLM(llm);
+  // The model's window is much larger, so all messages are expected to be returned now
+  console.log(
+    `\nLLM messages with LLM (${windowMessages.length} messages) limited by LLM window size:`,
+  );
+  for (const [idx, msg] of windowMessages.entries()) {
+    console.log(`${idx + 1}. ${msg.role}: ${msg.content}`);
+  }
+}
+
+// Main function
+/** Prints a banner, runs the basic memory example and logs any error it throws. */
+async function main() {
+  console.log("🧠 Basic Memory Factory Examples");
+  console.log("===============================");
+
+  try {
+    await basicMemoryExample();
+  } catch (err) {
+    // Log the failure here so the banner output stays followed by a readable error
+    console.error("Error running basic memory examples:", err);
+  }
+}
+
+main().catch(console.error);
@@ -0,0 +1,101 @@
+import { openai } from "@llamaindex/openai";
+import { createMemory, factExtractionBlock } from "llamaindex";
+
+// Configure OpenAI
+const llm = openai({ model: "gpt-4.1-mini" });
+
+// Example: Memory with Fact Extraction
+/**
+ * Adds an eight-message conversation to a memory configured with a fact
+ * extraction block, then prints the messages returned by `getLLM(llm)`.
+ */
+async function factExtractionMemoryExample() {
+  console.log("\n=== Memory with Fact Extraction ===");
+
+  // Fact extraction block, configured with the module-level llm
+  const userFacts = factExtractionBlock({
+    id: "user-facts",
+    priority: 5,
+    llm: llm,
+    maxFacts: 10,
+    isLongTerm: true,
+  });
+
+  // Memory that starts without messages and owns the block above
+  const memory = createMemory([], {
+    tokenLimit: 100,
+    shortTermTokenLimitRatio: 0.7, // 70% for short-term, 30% for long-term
+    memoryBlocks: [userFacts],
+  });
+
+  // Conversation containing several facts about the user
+  const turns = [
+    {
+      role: "user",
+      content: "Hi, I'm Sarah and I work as a data scientist at Google.",
+    },
+    {
+      role: "assistant",
+      content:
+        "Hello Sarah! It's great to meet you. Data science at Google must be exciting!",
+    },
+    {
+      role: "user",
+      content:
+        "Yes, I specialize in machine learning and natural language processing.",
+    },
+    {
+      role: "assistant",
+      content: "That's impressive! ML and NLP are fascinating fields.",
+    },
+    {
+      role: "user",
+      content:
+        "I have a PhD in Computer Science from Stanford, and I love hiking on weekends.",
+    },
+    {
+      role: "assistant",
+      content:
+        "Wow, Stanford PhD! And hiking is a great way to unwind from tech work.",
+    },
+    {
+      role: "user",
+      content: "I also have two cats named Whiskers and Mittens.",
+    },
+    {
+      role: "assistant",
+      content:
+        "Cats make wonderful companions! Whiskers and Mittens are cute names.",
+    },
+  ];
+
+  // Messages are added one at a time, in conversation order
+  console.log("Adding conversation to memory...");
+  for (const turn of turns) {
+    await memory.add(turn);
+  }
+
+  // Read the messages back; the extracted facts are expected among them
+  const messages = await memory.getLLM(llm);
+  console.log("\nMessages with extracted facts:");
+  for (const [idx, msg] of messages.entries()) {
+    console.log(`${idx + 1}. ${msg.role ?? "unknown"}: ${msg.content}`);
+  }
+  // Sample output:
+  // Messages with extracted facts:
+  // 1. assistant: Cats make wonderful companions! Whiskers and Mittens are cute names.
+  // 2. user: I also have two cats named Whiskers and Mittens.
+  // 3. assistant: Wow, Stanford PhD! And hiking is a great way to unwind from tech work.
+  // 4. memory: Sarah works as a data scientist at Google
+  // Sarah specializes in machine learning and natural language processing
+  // Sarah has a PhD in Computer Science from Stanford
+  // Sarah enjoys hiking on weekends
+}
+
+// Main function
+/** Prints a banner, runs the fact extraction example and logs any error it throws. */
+async function main() {
+  console.log("🧠 Fact Extraction Memory Example");
+  console.log("=================================");
+
+  try {
+    await factExtractionMemoryExample();
+  } catch (err) {
+    // Log the failure together with a hint about which example produced it
+    console.error("Error running fact extraction memory example:", err);
+  }
+}
+
+main().catch(console.error);
@@ -0,0 +1,62 @@
+import { openai } from "@llamaindex/openai";
+import { createMemory, staticBlock } from "llamaindex";
+
+// Configure OpenAI
+const llm = openai({ model: "gpt-4.1-mini" });
+
+// Example: Memory with Static Blocks
+/**
+ * Creates a memory with one static block and a token limit of 30, adds three
+ * chat messages and prints the messages returned by `getLLM(llm)`.
+ */
+async function staticMemoryBlockExample() {
+  console.log("\n=== Memory with Static Blocks ===");
+  console.log("- Memory always include static block");
+  console.log("- Memory cut off the messages within token limit\n");
+
+  // Static block describing the user
+  const aboutUser = staticBlock({
+    content:
+      "The user's name is John and he is a software engineer who loves TypeScript and LlamaIndex.",
+  });
+
+  const memory = createMemory([], {
+    tokenLimit: 30, // Small on purpose: not enough for the whole conversation below
+    memoryBlocks: [aboutUser],
+  });
+
+  // Conversation that is added to the memory
+  await memory.add({
+    role: "user",
+    content: "What do you know about me?",
+  });
+  await memory.add({
+    role: "assistant",
+    content:
+      "Based on our conversation, I know you're John, a software engineer who enjoys working with TypeScript and LlamaIndex!",
+  });
+  await memory.add({
+    role: "user",
+    content: "Which language does LlamaIndex support?",
+  });
+
+  // Read the messages back:
+  // - the static block is expected to always be included
+  // - only the last message is expected to fit into the token limit set above
+  const messages = await memory.getLLM(llm);
+  for (const [idx, msg] of messages.entries()) {
+    console.log(`${idx + 1}. ${msg.role}: ${msg.content}`);
+  }
+  // Sample output:
+  // 1. user: The user's name is John and he is a software engineer who loves TypeScript and LlamaIndex.
+  // 2. user: Which language does LlamaIndex support?
+}
+
+// Main function
+/** Runs the static memory block example and logs any error it throws. */
+async function main() {
+  try {
+    await staticMemoryBlockExample();
+  } catch (err) {
+    // Log the failure together with a hint about which example produced it
+    console.error("Error running static memory blocks example:", err);
+  }
+}
+
+main().catch(console.error);
@@ -0,0 +1,130 @@
+import { ToolCallLLM } from "llamaindex";
+
+import {
+  agentHandler,
+  createWorkflow,
+  workflowEvent,
+  zodEvent,
+} from "@llamaindex/workflow";
+
+import { openai } from "@llamaindex/openai";
+import { z } from "zod";
+
+// ===== 1. Define events =====
+// An event to trigger the workflow
+const planEvent = workflowEvent<{ topic: string }>();
+
+// Generate artifact event
+const ArtifactRequirementSchema = z.object({
+  type: z.literal("markdown"),
+  title: z.string().describe("The title of the artifact."),
+  requirement: z
+    .string()
+    .describe("The requirement for the artifact generation."),
+});
+
+const generateArtifactEvent = zodEvent(ArtifactRequirementSchema, {
+  debugLabel: "generateArtifactEvent",
+});
+
+// Artifact output event
+const ArtifactSchema = z.object({
+  type: z.literal("artifact"),
+  data: z.object({
+    type: z.literal("document"),
+    data: z.object({
+      title: z.string().describe("The title of the data."),
+      content: z.string().describe("The content of the data."),
+      type: z.enum(["markdown", "html"]).describe("The type of the data."),
+    }),
+  }),
+});
+const outputArtifactEvent = zodEvent(ArtifactSchema, {
+  debugLabel: "outputArtifactEvent",
+});
+
+// Events for updating UI
+// assume that we have a UI that can render different states of the workflow
+// and update the UI based on the state and the requirement
+export const UIEventSchema = z.object({
+  type: z.literal("ui_event"),
+  data: z.object({
+    state: z
+      .enum(["plan", "generate", "completed"])
+      .describe("The current state of the workflow."),
+    requirement: z
+      .string()
+      .optional()
+      .describe(
+        "An optional requirement creating or updating a document, if applicable.",
+      ),
+  }),
+});
+const uiEvent = zodEvent(UIEventSchema, { debugLabel: "uiEvent" });
+
+// ===== 2. Define workflow with agents using natural language =====
+// We have a document artifact workflow that is made up of 2 steps:
+// 1. Generate requirement for the document
+// 2. Generate document content based on the requirement
+/**
+ * Creates a workflow with two agent handlers: one handles `planEvent` and may
+ * return `generateArtifactEvent`, the other handles `generateArtifactEvent`
+ * and may return `outputArtifactEvent`. Both handlers can emit `uiEvent`.
+ *
+ * @param llm - LLM passed to both agent handlers.
+ * @returns The workflow with both handlers registered.
+ */
+export function createDocumentArtifactWorkflow(llm: ToolCallLLM) {
+  // Instructions for the step that turns the request into a requirement
+  const planInstructions = `
+Your task is to analyze the request and provide requirements for document generation or update.
+1. Send an uiEvent with the \`plan\` to show UI what you are going to do.
+2. Analyze the conversation history and the user's request carefully to determine the completed tasks and the next steps.
+3. Return the generateArtifactEvent with the requirement for the next step of the document generation or update.
+`;
+
+  // Instructions for the step that writes the document from the requirement
+  const generateInstructions = `
+You are a skilled technical writer who can assist users with documentation.
+Your task is to generate document content based on the requirement and update the UI state.
+
+Here are the steps to handle this task:
+1. First, send an uiEvent with the \`generate\` state and the requirement you received from the input.
+2. Next, start generating the content based on the requirement then send an uiEvent with the \`completed\` state to update the state.
+3. Finally, return the outputArtifactEvent with the document values.
+`;
+
+  const workflow = createWorkflow();
+
+  // Step 1: planEvent -> generateArtifactEvent
+  const planHandler = agentHandler({
+    instructions: planInstructions,
+    results: [generateArtifactEvent],
+    events: [uiEvent],
+    llm,
+  });
+  workflow.handle([planEvent], planHandler);
+
+  // Step 2: generateArtifactEvent -> outputArtifactEvent
+  const generateHandler = agentHandler({
+    instructions: generateInstructions,
+    results: [outputArtifactEvent],
+    events: [uiEvent],
+    llm,
+  });
+  workflow.handle([generateArtifactEvent], generateHandler);
+
+  return workflow;
+}
+
+/**
+ * Runs the document artifact workflow for the topic `llama` and logs the plan,
+ * UI and output artifact events read from the stream.
+ */
+async function main() {
+  const llm = openai({ model: "gpt-4.1-mini" });
+  const workflow = createDocumentArtifactWorkflow(llm);
+  const { stream, sendEvent } = workflow.createContext();
+
+  // Ask the workflow to generate a document about `llama`
+  sendEvent(planEvent.with({ topic: "llama" }));
+
+  // Read the stream up to the output artifact event and log the known events
+  await stream.until(outputArtifactEvent).forEach((event) => {
+    if (planEvent.include(event)) {
+      console.log("Starting workflow: ", event.data);
+    }
+    if (uiEvent.include(event)) {
+      console.log("UI event: ", event.data);
+    } else if (outputArtifactEvent.include(event)) {
+      console.log("Output artifact event: ", event.data);
+    }
+  });
+}
+
+// Log a failed run instead of leaving the promise rejection unhandled
+main().catch(console.error);
@@ -0,0 +1,93 @@
+import { openai } from "@llamaindex/openai";
+import { agentHandler, createWorkflow, zodEvent } from "@llamaindex/workflow";
+import { Settings } from "llamaindex";
+import { z } from "zod";
+
+// Create LLM instance
+const llm = openai({ model: "gpt-4.1-mini" });
+Settings.llm = llm;
+
+// Define our workflow events
+const writeJokeSchema = z.object({
+  description: z
+    .string()
+    .describe("The topic to write a joke or describe the joke to improve."),
+  writtenJoke: z.optional(z.string()).describe("The written joke."),
+  retriedTimes: z
+    .number()
+    .default(0)
+    .describe(
+      "The retried times for writing the joke. Always increase this from the input retriedTimes.",
+    ),
+});
+
+const critiqueSchema = z.object({
+  joke: z.string().describe("The joke to critique"),
+  retriedTimes: z.number().describe("The retried times for writing the joke."),
+});
+
+const finalResultSchema = z.object({
+  joke: z.string().describe("The joke to critique"),
+  critique: z.string().describe("The critique of the joke"),
+});
+
+const writeJokeEvent = zodEvent(writeJokeSchema, {
+  debugLabel: "writeJokeEvent",
+}); // Input topic for writing a joke
+const critiqueEvent = zodEvent(critiqueSchema, {
+  debugLabel: "critiqueEvent",
+}); // Ask for critique of the joke
+const finalResultEvent = zodEvent(finalResultSchema, {
+  debugLabel: "finalResultEvent",
+}); // Final result
+
+// Create our workflow
+const jokeFlow = createWorkflow();
+
+// Define handlers for each step
+// This step always writes a joke based on the description
+jokeFlow.handle(
+  [writeJokeEvent],
+  agentHandler({
+    instructions: `You are a joke writer. You are given a topic and you need to write a joke about it.`,
+    results: [critiqueEvent],
+  }),
+);
+
+// This step critiques the joke and asks the writer to improve the joke or send a final result event for stopping.
+jokeFlow.handle(
+  [critiqueEvent],
+  agentHandler({
+    instructions: `
+You are given a joke and you need to critique it. Follow the following guidelines:
+1. You have maximum 3 times to improve the joke.
+2. If the joke is not good, increase the retriedTimes, describe how to improve the joke and send a writeJokeEvent.
+3. If the joke is good, trigger the finalResultEvent event.
+`,
+    results: [writeJokeEvent, finalResultEvent],
+  }),
+);
+
+// Usage
+/**
+ * Sends a `writeJokeEvent` to the joke workflow and logs the events read from
+ * the stream up to `finalResultEvent`, then prints "Done".
+ */
+async function main() {
+  const { stream, sendEvent } = jokeFlow.createContext();
+  sendEvent(writeJokeEvent.with({ description: "write a joke about llama" }));
+
+  await stream.until(finalResultEvent).forEach((event) => {
+    // Only the label depends on the event kind; the payload is always
+    // printed as pretty-printed JSON.
+    let label = "Unknown event: ";
+    if (writeJokeEvent.include(event)) {
+      label = "Triggering write joke: ";
+    } else if (critiqueEvent.include(event)) {
+      label = "Written joke:  ";
+    } else if (finalResultEvent.include(event)) {
+      label = "Output: ";
+    }
+    console.log(label, JSON.stringify(event.data, null, 2));
+  });
+  console.log("Done");
+}
+
+main().catch(console.error);
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<company name="MidSizeCorp" founded="2008">
+  <division name="Engineering" head="Dana White">
+    <department name="Frontend" lead="Alex Kim">
+      <team name="Web">
+        <employee id="E01">
+          <name>Jordan Lee</name>
+          <role>Lead Developer</role>
+          <projects>
+            <project code="PRJ101" status="active">
+              <title>User Portal</title>
+              <deadline>2025-08-01</deadline>
+              <tasks>
+                <task id="T1011">
+                  <description>Implement login page</description>
+                  <due>2025-05-10</due>
+                </task>
+                <task id="T1012">
+                  <description>Design dashboard</description>
+                  <due>2025-05-20</due>
+                </task>
+              </tasks>
+            </project>
+          </projects>
+        </employee>
+        <employee id="E02">
+          <name>Riley Chen</name>
+          <role>UI Designer</role>
+        </employee>
+      </team>
+      <team name="Mobile">
+        <employee id="E03">
+          <name>Sam Patel</name>
+          <role>iOS Developer</role>
+        </employee>
+      </team>
+    </department>
+    <department name="Backend" lead="Morgan Reed">
+      <team name="API">
+        <employee id="E04">
+          <name>Taylor Jones</name>
+          <role>API Engineer</role>
+        </employee>
+      </team>
+      <team name="Database">
+        <employee id="E05">
+          <name>Casey Nguyen</name>
+          <role>DB Administrator</role>
+        </employee>
+      </team>
+    </department>
+  </division>
+
+  <division name="Marketing" head="Pat Morgan">
+    <department name="Digital" lead="Alex Rivera">
+      <team name="Content">
+        <employee id="M01">
+          <name>Charlie Brooks</name>
+          <role>Content Strategist</role>
+        </employee>
+      </team>
+    </department>
+  </division>
+
+  <headquarters location="Chicago, USA">
+    <address>
+      <street>789 Lake Shore Drive</street>
+      <city>Chicago</city>
+      <zip>60601</zip>
+    </address>
+  </headquarters>
+</company>
@@ -59,7 +59,7 @@ async function main() {

  const anthropic = new Anthropic({
    apiKey: process.env.ANTHROPIC_API_KEY,
-    model: "claude-3-opus",
+    model: "claude-3.5-sonnet",
  });

  // Create an ReActAgent with the function tools
@@ -61,7 +61,7 @@ async function main() {
  // Create an OpenAIAgent with the function tools
  const agent = new ReActAgent({
    llm: new Anthropic({
-      model: "claude-3-opus",
+      model: "claude-3.5-sonnet",
    }),
    tools: [functionTool, functionTool2],
  });
@@ -1,5 +1,5 @@
 import { Anthropic } from "@llamaindex/anthropic";
-import { ChatMemoryBuffer, SimpleChatEngine } from "llamaindex";
+import { createMemory, SimpleChatEngine } from "llamaindex";
 import { stdin as input, stdout as output } from "node:process";
 import readline from "node:readline/promises";

@@ -9,14 +9,12 @@ import readline from "node:readline/promises";
    model: "claude-3-7-sonnet",
  });
  // chatHistory will store all the messages in the conversation
-  const chatHistory = new ChatMemoryBuffer({
-    chatHistory: [
-      {
-        content: "You want to talk in rhymes.",
-        role: "system",
-      },
-    ],
-  });
+  const chatHistory = createMemory([
+    {
+      content: "You want to talk in rhymes.",
+      role: "system",
+    },
+  ]);
  const chatEngine = new SimpleChatEngine({
    llm,
    memory: chatHistory,
@@ -1,14 +1,16 @@
-import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
 import fs from "fs";
+import { tool } from "llamaindex";
+import { z } from "zod";

 (async () => {
  if (!process.env.GOOGLE_API_KEY) {
    throw new Error("Please set the GOOGLE_API_KEY environment variable.");
  }
-  const gemini = new Gemini({
-    model: GEMINI_MODEL.GEMINI_PRO_1_5,
-  });
-  const result = await gemini.chat({
+  const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
+
+  // normal chat
+  const result = await llm.chat({
    messages: [
      { content: "You want to talk in rhymes.", role: "system" },
      {
@@ -18,10 +20,10 @@ import fs from "fs";
      },
    ],
  });
-  console.log(result);
+  console.log("\n normal chat: \n", result);

  // chat with file
-  const resultWithFile = await gemini.chat({
+  const resultWithFile = await llm.chat({
    messages: [
      {
        role: "user",
@@ -39,6 +41,52 @@ import fs from "fs";
      },
    ],
  });
+  console.log("\n chat with file: \n", resultWithFile);

-  console.log(resultWithFile);
+  // chat with image base64
+  // The sample file is a JPEG, so the declared MIME type must be image/jpeg
+  // (it was previously declared as image/png, which does not match the data).
+  const resultWithImageFile = await llm.chat({
+    messages: [
+      {
+        role: "user",
+        content: [
+          {
+            type: "text",
+            text: "What's in this image?",
+          },
+          {
+            type: "image",
+            data: fs
+              .readFileSync("./multimodal/data/60.jpg")
+              .toString("base64"),
+            mimeType: "image/jpeg",
+          },
+        ],
+      },
+    ],
+  });
+  console.log("\n chat with image base64: \n", resultWithImageFile);
+
+  // chat with tool
+  const resultWithTool = await llm.chat({
+    messages: [
+      {
+        content: "What's the weather in Tokyo?",
+        role: "user",
+      },
+    ],
+    tools: [
+      tool({
+        name: "weather",
+        description: "Get the weather",
+        parameters: z.object({
+          location: z.string().describe("The location to get the weather for"),
+        }),
+        execute: ({ location }) => {
+          console.log("weather", location);
+          return `The weather in ${location} is sunny and hot`;
+        },
+      }),
+    ],
+  });
+  console.log("\n chat with tool: \n", resultWithTool.message.options); // should have toolCall
 })();
@@ -1,11 +1,14 @@
-import { Gemini, GEMINI_MODEL, GeminiVertexSession } from "@llamaindex/google";
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";

 (async () => {
-  const gemini = new Gemini({
-    model: GEMINI_MODEL.GEMINI_PRO,
-    session: new GeminiVertexSession(),
+  const llm = gemini({
+    model: GEMINI_MODEL.GEMINI_2_0_FLASH,
+    vertex: {
+      project: "your-cloud-project", // update to your cloud project
+      location: "us-central1",
+    },
  });
-  const result = await gemini.chat({
+  const result = await llm.chat({
    messages: [
      { content: "You want to talk in rhymes.", role: "system" },
      {
@@ -16,9 +16,19 @@ async function main() {

  console.log("🚀 Initializing Gemini Live API example...");

+  // Server-side (token creation):
+  const serverllm = gemini({
+    model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
+    httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to generate ephemeral key
+  });
+  const ephemeralKey = await serverllm.live.getEphemeralKey();
+
+  // Client-side (Live API connection):
  const llm = gemini({
+    apiKey: ephemeralKey, // use ephemeral key for client-side
    model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
    voiceName: "Zephyr",
+    httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to init client with ephemeral key
  });

  console.log("📡 Connecting to Gemini Live session...");
@@ -3,8 +3,18 @@ import { liveEvents } from "llamaindex";
 import { saveWavFile } from "./util";

 async function main() {
-  const llm = gemini({
+  // Server-side (token creation):
+  const serverllm = gemini({
    model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
+    httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to generate ephemeral key
+  });
+  const ephemeralKey = await serverllm.live.getEphemeralKey();
+
+  // Client-side (Live API connection):
+  const llm = gemini({
+    apiKey: ephemeralKey, // use ephemeral key for client-side
+    model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
+    httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to init client with ephemeral key
  });

  const session = await llm.live.connect();
@@ -23,10 +33,7 @@ async function main() {
        content: "Say something about you for 10 seconds",
        role: "user",
      });
-    } else if (
-      liveEvents.audio.include(event) &&
-      typeof event.data === "string"
-    ) {
+    } else if (liveEvents.audio.include(event)) {
      const chunk = Buffer.from(event.data, "base64");
      audioChunks.push(chunk);
      console.log(`Received audio chunk: ${chunk.length} bytes`);
@@ -1,6 +1,5 @@
-import { ModalityType } from "@llamaindex/core/schema";
-import { tool } from "@llamaindex/core/tools";
 import { gemini, GEMINI_MODEL } from "@llamaindex/google";
+import { ModalityType, tool } from "llamaindex";

 import { liveEvents } from "llamaindex";
 import { z } from "zod";
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
@@ -0,0 +1,54 @@
+# OpenAI Realtime Chat with LlamaIndex
+
+This is a demo application showcasing real-time audio and text chat capabilities using OpenAI's GPT-4 with voice through LlamaIndex. The application demonstrates bidirectional audio communication and text chat with an AI assistant.
+
+## Features
+
+- Real-time voice communication with GPT-4
+- Text-based chat interface
+- WebRTC-based audio streaming
+- Bidirectional communication (both text and voice)
+- React + TypeScript implementation
+
+## Prerequisites
+
+- Node.js (v18 or higher)
+- OpenAI API key with access to GPT-4 voice models
+- Modern browser with WebRTC support
+
+## Getting Started
+
+1. Install dependencies:
+
+```bash
+pnpm install
+```
+
+2. Start the development server:
+
+```bash
+pnpm run dev
+```
+
+## Usage
+
+The application provides a simple interface where you can:
+
+- Start/Stop a chat session
+- Speak to the AI assistant through your microphone
+- Receive audio responses from the assistant
+- See text transcripts of the conversation
+
+## Technical Details
+
+This project uses:
+
+- LlamaIndex for AI interaction management
+- WebRTC for real-time audio streaming
+- React for the UI
+- Vite for development and building
+- TypeScript for type safety
+
+```
+
+```
@@ -0,0 +1,28 @@
+import js from "@eslint/js";
+import reactHooks from "eslint-plugin-react-hooks";
+import reactRefresh from "eslint-plugin-react-refresh";
+import globals from "globals";
+import tseslint from "typescript-eslint";
+
+export default tseslint.config(
+  { ignores: ["dist"] },
+  {
+    extends: [js.configs.recommended, ...tseslint.configs.recommended],
+    files: ["**/*.{ts,tsx}"],
+    languageOptions: {
+      ecmaVersion: 2020,
+      globals: globals.browser,
+    },
+    plugins: {
+      "react-hooks": reactHooks,
+      "react-refresh": reactRefresh,
+    },
+    rules: {
+      ...reactHooks.configs.recommended.rules,
+      "react-refresh/only-export-components": [
+        "warn",
+        { allowConstantExport: true },
+      ],
+    },
+  },
+);
@@ -0,0 +1,13 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <link rel="icon" type="image/svg+xml" href="/vite.svg" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Vite + React + TS</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.tsx"></script>
+  </body>
+</html>
@@ -0,0 +1,29 @@
+{
+  "name": "open-ai-realtime",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "tsc -b && vite build",
+    "lint": "eslint .",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "react": "^19.1.0",
+    "react-dom": "^19.1.0"
+  },
+  "devDependencies": {
+    "@eslint/js": "^9.25.0",
+    "@types/react": "^19.1.2",
+    "@types/react-dom": "^19.1.2",
+    "@vitejs/plugin-react": "^4.5.2",
+    "eslint": "^9.25.0",
+    "eslint-plugin-react-hooks": "^5.2.0",
+    "eslint-plugin-react-refresh": "^0.4.19",
+    "globals": "^16.0.0",
+    "typescript": "~5.8.3",
+    "typescript-eslint": "^8.30.1",
+    "vite": "^6.3.5"
+  }
+}
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="31.88" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 257"><defs><linearGradient id="IconifyId1813088fe1fbc01fb466" x1="-.828%" x2="57.636%" y1="7.652%" y2="78.411%"><stop offset="0%" stop-color="#41D1FF"></stop><stop offset="100%" stop-color="#BD34FE"></stop></linearGradient><linearGradient id="IconifyId1813088fe1fbc01fb467" x1="43.376%" x2="50.316%" y1="2.242%" y2="89.03%"><stop offset="0%" stop-color="#FFEA83"></stop><stop offset="8.333%" stop-color="#FFDD35"></stop><stop offset="100%" stop-color="#FFA800"></stop></linearGradient></defs><path fill="url(#IconifyId1813088fe1fbc01fb466)" d="M255.153 37.938L134.897 252.976c-2.483 4.44-8.862 4.466-11.382.048L.875 37.958c-2.746-4.814 1.371-10.646 6.827-9.67l120.385 21.517a6.537 6.537 0 0 0 2.322-.004l117.867-21.483c5.438-.991 9.574 4.796 6.877 9.62Z"></path><path fill="url(#IconifyId1813088fe1fbc01fb467)" d="M185.432.063L96.44 17.501a3.268 3.268 0 0 0-2.634 3.014l-5.474 92.456a3.268 3.268 0 0 0 3.997 3.378l24.777-5.718c2.318-.535 4.413 1.507 3.936 3.838l-7.361 36.047c-.495 2.426 1.782 4.5 4.151 3.78l15.304-4.649c2.372-.72 4.652 1.36 4.15 3.788l-11.698 56.621c-.732 3.542 3.979 5.473 5.943 2.437l1.313-2.028l72.516-144.72c1.215-2.423-.88-5.186-3.54-4.672l-25.505 4.922c-2.396.462-4.435-1.77-3.759-4.114l16.646-57.705c.677-2.35-1.37-4.583-3.769-4.113Z"></path></svg>
@@ -0,0 +1,183 @@
+import { openai } from "@llamaindex/openai";
+import { liveEvents, LiveLLMSession, ModalityType } from "llamaindex";
+import { useEffect, useRef, useState } from "react";
+
+const MicIcon = ({ isConnected }: { isConnected: boolean }) => (
+  <svg
+    xmlns="http://www.w3.org/2000/svg"
+    viewBox="0 0 24 24"
+    fill="currentColor"
+    strokeWidth="2"
+    strokeLinecap="round"
+    strokeLinejoin="round"
+  >
+    {isConnected ? (
+      <>
+        <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z" />
+        <path d="M19 10v2a7 7 0 0 1-14 0v-2" />
+        <line x1="12" y1="19" x2="12" y2="23" />
+        <line x1="8" y1="23" x2="16" y2="23" />
+      </>
+    ) : (
+      <>
+        <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z" />
+        <path d="M19 10v2a7 7 0 0 1-14 0v-2" />
+      </>
+    )}
+  </svg>
+);
+
+// Renders three "wave" bars; each bar's animation is delayed 0.2s more than the previous one.
+const WaveAnimation = () => {
+  const bars = [0, 1, 2].map((index) => (
+    <div
+      key={index}
+      className="wave"
+      style={{ animationDelay: `${index * 0.2}s` }}
+    />
+  ));
+  return <div className="wave-animation">{bars}</div>;
+};
+
+export const AudioChat = () => {
+  const [isConnected, setIsConnected] = useState(false);
+  const [messages, setMessages] = useState<
+    Array<{ role: string; content: string }>
+  >([]);
+  const [status, setStatus] = useState<string>("");
+  const audioRef = useRef<HTMLAudioElement>(null);
+  const sessionRef = useRef<LiveLLMSession | null>(null);
+  const [stream, setStream] = useState<MediaStream | null>(null);
+  const messagesEndRef = useRef<HTMLDivElement>(null);
+
+  const scrollToBottom = () => {
+    messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
+  };
+
+  useEffect(() => {
+    scrollToBottom();
+  }, [messages]);
+
+  useEffect(() => {
+    return () => {
+      if (stream) {
+        stream.getTracks().forEach((track) => track.stop());
+      }
+    };
+  }, [stream]);
+
+  /**
+   * Starts a voice chat session: captures the microphone, prompts for an
+   * OpenAI API key, exchanges it for an ephemeral key, connects a live
+   * session, and appends incoming text events to the message list.
+   *
+   * On any failure the error is logged, the status line shows an error, and
+   * the captured microphone stream (if any) is released.
+   */
+  const startChat = async () => {
+    // Declared outside the try block so the catch handler can release the
+    // microphone when something fails after it has been acquired.
+    let userStream: MediaStream | null = null;
+    try {
+      setStatus("Initializing microphone...");
+      userStream = await navigator.mediaDevices.getUserMedia({
+        audio: true,
+      });
+      setStream(userStream);
+
+      setStatus("Connecting to AI...");
+      const apiKey = prompt("Please enter your OpenAI API key:");
+      if (!apiKey) {
+        throw new Error("API key is required");
+      }
+
+      // move this call to the server side for security reasons
+      // Do not store the API key in the frontend!
+      const serverllm = openai({
+        apiKey: apiKey,
+        model: "gpt-4o-realtime-preview-2025-06-03",
+      });
+
+      const tempKey = await serverllm.live.getEphemeralKey();
+
+      // The live connection itself only uses the ephemeral key.
+      const llm = openai({
+        apiKey: tempKey,
+        model: "gpt-4o-realtime-preview-2025-06-03",
+      });
+      const session = await llm.live.connect({
+        systemInstruction: "You are a helpful assistant who speaks naturally.",
+        responseModality: [ModalityType.TEXT, ModalityType.AUDIO],
+        audioConfig: {
+          stream: userStream,
+          // Route the remote audio stream into the hidden <audio> element.
+          onTrack: (remoteStream) => {
+            if (audioRef.current && remoteStream) {
+              audioRef.current.srcObject = remoteStream;
+              audioRef.current.play().catch(console.error);
+            }
+          },
+        },
+      });
+
+      sessionRef.current = session;
+      setIsConnected(true);
+      setStatus("Connected! Listening...");
+
+      for await (const event of session.streamEvents()) {
+        if (liveEvents.open.include(event)) {
+          // Once the session is open, send a greeting and show it in the transcript.
+          setMessages((prev) => [
+            ...prev,
+            {
+              role: "user",
+              content: "Hello, I'm ready to chat!",
+            },
+          ]);
+          session.sendMessage({
+            content: "Hello, I'm ready to chat!",
+            role: "user",
+          });
+        } else if (liveEvents.text.include(event)) {
+          setMessages((prev) => [
+            ...prev,
+            {
+              role: "assistant",
+              content: event.text,
+            },
+          ]);
+        }
+      }
+    } catch (error) {
+      console.error("Error starting chat:", error);
+      // Stop capturing audio; otherwise the microphone stays live after a
+      // failed start (e.g. a missing API key or a connection error).
+      userStream?.getTracks().forEach((track) => track.stop());
+      setStream(null);
+      setStatus("Error connecting. Please try again.");
+      setIsConnected(false);
+    }
+  };
+
+  const stopChat = async () => {
+    setStatus("Disconnecting...");
+    if (sessionRef.current) {
+      await sessionRef.current.disconnect();
+      sessionRef.current = null;
+    }
+    if (stream) {
+      stream.getTracks().forEach((track) => track.stop());
+      setStream(null);
+    }
+    if (audioRef.current) {
+      audioRef.current.srcObject = null;
+    }
+    setIsConnected(false);
+    setStatus("");
+  };
+
+  return (
+    <div className="audio-chat-container">
+      <h1>AI Voice Chat</h1>
+      <div className="messages-container">
+        {messages.map((msg, idx) => (
+          <div key={idx} className={`message ${msg.role}`}>
+            {msg.content}
+          </div>
+        ))}
+        <div ref={messagesEndRef} />
+      </div>
+
+      <div className="controls">
+        {status && <div className="status-indicator">{status}</div>}
+        <button
+          className={`mic-button ${isConnected ? "connected" : ""}`}
+          onClick={isConnected ? stopChat : startChat}
+          title={isConnected ? "Stop Chat" : "Start Chat"}
+        >
+          <MicIcon isConnected={isConnected} />
+          {isConnected && <WaveAnimation />}
+        </button>
+        <audio ref={audioRef} style={{ display: "none" }} />
+      </div>
+    </div>
+  );
+};
@@ -0,0 +1,322 @@
+:root {
+  --primary-color: #646cff;
+  --secondary-color: #535bf2;
+  --background-dark: #1a1a1a;
+  --chat-bg: #242424;
+  --text-primary: #ffffff;
+  --text-secondary: #888888;
+  --success-color: #4caf50;
+  --error-color: #f44336;
+  --gradient-start: #4776e6;
+  --gradient-end: #8e54e9;
+}
+
+body {
+  background-color: var(--background-dark);
+  color: var(--text-primary);
+  margin: 0;
+  min-height: 100vh;
+  font-family:
+    -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu,
+    Cantarell, "Open Sans", "Helvetica Neue", sans-serif;
+}
+
+#root {
+  max-width: 1280px;
+  height: 100vh;
+  margin: 0 auto;
+  padding: 2rem;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+}
+
+.logo {
+  height: 6em;
+  padding: 1.5em;
+  will-change: filter;
+  transition: filter 300ms;
+}
+.logo:hover {
+  filter: drop-shadow(0 0 2em #646cffaa);
+}
+.logo.react:hover {
+  filter: drop-shadow(0 0 2em #61dafbaa);
+}
+
+@keyframes logo-spin {
+  from {
+    transform: rotate(0deg);
+  }
+  to {
+    transform: rotate(360deg);
+  }
+}
+
+@media (prefers-reduced-motion: no-preference) {
+  a:nth-of-type(2) .logo {
+    animation: logo-spin infinite 20s linear;
+  }
+}
+
+.card {
+  padding: 2em;
+}
+
+.read-the-docs {
+  color: #888;
+}
+
+.audio-chat-container {
+  display: flex;
+  flex-direction: column;
+  gap: 2rem;
+  width: 100%;
+  max-width: 800px;
+  height: 80vh;
+  margin: 0 auto;
+  padding: 2rem;
+  background: var(--chat-bg);
+  border-radius: 24px;
+  box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
+  position: relative;
+  overflow: hidden;
+}
+
+.audio-chat-container::before {
+  content: "";
+  position: absolute;
+  top: 0;
+  left: 0;
+  right: 0;
+  height: 4px;
+  background: linear-gradient(
+    to right,
+    var(--gradient-start),
+    var(--gradient-end)
+  );
+}
+
+.audio-chat-container h1 {
+  font-size: 2.5rem;
+  margin: 0;
+  background: linear-gradient(
+    to right,
+    var(--gradient-start),
+    var(--gradient-end)
+  );
+  -webkit-background-clip: text;
+  background-clip: text;
+  color: transparent;
+  text-align: center;
+}
+
+.messages-container {
+  display: flex;
+  flex-direction: column;
+  gap: 1rem;
+  flex: 1;
+  overflow-y: auto;
+  padding: 1rem;
+  border-radius: 16px;
+  background: rgba(255, 255, 255, 0.05);
+  backdrop-filter: blur(10px);
+  margin: 1rem 0;
+}
+
+.message {
+  padding: 1rem 1.5rem;
+  border-radius: 16px;
+  max-width: 80%;
+  text-align: left;
+  animation: messageSlide 0.3s ease-out;
+  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+}
+
+@keyframes messageSlide {
+  from {
+    opacity: 0;
+    transform: translateY(20px);
+  }
+  to {
+    opacity: 1;
+    transform: translateY(0);
+  }
+}
+
+.message.user {
+  background: linear-gradient(
+    135deg,
+    var(--gradient-start),
+    var(--gradient-end)
+  );
+  align-self: flex-end;
+  margin-left: 20%;
+  color: white;
+}
+
+.message.assistant {
+  background: rgba(255, 255, 255, 0.1);
+  align-self: flex-start;
+  margin-right: 20%;
+  border: 1px solid rgba(255, 255, 255, 0.1);
+}
+
+.controls {
+  display: flex;
+  justify-content: center;
+  align-items: center;
+  padding: 2rem;
+  position: relative;
+}
+
+.mic-button {
+  width: 80px;
+  height: 80px;
+  border-radius: 50%;
+  border: none;
+  background: linear-gradient(
+    135deg,
+    var(--gradient-start),
+    var(--gradient-end)
+  );
+  color: white;
+  cursor: pointer;
+  transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
+  position: relative;
+  overflow: hidden;
+}
+
+.mic-button::before {
+  content: "";
+  position: absolute;
+  top: 0;
+  left: 0;
+  right: 0;
+  bottom: 0;
+  background: linear-gradient(
+    135deg,
+    rgba(255, 255, 255, 0.1),
+    rgba(255, 255, 255, 0)
+  );
+  border-radius: 50%;
+  transition: transform 0.3s ease;
+}
+
+.mic-button:hover {
+  transform: scale(1.05);
+  box-shadow: 0 8px 25px rgba(0, 0, 0, 0.3);
+}
+
+.mic-button:hover::before {
+  transform: translateY(-100%);
+}
+
+.mic-button.connected {
+  background: var(--error-color);
+  animation: pulseError 2s infinite;
+}
+
+.mic-button svg {
+  width: 32px;
+  height: 32px;
+  filter: drop-shadow(0 2px 4px rgba(0, 0, 0, 0.2));
+  transition: transform 0.3s ease;
+}
+
+.mic-button:hover svg {
+  transform: scale(1.1);
+}
+
+@keyframes pulseError {
+  0% {
+    box-shadow: 0 0 0 0 rgba(244, 67, 54, 0.4);
+  }
+  70% {
+    box-shadow: 0 0 0 20px rgba(244, 67, 54, 0);
+  }
+  100% {
+    box-shadow: 0 0 0 0 rgba(244, 67, 54, 0);
+  }
+}
+
+/* Status indicator */
+.status-indicator {
+  position: absolute;
+  top: -30px;
+  left: 50%;
+  transform: translateX(-50%);
+  font-size: 0.9rem;
+  color: var(--text-secondary);
+  opacity: 0;
+  transition: opacity 0.3s ease;
+}
+
+.controls:hover .status-indicator {
+  opacity: 1;
+}
+
+/* Scrollbar styling */
+.messages-container::-webkit-scrollbar {
+  width: 8px;
+}
+
+.messages-container::-webkit-scrollbar-track {
+  background: rgba(255, 255, 255, 0.05);
+  border-radius: 4px;
+}
+
+.messages-container::-webkit-scrollbar-thumb {
+  background: linear-gradient(var(--gradient-start), var(--gradient-end));
+  border-radius: 4px;
+}
+
+.messages-container::-webkit-scrollbar-thumb:hover {
+  background: linear-gradient(var(--gradient-end), var(--gradient-start));
+}
+
+/* Wave Animation */
+.wave-animation {
+  position: absolute;
+  bottom: -15px;
+  left: 50%;
+  transform: translateX(-50%);
+  display: flex;
+  gap: 4px;
+}
+
+.wave {
+  width: 4px;
+  height: 15px;
+  background: currentColor;
+  border-radius: 2px;
+  animation: wave 0.5s ease-in-out infinite;
+}
+
+@keyframes wave {
+  0%,
+  100% {
+    transform: scaleY(0.5);
+  }
+  50% {
+    transform: scaleY(1.5);
+  }
+}
+
+/* Loading state */
+.mic-button.loading {
+  animation: rotate 1s linear infinite;
+}
+
+@keyframes rotate {
+  from {
+    transform: rotate(0deg);
+  }
+  to {
+    transform: rotate(360deg);
+  }
+}
@@ -0,0 +1,10 @@
+import { StrictMode } from "react";
+import { createRoot } from "react-dom/client";
+import { AudioChat } from "./audio-chat.tsx";
+import "./index.css";
+
+createRoot(document.getElementById("root")!).render(
+  <StrictMode>
+    <AudioChat />
+  </StrictMode>,
+);
@@ -0,0 +1 @@
+/// <reference types="vite/client" />
@@ -0,0 +1,26 @@
+{
+  "compilerOptions": {
+    "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
+    "target": "ES2020",
+    "useDefineForClassFields": true,
+    "lib": ["ES2020", "DOM", "DOM.Iterable"],
+    "module": "ESNext",
+    "skipLibCheck": true,
+
+    /* Bundler mode */
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "verbatimModuleSyntax": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+    "jsx": "react-jsx",
+
+    /* Linting */
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "noFallthroughCasesInSwitch": true,
+    "noUncheckedSideEffectImports": true
+  },
+  "include": ["src"]
+}
@@ -0,0 +1,4 @@
+{
+  "files": [],
+  "references": [{ "path": "./tsconfig.app.json" }]
+}
@@ -0,0 +1,7 @@
+import react from "@vitejs/plugin-react";
+import { defineConfig } from "vite";
+
+// https://vite.dev/config/
+export default defineConfig({
+  plugins: [react()],
+});
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/examples",
-  "version": "0.3.19",
+  "version": "0.3.26",
  "private": true,
  "scripts": {
    "lint": "eslint .",
@@ -11,51 +11,52 @@
    "@azure/cosmos": "^4.1.1",
    "@azure/identity": "^4.4.1",
    "@azure/search-documents": "^12.1.0",
-    "@llamaindex/anthropic": "^0.3.11",
-    "@llamaindex/assemblyai": "^0.1.8",
-    "@llamaindex/astra": "^0.0.23",
-    "@llamaindex/azure": "^0.1.20",
-    "@llamaindex/chroma": "^0.0.23",
-    "@llamaindex/clip": "^0.0.59",
-    "@llamaindex/cloud": "^4.0.13",
-    "@llamaindex/cohere": "^0.0.23",
-    "@llamaindex/core": "^0.6.9",
-    "@llamaindex/deepinfra": "^0.0.59",
-    "@llamaindex/deepseek": "^0.0.19",
-    "@llamaindex/discord": "^0.1.8",
-    "@llamaindex/elastic-search": "^0.1.9",
+    "@llamaindex/anthropic": "^0.3.15",
+    "@llamaindex/assemblyai": "^0.1.12",
+    "@llamaindex/astra": "^0.0.27",
+    "@llamaindex/azure": "^0.1.24",
+    "@llamaindex/bm25-retriever": "^0.0.2",
+    "@llamaindex/chroma": "^0.0.27",
+    "@llamaindex/clip": "^0.0.63",
+    "@llamaindex/cloud": "^4.0.17",
+    "@llamaindex/cohere": "^0.0.27",
+    "@llamaindex/core": "^0.6.13",
+    "@llamaindex/deepinfra": "^0.0.63",
+    "@llamaindex/deepseek": "^0.0.23",
+    "@llamaindex/discord": "^0.1.12",
+    "@llamaindex/elastic-search": "^0.1.13",
    "@llamaindex/env": "^0.1.30",
-    "@llamaindex/firestore": "^1.0.16",
-    "@llamaindex/fireworks": "^0.0.19",
-    "@llamaindex/google": "^0.3.6",
-    "@llamaindex/groq": "^0.0.74",
-    "@llamaindex/huggingface": "^0.1.13",
-    "@llamaindex/jinaai": "^0.0.19",
-    "@llamaindex/milvus": "^0.1.18",
-    "@llamaindex/mistral": "^0.1.9",
-    "@llamaindex/mixedbread": "^0.0.23",
-    "@llamaindex/mongodb": "^0.0.24",
-    "@llamaindex/node-parser": "^2.0.9",
-    "@llamaindex/notion": "^0.1.8",
-    "@llamaindex/ollama": "^0.1.9",
-    "@llamaindex/openai": "^0.4.3",
-    "@llamaindex/perplexity": "^0.0.16",
-    "@llamaindex/pinecone": "^0.1.9",
-    "@llamaindex/portkey-ai": "^0.0.51",
-    "@llamaindex/postgres": "^0.0.52",
-    "@llamaindex/qdrant": "^0.1.19",
-    "@llamaindex/readers": "^3.1.7",
-    "@llamaindex/replicate": "^0.0.51",
-    "@llamaindex/supabase": "^0.1.8",
-    "@llamaindex/together": "^0.0.19",
-    "@llamaindex/tools": "^0.0.14",
-    "@llamaindex/upstash": "^0.0.23",
-    "@llamaindex/vercel": "^0.1.9",
-    "@llamaindex/vllm": "^0.0.45",
-    "@llamaindex/voyage-ai": "^1.0.15",
-    "@llamaindex/weaviate": "^0.0.23",
-    "@llamaindex/workflow": "^1.1.6",
-    "@llamaindex/xai": "workspace:^0.0.6",
+    "@llamaindex/firestore": "^1.0.20",
+    "@llamaindex/fireworks": "^0.0.23",
+    "@llamaindex/google": "^0.3.12",
+    "@llamaindex/groq": "^0.0.78",
+    "@llamaindex/huggingface": "^0.1.17",
+    "@llamaindex/jinaai": "^0.0.23",
+    "@llamaindex/milvus": "^0.1.22",
+    "@llamaindex/mistral": "^0.1.13",
+    "@llamaindex/mixedbread": "^0.0.27",
+    "@llamaindex/mongodb": "^0.0.28",
+    "@llamaindex/node-parser": "^2.0.13",
+    "@llamaindex/notion": "^0.1.12",
+    "@llamaindex/ollama": "^0.1.13",
+    "@llamaindex/openai": "^0.4.7",
+    "@llamaindex/perplexity": "^0.0.20",
+    "@llamaindex/pinecone": "^0.1.13",
+    "@llamaindex/portkey-ai": "^0.0.55",
+    "@llamaindex/postgres": "^0.0.56",
+    "@llamaindex/qdrant": "^0.1.23",
+    "@llamaindex/readers": "^3.1.12",
+    "@llamaindex/replicate": "^0.0.55",
+    "@llamaindex/supabase": "^0.1.13",
+    "@llamaindex/together": "^0.0.23",
+    "@llamaindex/tools": "^0.1.3",
+    "@llamaindex/upstash": "^0.0.27",
+    "@llamaindex/vercel": "^0.1.13",
+    "@llamaindex/vllm": "^0.0.49",
+    "@llamaindex/voyage-ai": "^1.0.19",
+    "@llamaindex/weaviate": "^0.0.28",
+    "@llamaindex/workflow": "^1.1.13",
+    "@llamaindex/xai": "workspace:^0.0.10",
    "@notionhq/client": "^2.2.15",
    "@pinecone-database/pinecone": "^4.0.0",
    "@vercel/postgres": "^0.10.0",
@@ -64,11 +65,11 @@
    "commander": "^12.1.0",
    "dotenv": "^16.4.5",
    "js-tiktoken": "^1.0.14",
-    "llamaindex": "^0.11.5",
+    "llamaindex": "^0.11.12",
    "mongodb": "6.7.0",
    "postgres": "^3.4.4",
    "wikipedia": "^2.1.2",
-    "zod": "^3.23.8"
+    "zod": "^3.25.67"
  },
  "devDependencies": {
    "@types/node": "^22.9.0",
@@ -2,11 +2,7 @@ import { stdin as input, stdout as output } from "node:process";
 import readline from "node:readline/promises";

 import { OpenAI } from "@llamaindex/openai";
-import {
-  ChatSummaryMemoryBuffer,
-  Settings,
-  SimpleChatEngine,
-} from "llamaindex";
+import { createMemory, Settings, SimpleChatEngine } from "llamaindex";

 if (process.env.NODE_ENV === "development") {
  Settings.callbackManager.on("llm-end", (event) => {
@@ -15,10 +11,13 @@ if (process.env.NODE_ENV === "development") {
 }

 async function main() {
-  // Set maxTokens to 75% of the context window size of 4096
-  // This will trigger the summarizer once the chat history reaches 25% of the context window size (1024 tokens)
-  const llm = new OpenAI({ model: "gpt-3.5-turbo", maxTokens: 4096 * 0.75 });
-  const chatHistory = new ChatSummaryMemoryBuffer({ llm });
+  const llm = new OpenAI({ model: "gpt-3.5-turbo" });
+  const chatHistory = createMemory([
+    {
+      content: "You are a helpful assistant.",
+      role: "system",
+    },
+  ]);
  const chatEngine = new SimpleChatEngine({ llm });
  const rl = readline.createInterface({ input, output });

@@ -29,10 +28,6 @@ async function main() {
      chatHistory,
      stream: true,
    });
-    if (chatHistory.getLastSummary()) {
-      // Print the summary of the conversation so far that is produced by the SummaryChatHistory
-      console.log(`Summary: ${chatHistory.getLastSummary()?.content}`);
-    }
    for await (const chunk of stream) {
      process.stdout.write(chunk.response);
    }
@@ -1,3 +1,4 @@
+import { OpenAIEmbedding } from "@llamaindex/openai";
 import {
  Document,
  SentenceSplitter,
@@ -7,6 +8,10 @@ import {
 import { OldSentenceSplitter } from "./old-sentence-splitter";
 export const STORAGE_DIR = "./data";

+Settings.embedModel = new OpenAIEmbedding({
+  model: "text-embedding-3-small",
+});
+
 // Update node parser
 (async () => {
  // generate a document with a very long sentence (9000 words long)
@@ -15,11 +15,14 @@
    "start:llamaparse-json": "node --import tsx ./src/llamaparse-json.ts",
    "start:discord": "node --import tsx ./src/discord.ts",
    "start:json": "node --import tsx ./src/json.ts",
-    "start:obsidian": "node --import tsx ./src/obsidian.ts"
+    "start:obsidian": "node --import tsx ./src/obsidian.ts",
+    "start:xml": "node --import tsx ./src/xml.ts",
+    "start:excel": "node --import tsx ./src/excel.ts"
  },
  "dependencies": {
    "@llamaindex/cloud": "workspace:* || ^2.0.24",
    "@llamaindex/readers": "workspace:* || ^1.0.25",
+    "@llamaindex/excel": "workspace:*",
    "llamaindex": "workspace:* || ^0.8.37"
  },
  "devDependencies": {
@@ -0,0 +1,20 @@
+import { ExcelReader } from "@llamaindex/excel";
+
+async function main() {
+  // Load Excel file
+  const reader = new ExcelReader({
+    sheetSpecifier: 0,
+    concatRows: true,
+    fieldSeparator: ",",
+    keyValueSeparator: ":",
+  });
+
+  const documents = await reader.loadData("../data/sample_excel_sheet.xls");
+
+  for (const doc of documents) {
+    console.log(doc.text);
+    console.log("----");
+  }
+}
+
+main().catch(console.error);
@@ -1,4 +1,4 @@
-import { LlamaParseReader } from "@llamaindex/cloud";
+import { LlamaParseReader } from "@llamaindex/cloud/reader";
 import { openai, OpenAIEmbedding } from "@llamaindex/openai";
 import { Settings, VectorStoreIndex } from "llamaindex";

@@ -1,4 +1,4 @@
-import { LlamaParseReader } from "@llamaindex/cloud";
+import { LlamaParseReader } from "@llamaindex/cloud/reader";
 import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
 import { VectorStoreIndex } from "llamaindex";

@@ -0,0 +1,16 @@
+import { XMLReader } from "@llamaindex/readers/xml";
+
+async function main() {
+  // Load XML file
+  const reader = new XMLReader({
+    splitLevel: 2,
+  });
+  const documents = await reader.loadData("../data/company.xml");
+
+  for (const doc of documents) {
+    console.log(doc.text);
+    console.log("----");
+  }
+}
+
+main().catch(console.error);
@@ -0,0 +1,14 @@
+# BM25 Retriever
+
+In this guide, we introduce a BM25 retriever that searches documents using the BM25 method. BM25 (Best Matching 25) is a ranking function that extends TF-IDF by considering term frequency saturation and document length. BM25 effectively ranks documents based on query term occurrence and rarity across the corpus.
+
+## Setup
+
+1. `cd` into the `examples` directory
+2. Run `npm i`
+
+## Example
+
+```bash
+npx tsx ./retrievers/bm25/example.ts
+```
@@ -0,0 +1,33 @@
+import { Bm25Retriever } from "@llamaindex/bm25-retriever";
+import { OpenAIEmbedding } from "@llamaindex/openai";
+import { PDFReader } from "@llamaindex/readers/pdf";
+import { MetadataMode, Settings, VectorStoreIndex } from "llamaindex";
+
+Settings.embedModel = new OpenAIEmbedding();
+
+async function main() {
+  // Load the PDF into documents
+  const reader = new PDFReader();
+  const documents = await reader.loadData("./data/brk-2022.pdf");
+
+  // Split text and create embeddings. Store them in a VectorStoreIndex (its docStore is reused by the retriever below)
+  const index = await VectorStoreIndex.fromDocuments(documents);
+
+  const retriever = new Bm25Retriever({
+    docStore: index.docStore,
+    topK: 3,
+  });
+
+  // Retrieve nodes for the query with the BM25 retriever
+  const response = await retriever.retrieve({
+    query: "What mistakes did Warren E. Buffett make?",
+  });
+
+  // Print the score and text content of each retrieved node
+  response.forEach((r) => {
+    console.log(`Score: ${r.score}`);
+    console.log(`Text: ${r.node.getContent(MetadataMode.NONE)}`);
+  });
+}
+
+main().catch(console.error);
@@ -1,8 +1,4 @@
-import {
-  GEMINI_EMBEDDING_MODEL,
-  GeminiEmbedding,
-  GeminiSession,
-} from "@llamaindex/google";
+import { GEMINI_EMBEDDING_MODEL, GeminiEmbedding } from "@llamaindex/google";
 import { QdrantVectorStore } from "@llamaindex/qdrant";
 import {
  Document,
@@ -12,9 +8,6 @@ import {

 const embedding = new GeminiEmbedding({
  model: GEMINI_EMBEDDING_MODEL.EMBEDDING_001,
-  session: new GeminiSession({
-    apiKey: process.env.GEMINI_API_KEY,
-  }),
 });

 async function main() {
@@ -18,5 +18,11 @@
      "module": "commonjs"
    }
  },
-  "include": ["./**/*.ts"]
+  "include": ["./**/*.ts"],
+  "exclude": [
+    "node_modules",
+    "dist",
+    "models/openai/live/browser/open-ai-realtime",
+    "**/browser/**"
+  ]
 }
@@ -1,5 +1,50 @@
 # @llamaindex/autotool

+## 8.0.12
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+
+## 8.0.11
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+
+## 8.0.10
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
+## 8.0.9
+
+### Patch Changes
+
+- llamaindex@0.11.9
+
+## 8.0.8
+
+### Patch Changes
+
+- llamaindex@0.11.8
+
+## 8.0.7
+
+### Patch Changes
+
+- Updated dependencies [3c857f4]
+  - llamaindex@0.11.7
+
+## 8.0.6
+
+### Patch Changes
+
+- llamaindex@0.11.6
+
 ## 8.0.5

 ### Patch Changes
@@ -1,5 +1,57 @@
 # @llamaindex/autotool-01-node-example

+## 0.0.120
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+  - @llamaindex/autotool@8.0.12
+
+## 0.0.119
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+  - @llamaindex/autotool@8.0.11
+
+## 0.0.118
+
+### Patch Changes
+
+- llamaindex@0.11.10
+- @llamaindex/autotool@8.0.10
+
+## 0.0.117
+
+### Patch Changes
+
+- llamaindex@0.11.9
+- @llamaindex/autotool@8.0.9
+
+## 0.0.116
+
+### Patch Changes
+
+- llamaindex@0.11.8
+- @llamaindex/autotool@8.0.8
+
+## 0.0.115
+
+### Patch Changes
+
+- Updated dependencies [3c857f4]
+  - llamaindex@0.11.7
+  - @llamaindex/autotool@8.0.7
+
+## 0.0.114
+
+### Patch Changes
+
+- llamaindex@0.11.6
+- @llamaindex/autotool@8.0.6
+
 ## 0.0.113

 ### Patch Changes
@@ -13,5 +13,5 @@
  "scripts": {
    "start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
  },
-  "version": "0.0.113"
+  "version": "0.0.120"
 }
@@ -6,7 +6,7 @@
    "url": "git+https://github.com/run-llama/LlamaIndexTS.git",
    "directory": "packages/autotool"
  },
-  "version": "8.0.5",
+  "version": "8.0.12",
  "description": "auto transpile your JS function to LLM Agent compatible",
  "files": [
    "dist",
@@ -1,5 +1,39 @@
 # @llamaindex/cloud

+## 4.0.17
+
+### Patch Changes
+
+- Updated dependencies [d578889]
+- Updated dependencies [0fcc92f]
+- Updated dependencies [515a8b9]
+  - @llamaindex/core@0.6.13
+
+## 4.0.16
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+- Updated dependencies [7039e1a]
+  - @llamaindex/core@0.6.12
+
+## 4.0.15
+
+### Patch Changes
+
+- Updated dependencies [a89e187]
+- Updated dependencies [62699b7]
+- Updated dependencies [c5b2691]
+- Updated dependencies [d8ac8d3]
+  - @llamaindex/core@0.6.11
+
+## 4.0.14
+
+### Patch Changes
+
+- Updated dependencies [1b5af14]
+  - @llamaindex/core@0.6.10
+
 ## 4.0.13

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloud",
-  "version": "4.0.13",
+  "version": "4.0.17",
  "type": "module",
  "license": "MIT",
  "scripts": {
@@ -79,6 +79,6 @@
  },
  "dependencies": {
    "p-retry": "^6.2.1",
-    "zod": "^3.25.7"
+    "zod": "^3.25.67"
  }
 }
@@ -1,5 +1,35 @@
 # @llamaindex/core

+## 0.6.13
+
+### Patch Changes
+
+- d578889: Add new memory API
+- 0fcc92f: Fix: split sentences must not trim whitespaces
+- 515a8b9: Fix: logging for fromPersistPath
+
+## 0.6.12
+
+### Patch Changes
+
+- 7039e1a: Internal cleanup of base64 encoding
+- 7039e1a: chore: migrate to @google/genai SDK
+
+## 0.6.11
+
+### Patch Changes
+
+- a89e187: Feat: added custom abbreviations to sentence splitter
+- 62699b7: Improve performance of sentence splitter
+- c5b2691: Add more Acronyms on SentenceSplitter
+- d8ac8d3: Feat: add support for openai realtime API
+
+## 0.6.10
+
+### Patch Changes
+
+- 1b5af14: fix: jsonToNode for image nodes
+
 ## 0.6.9

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/core",
  "type": "module",
-  "version": "0.6.9",
+  "version": "0.6.13",
  "description": "LlamaIndex Core Module",
  "exports": {
    "./agent": {
@@ -312,7 +312,7 @@
    "@llamaindex/env": "workspace:*",
    "@types/node": "^22.9.0",
    "magic-bytes.js": "^1.10.0",
-    "zod": "^3.23.8",
-    "zod-to-json-schema": "^3.23.3"
+    "zod": "^3.25.67",
+    "zod-to-json-schema": "^3.24.6"
  }
 }
@@ -152,6 +152,7 @@ export type AgentParamsBase<

 /**
 * Worker will schedule tasks and handle the task execution
+ * @deprecated Use agent instead.
 */
 export abstract class AgentWorker<
  AI extends LLM,
@@ -250,6 +251,7 @@ export abstract class AgentWorker<

 /**
 * Runner will manage the task execution and provide a high-level API for the user
+ * @deprecated Use agent instead.
 */
 export abstract class AgentRunner<
  AI extends LLM,
@@ -62,6 +62,9 @@ export class LLMAgentWorker extends AgentWorker<LLM> {
  taskHandler = AgentRunner.defaultTaskHandler;
 }

+/**
+ * @deprecated Use agent instead.
+ */
 export class LLMAgent extends AgentRunner<LLM> {
  constructor(params: LLMAgentParams<LLM>) {
    validateAgentParams(params);
@@ -1,5 +1,5 @@
 import type { ChatMessage, MessageContent } from "../llms";
-import type { BaseMemory } from "../memory";
+import type { Memory } from "../memory";
 import { EngineResponse } from "../schema";

 export interface BaseChatEngineParams<
@@ -9,9 +9,7 @@ export interface BaseChatEngineParams<
  /**
   * Optional chat history if you want to customize the chat history.
   */
-  chatHistory?:
-    | ChatMessage<AdditionalMessageOptions>[]
-    | BaseMemory<AdditionalMessageOptions>;
+  chatHistory?: ChatMessage<AdditionalMessageOptions>[] | Memory;
 }

 export interface StreamingChatEngineParams<
@@ -1,7 +1,7 @@
 import { wrapEventCaller } from "../decorator";
 import { Settings } from "../global";
 import type { ChatMessage, LLM, MessageContent, MessageType } from "../llms";
-import { BaseMemory, ChatMemoryBuffer } from "../memory";
+import { Memory, createMemory } from "../memory";
 import type { BaseNodePostprocessor } from "../postprocessor";
 import {
  type ContextSystemPrompt,
@@ -23,7 +23,7 @@ import type { ContextGenerator } from "./type";
 export type ContextChatEngineOptions = {
  retriever: BaseRetriever;
  chatModel?: LLM | undefined;
-  chatHistory?: ChatMessage[] | undefined;
+  chatHistory?: ChatMessage[] | Memory | undefined;
  contextSystemPrompt?: ContextSystemPrompt | undefined;
  nodePostprocessors?: BaseNodePostprocessor[] | undefined;
  systemPrompt?: string | undefined;
@@ -37,18 +37,21 @@ export type ContextChatEngineOptions = {
 */
 export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
  chatModel: LLM;
-  memory: BaseMemory;
+  memory: Memory;
  contextGenerator: ContextGenerator & PromptMixin;
  systemPrompt?: string | undefined;

  get chatHistory() {
-    return this.memory.getMessages();
+    return this.memory.getLLM();
  }

  constructor(init: ContextChatEngineOptions) {
    super();
    this.chatModel = init.chatModel ?? Settings.llm;
-    this.memory = new ChatMemoryBuffer({ chatHistory: init?.chatHistory });
+    this.memory =
+      init?.chatHistory instanceof Memory
+        ? init.chatHistory
+        : createMemory(init?.chatHistory ?? []);
    this.contextGenerator = new DefaultContextGenerator({
      retriever: init.retriever,
      contextSystemPrompt: init?.contextSystemPrompt,
@@ -87,12 +90,9 @@ export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
    const { message, stream } = params;
    const chatHistory = params.chatHistory
-      ? new ChatMemoryBuffer({
-          chatHistory:
-            params.chatHistory instanceof BaseMemory
-              ? await params.chatHistory.getMessages()
-              : params.chatHistory,
-        })
+      ? params.chatHistory instanceof Memory
+        ? params.chatHistory
+        : createMemory(params.chatHistory)
      : this.memory;
    const requestMessages = await this.prepareRequestMessages(
      message,
@@ -110,7 +110,7 @@ export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
          initialValue: "",
          reducer: (accumulator, part) => (accumulator += part.delta),
          finished: (accumulator) => {
-            chatHistory.put({ content: accumulator, role: "assistant" });
+            void chatHistory.add({ content: accumulator, role: "assistant" });
          },
        }),
        (r) => EngineResponse.fromChatResponseChunk(r, requestMessages.nodes),
@@ -120,26 +120,26 @@ export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
      messages: requestMessages.messages,
      additionalChatOptions: params.chatOptions as object,
    });
-    chatHistory.put(response.message);
+    await chatHistory.add(response.message);
    return EngineResponse.fromChatResponse(response, requestMessages.nodes);
  }

-  reset() {
-    this.memory.reset();
+  async reset() {
+    await this.memory.clear();
  }

  private async prepareRequestMessages(
    message: MessageContent,
-    chatHistory: BaseMemory,
+    chatHistory: Memory,
  ) {
-    chatHistory.put({
+    await chatHistory.add({
      content: message,
      role: "user",
    });
    const textOnly = extractText(message);
    const context = await this.contextGenerator.generate(textOnly);
    const systemMessage = this.prependSystemPrompt(context.message);
-    const messages = await chatHistory.getMessages([systemMessage]);
+    const messages = await chatHistory.getLLM(this.chatModel, [systemMessage]);
    return { nodes: context.nodes, messages };
  }

@@ -1,5 +1,5 @@
 import type { LLM } from "../llms";
-import { BaseMemory, ChatMemoryBuffer } from "../memory";
+import { createMemory, Memory } from "../memory";
 import { EngineResponse } from "../schema";
 import { streamConverter, streamReducer } from "../utils";
 import type {
@@ -16,20 +16,16 @@ import { Settings } from "../global";
 */

 export class SimpleChatEngine implements BaseChatEngine {
-  memory: BaseMemory;
+  memory: Memory;
  llm: LLM;

  get chatHistory() {
-    return this.memory.getMessages();
+    return this.memory.getLLM();
  }

  constructor(init?: Partial<SimpleChatEngine>) {
    this.llm = init?.llm ?? Settings.llm;
-    this.memory =
-      init?.memory ??
-      new ChatMemoryBuffer({
-        llm: this.llm,
-      });
+    this.memory = init?.memory ?? createMemory();
  }

  chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
@@ -43,19 +39,15 @@ export class SimpleChatEngine implements BaseChatEngine {
    const { message, stream } = params;

    const chatHistory = params.chatHistory
-      ? new ChatMemoryBuffer({
-          llm: this.llm,
-          chatHistory:
-            params.chatHistory instanceof BaseMemory
-              ? await params.chatHistory.getMessages()
-              : params.chatHistory,
-        })
+      ? params.chatHistory instanceof Memory
+        ? params.chatHistory
+        : createMemory(params.chatHistory)
      : this.memory;
-    chatHistory.put({ content: message, role: "user" });
+    await chatHistory.add({ content: message, role: "user" });

    if (stream) {
      const stream = await this.llm.chat({
-        messages: await chatHistory.getMessages(),
+        messages: await chatHistory.getLLM(this.llm),
        stream: true,
      });
      return streamConverter(
@@ -64,7 +56,7 @@ export class SimpleChatEngine implements BaseChatEngine {
          initialValue: "",
          reducer: (accumulator, part) => accumulator + part.delta,
          finished: (accumulator) => {
-            chatHistory.put({ content: accumulator, role: "assistant" });
+            void chatHistory.add({ content: accumulator, role: "assistant" });
          },
        }),
        EngineResponse.fromChatResponseChunk,
@@ -73,13 +65,13 @@ export class SimpleChatEngine implements BaseChatEngine {

    const response = await this.llm.chat({
      stream: false,
-      messages: await chatHistory.getMessages(),
+      messages: await chatHistory.getLLM(this.llm),
    });
-    chatHistory.put(response.message);
+    await chatHistory.add(response.message);
    return EngineResponse.fromChatResponse(response);
  }

-  reset() {
-    this.memory.reset();
+  async reset() {
+    await this.memory.clear();
  }
 }
@@ -1,4 +1,5 @@
-import { extractText, streamConverter } from "../utils";
+import { extractText } from "../utils/llms";
+import { streamConverter } from "../utils/stream";
 import type {
  ChatResponse,
  ChatResponseChunk,
@@ -1,6 +1,9 @@
 export { BaseLLM, ToolCallLLM } from "./base";
-export { LiveLLM, LiveLLMSession, liveEvents, type LiveEvent } from "./live";
+export { LiveLLM, LiveLLMCapability, LiveLLMSession } from "./live/live";
+export { liveEvents, type LiveEvent } from "./live/live-types";
+export type { MessageSender } from "./live/sender";
 export type {
+  AudioConfig,
  BaseTool,
  BaseToolWithCall,
  ChatMessage,
@@ -1,9 +1,7 @@
 import type {
-  ChatMessage,
-  LiveConnectConfig,
  MessageContentAudioDetail,
  MessageContentTextDetail,
-} from "./type";
+} from "../type";

 export type OpenEvent = { type: "open" };

@@ -63,45 +61,3 @@ export const liveEvents = {
      e.type === "turnComplete",
  },
 };
-export abstract class LiveLLMSession {
-  protected eventQueue: LiveEvent[] = [];
-  protected eventResolvers: ((value: LiveEvent) => void)[] = [];
-  protected closed = false;
-  abstract sendMessage(message: ChatMessage): void;
-  async *streamEvents(): AsyncIterable<LiveEvent> {
-    while (true) {
-      const event = await this.nextEvent();
-      if (event === undefined) {
-        break;
-      }
-      yield event;
-    }
-  }
-  abstract disconnect(): Promise<void>;
-
-  protected async nextEvent(): Promise<LiveEvent | undefined> {
-    if (this.eventQueue.length) {
-      return Promise.resolve(this.eventQueue.shift());
-    }
-
-    return new Promise((resolve) => {
-      this.eventResolvers.push(resolve);
-    });
-  }
-
-  //Uses an async queue to send events to the client
-  // if the consumer is waiting for an event, it will be resolved immediately
-  // otherwise, the event will be queued up and sent when the consumer is ready
-  pushEventToQueue(event: LiveEvent) {
-    if (this.eventResolvers.length) {
-      //resolving the promise with the event
-      this.eventResolvers.shift()!(event);
-    } else {
-      this.eventQueue.push(event);
-    }
-  }
-}
-
-export abstract class LiveLLM {
-  abstract connect(config?: LiveConnectConfig): Promise<LiveLLMSession>;
-}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
github-actions[bot]	7a2485cca2	Release 0.11.12 (#2050 ) Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>	2025-07-02 11:41:55 +07:00
Marcus Schiesser	1329186a23	docs: clarify how to run docs	2025-07-02 11:33:48 +07:00
dependabot[bot]	5d6e7384f5	chore(deps-dev): bump @modelcontextprotocol/server-filesystem from 2025.3.28 to 2025.7.1 (#2055 )	2025-07-02 11:26:18 +07:00
allen	f2dfd305fb	implement bm25 retriever (#2045 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-07-02 11:22:47 +07:00
Huu Le	3cd8a573df	feat: update interpreter to always upload all files in the configured directory (#2057 )	2025-07-02 10:57:04 +07:00
Laurie Voss	09c6077f6e	Import path for llamaparsereader (#2056 )	2025-07-01 16:51:25 -07:00
Logan	14cc65b4e3	add google analytics (#2053 ) Co-authored-by: Alex Yang <himself65@outlook.com>	2025-07-01 11:18:14 -07:00
Marcus Schiesser	c544d8f67c	docs: review and update memory doc	2025-07-01 15:10:43 +07:00
Huu Le	d578889e21	feat: new memory api (#2028 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-07-01 09:30:49 +07:00
Marcus Schiesser	9f745d1941	chore: revert to wrong opus change	2025-07-01 09:07:46 +07:00
Alex Yang	f292e94dcd	fix: change default claude model (#2052 )	2025-06-30 15:19:40 -07:00
Marcus Schiesser	0fcc92f632	fix: sentence splitter must not trim whitespaces (#2046 )	2025-06-30 17:32:04 +07:00
Marcus Schiesser	515a8b9111	fix: error logging for fromPersistPath (#2049 )	2025-06-30 13:41:13 +07:00
github-actions[bot]	7e8efc6284	Release @llamaindex/tools@0.1.2 (#2048 )	2025-06-30 11:40:54 +07:00
Wassim Chegham	0fcf65126d	chore: export type MCPClientOptions (#2047 ) Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>	2025-06-28 10:55:07 +07:00
github-actions[bot]	a50acf634c	Release 0.11.11 (#2044 ) Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>	2025-06-27 14:51:09 +07:00
Thuc Pham	7039e1a214	chore: migrate to @google/genai SDK (#2038 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-06-27 12:09:26 +07:00
github-actions[bot]	785d010cd3	Release 0.11.10 (#2037 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-06-26 14:29:33 +07:00
Marcus Schiesser	b878032131	fix release step	2025-06-26 14:18:56 +07:00
Marcus Schiesser	f7ec293a0f	chore: Update workflow-core (#2042 )	2025-06-26 14:03:03 +07:00
jerinthomascarmel	49a5e0a8cf	feat(readers): add ExcelReader for parsing Excel files (run-llama#1959) (#2033 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de> Co-authored-by: leehuwuj <leehuwuj@gmail.com>	2025-06-26 11:15:19 +07:00
Logan	118924799a	Rename llama-flow -> workflows in docs (#2040 )	2025-06-25 15:52:04 -07:00
allen	ec8f673dae	support filter to supabase vector search (#2036 )	2025-06-25 16:17:54 +07:00
github-actions[bot]	85039a5360	Release @llamaindex/tools@0.1.0 (#2034 )	2025-06-24 12:32:24 +07:00
Marcus Schiesser	d7305edb53	fix changesets	2025-06-24 12:26:09 +07:00
Huu Le	096bf2bda1	feat: Add support for StreamableHTTP MCP Client (#2032 )	2025-06-24 11:40:34 +07:00
jerinthomascarmel	c5846bd7dc	feat(readers): add XMLReader for parsing XML files (#1846 ) (#2031 ) Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>	2025-06-24 10:46:32 +07:00
github-actions[bot]	97bbce6e13	Release 0.11.9 (#2023 ) Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>	2025-06-20 12:28:01 +07:00
Marcus Schiesser	62699b7497	chore: improve performance of sentence splitter (#2030 )	2025-06-20 12:16:24 +07:00
Broda Noel	a89e187796	Add `extraAbbreviations` on sentence-splitter (#2029 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-06-20 11:27:06 +07:00
ANKIT VARSHNEY	d8ac8d385d	feat: add openai realtime api (#2006 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-06-20 10:22:04 +07:00
Marcus Schiesser	a6cef9c6be	chore: no core in examples (#2024 )	2025-06-18 09:39:32 +07:00
Broda Noel	c5b2691302	Add more Acronyms on SentenceSplitter (#2022 ) Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>	2025-06-17 10:43:36 +07:00
github-actions[bot]	8122c7245e	Release 0.11.8 (#2018 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>	2025-06-12 16:20:58 +07:00
Huu Le	8a51c167f8	feat: use agent to handle a workflow step (#2014 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-06-12 16:06:13 +07:00
Marcus Schiesser	1b5af1402d	fix: jsonToNode for image nodes (#2017 )	2025-06-12 11:59:05 +07:00
github-actions[bot]	fffe93fac8	Release 0.11.7 (#2013 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>	2025-06-12 10:34:24 +07:00
Marcus Schiesser	dbd857f6b5	chore: add changeset	2025-06-11 16:20:32 +07:00
정물결	a4d394f727	fix: correct SimpleDirectoryReader import path (#2011 )	2025-06-10 12:43:01 +07:00
Marcus Schiesser	3c857f4132	chore: move ajv to dev deps (#2012 )	2025-06-10 12:20:54 +07:00
Thuc Pham	36cfb93eb2	feat: export snapshot apis from llama-flow (#2009 )	2025-06-10 11:56:33 +07:00
github-actions[bot]	ab4762f026	Release (#2005 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-06-06 14:45:39 +07:00
Peter Goldstein	56763dc57d	Update to the latest Gemini 2.5 Pro Preview key (#2004 )	2025-06-06 11:25:41 +07:00
github-actions[bot]	5375fdd704	Release (#2003 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>	2025-06-05 09:57:35 +07:00
Marcus Schiesser	e7484efca5	feat: weaviate: Add metadata sanitization before adding node. Add err… (#2001 )	2025-06-04 11:48:18 +07:00
Marcus Schiesser	c958a1645a	docs: update chat-ui (#2002 )	2025-06-03 17:01:07 +07:00
github-actions[bot]	0140a257c4	Release 0.11.6 (#1999 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>	2025-06-02 18:03:31 +07:00
GhosT	40161fe8d2	chore: Bump @llama-flow/core package version (#1998 ) Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>	2025-06-02 17:28:47 +07:00
github-actions[bot]	d883fe7351	Release @llamaindex/google@0.3.7 (#1994 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-31 14:04:14 +07:00
Parham Saidi	2bc6914784	fix: ignore empty parts for gemini which confuses agent (#1993 )	2025-05-30 22:47:21 +07:00
				`@@ -0,0 +1 @@`
				<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="31.88" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 257"><defs><linearGradient id="IconifyId1813088fe1fbc01fb466" x1="-.828%" x2="57.636%" y1="7.652%" y2="78.411%"><stop offset="0%" stop-color="#41D1FF"></stop><stop offset="100%" stop-color="#BD34FE"></stop></linearGradient><linearGradient id="IconifyId1813088fe1fbc01fb467" x1="43.376%" x2="50.316%" y1="2.242%" y2="89.03%"><stop offset="0%" stop-color="#FFEA83"></stop><stop offset="8.333%" stop-color="#FFDD35"></stop><stop offset="100%" stop-color="#FFA800"></stop></linearGradient></defs><path fill="url(#IconifyId1813088fe1fbc01fb466)" d="M255.153 37.938L134.897 252.976c-2.483 4.44-8.862 4.466-11.382.048L.875 37.958c-2.746-4.814 1.371-10.646 6.827-9.67l120.385 21.517a6.537 6.537 0 0 0 2.322-.004l117.867-21.483c5.438-.991 9.574 4.796 6.877 9.62Z"></path><path fill="url(#IconifyId1813088fe1fbc01fb467)" d="M185.432.063L96.44 17.501a3.268 3.268 0 0 0-2.634 3.014l-5.474 92.456a3.268 3.268 0 0 0 3.997 3.378l24.777-5.718c2.318-.535 4.413 1.507 3.936 3.838l-7.361 36.047c-.495 2.426 1.782 4.5 4.151 3.78l15.304-4.649c2.372-.72 4.652 1.36 4.15 3.788l-11.698 56.621c-.732 3.542 3.979 5.473 5.943 2.437l1.313-2.028l72.516-144.72c1.215-2.423-.88-5.186-3.54-4.672l-25.505 4.922c-2.396.462-4.435-1.77-3.759-4.114l16.646-57.705c.677-2.35-1.37-4.583-3.769-4.113Z"></path></svg>