Release 0.11.12 (#2050 )

Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
docs: clarify how to run docs
2026-07-01 22:14:03 -04:00 · 2025-07-02 11:41:55 +07:00 · 2025-07-02 11:33:48 +07:00 · 2025-07-02 11:26:18 +07:00 · 2025-07-02 11:22:47 +07:00 · 2025-07-02 10:57:04 +07:00
243 changed files with 7368 additions and 2340 deletions
@@ -25,7 +25,7 @@ Make sure you have Node.js LTS (Long-term Support) installed. You can check your

 ```shell
 node -v
-# v20.x.x
+# v22.x.x
 ```

 ### Use pnpm
@@ -1,5 +1,49 @@
 # @llamaindex/doc

+## 0.2.32
+
+### Patch Changes
+
+- Updated dependencies [d578889]
+- Updated dependencies [0fcc92f]
+- Updated dependencies [515a8b9]
+  - @llamaindex/core@0.6.13
+  - llamaindex@0.11.12
+  - @llamaindex/cloud@4.0.17
+  - @llamaindex/node-parser@2.0.13
+  - @llamaindex/openai@0.4.7
+  - @llamaindex/readers@3.1.12
+  - @llamaindex/workflow@1.1.13
+
+## 0.2.31
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+  - @llamaindex/core@0.6.12
+  - @llamaindex/cloud@4.0.16
+  - @llamaindex/node-parser@2.0.12
+  - @llamaindex/openai@0.4.6
+  - @llamaindex/readers@3.1.11
+  - @llamaindex/workflow@1.1.12
+
+## 0.2.30
+
+### Patch Changes
+
+- Updated dependencies [f7ec293]
+  - @llamaindex/workflow@1.1.11
+  - llamaindex@0.11.10
+
+## 0.2.29
+
+### Patch Changes
+
+- Updated dependencies [c5846bd]
+  - @llamaindex/readers@3.1.10
+
 ## 0.2.28

 ### Patch Changes
@@ -111,7 +111,7 @@ Key build process:
 **Content Sources:**

 - Local MDX files in `src/content/docs/`
- External docs from `@llama-flow/docs` package
+- External docs from `@llamaindex/workflow-docs` package
 - Generated API docs from TypeScript source

 ### Development Notes
@@ -3,6 +3,8 @@
 This is a Next.js application generated with
 [Create Fumadocs](https://github.com/fuma-nama/fumadocs).

+> Note: Before running the development server, make sure to build the whole project first. See [CONTRIBUTING.md](../../CONTRIBUTING.md) for more details.
+
 Run development server:

 ```bash
@@ -23,8 +23,8 @@ const config = {
        permanent: true,
      },
      {
-        source: "/docs/llamaflow/:path*.mdx",
-        destination: "/docs/llamaflow/:path*",
+        source: "/docs/workflows/:path*.mdx",
+        destination: "/docs/workflows/:path*",
        permanent: true,
      },
    ];
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/doc",
-  "version": "0.2.28",
+  "version": "0.2.32",
  "private": true,
  "scripts": {
    "postinstall": "fumadocs-mdx",
@@ -15,7 +15,6 @@
  "dependencies": {
    "@huggingface/transformers": "^3.5.0",
    "@icons-pack/react-simple-icons": "^10.1.0",
-    "@llama-flow/docs": "0.0.8",
    "@llamaindex/chat-ui-docs": "^0.0.5",
    "@llamaindex/cloud": "workspace:*",
    "@llamaindex/core": "workspace:*",
@@ -23,8 +22,10 @@
    "@llamaindex/openai": "workspace:*",
    "@llamaindex/readers": "workspace:*",
    "@llamaindex/workflow": "workspace:*",
+    "@llamaindex/workflow-docs": "0.1.1",
    "@mdx-js/mdx": "^3.1.0",
    "@monaco-editor/react": "^4.7.0",
+    "@next/third-parties": "^15.3.4",
    "@number-flow/react": "^0.3.4",
    "@radix-ui/react-dialog": "^1.1.2",
    "@radix-ui/react-icons": "^1.3.2",
@@ -69,7 +70,7 @@
    "twoslash": "^0.3.1",
    "use-stick-to-bottom": "^1.0.42",
    "web-tree-sitter": "^0.24.4",
-    "zod": "^3.23.8"
+    "zod": "^3.25.67"
  },
  "devDependencies": {
    "@next/env": "^15.3.0",
@@ -13,7 +13,7 @@ const INTERNAL_LINK_REGEX = /(?:(?:\]\(|\bhref=["'])\/docs\/([^")]+))/g;
 // This captures relative links like [text](./path) or ![alt](../images/image.png)
 const RELATIVE_LINK_REGEX = /(?:\]\()(?:\s*)(?:\.\.?)\//g;

-const ALLOWED_LINKS = ["/docs/llamaflow", "/docs/chat-ui"];
+const ALLOWED_LINKS = ["/docs/workflows", "/docs/chat-ui"];

 interface LinkValidationResult {
  file: string;
@@ -11,9 +11,9 @@ import remarkMath from "remark-math";
 export const docs = defineDocs({
  dir: [
    "./src/content/docs",
-    "./node_modules/@llama-flow/docs",
+    "./node_modules/@llamaindex/workflow-docs",
    "./node_modules/@llamaindex/chat-ui-docs",
-    // NOTE: When adding external docs (like chat-ui or llama-flow above),
+    // NOTE: When adding external docs (like chat-ui or workflow-docs above),
    // make sure to also update:
    // 1. scripts/validate-links.mts - add to ALLOWED_LINKS array
    // 2. next.config.mjs - add redirect for .mdx files
@@ -1,5 +1,6 @@
 import { AIProvider } from "@/actions";
 import { TooltipProvider } from "@/components/ui/tooltip";
+import { GoogleAnalytics } from "@next/third-parties/google";
 import { RootProvider } from "fumadocs-ui/provider";
 import { Inter } from "next/font/google";
 import type { ReactNode } from "react";
@@ -39,6 +40,7 @@ export default function Layout({ children }: { children: ReactNode }) {
          </AIProvider>
        </TooltipProvider>
      </body>
+      <GoogleAnalytics gaId="G-NB9B8LW9W5" />
    </html>
  );
 }
@@ -74,12 +74,21 @@ const server = mcp({
  args: ["-y", "@modelcontextprotocol/server-filesystem", "."],
  verbose: true,
 });
-// or by SSE
+// or by StreamableHTTP transport
 const server = mcp({
  url: "http://localhost:8000/mcp",
  verbose: true,
 });

+// if your MCP server is not using StreamableHTTP transport, you can also use SSE transport
+// by setting useSSETransport to true.
+// See: https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse-deprecated
+const server = mcp({
+  url: "http://localhost:8000/mcp",
+  useSSETransport: true,
+  verbose: true,
+});
+
 // 3. Get tools from MCP server
 const tools = await server.tools();

@@ -9,10 +9,13 @@ Workflows are designed to be flexible and can be used to build agents, RAG flows
 To use workflows install this package:

 ```package-install
-npm i @llamaindex/workflow
+npm i @llamaindex/workflow-core
 ```

-This package is a stable, production-ready version of our [llama-flow](/docs/llamaflow) project. 
+This contains the core functionality for the workflow system. You can read more about the core concepts in the [workflow-core](/docs/workflows) section.

-While you can still reference the llama-flow documentation for detailed information about the underlying concepts, we recommend using the `@llamaindex/workflow` package for all new projects to ensure stability and long-term availability.
+In contrast, the `@llamaindex/workflow` package contains more utilities, such as prebuilt agents.

+```package-install
+npm i @llamaindex/workflow
+```
@@ -0,0 +1,182 @@
+---
+title: Memory
+description: Manage conversation history and context with agents
+---
+
+## Concept
+
+Memory is a core component of agentic systems. It allows you to store and retrieve information from the past.
+
+In LlamaIndexTS, you can create memory by using the `createMemory` function. This function will return a `Memory` object, which you can then use to store and retrieve information.
+
+As the agent runs, it will make calls to `add()` to store information, and `get()` to retrieve information. 
+
+## Usage
+
+A `Memory` object has both short-term memory (i.e. a FIFO queue of messages) and optionally long-term memory (i.e. extracting information over time).
+
+`get()` always returns all messages stored in the memory. The longer the agent runs, the more likely it is that these messages will exceed the context window of the agent's LLM. To avoid this, the agent uses the `getLLM` method to get only the most recent messages that fit into the context window.
+
+### Configuring Memory for an Agent
+
+Here we're creating a memory with a static block (read more about [memory blocks](#long-term-memory)) that contains some information about the user.
+
+```ts twoslash
+import { openai } from "@llamaindex/openai";
+import { agent } from "@llamaindex/workflow";
+import { createMemory, staticBlock } from "llamaindex";
+
+const llm = openai({ model: "gpt-4.1-mini" });
+
+// Create memory with predefined context
+const memory = createMemory({
+  memoryBlocks: [
+    staticBlock({
+      content:
+        "The user is a software engineer who loves TypeScript and LlamaIndex.",
+    }),
+  ],
+});
+
+// Create an agent with the memory
+const workflow = agent({
+  name: "assistant",
+  llm,
+  memory,
+});
+
+const result = await workflow.run("What is my name?");
+console.log("Response:", result.data.result);
+```
+
+### Using Vercel format
+
+You can also put messages in Vercel format directly to the memory:
+
+```ts
+await memory.add({
+  id: "1",
+  createdAt: new Date(),
+  role: "user",
+  content: "Hello!",
+  options: {
+    parts: [
+      {
+        type: "file",
+        data: "base64...",
+        mimeType: "image/png",
+      },
+    ],
+  },
+});
+```
+
+If you call `get`, messages are retrieved in the LlamaIndexTS format (type `ChatMessage`) by default. By specifying the `type` parameter when calling `get`, you can retrieve the messages in a different format. E.g., using `type: "vercel"` returns the messages in Vercel format:
+
+```ts
+const messages = await memory.get({ type: "vercel" });
+console.log(messages);
+```
+
+## Customizing Memory
+
+### Short-Term Memory
+
+The `Memory` object will store all the messages that are added to the `Memory` object. Unless you call `clear()`, no messages are removed from the memory. This is the short-term memory (usually you will store the memory of one user session there) which is augmented by the long-term memory.
+
+Calling `getLLM` will retrieve messages from short-term and long-term memory and ensure that the given `tokenLimit` is not exceeded. These are the messages that you will send to the LLM.
+
+For initialization, you call `createMemory` with the following options:
+
+- `tokenLimit`: Maximum tokens for memory retrieval using `getLLM` (default: 30000).
+- `shortTermTokenLimitRatio`: Ratio of tokens for short-term vs long-term memory (default: 0.7)
+- `customAdapters`: Custom message adapters for different message formats. LlamaIndex (`ChatMessageAdapter`) and Vercel (`VercelMessageAdapter`) are built-in adapters.
+- `memoryBlocks`: Memory blocks for long-term storage, see [Long-Term Memory](#long-term-memory)
+
+Example:
+
+```ts
+const memory = createMemory({
+  tokenLimit: 40000,
+  shortTermTokenLimitRatio: 0.5,
+});
+```
+
+### Long-Term Memory
+
+Long-term memory is represented as `Memory Block` objects. These objects contain information from previous user sessions or from the beginning of the current conversation. When memory is retrieved (by calling `getLLM`), the short-term and long-term memories are merged together within the given `tokenLimit`.
+
+Currently, there are two predefined memory blocks:
+
+- `staticBlock`: A memory block that stores a static piece of information.
+- `factExtractionBlock`: A memory block that extracts facts from the chat history.
+
+This sounds a bit complicated, but it's actually quite simple. Let's look at an example:
+
+```ts
+import { createMemory, factExtractionBlock, staticBlock } from "llamaindex";
+
+const memoryBlocks= [
+  staticBlock({
+    id: "core_info",
+    content: "My name is Logan, and I live in Saskatoon. I work at LlamaIndex.",
+  }),
+  factExtractionBlock({
+    id: "user-extracted_info",
+    priority: 1,
+    llm: llm,
+    maxFacts: 50,
+  }),
+];
+```
+
+Here, we've set up two memory blocks:
+
+- `core_info`: A static memory block that stores some core information about the user. This information will always be inserted into the memory. The type used is `MessageContent` to support multi-modal content.
+- `user-extracted_info`: A fact extraction memory block that extracts information from the chat history. Here we've passed in the `llm` used to extract facts from the chat history, and set `maxFacts` to 50. If the number of extracted facts exceeds this limit, the facts will be automatically summarized and reduced to leave room for new information.
+
+You'll also notice that we've set the `priority` for the `factExtractionBlock` block. This is used to determine the handling when the memory blocks content (i.e. long-term memory) + short-term memory exceeds the token limit on the `Memory` object.
+
+- `priority=0`: This block will always be kept in memory (`staticBlocks` always have priority 0.)
+- `priority=1, 2, 3, etc`: This determines the order in which memory blocks are truncated when the memory exceeds the token limit, to help the overall short-term memory + long-term memory content be less than or equal to the `tokenLimit`.
+
+Now, let's pass these blocks into the `createMemory` function:
+
+```ts
+const memory = createMemory({
+  tokenLimit: 40000,
+  memoryBlocks: memoryBlocks,
+});
+```
+
+When memory is retrieved (using `getLLM`), the short-term and long-term memories are merged together. The `Memory` object will ensure that the short-term memory + long-term memory content is less than or equal to the `tokenLimit`. If it is longer, messages are retrieved in the following order:
+
+1. StaticMemoryBlock (information always included)
+2. LongTermMemoryBlock (depending on priority)
+3. ShortTermMemoryBlock 
+4. Transient messages
+
+The amount of short-term memory included is specified by the `shortTermTokenLimitRatio`. If it's set to `0.7`, 70% of the `tokenLimit` is used for short-term memory (not including the static memory block).
+
+## Persistence with Snapshots
+
+Save and restore memory state:
+
+```ts twoslash
+import { createMemory, loadMemory } from "llamaindex";
+
+const memory = createMemory();
+
+// Add some messages
+await memory.add({ role: "user", content: "Hello!" });
+
+// Create snapshot
+const snapshot = memory.snapshot();
+
+// Later, restore from the snapshot
+const restoredMemory = loadMemory(snapshot);
+```
+
+## Examples
+
+Want to learn more about the Memory class? Check out our example code on [GitHub](https://github.com/run-llama/LlamaIndexTS/tree/main/examples/agents/memory).
@@ -1,4 +1,11 @@
 {
  "title": "Data",
-  "pages": ["index", "readers", "data_index", "ingestion_pipeline", "stores"]
+  "pages": [
+    "index",
+    "memory",
+    "readers",
+    "data_index",
+    "ingestion_pipeline",
+    "stores"
+  ]
 }
@@ -28,11 +28,12 @@ embedding vector(1536)
 );
 ```

-- Create a function for similarity search
+- Create a function for similarity search with filtering support
 ```sql
 create function match_documents (
 query_embedding vector(1536),
-match_count int
+match_count int,
+filter jsonb DEFAULT '{}'
 ) returns table (
 id uuid,
 content text,
@@ -52,6 +53,7 @@ metadata,
 embedding,
 1 - (embedding <=> query_embedding) as similarity
 from documents
+where metadata @> filter
 order by embedding <=> query_embedding
 limit match_count;
 end;
@@ -96,6 +98,7 @@ const index = await VectorStoreIndex.fromDocuments(documents, {
 ```ts
 const queryEngine = index.asQueryEngine();

+// Basic query without filters
 const response = await queryEngine.query({
  query: "What is in the document?",
 });
@@ -104,6 +107,32 @@ const response = await queryEngine.query({
 console.log(response.toString());
 ```

+## Query with filters
+
+You can filter documents based on metadata when querying:
+
+```ts
+import { FilterOperator, MetadataFilters } from "llamaindex";
+
+// Create a filter for documents with author = "Jane Smith"
+const filters: MetadataFilters = {
+  filters: [
+    {
+      key: "author",
+      value: "Jane Smith",
+      operator: FilterOperator.EQ,
+    },
+  ],
+};
+
+// Query with filters
+const filteredResponse = await vectorStore.query({
+  queryEmbedding: await embedModel.getQueryEmbedding("What is vector search?"),
+  similarityTopK: 5,
+  filters,
+});
+```
+
 ## Full code

 ```ts
@@ -11,58 +11,130 @@ npm i llamaindex @llamaindex/google
 ## Usage

 ```ts
-import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
 import { Settings } from "llamaindex";

-Settings.llm = new Gemini({
-  model: GEMINI_MODEL.GEMINI_PRO,
-});
-```
-
-## Usage with Proxy
-
-```ts
-import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
-import { Settings } from "llamaindex";
-
-Settings.llm = new Gemini({
-  model: GEMINI_MODEL.GEMINI_PRO,
-  requestOptions: {
-    baseUrl: <YOUR_PROXY_URL>   // optional, but useful for custom endpoints
-  }
+Settings.llm = gemini({
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH,
 });
 ```

 ### Usage with Vertex AI

-To use Gemini via Vertex AI you can use `GeminiVertexSession`.
-
-GeminiVertexSession accepts the env variables: `GOOGLE_VERTEX_LOCATION` and `GOOGLE_VERTEX_PROJECT`
+To use Gemini via Vertex AI, you can specify the vertex configuration:

 ```ts
-import { Gemini, GEMINI_MODEL, GeminiVertexSession } from "@llamaindex/google";
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";

-const gemini = new Gemini({
-  model: GEMINI_MODEL.GEMINI_PRO,
-  session: new GeminiVertexSession({
-    location: "us-central1",      // optional if provided by GOOGLE_VERTEX_LOCATION env variable
-    project: "project1",          // optional if provided by GOOGLE_VERTEX_PROJECT env variable
-    googleAuthOptions: {...},     // optional, but useful for production. It accepts all values from `GoogleAuthOptions`
-  }),
+const llm = gemini({
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH,
+  vertex: {
+    project: "your-cloud-project",    // required for Vertex AI
+    location: "us-central1",          // required for Vertex AI
+  },
 });
 ```

-[GoogleAuthOptions](https://github.com/googleapis/google-auth-library-nodejs/blob/main/src/auth/googleauth.ts)
-
 To authenticate for local development:

 ```bash
-npm i @google-cloud/vertexai
 gcloud auth application-default login
 ```

 To authenticate for production you'll have to use a [service account](https://cloud.google.com/docs/authentication/). `googleAuthOptions` has `credentials` which might be useful for you.

+## Multimodal Usage
+
+Gemini supports multimodal inputs including text, images, audio, and video:
+
+```ts
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
+import fs from "fs";
+
+const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
+
+const result = await llm.chat({
+  messages: [
+    {
+      role: "user",
+      content: [
+        {
+          type: "text",
+          text: "What's in this image?",
+        },
+        {
+          type: "image",
+          data: fs.readFileSync("./image.jpg").toString("base64"),
+          mimeType: "image/jpeg",
+        },
+      ],
+    },
+  ],
+});
+```
+
+## Tool Calling
+
+Gemini supports function calling with tools:
+
+```ts
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
+import { tool } from "llamaindex";
+import { z } from "zod";
+
+const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
+
+const result = await llm.chat({
+  messages: [
+    {
+      content: "What's the weather in Tokyo?",
+      role: "user",
+    },
+  ],
+  tools: [
+    tool({
+      name: "weather",
+      description: "Get the weather",
+      parameters: z.object({
+        location: z.string().describe("The location to get the weather for"),
+      }),
+      execute: ({ location }) => {
+        return `The weather in ${location} is sunny and hot`;
+      },
+    }),
+  ],
+});
+```
+
+## Live API (Real-time Conversations)
+
+For real-time audio/video conversations, you can use the [Gemini Live API](https://ai.google.dev/gemini-api/docs/live).
+
+The Live API runs directly in the frontend. That's why you have to generate an ephemeral key on the server side first and pass it to the frontend.
+
+To use the Live API, make sure to pass `apiVersion: "v1alpha"` to the `httpOptions`.
+
+```ts
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
+
+// Server-side: Generate ephemeral key
+const serverLlm = gemini({
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
+  httpOptions: { apiVersion: "v1alpha" },
+});
+const ephemeralKey = await serverLlm.live.getEphemeralKey();
+
+// Client-side: Use ephemeral key for Live API
+const llm = gemini({
+  apiKey: ephemeralKey,
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
+  voiceName: "Zephyr",
+  httpOptions: { apiVersion: "v1alpha" },
+});
+
+const session = await llm.live.connect();
+```
+
 ## Load and index documents

 For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
@@ -90,11 +162,11 @@ const results = await queryEngine.query({
 ## Full Example

 ```ts
-import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
 import { Document, VectorStoreIndex, Settings } from "llamaindex";

-Settings.llm = new Gemini({
-  model: GEMINI_MODEL.GEMINI_PRO,
+Settings.llm = gemini({
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH,
 });

 async function main() {
@@ -104,9 +176,7 @@ async function main() {
  const index = await VectorStoreIndex.fromDocuments([document]);

  // Create a query engine
-  const queryEngine = index.asQueryEngine({
-    retriever,
-  });
+  const queryEngine = index.asQueryEngine();

  const query = "What is the meaning of life?";

@@ -11,6 +11,7 @@ A retriever in LlamaIndex is what is used to fetch `Node`s from an index using a
 - [KeywordTableLLMRetriever](/docs/api/classes/KeywordTableLLMRetriever) uses an LLM to extract keywords from the query and retrieve relevant nodes based on keyword matches.
 - [KeywordTableSimpleRetriever](/docs/api/classes/KeywordTableSimpleRetriever) uses a basic frequency-based approach to extract keywords and retrieve nodes.
 - [KeywordTableRAKERetriever](/docs/api/classes/KeywordTableRAKERetriever) uses the RAKE (Rapid Automatic Keyword Extraction) algorithm to extract keywords from the query, focusing on co-occurrence and context for keyword-based retrieval.
+- [Bm25Retriever](/docs/api/classes/Bm25Retriever) uses the BM25 ranking algorithm to score nodes against the terms of the query and retrieve the most relevant nodes based on keyword matches.

 ```typescript
 const retriever = vectorIndex.asRetriever({
@@ -1,3 +1,3 @@
 {
-  "pages": ["llamaindex", "api", "llamaflow", "chat-ui"]
+  "pages": ["llamaindex", "api", "workflows", "chat-ui"]
 }
@@ -4,7 +4,7 @@
  "tasks": {
    "build": {
      "inputs": [
-        "node_modules/@llama-flow/docs/**",
+        "node_modules/@llamaindex/workflow-docs/**",
        "node_modules/@llamaindex/chat-ui-docs/**",
        "src/**/*.ts",
        "src/**/*.tsx",
@@ -1,5 +1,25 @@
 # @llamaindex/cloudflare-worker-agent-test

+## 0.0.173
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+
+## 0.0.172
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+
+## 0.0.171
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
 ## 0.0.170

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloudflare-worker-agent-test",
-  "version": "0.0.170",
+  "version": "0.0.173",
  "type": "module",
  "private": true,
  "scripts": {
@@ -1,5 +1,17 @@
 # @llamaindex/llama-parse-browser-test

+## 0.0.72
+
+### Patch Changes
+
+- @llamaindex/cloud@4.0.17
+
+## 0.0.71
+
+### Patch Changes
+
+- @llamaindex/cloud@4.0.16
+
 ## 0.0.70

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/llama-parse-browser-test",
  "private": true,
-  "version": "0.0.70",
+  "version": "0.0.72",
  "type": "module",
  "scripts": {
    "dev": "vite",
@@ -1,5 +1,25 @@
 # @llamaindex/next-agent-test

+## 0.1.173
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+
+## 0.1.172
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+
+## 0.1.171
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
 ## 0.1.170

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-agent-test",
-  "version": "0.1.170",
+  "version": "0.1.173",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,25 @@
 # test-edge-runtime

+## 0.1.172
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+
+## 0.1.171
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+
+## 0.1.170
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
 ## 0.1.169

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/nextjs-edge-runtime-test",
-  "version": "0.1.169",
+  "version": "0.1.172",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,36 @@
 # @llamaindex/next-node-runtime

+## 0.1.41
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+  - @llamaindex/huggingface@0.1.17
+  - @llamaindex/readers@3.1.12
+
+## 0.1.40
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+  - @llamaindex/huggingface@0.1.16
+  - @llamaindex/readers@3.1.11
+
+## 0.1.39
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
+## 0.1.38
+
+### Patch Changes
+
+- Updated dependencies [c5846bd]
+  - @llamaindex/readers@3.1.10
+
 ## 0.1.37

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-node-runtime-test",
-  "version": "0.1.37",
+  "version": "0.1.41",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,25 @@
 # vite-import-llamaindex

+## 0.0.39
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+
+## 0.0.38
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+
+## 0.0.37
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
 ## 0.0.36

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "vite-import-llamaindex",
  "private": true,
-  "version": "0.0.36",
+  "version": "0.0.39",
  "type": "module",
  "scripts": {
    "build": "vite build",
@@ -1,5 +1,25 @@
 # @llamaindex/waku-query-engine-test

+## 0.0.173
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+
+## 0.0.172
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+
+## 0.0.171
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
 ## 0.0.170

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/waku-query-engine-test",
-  "version": "0.0.170",
+  "version": "0.0.173",
  "type": "module",
  "private": true,
  "scripts": {
@@ -10,7 +10,7 @@ import { mockLLMEvent } from "./utils.js";
 let llm: LLM;
 beforeEach(async () => {
  Settings.llm = new Anthropic({
-    model: "claude-3-opus",
+    model: "claude-3.5-sonnet",
  });
  llm = Settings.llm;
 });
@@ -7,7 +7,7 @@
  "dependencies": {
    "@llamaindex/workflow": "1.1.1",
    "llamaindex": "0.10.5",
-    "zod": "^3.23.8"
+    "zod": "^3.25.67"
  },
  "devDependencies": {
    "tsx": "^4.19.1",
@@ -27,6 +27,6 @@
    "pg": "^8.12.0",
    "pgvector": "0.2.0",
    "tsx": "^4.19.3",
-    "zod": "^3.24.2"
+    "zod": "^3.25.67"
  }
 }
@@ -1,5 +1,122 @@
 # examples

+## 0.3.26
+
+### Patch Changes
+
+- Updated dependencies [d578889]
+- Updated dependencies [0fcc92f]
+- Updated dependencies [515a8b9]
+- Updated dependencies [3cd8a57]
+- Updated dependencies [f2dfd30]
+  - @llamaindex/core@0.6.13
+  - llamaindex@0.11.12
+  - @llamaindex/tools@0.1.3
+  - @llamaindex/bm25-retriever@0.0.2
+  - @llamaindex/cloud@4.0.17
+  - @llamaindex/node-parser@2.0.13
+  - @llamaindex/anthropic@0.3.15
+  - @llamaindex/assemblyai@0.1.12
+  - @llamaindex/clip@0.0.63
+  - @llamaindex/cohere@0.0.27
+  - @llamaindex/deepinfra@0.0.63
+  - @llamaindex/discord@0.1.12
+  - @llamaindex/google@0.3.12
+  - @llamaindex/huggingface@0.1.17
+  - @llamaindex/jinaai@0.0.23
+  - @llamaindex/mistral@0.1.13
+  - @llamaindex/mixedbread@0.0.27
+  - @llamaindex/notion@0.1.12
+  - @llamaindex/ollama@0.1.13
+  - @llamaindex/openai@0.4.7
+  - @llamaindex/perplexity@0.0.20
+  - @llamaindex/portkey-ai@0.0.55
+  - @llamaindex/replicate@0.0.55
+  - @llamaindex/astra@0.0.27
+  - @llamaindex/azure@0.1.24
+  - @llamaindex/chroma@0.0.27
+  - @llamaindex/elastic-search@0.1.13
+  - @llamaindex/firestore@1.0.20
+  - @llamaindex/milvus@0.1.22
+  - @llamaindex/mongodb@0.0.28
+  - @llamaindex/pinecone@0.1.13
+  - @llamaindex/postgres@0.0.56
+  - @llamaindex/qdrant@0.1.23
+  - @llamaindex/supabase@0.1.13
+  - @llamaindex/upstash@0.0.27
+  - @llamaindex/weaviate@0.0.28
+  - @llamaindex/vercel@0.1.13
+  - @llamaindex/voyage-ai@1.0.19
+  - @llamaindex/readers@3.1.12
+  - @llamaindex/workflow@1.1.13
+  - @llamaindex/deepseek@0.0.23
+  - @llamaindex/fireworks@0.0.23
+  - @llamaindex/groq@0.0.78
+  - @llamaindex/together@0.0.23
+  - @llamaindex/vllm@0.0.49
+  - @llamaindex/xai@0.0.10
+
+## 0.3.25
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+  - @llamaindex/core@0.6.12
+  - @llamaindex/google@0.3.11
+  - @llamaindex/cloud@4.0.16
+  - @llamaindex/node-parser@2.0.12
+  - @llamaindex/anthropic@0.3.14
+  - @llamaindex/assemblyai@0.1.11
+  - @llamaindex/clip@0.0.62
+  - @llamaindex/cohere@0.0.26
+  - @llamaindex/deepinfra@0.0.62
+  - @llamaindex/discord@0.1.11
+  - @llamaindex/huggingface@0.1.16
+  - @llamaindex/jinaai@0.0.22
+  - @llamaindex/mistral@0.1.12
+  - @llamaindex/mixedbread@0.0.26
+  - @llamaindex/notion@0.1.11
+  - @llamaindex/ollama@0.1.12
+  - @llamaindex/openai@0.4.6
+  - @llamaindex/perplexity@0.0.19
+  - @llamaindex/portkey-ai@0.0.54
+  - @llamaindex/replicate@0.0.54
+  - @llamaindex/astra@0.0.26
+  - @llamaindex/azure@0.1.23
+  - @llamaindex/chroma@0.0.26
+  - @llamaindex/elastic-search@0.1.12
+  - @llamaindex/firestore@1.0.19
+  - @llamaindex/milvus@0.1.21
+  - @llamaindex/mongodb@0.0.27
+  - @llamaindex/pinecone@0.1.12
+  - @llamaindex/postgres@0.0.55
+  - @llamaindex/qdrant@0.1.22
+  - @llamaindex/supabase@0.1.12
+  - @llamaindex/upstash@0.0.26
+  - @llamaindex/weaviate@0.0.27
+  - @llamaindex/vercel@0.1.12
+  - @llamaindex/voyage-ai@1.0.18
+  - @llamaindex/readers@3.1.11
+  - @llamaindex/tools@0.1.1
+  - @llamaindex/workflow@1.1.12
+  - @llamaindex/deepseek@0.0.22
+  - @llamaindex/fireworks@0.0.22
+  - @llamaindex/groq@0.0.77
+  - @llamaindex/together@0.0.22
+  - @llamaindex/vllm@0.0.48
+  - @llamaindex/xai@0.0.9
+
+## 0.3.24
+
+### Patch Changes
+
+- Updated dependencies [096bf2b]
+- Updated dependencies [c5846bd]
+  - @llamaindex/tools@0.1.0
+  - @llamaindex/readers@3.1.10
+
 ## 0.3.23

 ### Patch Changes
@@ -6,15 +6,24 @@ async function main() {
  // Create an MCP server for filesystem tools
  const server = mcp({
    command: "npx",
-    args: ["-y", "@modelcontextprotocol/server-filesystem", "."],
+    args: ["-y", "@modelcontextprotocol/server-filesystem@latest", "."],
    verbose: true,
  });
-  // You can also connect to the MCP server using SSE
-  // See: https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse
+  //
+  // You can also connect to a remote MCP server using:
+  // 1. StreamableHTTP transport (recommended)
+  // See: https://modelcontextprotocol.io/docs/concepts/transports#streamable-http
  // const server = mcp({
  //   url: "http://localhost:8000/mcp",
  //   verbose: true,
  // });
+  // 2. Or using SSE transport (will be deprecated soon)
+  // See: https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse-deprecated
+  // const server = mcp({
+  //   url: "http://localhost:8000/mcp",
+  //   useSSETransport: true,
+  //   verbose: true,
+  // });

  try {
    // Create an agent that uses the MCP tools
@@ -0,0 +1,36 @@
+import { openai } from "@llamaindex/openai";
+import { agent } from "@llamaindex/workflow";
+import { createMemory, staticBlock } from "llamaindex";
+
+// Simple example: Agent with Predefined Memory
+async function simpleAgentMemoryExample() {
+  console.log("=== Simple Agent Memory Example ===");
+
+  const memory = createMemory({
+    memoryBlocks: [
+      staticBlock({
+        content:
+          "The user is a software engineer who loves TypeScript and LlamaIndex.",
+      }),
+    ],
+  });
+
+  // Create agent workflow
+  const workflow = agent({
+    name: "assistant",
+    llm: openai({ model: "gpt-4.1-nano" }),
+    memory,
+  });
+
+  // Test - agent should use the predefined memory context and recall the name given in the first message
+  console.log("\n--- Testing Memory Context ---");
+  const result = await workflow.run("Hi, my name is John. Do you know me?");
+
+  console.log("Assistant Response:", result.data.result);
+
+  const result2 = await workflow.run("What is my name?");
+  console.log("Assistant Response:", result2.data.result);
+}
+
+// Run the example
+simpleAgentMemoryExample().catch(console.error);
@@ -0,0 +1,58 @@
+import { openai } from "@llamaindex/openai";
+import { createMemory } from "llamaindex";
+
+// Example: Basic Memory Usage with Factory
+async function basicMemoryExample() {
+  console.log("\n=== Example: Basic Memory Usage with Factory ===");
+
+  const memory = createMemory({ tokenLimit: 30 });
+
+  // Add messages to memory
+  await memory.add({
+    role: "user",
+    content: "Hi, my name is John and I'm a software engineer.",
+  });
+
+  await memory.add({
+    role: "assistant",
+    content: "Hello John! Nice to meet you. How can I help you today?",
+  });
+
+  await memory.add({
+    role: "user",
+    content: "I love working with TypeScript and React.",
+  });
+  // Not all messages are included because the token limit is set to 30
+  const llmMessages = await memory.getLLM();
+  console.log(
+    `\nLLM messages (${llmMessages.length} messages) limited by a small token limit:`,
+  );
+  llmMessages.forEach((msg, idx) => {
+    console.log(`${idx + 1}. ${msg.role}: ${msg.content}`);
+  });
+
+  // However, if you pass an LLM to getLLM, the limit becomes that LLM's context window size instead of the token limit above
+  const llm = openai({ model: "gpt-4.1-mini" });
+  const llmMessagesWithLLM = await memory.getLLM(llm);
+  // Now all the messages are included because the model's context window is much larger
+  console.log(
+    `\nLLM messages with LLM (${llmMessagesWithLLM.length} messages) limited by LLM window size:`,
+  );
+  llmMessagesWithLLM.forEach((msg, idx) => {
+    console.log(`${idx + 1}. ${msg.role}: ${msg.content}`);
+  });
+}
+
+// Main function
+async function main() {
+  console.log("🧠 Basic Memory Factory Examples");
+  console.log("===============================");
+
+  try {
+    await basicMemoryExample();
+  } catch (error) {
+    console.error("Error running basic memory examples:", error);
+  }
+}
+
+main().catch(console.error);
@@ -0,0 +1,101 @@
+import { openai } from "@llamaindex/openai";
+import { createMemory, factExtractionBlock } from "llamaindex";
+
+// Configure OpenAI
+const llm = openai({ model: "gpt-4.1-mini" });
+
+// Example: Memory with Fact Extraction
+async function factExtractionMemoryExample() {
+  console.log("\n=== Memory with Fact Extraction ===");
+
+  // Create memory with a fact extraction
+  const memory = createMemory([], {
+    tokenLimit: 100,
+    shortTermTokenLimitRatio: 0.7, // 70% for short-term, 30% for long-term
+    memoryBlocks: [
+      factExtractionBlock({
+        id: "user-facts",
+        priority: 5,
+        llm: llm,
+        maxFacts: 10,
+        isLongTerm: true,
+      }),
+    ],
+  });
+
+  // Simulate a conversation with facts
+  const conversationTurns = [
+    {
+      role: "user",
+      content: "Hi, I'm Sarah and I work as a data scientist at Google.",
+    },
+    {
+      role: "assistant",
+      content:
+        "Hello Sarah! It's great to meet you. Data science at Google must be exciting!",
+    },
+    {
+      role: "user",
+      content:
+        "Yes, I specialize in machine learning and natural language processing.",
+    },
+    {
+      role: "assistant",
+      content: "That's impressive! ML and NLP are fascinating fields.",
+    },
+    {
+      role: "user",
+      content:
+        "I have a PhD in Computer Science from Stanford, and I love hiking on weekends.",
+    },
+    {
+      role: "assistant",
+      content:
+        "Wow, Stanford PhD! And hiking is a great way to unwind from tech work.",
+    },
+    {
+      role: "user",
+      content: "I also have two cats named Whiskers and Mittens.",
+    },
+    {
+      role: "assistant",
+      content:
+        "Cats make wonderful companions! Whiskers and Mittens are cute names.",
+    },
+  ];
+
+  // Add conversation turns to memory
+  console.log("Adding conversation to memory...");
+  for (const turn of conversationTurns) {
+    await memory.add(turn);
+  }
+
+  // Get messages - facts should be extracted and included
+  const messages = await memory.getLLM(llm);
+  console.log("\nMessages with extracted facts:");
+  messages.forEach((msg, idx) => {
+    console.log(`${idx + 1}. ${msg.role ?? "unknown"}: ${msg.content}`);
+  });
+  // Messages with extracted facts:
+  // 1. assistant: Cats make wonderful companions! Whiskers and Mittens are cute names.
+  // 2. user: I also have two cats named Whiskers and Mittens.
+  // 3. assistant: Wow, Stanford PhD! And hiking is a great way to unwind from tech work.
+  // 4. memory: Sarah works as a data scientist at Google
+  // Sarah specializes in machine learning and natural language processing
+  // Sarah has a PhD in Computer Science from Stanford
+  // Sarah enjoys hiking on weekends
+}
+
+// Main function
+async function main() {
+  console.log("🧠 Fact Extraction Memory Example");
+  console.log("=================================");
+
+  try {
+    await factExtractionMemoryExample();
+  } catch (error) {
+    console.error("Error running fact extraction memory example:", error);
+  }
+}
+
+main().catch(console.error);
@@ -0,0 +1,62 @@
+import { openai } from "@llamaindex/openai";
+import { createMemory, staticBlock } from "llamaindex";
+
+// Configure OpenAI
+const llm = openai({ model: "gpt-4.1-mini" });
+
+// Example: Memory with Static Blocks
+async function staticMemoryBlockExample() {
+  console.log("\n=== Memory with Static Blocks ===");
+  console.log("- Memory always include static block");
+  console.log("- Memory cut off the messages within token limit\n");
+
+  // Create memory with a static block
+  const memory = createMemory([], {
+    tokenLimit: 30, // A small token limit which is not enough for the whole conversation below
+    memoryBlocks: [
+      staticBlock({
+        content:
+          "The user's name is John and he is a software engineer who loves TypeScript and LlamaIndex.",
+      }),
+    ],
+  });
+
+  // Add some messages to the memory
+  await memory.add({
+    role: "user",
+    content: "What do you know about me?",
+  });
+
+  await memory.add({
+    role: "assistant",
+    content:
+      "Based on our conversation, I know you're John, a software engineer who enjoys working with TypeScript and LlamaIndex!",
+  });
+
+  await memory.add({
+    role: "user",
+    content: "Which language does LlamaIndex support?",
+  });
+
+  // Get messages
+  // static block will always be included
+  // only the last message will be included because of token limit set above
+  const messages = await memory.getLLM(llm);
+  messages.forEach((msg, idx) => {
+    console.log(`${idx + 1}. ${msg.role}: ${msg.content}`);
+  });
+  // Messages with static block:
+  // 1. user: The user's name is John and he is a software engineer who loves TypeScript and LlamaIndex.
+  // 2. user: Which language does LlamaIndex support?
+}
+
+// Main function
+async function main() {
+  try {
+    await staticMemoryBlockExample();
+  } catch (error) {
+    console.error("Error running static memory blocks example:", error);
+  }
+}
+
+main().catch(console.error);
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<company name="MidSizeCorp" founded="2008">
+  <division name="Engineering" head="Dana White">
+    <department name="Frontend" lead="Alex Kim">
+      <team name="Web">
+        <employee id="E01">
+          <name>Jordan Lee</name>
+          <role>Lead Developer</role>
+          <projects>
+            <project code="PRJ101" status="active">
+              <title>User Portal</title>
+              <deadline>2025-08-01</deadline>
+              <tasks>
+                <task id="T1011">
+                  <description>Implement login page</description>
+                  <due>2025-05-10</due>
+                </task>
+                <task id="T1012">
+                  <description>Design dashboard</description>
+                  <due>2025-05-20</due>
+                </task>
+              </tasks>
+            </project>
+          </projects>
+        </employee>
+        <employee id="E02">
+          <name>Riley Chen</name>
+          <role>UI Designer</role>
+        </employee>
+      </team>
+      <team name="Mobile">
+        <employee id="E03">
+          <name>Sam Patel</name>
+          <role>iOS Developer</role>
+        </employee>
+      </team>
+    </department>
+    <department name="Backend" lead="Morgan Reed">
+      <team name="API">
+        <employee id="E04">
+          <name>Taylor Jones</name>
+          <role>API Engineer</role>
+        </employee>
+      </team>
+      <team name="Database">
+        <employee id="E05">
+          <name>Casey Nguyen</name>
+          <role>DB Administrator</role>
+        </employee>
+      </team>
+    </department>
+  </division>
+
+  <division name="Marketing" head="Pat Morgan">
+    <department name="Digital" lead="Alex Rivera">
+      <team name="Content">
+        <employee id="M01">
+          <name>Charlie Brooks</name>
+          <role>Content Strategist</role>
+        </employee>
+      </team>
+    </department>
+  </division>
+
+  <headquarters location="Chicago, USA">
+    <address>
+      <street>789 Lake Shore Drive</street>
+      <city>Chicago</city>
+      <zip>60601</zip>
+    </address>
+  </headquarters>
+</company>
@@ -59,7 +59,7 @@ async function main() {

  const anthropic = new Anthropic({
    apiKey: process.env.ANTHROPIC_API_KEY,
-    model: "claude-3-opus",
+    model: "claude-3.5-sonnet",
  });

  // Create an ReActAgent with the function tools
@@ -61,7 +61,7 @@ async function main() {
  // Create an OpenAIAgent with the function tools
  const agent = new ReActAgent({
    llm: new Anthropic({
-      model: "claude-3-opus",
+      model: "claude-3.5-sonnet",
    }),
    tools: [functionTool, functionTool2],
  });
@@ -1,5 +1,5 @@
 import { Anthropic } from "@llamaindex/anthropic";
-import { ChatMemoryBuffer, SimpleChatEngine } from "llamaindex";
+import { createMemory, SimpleChatEngine } from "llamaindex";
 import { stdin as input, stdout as output } from "node:process";
 import readline from "node:readline/promises";

@@ -9,14 +9,12 @@ import readline from "node:readline/promises";
    model: "claude-3-7-sonnet",
  });
  // chatHistory will store all the messages in the conversation
-  const chatHistory = new ChatMemoryBuffer({
-    chatHistory: [
-      {
-        content: "You want to talk in rhymes.",
-        role: "system",
-      },
-    ],
-  });
+  const chatHistory = createMemory([
+    {
+      content: "You want to talk in rhymes.",
+      role: "system",
+    },
+  ]);
  const chatEngine = new SimpleChatEngine({
    llm,
    memory: chatHistory,
@@ -1,14 +1,16 @@
-import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
 import fs from "fs";
+import { tool } from "llamaindex";
+import { z } from "zod";

 (async () => {
  if (!process.env.GOOGLE_API_KEY) {
    throw new Error("Please set the GOOGLE_API_KEY environment variable.");
  }
-  const gemini = new Gemini({
-    model: GEMINI_MODEL.GEMINI_PRO_1_5,
-  });
-  const result = await gemini.chat({
+  const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
+
+  // normal chat
+  const result = await llm.chat({
    messages: [
      { content: "You want to talk in rhymes.", role: "system" },
      {
@@ -18,10 +20,10 @@ import fs from "fs";
      },
    ],
  });
-  console.log(result);
+  console.log("\n normal chat: \n", result);

  // chat with file
-  const resultWithFile = await gemini.chat({
+  const resultWithFile = await llm.chat({
    messages: [
      {
        role: "user",
@@ -39,6 +41,52 @@ import fs from "fs";
      },
    ],
  });
+  console.log("\n chat with file: \n", resultWithFile);

-  console.log(resultWithFile);
+  // chat with image base64
+  const resultWithImageFile = await llm.chat({
+    messages: [
+      {
+        role: "user",
+        content: [
+          {
+            type: "text",
+            text: "What's in this image?",
+          },
+          {
+            type: "image",
+            data: fs
+              .readFileSync("./multimodal/data/60.jpg")
+              .toString("base64"),
+            mimeType: "image/jpeg", // must match the JPEG file read above
+          },
+        ],
+      },
+    ],
+  });
+  console.log("\n chat with image base64: \n", resultWithImageFile);
+
+  // chat with tool
+  const resultWithTool = await llm.chat({
+    messages: [
+      {
+        content: "What's the weather in Tokyo?",
+        role: "user",
+      },
+    ],
+    tools: [
+      tool({
+        name: "weather",
+        description: "Get the weather",
+        parameters: z.object({
+          location: z.string().describe("The location to get the weather for"),
+        }),
+        execute: ({ location }) => {
+          console.log("weather", location);
+          return `The weather in ${location} is sunny and hot`;
+        },
+      }),
+    ],
+  });
+  console.log("\n chat with tool: \n", resultWithTool.message.options); // should have toolCall
 })();
@@ -1,11 +1,14 @@
-import { Gemini, GEMINI_MODEL, GeminiVertexSession } from "@llamaindex/google";
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";

 (async () => {
-  const gemini = new Gemini({
-    model: GEMINI_MODEL.GEMINI_PRO,
-    session: new GeminiVertexSession(),
+  const llm = gemini({
+    model: GEMINI_MODEL.GEMINI_2_0_FLASH,
+    vertex: {
+      project: "your-cloud-project", // update to your cloud project
+      location: "us-central1",
+    },
  });
-  const result = await gemini.chat({
+  const result = await llm.chat({
    messages: [
      { content: "You want to talk in rhymes.", role: "system" },
      {
@@ -16,9 +16,19 @@ async function main() {

  console.log("🚀 Initializing Gemini Live API example...");

+  // Server-side (token creation):
+  const serverllm = gemini({
+    model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
+    httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to generate ephemeral key
+  });
+  const ephemeralKey = await serverllm.live.getEphemeralKey();
+
+  // Client-side (Live API connection):
  const llm = gemini({
+    apiKey: ephemeralKey, // use ephemeral key for client-side
    model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
    voiceName: "Zephyr",
+    httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to init client with ephemeral key
  });

  console.log("📡 Connecting to Gemini Live session...");
@@ -3,8 +3,18 @@ import { liveEvents } from "llamaindex";
 import { saveWavFile } from "./util";

 async function main() {
-  const llm = gemini({
+  // Server-side (token creation):
+  const serverllm = gemini({
    model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
+    httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to generate ephemeral key
+  });
+  const ephemeralKey = await serverllm.live.getEphemeralKey();
+
+  // Client-side (Live API connection):
+  const llm = gemini({
+    apiKey: ephemeralKey, // use ephemeral key for client-side
+    model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
+    httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to init client with ephemeral key
  });

  const session = await llm.live.connect();
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/examples",
-  "version": "0.3.23",
+  "version": "0.3.26",
  "private": true,
  "scripts": {
    "lint": "eslint .",
@@ -11,51 +11,52 @@
    "@azure/cosmos": "^4.1.1",
    "@azure/identity": "^4.4.1",
    "@azure/search-documents": "^12.1.0",
-    "@llamaindex/anthropic": "^0.3.13",
-    "@llamaindex/assemblyai": "^0.1.10",
-    "@llamaindex/astra": "^0.0.25",
-    "@llamaindex/azure": "^0.1.22",
-    "@llamaindex/chroma": "^0.0.25",
-    "@llamaindex/clip": "^0.0.61",
-    "@llamaindex/cloud": "^4.0.15",
-    "@llamaindex/cohere": "^0.0.25",
-    "@llamaindex/core": "^0.6.11",
-    "@llamaindex/deepinfra": "^0.0.61",
-    "@llamaindex/deepseek": "^0.0.21",
-    "@llamaindex/discord": "^0.1.10",
-    "@llamaindex/elastic-search": "^0.1.11",
+    "@llamaindex/anthropic": "^0.3.15",
+    "@llamaindex/assemblyai": "^0.1.12",
+    "@llamaindex/astra": "^0.0.27",
+    "@llamaindex/azure": "^0.1.24",
+    "@llamaindex/bm25-retriever": "^0.0.2",
+    "@llamaindex/chroma": "^0.0.27",
+    "@llamaindex/clip": "^0.0.63",
+    "@llamaindex/cloud": "^4.0.17",
+    "@llamaindex/cohere": "^0.0.27",
+    "@llamaindex/core": "^0.6.13",
+    "@llamaindex/deepinfra": "^0.0.63",
+    "@llamaindex/deepseek": "^0.0.23",
+    "@llamaindex/discord": "^0.1.12",
+    "@llamaindex/elastic-search": "^0.1.13",
    "@llamaindex/env": "^0.1.30",
-    "@llamaindex/firestore": "^1.0.18",
-    "@llamaindex/fireworks": "^0.0.21",
-    "@llamaindex/google": "^0.3.10",
-    "@llamaindex/groq": "^0.0.76",
-    "@llamaindex/huggingface": "^0.1.15",
-    "@llamaindex/jinaai": "^0.0.21",
-    "@llamaindex/milvus": "^0.1.20",
-    "@llamaindex/mistral": "^0.1.11",
-    "@llamaindex/mixedbread": "^0.0.25",
-    "@llamaindex/mongodb": "^0.0.26",
-    "@llamaindex/node-parser": "^2.0.11",
-    "@llamaindex/notion": "^0.1.10",
-    "@llamaindex/ollama": "^0.1.11",
-    "@llamaindex/openai": "^0.4.5",
-    "@llamaindex/perplexity": "^0.0.18",
-    "@llamaindex/pinecone": "^0.1.11",
-    "@llamaindex/portkey-ai": "^0.0.53",
-    "@llamaindex/postgres": "^0.0.54",
-    "@llamaindex/qdrant": "^0.1.21",
-    "@llamaindex/readers": "^3.1.9",
-    "@llamaindex/replicate": "^0.0.53",
-    "@llamaindex/supabase": "^0.1.10",
-    "@llamaindex/together": "^0.0.21",
-    "@llamaindex/tools": "^0.0.17",
-    "@llamaindex/upstash": "^0.0.25",
-    "@llamaindex/vercel": "^0.1.11",
-    "@llamaindex/vllm": "^0.0.47",
-    "@llamaindex/voyage-ai": "^1.0.17",
-    "@llamaindex/weaviate": "^0.0.26",
-    "@llamaindex/workflow": "^1.1.10",
-    "@llamaindex/xai": "workspace:^0.0.8",
+    "@llamaindex/firestore": "^1.0.20",
+    "@llamaindex/fireworks": "^0.0.23",
+    "@llamaindex/google": "^0.3.12",
+    "@llamaindex/groq": "^0.0.78",
+    "@llamaindex/huggingface": "^0.1.17",
+    "@llamaindex/jinaai": "^0.0.23",
+    "@llamaindex/milvus": "^0.1.22",
+    "@llamaindex/mistral": "^0.1.13",
+    "@llamaindex/mixedbread": "^0.0.27",
+    "@llamaindex/mongodb": "^0.0.28",
+    "@llamaindex/node-parser": "^2.0.13",
+    "@llamaindex/notion": "^0.1.12",
+    "@llamaindex/ollama": "^0.1.13",
+    "@llamaindex/openai": "^0.4.7",
+    "@llamaindex/perplexity": "^0.0.20",
+    "@llamaindex/pinecone": "^0.1.13",
+    "@llamaindex/portkey-ai": "^0.0.55",
+    "@llamaindex/postgres": "^0.0.56",
+    "@llamaindex/qdrant": "^0.1.23",
+    "@llamaindex/readers": "^3.1.12",
+    "@llamaindex/replicate": "^0.0.55",
+    "@llamaindex/supabase": "^0.1.13",
+    "@llamaindex/together": "^0.0.23",
+    "@llamaindex/tools": "^0.1.3",
+    "@llamaindex/upstash": "^0.0.27",
+    "@llamaindex/vercel": "^0.1.13",
+    "@llamaindex/vllm": "^0.0.49",
+    "@llamaindex/voyage-ai": "^1.0.19",
+    "@llamaindex/weaviate": "^0.0.28",
+    "@llamaindex/workflow": "^1.1.13",
+    "@llamaindex/xai": "workspace:^0.0.10",
    "@notionhq/client": "^2.2.15",
    "@pinecone-database/pinecone": "^4.0.0",
    "@vercel/postgres": "^0.10.0",
@@ -64,11 +65,11 @@
    "commander": "^12.1.0",
    "dotenv": "^16.4.5",
    "js-tiktoken": "^1.0.14",
-    "llamaindex": "^0.11.9",
+    "llamaindex": "^0.11.12",
    "mongodb": "6.7.0",
    "postgres": "^3.4.4",
    "wikipedia": "^2.1.2",
-    "zod": "^3.23.8"
+    "zod": "^3.25.67"
  },
  "devDependencies": {
    "@types/node": "^22.9.0",
@@ -2,11 +2,7 @@ import { stdin as input, stdout as output } from "node:process";
 import readline from "node:readline/promises";

 import { OpenAI } from "@llamaindex/openai";
-import {
-  ChatSummaryMemoryBuffer,
-  Settings,
-  SimpleChatEngine,
-} from "llamaindex";
+import { createMemory, Settings, SimpleChatEngine } from "llamaindex";

 if (process.env.NODE_ENV === "development") {
  Settings.callbackManager.on("llm-end", (event) => {
@@ -15,10 +11,13 @@ if (process.env.NODE_ENV === "development") {
 }

 async function main() {
-  // Set maxTokens to 75% of the context window size of 4096
-  // This will trigger the summarizer once the chat history reaches 25% of the context window size (1024 tokens)
-  const llm = new OpenAI({ model: "gpt-3.5-turbo", maxTokens: 4096 * 0.75 });
-  const chatHistory = new ChatSummaryMemoryBuffer({ llm });
+  const llm = new OpenAI({ model: "gpt-3.5-turbo" });
+  const chatHistory = createMemory([
+    {
+      content: "You are a helpful assistant.",
+      role: "system",
+    },
+  ]);
  const chatEngine = new SimpleChatEngine({ llm });
  const rl = readline.createInterface({ input, output });

@@ -29,10 +28,6 @@ async function main() {
      chatHistory,
      stream: true,
    });
-    if (chatHistory.getLastSummary()) {
-      // Print the summary of the conversation so far that is produced by the SummaryChatHistory
-      console.log(`Summary: ${chatHistory.getLastSummary()?.content}`);
-    }
    for await (const chunk of stream) {
      process.stdout.write(chunk.response);
    }
@@ -15,11 +15,14 @@
    "start:llamaparse-json": "node --import tsx ./src/llamaparse-json.ts",
    "start:discord": "node --import tsx ./src/discord.ts",
    "start:json": "node --import tsx ./src/json.ts",
-    "start:obsidian": "node --import tsx ./src/obsidian.ts"
+    "start:obsidian": "node --import tsx ./src/obsidian.ts",
+    "start:xml": "node --import tsx ./src/xml.ts",
+    "start:excel": "node --import tsx ./src/excel.ts"
  },
  "dependencies": {
    "@llamaindex/cloud": "workspace:* || ^2.0.24",
    "@llamaindex/readers": "workspace:* || ^1.0.25",
+    "@llamaindex/excel": "workspace:*",
    "llamaindex": "workspace:* || ^0.8.37"
  },
  "devDependencies": {
@@ -0,0 +1,20 @@
+import { ExcelReader } from "@llamaindex/excel";
+
+async function main() {
+  // Load Excel file
+  const reader = new ExcelReader({
+    sheetSpecifier: 0,
+    concatRows: true,
+    fieldSeparator: ",",
+    keyValueSeparator: ":",
+  });
+
+  const documents = await reader.loadData("../data/sample_excel_sheet.xls");
+
+  for (const doc of documents) {
+    console.log(doc.text);
+    console.log("----");
+  }
+}
+
+main().catch(console.error);
@@ -1,4 +1,4 @@
-import { LlamaParseReader } from "@llamaindex/cloud";
+import { LlamaParseReader } from "@llamaindex/cloud/reader";
 import { openai, OpenAIEmbedding } from "@llamaindex/openai";
 import { Settings, VectorStoreIndex } from "llamaindex";

@@ -1,4 +1,4 @@
-import { LlamaParseReader } from "@llamaindex/cloud";
+import { LlamaParseReader } from "@llamaindex/cloud/reader";
 import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
 import { VectorStoreIndex } from "llamaindex";

@@ -0,0 +1,16 @@
+import { XMLReader } from "@llamaindex/readers/xml";
+
+async function main() {
+  // Load XML file
+  const reader = new XMLReader({
+    splitLevel: 2,
+  });
+  const documents = await reader.loadData("../data/company.xml");
+
+  for (const doc of documents) {
+    console.log(doc.text);
+    console.log("----");
+  }
+}
+
+main().catch(console.error);
@@ -0,0 +1,14 @@
+# BM25 Retriever
+
+In this guide, we introduce a BM25 retriever that searches documents using the BM25 method. BM25 (Best Matching 25) is a ranking function that extends TF-IDF by considering term frequency saturation and document length. BM25 effectively ranks documents based on query term occurrence and rarity across the corpus.
+
+## Setup
+
+1. `cd` into the `examples` directory
+2. Run `npm i`
+
+## Example
+
+```bash
+npx tsx ./retrievers/bm25/example.ts
+```
@@ -0,0 +1,33 @@
+import { Bm25Retriever } from "@llamaindex/bm25-retriever";
+import { OpenAIEmbedding } from "@llamaindex/openai";
+import { PDFReader } from "@llamaindex/readers/pdf";
+import { MetadataMode, Settings, VectorStoreIndex } from "llamaindex";
+
+Settings.embedModel = new OpenAIEmbedding();
+
+async function main() {
+  // Load PDF
+  const reader = new PDFReader();
+  const documents = await reader.loadData("./data/brk-2022.pdf");
+
+  // Split text and create embeddings. Store them in a VectorStoreIndex
+  const index = await VectorStoreIndex.fromDocuments(documents);
+
+  const retriever = new Bm25Retriever({
+    docStore: index.docStore,
+    topK: 3,
+  });
+
+  // Query the data
+  const response = await retriever.retrieve({
+    query: "What mistakes did Warren E. Buffett make?",
+  });
+
+  // Output response
+  response.forEach((r) => {
+    console.log(`Score: ${r.score}`);
+    console.log(`Text: ${r.node.getContent(MetadataMode.NONE)}`);
+  });
+}
+
+main().catch(console.error);
@@ -1,8 +1,4 @@
-import {
-  GEMINI_EMBEDDING_MODEL,
-  GeminiEmbedding,
-  GeminiSession,
-} from "@llamaindex/google";
+import { GEMINI_EMBEDDING_MODEL, GeminiEmbedding } from "@llamaindex/google";
 import { QdrantVectorStore } from "@llamaindex/qdrant";
 import {
  Document,
@@ -12,9 +8,6 @@ import {

 const embedding = new GeminiEmbedding({
  model: GEMINI_EMBEDDING_MODEL.EMBEDDING_001,
-  session: new GeminiSession({
-    apiKey: process.env.GEMINI_API_KEY,
-  }),
 });

 async function main() {
@@ -1,5 +1,25 @@
 # @llamaindex/autotool

+## 8.0.12
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+
+## 8.0.11
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+
+## 8.0.10
+
+### Patch Changes
+
+- llamaindex@0.11.10
+
 ## 8.0.9

 ### Patch Changes
@@ -1,5 +1,28 @@
 # @llamaindex/autotool-01-node-example

+## 0.0.120
+
+### Patch Changes
+
+- Updated dependencies [515a8b9]
+  - llamaindex@0.11.12
+  - @llamaindex/autotool@8.0.12
+
+## 0.0.119
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+  - @llamaindex/autotool@8.0.11
+
+## 0.0.118
+
+### Patch Changes
+
+- llamaindex@0.11.10
+- @llamaindex/autotool@8.0.10
+
 ## 0.0.117

 ### Patch Changes
@@ -13,5 +13,5 @@
  "scripts": {
    "start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
  },
-  "version": "0.0.117"
+  "version": "0.0.120"
 }
@@ -6,7 +6,7 @@
    "url": "git+https://github.com/run-llama/LlamaIndexTS.git",
    "directory": "packages/autotool"
  },
-  "version": "8.0.9",
+  "version": "8.0.12",
  "description": "auto transpile your JS function to LLM Agent compatible",
  "files": [
    "dist",
@@ -1,5 +1,22 @@
 # @llamaindex/cloud

+## 4.0.17
+
+### Patch Changes
+
+- Updated dependencies [d578889]
+- Updated dependencies [0fcc92f]
+- Updated dependencies [515a8b9]
+  - @llamaindex/core@0.6.13
+
+## 4.0.16
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+- Updated dependencies [7039e1a]
+  - @llamaindex/core@0.6.12
+
 ## 4.0.15

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloud",
-  "version": "4.0.15",
+  "version": "4.0.17",
  "type": "module",
  "license": "MIT",
  "scripts": {
@@ -79,6 +79,6 @@
  },
  "dependencies": {
    "p-retry": "^6.2.1",
-    "zod": "^3.25.7"
+    "zod": "^3.25.67"
  }
 }
@@ -1,5 +1,20 @@
 # @llamaindex/core

+## 0.6.13
+
+### Patch Changes
+
+- d578889: Add new memory API
+- 0fcc92f: Fix: split sentences must not trim whitespaces
+- 515a8b9: Fix: logging for fromPersistPath
+
+## 0.6.12
+
+### Patch Changes
+
+- 7039e1a: Internal cleanup of base64 encoding
+- 7039e1a: chore: migrate to @google/genai SDK
+
 ## 0.6.11

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/core",
  "type": "module",
-  "version": "0.6.11",
+  "version": "0.6.13",
  "description": "LlamaIndex Core Module",
  "exports": {
    "./agent": {
@@ -312,7 +312,7 @@
    "@llamaindex/env": "workspace:*",
    "@types/node": "^22.9.0",
    "magic-bytes.js": "^1.10.0",
-    "zod": "^3.23.8",
-    "zod-to-json-schema": "^3.23.3"
+    "zod": "^3.25.67",
+    "zod-to-json-schema": "^3.24.6"
  }
 }
@@ -152,6 +152,7 @@ export type AgentParamsBase<

 /**
 * Worker will schedule tasks and handle the task execution
+ * @deprecated Use agent instead.
 */
 export abstract class AgentWorker<
  AI extends LLM,
@@ -250,6 +251,7 @@ export abstract class AgentWorker<

 /**
 * Runner will manage the task execution and provide a high-level API for the user
+ * @deprecated Use agent instead.
 */
 export abstract class AgentRunner<
  AI extends LLM,
@@ -62,6 +62,9 @@ export class LLMAgentWorker extends AgentWorker<LLM> {
  taskHandler = AgentRunner.defaultTaskHandler;
 }

+/**
+ * @deprecated Use agent instead.
+ */
 export class LLMAgent extends AgentRunner<LLM> {
  constructor(params: LLMAgentParams<LLM>) {
    validateAgentParams(params);
@@ -1,5 +1,5 @@
 import type { ChatMessage, MessageContent } from "../llms";
-import type { BaseMemory } from "../memory";
+import type { Memory } from "../memory";
 import { EngineResponse } from "../schema";

 export interface BaseChatEngineParams<
@@ -9,9 +9,7 @@ export interface BaseChatEngineParams<
  /**
   * Optional chat history if you want to customize the chat history.
   */
-  chatHistory?:
-    | ChatMessage<AdditionalMessageOptions>[]
-    | BaseMemory<AdditionalMessageOptions>;
+  chatHistory?: ChatMessage<AdditionalMessageOptions>[] | Memory;
 }

 export interface StreamingChatEngineParams<
@@ -1,7 +1,7 @@
 import { wrapEventCaller } from "../decorator";
 import { Settings } from "../global";
 import type { ChatMessage, LLM, MessageContent, MessageType } from "../llms";
-import { BaseMemory, ChatMemoryBuffer } from "../memory";
+import { Memory, createMemory } from "../memory";
 import type { BaseNodePostprocessor } from "../postprocessor";
 import {
  type ContextSystemPrompt,
@@ -23,7 +23,7 @@ import type { ContextGenerator } from "./type";
 export type ContextChatEngineOptions = {
  retriever: BaseRetriever;
  chatModel?: LLM | undefined;
-  chatHistory?: ChatMessage[] | undefined;
+  chatHistory?: ChatMessage[] | Memory | undefined;
  contextSystemPrompt?: ContextSystemPrompt | undefined;
  nodePostprocessors?: BaseNodePostprocessor[] | undefined;
  systemPrompt?: string | undefined;
@@ -37,18 +37,21 @@ export type ContextChatEngineOptions = {
 */
 export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
  chatModel: LLM;
-  memory: BaseMemory;
+  memory: Memory;
  contextGenerator: ContextGenerator & PromptMixin;
  systemPrompt?: string | undefined;

  get chatHistory() {
-    return this.memory.getMessages();
+    return this.memory.getLLM();
  }

  constructor(init: ContextChatEngineOptions) {
    super();
    this.chatModel = init.chatModel ?? Settings.llm;
-    this.memory = new ChatMemoryBuffer({ chatHistory: init?.chatHistory });
+    this.memory =
+      init?.chatHistory instanceof Memory
+        ? init.chatHistory
+        : createMemory(init?.chatHistory ?? []);
    this.contextGenerator = new DefaultContextGenerator({
      retriever: init.retriever,
      contextSystemPrompt: init?.contextSystemPrompt,
@@ -87,12 +90,9 @@ export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
    const { message, stream } = params;
    const chatHistory = params.chatHistory
-      ? new ChatMemoryBuffer({
-          chatHistory:
-            params.chatHistory instanceof BaseMemory
-              ? await params.chatHistory.getMessages()
-              : params.chatHistory,
-        })
+      ? params.chatHistory instanceof Memory
+        ? params.chatHistory
+        : createMemory(params.chatHistory)
      : this.memory;
    const requestMessages = await this.prepareRequestMessages(
      message,
@@ -110,7 +110,7 @@ export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
          initialValue: "",
          reducer: (accumulator, part) => (accumulator += part.delta),
          finished: (accumulator) => {
-            chatHistory.put({ content: accumulator, role: "assistant" });
+            void chatHistory.add({ content: accumulator, role: "assistant" });
          },
        }),
        (r) => EngineResponse.fromChatResponseChunk(r, requestMessages.nodes),
@@ -120,26 +120,26 @@ export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
      messages: requestMessages.messages,
      additionalChatOptions: params.chatOptions as object,
    });
-    chatHistory.put(response.message);
+    await chatHistory.add(response.message);
    return EngineResponse.fromChatResponse(response, requestMessages.nodes);
  }

-  reset() {
-    this.memory.reset();
+  async reset() {
+    await this.memory.clear();
  }

  private async prepareRequestMessages(
    message: MessageContent,
-    chatHistory: BaseMemory,
+    chatHistory: Memory,
  ) {
-    chatHistory.put({
+    await chatHistory.add({
      content: message,
      role: "user",
    });
    const textOnly = extractText(message);
    const context = await this.contextGenerator.generate(textOnly);
    const systemMessage = this.prependSystemPrompt(context.message);
-    const messages = await chatHistory.getMessages([systemMessage]);
+    const messages = await chatHistory.getLLM(this.chatModel, [systemMessage]);
    return { nodes: context.nodes, messages };
  }

@@ -1,5 +1,5 @@
 import type { LLM } from "../llms";
-import { BaseMemory, ChatMemoryBuffer } from "../memory";
+import { createMemory, Memory } from "../memory";
 import { EngineResponse } from "../schema";
 import { streamConverter, streamReducer } from "../utils";
 import type {
@@ -16,20 +16,16 @@ import { Settings } from "../global";
 */

 export class SimpleChatEngine implements BaseChatEngine {
-  memory: BaseMemory;
+  memory: Memory;
  llm: LLM;

  get chatHistory() {
-    return this.memory.getMessages();
+    return this.memory.getLLM();
  }

  constructor(init?: Partial<SimpleChatEngine>) {
    this.llm = init?.llm ?? Settings.llm;
-    this.memory =
-      init?.memory ??
-      new ChatMemoryBuffer({
-        llm: this.llm,
-      });
+    this.memory = init?.memory ?? createMemory();
  }

  chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
@@ -43,19 +39,15 @@ export class SimpleChatEngine implements BaseChatEngine {
    const { message, stream } = params;

    const chatHistory = params.chatHistory
-      ? new ChatMemoryBuffer({
-          llm: this.llm,
-          chatHistory:
-            params.chatHistory instanceof BaseMemory
-              ? await params.chatHistory.getMessages()
-              : params.chatHistory,
-        })
+      ? params.chatHistory instanceof Memory
+        ? params.chatHistory
+        : createMemory(params.chatHistory)
      : this.memory;
-    chatHistory.put({ content: message, role: "user" });
+    await chatHistory.add({ content: message, role: "user" });

    if (stream) {
      const stream = await this.llm.chat({
-        messages: await chatHistory.getMessages(),
+        messages: await chatHistory.getLLM(this.llm),
        stream: true,
      });
      return streamConverter(
@@ -64,7 +56,7 @@ export class SimpleChatEngine implements BaseChatEngine {
          initialValue: "",
          reducer: (accumulator, part) => accumulator + part.delta,
          finished: (accumulator) => {
-            chatHistory.put({ content: accumulator, role: "assistant" });
+            void chatHistory.add({ content: accumulator, role: "assistant" });
          },
        }),
        EngineResponse.fromChatResponseChunk,
@@ -73,13 +65,13 @@ export class SimpleChatEngine implements BaseChatEngine {

    const response = await this.llm.chat({
      stream: false,
-      messages: await chatHistory.getMessages(),
+      messages: await chatHistory.getLLM(this.llm),
    });
-    chatHistory.put(response.message);
+    await chatHistory.add(response.message);
    return EngineResponse.fromChatResponse(response);
  }

-  reset() {
-    this.memory.reset();
+  async reset() {
+    await this.memory.clear();
  }
 }
@@ -0,0 +1,7 @@
+import type { MemoryMessage } from "../types";
+
+/**
+ * Contract for converting between an external message format `T` and the
+ * internal `MemoryMessage` representation.
+ */
+export interface MessageAdapter<T, TMessageOptions extends object = object> {
+  /** Convert a memory message into the external format `T`. */
+  fromMemory(message: MemoryMessage<TMessageOptions>): T;
+  /** Convert an external message of type `T` into a memory message. */
+  toMemory(message: T): MemoryMessage<TMessageOptions>;
+  /** Type guard reporting whether `message` can be treated as `T`. */
+  isCompatible(message: unknown): message is T;
+}
@@ -0,0 +1,43 @@
+import { randomUUID } from "@llamaindex/env";
+import type { ChatMessage } from "../../llms";
+import type { MemoryMessage } from "../types";
+import { type MessageAdapter } from "./base";
+
+/**
+ * Adapter between LlamaIndex `ChatMessage` objects and memory messages.
+ */
+export class ChatMessageAdapter<
+  AdditionalMessageOptions extends object = object,
+> implements
+    MessageAdapter<
+      ChatMessage<AdditionalMessageOptions>,
+      AdditionalMessageOptions
+    >
+{
+  /**
+   * Project a memory message onto the `content`, `role` and `options` fields
+   * of a chat message; all other memory fields are left out.
+   */
+  fromMemory(
+    message: MemoryMessage<AdditionalMessageOptions>,
+  ): ChatMessage<AdditionalMessageOptions> {
+    const { content, role, options } = message;
+    return { content, role, options };
+  }
+
+  /**
+   * Wrap a chat message as a memory message with a freshly generated `id`
+   * and `createdAt`. Fields present on `message` are spread last, so they
+   * take precedence over the generated values.
+   */
+  toMemory(
+    message: ChatMessage<AdditionalMessageOptions>,
+  ): MemoryMessage<AdditionalMessageOptions> {
+    const generated = { id: randomUUID(), createdAt: new Date() };
+    return { ...generated, ...message };
+  }
+
+  /**
+   * Type guard: true for any non-null object that has a truthy `role` and a
+   * `content` property.
+   */
+  isCompatible(
+    message: unknown,
+  ): message is ChatMessage<AdditionalMessageOptions> {
+    if (!message || typeof message !== "object") {
+      return false;
+    }
+    if (!("role" in message) || !message.role) {
+      return false;
+    }
+    return "content" in message;
+  }
+}
@@ -0,0 +1,3 @@
+export * from "./base";
+export * from "./chat";
+export * from "./vercel";
@@ -0,0 +1,198 @@
+import type {
+  ChatMessage,
+  MessageContent,
+  MessageContentDetail,
+} from "../../llms";
+import { extractText } from "../../utils";
+import type { MemoryMessage } from "../types";
+import type { MessageAdapter } from "./base";
+
+/**
+ * UIMessage from the vercel/ai package (external), declared locally so the
+ * adapter below can be typed against it.
+ */
+export type VercelMessage = {
+  id: string;
+  role: "system" | "user" | "assistant" | "data";
+  // Plain-text content of the message.
+  content: string;
+  createdAt?: Date | undefined;
+  annotations?: Array<unknown> | undefined;
+  // Structured parts; each part has a `type` discriminator plus arbitrary
+  // extra fields (the adapter below reads `text`, `data` and `mimeType`).
+  parts: Array<{ type: string; [key: string]: unknown }>;
+};
+
+/**
+ * Adapter that converts between memory messages and Vercel UI messages.
+ */
+export class VercelMessageAdapter<
+  AdditionalMessageOptions extends object = object,
+> implements MessageAdapter<VercelMessage, AdditionalMessageOptions>
+{
+  /**
+   * Convert a memory message to the Vercel UI message format.
+   *
+   * Roles without a Vercel equivalent are mapped: `memory` becomes `system`,
+   * `developer` (and anything unrecognized) becomes `user`.
+   */
+  fromMemory(memoryMessage: MemoryMessage<object>): VercelMessage {
+    const parts = this.convertMessageContentToVercelParts(
+      memoryMessage.content,
+    );
+
+    // Convert role to UI message role
+    let role: VercelMessage["role"];
+    switch (memoryMessage.role) {
+      case "system":
+      case "user":
+      case "assistant":
+        role = memoryMessage.role;
+        break;
+      case "memory":
+        role = "system";
+        break;
+      case "developer":
+        role = "user";
+        break;
+      default:
+        role = "user"; // Default fallback, should not happen
+    }
+
+    return {
+      id: memoryMessage.id,
+      role,
+      content: extractText(memoryMessage.content),
+      parts,
+      createdAt: memoryMessage.createdAt,
+      annotations: memoryMessage.annotations,
+    };
+  }
+
+  /**
+   * Convert a Vercel UI message to a memory message.
+   *
+   * The content is built from `parts`; when the parts contain nothing usable
+   * the plain `content` string of the UI message is used instead.
+   */
+  toMemory(uiMessage: VercelMessage): MemoryMessage<AdditionalMessageOptions> {
+    // Convert UI message role to MessageType
+    let role: ChatMessage["role"];
+    switch (uiMessage.role) {
+      case "system":
+      case "user":
+      case "assistant":
+        role = uiMessage.role;
+        break;
+      case "data":
+        role = "user"; // Map data role to user
+        break;
+      default:
+        role = "user"; // Default fallback, should not happen
+    }
+
+    // Convert parts to MessageContent
+    const content = this.convertVercelPartsToMessageContent(uiMessage.parts);
+
+    return {
+      id: uiMessage.id,
+      content: content ?? uiMessage.content,
+      role,
+      createdAt: uiMessage.createdAt,
+      annotations: uiMessage.annotations,
+    };
+  }
+
+  /**
+   * Validate if object matches VercelMessage structure.
+   * Only the presence of `role`, `content` and `parts` is checked.
+   */
+  isCompatible(message: unknown): message is VercelMessage {
+    return !!(
+      message &&
+      typeof message === "object" &&
+      "role" in message &&
+      "content" in message &&
+      "parts" in message
+    );
+  }
+
+  /**
+   * Convert UI parts to MessageContent.
+   *
+   * `file` parts become file details. Any other part is kept only when it
+   * actually carries a string `text`; parts without text are skipped rather
+   * than emitted as text details whose `text` is `undefined`.
+   *
+   * @returns `null` when no part yields usable content, a plain string when
+   * the only detail is text, otherwise the list of details.
+   */
+  private convertVercelPartsToMessageContent(
+    parts: VercelMessage["parts"],
+  ): MessageContent | null {
+    const details: MessageContentDetail[] = [];
+
+    for (const part of parts) {
+      if (part.type === "file") {
+        details.push({
+          type: "file",
+          data: part.data as string,
+          mimeType: part.mimeType as string,
+        });
+      } else if (typeof part.text === "string") {
+        details.push({
+          type: "text",
+          text: part.text,
+        });
+      }
+    }
+
+    // Nothing usable: let the caller fall back to the plain content string
+    if (details.length === 0) {
+      return null;
+    }
+
+    // If only one text detail, return as string
+    if (details.length === 1 && details[0]?.type === "text") {
+      return details[0].text;
+    }
+
+    return details;
+  }
+
+  /**
+   * Convert MessageContent to UI parts.
+   *
+   * Strings and text details become `text` parts, image URLs become a
+   * bracketed text placeholder, binary details become `file` parts, and any
+   * unknown detail is serialized as JSON text.
+   */
+  private convertMessageContentToVercelParts(
+    content: MessageContent,
+  ): VercelMessage["parts"] {
+    if (typeof content === "string") {
+      return [
+        {
+          type: "text",
+          text: content,
+        },
+      ];
+    }
+
+    const parts: VercelMessage["parts"] = [];
+
+    for (const detail of content) {
+      switch (detail.type) {
+        case "text":
+          parts.push({
+            type: "text",
+            text: detail.text,
+          });
+          break;
+        case "image_url":
+          parts.push({
+            type: "text",
+            text: `[Image URL: ${detail.image_url.url}]`,
+          });
+          break;
+        case "audio":
+        case "video":
+        case "image":
+        case "file":
+          // NOTE(review): `mimeType` is filled with the detail type (e.g.
+          // "audio"), not a real MIME type, while the reverse conversion
+          // reads `part.mimeType` back as the file's MIME type. Confirm
+          // whether the detail's own MIME type was intended here.
+          parts.push({
+            type: "file",
+            data: detail.data,
+            mimeType: detail.type,
+          });
+          break;
+        default:
+          // For unknown types, create a text representation
+          parts.push({
+            type: "text",
+            text: JSON.stringify(detail),
+          });
+      }
+    }
+
+    return parts;
+  }
+}
@@ -0,0 +1,50 @@
+import { randomUUID } from "@llamaindex/env";
+import type { MemoryMessage } from "../types";
+
+/**
+ * Options shared by all memory blocks.
+ */
+export type MemoryBlockOptions = {
+  /**
+   * The id of the memory block.
+   * When omitted, `BaseMemoryBlock` generates one of the form
+   * `memory-block-<uuid>`.
+   */
+  id?: string;
+  /**
+   * The priority of the memory block.
+   * Note: if priority is 0, the block content is always included in the memory context.
+   */
+  priority: number;
+  /**
+   * Whether the memory block is long term.
+   * Default is true.
+   */
+  isLongTerm?: boolean;
+};
+
+/**
+ * Abstract foundation for memory blocks: holds the block identity and
+ * scheduling metadata, and defines the read/write contract subclasses fulfil.
+ */
+export abstract class BaseMemoryBlock<
+  TAdditionalMessageOptions extends object = object,
+> {
+  public readonly id: string;
+  public readonly priority: number;
+  public readonly isLongTerm: boolean;
+
+  /**
+   * @param options - Block settings; a missing `id` is replaced by a generated
+   * `memory-block-<uuid>` value and a missing `isLongTerm` defaults to true.
+   */
+  constructor(options: MemoryBlockOptions) {
+    const { id, priority, isLongTerm } = options;
+    this.id = id ?? `memory-block-${randomUUID()}`;
+    this.priority = priority;
+    this.isLongTerm = isLongTerm ?? true;
+  }
+
+  /**
+   * Read the current content of the block.
+   *
+   * @returns The block content as a list of memory messages.
+   */
+  abstract get(): Promise<MemoryMessage<TAdditionalMessageOptions>[]>;
+
+  /**
+   * Hand messages to the block for storage.
+   *
+   * @param messages - The memory messages to store.
+   */
+  abstract put(
+    messages: MemoryMessage<TAdditionalMessageOptions>[],
+  ): Promise<void>;
+}
@@ -0,0 +1,153 @@
+import type { LLM, MessageType } from "../../llms";
+import type { MemoryMessage } from "../types";
+import { BaseMemoryBlock, type MemoryBlockOptions } from "./base";
+
+// Default prompt template for extracting facts from a conversation segment.
+// The `{{conversation}}` and `{{existing_facts}}` placeholders are filled in
+// by FactExtractionMemoryBlock.put(); the LLM is asked to answer with a JSON
+// object of the form { "facts": [...] }.
+const DEFAULT_EXTRACTION_PROMPT = `
+You are a precise fact extraction system designed to identify key information from conversations.
+
+CONVERSATION SEGMENT:
+{{conversation}}
+
+EXISTING FACTS:
+{{existing_facts}}
+
+INSTRUCTIONS: 
+1. Review the conversation segment provided above.
+2. Extract specific, concrete facts the user has disclosed or important information discovered
+3. Focus on factual information like preferences, personal details, requirements, constraints, or context
+4. Do not include opinions, summaries, or interpretations - only extract explicit information
+5. Do not duplicate facts that are already in the existing facts list
+
+Respond with the new facts from the conversation segment using the following JSON format:
+{
+  "facts": ["fact1", "fact2", "fact3", ...]
+}
+`;
+
+// Default prompt template for condensing the stored facts once they exceed
+// the configured maximum. The `{{existing_facts}}` and `{{max_facts}}`
+// placeholders are filled in by FactExtractionMemoryBlock.put(); they must be
+// written without inner spaces, because substitution matches the exact
+// placeholder text.
+const DEFAULT_SUMMARY_PROMPT = `
+You are a precise fact condensing system designed to summarize facts in a concise manner.
+
+EXISTING FACTS:
+{{existing_facts}}
+
+INSTRUCTIONS:
+1. Review the current list of existing facts
+2. Condense the facts into a more concise list, less than {{max_facts}} facts
+3. Focus on factual information like preferences, personal details, requirements, constraints, or context
+4. Do not include opinions, summaries, or interpretations - only extract explicit information
+5. Do not duplicate facts that are already in the existing facts list
+
+Respond with the condensed facts using the following JSON format:
+{
+  "facts": ["fact1", "fact2", "fact3", ...]
+}
+`;
+
+/**
+ * The options for the fact extraction memory block.
+ * Combines the block-specific settings below with the shared
+ * `MemoryBlockOptions`; `isLongTerm`, when given, may only be `true`.
+ */
+export type FactExtractionMemoryBlockOptions = {
+  /**
+   * The fact extraction model to use.
+   */
+  llm: LLM;
+  /**
+   * The maximum number of facts to keep. When the stored facts exceed this
+   * number they are condensed with the summary prompt.
+   */
+  maxFacts: number;
+  /**
+   * The prompt to use for fact extraction.
+   * Falls back to the built-in extraction prompt when omitted.
+   */
+  extractionPrompt?: string;
+  /**
+   * The prompt to use for fact summary.
+   * Falls back to the built-in summary prompt when omitted.
+   */
+  summaryPrompt?: string;
+} & MemoryBlockOptions & {
+    isLongTerm?: true;
+  };
+
+/**
+ * A memory block that stores facts extracted from conversations.
+ *
+ * On every `put` the LLM is asked to extract new facts from the given
+ * messages; once more than `maxFacts` facts are stored, the LLM is asked to
+ * condense them.
+ */
+export class FactExtractionMemoryBlock<
+  TAdditionalMessageOptions extends object = object,
+> extends BaseMemoryBlock<TAdditionalMessageOptions> {
+  private readonly llm: LLM;
+  private facts: string[] = [];
+  private readonly maxFacts: number;
+  private readonly extractionPrompt: string;
+  private readonly summaryPrompt: string;
+
+  constructor(options: FactExtractionMemoryBlockOptions) {
+    super(options);
+    this.llm = options.llm;
+    this.maxFacts = options.maxFacts;
+    this.extractionPrompt =
+      options.extractionPrompt ?? DEFAULT_EXTRACTION_PROMPT;
+    this.summaryPrompt = options.summaryPrompt ?? DEFAULT_SUMMARY_PROMPT;
+  }
+
+  /**
+   * Returns the stored facts as a single `memory`-role message, one fact per
+   * line. The message id is the id of this block.
+   */
+  async get(): Promise<MemoryMessage<TAdditionalMessageOptions>[]> {
+    const fact = {
+      id: this.id,
+      content: this.facts.join("\n"),
+      role: "memory" as MessageType,
+    };
+    return [fact];
+  }
+
+  /**
+   * Extract new facts from `messages` and add them to the stored facts,
+   * condensing the list when it grows beyond `maxFacts`.
+   *
+   * @param messages - The conversation segment to extract facts from; an
+   * empty list is a no-op and does not call the LLM.
+   * @throws SyntaxError when the LLM response is not valid JSON.
+   * @throws Error when the response has no `facts` array, or when condensing
+   * yields no facts.
+   */
+  async put(
+    messages: MemoryMessage<TAdditionalMessageOptions>[],
+  ): Promise<void> {
+    if (messages.length === 0) {
+      return;
+    }
+    // Format conversation (text only, so structured content is not
+    // stringified as "[object Object]")
+    const conversation = `\n\t${messages
+      .map((m) => this.messageText(m.content))
+      .join("\n\t")}`;
+    const prompt = this.fillTemplate(this.extractionPrompt, {
+      conversation,
+      existing_facts: this.formatFacts(),
+    });
+    const response = await this.llm.complete({
+      prompt,
+    });
+    const newFacts = this.parseFacts(response.text);
+    // No new facts, so no need to update the facts
+    if (newFacts.length === 0) {
+      return;
+    }
+    this.facts.push(...newFacts);
+
+    // Condense the facts
+    if (this.facts.length > this.maxFacts) {
+      const summaryPrompt = this.fillTemplate(this.summaryPrompt, {
+        existing_facts: this.formatFacts(),
+        max_facts: this.maxFacts.toString(),
+      });
+      const summaryResponse = await this.llm.complete({
+        prompt: summaryPrompt,
+      });
+      const condensedFacts = this.parseFacts(summaryResponse.text);
+      if (condensedFacts.length === 0) {
+        throw new Error(
+          `[FactExtraction] Invalid response from LLM: ${summaryResponse.text}`,
+        );
+      }
+      // Only get the first maxFacts facts (in case the LLM returned more)
+      this.facts = condensedFacts.slice(0, this.maxFacts);
+    }
+  }
+
+  /** Render the stored facts the way the prompts expect them. */
+  private formatFacts(): string {
+    return `{ facts: [${this.facts.join(", ")}] }`;
+  }
+
+  /**
+   * Substitute `{{name}}` placeholders in a prompt template.
+   * Whitespace inside the braces is tolerated (`{{ name }}`), every occurrence
+   * is replaced, and unknown placeholders are left untouched. A replacer
+   * function is used so that `$` sequences in the inserted text are not
+   * interpreted as replacement patterns.
+   */
+  private fillTemplate(
+    template: string,
+    values: Record<string, string>,
+  ): string {
+    return template.replace(
+      /\{\{\s*(\w+)\s*\}\}/g,
+      (placeholder: string, key: string) =>
+        Object.prototype.hasOwnProperty.call(values, key)
+          ? (values[key] ?? placeholder)
+          : placeholder,
+    );
+  }
+
+  /** Reduce message content to its text: strings as-is, text details joined. */
+  private messageText(
+    content: MemoryMessage<TAdditionalMessageOptions>["content"],
+  ): string {
+    if (typeof content === "string") {
+      return content;
+    }
+    const texts: string[] = [];
+    for (const detail of content) {
+      if (detail.type === "text") {
+        texts.push(detail.text);
+      }
+    }
+    return texts.join("\n");
+  }
+
+  /**
+   * Parse an LLM response of the form `{ "facts": [...] }`.
+   * Entries that are not strings are dropped.
+   */
+  private parseFacts(text: string): string[] {
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(text);
+    } catch {
+      // Keep the SyntaxError type JSON.parse would raise, but say what failed
+      throw new SyntaxError(
+        `[FactExtraction] LLM response is not valid JSON: ${text}`,
+      );
+    }
+    const facts =
+      parsed !== null && typeof parsed === "object"
+        ? (parsed as { facts?: unknown }).facts
+        : undefined;
+    if (!Array.isArray(facts)) {
+      throw new Error(`[FactExtraction] Invalid response from LLM: ${text}`);
+    }
+    return facts.filter((fact): fact is string => typeof fact === "string");
+  }
+}
@@ -0,0 +1,3 @@
+export { BaseMemoryBlock } from "./base";
+export { FactExtractionMemoryBlock } from "./fact";
+export { StaticMemoryBlock } from "./static";
@@ -0,0 +1,51 @@
+import type { MessageContent, MessageType } from "../../llms";
+import type { MemoryMessage } from "../types";
+import { BaseMemoryBlock, type MemoryBlockOptions } from "./base";
+
+/**
+ * Options for a static memory block. `priority` and `isLongTerm` cannot be
+ * set here; only the remaining `MemoryBlockOptions` fields are accepted.
+ */
+export type StaticMemoryBlockOptions = {
+  /**
+   * The static content to store.
+   */
+  content: MessageContent;
+  /**
+   * The role of the message.
+   * Defaults to "user" when omitted.
+   */
+  messageRole?: MessageType;
+} & Omit<MemoryBlockOptions, "priority" | "isLongTerm">;
+
+/**
+ * Memory block holding fixed content that never changes after construction.
+ * It is created with priority 0 and marked as not long term.
+ */
+export class StaticMemoryBlock<
+  TAdditionalMessageOptions extends object = object,
+> extends BaseMemoryBlock<TAdditionalMessageOptions> {
+  private readonly content: MessageContent;
+  private readonly messageRole: MessageType;
+
+  /**
+   * @param options - The static content, an optional message role (defaults
+   * to "user") and the remaining block options.
+   */
+  constructor(options: StaticMemoryBlockOptions) {
+    super({ ...options, priority: 0, isLongTerm: false });
+    const { content, messageRole } = options;
+    this.content = content;
+    this.messageRole = messageRole ?? "user";
+  }
+
+  /**
+   * Produce the static content as a single message whose id is the block id.
+   */
+  async get(): Promise<MemoryMessage<TAdditionalMessageOptions>[]> {
+    const staticMessage = {
+      id: this.id,
+      role: this.messageRole,
+      content: this.content,
+    };
+    return [staticMessage];
+  }
+
+  /**
+   * Accepts messages for interface compatibility but stores nothing.
+   */
+  async put(
+    _messages: MemoryMessage<TAdditionalMessageOptions>[],
+  ): Promise<void> {
+    // Intentionally empty: the content of this block is fixed
+    return;
+  }
+}
@@ -1,13 +1,14 @@
-import { Settings } from "../global";
-import type { ChatMessage } from "../llms";
-import { type BaseChatStore, SimpleChatStore } from "../storage/chat-store";
-import { extractText } from "../utils";
+import { Settings } from "../../global";
+import type { ChatMessage } from "../../llms";
+import { type BaseChatStore, SimpleChatStore } from "../../storage/chat-store";
+import { extractText } from "../../utils";

 export const DEFAULT_TOKEN_LIMIT_RATIO = 0.75;
 export const DEFAULT_CHAT_STORE_KEY = "chat_history";

 /**
 * A ChatMemory is used to keep the state of back and forth chat messages
+ * @deprecated Use Memory instead.
 */
 export abstract class BaseMemory<
  AdditionalMessageOptions extends object = object,
@@ -55,6 +56,9 @@ export abstract class BaseMemory<
  }
 }

+/**
+ * @deprecated Use Memory with snapshot feature with your own storage instead.
+ */
 export abstract class BaseChatStoreMemory<
  AdditionalMessageOptions extends object = object,
 > extends BaseMemory<AdditionalMessageOptions> {
@@ -1,6 +1,6 @@
-import { Settings } from "../global";
-import type { ChatMessage, LLM } from "../llms";
-import { type BaseChatStore } from "../storage/chat-store";
+import { Settings } from "../../global";
+import type { ChatMessage, LLM } from "../../llms";
+import { type BaseChatStore } from "../../storage/chat-store";
 import { BaseChatStoreMemory, DEFAULT_TOKEN_LIMIT_RATIO } from "./base";

 type ChatMemoryBufferOptions<AdditionalMessageOptions extends object = object> =
@@ -12,6 +12,9 @@ type ChatMemoryBufferOptions<AdditionalMessageOptions extends object = object> =
    llm?: LLM<object, AdditionalMessageOptions> | undefined;
  };

+/**
+ * @deprecated Use Memory instead.
+ */
 export class ChatMemoryBuffer<
  AdditionalMessageOptions extends object = object,
 > extends BaseChatStoreMemory<AdditionalMessageOptions> {
@@ -1,10 +1,13 @@
 import { type Tokenizer, tokenizers } from "@llamaindex/env/tokenizers";
-import { Settings } from "../global";
-import type { ChatMessage, LLM, MessageType } from "../llms";
-import { defaultSummaryPrompt, type SummaryPrompt } from "../prompts";
-import { extractText, messagesToHistory } from "../utils";
+import { Settings } from "../../global";
+import type { ChatMessage, LLM, MessageType } from "../../llms";
+import { defaultSummaryPrompt, type SummaryPrompt } from "../../prompts";
+import { extractText, messagesToHistory } from "../../utils";
 import { BaseMemory } from "./base";

+/**
+ * @deprecated Use Memory instead.
+ */
 export class ChatSummaryMemoryBuffer extends BaseMemory {
  /**
   * Tokenizer function that converts text to tokens,
@@ -0,0 +1,136 @@
+import type { ChatMessage } from "../llms";
+import { ChatMessageAdapter } from "./adapter/chat";
+import {
+  FactExtractionMemoryBlock,
+  type FactExtractionMemoryBlockOptions,
+} from "./block/fact";
+import {
+  StaticMemoryBlock,
+  type StaticMemoryBlockOptions,
+} from "./block/static";
+import { DEFAULT_TOKEN_LIMIT, Memory, type MemoryOptions } from "./memory";
+import type { MemoryMessage } from "./types";
+
+/**
+ * Create a Memory instance with default options
+ * @returns A new Memory instance with default configuration
+ */
+export function createMemory<TMessageOptions extends object = object>(): Memory<
+  Record<string, never>,
+  TMessageOptions
+>;
+
+/**
+ * Create a Memory instance with options only
+ * @param options - Memory configuration options
+ * @returns A new Memory instance
+ */
+export function createMemory<TMessageOptions extends object = object>(
+  options: MemoryOptions<TMessageOptions>,
+): Memory<Record<string, never>, TMessageOptions>;
+
+/**
+ * Create a Memory instance with ChatMessage array (IDs will be generated)
+ * @param messages - Initial ChatMessage array for the memory
+ * @param options - Memory configuration options
+ * @returns A new Memory instance
+ */
+export function createMemory<TMessageOptions extends object = object>(
+  messages: ChatMessage<TMessageOptions>[],
+  options?: MemoryOptions<TMessageOptions>,
+): Memory<Record<string, never>, TMessageOptions>;
+
+/**
+ * Create a Memory instance with MemoryMessage array and options
+ * @param messages - Initial MemoryMessage array for the memory
+ * @param options - Memory configuration options
+ * @returns A new Memory instance
+ */
+export function createMemory<TMessageOptions extends object = object>(
+  messages: MemoryMessage<TMessageOptions>[],
+  options: MemoryOptions<TMessageOptions>,
+): Memory<Record<string, never>, TMessageOptions>;
+
+/**
+ * Create a Memory instance
+ *
+ * When the first argument is an options object (the options-only overload),
+ * those options are used and the memory starts empty. When it is an array,
+ * every message that already has an `id` is kept as a memory message and
+ * every other message is converted with a generated id.
+ *
+ * @param messagesOrOptions - Either initial messages or options
+ * @param options - Memory configuration options (when first param is messages)
+ * @returns A new Memory instance
+ */
+export function createMemory<TMessageOptions extends object = object>(
+  messagesOrOptions:
+    | ChatMessage<TMessageOptions>[]
+    | MemoryMessage<TMessageOptions>[]
+    | MemoryOptions<TMessageOptions> = [],
+  options: MemoryOptions<TMessageOptions> = {},
+): Memory<Record<string, never>, TMessageOptions> {
+  if (!Array.isArray(messagesOrOptions)) {
+    // Options-only call: the first argument carries the configuration
+    return new Memory<Record<string, never>, TMessageOptions>(
+      [],
+      messagesOrOptions,
+    );
+  }
+
+  const adapter = new ChatMessageAdapter<TMessageOptions>();
+  // Decide per message, so a list is handled correctly even when only some
+  // of its entries already carry an id
+  const messages: MemoryMessage<TMessageOptions>[] = messagesOrOptions.map(
+    (message) =>
+      "id" in message
+        ? (message as MemoryMessage<TMessageOptions>)
+        : adapter.toMemory(message),
+  );
+  return new Memory<Record<string, never>, TMessageOptions>(messages, options);
+}
+
+/**
+ * Create a StaticMemoryBlock.
+ * Convenience wrapper around `new StaticMemoryBlock(options)`.
+ * @param options - Configuration options for the static memory block
+ * @returns A new StaticMemoryBlock instance
+ */
+export function staticBlock<TMessageOptions extends object = object>(
+  options: StaticMemoryBlockOptions,
+): StaticMemoryBlock<TMessageOptions> {
+  return new StaticMemoryBlock<TMessageOptions>(options);
+}
+
+/**
+ * Create a FactExtractionMemoryBlock.
+ * Convenience wrapper around `new FactExtractionMemoryBlock(options)`.
+ * @param options - Configuration options for the fact extraction memory block
+ * @returns A new FactExtractionMemoryBlock instance
+ */
+export function factExtractionBlock<TMessageOptions extends object = object>(
+  options: FactExtractionMemoryBlockOptions,
+): FactExtractionMemoryBlock<TMessageOptions> {
+  return new FactExtractionMemoryBlock<TMessageOptions>(options);
+}
+
+/**
+ * Creates a new Memory instance from a snapshot
+ *
+ * The snapshot is a JSON string holding `messages`, `tokenLimit` and
+ * `memoryCursor`. A `tokenLimit` given in `options` wins over the one in the
+ * snapshot; the memory cursor is always taken from the snapshot.
+ *
+ * @param snapshot The snapshot to load from
+ * @param options Optional MemoryOptions to apply when loading (including memory blocks)
+ * @returns A new Memory instance with the snapshot data and provided options
+ * @throws SyntaxError when the snapshot is not valid JSON
+ * @throws Error when the snapshot is not a JSON object or `messages` is not an array
+ */
+export function loadMemory<TMessageOptions extends object = object>(
+  snapshot: string,
+  options?: MemoryOptions<TMessageOptions>,
+): Memory<Record<string, never>, TMessageOptions> {
+  const parsed: unknown = JSON.parse(snapshot);
+  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
+    throw new Error("[loadMemory] Invalid snapshot: expected a JSON object");
+  }
+  const { messages, tokenLimit, memoryCursor } = parsed as {
+    messages?: unknown;
+    tokenLimit?: unknown;
+    memoryCursor?: unknown;
+  };
+  if (messages !== undefined && !Array.isArray(messages)) {
+    throw new Error(
+      "[loadMemory] Invalid snapshot: `messages` must be an array",
+    );
+  }
+
+  // JSON has no date type: a Date serialized into the snapshot comes back as
+  // a string, so turn `createdAt` into a Date again
+  const restoredMessages = (
+    (messages ?? []) as MemoryMessage<TMessageOptions>[]
+  ).map((message) => {
+    const createdAt: unknown = message.createdAt;
+    return typeof createdAt === "string"
+      ? { ...message, createdAt: new Date(createdAt) }
+      : message;
+  });
+
+  // Merge snapshot data with provided options
+  const mergedOptions: MemoryOptions<TMessageOptions> = {
+    tokenLimit:
+      options?.tokenLimit ??
+      (typeof tokenLimit === "number" ? tokenLimit : DEFAULT_TOKEN_LIMIT),
+    // Explicit presence check so that a ratio of 0 is not silently dropped
+    ...(options?.shortTermTokenLimitRatio !== undefined && {
+      shortTermTokenLimitRatio: options.shortTermTokenLimitRatio,
+    }),
+    ...(options?.customAdapters && {
+      customAdapters: options.customAdapters,
+    }),
+    memoryBlocks: options?.memoryBlocks ?? [],
+    memoryCursor: typeof memoryCursor === "number" ? memoryCursor : 0,
+  };
+
+  return new Memory<Record<string, never>, TMessageOptions>(
+    restoredMessages,
+    mergedOptions,
+  );
+}
@@ -1,3 +1,9 @@
-export { BaseMemory } from "./base";
-export { ChatMemoryBuffer } from "./chat-memory-buffer";
-export { ChatSummaryMemoryBuffer } from "./summary-memory";
+export { BaseMemory } from "./deprecated/base";
+export { ChatMemoryBuffer } from "./deprecated/chat-memory-buffer";
+export { ChatSummaryMemoryBuffer } from "./deprecated/summary-memory";
+
+export * from "./adapter";
+export * from "./block";
+export * from "./factories";
+export { Memory } from "./memory";
+export * from "./types";
@@ -0,0 +1,401 @@
+import { Settings } from "../global";
+import type { ChatMessage, LLM } from "../llms";
+import { extractText } from "../utils";
+import { type MessageAdapter } from "./adapter/base";
+import { ChatMessageAdapter } from "./adapter/chat";
+import { VercelMessageAdapter } from "./adapter/vercel";
+import type { BaseMemoryBlock } from "./block/base.js";
+import { DEFAULT_TOKEN_LIMIT_RATIO } from "./deprecated/base";
+import type { MemoryMessage } from "./types";
+
+/** Token limit a `Memory` uses when no `tokenLimit` option is supplied. */
+export const DEFAULT_TOKEN_LIMIT = 30000;
+/** Share of the token limit given to short-term memory when no ratio is supplied. */
+const DEFAULT_SHORT_TERM_TOKEN_LIMIT_RATIO = 0.7;
+
+/**
+ * Adapters that every `Memory` registers in its constructor, keyed by the
+ * name accepted as the `type` option of `Memory.get`.
+ */
+type BuiltinAdapters<TMessageOptions extends object = object> = {
+  vercel: VercelMessageAdapter;
+  llamaindex: ChatMessageAdapter<TMessageOptions>;
+};
+
+export type MemoryOptions<TMessageOptions extends object = object> = {
+  tokenLimit?: number;
+  /**
+   * How much of the token limit is used for short term memory.
+   * The remaining token limit is used for long term memory.
+   * Default is 0.7 (DEFAULT_SHORT_TERM_TOKEN_LIMIT_RATIO).
+   */
+  shortTermTokenLimitRatio?: number;
+  customAdapters?: Record<string, MessageAdapter<unknown, object>>;
+  memoryBlocks?: BaseMemoryBlock<TMessageOptions>[];
+  /**
+   * The cursor position for tracking processed messages into long-term memory.
+   * Used internally for memory restoration from snapshots.
+   * Defaults to 0 when omitted.
+   */
+  memoryCursor?: number;
+};
+
+/**
+ * Conversation memory that stores messages in an adapter-neutral format and
+ * serves them back either verbatim (`get`) or trimmed to a token budget and
+ * combined with memory-block content (`getLLM`).
+ *
+ * @typeParam TAdapters - Custom message adapters registered next to the built-in ones
+ * @typeParam TMessageOptions - Additional options carried by each chat message
+ */
+export class Memory<
+  TAdapters extends Record<
+    string,
+    MessageAdapter<unknown, TMessageOptions>
+  > = Record<string, never>,
+  TMessageOptions extends object = object,
+> {
+  /**
+   * Hold all messages put into the memory.
+   */
+  private messages: MemoryMessage<TMessageOptions>[] = [];
+  /**
+   * The token limit for memory retrieval results.
+   */
+  private tokenLimit: number = DEFAULT_TOKEN_LIMIT;
+  /**
+   * The ratio of the token limit for short term memory.
+   */
+  private shortTermTokenLimitRatio: number =
+    DEFAULT_SHORT_TERM_TOKEN_LIMIT_RATIO;
+  /**
+   * The adapters for the memory.
+   */
+  private adapters: TAdapters & BuiltinAdapters<TMessageOptions>;
+  /**
+   * The memory blocks for the memory.
+   */
+  private memoryBlocks: BaseMemoryBlock<TMessageOptions>[] = [];
+  /**
+   * The cursor for the messages that have been processed into long-term memory.
+   */
+  private memoryCursor: number = 0;
+
+  /**
+   * @param messages - Initial messages; the array is stored by reference, not copied
+   * @param options - Limits, adapters, memory blocks and cursor; omitted values fall back to defaults
+   */
+  constructor(
+    messages: MemoryMessage<TMessageOptions>[] = [],
+    options: MemoryOptions<TMessageOptions> = {},
+  ) {
+    this.messages = messages;
+    this.tokenLimit = options.tokenLimit ?? DEFAULT_TOKEN_LIMIT;
+    this.shortTermTokenLimitRatio =
+      options.shortTermTokenLimitRatio ?? DEFAULT_SHORT_TERM_TOKEN_LIMIT_RATIO;
+    this.memoryBlocks = options.memoryBlocks ?? [];
+    this.memoryCursor = options.memoryCursor ?? 0;
+
+    // Built-in adapters are listed after the spread, so a custom adapter
+    // registered under "vercel" or "llamaindex" is replaced by the built-in one.
+    this.adapters = {
+      ...options.customAdapters,
+      vercel: new VercelMessageAdapter(),
+      llamaindex: new ChatMessageAdapter(),
+    } as TAdapters & BuiltinAdapters<TMessageOptions>;
+  }
+
+  /**
+   * Add a message to the memory
+   * @param message - The message to add to the memory
+   * @throws Error if no registered adapter is compatible with the message
+   */
+  async add(message: unknown): Promise<void> {
+    let memoryMessage: MemoryMessage<TMessageOptions> | null = null;
+
+    // Use the first registered adapter that recognizes the message format
+    for (const key in this.adapters) {
+      const adapter = this.adapters[key as keyof typeof this.adapters];
+      if (adapter?.isCompatible(message)) {
+        memoryMessage = adapter.toMemory(message);
+        break;
+      }
+    }
+
+    if (memoryMessage) {
+      this.messages.push(memoryMessage);
+      // Automatically manage memory blocks when new messages are added
+      await this.manageMemoryBlocks();
+    } else {
+      throw new Error(
+        `None of the adapters ${Object.keys(this.adapters).join(", ")} are compatible with the message. ${JSON.stringify(message)}`,
+      );
+    }
+  }
+
+  /**
+   * Get the messages of specific type from the memory
+   *
+   * Returns every stored message (no token limit is applied), converted with
+   * the adapter selected by `options.type`. Transient messages are appended to
+   * the result but are not stored.
+   * @param options - The options for the get method
+   * @returns The messages of specific type
+   * @throws Error if no adapter is registered for `options.type`
+   */
+  async get<
+    K extends keyof (TAdapters &
+      BuiltinAdapters<TMessageOptions>) = "llamaindex",
+  >(
+    options: {
+      type?: K;
+      transientMessages?: ChatMessage<TMessageOptions>[];
+    } = {},
+  ): Promise<
+    K extends keyof (TAdapters & BuiltinAdapters<TMessageOptions>)
+      ? ReturnType<
+          (TAdapters & BuiltinAdapters<TMessageOptions>)[K]["fromMemory"]
+        >[]
+      : never
+  > {
+    const { type = "llamaindex", transientMessages } = options;
+    const adapter = this.adapters[type as keyof typeof this.adapters];
+    if (!adapter) {
+      throw new Error(`No adapter registered for type "${String(type)}"`);
+    }
+
+    let messages = this.messages;
+
+    if (transientMessages && transientMessages.length > 0) {
+      messages = [
+        ...this.messages,
+        ...transientMessages.map((m) => this.adapters.llamaindex.toMemory(m)),
+      ];
+    }
+
+    // Convert memory messages to the format of the requested adapter
+    const chatMessages = messages.map((m) => adapter.fromMemory(m));
+    return chatMessages as unknown as Promise<
+      K extends keyof (TAdapters & BuiltinAdapters<TMessageOptions>)
+        ? ReturnType<
+            (TAdapters & BuiltinAdapters<TMessageOptions>)[K]["fromMemory"]
+          >[]
+        : never
+    >;
+  }
+
+  /**
+   * Get the messages from the memory, optionally including transient messages.
+   * only return messages that are within context window of the LLM
+   * @param llm - To fit the result messages to the context window of the LLM. If not provided, the token limit configured for this memory is used.
+   * @param transientMessages - Optional transient messages to include.
+   * @returns The messages from the memory, optionally including transient messages.
+   * @throws Error if the fixed blocks and transient messages alone exceed the token limit
+   */
+  async getLLM(
+    llm?: LLM,
+    transientMessages?: ChatMessage<TMessageOptions>[],
+  ): Promise<ChatMessage[]> {
+    // Priority of result messages:
+    // [Fixed blocks (priority=0), Long term blocks, Short term messages(oldest to newest), Transient messages]
+
+    const contextWindow = llm?.metadata.contextWindow;
+    const tokenLimit = contextWindow
+      ? Math.ceil(contextWindow * DEFAULT_TOKEN_LIMIT_RATIO)
+      : this.tokenLimit;
+
+    // Start with fixed block messages (priority=0)
+    // as it must always be included in the retrieval result
+    const messages = await this.getMemoryBlockMessages(
+      this.memoryBlocks.filter((block) => block.priority === 0),
+      tokenLimit,
+    );
+    // remaining token limit for short-term and memory blocks content
+    const remainingTokenLimit =
+      tokenLimit -
+      this.countMessagesToken([...messages, ...(transientMessages || [])]);
+
+    // if transient messages are provided, we need to check if they fit within the token limit
+    if (remainingTokenLimit < 0) {
+      throw new Error(
+        `Could not fit fixed blocks and transient messages within memory context`,
+      );
+    }
+
+    // Get messages for short-term and memory blocks
+    const shortTermTokenLimit = Math.ceil(
+      remainingTokenLimit * this.shortTermTokenLimitRatio,
+    );
+    const memoryBlocksTokenLimit = remainingTokenLimit - shortTermTokenLimit;
+
+    // Add long-term memory blocks (every block whose priority is not 0)
+    const longTermBlocks = [...this.memoryBlocks]
+      .filter((block) => block.priority !== 0)
+      .sort((a, b) => b.priority - a.priority);
+    const longTermBlockMessages = await this.getMemoryBlockMessages(
+      longTermBlocks,
+      memoryBlocksTokenLimit,
+    );
+    messages.push(...longTermBlockMessages);
+
+    // Process short-term messages (newest first for token efficiency, but maintain chronological order in result)
+    const shortTermMessagesResult: ChatMessage<TMessageOptions>[] = [];
+    const unprocessedMessages = this.messages.slice(this.memoryCursor);
+
+    // The transient messages do not change inside the loop, so tokenize them once
+    const transientTokenCount = this.countMessagesToken(
+      transientMessages || [],
+    );
+
+    // Process from newest to oldest for token efficiency
+    for (let i = unprocessedMessages.length - 1; i >= 0; i--) {
+      const memoryMessage = unprocessedMessages[i];
+      if (!memoryMessage) continue;
+      const chatMessage = this.adapters.llamaindex.fromMemory(memoryMessage);
+
+      // Check if adding this message would exceed token limit
+      const newTokenCount =
+        this.countMessagesToken(shortTermMessagesResult) +
+        this.countMessagesToken([chatMessage]) +
+        transientTokenCount;
+
+      if (newTokenCount > shortTermTokenLimit) {
+        // Token limit reached, stop processing older messages
+        break;
+      }
+      shortTermMessagesResult.push(chatMessage);
+    }
+    // reverse the short-term messages to maintain chronological order (oldest to newest)
+    messages.push(...shortTermMessagesResult.reverse());
+
+    // Add transient messages at the end
+    if (transientMessages && transientMessages.length > 0) {
+      messages.push(...transientMessages);
+    }
+
+    return messages;
+  }
+
+  /**
+   * Get the content from the memory blocks
+   * also convert the content to chat messages
+   *
+   * Blocks are visited from highest to lowest priority. Collection stops at
+   * the first message that would exceed the limit. A block whose `get` throws
+   * is logged with `console.warn` and skipped.
+   * @param blocks - The blocks to get the content from
+   * @param tokenLimit - The token limit for the memory blocks, if not provided, all the memory blocks will be included
+   * @returns The collected block content as chat messages
+   */
+  private async getMemoryBlockMessages(
+    blocks: BaseMemoryBlock<TMessageOptions>[],
+    tokenLimit?: number,
+  ): Promise<ChatMessage<TMessageOptions>[]> {
+    if (blocks.length === 0) {
+      return [];
+    }
+
+    // Sort memory blocks by priority (highest first)
+    const sortedBlocks = [...blocks].sort((a, b) => b.priority - a.priority);
+    const memoryContent: ChatMessage<TMessageOptions>[] = [];
+
+    // Get up to the token limit of the memory blocks
+    let addedTokenCount = 0;
+    for (const block of sortedBlocks) {
+      try {
+        const content = await block.get();
+        for (const message of content) {
+          const chatMessage = this.adapters.llamaindex.fromMemory(message);
+          const messageTokenCount = this.countMessagesToken([chatMessage]);
+          // Compare against undefined: a limit of 0 is a real (empty) budget
+          // and must not be mistaken for "no limit".
+          if (
+            tokenLimit !== undefined &&
+            addedTokenCount + messageTokenCount > tokenLimit
+          ) {
+            return memoryContent;
+          }
+          memoryContent.push(chatMessage);
+          addedTokenCount += messageTokenCount;
+        }
+      } catch (error) {
+        console.warn(
+          `Failed to get content from memory block ${block.id}:`,
+          error,
+        );
+      }
+    }
+
+    return memoryContent;
+  }
+
+  /**
+   * Manage the memory blocks
+   * This method processes new messages into memory blocks when short-term memory exceeds its token limit.
+   * It uses a cursor system to track which messages have already been processed into long-term memory.
+   *
+   * When the limit is exceeded, all unprocessed messages are handed to the
+   * long-term blocks and the cursor moves past all of them.
+   */
+  async manageMemoryBlocks(): Promise<void> {
+    // Early return if no memory blocks configured
+    if (this.memoryBlocks.length === 0) {
+      return;
+    }
+    // Recomputed on every call from the current limit and ratio
+    const shortTermTokenLimit = Math.ceil(
+      this.tokenLimit * this.shortTermTokenLimitRatio,
+    );
+
+    // Check if unprocessed messages exceed the short term token limit
+    const unprocessedMessages = this.getUnprocessedMessages();
+    const unprocessedMessagesTokenCount =
+      this.countMemoryMessagesToken(unprocessedMessages);
+
+    if (unprocessedMessagesTokenCount <= shortTermTokenLimit) {
+      // No need to manage memory blocks yet
+      return;
+    }
+
+    await this.processMessagesIntoMemoryBlocks(unprocessedMessages);
+    this.updateMemoryCursor(unprocessedMessages.length);
+  }
+
+  /**
+   * Get messages that haven't been processed into long-term memory yet
+   */
+  private getUnprocessedMessages(): MemoryMessage<TMessageOptions>[] {
+    if (this.memoryCursor >= this.messages.length) {
+      return [];
+    }
+    return this.messages.slice(this.memoryCursor);
+  }
+
+  /**
+   * Process new messages into all long-term memory blocks (`isLongTerm`).
+   * Blocks are processed concurrently; a failing block is logged with
+   * `console.warn` and does not reject the returned promise.
+   */
+  private async processMessagesIntoMemoryBlocks(
+    newMessages: MemoryMessage<TMessageOptions>[],
+  ): Promise<void> {
+    const longTermMemoryBlocks = this.memoryBlocks.filter(
+      (block) => block.isLongTerm,
+    );
+    const promises = longTermMemoryBlocks.map(async (block) => {
+      try {
+        await block.put(newMessages);
+      } catch (error) {
+        console.warn(
+          `Failed to process messages into memory block ${block.id}:`,
+          error,
+        );
+        // Continue processing other blocks even if one fails
+      }
+    });
+
+    // Wait for all memory blocks to process the messages
+    await Promise.all(promises);
+  }
+
+  /**
+   * Advance the memory cursor by the number of messages handed to the blocks.
+   * The caller advances it even when a block failed, because block errors are
+   * caught and logged rather than propagated.
+   */
+  private updateMemoryCursor(processedCount: number): void {
+    this.memoryCursor += processedCount;
+    // Ensure cursor doesn't exceed message count
+    this.memoryCursor = Math.min(this.memoryCursor, this.messages.length);
+  }
+
+  /**
+   * Clear all the messages in the memory.
+   * Only the messages and the cursor are reset; memory blocks are left untouched.
+   */
+  async clear(): Promise<void> {
+    this.messages = [];
+    this.memoryCursor = 0; // Reset cursor when clearing messages
+  }
+
+  /**
+   * Creates a snapshot of the current memory state
+   * Note: Memory blocks are not included in snapshots as they may contain non-serializable content.
+   * Memory blocks should be recreated when loading from snapshot.
+   * @returns A JSON string containing the messages, the token limit and the memory cursor
+   */
+  snapshot(): string {
+    return JSON.stringify({
+      messages: this.messages,
+      // Persist the limit so that it survives a snapshot/load round trip
+      tokenLimit: this.tokenLimit,
+      memoryCursor: this.memoryCursor,
+    });
+  }
+
+  /**
+   * Count the tokens of memory messages by converting them to chat messages first.
+   */
+  private countMemoryMessagesToken(
+    messages: MemoryMessage<TMessageOptions>[],
+  ): number {
+    return this.countMessagesToken(
+      messages.map((m) =>
+        this.adapters.llamaindex.fromMemory(m),
+      ) as ChatMessage[],
+    );
+  }
+
+  /**
+   * Count the tokens of the messages' text content, joined by single spaces,
+   * using the tokenizer from `Settings`.
+   */
+  private countMessagesToken(messages: ChatMessage[]): number {
+    if (messages.length === 0) {
+      return 0;
+    }
+    const tokenizer = Settings.tokenizer;
+    const str = messages.map((m) => extractText(m.content)).join(" ");
+    return tokenizer.encode(str).length;
+  }
+}
@@ -0,0 +1,19 @@
+import type { ChatMessage } from "../llms";
+
+/**
+ * Extra fields attached to every message stored in memory.
+ * The field names mirror those used by vercel/ai messages for simplicity.
+ */
+export type MemoryMessageExtension = {
+  id: string;
+  createdAt?: Date | undefined;
+  annotations?: Array<unknown> | undefined;
+};
+
+/**
+ * A chat message combined with the memory-specific fields of
+ * `MemoryMessageExtension`.
+ */
+export type MemoryMessage<AdditionalMessageOptions extends object = object> =
+  ChatMessage<AdditionalMessageOptions> & MemoryMessageExtension;
+
+/**
+ * Serialized memory state: the stored messages and the token limit.
+ * NOTE(review): `loadMemory` also reads a `memoryCursor` field from snapshots,
+ * which this type does not declare; confirm whether it should be added here.
+ */
+export type MemorySnapshot = {
+  messages: MemoryMessage[];
+  tokenLimit: number;
+};
@@ -17,7 +17,7 @@ export class SentenceWindowNodeParser extends NodeParser<TextNode[]> {
  windowSize: number;
  windowMetadataKey: string;
  originalTextMetadataKey: string;
-  sentenceSplitter: TextSplitterFn = splitBySentenceTokenizer();
+  sentenceSplitter: TextSplitterFn = splitBySentenceTokenizer([], true);
  idGenerator: () => string = () => randomUUID();

  constructor(params?: z.input<typeof sentenceWindowNodeParserSchema>) {
@@ -1,5 +1,5 @@
 declare class SentenceTokenizer {
-  constructor(abbreviations?: string[]);
+  constructor(abbreviations?: string[], trimSentences?: boolean);
  tokenize(text: string): string[];
 }

@@ -1,3 +1,24 @@
+/*
+Copyright (c) 2024, Hugo W.L. ter Doest
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
 var __getOwnPropNames = Object.getOwnPropertyNames;
 var __commonJS = (cb, mod) =>
  function __require() {
@@ -30,32 +51,47 @@ var require_tokenizer = __commonJS({
 // lib/natural/tokenizers/sentence_tokenizer.js
 var require_sentence_tokenizer = __commonJS({
  "lib/natural/tokenizers/sentence_tokenizer.js"(exports, module) {
-    var Tokenizer = require_tokenizer();
-    var NUM = "NUMBER";
-    var DELIM = "DELIM";
-    var URI = "URI";
-    var ABBREV = "ABBREV";
-    var DEBUG = false;
+    const Tokenizer = require_tokenizer();
+
+    // Strings that will be used to create placeholders
+    const NUM = "NUMBER";
+    const DELIM = "DELIM";
+    const URI = "URI";
+    const ABBREV = "ABBREV";
+
+    const DEBUG = false;
+
    function generateUniqueCode(base, index) {
+      // Surround the placeholder with {{}} to prevent shorter numbers to be recognized
+      // in larger numbers
      return `{{${base}_${index}}}`;
    }
+
    function escapeRegExp(string) {
      return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    }
-    var SentenceTokenizer = class extends Tokenizer {
-      constructor(abbreviations) {
+
+    class SentenceTokenizer extends Tokenizer {
+      constructor(abbreviations, trimSentences) {
        super();
        if (abbreviations) {
          this.abbreviations = abbreviations;
        } else {
          this.abbreviations = [];
        }
+        if (trimSentences === undefined) {
+          this.trimSentences = true;
+        } else {
+          this.trimSentences = trimSentences;
+        }
        this.replacementMap = null;
        this.replacementCounter = 0;
      }
+
      replaceUrisWithPlaceholders(text) {
        const urlPattern =
          /(https?:\/\/\S+|www\.\S+|ftp:\/\/\S+|(mailto:)?[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}|file:\/\/\S+)/gi;
+
        const modifiedText = text.replace(urlPattern, (match) => {
          const placeholder = generateUniqueCode(
            URI,
@@ -64,8 +100,10 @@ var require_sentence_tokenizer = __commonJS({
          this.replacementMap.set(placeholder, match);
          return placeholder;
        });
+
        return modifiedText;
      }
+
      replaceAbbreviations(text) {
        if (this.abbreviations.length === 0) {
          return text;
@@ -79,9 +117,14 @@ var require_sentence_tokenizer = __commonJS({
          this.replacementMap.set(code, match);
          return code;
        });
+
        return replacedText;
      }
+
      replaceDelimitersWithPlaceholders(text) {
+        // Regular expression for sentence delimiters optionally followed by a bracket or quote
+        // Multiple delimiters with spaces in between are allowed
+        // The expression makes sure that the sentence delimiter group ends with a sentence delimiter
        const delimiterPattern = /([.?!… ]*)([.?!…])(["'”’)}\]]?)/g;
        const modifiedText = text.replace(
          delimiterPattern,
@@ -94,32 +137,42 @@ var require_sentence_tokenizer = __commonJS({
            return placeholder;
          },
        );
+
        return modifiedText;
      }
+
      splitOnPlaceholders(text, placeholders) {
        if (this.delimiterMap.size === 0) {
          return [text];
        }
+
        const keys = Array.from(this.delimiterMap.keys());
        const pattern = new RegExp(`(${keys.map(escapeRegExp).join("|")})`);
        const parts = text.split(pattern);
+
        const sentences = [];
        for (let i = 0; i < parts.length; i += 2) {
          const sentence = parts[i];
          const placeholder = parts[i + 1] || "";
          sentences.push(sentence + placeholder);
        }
+
        return sentences;
      }
+
      replaceNumbersWithCode(text) {
+        // Regular expression to match numbers, including decimal points and commas
        const numberPattern = /\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b/g;
+
        const replacedText = text.replace(numberPattern, (match) => {
          const code = generateUniqueCode(NUM, this.replacementCounter++);
          this.replacementMap.set(code, match);
          return code;
        });
+
        return replacedText;
      }
+
      revertReplacements(text) {
        let originalText = text;
        for (const [
@@ -129,16 +182,20 @@ var require_sentence_tokenizer = __commonJS({
          const pattern = new RegExp(escapeRegExp(placeholder), "g");
          originalText = originalText.replace(pattern, replacement);
        }
+
        return originalText;
      }
+
      revertDelimiters(text) {
        let originalText = text;
        for (const [placeholder, replacement] of this.delimiterMap.entries()) {
          const pattern = new RegExp(escapeRegExp(placeholder), "g");
          originalText = originalText.replace(pattern, replacement);
        }
+
        return originalText;
      }
+
      tokenize(text) {
        this.replacementCounter = 0;
        this.replacementMap = /* @__PURE__ */ new Map();
@@ -148,32 +205,43 @@ var require_sentence_tokenizer = __commonJS({
            "---Start of sentence tokenization-----------------------",
          );
        DEBUG && console.log("Original input: >>>" + text + "<<<");
+        // Replace abbreviations
        const result1 = this.replaceAbbreviations(text);
        DEBUG &&
          console.log(
            "Phase 1: replacing abbreviations: " + JSON.stringify(result1),
          );
+
+        // Replace URIs
        const result2 = this.replaceUrisWithPlaceholders(result1);
        DEBUG &&
          console.log("Phase 2: replacing URIs: " + JSON.stringify(result2));
+
+        // Replace numbers (including thousands separators and decimals) with placeholders
        const result3 = this.replaceNumbersWithCode(result2);
        DEBUG &&
          console.log(
            "Phase 3: replacing numbers with placeholders: " +
              JSON.stringify(result3),
          );
+
+        // Replace delimiters followed by optional quotes, brackets, and braces
        const result4 = this.replaceDelimitersWithPlaceholders(result3);
        DEBUG &&
          console.log(
            "Phase 4: replacing delimiters with placeholders: " +
              JSON.stringify(result4),
          );
+
+        // Split on placeholders for sentence delimiters
        const sentences = this.splitOnPlaceholders(result4);
        DEBUG &&
          console.log(
            "Phase 5: splitting into sentences on placeholders: " +
              JSON.stringify(sentences),
          );
+
+        // Replace back all abbreviations, URIs, and delimiters
        const newSentences = sentences.map((s) => {
          const s1 = this.revertReplacements(s);
          return this.revertDelimiters(s1);
@@ -183,13 +251,17 @@ var require_sentence_tokenizer = __commonJS({
            "Phase 6: replacing back abbreviations, URIs, numbers and delimiters: " +
              JSON.stringify(newSentences),
          );
+
        const trimmedSentences = this.trim(newSentences);
        DEBUG &&
          console.log(
            "Phase 7: trimming array of empty sentences: " +
              JSON.stringify(trimmedSentences),
          );
-        const trimmedSentences2 = trimmedSentences.map((sent) => sent.trim());
+
+        const trimmedSentences2 = trimmedSentences.map((sent) =>
+          this.trimSentences ? sent.trim() : sent,
+        );
        DEBUG &&
          console.log(
            "Phase 8: trimming sentences from surrounding whitespace: " +
@@ -213,9 +285,10 @@ var require_sentence_tokenizer = __commonJS({
          console.log(
            "---------------------------------------------------------",
          );
+
        return trimmedSentences2;
      }
-    };
+    }
    module.exports = SentenceTokenizer;
  },
 });
@@ -37,13 +37,17 @@ export const splitByChar = (): TextSplitterFn => {

 export const splitBySentenceTokenizer = (
  extraAbbreviations: string[] | undefined = [],
+  trimSentences: boolean = false,
 ): TextSplitterFn => {
-  const tokenizer = new SentenceTokenizer([
-    ...abbreviations.english,
-    ...abbreviations.spanish,
-    // Add the extra abbreviations provided by the user, e.g. for business-specific context
-    ...extraAbbreviations,
-  ]);
+  const tokenizer = new SentenceTokenizer(
+    [
+      ...abbreviations.english,
+      ...abbreviations.spanish,
+      // Add the extra abbreviations provided by the user, e.g. for business-specific context
+      ...extraAbbreviations,
+    ],
+    trimSentences,
+  );
  return (text: string) => {
    try {
      return tokenizer.tokenize(text);
@@ -101,17 +101,21 @@ export class SimpleKVStore extends BaseKVStore {
  static async fromPersistPath(persistPath: string): Promise<SimpleKVStore> {
    const dirPath = path.dirname(persistPath);
    if (!(await exists(dirPath))) {
-      await fs.mkdir(dirPath);
+      await fs.mkdir(dirPath, { recursive: true });
    }

    let data: DataType = {};
-    try {
-      const fileData = await fs.readFile(persistPath);
-      data = JSON.parse(fileData.toString());
-    } catch (e) {
-      console.error(
-        `No valid data found at path: ${persistPath} starting new store.`,
-      );
+    if (!(await exists(persistPath))) {
+      console.info(`Starting new store from path: ${persistPath}`);
+    } else {
+      try {
+        const fileData = await fs.readFile(persistPath);
+        data = JSON.parse(fileData.toString());
+      } catch (e) {
+        throw new Error(`Failed to load data from path: ${persistPath}`, {
+          cause: e,
+        });
+      }
    }

    const store = new SimpleKVStore(data);
@@ -0,0 +1,103 @@
+import { filetypemime } from "magic-bytes.js";
+
+/**
+ * Decodes a raw base64 string (no `data:` prefix) into its bytes.
+ * @param base64 - The base64 payload to decode
+ * @returns A Uint8Array holding one byte per decoded character
+ */
+export function base64ToUint8Array(base64: string): Uint8Array {
+  // atob yields a "binary string" whose char codes are the byte values
+  const decoded = atob(base64);
+  return Uint8Array.from(decoded, (char) => char.charCodeAt(0));
+}
+
+/**
+ * Encodes the bytes of a Uint8Array as a base64 string.
+ * @param uint8Array The bytes to encode.
+ * @returns The base64-encoded string.
+ */
+export function uint8ArrayToBase64(uint8Array: Uint8Array): string {
+  // Build the "binary string" expected by btoa: one character per byte
+  const binary = Array.from(uint8Array, (byte) =>
+    String.fromCharCode(byte),
+  ).join("");
+  return btoa(binary);
+}
+
+/**
+ * Reads the MIME type out of a base64 data URL.
+ * @param dataUrl The data URL string.
+ * @returns The text between the `data:` prefix and the `;base64` marker.
+ * @throws An error if the string is not a data URL, has no comma, or has no
+ * `;base64` marker before the comma.
+ */
+export function getMimeTypeFromDataUrl(dataUrl: string): string {
+  const scheme = "data:";
+  if (!dataUrl.startsWith(scheme)) {
+    throw new Error("Not a data URL");
+  }
+
+  // Everything before the first comma is the header, the rest is the payload
+  const payloadSeparator = dataUrl.indexOf(",");
+  if (payloadSeparator < 0) {
+    throw new Error("Invalid data URL format");
+  }
+  const header = dataUrl.slice(0, payloadSeparator);
+
+  const encodingMarker = header.indexOf(";base64");
+  if (encodingMarker < 0) {
+    throw new Error("Invalid data URL format: missing base64 encoding");
+  }
+
+  return header.slice(scheme.length, encodingMarker);
+}
+
+/**
+ * Convert base64 data to Blob
+ *
+ * Accepts either a raw base64 string or a data URL. An explicit `mimeType`
+ * wins; otherwise the MIME type is read from the data URL header.
+ * @param base64 - The base64 string or data URL
+ * @param mimeType - The MIME type of the file
+ * @returns The Blob
+ * @throws An error if no MIME type is given and none can be read from the input
+ */
+export function base64ToBlob(base64: string, mimeType?: string): Blob {
+  // Resolve the MIME type: explicit argument first, data URL header second
+  let resolvedMimeType: string;
+  if (mimeType) {
+    resolvedMimeType = mimeType;
+  } else if (base64.startsWith("data:")) {
+    resolvedMimeType = getMimeTypeFromDataUrl(base64);
+  } else {
+    throw new Error(
+      "No MIME type provided and base64 is not in data URL format",
+    );
+  }
+
+  if (!resolvedMimeType) {
+    throw new Error("No MIME type found in base64 data");
+  }
+
+  // Drop any data URL header; without a comma indexOf gives -1 and the slice
+  // starts at 0, leaving the input untouched
+  const payload = base64.slice(base64.indexOf(",") + 1);
+
+  return new Blob([base64ToUint8Array(payload)], { type: resolvedMimeType });
+}
+
+/**
+ * Converts a Blob into a base64 data URL. The MIME type is detected from the
+ * blob's bytes with `filetypemime`, and the first reported type is used.
+ * @param input - The Blob to convert
+ * @returns The data URL string
+ * @throws An error if no MIME type is detected for the content
+ */
+export async function blobToDataUrl(input: Blob) {
+  const bytes = new Uint8Array(await input.arrayBuffer());
+
+  const detectedMimes = filetypemime(bytes);
+  if (detectedMimes.length === 0) {
+    throw new Error("Unsupported image type");
+  }
+
+  return "data:" + detectedMimes[0] + ";base64," + uint8ArrayToBase64(bytes);
+}
@@ -72,5 +72,6 @@ export {

 export { MockLLM } from "./mock";

+export * from "./encoding";
 export { objectEntries } from "./object-entries";
 export * from "./stream";
@@ -1,5 +1,4 @@
 import { fs } from "@llamaindex/env";
-import { filetypemime } from "magic-bytes.js";
 import type {
  ChatMessage,
  MessageContent,
@@ -9,6 +8,7 @@ import type {
 } from "../llms";
 import type { QueryType } from "../query-engine";
 import type { ImageType } from "../schema";
+import { blobToDataUrl } from "./encoding";

 /**
 * Extracts just the text whether from
@@ -110,15 +110,6 @@ export function toToolDescriptions(tools: ToolMetadata[]): string {
  return JSON.stringify(toolsObj, null, 4);
 }

-async function blobToDataUrl(input: Blob) {
-  const buffer = Buffer.from(await input.arrayBuffer());
-  const mimes = filetypemime(buffer);
-  if (mimes.length < 1) {
-    throw new Error("Unsupported image type");
-  }
-  return "data:" + mimes[0] + ";base64," + buffer.toString("base64");
-}
-
 export async function imageToDataUrl(
  input: ImageType | Uint8Array,
 ): Promise<string> {
@@ -0,0 +1,99 @@
+import {
+  base64ToBlob,
+  base64ToUint8Array,
+  blobToDataUrl,
+  getMimeTypeFromDataUrl,
+  uint8ArrayToBase64,
+} from "@llamaindex/core/utils";
+import { describe, expect, it } from "vitest";
+
+const testString = "LlamaIndex";
+const testBase64 = "TGxhbWFJbmRleA=="; // btoa('LlamaIndex')
+const testUint8Array = new TextEncoder().encode(testString);
+
+const pngB64 =
+  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=";
+const pngMime = "image/png";
+const pngDataUrl = `data:${pngMime};base64,${pngB64}`;
+const pngBinaryString = atob(pngB64);
+const pngBytes = new Uint8Array(pngBinaryString.length);
+for (let i = 0; i < pngBinaryString.length; i++) {
+  pngBytes[i] = pngBinaryString.charCodeAt(i);
+}
+
+describe("Encoding utils", () => {
+  describe("base64ToUint8Array", () => {
+    it("should correctly convert a base64 string to a Uint8Array", () => {
+      const result = base64ToUint8Array(testBase64);
+      expect(result).toBeInstanceOf(Uint8Array);
+      expect(result).toEqual(testUint8Array);
+    });
+  });
+
+  describe("uint8ArrayToBase64", () => {
+    it("should correctly convert a Uint8Array to a base64 string", () => {
+      const result = uint8ArrayToBase64(testUint8Array);
+      expect(result).toBe(testBase64);
+    });
+  });
+
+  describe("getMimeTypeFromDataUrl", () => {
+    it("should extract the correct MIME type from a data URL", () => {
+      const result = getMimeTypeFromDataUrl(pngDataUrl);
+      expect(result).toBe(pngMime);
+    });
+
+    it("should throw an error for non-data URLs", () => {
+      expect(() => getMimeTypeFromDataUrl("not a data url")).toThrow(
+        "Not a data URL",
+      );
+    });
+
+    it("should throw an error for malformed data URLs", () => {
+      expect(() => getMimeTypeFromDataUrl("data:image/pngbase64,abc")).toThrow(
+        "Invalid data URL format: missing base64 encoding",
+      );
+      expect(() => getMimeTypeFromDataUrl("data:image/png;base64")).toThrow(
+        "Invalid data URL format",
+      );
+    });
+  });
+
+  describe("base64ToBlob", () => {
+    it("should convert from a data URL string", async () => {
+      const blob = base64ToBlob(pngDataUrl);
+      expect(blob).toBeInstanceOf(Blob);
+      expect(blob.type).toBe(pngMime);
+      const arrayBuffer = await blob.arrayBuffer();
+      expect(new Uint8Array(arrayBuffer)).toEqual(pngBytes);
+    });
+
+    it("should convert from a base64 string with an explicit MIME type", async () => {
+      const blob = base64ToBlob(pngB64, pngMime);
+      expect(blob).toBeInstanceOf(Blob);
+      expect(blob.type).toBe(pngMime);
+      const arrayBuffer = await blob.arrayBuffer();
+      expect(new Uint8Array(arrayBuffer)).toEqual(pngBytes);
+    });
+
+    it("should prioritize the explicit MIME type if a data URL is provided", async () => {
+      const differentMime = "image/jpeg";
+      const blob = base64ToBlob(pngDataUrl, differentMime);
+      expect(blob.type).toBe(differentMime);
+    });
+
+    it("should throw an error if no MIME type can be determined", () => {
+      expect(() => base64ToBlob(pngB64)).toThrow(
+        "No MIME type provided and base64 is not in data URL format",
+      );
+    });
+  });
+
+  describe("blobToDataUrl", () => {
+    it("should correctly convert a blob to a data URL", async () => {
+      const blob = new Blob([pngBytes], { type: "image/png" });
+      const result = await blobToDataUrl(blob);
+      expect(result).toBe(pngDataUrl);
+    });
+  });
+});
@@ -0,0 +1,395 @@
+import { Settings } from "@llamaindex/core/global";
+import type { ChatMessage, LLM } from "@llamaindex/core/llms";
+import { createMemory, Memory } from "@llamaindex/core/memory";
+import { MockLLM } from "@llamaindex/core/utils";
+import type { Tokenizer } from "@llamaindex/env/tokenizers";
+import {
+  afterAll,
+  beforeAll,
+  beforeEach,
+  describe,
+  expect,
+  test,
+} from "vitest";
+
+/**
+ * Builds a deterministic stand-in tokenizer for these tests.
+ * `encode` yields ceil(length / 4) tokens numbered 0..n-1, and `decode`
+ * only reports how many tokens it was handed.
+ */
+const createMockTokenizer = (): Tokenizer => {
+  const CHARS_PER_TOKEN = 4;
+
+  function encode(text: string): Uint32Array {
+    const ids = new Uint32Array(Math.ceil(text.length / CHARS_PER_TOKEN));
+    for (let position = 0; position < ids.length; position++) {
+      ids[position] = position;
+    }
+    return ids;
+  }
+
+  function decode(tokens: Uint32Array): string {
+    return "decoded_" + tokens.length + "_tokens";
+  }
+
+  return { encode, decode };
+};
+
+/**
+ * Creates a MockLLM whose metadata reports the given context window.
+ * All other metadata fields are fixed test values.
+ */
+const createMockLLM = (contextWindow: number): LLM => {
+  const metadata = {
+    contextWindow,
+    model: "test-model",
+    temperature: 0.7,
+    topP: 1.0,
+    tokenizer: undefined,
+    structuredOutput: false,
+  };
+  return new MockLLM({ metadata });
+};
+
+describe("Memory", () => {
+  let memory: Memory;
+  let originalTokenizer: Tokenizer;
+
+  beforeAll(() => {
+    // Save original tokenizer and set mock
+    originalTokenizer = Settings.tokenizer;
+    Settings.tokenizer = createMockTokenizer();
+  });
+
+  afterAll(() => {
+    // Restore original tokenizer
+    Settings.tokenizer = originalTokenizer;
+  });
+
+  beforeEach(() => {
+    memory = createMemory();
+  });
+
+  describe("add", () => {
+    test("should add LlamaIndex ChatMessage", async () => {
+      const message: ChatMessage = {
+        role: "user",
+        content: "Hello, world!",
+      };
+
+      await memory.add(message);
+      const messages = await memory.get();
+
+      expect(messages).toHaveLength(1);
+      expect(messages[0]).toEqual(message);
+    });
+
+    test("should add Vercel UI Message and convert to ChatMessage", async () => {
+      const vercelMessage = {
+        id: "test-id",
+        role: "user",
+        content: "Hello from Vercel!",
+        parts: [{ type: "text", text: "Hello from Vercel!" }],
+        createdAt: new Date(),
+        annotations: [],
+      };
+
+      await memory.add(vercelMessage);
+      const messages = await memory.get();
+
+      expect(messages).toHaveLength(1);
+      expect(messages[0]).toEqual({
+        role: "user",
+        content: "Hello from Vercel!",
+      });
+    });
+
+    test("should add multiple messages in sequence", async () => {
+      const message1: ChatMessage = { role: "user", content: "First message" };
+      const message2: ChatMessage = {
+        role: "assistant",
+        content: "Second message",
+      };
+
+      await memory.add(message1);
+      await memory.add(message2);
+
+      const messages = await memory.get();
+      expect(messages).toHaveLength(2);
+      expect(messages[0]).toEqual(message1);
+      expect(messages[1]).toEqual(message2);
+    });
+  });
+
+  describe("get", () => {
+    beforeEach(async () => {
+      // Add some test messages
+      await memory.add({ role: "user", content: "User message" });
+      await memory.add({ role: "assistant", content: "Assistant response" });
+    });
+
+    test("should return messages in LlamaIndex format by default", async () => {
+      const messages = await memory.get();
+
+      expect(messages).toHaveLength(2);
+      expect(messages[0]).toEqual({ role: "user", content: "User message" });
+      expect(messages[1]).toEqual({
+        role: "assistant",
+        content: "Assistant response",
+      });
+    });
+
+    test("should return messages in LlamaIndex format when explicitly requested", async () => {
+      const messages = await memory.get({ type: "llamaindex" });
+
+      expect(messages).toHaveLength(2);
+      expect(messages[0]).toEqual({ role: "user", content: "User message" });
+      expect(messages[1]).toEqual({
+        role: "assistant",
+        content: "Assistant response",
+      });
+    });
+
+    test("should add and get messages in LlamaIndex format when explicitly requested with options", async () => {
+      const message = {
+        role: "user",
+        content: "Hello, world!",
+        options: {
+          temperature: 0.7,
+          topP: 1.0,
+        },
+      };
+
+      await memory.add(message);
+      const messages = await memory.get({ type: "llamaindex" });
+
+      expect(messages[messages.length - 1]).toEqual({
+        role: "user",
+        content: "Hello, world!",
+        options: {
+          temperature: 0.7,
+          topP: 1.0,
+        },
+      });
+    });
+
+    test("should return messages in Vercel format when requested", async () => {
+      const messages = await memory.get({ type: "vercel" });
+
+      expect(messages).toHaveLength(2);
+      expect(messages[0]).toMatchObject({
+        role: "user",
+        content: "User message",
+        parts: [{ type: "text", text: "User message" }],
+      });
+      expect(messages[1]).toMatchObject({
+        role: "assistant",
+        content: "Assistant response",
+        parts: [{ type: "text", text: "Assistant response" }],
+      });
+
+      // Check that IDs and timestamps are generated
+      expect(typeof messages[0]).toBe("object");
+      expect(messages[0]).toHaveProperty("id");
+      expect(messages[0]).toHaveProperty("parts");
+      expect(messages[0]?.parts).toHaveLength(1);
+      expect(messages[1]).toHaveProperty("parts");
+      expect(messages[1]?.parts).toHaveLength(1);
+    });
+
+    test("should include transient messages without storing them", async () => {
+      const transientMessages: ChatMessage[] = [
+        { role: "system", content: "Transient system message" },
+        { role: "user", content: "Transient user message" },
+      ];
+
+      const messages = await memory.get({ transientMessages });
+
+      // Should return stored messages + transient messages
+      expect(messages).toHaveLength(4);
+      expect(messages[0]).toEqual({ role: "user", content: "User message" });
+      expect(messages[1]).toEqual({
+        role: "assistant",
+        content: "Assistant response",
+      });
+      expect(messages[2]).toEqual({
+        role: "system",
+        content: "Transient system message",
+      });
+      expect(messages[3]).toEqual({
+        role: "user",
+        content: "Transient user message",
+      });
+
+      // Verify transient messages are not stored permanently
+      const storedMessages = await memory.get();
+      expect(storedMessages).toHaveLength(2);
+      expect(storedMessages[0]).toEqual({
+        role: "user",
+        content: "User message",
+      });
+      expect(storedMessages[1]).toEqual({
+        role: "assistant",
+        content: "Assistant response",
+      });
+    });
+  });
+
+  describe("getLLM", () => {
+    beforeEach(async () => {
+      // Add test messages with varying lengths
+      await memory.add({ role: "user", content: "Short message 1" });
+      await memory.add({
+        role: "assistant",
+        content:
+          "This is a longer assistant response with more content to test token limits",
+      });
+      await memory.add({ role: "user", content: "Another user message" });
+      await memory.add({
+        role: "assistant",
+        content: "Final assistant response",
+      });
+    });
+
+    test("should return all messages when no LLM is provided", async () => {
+      const messages = await memory.getLLM();
+
+      expect(messages).toHaveLength(4);
+      expect(messages[0]?.content).toBe("Short message 1");
+      expect(messages[1]?.content).toBe(
+        "This is a longer assistant response with more content to test token limits",
+      );
+      expect(messages[2]?.content).toBe("Another user message");
+      expect(messages[3]?.content).toBe("Final assistant response");
+    });
+
+    test("should include transient messages in token calculation", async () => {
+      const transientMessages: ChatMessage[] = [
+        { role: "system", content: "System instruction" },
+        { role: "user", content: "Transient user question" },
+      ];
+
+      const messages = await memory.getLLM(
+        createMockLLM(500),
+        transientMessages,
+      );
+
+      // Should include some combination of stored and transient messages
+      expect(messages.length).toBeGreaterThan(0);
+
+      // Check if transient messages are included (they should be recent)
+      const messageContents = messages.map((m) => m.content);
+      const hasTransientMessage = messageContents.some(
+        (content) =>
+          content === "System instruction" ||
+          content === "Transient user question",
+      );
+      expect(hasTransientMessage).toBe(true);
+    });
+
+    test("should handle empty memory with transient messages", async () => {
+      const emptyMemory = createMemory();
+      const transientMessages: ChatMessage[] = [
+        { role: "system", content: "System message" },
+        { role: "user", content: "User question" },
+      ];
+
+      const messages = await emptyMemory.getLLM(
+        createMockLLM(1000),
+        transientMessages,
+      );
+
+      expect(messages).toHaveLength(2);
+      expect(messages[0]?.content).toBe("System message");
+      expect(messages[1]?.content).toBe("User question");
+    });
+  });
+
+  describe("token limit handling", () => {
+    beforeEach(async () => {
+      // Add messages with different lengths for testing
+      await memory.add({
+        role: "assistant",
+        content:
+          "This is a medium length response that should take up more tokens than the previous message",
+      });
+      await memory.add({ role: "user", content: "Short" }); // has 2 tokens
+      await memory.add({ role: "assistant", content: "Last message" }); // has 4 tokens
+    });
+
+    test("should return messages in token limit", async () => {
+      const messages = await memory.getLLM(createMockLLM(1000));
+      expect(messages).toHaveLength(3);
+      expect(messages[0]?.content).toBe(
+        "This is a medium length response that should take up more tokens than the previous message",
+      );
+      expect(messages[1]?.content).toBe("Short");
+      expect(messages[2]?.content).toBe("Last message");
+    });
+
+    test("should only return messages that fit in the token limit", async () => {
+      const messages = await memory.getLLM(createMockLLM(6));
+
+      expect(messages).toHaveLength(1);
+      expect(messages[0]?.content).toBe("Last message");
+    });
+  });
+
+  describe("clear", () => {
+    test("should clear all messages", async () => {
+      await memory.add({ role: "user", content: "Test message" });
+      await memory.add({ role: "assistant", content: "Test response" });
+
+      expect(await memory.get()).toHaveLength(2);
+
+      await memory.clear();
+
+      expect(await memory.get()).toHaveLength(0);
+    });
+
+    test("should allow adding messages after clearing", async () => {
+      await memory.add({ role: "user", content: "First message" });
+      await memory.clear();
+      await memory.add({ role: "user", content: "After clear" });
+
+      const messages = await memory.get();
+      expect(messages).toHaveLength(1);
+      expect(messages[0]?.content).toBe("After clear");
+    });
+  });
+
+  describe("edge cases", () => {
+    test("should handle message with empty content", async () => {
+      await memory.add({ role: "user", content: "" });
+      const messages = await memory.get();
+
+      expect(messages).toHaveLength(1);
+      expect(messages[0]?.content).toBe("");
+    });
+
+    test("should handle different role types", async () => {
+      const roles: ChatMessage["role"][] = [
+        "user",
+        "assistant",
+        "system",
+        "memory",
+        "developer",
+      ];
+
+      for (const role of roles) {
+        await memory.add({ role, content: `Message from ${role}` });
+      }
+
+      const messages = await memory.get();
+      expect(messages).toHaveLength(roles.length);
+
+      roles.forEach((role, index) => {
+        expect(messages[index]?.role).toBe(role);
+        expect(messages[index]?.content).toBe(`Message from ${role}`);
+      });
+    });
+
+    test("should handle Vercel message with data role", async () => {
+      const vercelMessage = {
+        id: "test-id",
+        role: "data",
+        content: "Data message",
+        parts: [{ type: "text", text: "Data message" }],
+        createdAt: new Date(),
+        annotations: [],
+      };
+
+      await memory.add(vercelMessage);
+      const messages = await memory.get();
+
+      expect(messages[0]?.role).toBe("user"); // data role should be mapped to user
+    });
+  });
+});
@@ -0,0 +1,397 @@
+import type { ChatMessage, MessageContentDetail } from "@llamaindex/core/llms";
+import type { MemoryMessage, VercelMessage } from "@llamaindex/core/memory";
+import { VercelMessageAdapter } from "@llamaindex/core/memory";
+import { describe, expect, test } from "vitest";
+
+describe("VercelMessageAdapter", () => {
+  const adapter = new VercelMessageAdapter();
+
+  describe("toLlamaIndexMessage", () => {
+    test("should convert basic Vercel message to LlamaIndex message", () => {
+      const vercelMessage: VercelMessage = {
+        id: "test-id",
+        role: "user",
+        content: "Hello, world!",
+        parts: [{ type: "text", text: "Hello, world!" }],
+        createdAt: new Date(),
+        annotations: [],
+      };
+
+      const result = adapter.toMemory(vercelMessage);
+
+      expect(result).toEqual({
+        id: "test-id",
+        role: "user",
+        content: "Hello, world!",
+        annotations: [],
+        createdAt: vercelMessage.createdAt,
+      });
+    });
+
+    test("should handle all supported Vercel message roles", () => {
+      const roles: Array<VercelMessage["role"]> = [
+        "system",
+        "user",
+        "assistant",
+        "data",
+      ];
+
+      roles.forEach((role) => {
+        const vercelMessage: VercelMessage = {
+          id: "test-id",
+          role,
+          content: `Message from ${role}`,
+          parts: [{ type: "text", text: `Message from ${role}` }],
+          createdAt: new Date(),
+          annotations: [],
+        };
+
+        const result = adapter.toMemory(vercelMessage);
+
+        // Data role should be mapped to user
+        const expectedRole = role === "data" ? "user" : role;
+        expect(result.role).toBe(expectedRole);
+        expect(result.content).toBe(`Message from ${role}`);
+      });
+    });
+
+    test("should convert file parts to MessageContent", () => {
+      const vercelMessage: VercelMessage = {
+        id: "test-id",
+        role: "user",
+        content: "File message",
+        parts: [
+          { type: "file", data: "base64data", mimeType: "image/png" },
+          { type: "text", text: "Description" },
+        ],
+        createdAt: new Date(),
+        annotations: [],
+      };
+
+      const result = adapter.toMemory(vercelMessage);
+
+      expect(result.content).toEqual([
+        { type: "file", data: "base64data", mimeType: "image/png" },
+        { type: "text", text: "Description" },
+      ]);
+    });
+
+    test("should handle empty parts array", () => {
+      const vercelMessage: VercelMessage = {
+        id: "test-id",
+        role: "user",
+        content: "Fallback content",
+        parts: [],
+        createdAt: new Date(),
+        annotations: [],
+      };
+
+      const result = adapter.toMemory(vercelMessage);
+
+      expect(result.content).toBe("Fallback content");
+    });
+
+    test("should handle single text part", () => {
+      const vercelMessage: VercelMessage = {
+        id: "test-id",
+        role: "user",
+        content: "Original content",
+        parts: [{ type: "text", text: "Single text part" }],
+        createdAt: new Date(),
+        annotations: [],
+      };
+
+      const result = adapter.toMemory(vercelMessage);
+
+      expect(result.content).toBe("Single text part");
+    });
+  });
+
+  describe("toUIMessage", () => {
+    test("should convert basic MemoryMessage to Vercel message", () => {
+      const memoryMessage: MemoryMessage = {
+        id: "test-id",
+        role: "user",
+        content: "Hello, LlamaIndex!",
+        createdAt: new Date(),
+        annotations: [],
+      };
+
+      const result = adapter.fromMemory(memoryMessage);
+
+      expect(result).toMatchObject({
+        id: "test-id",
+        role: "user",
+        content: "Hello, LlamaIndex!",
+        parts: [{ type: "text", text: "Hello, LlamaIndex!" }],
+        annotations: [],
+      });
+    });
+
+    test("should convert MemoryMessage with options to Vercel message", () => {
+      const createdAt = new Date();
+      const annotations = ["test"];
+
+      const memoryMessage: MemoryMessage = {
+        id: "test-id",
+        role: "user",
+        content: "Hello, LlamaIndex!",
+        createdAt,
+        annotations,
+      };
+
+      const result = adapter.fromMemory(memoryMessage);
+
+      expect(result).toMatchObject({
+        role: "user",
+        content: "Hello, LlamaIndex!",
+        parts: [{ type: "text", text: "Hello, LlamaIndex!" }],
+        id: "test-id",
+        createdAt,
+        annotations,
+      });
+    });
+
+    test("should handle all MemoryMessage roles", () => {
+      const roles: Array<MemoryMessage["role"]> = [
+        "user",
+        "assistant",
+        "system",
+        "memory",
+        "developer",
+      ];
+
+      roles.forEach((role) => {
+        const memoryMessage: MemoryMessage = {
+          id: "test-id",
+          role,
+          content: `Message from ${role}`,
+          createdAt: new Date(),
+          annotations: [],
+        };
+
+        const result = adapter.fromMemory(memoryMessage);
+
+        // Memory role should be mapped to system, developer to user
+        let expectedRole: VercelMessage["role"];
+        switch (role) {
+          case "memory":
+            expectedRole = "system";
+            break;
+          case "developer":
+            expectedRole = "user";
+            break;
+          default:
+            expectedRole = role as VercelMessage["role"];
+        }
+
+        expect(result.role).toBe(expectedRole);
+        expect(result.content).toBe(`Message from ${role}`);
+      });
+    });
+
+    test("should convert multi-modal content to parts", () => {
+      const memoryMessage: MemoryMessage = {
+        id: "test-id",
+        role: "user",
+        content: [
+          { type: "text", text: "Text content" },
+          {
+            type: "image_url",
+            image_url: { url: "https://example.com/image.jpg" },
+          },
+          { type: "file", data: "base64data", mimeType: "application/pdf" },
+        ] as MessageContentDetail[],
+      };
+
+      const result = adapter.fromMemory(memoryMessage);
+
+      expect(result.parts).toEqual([
+        { type: "text", text: "Text content" },
+        { type: "text", text: "[Image URL: https://example.com/image.jpg]" },
+        { type: "file", data: "base64data", mimeType: "file" },
+      ]);
+      expect(result.content).toBe("Text content");
+    });
+
+    test("should handle different media types", () => {
+      const memoryMessage: MemoryMessage = {
+        id: "test-id",
+        role: "user",
+        content: [
+          { type: "audio", data: "audio-data", mimeType: "audio/mp3" },
+          { type: "video", data: "video-data", mimeType: "video/mp4" },
+          { type: "image", data: "image-data", mimeType: "image/png" },
+        ] as MessageContentDetail[],
+      };
+
+      const result = adapter.fromMemory(memoryMessage);
+
+      expect(result.parts).toEqual([
+        { type: "file", data: "audio-data", mimeType: "audio" },
+        { type: "file", data: "video-data", mimeType: "video" },
+        { type: "file", data: "image-data", mimeType: "image" },
+      ]);
+    });
+
+    test("should handle unknown content types", () => {
+      const memoryMessage: MemoryMessage = {
+        id: "test-id",
+        role: "user",
+        content: [
+          {
+            type: "unknown",
+            data: "unknown-data",
+          } as unknown as MessageContentDetail,
+        ],
+      };
+
+      const result = adapter.fromMemory(memoryMessage);
+
+      expect(result.parts).toEqual([
+        {
+          type: "text",
+          text: JSON.stringify({ type: "unknown", data: "unknown-data" }),
+        },
+      ]);
+    });
+  });
+
+  describe("isVercelMessage", () => {
+    test("should return true for valid Vercel message", () => {
+      const validMessage: VercelMessage = {
+        id: "test-id",
+        role: "user",
+        content: "Test content",
+        parts: [],
+        createdAt: new Date(),
+        annotations: [],
+      };
+
+      expect(adapter.isCompatible(validMessage)).toBe(true);
+    });
+
+    test("should return true for all valid roles", () => {
+      const roles: Array<VercelMessage["role"]> = [
+        "system",
+        "user",
+        "assistant",
+        "data",
+      ];
+
+      roles.forEach((role) => {
+        const message = {
+          id: "test-id",
+          role,
+          content: "Test content",
+          parts: [],
+        };
+
+        expect(adapter.isCompatible(message)).toBe(true);
+      });
+    });
+  });
+
+  // Negative cases for `isCompatible`: none of these inputs is a Vercel
+  // message, so every expectation in this suite is `false`.
+  describe("isCompatible with non-Vercel messages", () => {
+    test("should return false for a LlamaIndex ChatMessage", () => {
+      const chatMessage: ChatMessage = {
+        role: "user",
+        content: "Test content",
+      };
+
+      expect(adapter.isCompatible(chatMessage)).toBe(false);
+    });
+
+    test("should return false for LlamaIndex messages of every role", () => {
+      const roles: Array<ChatMessage["role"]> = [
+        "user",
+        "assistant",
+        "system",
+        "memory",
+        "developer",
+      ];
+
+      roles.forEach((role) => {
+        const message = {
+          role,
+          content: "Test content",
+        };
+
+        expect(adapter.isCompatible(message)).toBe(false);
+      });
+    });
+
+    test("should return false for invalid message structures", () => {
+      const invalidMessages = [
+        null,
+        undefined,
+        "string",
+        123,
+        {},
+        { role: "user" }, // missing content
+        { content: "test" }, // missing role
+        { role: "invalid", content: "test" }, // invalid role
+        { role: "user", content: 123 }, // invalid content type (not string or array)
+      ];
+
+      invalidMessages.forEach((message) => {
+        expect(adapter.isCompatible(message)).toBe(false);
+      });
+    });
+  });
+
+  describe("edge cases and error handling", () => {
+    test("should handle conversion with undefined optional fields", () => {
+      const vercelMessage = {
+        id: "test-id",
+        role: "user" as const,
+        content: "Test content",
+        parts: [{ type: "text" as const, text: "Test content" }],
+        // missing optional fields
+      };
+
+      const result = adapter.toMemory(vercelMessage);
+      expect(result.role).toBe("user");
+      expect(result.content).toBe("Test content");
+    });
+
+    test("should handle empty string content", () => {
+      const memoryMessage: MemoryMessage = {
+        id: "test-id",
+        role: "user",
+        content: "",
+      };
+
+      const result = adapter.fromMemory(memoryMessage);
+      expect(result.content).toBe("");
+      expect(result.parts).toEqual([{ type: "text", text: "" }]);
+    });
+
+    test("should handle empty array content", () => {
+      const memoryMessage: MemoryMessage = {
+        id: "test-id",
+        role: "user",
+        content: [],
+      };
+
+      const result = adapter.fromMemory(memoryMessage);
+      expect(result.content).toBe("");
+      expect(result.parts).toEqual([]);
+    });
+
+    test("should preserve the message ID across a round trip", () => {
+      const memoryMessage: MemoryMessage = {
+        id: "test-id",
+        role: "user",
+        content: "Test",
+      };
+
+      const vercelMessage = adapter.fromMemory(memoryMessage);
+      const roundTripped = adapter.toMemory(vercelMessage);
+
+      // The conversion tests in this file expect IDs to be carried over,
+      // not regenerated, so both conversions must keep the original ID.
+      expect(vercelMessage.id).toBe(memoryMessage.id);
+      expect(roundTripped.id).toBe(vercelMessage.id);
+    });
+  });
+});
@@ -0,0 +1,118 @@
+import {
+  createMemory,
+  loadMemory,
+  type MemoryMessage,
+} from "@llamaindex/core/memory";
+import { describe, expect, it } from "vitest";
+
+describe("Memory Snapshot", () => {
+  it("should create a snapshot of empty memory", () => {
+    // A fresh memory is expected to serialize to a JSON string of empty state.
+    const serialized = createMemory().snapshot();
+    const decoded: unknown = JSON.parse(serialized);
+    const emptyState = {
+      messages: [],
+      memoryCursor: 0,
+    };
+    expect(typeof serialized).toBe("string");
+    expect(decoded).toEqual(emptyState);
+  });
+
+  it("should create a snapshot with messages", async () => {
+    const memory = createMemory();
+    const message1: MemoryMessage = {
+      id: "test-id-1", // distinct ids make the order assertions below meaningful
+      role: "user",
+      content: "Hello",
+    };
+    const message2: MemoryMessage = {
+      id: "test-id-2",
+      role: "assistant",
+      content: "Hi there!",
+    };
+    // Insert in a known order so each snapshot slot can be checked by id.
+    await memory.add(message1);
+    await memory.add(message2);
+
+    const snapshot = memory.snapshot();
+    const parsedSnapshot = JSON.parse(snapshot);
+
+    expect(typeof snapshot).toBe("string");
+    expect(parsedSnapshot.messages).toHaveLength(2);
+    expect(parsedSnapshot.messages[0].id).toBe(message1.id);
+    expect(parsedSnapshot.messages[1].id).toBe(message2.id);
+  });
+
+  it("should load memory from snapshot", async () => {
+    const source = createMemory();
+    const seed: MemoryMessage = {
+      id: "test-id",
+      role: "user",
+      content: "Test message",
+    };
+    await source.add(seed);
+
+    // Round trip: snapshot -> loadMemory -> snapshot should give equal state.
+    const firstSnapshot = source.snapshot();
+    const restored = loadMemory(firstSnapshot);
+    const secondSnapshot = restored.snapshot();
+
+    expect(JSON.parse(secondSnapshot)).toEqual(JSON.parse(firstSnapshot));
+  });
+
+  it("should load memory with correct messages", async () => {
+    const userMessage: MemoryMessage = {
+      id: "test-id-1",
+      role: "user",
+      content: "First message",
+    };
+    const assistantMessage: MemoryMessage = {
+      id: "test-id-2",
+      role: "assistant",
+      content: "Second message",
+    };
+
+    // The snapshot is hand-built and carries only a `messages` array.
+    const handBuiltSnapshot = JSON.stringify({
+      messages: [userMessage, assistantMessage],
+    });
+    const memory = loadMemory(handBuiltSnapshot);
+
+    const plainMessages = await memory.get();
+    expect(plainMessages).toHaveLength(2);
+    expect(plainMessages[0]?.content).toBe(userMessage.content);
+    expect(plainMessages[1]?.content).toBe(assistantMessage.content);
+
+    const vercelMessages = await memory.get({ type: "vercel" });
+    expect(vercelMessages).toHaveLength(2);
+    expect(vercelMessages[0]?.id).toBe(userMessage.id);
+    expect(vercelMessages[1]?.id).toBe(assistantMessage.id);
+  });
+
+  it("should create independent memory instances", async () => {
+    const source = createMemory();
+    const firstMessage: MemoryMessage = {
+      id: "test-id",
+      role: "user",
+      content: "Original message",
+    };
+    await source.add(firstMessage);
+
+    // Restore a second memory from the snapshot, then add only to that one.
+    const snapshot = source.snapshot();
+    const copy = loadMemory(snapshot);
+    const extraMessage: MemoryMessage = {
+      id: "test-id-2",
+      role: "user",
+      content: "New message",
+    };
+    await copy.add(extraMessage);
+
+    const sourceMessages = await source.get();
+    const copyMessages = await copy.get();
+
+    // Expectation: the source still has one message, the copy has two.
+    expect(sourceMessages).toHaveLength(1);
+    expect(copyMessages).toHaveLength(2);
+  });
+});
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
github-actions[bot]	7a2485cca2	Release 0.11.12 (#2050 ) Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>	2025-07-02 11:41:55 +07:00
Marcus Schiesser	1329186a23	docs: clarify how to run docs	2025-07-02 11:33:48 +07:00
dependabot[bot]	5d6e7384f5	chore(deps-dev): bump @modelcontextprotocol/server-filesystem from 2025.3.28 to 2025.7.1 (#2055 )	2025-07-02 11:26:18 +07:00
allen	f2dfd305fb	implement bm25 retriever (#2045 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-07-02 11:22:47 +07:00
Huu Le	3cd8a573df	feat: update interpreter to always upload all files in the configured directory (#2057 )	2025-07-02 10:57:04 +07:00
Laurie Voss	09c6077f6e	Import path for llamaparsereader (#2056 )	2025-07-01 16:51:25 -07:00
Logan	14cc65b4e3	add google analytics (#2053 ) Co-authored-by: Alex Yang <himself65@outlook.com>	2025-07-01 11:18:14 -07:00
Marcus Schiesser	c544d8f67c	docs: review and update memory doc	2025-07-01 15:10:43 +07:00
Huu Le	d578889e21	feat: new memory api (#2028 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-07-01 09:30:49 +07:00
Marcus Schiesser	9f745d1941	chore: revert to wrong opus change	2025-07-01 09:07:46 +07:00
Alex Yang	f292e94dcd	fix: change default claude model (#2052 )	2025-06-30 15:19:40 -07:00
Marcus Schiesser	0fcc92f632	fix: sentence splitter must not trim whitespaces (#2046 )	2025-06-30 17:32:04 +07:00
Marcus Schiesser	515a8b9111	fix: error logging for fromPersistPath (#2049 )	2025-06-30 13:41:13 +07:00
github-actions[bot]	7e8efc6284	Release @llamaindex/tools@0.1.2 (#2048 )	2025-06-30 11:40:54 +07:00
Wassim Chegham	0fcf65126d	chore: export type MCPClientOptions (#2047 ) Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>	2025-06-28 10:55:07 +07:00
github-actions[bot]	a50acf634c	Release 0.11.11 (#2044 ) Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>	2025-06-27 14:51:09 +07:00
Thuc Pham	7039e1a214	chore: migrate to @google/genai SDK (#2038 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-06-27 12:09:26 +07:00
github-actions[bot]	785d010cd3	Release 0.11.10 (#2037 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-06-26 14:29:33 +07:00
Marcus Schiesser	b878032131	fix release step	2025-06-26 14:18:56 +07:00
Marcus Schiesser	f7ec293a0f	chore: Update workflow-core (#2042 )	2025-06-26 14:03:03 +07:00
jerinthomascarmel	49a5e0a8cf	feat(readers): add ExcelReader for parsing Excel files (run-llama#1959) (#2033 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de> Co-authored-by: leehuwuj <leehuwuj@gmail.com>	2025-06-26 11:15:19 +07:00
Logan	118924799a	Rename llama-flow -> workflows in docs (#2040 )	2025-06-25 15:52:04 -07:00
allen	ec8f673dae	support filter to supabase vector search (#2036 )	2025-06-25 16:17:54 +07:00
github-actions[bot]	85039a5360	Release @llamaindex/tools@0.1.0 (#2034 )	2025-06-24 12:32:24 +07:00
Marcus Schiesser	d7305edb53	fix changesets	2025-06-24 12:26:09 +07:00
Huu Le	096bf2bda1	feat: Add support for StreamableHTTP MCP Client (#2032 )	2025-06-24 11:40:34 +07:00
jerinthomascarmel	c5846bd7dc	feat(readers): add XMLReader for parsing XML files (#1846 ) (#2031 ) Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>	2025-06-24 10:46:32 +07:00