Release 0.11.21 (#2128 )

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
feat: VectoryMemoryBlock (#2110 )
2026-07-01 22:14:03 -04:00 · 2025-07-22 12:23:58 +08:00 · 2025-07-22 12:18:09 +08:00 · 2025-07-22 11:30:01 +08:00 · 2025-07-21 15:40:31 -06:00 · 2025-07-21 15:30:37 -06:00
154 changed files with 2317 additions and 233 deletions
@@ -1,5 +1,47 @@
 # @llamaindex/doc

+## 0.2.44
+
+### Patch Changes
+
+- 38da40b: feat: VectoryMemoryBlock
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+  - @llamaindex/cloud@4.0.26
+  - llamaindex@0.11.21
+  - @llamaindex/node-parser@2.0.17
+  - @llamaindex/openai@0.4.12
+  - @llamaindex/readers@3.1.16
+  - @llamaindex/workflow@1.1.17
+
+## 0.2.43
+
+### Patch Changes
+
+- ea15e75: Minor updates in deployment docs
+
+## 0.2.42
+
+### Patch Changes
+
+- a8ec08c: fix: ensure correct message content in agent workflow
+- Updated dependencies [a8ec08c]
+- Updated dependencies [2967d57]
+  - @llamaindex/core@0.6.16
+  - @llamaindex/workflow@1.1.16
+  - @llamaindex/cloud@4.0.25
+  - llamaindex@0.11.20
+  - @llamaindex/node-parser@2.0.16
+  - @llamaindex/openai@0.4.11
+  - @llamaindex/readers@3.1.15
+
+## 0.2.41
+
+### Patch Changes
+
+- Updated dependencies [856dd8c]
+  - @llamaindex/openai@0.4.10
+
 ## 0.2.40

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/doc",
-  "version": "0.2.40",
+  "version": "0.2.44",
  "private": true,
  "scripts": {
    "postinstall": "fumadocs-mdx",
@@ -1,6 +1,6 @@
 import { AIProvider } from "@/actions";
 import { TooltipProvider } from "@/components/ui/tooltip";
-import { GoogleAnalytics } from "@next/third-parties/google";
+import { GoogleAnalytics, GoogleTagManager } from "@next/third-parties/google";
 import { RootProvider } from "fumadocs-ui/provider";
 import { Inter } from "next/font/google";
 import type { ReactNode } from "react";
@@ -36,6 +36,7 @@ export default function Layout({ children }: { children: ReactNode }) {
          LlamaIndex.TS - Build LLM-powered document agents and workflows
        </title>
      </head>
+      <GoogleTagManager gtmId="GTM-WWRFB36R" />
      <body className="flex min-h-screen flex-col">
        <TooltipProvider>
          <AIProvider>
@@ -77,7 +77,7 @@ export async function POST(request: NextRequest) {
    const agent = await initializeAgent();
    const result = await agent.run(message);
    
-    return NextResponse.json({ response: result.result });
+    return NextResponse.json({ response: result.data });
  } catch (error) {
    console.error("Chat error:", error);
    return NextResponse.json(
@@ -132,7 +132,7 @@ export default async function handler(
    const agent = await initializeAgent();
    const result = await agent.run(message);
    
-    res.json({ response: result.result });
+    res.json({ response: result.data });
  } catch (error) {
    console.error("Chat error:", error);
    res.status(500).json({ error: "Internal server error" });
@@ -220,7 +220,7 @@ export async function POST(request: NextRequest) {
    });

    const result = await myAgent.run(message);
-    return NextResponse.json({ response: result.result });
+    return NextResponse.json({ response: result.data });
  } catch (error) {
    return NextResponse.json({ error: error.message }, { status: 500 });
  }
@@ -233,11 +233,40 @@ Implement streaming for better user experience:

 ```typescript
 // app/api/chat-stream/route.ts
+import { agent } from "@llamaindex/workflow";
+import { tool } from "llamaindex";
+import { openai } from "@llamaindex/openai";
 import { agentStreamEvent } from "@llamaindex/workflow";
 import { NextRequest } from "next/server";
+import { z } from "zod";

-// Assume myAgent is initialized elsewhere
-declare const myAgent: any;
+// Initialize agent once (consider using a singleton pattern)
+let myAgent: any = null;
+
+async function initializeAgent() {
+  if (myAgent) return myAgent;
+  
+  try {
+    const greetTool = tool({
+      name: "greet",
+      description: "Greets a user with their name",
+      parameters: z.object({
+        name: z.string(),
+      }),
+      execute: ({ name }) => `Hello, ${name}! How can I help you today?`,
+    });
+
+    myAgent = agent({
+      tools: [greetTool],
+      llm: openai({ model: "gpt-4o-mini" }),
+    });
+    
+    return myAgent;
+  } catch (error) {
+    console.error("Failed to initialize agent:", error);
+    throw error;
+  }
+}

 export async function POST(request: NextRequest) {
  const { message } = await request.json();
@@ -245,9 +274,10 @@ export async function POST(request: NextRequest) {
  const stream = new ReadableStream({
    async start(controller) {
      try {
-        const context = myAgent.runStream(message);
+        const agent = await initializeAgent();
+        const events = agent.runStream(message);
        
-        for await (const event of context) {
+        for await (const event of events) {
          if (agentStreamEvent.include(event)) {
            controller.enqueue(new TextEncoder().encode(event.data.delta));
          }
@@ -63,7 +63,7 @@ app.post('/api/chat', async (req, res) => {
  try {
    const { message } = req.body;
    const result = await myAgent.run(message);
-    res.json({ response: result.result });
+    res.json({ response: result.data });
  } catch (error) {
    res.status(500).json({ error: 'Chat failed' });
  }
@@ -110,7 +110,7 @@ fastify.post('/api/chat', async (request, reply) => {
  try {
    const { message } = request.body as { message: string };
    const result = await myAgent.run(message);
-    return { response: result.result };
+    return { response: result.data };
  } catch (error) {
    reply.status(500).send({ error: 'Chat failed' });
  }
@@ -162,7 +162,7 @@ app.post("/api/chat", async (c) => {
  
  try {
    const result = await myAgent.run(message);
-    return c.json({ response: result.result });
+    return c.json({ response: result.data });
  } catch (error) {
    return c.json({ error: error.message }, 500);
  }
@@ -187,9 +187,9 @@ app.post('/api/chat-stream', async (req, res) => {
  });
  
  try {
-    const context = myAgent.runStream(message);
+    const events = myAgent.runStream(message);
    
-    for await (const event of context) {
+    for await (const event of events) {
      if (agentStreamEvent.include(event)) {
        res.write(event.data.delta);
      }
@@ -34,7 +34,7 @@ export default {
      const { message } = await request.json();
      const result = await myAgent.run(message);
      
-      return new Response(JSON.stringify({ response: result.result }), {
+      return new Response(JSON.stringify({ response: result.data }), {
        headers: { "Content-Type": "application/json" },
      });
    } catch (error) {
@@ -83,7 +83,7 @@ export default async function handler(req, res) {
  
  try {
    const result = await myAgent.run(message);
-    res.json({ response: result.result });
+    res.json({ response: result.data });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
@@ -124,7 +124,7 @@ export async function POST(request: NextRequest) {
    });

    const result = await myAgent.run(message);
-    return NextResponse.json({ response: result.result });
+    return NextResponse.json({ response: result.data });
  } catch (error) {
    return NextResponse.json({ error: error.message }, { status: 500 });
  }
@@ -173,7 +173,7 @@ export const handler: APIGatewayProxyHandler = async (event, context) => {
        "Content-Type": "application/json",
        "Access-Control-Allow-Origin": "*",
      },
-      body: JSON.stringify({ response: result.result }),
+      body: JSON.stringify({ response: result.data }),
    };
  } catch (error) {
    return {
@@ -222,7 +222,7 @@ export const handler: Handler = async (event, context) => {
    
    return {
      statusCode: 200,
-      body: JSON.stringify({ response: result.result }),
+      body: JSON.stringify({ response: result.data }),
    };
  } catch (error) {
    return {
@@ -34,6 +34,7 @@ const jokeAgent = agent({
 // Run the workflow
 const result = await jokeAgent.run("Tell me something funny");
 console.log(result.data.result); // Baby Llama is called cria
+console.log(result.data.message); // { role: 'assistant', content: 'Baby Llama is called cria' }
 ```

 ### Event Streaming
@@ -106,34 +106,40 @@ const memory = createMemory({

 Long-term memory is represented as `Memory Block` objects. These objects contain information from previous user sessions or from the beginning of the current conversation. When memory is retrieved (by calling `getLLM`), the short-term and long-term memories are merged together within the given `tokenLimit`. 

-Currently, there are two predefined memory blocks:
+Currently, there are three predefined memory blocks:

 - `staticBlock`: A memory block that stores a static piece of information.
 - `factExtractionBlock`: A memory block that extracts facts from the chat history.
+- `vectorBlock`: A memory block that stores and retrieves chat messages from a vector database using semantic similarity search. Messages are stored individually and retrieved based on their relevance to recent conversation context. It requires a `vectorStore`, which is used to store and retrieve the chat messages.

 This sounds a bit complicated, but it's actually quite simple. Let's look at an example:

 ```ts
-import { createMemory, factExtractionBlock, staticBlock } from "llamaindex";
+import { createMemory, factExtractionBlock, staticBlock, vectorBlock } from "llamaindex";
+import { QdrantVectorStore } from "@llamaindex/qdrant";
+import { OpenAIEmbedding } from "@llamaindex/openai";

 const memoryBlocks= [
  staticBlock({
-    id: "core_info",
    content: "My name is Logan, and I live in Saskatoon. I work at LlamaIndex.",
  }),
  factExtractionBlock({
-    id: "user-extracted_info",
    priority: 1,
    llm: llm,
    maxFacts: 50,
  }),
+  vectorBlock({
+    vectorStore: new QdrantVectorStore({ url: "http://localhost:6333" }),
+    priority: 2,
+  }),
 ];
 ```

-Here, we've setup two memory blocks:
+Here, we've set up three memory blocks:

- `core_info`: A static memory block that stores some core information about the user. This information will always be inserted into the memory. The type used is `MessageContent` to support multi-modal content.
- `extracted_info`: An extracted memory block that will extract information from the chat history. Here we've passed in the `llm` to use to extract facts from the chat history, and set the `maxFacts` to 50. If the number of extracted facts exceeds this limit, the `maxFacts` will be automatically summarized and reduced to leave room for new information.
+- `staticBlock`: A static memory block that stores some core information about the user. This information will always be inserted into the memory. The type used is `MessageContent` to support multi-modal content.
+- `factExtractionBlock`: An extracted memory block that will extract information from the chat history. Here we've passed in the `llm` to use to extract facts from the chat history, and set the `maxFacts` to 50. If the number of extracted facts exceeds this limit, the facts will be automatically summarized and reduced to leave room for new information.
+- `vectorBlock`: A vector memory block that stores chat messages in a vector database and retrieves them from there. Messages are stored individually and retrieved based on their relevance to recent conversation context. Here we've passed in the `vectorStore` to use to store and retrieve the chat messages.

 You'll also notice that we've set the `priority` for the `factExtractionBlock` block. This is used to determine the handling when the memory blocks content (i.e. long-term memory) + short-term memory exceeds the token limit on the `Memory` object.

@@ -158,6 +164,46 @@ When memory is retrieved (using `getLLM`), the short-term and long-term memories

 The amount of short-term memory included is specified by the `shortTermTokenLimitRatio`. If it's set to `0.7`, 70% of the `tokenLimit` is used for short-term memory (not including the static memory block).

+
+#### VectorBlock Configuration Options
+
+The `vectorBlock` offers several configuration options to customize its behavior:
+
+```ts
+vectorBlock({
+  vectorStore: new QdrantVectorStore({ url: "http://localhost:6333" }),
+  priority: 2,
+  retrievalContextWindow: 5, // Number of recent messages to use for context when retrieving
+  formatTemplate: new PromptTemplate({ template: "Context: {{ context }}" }), // Custom formatting template
+  nodePostprocessors: [/* custom postprocessors */], // Apply processing to retrieved nodes
+  queryOptions: {
+    similarityTopK: 3, // Number of top similar results to return (default: 2)
+    mode: VectorStoreQueryMode.DEFAULT, // Query mode for the vector store
+    sessionFilterKey: "session_id", // Metadata key for session filtering (default: "session_id")
+    // Custom filters can be added here - session filter is automatically included
+    filters: {
+      filters: [
+        { key: "custom_field", value: "custom_value", operator: "==" }
+      ],
+      condition: "and"
+    }
+  }
+})
+```
+
+**Key Configuration Options:**
+
+- **`retrievalContextWindow`**: Number of recent messages to consider when creating the retrieval query (default: 5). A larger window provides more context but may be less precise.
+- **`formatTemplate`**: Template for formatting retrieved information before adding to memory. Defaults to a simple context template.
+- **`nodePostprocessors`**: Array of postprocessors to apply to retrieved nodes, useful for filtering or transforming results.
+- **`queryOptions.similarityTopK`**: Number of most similar messages to retrieve from the vector store (default: 2).
+- **`queryOptions.sessionFilterKey`**: Metadata key used to isolate memory between different sessions (default: "session_id").
+- **`queryOptions.filters`**: Additional metadata filters for retrieval. The session filter is automatically added to ensure memory isolation.
+
+**Session Isolation:**
+
+The vectorBlock automatically adds a session filter using the block's ID to ensure that memories from different sessions don't interfere with each other. This filter uses the `sessionFilterKey` (default: "session_id") and can be customized if needed.
+
 ## Persistence with Snapshots

 Save and restore memory state:
@@ -1,5 +1,17 @@
 # @llamaindex/cloudflare-worker-agent-test

+## 0.0.182
+
+### Patch Changes
+
+- llamaindex@0.11.21
+
+## 0.0.181
+
+### Patch Changes
+
+- llamaindex@0.11.20
+
 ## 0.0.180

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloudflare-worker-agent-test",
-  "version": "0.0.180",
+  "version": "0.0.182",
  "type": "module",
  "private": true,
  "scripts": {
@@ -1,5 +1,18 @@
 # @llamaindex/llama-parse-browser-test

+## 0.0.81
+
+### Patch Changes
+
+- @llamaindex/cloud@4.0.26
+
+## 0.0.80
+
+### Patch Changes
+
+- Updated dependencies [2967d57]
+  - @llamaindex/cloud@4.0.25
+
 ## 0.0.79

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/llama-parse-browser-test",
  "private": true,
-  "version": "0.0.79",
+  "version": "0.0.81",
  "type": "module",
  "scripts": {
    "dev": "vite",
@@ -1,5 +1,17 @@
 # @llamaindex/next-agent-test

+## 0.1.182
+
+### Patch Changes
+
+- llamaindex@0.11.21
+
+## 0.1.181
+
+### Patch Changes
+
+- llamaindex@0.11.20
+
 ## 0.1.180

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-agent-test",
-  "version": "0.1.180",
+  "version": "0.1.182",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,17 @@
 # test-edge-runtime

+## 0.1.181
+
+### Patch Changes
+
+- llamaindex@0.11.21
+
+## 0.1.180
+
+### Patch Changes
+
+- llamaindex@0.11.20
+
 ## 0.1.179

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/nextjs-edge-runtime-test",
-  "version": "0.1.179",
+  "version": "0.1.181",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,27 @@
 # @llamaindex/next-node-runtime

+## 0.1.51
+
+### Patch Changes
+
+- llamaindex@0.11.21
+- @llamaindex/huggingface@0.1.22
+- @llamaindex/readers@3.1.16
+
+## 0.1.50
+
+### Patch Changes
+
+- llamaindex@0.11.20
+- @llamaindex/huggingface@0.1.21
+- @llamaindex/readers@3.1.15
+
+## 0.1.49
+
+### Patch Changes
+
+- @llamaindex/huggingface@0.1.20
+
 ## 0.1.48

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-node-runtime-test",
-  "version": "0.1.48",
+  "version": "0.1.51",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,17 @@
 # vite-import-llamaindex

+## 0.0.48
+
+### Patch Changes
+
+- llamaindex@0.11.21
+
+## 0.0.47
+
+### Patch Changes
+
+- llamaindex@0.11.20
+
 ## 0.0.46

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "vite-import-llamaindex",
  "private": true,
-  "version": "0.0.46",
+  "version": "0.0.48",
  "type": "module",
  "scripts": {
    "build": "vite build",
@@ -1,5 +1,17 @@
 # @llamaindex/waku-query-engine-test

+## 0.0.182
+
+### Patch Changes
+
+- llamaindex@0.11.21
+
+## 0.0.181
+
+### Patch Changes
+
+- llamaindex@0.11.20
+
 ## 0.0.180

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/waku-query-engine-test",
-  "version": "0.0.180",
+  "version": "0.0.182",
  "type": "module",
  "private": true,
  "scripts": {
@@ -23,7 +23,7 @@ await test("pinecone", async (t) => {
  });

  const vectorStore = new PineconeVectorStore({
-    embeddingModel: openaiEmbedding,
+    embedModel: openaiEmbedding,
  });

  t.after(async () => {
@@ -1,5 +1,132 @@
 # examples

+## 0.3.33
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+  - @llamaindex/cloud@4.0.26
+  - llamaindex@0.11.21
+  - @llamaindex/node-parser@2.0.17
+  - @llamaindex/anthropic@0.3.19
+  - @llamaindex/assemblyai@0.1.16
+  - @llamaindex/clip@0.0.68
+  - @llamaindex/cohere@0.0.31
+  - @llamaindex/deepinfra@0.0.68
+  - @llamaindex/discord@0.1.16
+  - @llamaindex/google@0.3.16
+  - @llamaindex/huggingface@0.1.22
+  - @llamaindex/jinaai@0.0.28
+  - @llamaindex/mistral@0.1.17
+  - @llamaindex/mixedbread@0.0.31
+  - @llamaindex/notion@0.1.16
+  - @llamaindex/ollama@0.1.17
+  - @llamaindex/openai@0.4.12
+  - @llamaindex/perplexity@0.0.25
+  - @llamaindex/portkey-ai@0.0.59
+  - @llamaindex/replicate@0.0.59
+  - @llamaindex/bm25-retriever@0.0.6
+  - @llamaindex/astra@0.0.31
+  - @llamaindex/azure@0.1.29
+  - @llamaindex/chroma@0.0.31
+  - @llamaindex/elastic-search@0.1.17
+  - @llamaindex/firestore@1.0.24
+  - @llamaindex/milvus@0.1.26
+  - @llamaindex/mongodb@0.0.32
+  - @llamaindex/pinecone@0.1.17
+  - @llamaindex/postgres@0.0.60
+  - @llamaindex/qdrant@0.1.27
+  - @llamaindex/supabase@0.1.18
+  - @llamaindex/upstash@0.0.31
+  - @llamaindex/weaviate@0.0.32
+  - @llamaindex/vercel@0.1.17
+  - @llamaindex/voyage-ai@1.0.23
+  - @llamaindex/readers@3.1.16
+  - @llamaindex/tools@0.1.7
+  - @llamaindex/workflow@1.1.17
+  - @llamaindex/deepseek@0.0.29
+  - @llamaindex/fireworks@0.0.28
+  - @llamaindex/groq@0.0.84
+  - @llamaindex/together@0.0.28
+  - @llamaindex/vllm@0.0.54
+  - @llamaindex/xai@0.0.15
+
+## 0.3.32
+
+### Patch Changes
+
+- Updated dependencies [650eeb1]
+- Updated dependencies [a8ec08c]
+- Updated dependencies [2967d57]
+  - @llamaindex/google@0.3.15
+  - @llamaindex/core@0.6.16
+  - @llamaindex/workflow@1.1.16
+  - @llamaindex/cloud@4.0.25
+  - llamaindex@0.11.20
+  - @llamaindex/node-parser@2.0.16
+  - @llamaindex/anthropic@0.3.18
+  - @llamaindex/assemblyai@0.1.15
+  - @llamaindex/clip@0.0.67
+  - @llamaindex/cohere@0.0.30
+  - @llamaindex/deepinfra@0.0.67
+  - @llamaindex/discord@0.1.15
+  - @llamaindex/huggingface@0.1.21
+  - @llamaindex/jinaai@0.0.27
+  - @llamaindex/mistral@0.1.16
+  - @llamaindex/mixedbread@0.0.30
+  - @llamaindex/notion@0.1.15
+  - @llamaindex/ollama@0.1.16
+  - @llamaindex/openai@0.4.11
+  - @llamaindex/perplexity@0.0.24
+  - @llamaindex/portkey-ai@0.0.58
+  - @llamaindex/replicate@0.0.58
+  - @llamaindex/bm25-retriever@0.0.5
+  - @llamaindex/astra@0.0.30
+  - @llamaindex/azure@0.1.28
+  - @llamaindex/chroma@0.0.30
+  - @llamaindex/elastic-search@0.1.16
+  - @llamaindex/firestore@1.0.23
+  - @llamaindex/milvus@0.1.25
+  - @llamaindex/mongodb@0.0.31
+  - @llamaindex/pinecone@0.1.16
+  - @llamaindex/postgres@0.0.59
+  - @llamaindex/qdrant@0.1.26
+  - @llamaindex/supabase@0.1.17
+  - @llamaindex/upstash@0.0.30
+  - @llamaindex/weaviate@0.0.31
+  - @llamaindex/vercel@0.1.16
+  - @llamaindex/voyage-ai@1.0.22
+  - @llamaindex/readers@3.1.15
+  - @llamaindex/tools@0.1.6
+  - @llamaindex/deepseek@0.0.28
+  - @llamaindex/fireworks@0.0.27
+  - @llamaindex/groq@0.0.83
+  - @llamaindex/together@0.0.27
+  - @llamaindex/vllm@0.0.53
+  - @llamaindex/xai@0.0.14
+
+## 0.3.31
+
+### Patch Changes
+
+- Updated dependencies [d8f4f6a]
+- Updated dependencies [856dd8c]
+  - @llamaindex/supabase@0.1.16
+  - @llamaindex/openai@0.4.10
+  - @llamaindex/clip@0.0.66
+  - @llamaindex/deepinfra@0.0.66
+  - @llamaindex/deepseek@0.0.27
+  - @llamaindex/fireworks@0.0.26
+  - @llamaindex/groq@0.0.82
+  - @llamaindex/huggingface@0.1.20
+  - @llamaindex/jinaai@0.0.26
+  - @llamaindex/perplexity@0.0.23
+  - @llamaindex/azure@0.1.27
+  - @llamaindex/together@0.0.26
+  - @llamaindex/vllm@0.0.52
+  - @llamaindex/xai@0.0.13
+
 ## 0.3.30

 ### Patch Changes
@@ -24,6 +24,7 @@ async function main() {
    state: result.data.state,
  });
  console.log(`${JSON.stringify(caResult, null, 2)}`);
+  console.log("assistant message:", result.data.message);
 }

 main().catch((error) => {
@@ -0,0 +1,150 @@
+/**
+ * Example: Vector Memory Block
+ *
+ * This example demonstrates how to use the VectorMemoryBlock to store and retrieve
+ * conversation history using vector similarity search. The vector memory block
+ * stores messages in a vector store and can retrieve relevant context based on
+ * semantic similarity to recent messages.
+ */
+
+import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";
+import { QdrantVectorStore } from "@llamaindex/qdrant";
+import { createMemory, vectorBlock } from "llamaindex";
+
+// Set up the LLM and embedding model
+const llm = new OpenAI({ model: "gpt-4.1-mini" });
+const embedModel = new OpenAIEmbedding({ model: "text-embedding-3-small" });
+
+// Simulate a conversation with some context
+// This conversation has 9 messages, which is more than the token limit of 100 tokens (set below)
+// The last 4 messages are kept in the short-term memory block (as their tokens fit within the limit)
+// whereas the first 5 messages are added to the long-term memory block (here we use the vector memory block with Qdrant)
+const CONVERSATION_TURNS = [
+  //// This is the first 5 messages that are added to long term memory block (vector memory block)
+  {
+    role: "user",
+    content: "Hi, I'm Sarah and I work as a data scientist at Google.",
+  },
+  {
+    role: "assistant",
+    content:
+      "Hello Sarah! It's great to meet you. Data science at Google must be exciting!",
+  },
+  {
+    role: "user",
+    content:
+      "Yes, I specialize in machine learning and natural language processing.",
+  },
+  {
+    role: "assistant",
+    content: "That's impressive! ML and NLP are fascinating fields.",
+  },
+  {
+    role: "user",
+    content:
+      "I have a PhD in Computer Science from Stanford, and I love hiking on weekends.",
+  },
+
+  //// This is the last 4 messages that are added to short term memory block
+  {
+    role: "assistant",
+    content:
+      "Wow, Stanford PhD! And hiking is a great way to unwind from tech work.",
+  },
+  {
+    role: "user",
+    content: "I also have two cats named Whiskers and Mittens.",
+  },
+  {
+    role: "assistant",
+    content:
+      "Cats make wonderful companions! Whiskers and Mittens are cute names.",
+  },
+  {
+    role: "user",
+    content: "Summary information about Sarah and her cats",
+  },
+];
+
+async function main() {
+  console.log("=== Vector Memory Block Example ===\n");
+
+  /**
+   * Create a vector store. You can quickly get a local instance of Qdrant running with Docker:
+   * ```bash
+   * docker pull qdrant/qdrant
+   * docker run -p 6333:6333 qdrant/qdrant
+   * ```
+   *
+   * Go to http://localhost:6333/dashboard#/collections to see your data
+   */
+  const vectorStore = new QdrantVectorStore({
+    url: "http://localhost:6333",
+    embedModel,
+  });
+
+  // Create a vector memory block using the factory function
+  const vectorMemoryBlock = vectorBlock({
+    vectorStore,
+    priority: 5,
+  });
+
+  // Create a memory store with the vector memory block
+  const memory = createMemory([], {
+    llm,
+    memoryBlocks: [vectorMemoryBlock],
+    tokenLimit: 100,
+    shortTermTokenLimitRatio: 0.7,
+  });
+
+  // Store the conversation history in the vector memory
+  console.log(`Adding ${CONVERSATION_TURNS.length} messages to the memory...`);
+  for (const message of CONVERSATION_TURNS) {
+    await memory.add(message);
+  }
+
+  // Retrieve relevant context for the current user request
+  console.log("Retrieving relevant context...");
+  const chatHistory = await memory.getLLM();
+
+  // You will see there's 1 generated context message from vector memory block, and 4 messages from short term memory block
+  console.log("Chat memory:", chatHistory);
+
+  // Now simulate the assistant responding with context
+  console.log("\nAssistant response with context:");
+  const response = await llm.chat({
+    messages: chatHistory,
+  });
+  console.log(response.message.content);
+
+  // Try adding more messages to the memory
+  const newMessages = [
+    {
+      role: "user",
+      content: "Write a long paragraph about weather in Tokyo",
+    },
+    {
+      role: "assistant",
+      content:
+        "The weather in Tokyo is sunny and warm. The temperature is around 20 degrees Celsius. The weather is very nice and the people are friendly.",
+    },
+    {
+      role: "user",
+      content: "What is the weather in Tokyo?",
+    },
+  ];
+  // Add the new messages to the memory
+  for (const message of newMessages) {
+    await memory.add(message);
+  }
+
+  // Try retrieving the new messages
+  const newChatHistory = await memory.getLLM();
+  // You can see now that new chat history will contain the nodes (separated by `\n`) in the
+  // context message that is generated by the vector memory block
+  // The number of retrieved nodes is set by `similarityTopK` in `queryOptions` of `vectorBlock`
+  // (default `similarityTopK` is 2)
+  console.log("New chat history:", newChatHistory);
+}
+
+main().catch(console.error);
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/examples",
-  "version": "0.3.30",
+  "version": "0.3.33",
  "private": true,
  "scripts": {
    "lint": "eslint .",
@@ -11,52 +11,52 @@
    "@azure/cosmos": "^4.1.1",
    "@azure/identity": "^4.4.1",
    "@azure/search-documents": "^12.1.0",
-    "@llamaindex/anthropic": "^0.3.17",
-    "@llamaindex/assemblyai": "^0.1.14",
-    "@llamaindex/astra": "^0.0.29",
-    "@llamaindex/azure": "^0.1.26",
-    "@llamaindex/bm25-retriever": "^0.0.4",
-    "@llamaindex/chroma": "^0.0.29",
-    "@llamaindex/clip": "^0.0.65",
-    "@llamaindex/cloud": "^4.0.24",
-    "@llamaindex/cohere": "^0.0.29",
-    "@llamaindex/core": "^0.6.15",
-    "@llamaindex/deepinfra": "^0.0.65",
-    "@llamaindex/deepseek": "^0.0.26",
-    "@llamaindex/discord": "^0.1.14",
-    "@llamaindex/elastic-search": "^0.1.15",
+    "@llamaindex/anthropic": "^0.3.19",
+    "@llamaindex/assemblyai": "^0.1.16",
+    "@llamaindex/astra": "^0.0.31",
+    "@llamaindex/azure": "^0.1.29",
+    "@llamaindex/bm25-retriever": "^0.0.6",
+    "@llamaindex/chroma": "^0.0.31",
+    "@llamaindex/clip": "^0.0.68",
+    "@llamaindex/cloud": "^4.0.26",
+    "@llamaindex/cohere": "^0.0.31",
+    "@llamaindex/core": "^0.6.17",
+    "@llamaindex/deepinfra": "^0.0.68",
+    "@llamaindex/deepseek": "^0.0.29",
+    "@llamaindex/discord": "^0.1.16",
+    "@llamaindex/elastic-search": "^0.1.17",
    "@llamaindex/env": "^0.1.30",
-    "@llamaindex/firestore": "^1.0.22",
-    "@llamaindex/fireworks": "^0.0.25",
-    "@llamaindex/google": "^0.3.14",
-    "@llamaindex/groq": "^0.0.81",
-    "@llamaindex/huggingface": "^0.1.19",
-    "@llamaindex/jinaai": "^0.0.25",
-    "@llamaindex/milvus": "^0.1.24",
-    "@llamaindex/mistral": "^0.1.15",
-    "@llamaindex/mixedbread": "^0.0.29",
-    "@llamaindex/mongodb": "^0.0.30",
-    "@llamaindex/node-parser": "^2.0.15",
-    "@llamaindex/notion": "^0.1.14",
-    "@llamaindex/ollama": "^0.1.15",
-    "@llamaindex/openai": "^0.4.9",
-    "@llamaindex/perplexity": "^0.0.22",
-    "@llamaindex/pinecone": "^0.1.15",
-    "@llamaindex/portkey-ai": "^0.0.57",
-    "@llamaindex/postgres": "^0.0.58",
-    "@llamaindex/qdrant": "^0.1.25",
-    "@llamaindex/readers": "^3.1.14",
-    "@llamaindex/replicate": "^0.0.57",
-    "@llamaindex/supabase": "^0.1.15",
-    "@llamaindex/together": "^0.0.25",
-    "@llamaindex/tools": "^0.1.5",
-    "@llamaindex/upstash": "^0.0.29",
-    "@llamaindex/vercel": "^0.1.15",
-    "@llamaindex/vllm": "^0.0.51",
-    "@llamaindex/voyage-ai": "^1.0.21",
-    "@llamaindex/weaviate": "^0.0.30",
-    "@llamaindex/workflow": "^1.1.15",
-    "@llamaindex/xai": "^0.0.12",
+    "@llamaindex/firestore": "^1.0.24",
+    "@llamaindex/fireworks": "^0.0.28",
+    "@llamaindex/google": "^0.3.16",
+    "@llamaindex/groq": "^0.0.84",
+    "@llamaindex/huggingface": "^0.1.22",
+    "@llamaindex/jinaai": "^0.0.28",
+    "@llamaindex/milvus": "^0.1.26",
+    "@llamaindex/mistral": "^0.1.17",
+    "@llamaindex/mixedbread": "^0.0.31",
+    "@llamaindex/mongodb": "^0.0.32",
+    "@llamaindex/node-parser": "^2.0.17",
+    "@llamaindex/notion": "^0.1.16",
+    "@llamaindex/ollama": "^0.1.17",
+    "@llamaindex/openai": "^0.4.12",
+    "@llamaindex/perplexity": "^0.0.25",
+    "@llamaindex/pinecone": "^0.1.17",
+    "@llamaindex/portkey-ai": "^0.0.59",
+    "@llamaindex/postgres": "^0.0.60",
+    "@llamaindex/qdrant": "^0.1.27",
+    "@llamaindex/readers": "^3.1.16",
+    "@llamaindex/replicate": "^0.0.59",
+    "@llamaindex/supabase": "^0.1.18",
+    "@llamaindex/together": "^0.0.28",
+    "@llamaindex/tools": "^0.1.7",
+    "@llamaindex/upstash": "^0.0.31",
+    "@llamaindex/vercel": "^0.1.17",
+    "@llamaindex/vllm": "^0.0.54",
+    "@llamaindex/voyage-ai": "^1.0.23",
+    "@llamaindex/weaviate": "^0.0.32",
+    "@llamaindex/workflow": "^1.1.17",
+    "@llamaindex/xai": "^0.0.15",
    "@notionhq/client": "^4.0.0",
    "@pinecone-database/pinecone": "^4.0.0",
    "@vercel/postgres": "^0.10.0",
@@ -65,7 +65,7 @@
    "commander": "^12.1.0",
    "dotenv": "^17.2.0",
    "js-tiktoken": "^1.0.14",
-    "llamaindex": "^0.11.19",
+    "llamaindex": "^0.11.21",
    "mongodb": "6.7.0",
    "postgres": "^3.4.4",
    "wikipedia": "^2.1.2",
@@ -15,7 +15,7 @@ async function main() {
  const vectorStore = new QdrantVectorStore({
    url: process.env.QDRANT_URL,
    apiKey: process.env.QDRANT_API_KEY,
-    embeddingModel: embedding,
+    embedModel: embedding,
    collectionName: "gemini_test",
  });
  const storageContext = await storageContextFromDefaults({ vectorStore });
@@ -16,7 +16,7 @@ async function main() {
  const vectorStore = new QdrantVectorStore({
    url: process.env.QDRANT_URL,
    apiKey: process.env.QDRANT_API_KEY,
-    embeddingModel: embedding,
+    embedModel: embedding,
    collectionName: "jina_test",
  });
  const storageContext = await storageContextFromDefaults({ vectorStore });
@@ -1,5 +1,17 @@
 # @llamaindex/autotool

+## 8.0.21
+
+### Patch Changes
+
+- llamaindex@0.11.21
+
+## 8.0.20
+
+### Patch Changes
+
+- llamaindex@0.11.20
+
 ## 8.0.19

 ### Patch Changes
@@ -1,5 +1,19 @@
 # @llamaindex/autotool-01-node-example

+## 0.0.129
+
+### Patch Changes
+
+- llamaindex@0.11.21
+- @llamaindex/autotool@8.0.21
+
+## 0.0.128
+
+### Patch Changes
+
+- llamaindex@0.11.20
+- @llamaindex/autotool@8.0.20
+
 ## 0.0.127

 ### Patch Changes
@@ -13,5 +13,5 @@
  "scripts": {
    "start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
  },
-  "version": "0.0.127"
+  "version": "0.0.129"
 }
@@ -6,7 +6,7 @@
    "url": "git+https://github.com/run-llama/LlamaIndexTS.git",
    "directory": "packages/autotool"
  },
-  "version": "8.0.19",
+  "version": "8.0.21",
  "description": "auto transpile your JS function to LLM Agent compatible",
  "files": [
    "dist",
@@ -1,5 +1,20 @@
 # @llamaindex/cloud

+## 4.0.26
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 4.0.25
+
+### Patch Changes
+
+- 2967d57: Default to \_public agent url id
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
 ## 4.0.24

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloud",
-  "version": "4.0.24",
+  "version": "4.0.26",
  "type": "module",
  "license": "MIT",
  "scripts": {
@@ -33,7 +33,7 @@ export class AgentClient<T = unknown> {
    apiKey = getEnv("LLAMA_CLOUD_API_KEY"),
    baseUrl = "https://api.cloud.llamaindex.ai/",
    collection = "default",
-    agentUrlId = "default",
+    agentUrlId = "_public",
  }: {
    apiKey?: string;
    baseUrl?: string;
@@ -127,7 +127,7 @@ export class AgentClient<T = unknown> {
  }

  /**
-   * List agent data
+   * Search agent data
   */
  async search(
    options: SearchAgentDataOptions,
@@ -275,7 +275,8 @@ export interface AgentDataClientOptions<T = unknown> {
  collection?: string;
 }
 /**
- * Create a new AsyncAgentDataClient instance
+ * Create a new AsyncAgentDataClient instance. Does its best to infer an agent url id from the environment.
+ * Pass in the window url and/or env to infer the agent url id from them.
 * @param options - The options for the client
 * @returns A new AgentClient instance
 */
@@ -283,20 +284,34 @@ export function createAgentDataClient<T = unknown>({
  apiKey,
  baseUrl,
  windowUrl,
+  env,
  agentUrlId,
  collection = "default",
 }: {
  apiKey?: string;
  baseUrl?: string;
  windowUrl?: string;
+  env?: Record<string, string>;
  agentUrlId?: string;
  collection?: string;
 } = {}): AgentClient<T> {
+  if (env && !agentUrlId) {
+    agentUrlId =
+      env.LLAMA_DEPLOY_DEPLOYMENT_NAME ||
+      env.NEXT_PUBLIC_LLAMA_DEPLOY_DEPLOYMENT_NAME ||
+      env.VITE_LLAMA_DEPLOY_DEPLOYMENT_NAME;
+  }
  if (windowUrl && !agentUrlId) {
    try {
-      const path = new URL(windowUrl).pathname;
-      // /deployments/<agent-url-id>/ui/ -> ["", "deployments", "<agent-url-id>", "ui"]
-      agentUrlId = path.split("/")[2];
+      const url = new URL(windowUrl);
+      const path = url.pathname;
+      const isLocalhost = // local agents should default to _public, otherwise a full deployment is required
+        url.hostname.includes("localhost") ||
+        url.hostname.includes("127.0.0.1");
+      if (path.startsWith("/deployments/") && !isLocalhost) {
+        // /deployments/<agent-url-id>/ui/ -> ["", "deployments", "<agent-url-id>", "ui"]
+        agentUrlId = path.split("/")[2];
+      }
    } catch (error) {
      console.warn(
        "Failed to infer agent url id from window url, falling back to default",
@@ -1,5 +1,17 @@
 # @llamaindex/core

+## 0.6.17
+
+### Patch Changes
+
+- 38da40b: feat: VectoryMemoryBlock
+
+## 0.6.16
+
+### Patch Changes
+
+- a8ec08c: fix: ensure correct message content in agent workflow
+
 ## 0.6.15

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/core",
  "type": "module",
-  "version": "0.6.15",
+  "version": "0.6.17",
  "description": "LlamaIndex Core Module",
  "exports": {
    "./agent": {
@@ -39,7 +39,9 @@ export abstract class BaseMemoryBlock<
   *
   * @returns The memory block content as an array of ChatMessage.
   */
-  abstract get(): Promise<MemoryMessage<TAdditionalMessageOptions>[]>;
+  abstract get(
+    messages?: MemoryMessage<TAdditionalMessageOptions>[],
+  ): Promise<MemoryMessage<TAdditionalMessageOptions>[]>;

  /**
   * Store the messages in the memory block.
@@ -1,3 +1,4 @@
 export { BaseMemoryBlock } from "./base";
 export { FactExtractionMemoryBlock } from "./fact";
 export { StaticMemoryBlock } from "./static";
+export { VectorMemoryBlock } from "./vector";
@@ -0,0 +1,250 @@
+import type { BaseEmbedding } from "../../embeddings";
+import type { BaseNodePostprocessor } from "../../postprocessor";
+import { BasePromptTemplate, defaultContextSystemPrompt } from "../../prompts";
+import type { NodeWithScore } from "../../schema";
+import { MetadataMode, TextNode } from "../../schema";
+import { extractText } from "../../utils/llms";
+import type {
+  BaseVectorStore,
+  MetadataFilter,
+  VectorStoreQuery,
+} from "../../vector-store";
+import { VectorStoreQueryMode } from "../../vector-store";
+import type { MemoryMessage } from "../types";
+import { BaseMemoryBlock, type MemoryBlockOptions } from "./base";
+
+/**
+ * The options for the vector memory block.
+ */
+export type VectorMemoryBlockOptions = {
+  /**
+   * The vector store to use for retrieval.
+   */
+  vectorStore: BaseVectorStore;
+
+  /**
+   * Maximum number of messages to include for context when retrieving.
+   * @default 5
+   */
+  retrievalContextWindow?: number;
+
+  /**
+   * Template for formatting the retrieved information.
+   * It is formatted with a single `context` variable holding the retrieved text.
+   * @default defaultContextSystemPrompt
+   */
+  formatTemplate?: BasePromptTemplate;
+
+  /**
+   * List of node postprocessors to apply to the retrieved nodes containing messages.
+   *
+   * @default []
+   */
+  nodePostprocessors?: BaseNodePostprocessor[];
+
+  /**
+   * Configuration options for vector store queries when retrieving memory.
+   *
+   * @default
+   * ```typescript
+   * {
+   *   similarityTopK: 2,                    // Number of top similar results to return
+   *   mode: VectorStoreQueryMode.DEFAULT,   // Query mode for the vector store
+   *   sessionFilterKey: "session_id",       // Metadata key for session filtering
+   *   filters: {
+   *     filters: [
+   *       { key: "session_id", value: "<current block id>", operator: "==" }
+   *     ],
+   *     condition: "and"
+   *   }
+   * }
+   * ```
+   *
+   * Note: A session filter is automatically added to ensure memory isolation between blocks.
+   * If custom filters are provided, the session filter will be merged with them.
+   */
+  queryOptions?: Partial<VectorMemoryBlockQueryOptions>;
+} & MemoryBlockOptions;
+
+/**
+ * Query settings used by the vector memory block.
+ *
+ * Every `VectorStoreQuery` field except `queryEmbedding` and `queryStr`
+ * (both are computed for each retrieval), plus `sessionFilterKey`: the
+ * metadata key under which the block id is stored and filtered on.
+ */
+export type VectorMemoryBlockQueryOptions = Omit<
+  VectorStoreQuery,
+  "queryEmbedding" | "queryStr"
+> & {
+  sessionFilterKey: string;
+};
+
+/**
+ * A memory block that retrieves relevant information from a vector store.
+ *
+ * `put` embeds incoming messages and stores them as one text node tagged with
+ * this block's id; `get` embeds the most recent messages, queries the vector
+ * store with a filter on that id, and returns the retrieved text as a single
+ * formatted memory message.
+ */
+export class VectorMemoryBlock<
+  TAdditionalMessageOptions extends object = object,
+> extends BaseMemoryBlock<TAdditionalMessageOptions> {
+  private readonly vectorStore: BaseVectorStore;
+  private readonly retrievalContextWindow: number;
+  private readonly formatTemplate: BasePromptTemplate;
+  private readonly nodePostprocessors: BaseNodePostprocessor[];
+  private readonly queryOptions: VectorMemoryBlockQueryOptions;
+
+  /**
+   * @param options - Block configuration; unset fields fall back to the
+   * defaults applied below.
+   * @throws Error if `options.vectorStore.storesText` is falsy.
+   */
+  constructor(options: VectorMemoryBlockOptions) {
+    super(options);
+
+    // Validate vector store
+    if (!options.vectorStore.storesText) {
+      throw new Error(
+        "vectorStore must store text to be used as a retrieval memory block",
+      );
+    }
+
+    this.vectorStore = options.vectorStore;
+    this.retrievalContextWindow = options.retrievalContextWindow ?? 5;
+    this.queryOptions = this.buildDefaultQueryOptions(options.queryOptions);
+    this.formatTemplate = options.formatTemplate ?? defaultContextSystemPrompt;
+    this.nodePostprocessors = options.nodePostprocessors ?? [];
+  }
+
+  /**
+   * The embedding model of the underlying vector store, used for both
+   * storing and querying.
+   */
+  get embedModel(): BaseEmbedding {
+    return this.vectorStore.embedModel;
+  }
+
+  /**
+   * Retrieve stored memory relevant to the given messages.
+   *
+   * @param messages - Messages to build the retrieval query from. Only the
+   * last `retrievalContextWindow` messages are used.
+   * @returns A single `memory`-role message containing the formatted retrieved
+   * text, or an empty array when there is no query text or nothing was found.
+   */
+  async get(
+    messages: MemoryMessage<TAdditionalMessageOptions>[] = [],
+  ): Promise<MemoryMessage<TAdditionalMessageOptions>[]> {
+    if (!messages?.length) return [];
+
+    // Query with at most the last `retrievalContextWindow` messages.
+    // `slice` with a negative start returns the whole array when it is shorter
+    // than the window, so a window of 1 correctly yields only the last message.
+    // A window below 1 is treated as "no limit" and uses every message.
+    const context =
+      this.retrievalContextWindow >= 1
+        ? messages.slice(-this.retrievalContextWindow)
+        : messages;
+    const queryText = context
+      .map((message) => extractText(message.content))
+      .join("\n\n");
+    if (!queryText) return [];
+
+    // Create and execute the query
+    const queryEmbedding = await this.embedModel.getTextEmbedding(queryText);
+    const query: VectorStoreQuery = {
+      queryStr: queryText,
+      queryEmbedding,
+      ...this.queryOptions,
+    };
+    const results = await this.vectorStore.query(query);
+    if (!results.nodes?.length) return [];
+
+    // Pair each node with its similarity score (if the store returned one)
+    const nodesWithScores: NodeWithScore[] = results.nodes.map(
+      (node, index) => ({
+        node,
+        score: results.similarities?.[index] ?? undefined,
+      }),
+    );
+
+    // Apply postprocessors in order; each one receives the previous output
+    let processedNodes = nodesWithScores;
+    for (const postprocessor of this.nodePostprocessors) {
+      processedNodes = await postprocessor.postprocessNodes(
+        processedNodes,
+        queryText,
+      );
+    }
+
+    // Format the results
+    const retrievedText = processedNodes
+      .map(({ node }) => node.getContent(MetadataMode.NONE))
+      .join("\n\n");
+
+    const formattedText = this.formatTemplate.format({
+      context: retrievedText,
+    });
+
+    // Return as memory message
+    return [
+      {
+        id: this.id,
+        role: "memory",
+        content: formattedText,
+      } as MemoryMessage<TAdditionalMessageOptions>,
+    ];
+  }
+
+  /**
+   * Store messages in the vector store.
+   *
+   * All messages with non-empty text are serialized into one text node, which
+   * is embedded and tagged with this block's id under `sessionFilterKey`.
+   *
+   * @param messages - The messages to store; messages without text are skipped.
+   */
+  async put(
+    messages: MemoryMessage<TAdditionalMessageOptions>[],
+  ): Promise<void> {
+    if (messages.length === 0) return;
+
+    // Format messages with role, text content, and additional info
+    const texts: string[] = [];
+
+    for (const message of messages) {
+      const text = extractText(message.content);
+      if (!text) continue;
+
+      let messageText = text;
+
+      // Add additional info if present
+      const additionalInfo = (message.options ?? {}) as Record<string, unknown>;
+      if (Object.keys(additionalInfo).length > 0) {
+        messageText += `\nAdditional Info: (${JSON.stringify(additionalInfo)})`;
+      }
+
+      texts.push(`<message role='${message.role}'>${messageText}</message>`);
+    }
+
+    if (texts.length === 0) return;
+
+    // Create text node with session metadata
+    const textNode = new TextNode({
+      text: texts.join("\n"),
+      metadata: { [this.queryOptions.sessionFilterKey]: this.id },
+    });
+
+    // Get embedding for the text
+    textNode.embedding = await this.embedModel.getTextEmbedding(textNode.text);
+
+    // Add to vector store
+    await this.vectorStore.add([textNode]);
+  }
+
+  /**
+   * Resolve the query options, filling in defaults and making sure a session
+   * filter for this block's id is present.
+   *
+   * The caller's `options` object (including its `filters` array) is never
+   * mutated, so the same options can be shared between blocks.
+   *
+   * @param options - User-provided query options, if any.
+   * @returns The complete query options used for every retrieval.
+   */
+  private buildDefaultQueryOptions(
+    options: Partial<VectorMemoryBlockQueryOptions> | undefined,
+  ): VectorMemoryBlockQueryOptions {
+    const {
+      similarityTopK = 2,
+      mode = VectorStoreQueryMode.DEFAULT,
+      sessionFilterKey = "session_id",
+    } = options ?? {};
+
+    const sessionFilter: MetadataFilter = {
+      key: sessionFilterKey,
+      value: this.id,
+      operator: "==",
+    };
+
+    let filters = options?.filters;
+    if (!filters) {
+      // If no filters are provided, use only the session filter
+      filters = {
+        filters: [sessionFilter],
+        condition: "and",
+      };
+    } else if (
+      !filters.filters.some((filter) => filter.key === sessionFilterKey)
+    ) {
+      // Append the session filter to a copy rather than pushing into the
+      // caller's array.
+      // NOTE(review): the caller's `condition` is kept as-is; with an "or"
+      // condition the session filter may no longer isolate this block - confirm.
+      filters = {
+        ...filters,
+        filters: [...filters.filters, sessionFilter],
+      };
+    }
+
+    return { ...options, similarityTopK, mode, sessionFilterKey, filters };
+  }
+}
@@ -8,6 +8,10 @@ import {
  StaticMemoryBlock,
  type StaticMemoryBlockOptions,
 } from "./block/static";
+import {
+  VectorMemoryBlock,
+  type VectorMemoryBlockOptions,
+} from "./block/vector";
 import { DEFAULT_TOKEN_LIMIT, Memory, type MemoryOptions } from "./memory";
 import type { MemoryMessage } from "./types";

@@ -115,6 +119,17 @@ export function factExtractionBlock<TMessageOptions extends object = object>(
  return new FactExtractionMemoryBlock<TMessageOptions>(options);
 }

+/**
+ * Factory helper that builds a VectorMemoryBlock.
+ * @param options - Configuration options for the vector memory block
+ * @returns A new VectorMemoryBlock instance
+ */
+export function vectorBlock<TMessageOptions extends object = object>(
+  options: VectorMemoryBlockOptions,
+): VectorMemoryBlock<TMessageOptions> {
+  const block = new VectorMemoryBlock<TMessageOptions>(options);
+  return block;
+}
+
 /**
 * Creates a new Memory instance from a snapshot
 * @param snapshot The snapshot to load from
@@ -31,6 +31,13 @@ export type MemoryOptions<TMessageOptions extends object = object> = {
   * Used internally for memory restoration from snapshots.
   */
  memoryCursor?: number;
+
+  /**
+   * The default LLM to use for memory retrieval.
+   * If not provided, the default `Settings.llm` will be used.
+   * This default LLM can be overridden by the LLM passed in the `getLLM` method.
+   */
+  llm?: LLM | undefined;
 };

 export class Memory<
@@ -65,6 +72,10 @@ export class Memory<
   * The cursor for the messages that have been processed into long-term memory.
   */
  private memoryCursor: number = 0;
+  /**
+   * The default LLM to use for memory retrieval.
+   */
+  private llm: LLM | undefined;

  constructor(
    messages: MemoryMessage<TMessageOptions>[] = [],
@@ -76,6 +87,7 @@ export class Memory<
      options.shortTermTokenLimitRatio ?? DEFAULT_SHORT_TERM_TOKEN_LIMIT_RATIO;
    this.memoryBlocks = options.memoryBlocks ?? [];
    this.memoryCursor = options.memoryCursor ?? 0;
+    this.initLLM(options.llm);

    this.adapters = {
      ...options.customAdapters,
@@ -84,6 +96,15 @@ export class Memory<
    } as TAdapters & BuiltinAdapters<TMessageOptions>;
  }

+  /**
+   * Set the default LLM for this memory.
+   *
+   * An explicitly provided LLM wins; otherwise `Settings.llm` is used. Reading
+   * `Settings.llm` may throw when it hasn't been set yet, in which case the
+   * default LLM is left undefined instead of failing construction.
+   */
+  private initLLM(llm: LLM | undefined) {
+    if (llm != null) {
+      this.llm = llm;
+      return;
+    }
+    try {
+      this.llm = Settings.llm;
+    } catch {
+      this.llm = undefined;
+    }
+  }
+
  /**
   * Add a message to the memory
   * @param message - The message to add to the memory
@@ -160,12 +181,13 @@ export class Memory<
  /**
   * Get the messages from the memory, optionally including transient messages.
   * only return messages that are within context window of the LLM
-   * @param llm - To fit the result messages to the context window of the LLM. If not provided, the default token limit will be used.
+   * @param llm - To fit the result messages to the context window of the LLM (falls back to the default LLM if not provided).
+   * If no LLM is specified in either the constructor or the method, the default token limit will be used.
   * @param transientMessages - Optional transient messages to include.
   * @returns The messages from the memory, optionally including transient messages.
   */
  async getLLM(
-    llm?: LLM,
+    llm: LLM | undefined = this.llm,
    transientMessages?: ChatMessage<TMessageOptions>[],
  ): Promise<ChatMessage[]> {
    // Priority of result messages:
@@ -176,11 +198,20 @@ export class Memory<
      ? Math.ceil(contextWindow * DEFAULT_TOKEN_LIMIT_RATIO)
      : this.tokenLimit;

+    let blockInputMessages = this.messages;
+    if (transientMessages && transientMessages.length > 0) {
+      blockInputMessages = [
+        ...this.messages,
+        ...transientMessages.map((m) => this.adapters.llamaindex.toMemory(m)),
+      ];
+    }
+
    // Start with fixed block messages (priority=0)
    // as it must always be included in the retrieval result
    const messages = await this.getMemoryBlockMessages(
      this.memoryBlocks.filter((block) => block.priority === 0),
      tokenLimit,
+      blockInputMessages,
    );
    // remaining token limit for short-term and memory blocks content
    const remainingTokenLimit =
@@ -207,6 +238,7 @@ export class Memory<
    const longTermBlockMessages = await this.getMemoryBlockMessages(
      longTermBlocks,
      memoryBlocksTokenLimit,
+      blockInputMessages,
    );
    messages.push(...longTermBlockMessages);

@@ -252,6 +284,7 @@ export class Memory<
  private async getMemoryBlockMessages(
    blocks: BaseMemoryBlock<TMessageOptions>[],
    tokenLimit?: number,
+    messages?: MemoryMessage<TMessageOptions>[],
  ): Promise<ChatMessage<TMessageOptions>[]> {
    if (blocks.length === 0) {
      return [];
@@ -265,7 +298,7 @@ export class Memory<
    let addedTokenCount = 0;
    for (const block of sortedBlocks) {
      try {
-        const content = await block.get();
+        const content = await block.get(messages);
        for (const message of content) {
          const chatMessage = this.adapters.llamaindex.fromMemory(message);
          const messageTokenCount = this.countMessagesToken([chatMessage]);
@@ -56,10 +56,45 @@ export function prettifyError(error: unknown): string {
  }
 }

+/**
+ * Returns a stringified JSON with double quotes removed.
+ *
+ * The value is pretty-printed with 2-space indentation, then every
+ * `"..."` pair is replaced by the text between the quotes.
+ * NOTE(review): escaped quotes inside string values are not treated specially
+ * by the regex, so such values may keep stray quotes or backslashes.
+ *
+ * @param value - The JSON value to stringify
+ * @returns The stringified JSON with the double quotes around keys and string values removed
+ */
 export function stringifyJSONToMessageContent(value: JSONValue): string {
   return JSON.stringify(value, null, 2).replace(/"([^"]*)"/g, "$1");
 }

+/**
+ * Asserts that `value` is a JSONValue: a string, number, boolean, an array of
+ * JSON values, or a non-null object whose property values are JSON values.
+ * Arrays and objects are checked recursively.
+ *
+ * @param value - The value to validate
+ * @throws Error if `value`, or anything nested inside it, is `null`,
+ * `undefined`, or another non-JSON type (e.g. function, symbol, bigint)
+ */
+export function assertIsJSONValue(value: unknown): asserts value is JSONValue {
+  if (
+    typeof value === "string" ||
+    typeof value === "number" ||
+    typeof value === "boolean"
+  ) {
+    return;
+  }
+
+  if (Array.isArray(value)) {
+    for (const item of value) {
+      assertIsJSONValue(item);
+    }
+    return;
+  }
+
+  if (typeof value === "object" && value !== null) {
+    // Only own enumerable string-keyed properties are visited, so the keys
+    // themselves never need validating - only the values do.
+    for (const val of Object.values(value)) {
+      assertIsJSONValue(val);
+    }
+    return;
+  }
+
+  throw new Error(`Value is not a valid JSONValue: ${String(value)}`);
+}
+
 export {
  extractDataUrlComponents,
  extractImage,
@@ -101,7 +101,9 @@ export type VectorStoreByType = {
 };

 export type VectorStoreBaseParams = {
+  /** @deprecated Use `embedModel` instead. */
   embeddingModel?: BaseEmbedding | undefined;
+  /** The embedding model used by the vector store. */
   embedModel?: BaseEmbedding | undefined;
 };

 export abstract class BaseVectorStore<Client = unknown, T = unknown> {
@@ -117,7 +119,8 @@ export abstract class BaseVectorStore<Client = unknown, T = unknown> {
  ): Promise<VectorStoreQueryResult>;

  protected constructor(params?: VectorStoreBaseParams) {
-    this.embedModel = params?.embeddingModel ?? Settings.embedModel;
+    this.embedModel =
+      params?.embedModel ?? params?.embeddingModel ?? Settings.embedModel;
  }
 }

@@ -1,5 +1,17 @@
 # @llamaindex/experimental

+## 0.0.198
+
+### Patch Changes
+
+- llamaindex@0.11.21
+
+## 0.0.197
+
+### Patch Changes
+
+- llamaindex@0.11.20
+
 ## 0.0.196

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/experimental",
  "description": "Experimental package for LlamaIndexTS",
-  "version": "0.0.196",
+  "version": "0.0.198",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -1,5 +1,26 @@
 # llamaindex

+## 0.11.21
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+  - @llamaindex/cloud@4.0.26
+  - @llamaindex/node-parser@2.0.17
+  - @llamaindex/workflow@1.1.17
+
+## 0.11.20
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+- Updated dependencies [2967d57]
+  - @llamaindex/core@0.6.16
+  - @llamaindex/workflow@1.1.16
+  - @llamaindex/cloud@4.0.25
+  - @llamaindex/node-parser@2.0.16
+
 ## 0.11.19

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "llamaindex",
-  "version": "0.11.19",
+  "version": "0.11.21",
  "license": "MIT",
  "type": "module",
  "keywords": [
@@ -272,7 +272,7 @@ export class SimpleVectorStore extends BaseVectorStore {

  static async fromPersistPath(
    persistPath: string,
-    embeddingModel?: BaseEmbedding,
+    embedModel?: BaseEmbedding,
  ): Promise<SimpleVectorStore> {
    const dirPath = path.dirname(persistPath);
    if (!(await exists(dirPath))) {
@@ -300,20 +300,20 @@ export class SimpleVectorStore extends BaseVectorStore {
    data.textIdToRefDocId = dataDict.textIdToRefDocId ?? {};
    // @ts-expect-error TS2322
    data.metadataDict = dataDict.metadataDict ?? {};
-    const store = new SimpleVectorStore({ data, embeddingModel });
+    const store = new SimpleVectorStore({ data, embedModel });
    store.persistPath = persistPath;
    return store;
  }

  static fromDict(
    saveDict: SimpleVectorStoreData,
-    embeddingModel?: BaseEmbedding,
+    embedModel?: BaseEmbedding,
  ): SimpleVectorStore {
    const data = new SimpleVectorStoreData();
    data.embeddingDict = saveDict.embeddingDict;
    data.textIdToRefDocId = saveDict.textIdToRefDocId;
    data.metadataDict = saveDict.metadataDict;
-    return new SimpleVectorStore({ data, embeddingModel });
+    return new SimpleVectorStore({ data, embedModel });
  }

  toDict(): SimpleVectorStoreData {
@@ -1,5 +1,24 @@
 # @llamaindex/core-test

+## 0.1.13
+
+### Patch Changes
+
+- @llamaindex/openai@0.4.12
+
+## 0.1.12
+
+### Patch Changes
+
+- @llamaindex/openai@0.4.11
+
+## 0.1.11
+
+### Patch Changes
+
+- Updated dependencies [856dd8c]
+  - @llamaindex/openai@0.4.10
+
 ## 0.1.10

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/llamaindex-test",
  "private": true,
-  "version": "0.1.10",
+  "version": "0.1.13",
  "type": "module",
  "scripts": {
    "test": "vitest run"
@@ -59,7 +59,7 @@ describe("SimpleVectorStore", () => {
      }),
    ];
    store = new SimpleVectorStore({
-      embeddingModel: {} as BaseEmbedding, // Mocking the embedModel
+      embedModel: {} as BaseEmbedding, // Mocking the embedModel
      data: {
        embeddingDict: {},
        textIdToRefDocId: {},
@@ -1,5 +1,19 @@
 # @llamaindex/node-parser

+## 2.0.17
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 2.0.16
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
 ## 2.0.15

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/node-parser",
-  "version": "2.0.15",
+  "version": "2.0.17",
  "description": "Node parser for LlamaIndex",
  "type": "module",
  "exports": {
@@ -1,5 +1,19 @@
 # @llamaindex/anthropic

+## 0.3.19
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 0.3.18
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
 ## 0.3.17

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/anthropic",
  "description": "Anthropic Adapter for LlamaIndex",
-  "version": "0.3.17",
+  "version": "0.3.19",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,19 @@
 # @llamaindex/assemblyai

+## 0.1.16
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 0.1.15
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
 ## 0.1.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/assemblyai",
  "description": "AssemblyAI Reader for LlamaIndex",
-  "version": "0.1.14",
+  "version": "0.1.16",
  "type": "module",
  "types": "dist/index.d.ts",
  "main": "dist/index.cjs",
@@ -1,5 +1,20 @@
 # @llamaindex/community

+## 0.0.112
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 0.0.111
+
+### Patch Changes
+
+- 678b327: feat: added apac bedrock models
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
 ## 0.0.110

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/aws",
  "description": "AWS package for LlamaIndexTS",
-  "version": "0.0.110",
+  "version": "0.0.112",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -134,6 +134,19 @@ export const INFERENCE_BEDROCK_MODELS = {
  EU_AMAZON_NOVA_PRO_1: "eu.amazon.nova-pro-v1:0",
  EU_AMAZON_NOVA_LITE_1: "eu.amazon.nova-lite-v1:0",
  EU_AMAZON_NOVA_MICRO_1: "eu.amazon.nova-micro-v1:0",
+
+  APAC_ANTHROPIC_CLAUDE_3_5_SONNET:
+    "apac.anthropic.claude-3-5-sonnet-20240620-v1:0",
+  APAC_ANTHROPIC_CLAUDE_3_5_SONNET_V2:
+    "apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
+  APAC_ANTHROPIC_CLAUDE_3_7_SONNET:
+    "apac.anthropic.claude-3-7-sonnet-20250219-v1:0",
+  APAC_ANTHROPIC_CLAUDE_3_HAIKU: "apac.anthropic.claude-3-haiku-20240307-v1:0",
+  APAC_ANTHROPIC_CLAUDE_3_SONNET:
+    "apac.anthropic.claude-3-sonnet-20240229-v1:0",
+  APAC_AMAZON_NOVA_PRO_1: "apac.amazon.nova-pro-v1:0",
+  APAC_AMAZON_NOVA_LITE_1: "apac.amazon.nova-lite-v1:0",
+  APAC_AMAZON_NOVA_MICRO_1: "apac.amazon.nova-micro-v1:0",
 };

 export type INFERENCE_BEDROCK_MODELS =
@@ -206,6 +219,24 @@ export const INFERENCE_TO_BEDROCK_MAP: Record<
    BEDROCK_MODELS.AMAZON_NOVA_LITE_1,
  [INFERENCE_BEDROCK_MODELS.EU_AMAZON_NOVA_MICRO_1]:
    BEDROCK_MODELS.AMAZON_NOVA_MICRO_1,
+
+  [INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_5_SONNET]:
+    BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_5_SONNET,
+  [INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_5_SONNET_V2]:
+    BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_5_SONNET_V2,
+  [INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_7_SONNET]:
+    BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_7_SONNET,
+  [INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_HAIKU]:
+    BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_HAIKU,
+  [INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_SONNET]:
+    BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_SONNET,
+
+  [INFERENCE_BEDROCK_MODELS.APAC_AMAZON_NOVA_PRO_1]:
+    BEDROCK_MODELS.AMAZON_NOVA_PRO_1,
+  [INFERENCE_BEDROCK_MODELS.APAC_AMAZON_NOVA_LITE_1]:
+    BEDROCK_MODELS.AMAZON_NOVA_LITE_1,
+  [INFERENCE_BEDROCK_MODELS.APAC_AMAZON_NOVA_MICRO_1]:
+    BEDROCK_MODELS.AMAZON_NOVA_MICRO_1,
 };

 /*
@@ -1,5 +1,28 @@
 # @llamaindex/clip

+## 0.0.68
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+  - @llamaindex/openai@0.4.12
+
+## 0.0.67
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+  - @llamaindex/openai@0.4.11
+
+## 0.0.66
+
+### Patch Changes
+
+- Updated dependencies [856dd8c]
+  - @llamaindex/openai@0.4.10
+
 ## 0.0.65

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/clip",
  "description": "Clip Embedding Adapter for LlamaIndex",
-  "version": "0.0.65",
+  "version": "0.0.68",
  "type": "module",
  "types": "dist/index.d.ts",
  "main": "dist/index.cjs",
@@ -1,5 +1,19 @@
 # @llamaindex/cohere

+## 0.0.31
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 0.0.30
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
 ## 0.0.29

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/cohere",
  "description": "Cohere Adapter for LlamaIndex",
-  "version": "0.0.29",
+  "version": "0.0.31",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,28 @@
 # @llamaindex/deepinfra

+## 0.0.68
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+  - @llamaindex/openai@0.4.12
+
+## 0.0.67
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+  - @llamaindex/openai@0.4.11
+
+## 0.0.66
+
+### Patch Changes
+
+- Updated dependencies [856dd8c]
+  - @llamaindex/openai@0.4.10
+
 ## 0.0.65

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/deepinfra",
  "description": "Deepinfra Adapter for LlamaIndex",
-  "version": "0.0.65",
+  "version": "0.0.68",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,24 @@
 # @llamaindex/deepseek

+## 0.0.29
+
+### Patch Changes
+
+- @llamaindex/openai@0.4.12
+
+## 0.0.28
+
+### Patch Changes
+
+- @llamaindex/openai@0.4.11
+
+## 0.0.27
+
+### Patch Changes
+
+- Updated dependencies [856dd8c]
+  - @llamaindex/openai@0.4.10
+
 ## 0.0.26

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/deepseek",
  "description": "DeepSeek Adapter for LlamaIndex",
-  "version": "0.0.26",
+  "version": "0.0.29",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,19 @@
 # @llamaindex/discord

+## 0.1.16
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 0.1.15
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
 ## 0.1.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/discord",
  "description": "Discord Reader for LlamaIndex",
-  "version": "0.1.14",
+  "version": "0.1.16",
  "type": "module",
  "types": "dist/index.d.ts",
  "main": "dist/index.cjs",
@@ -1,5 +1,19 @@
 # @llamaindex/excel

+## 0.1.17
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 0.1.16
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
 ## 0.1.15

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/excel",
  "description": "Excel Reader for LlamaIndex",
-  "version": "0.1.15",
+  "version": "0.1.17",
  "type": "module",
  "types": "dist/index.d.ts",
  "main": "dist/index.cjs",
@@ -1,5 +1,24 @@
 # @llamaindex/fireworks

+## 0.0.28
+
+### Patch Changes
+
+- @llamaindex/openai@0.4.12
+
+## 0.0.27
+
+### Patch Changes
+
+- @llamaindex/openai@0.4.11
+
+## 0.0.26
+
+### Patch Changes
+
+- Updated dependencies [856dd8c]
+  - @llamaindex/openai@0.4.10
+
 ## 0.0.25

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/fireworks",
  "description": "Fireworks Adapter for LlamaIndex",
-  "version": "0.0.25",
+  "version": "0.0.28",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,20 @@
 # @llamaindex/google

+## 0.3.16
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 0.3.15
+
+### Patch Changes
+
+- 650eeb1: fix: GeminiEmbedding should send batches of max 100
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
 ## 0.3.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/google",
  "description": "Google Adapter for LlamaIndex",
-  "version": "0.3.14",
+  "version": "0.3.16",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -0,0 +1,248 @@
+import { beforeEach, describe, expect, test, vi } from "vitest";
+import {
+  DEFAULT_EMBED_BATCH_SIZE,
+  GEMINI_EMBEDDING_MODEL,
+  GeminiEmbedding,
+} from "./GeminiEmbedding";
+
+// Mock the Google GenAI module
+const mockEmbedContent = vi.fn();
+vi.mock("@google/genai", () => ({
+  GoogleGenAI: vi.fn().mockImplementation(() => ({
+    models: {
+      embedContent: mockEmbedContent,
+    },
+  })),
+}));
+
+describe("GeminiEmbedding", () => {
+  let geminiEmbedding: GeminiEmbedding;
+  // Move capturedBatches to outer scope so all tests can access it
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  let capturedBatches: any[];
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    geminiEmbedding = new GeminiEmbedding({
+      model: GEMINI_EMBEDDING_MODEL.EMBEDDING_001,
+      apiKey: "test-api-key",
+    });
+    // Default mock for other tests
+    mockEmbedContent.mockResolvedValue({
+      embeddings: [
+        { values: [0.1, 0.2, 0.3] },
+        { values: [0.4, 0.5, 0.6] },
+        { values: [0.7, 0.8, 0.9] },
+      ],
+    });
+  });
+
+  describe("getTextEmbeddingsBatch", () => {
+    beforeEach(() => {
+      // Reset and set up capturedBatches and the mock implementation for all tests in this suite
+      capturedBatches = [];
+      mockEmbedContent.mockImplementation((args) => {
+        capturedBatches.push({
+          ...args,
+          contents: Array.isArray(args.contents)
+            ? [...args.contents]
+            : args.contents,
+        });
+        return Promise.resolve({
+          embeddings: Array.from(
+            { length: Array.isArray(args.contents) ? args.contents.length : 1 },
+            (_, i) => ({
+              values: [i * 0.1, i * 0.2, i * 0.3],
+            }),
+          ),
+        });
+      });
+    });
+
+    test("should respect batch size limit of DEFAULT_EMBED_BATCH_SIZE for inputs longer than one batch", async () => {
+      // Create 2.5x the batch size texts, so two full batches and one half batch are needed
+      const texts = Array.from(
+        { length: DEFAULT_EMBED_BATCH_SIZE * 2.5 },
+        (_, i) => `text ${i + 1}`,
+      );
+
+      await geminiEmbedding.getTextEmbeddingsBatch(texts);
+
+      // 2.5 batches' worth of texts must result in exactly ceil(2.5) = 3 calls
+      expect(mockEmbedContent).toHaveBeenCalledTimes(3);
+      // Verify that no single call carried more than DEFAULT_EMBED_BATCH_SIZE texts
+      for (const batch of capturedBatches) {
+        expect(batch.contents.length).toBeLessThanOrEqual(
+          DEFAULT_EMBED_BATCH_SIZE,
+        );
+      }
+
+      // First batch should have DEFAULT_EMBED_BATCH_SIZE texts
+      expect(capturedBatches[0].contents).toHaveLength(
+        DEFAULT_EMBED_BATCH_SIZE,
+      );
+      expect(capturedBatches[0].contents).toEqual(
+        texts.slice(0 * DEFAULT_EMBED_BATCH_SIZE, 1 * DEFAULT_EMBED_BATCH_SIZE),
+      );
+
+      // Second batch should have DEFAULT_EMBED_BATCH_SIZE texts
+      expect(capturedBatches[1].contents).toHaveLength(
+        DEFAULT_EMBED_BATCH_SIZE,
+      );
+      expect(capturedBatches[1].contents).toEqual(
+        texts.slice(1 * DEFAULT_EMBED_BATCH_SIZE, 2 * DEFAULT_EMBED_BATCH_SIZE),
+      );
+
+      // Third batch should have 0.5 * DEFAULT_EMBED_BATCH_SIZE texts (remaining)
+      expect(capturedBatches[2].contents).toHaveLength(
+        DEFAULT_EMBED_BATCH_SIZE * 0.5,
+      );
+      expect(capturedBatches[2].contents).toEqual(
+        texts.slice(
+          2 * DEFAULT_EMBED_BATCH_SIZE,
+          2.5 * DEFAULT_EMBED_BATCH_SIZE,
+        ),
+      );
+    });
+
+    test("should handle exactly DEFAULT_EMBED_BATCH_SIZE texts in a single batch", async () => {
+      const texts = Array.from(
+        { length: DEFAULT_EMBED_BATCH_SIZE },
+        (_, i) => `text ${i + 1}`,
+      );
+
+      await geminiEmbedding.getTextEmbeddingsBatch(texts);
+
+      // Should be called exactly once
+      expect(mockEmbedContent).toHaveBeenCalledTimes(1);
+      // Should contain all DEFAULT_EMBED_BATCH_SIZE texts, in their original order
+      expect(capturedBatches[0]?.contents).toHaveLength(
+        DEFAULT_EMBED_BATCH_SIZE,
+      );
+      expect(capturedBatches[0]?.contents).toEqual(texts);
+    });
+
+    test("should handle texts shorter than batch size", async () => {
+      const short_batch_length = 5; // Less than DEFAULT_EMBED_BATCH_SIZE
+      const texts = Array.from(
+        { length: short_batch_length },
+        (_, i) => `text ${i + 1}`,
+      );
+
+      await geminiEmbedding.getTextEmbeddingsBatch(texts);
+
+      // Should be called exactly once
+      expect(mockEmbedContent).toHaveBeenCalledTimes(1);
+
+      // Should contain all 5 texts
+      expect(capturedBatches[0].contents).toHaveLength(short_batch_length);
+      expect(capturedBatches[0].contents).toEqual(texts);
+    });
+
+    test("should handle large batches correctly (100 texts)", async () => {
+      const n_batches = 10;
+      const texts = Array.from(
+        { length: DEFAULT_EMBED_BATCH_SIZE * n_batches },
+        (_, i) => `text ${i + 1}`,
+      );
+
+      await geminiEmbedding.getTextEmbeddingsBatch(texts);
+
+      // Should be called exactly n_batches times, once per full batch
+      expect(mockEmbedContent).toHaveBeenCalledTimes(n_batches);
+
+      // Verify each batch has exactly DEFAULT_EMBED_BATCH_SIZE texts, matching the corresponding input slice
+      for (let i = 0; i < n_batches; i++) {
+        expect(capturedBatches[i].contents).toHaveLength(
+          DEFAULT_EMBED_BATCH_SIZE,
+        );
+        expect(capturedBatches[i].contents).toEqual(
+          texts.slice(
+            i * DEFAULT_EMBED_BATCH_SIZE,
+            (i + 1) * DEFAULT_EMBED_BATCH_SIZE,
+          ),
+        );
+      }
+    });
+
+    test("should return correct embeddings for all texts", async () => {
+      const texts = ["text1", "text2", "text3"];
+
+      mockEmbedContent.mockResolvedValueOnce({
+        embeddings: [
+          { values: [0.1, 0.2, 0.3] },
+          { values: [0.4, 0.5, 0.6] },
+          { values: [0.7, 0.8, 0.9] },
+        ],
+      });
+
+      const result = await geminiEmbedding.getTextEmbeddingsBatch(texts);
+
+      expect(result).toEqual([
+        [0.1, 0.2, 0.3],
+        [0.4, 0.5, 0.6],
+        [0.7, 0.8, 0.9],
+      ]);
+    });
+
+    test("should handle empty embeddings gracefully", async () => {
+      const texts = ["text1", "text2"];
+
+      mockEmbedContent.mockResolvedValueOnce({
+        embeddings: [{ values: undefined }, { values: [0.1, 0.2, 0.3] }],
+      });
+
+      const result = await geminiEmbedding.getTextEmbeddingsBatch(texts);
+
+      expect(result).toEqual([[], [0.1, 0.2, 0.3]]);
+    });
+
+    test("should handle missing embeddings array", async () => {
+      const texts = ["text1"];
+
+      mockEmbedContent.mockResolvedValueOnce({
+        embeddings: undefined,
+      });
+
+      const result = await geminiEmbedding.getTextEmbeddingsBatch(texts);
+
+      expect(result).toEqual([]);
+    });
+  });
+
+  describe("getTextEmbedding", () => {
+    test("should call embedContent with single text", async () => {
+      const text = "single text";
+
+      mockEmbedContent.mockResolvedValueOnce({
+        embeddings: [{ values: [0.1, 0.2, 0.3] }],
+      });
+
+      const result = await geminiEmbedding.getTextEmbedding(text);
+
+      expect(mockEmbedContent).toHaveBeenCalledTimes(1);
+      expect(mockEmbedContent).toHaveBeenCalledWith({
+        model: GEMINI_EMBEDDING_MODEL.EMBEDDING_001,
+        contents: text,
+      });
+      expect(result).toEqual([0.1, 0.2, 0.3]);
+    });
+  });
+
+  describe("constructor", () => {
+    test("should set default model and batch size", () => {
+      const embedding = new GeminiEmbedding({ apiKey: "test-key" });
+
+      expect(embedding.model).toBe(GEMINI_EMBEDDING_MODEL.EMBEDDING_001);
+      expect(embedding.embedBatchSize).toBe(DEFAULT_EMBED_BATCH_SIZE);
+    });
+
+    test("should use provided model", () => {
+      const new_batch_size = 50;
+      const embedding = new GeminiEmbedding({
+        model: GEMINI_EMBEDDING_MODEL.TEXT_EMBEDDING_004,
+        apiKey: "test-key",
+        embedBatchSize: new_batch_size,
+      });
+
+      expect(embedding.model).toBe(GEMINI_EMBEDDING_MODEL.TEXT_EMBEDDING_004);
+      expect(embedding.embedBatchSize).toBe(new_batch_size);
+    });
+  });
+});
@@ -1,5 +1,9 @@
 import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
-import { BaseEmbedding } from "@llamaindex/core/embeddings";
+import {
+  BaseEmbedding,
+  batchEmbeddings,
+  type BaseEmbeddingOptions,
+} from "@llamaindex/core/embeddings";
 import { getEnv } from "@llamaindex/env";

 export enum GEMINI_EMBEDDING_MODEL {
@@ -7,11 +11,15 @@ export enum GEMINI_EMBEDDING_MODEL {
  TEXT_EMBEDDING_004 = "text-embedding-004",
 }

+// 100 is max batch size, see https://github.com/run-llama/LlamaIndexTS/pull/2099
+export const DEFAULT_EMBED_BATCH_SIZE = 100;
+
 /**
 * Configuration options for GeminiEmbedding.
 */
 export type GeminiEmbeddingOptions = {
  model?: GEMINI_EMBEDDING_MODEL;
+  embedBatchSize?: number;
 } & GoogleGenAIOptions;

 /**
@@ -20,6 +28,7 @@ export type GeminiEmbeddingOptions = {
 export class GeminiEmbedding extends BaseEmbedding {
  model: GEMINI_EMBEDDING_MODEL;
  ai: GoogleGenAI;
+  embedBatchSize: number = DEFAULT_EMBED_BATCH_SIZE;

  constructor(opts?: GeminiEmbeddingOptions) {
    super();
@@ -31,15 +40,27 @@ export class GeminiEmbedding extends BaseEmbedding {

    this.ai = new GoogleGenAI({ ...opts, apiKey });
    this.model = opts?.model ?? GEMINI_EMBEDDING_MODEL.EMBEDDING_001;
+    this.embedBatchSize = opts?.embedBatchSize ?? DEFAULT_EMBED_BATCH_SIZE;
  }

-  async getTextEmbeddingsBatch(texts: string[]): Promise<number[][]> {
+  getTextEmbeddings = async (texts: string[]) => {
    const result = await this.ai.models.embedContent({
      model: this.model,
      contents: texts,
    });
-
    return result.embeddings?.map((embedding) => embedding.values ?? []) ?? [];
+  };
+
+  /**
+   * Embeds `texts` by delegating to `batchEmbeddings`, passing
+   * `getTextEmbeddings` as the embed function and `embedBatchSize` as the
+   * chunk size.
+   *
+   * @param texts - Texts to embed.
+   * @param options - Optional settings forwarded unchanged to `batchEmbeddings`.
+   * @returns The embeddings returned by `batchEmbeddings`.
+   */
+  async getTextEmbeddingsBatch(
+    texts: string[],
+    options?: BaseEmbeddingOptions,
+  ): Promise<Array<number[]>> {
+    // `getTextEmbeddings` is an arrow-function property, so `this` is already
+    // lexically bound; an explicit `.bind(this)` would be a no-op.
+    return await batchEmbeddings(
+      texts,
+      this.getTextEmbeddings,
+      this.embedBatchSize,
+      options,
+    );
  }

  async getTextEmbedding(text: string): Promise<number[]> {
@@ -1,5 +1,24 @@
 # @llamaindex/groq

+## 0.0.84
+
+### Patch Changes
+
+- @llamaindex/openai@0.4.12
+
+## 0.0.83
+
+### Patch Changes
+
+- @llamaindex/openai@0.4.11
+
+## 0.0.82
+
+### Patch Changes
+
+- Updated dependencies [856dd8c]
+  - @llamaindex/openai@0.4.10
+
 ## 0.0.81

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/groq",
  "description": "Groq Adapter for LlamaIndex",
-  "version": "0.0.81",
+  "version": "0.0.84",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,28 @@
 # @llamaindex/huggingface

+## 0.1.22
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+  - @llamaindex/openai@0.4.12
+
+## 0.1.21
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+  - @llamaindex/openai@0.4.11
+
+## 0.1.20
+
+### Patch Changes
+
+- Updated dependencies [856dd8c]
+  - @llamaindex/openai@0.4.10
+
 ## 0.1.19

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/huggingface",
  "description": "Huggingface Adapter for LlamaIndex",
-  "version": "0.1.19",
+  "version": "0.1.22",
  "type": "module",
  "types": "dist/index.d.ts",
  "main": "dist/index.cjs",
@@ -1,5 +1,28 @@
 # @llamaindex/jinaai

+## 0.0.28
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+  - @llamaindex/openai@0.4.12
+
+## 0.0.27
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+  - @llamaindex/openai@0.4.11
+
+## 0.0.26
+
+### Patch Changes
+
+- Updated dependencies [856dd8c]
+  - @llamaindex/openai@0.4.10
+
 ## 0.0.25

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/jinaai",
  "description": "JinaAI Adapter for LlamaIndex",
-  "version": "0.0.25",
+  "version": "0.0.28",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,19 @@
 # @llamaindex/mistral

+## 0.1.17
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 0.1.16
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
 ## 0.1.15

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/mistral",
  "description": "Mistral Adapter for LlamaIndex",
-  "version": "0.1.15",
+  "version": "0.1.17",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,19 @@
 # @llamaindex/mixedbread

+## 0.0.31
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 0.0.30
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
 ## 0.0.29

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/mixedbread",
  "description": "Mixedbread Adapter for LlamaIndex",
-  "version": "0.0.29",
+  "version": "0.0.31",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,19 @@
 # @llamaindex/notion

+## 0.1.16
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 0.1.15
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
 ## 0.1.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/notion",
  "description": "Notion Reader for LlamaIndex",
-  "version": "0.1.14",
+  "version": "0.1.16",
  "type": "module",
  "types": "dist/index.d.ts",
  "main": "dist/index.cjs",
@@ -1,5 +1,19 @@
 # @llamaindex/ollama

+## 0.1.17
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 0.1.16
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
 ## 0.1.15

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/ollama",
  "description": "Ollama Adapter for LlamaIndex",
-  "version": "0.1.15",
+  "version": "0.1.17",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,25 @@
 # @llamaindex/openai

+## 0.4.12
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+
+## 0.4.11
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+
+## 0.4.10
+
+### Patch Changes
+
+- 856dd8c: fix: assume new models are function call models
+
 ## 0.4.9

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/openai",
  "description": "OpenAI Adapter for LlamaIndex",
-  "version": "0.4.9",
+  "version": "0.4.12",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -383,8 +383,8 @@ export class OpenAI extends ToolCallLLM<OpenAIAdditionalChatOptions> {
      // skip parts that don't have any content
      if (
        !(
-          choice.delta.content ||
-          choice.delta.tool_calls ||
+          choice.delta?.content ||
+          choice.delta?.tool_calls ||
          choice.finish_reason
        )
      )
@@ -149,10 +149,9 @@ export function isFunctionCallingModel(llm: LLM): llm is OpenAI {
  } else {
    return false;
  }
-  const isChatModel = Object.keys(ALL_AVAILABLE_OPENAI_MODELS).includes(model);
  const isOld = model.includes("0314") || model.includes("0301");
  const isO1 = model.startsWith("o1");
-  return isChatModel && !isOld && !isO1;
+  return !isOld && !isO1;
 }

 export function isReasoningModel(model: ChatModel | string): boolean {
@@ -1,5 +1,9 @@
-import { ChatMessage, ToolCallLLMMessageOptions } from "@llamaindex/core/llms";
-import { describe, expect, it } from "vitest";
+import {
+  ChatMessage,
+  ChatResponseChunk,
+  ToolCallLLMMessageOptions,
+} from "@llamaindex/core/llms";
+import { describe, expect, it, vi } from "vitest";
 import { z } from "zod";
 import { OpenAI } from "../src/llm";

@@ -231,3 +235,52 @@ describe("OpenAI Static Methods", () => {
    });
  });
 });
+
+describe("OpenAI streamChat", () => {
+  it("should handle choice with empty delta and finish_reason stop", async () => {
+    // Async generator standing in for the streamed response: one chunk whose delta is empty and whose finish_reason is "stop"
+    const mockStream = async function* () {
+      yield {
+        choices: [
+          {
+            delta: {},
+            finish_reason: "stop",
+            index: 0,
+            logprobs: null,
+          },
+        ],
+      };
+    };
+
+    // Mock session whose chat.completions.create resolves to the mock stream above
+    const mockSession = {
+      chat: {
+        completions: {
+          create: vi.fn().mockResolvedValue(mockStream()),
+        },
+      },
+    };
+
+    const openai = new OpenAI({
+      model: "gpt-4o-mini",
+      apiKey: "test-key",
+      // @ts-expect-error: mockSession is a mock object for testing purposes
+      session: mockSession,
+    });
+
+    // @ts-expect-error accessing protected method
+    const stream = openai.streamChat({
+      messages: [{ role: "user" as const, content: "Hello" }],
+      stream: true,
+    });
+
+    const chunks: ChatResponseChunk[] = [];
+    for await (const chunk of stream) {
+      chunks.push(chunk);
+    }
+
+    expect(chunks).toHaveLength(1);
+    expect(chunks[0].options).toEqual({});
+    expect(chunks[0].delta).toBe("");
+  });
+});
@@ -1,5 +1,28 @@
 # @llamaindex/perplexity

+## 0.0.25
+
+### Patch Changes
+
+- Updated dependencies [38da40b]
+  - @llamaindex/core@0.6.17
+  - @llamaindex/openai@0.4.12
+
+## 0.0.24
+
+### Patch Changes
+
+- Updated dependencies [a8ec08c]
+  - @llamaindex/core@0.6.16
+  - @llamaindex/openai@0.4.11
+
+## 0.0.23
+
+### Patch Changes
+
+- Updated dependencies [856dd8c]
+  - @llamaindex/openai@0.4.10
+
 ## 0.0.22

 ### Patch Changes
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
github-actions[bot]	e4c7113614	Release 0.11.21 (#2128 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>	2025-07-22 12:23:58 +08:00
Thuc Pham	38da40bc98	feat: VectoryMemoryBlock (#2110 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-07-22 12:18:09 +08:00
Marcus Schiesser	4d50ca4d84	chore: add streamchat test (#2122 )	2025-07-22 11:30:01 +08:00
github-actions[bot]	8b5253a297	Release (#2127 )	2025-07-21 15:40:31 -06:00
Logan	ea15e75c89	deployment docs nits (#2126 )	2025-07-21 15:30:37 -06:00
github-actions[bot]	3be87d4670	Release 0.11.20 (#2121 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: himself65 <14026360+himself65@users.noreply.github.com>	2025-07-21 09:37:44 -07:00
Terence Sim	94da13db0d	fix: azure openai streamchat empty delta throw TypeError (#2118 ) Co-authored-by: Terence Sim <40583743+InTheAxis@users.noreply.github.com>	2025-07-21 09:16:09 -07:00
Terence Sim	acd50ea99f	chore: replaced console.log with logger type from @llamaindex/env (#2123 ) Co-authored-by: Terence Sim <40583743+InTheAxis@users.noreply.github.com>	2025-07-21 09:14:06 -07:00
Adrian Lyjak	2967d57ac0	feat: default to _public agent data (#2117 )	2025-07-21 09:07:15 -07:00
Thuc Pham	a8ec08c682	fix: ensure correct message content in agent workflow (#2114 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-07-21 15:13:27 +08:00
Terence Sim	678b327051	feat: added apac bedrock models (#2119 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-07-21 12:13:37 +08:00
Jeremy B. Merrill	650eeb1df3	fix: GeminiEmbedding should send batches of max 100 (#2099 ) Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>	2025-07-21 12:12:42 +08:00
Laurie Voss	50f6747758	Instrumenting with Google Tag Manager (in addition to Google Analytics) (#2116 )	2025-07-20 13:18:09 -07:00
github-actions[bot]	12414a6836	Release (#2113 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>	2025-07-18 13:54:38 +08:00
Marcus Schiesser	856dd8cca8	fix: assume new models are function call models (#2112 )	2025-07-18 12:52:43 +08:00
Jerry Cheng	d8f4f6a859	Update SupabaseVectorStore.ts to fix score calculating error (#2109 ) Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>	2025-07-18 12:48:47 +08:00