Release 0.8.13 (#1480)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: himself65 <himself65@users.noreply.github.com>
docs: update docusaurus.config.js (#1489)
2026-07-01 22:14:03 -04:00 · 2024-11-14 18:25:19 -08:00 · 2024-11-14 17:36:52 -08:00 · 2024-11-14 17:35:21 -08:00 · 2024-11-14 15:46:48 -08:00 · 2024-11-14 13:45:50 -08:00
125 changed files with 2943 additions and 605 deletions
@@ -1,12 +1,16 @@
-# LlamaIndex.TS
+<p align="center">
+  <img height="100" width="100" alt="LlamaIndex logo" src="https://ts.llamaindex.ai/square.svg" />
+</p>
+<h1 align="center">LlamaIndex.TS</h1>
+<h3 align="center">
+  Data framework for your LLM application.
+</h3>

 [![NPM Version](https://img.shields.io/npm/v/llamaindex)](https://www.npmjs.com/package/llamaindex)
 [![NPM License](https://img.shields.io/npm/l/llamaindex)](https://www.npmjs.com/package/llamaindex)
 [![NPM Downloads](https://img.shields.io/npm/dm/llamaindex)](https://www.npmjs.com/package/llamaindex)
 [![Discord](https://img.shields.io/discord/1059199217496772688)](https://discord.com/invite/eN6D2HQ4aX)

-LlamaIndex is a data framework for your LLM application.
-
 Use your own data with large language models (LLMs, OpenAI ChatGPT and others) in JS runtime environments with TypeScript support.

 Documentation: https://ts.llamaindex.ai/
@@ -1,5 +1,46 @@
 # docs

+## 0.0.118
+
+### Patch Changes
+
+- llamaindex@0.8.13
+- @llamaindex/examples@0.0.16
+
+## 0.0.117
+
+### Patch Changes
+
+- @llamaindex/examples@0.0.15
+
+## 0.0.116
+
+### Patch Changes
+
+- llamaindex@0.8.12
+
+## 0.0.115
+
+### Patch Changes
+
+- llamaindex@0.8.11
+
+## 0.0.114
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+  - @llamaindex/examples@0.0.14
+
+## 0.0.113
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.0.112

 ### Patch Changes
@@ -62,6 +62,12 @@ const config = {
    ({
      // Replace with your project's social card
      image: "img/favicon.png", // TODO change this
+      announcementBar: {
+        id: "migrate_to_next",
+        content:
+          'We are migrating to Next.js based documentation. Check it out <a href="https://ts.llamaindex.ai/docs/llamaindex">here</a>!',
+        isCloseable: false,
+      },
      navbar: {
        title: "LlamaIndex.TS",
        logo: {
@@ -1,6 +1,6 @@
 {
  "name": "docs",
-  "version": "0.0.112",
+  "version": "0.0.118",
  "private": true,
  "scripts": {
    "docusaurus": "docusaurus",
@@ -1,5 +1,81 @@
 # @llamaindex/doc

+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [a7b0ac3]
+- Updated dependencies [ee20c44]
+- Updated dependencies [c69605f]
+  - @llamaindex/core@0.4.10
+  - @llamaindex/workflow@0.0.6
+  - llamaindex@0.8.13
+  - @llamaindex/cloud@2.0.10
+  - @llamaindex/node-parser@0.0.11
+  - @llamaindex/openai@0.1.35
+  - @llamaindex/readers@1.0.11
+
+## 0.0.15
+
+### Patch Changes
+
+- Updated dependencies [ea92b69]
+- Updated dependencies [fadc8b8]
+  - @llamaindex/workflow@0.0.5
+
+## 0.0.14
+
+### Patch Changes
+
+- Updated dependencies [7ae6eaa]
+  - @llamaindex/core@0.4.9
+  - @llamaindex/openai@0.1.34
+  - @llamaindex/cloud@2.0.9
+  - llamaindex@0.8.12
+  - @llamaindex/node-parser@0.0.10
+  - @llamaindex/readers@1.0.10
+
+## 0.0.13
+
+### Patch Changes
+
+- Updated dependencies [f865c98]
+  - @llamaindex/core@0.4.8
+  - @llamaindex/cloud@2.0.8
+  - llamaindex@0.8.11
+  - @llamaindex/node-parser@0.0.9
+  - @llamaindex/openai@0.1.33
+  - @llamaindex/readers@1.0.9
+
+## 0.0.12
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+- Updated dependencies [fd8c882]
+  - llamaindex@0.8.10
+  - @llamaindex/core@0.4.7
+  - @llamaindex/workflow@0.0.4
+  - @llamaindex/cloud@2.0.7
+  - @llamaindex/node-parser@0.0.8
+  - @llamaindex/openai@0.1.32
+  - @llamaindex/readers@1.0.8
+
+## 0.0.11
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+  - @llamaindex/cloud@2.0.6
+  - @llamaindex/core@0.4.6
+  - @llamaindex/node-parser@0.0.7
+  - @llamaindex/openai@0.1.31
+  - @llamaindex/readers@1.0.7
+
 ## 0.0.10

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/doc",
-  "version": "0.0.10",
+  "version": "0.0.16",
  "private": true,
  "scripts": {
    "build": "pnpm run build:docs && next build",
@@ -1,6 +1,14 @@
 {
  "extends": ["//"],
  "tasks": {
+    "build": {
+      "outputs": [
+        ".next",
+        ".source",
+        "next-env.d.ts",
+        "src/content/docs/cloud/api/**"
+      ]
+    },
    "dev": {
      "dependsOn": ["^build"]
    }
@@ -1,5 +1,38 @@
 # @llamaindex/cloudflare-worker-agent-test

+## 0.0.109
+
+### Patch Changes
+
+- llamaindex@0.8.13
+
+## 0.0.108
+
+### Patch Changes
+
+- llamaindex@0.8.12
+
+## 0.0.107
+
+### Patch Changes
+
+- llamaindex@0.8.11
+
+## 0.0.106
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 0.0.105
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.0.104

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloudflare-worker-agent-test",
-  "version": "0.0.104",
+  "version": "0.0.109",
  "type": "module",
  "private": true,
  "scripts": {
@@ -1,5 +1,35 @@
 # @llamaindex/llama-parse-browser-test

+## 0.0.30
+
+### Patch Changes
+
+- @llamaindex/cloud@2.0.10
+
+## 0.0.29
+
+### Patch Changes
+
+- @llamaindex/cloud@2.0.9
+
+## 0.0.28
+
+### Patch Changes
+
+- @llamaindex/cloud@2.0.8
+
+## 0.0.27
+
+### Patch Changes
+
+- @llamaindex/cloud@2.0.7
+
+## 0.0.26
+
+### Patch Changes
+
+- @llamaindex/cloud@2.0.6
+
 ## 0.0.25

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/llama-parse-browser-test",
  "private": true,
-  "version": "0.0.25",
+  "version": "0.0.30",
  "type": "module",
  "scripts": {
    "dev": "vite",
@@ -1,5 +1,38 @@
 # @llamaindex/next-agent-test

+## 0.1.109
+
+### Patch Changes
+
+- llamaindex@0.8.13
+
+## 0.1.108
+
+### Patch Changes
+
+- llamaindex@0.8.12
+
+## 0.1.107
+
+### Patch Changes
+
+- llamaindex@0.8.11
+
+## 0.1.106
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 0.1.105
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.1.104

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-agent-test",
-  "version": "0.1.104",
+  "version": "0.1.109",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,38 @@
 # test-edge-runtime

+## 0.1.108
+
+### Patch Changes
+
+- llamaindex@0.8.13
+
+## 0.1.107
+
+### Patch Changes
+
+- llamaindex@0.8.12
+
+## 0.1.106
+
+### Patch Changes
+
+- llamaindex@0.8.11
+
+## 0.1.105
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 0.1.104
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.1.103

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/nextjs-edge-runtime-test",
-  "version": "0.1.103",
+  "version": "0.1.108",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,38 @@
 # @llamaindex/next-node-runtime

+## 0.0.90
+
+### Patch Changes
+
+- llamaindex@0.8.13
+
+## 0.0.89
+
+### Patch Changes
+
+- llamaindex@0.8.12
+
+## 0.0.88
+
+### Patch Changes
+
+- llamaindex@0.8.11
+
+## 0.0.87
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 0.0.86
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.0.85

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-node-runtime-test",
-  "version": "0.0.85",
+  "version": "0.0.90",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -15,7 +15,6 @@ Settings.llm = new OpenAI({
 });
 Settings.embedModel = new HuggingFaceEmbedding({
  modelType: "BAAI/bge-small-en-v1.5",
-  quantized: false,
 });
 Settings.callbackManager.on("llm-tool-call", (event) => {
  console.log(event.detail);
@@ -1,5 +1,38 @@
 # @llamaindex/waku-query-engine-test

+## 0.0.109
+
+### Patch Changes
+
+- llamaindex@0.8.13
+
+## 0.0.108
+
+### Patch Changes
+
+- llamaindex@0.8.12
+
+## 0.0.107
+
+### Patch Changes
+
+- llamaindex@0.8.11
+
+## 0.0.106
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 0.0.105
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.0.104

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/waku-query-engine-test",
-  "version": "0.0.104",
+  "version": "0.0.109",
  "type": "module",
  "private": true,
  "scripts": {
@@ -0,0 +1,3 @@
+import { OpenAI } from "./openai.js";
+
+export class Ollama extends OpenAI {}
@@ -15,7 +15,17 @@ export async function resolve(specifier, context, nextResolve) {
  const targetUrl = fileURLToPath(result.url).replace(/\.js$/, ".ts");
  let relativePath = relative(packageDistDir, targetUrl);
  // todo: make it more generic if we have more sub modules fixtures in the future
-  if (relativePath.startsWith("../../llm/openai")) {
+  if (relativePath.startsWith("../../llm/anthropic")) {
+    relativePath = relativePath.replace(
+      "../../llm/anthropic/dist/index.ts",
+      "llm/anthropic.ts",
+    );
+  } else if (relativePath.startsWith("../../llm/ollama")) {
+    relativePath = relativePath.replace(
+      "../../llm/ollama/dist/index.ts",
+      "llm/ollama.ts",
+    );
+  } else if (relativePath.startsWith("../../llm/openai")) {
    relativePath = relativePath.replace(
      "../../llm/openai/dist/index.ts",
      "llm/openai.ts",
@@ -64,7 +64,7 @@ await test("clip embedding", async (t) => {
  });

  await t.test("custom transformer", async () => {
-    const transformers = await import("@xenova/transformers");
+    const transformers = await import("@huggingface/transformers");
    const getter = test.mock.fn((t, k, r) => {
      return Reflect.get(t, k, r);
    });
@@ -0,0 +1,35 @@
+import { Ollama } from "@llamaindex/ollama";
+import assert from "node:assert";
+import { test } from "node:test";
+import { getWeatherTool } from "./fixtures/tools.js";
+import { mockLLMEvent } from "./utils.js";
+
+await test("ollama", async (t) => {
+  await mockLLMEvent(t, "ollama");
+  await t.test("ollama function call", async (t) => {
+    const llm = new Ollama({
+      model: "llama3.2",
+    });
+    const chatResponse = await llm.chat({
+      messages: [
+        {
+          role: "user",
+          content: "What is the weather in Paris?",
+        },
+      ],
+      tools: [getWeatherTool],
+    });
+    if (
+      chatResponse.message.options &&
+      "toolCall" in chatResponse.message.options
+    ) {
+      assert.equal(chatResponse.message.options.toolCall.length, 1);
+      assert.equal(
+        chatResponse.message.options.toolCall[0]!.name,
+        getWeatherTool.metadata.name,
+      );
+    } else {
+      throw new Error("Expected tool calls in response");
+    }
+  });
+});
@@ -167,6 +167,7 @@ For questions about more specific sections, please use the vector_tool.`,
  const mockCall = t.mock.fn(({ query }: { query: string }) => {
    return originalCall({ query });
  });
+  // @ts-expect-error what?
  queryEngineTools[1]!.call = mockCall;

  const toolMapping = SimpleToolNodeMapping.fromObjects(queryEngineTools);
@@ -0,0 +1,37 @@
+{
+  "llmEventStart": [
+    {
+      "id": "PRESERVE_0",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What is the weather in Paris?"
+        }
+      ]
+    }
+  ],
+  "llmEventEnd": [
+    {
+      "id": "PRESERVE_0",
+      "response": {
+        "message": {
+          "role": "assistant",
+          "content": "",
+          "options": {
+            "toolCall": [
+              {
+                "name": "getWeather",
+                "input": {
+                  "city": "Paris"
+                },
+                "id": "5d198775-5268-4552-993b-9ecb4425385b"
+              }
+            ]
+          }
+        },
+        "raw": null
+      }
+    }
+  ],
+  "llmEventStream": []
+}
@@ -12,10 +12,11 @@
    "@faker-js/faker": "^9.2.0",
    "@llamaindex/core": "workspace:*",
    "@llamaindex/env": "workspace:*",
+    "@llamaindex/ollama": "workspace:*",
    "@llamaindex/openai": "workspace:*",
    "@types/node": "^22.9.0",
    "@types/pg": "^8.11.8",
-    "@xenova/transformers": "^2.17.2",
+    "@huggingface/transformers": "^3.0.2",
    "consola": "^3.2.3",
    "dotenv": "^16.4.5",
    "llamaindex": "workspace:*",
@@ -1,5 +1,38 @@
 # examples

+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [a7b0ac3]
+- Updated dependencies [ee20c44]
+- Updated dependencies [c69605f]
+  - @llamaindex/core@0.4.10
+  - @llamaindex/workflow@0.0.6
+  - llamaindex@0.8.13
+  - @llamaindex/readers@1.0.11
+
+## 0.0.15
+
+### Patch Changes
+
+- Updated dependencies [ea92b69]
+- Updated dependencies [fadc8b8]
+  - @llamaindex/workflow@0.0.5
+
+## 0.0.14
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+- Updated dependencies [fd8c882]
+  - llamaindex@0.8.10
+  - @llamaindex/core@0.4.7
+  - @llamaindex/workflow@0.0.4
+  - @llamaindex/readers@1.0.8
+
 ## 0.0.13

 ### Patch Changes
@@ -0,0 +1,38 @@
+import { Anthropic } from "llamaindex";
+
+async function main() {
+  const anthropic = new Anthropic({
+    model: "claude-3-5-sonnet-20241022",
+  });
+
+  const entireBook = await fetch(
+    "https://www.gutenberg.org/files/1342/1342-0.txt",
+  ).then((response) => response.text());
+
+  const response = await anthropic.chat({
+    messages: [
+      {
+        content:
+          "You are an AI assistant tasked with analyzing literary works. Your goal is to provide insightful commentary on themes, characters, and writing style.\n",
+        role: "system",
+      },
+      {
+        content: entireBook,
+        role: "system",
+        options: {
+          cache_control: {
+            type: "ephemeral",
+          },
+        },
+      },
+      {
+        content: "analyze the major themes in Pride and Prejudice.",
+        role: "user",
+      },
+    ],
+  });
+
+  console.log(response.message.content);
+}
+
+main().catch(console.error);
@@ -1,15 +1,15 @@
 {
  "name": "@llamaindex/examples",
  "private": true,
-  "version": "0.0.13",
+  "version": "0.0.16",
  "dependencies": {
    "@aws-crypto/sha256-js": "^5.2.0",
    "@azure/cosmos": "^4.1.1",
    "@azure/identity": "^4.4.1",
    "@datastax/astra-db-ts": "^1.4.1",
-    "@llamaindex/core": "^0.4.5",
-    "@llamaindex/readers": "^1.0.6",
-    "@llamaindex/workflow": "^0.0.3",
+    "@llamaindex/core": "^0.4.10",
+    "@llamaindex/readers": "^1.0.11",
+    "@llamaindex/workflow": "^0.0.6",
    "@notionhq/client": "^2.2.15",
    "@pinecone-database/pinecone": "^3.0.2",
    "@vercel/postgres": "^0.10.0",
@@ -18,7 +18,7 @@
    "commander": "^12.1.0",
    "dotenv": "^16.4.5",
    "js-tiktoken": "^1.0.14",
-    "llamaindex": "^0.8.8",
+    "llamaindex": "^0.8.13",
    "mongodb": "^6.7.0",
    "pathe": "^1.1.2",
    "postgres": "^3.4.4"
@@ -14,7 +14,6 @@ Settings.llm = new Ollama({

 Settings.embedModel = new HuggingFaceEmbedding({
  modelType: "BAAI/bge-small-en-v1.5",
-  quantized: false,
 });

 async function main() {
@@ -0,0 +1,16 @@
+import { VLLM } from "llamaindex";
+
+const llm = new VLLM({
+  model: "NousResearch/Meta-Llama-3-8B-Instruct",
+});
+
+const response = await llm.chat({
+  messages: [
+    {
+      role: "user",
+      content: "Hello?",
+    },
+  ],
+});
+
+console.log(response.message.content);
@@ -1,14 +1,19 @@
 import {
-  Context,
+  HandlerContext,
  StartEvent,
  StopEvent,
  Workflow,
  WorkflowEvent,
-} from "@llamaindex/core/workflow";
+} from "@llamaindex/workflow";
 import { OpenAI } from "llamaindex";

 const MAX_REVIEWS = 3;

+type Context = {
+  specification: string;
+  numberReviews: number;
+};
+
 // Using the o1-preview model (see https://platform.openai.com/docs/guides/reasoning?reasoning-prompt-examples=coding-planning)
 const llm = new OpenAI({ model: "o1-preview", temperature: 1 });

@@ -20,7 +25,9 @@ stores the question/answer pair in the database.`;

 // Create custom event types
 export class MessageEvent extends WorkflowEvent<{ msg: string }> {}
+
 export class CodeEvent extends WorkflowEvent<{ code: string }> {}
+
 export class ReviewEvent extends WorkflowEvent<{
  review: string;
  code: string;
@@ -34,12 +41,13 @@ const truncate = (str: string) => {
 };

 // the architect is responsible for writing the structure and the initial code based on the specification
-const architect = async (context: Context, ev: StartEvent) => {
-  // get the specification from the start event and save it to context
-  context.set("specification", ev.data.input);
-  const spec = context.get("specification");
+const architect = async (
+  context: HandlerContext<Context>,
+  _: StartEvent<string>,
+) => {
+  const spec = context.data.specification;
  // write a message to send an update to the user
-  context.writeEventToStream(
+  context.sendEvent(
    new MessageEvent({
      msg: `Writing app using this specification: ${truncate(spec)}`,
    }),
@@ -50,13 +58,13 @@ const architect = async (context: Context, ev: StartEvent) => {
 };

 // the coder is responsible for updating the code based on the review
-const coder = async (context: Context, ev: ReviewEvent) => {
+const coder = async (context: HandlerContext<Context>, ev: ReviewEvent) => {
  // get the specification from the context
-  const spec = context.get("specification");
+  const spec = context.data.specification;
  // get the latest review and code
  const { review, code } = ev.data;
  // write a message to send an update to the user
-  context.writeEventToStream(
+  context.sendEvent(
    new MessageEvent({
      msg: `Update code based on review: ${truncate(review)}`,
    }),
@@ -67,32 +75,35 @@ const coder = async (context: Context, ev: ReviewEvent) => {
 };

 // the reviewer is responsible for reviewing the code and providing feedback
-const reviewer = async (context: Context, ev: CodeEvent) => {
+const reviewer = async (context: HandlerContext<Context>, ev: CodeEvent) => {
  // get the specification from the context
-  const spec = context.get("specification");
+  const spec = context.data.specification;
  // get latest code from the event
  const { code } = ev.data;
  // update and check the number of reviews
-  const numberReviews = context.get("numberReviews", 0) + 1;
-  context.set("numberReviews", numberReviews);
-  if (numberReviews > MAX_REVIEWS) {
+  context.data.numberReviews++;
+  if (context.data.numberReviews > MAX_REVIEWS) {
    // we've done this too many times - return the code
-    context.writeEventToStream(
+    context.sendEvent(
      new MessageEvent({
-        msg: `Already reviewed ${numberReviews - 1} times, stopping!`,
+        msg: `Already reviewed ${
+          context.data.numberReviews - 1
+        } times, stopping!`,
      }),
    );
    return new StopEvent({ result: code });
  }
  // write a message to send an update to the user
-  context.writeEventToStream(
-    new MessageEvent({ msg: `Review #${numberReviews}: ${truncate(code)}` }),
+  context.sendEvent(
+    new MessageEvent({
+      msg: `Review #${context.data.numberReviews}: ${truncate(code)}`,
+    }),
  );
  const prompt = `Review this code: <code>${code}</code>. Check if the code quality and whether it correctly implements this specification: <spec>${spec}</spec>. If you're satisfied, just return 'Looks great', nothing else. If not, return a review with a list of changes you'd like to see.`;
  const review = (await llm.complete({ prompt })).text;
  if (review.includes("Looks great")) {
    // the reviewer is satisfied with the code, let's return the review
-    context.writeEventToStream(
+    context.sendEvent(
      new MessageEvent({
        msg: `Reviewer says: ${review}`,
      }),
@@ -103,20 +114,44 @@ const reviewer = async (context: Context, ev: CodeEvent) => {
  return new ReviewEvent({ review, code });
 };

-const codeAgent = new Workflow({ validate: true });
-codeAgent.addStep(StartEvent, architect, { outputs: CodeEvent });
-codeAgent.addStep(ReviewEvent, coder, { outputs: CodeEvent });
-codeAgent.addStep(CodeEvent, reviewer, { outputs: ReviewEvent });
+const codeAgent = new Workflow<Context, string, string>();
+codeAgent.addStep(
+  {
+    inputs: [StartEvent<string>],
+    outputs: [CodeEvent],
+  },
+  architect,
+);
+codeAgent.addStep(
+  {
+    inputs: [ReviewEvent],
+    outputs: [CodeEvent],
+  },
+  coder,
+);
+codeAgent.addStep(
+  {
+    inputs: [CodeEvent],
+    outputs: [ReviewEvent, StopEvent],
+  },
+  reviewer,
+);

 // Usage
 async function main() {
-  const run = codeAgent.run(specification);
-  for await (const event of codeAgent.streamEvents()) {
-    const msg = (event as MessageEvent).data.msg;
-    console.log(`${msg}\n`);
+  const run = codeAgent.run(specification).with({
+    specification,
+    numberReviews: 0,
+  });
+  for await (const event of run) {
+    if (event instanceof MessageEvent) {
+      const msg = (event as MessageEvent).data.msg;
+      console.log(`${msg}\n`);
+    } else if (event instanceof StopEvent) {
+      const result = (event as StopEvent<string>).data;
+      console.log("Final code:\n", result);
+    }
  }
-  const result = await run;
-  console.log("Final code:\n", result.data.result);
 }

 main().catch(console.error);
@@ -1,10 +1,10 @@
 import {
-  Context,
+  HandlerContext,
  StartEvent,
  StopEvent,
  Workflow,
  WorkflowEvent,
-} from "@llamaindex/core/workflow";
+} from "@llamaindex/workflow";
 import { OpenAI } from "llamaindex";

 // Create LLM instance
@@ -12,59 +12,77 @@ const llm = new OpenAI();

 // Create custom event types
 export class JokeEvent extends WorkflowEvent<{ joke: string }> {}
+
 export class CritiqueEvent extends WorkflowEvent<{ critique: string }> {}
+
 export class AnalysisEvent extends WorkflowEvent<{ analysis: string }> {}

-const generateJoke = async (_context: Context, ev: StartEvent) => {
-  const prompt = `Write your best joke about ${ev.data.input}.`;
+const generateJoke = async (_: unknown, ev: StartEvent<string>) => {
+  const prompt = `Write your best joke about ${ev.data}.`;
  const response = await llm.complete({ prompt });
  return new JokeEvent({ joke: response.text });
 };

-const critiqueJoke = async (_context: Context, ev: JokeEvent) => {
+const critiqueJoke = async (_: unknown, ev: JokeEvent) => {
  const prompt = `Give a thorough critique of the following joke: ${ev.data.joke}`;
  const response = await llm.complete({ prompt });
  return new CritiqueEvent({ critique: response.text });
 };

-const analyzeJoke = async (_context: Context, ev: JokeEvent) => {
+const analyzeJoke = async (_: unknown, ev: JokeEvent) => {
  const prompt = `Give a thorough analysis of the following joke: ${ev.data.joke}`;
  const response = await llm.complete({ prompt });
  return new AnalysisEvent({ analysis: response.text });
 };

 const reportJoke = async (
-  context: Context,
-  ev: AnalysisEvent | CritiqueEvent,
+  context: HandlerContext,
+  ev1: AnalysisEvent,
+  ev2: CritiqueEvent,
 ) => {
-  const events = context.collectEvents(ev, [AnalysisEvent, CritiqueEvent]);
-  if (!events) {
-    return;
-  }
-  const subPrompts = events.map((event) => {
-    if (event instanceof AnalysisEvent) {
-      return `Analysis: ${event.data.analysis}`;
-    } else if (event instanceof CritiqueEvent) {
-      return `Critique: ${event.data.critique}`;
-    }
-    return "";
-  });
+  const subPrompts = [ev1.data.analysis, ev2.data.critique];

-  const prompt = `Based on the following information about a joke:\n${subPrompts.join("\n")}\nProvide a comprehensive report on the joke's quality and impact.`;
+  const prompt = `Based on the following information about a joke:\n${subPrompts.join(
+    "\n",
+  )}\nProvide a comprehensive report on the joke's quality and impact.`;
  const response = await llm.complete({ prompt });
-  return new StopEvent({ result: response.text });
+  return new StopEvent(response.text);
 };

-const jokeFlow = new Workflow();
-jokeFlow.addStep(StartEvent, generateJoke);
-jokeFlow.addStep(JokeEvent, critiqueJoke);
-jokeFlow.addStep(JokeEvent, analyzeJoke);
-jokeFlow.addStep([AnalysisEvent, CritiqueEvent], reportJoke);
+const jokeFlow = new Workflow<unknown, string, string>();
+jokeFlow.addStep(
+  {
+    inputs: [StartEvent<string>],
+    outputs: [JokeEvent],
+  },
+  generateJoke,
+);
+jokeFlow.addStep(
+  {
+    inputs: [JokeEvent],
+    outputs: [CritiqueEvent],
+  },
+  critiqueJoke,
+);
+jokeFlow.addStep(
+  {
+    inputs: [JokeEvent],
+    outputs: [AnalysisEvent],
+  },
+  analyzeJoke,
+);
+jokeFlow.addStep(
+  {
+    inputs: [AnalysisEvent, CritiqueEvent],
+    outputs: [StopEvent<string>],
+  },
+  reportJoke,
+);

 // Usage
 async function main() {
  const result = await jokeFlow.run("pirates");
-  console.log(result.data.result);
+  console.log(result.data);
 }

 main().catch(console.error);
@@ -1,10 +1,9 @@
 import {
-  Context,
  StartEvent,
  StopEvent,
  Workflow,
  WorkflowEvent,
-} from "@llamaindex/core/workflow";
+} from "@llamaindex/workflow";
 import { OpenAI } from "llamaindex";

 // Create LLM instance
@@ -13,26 +12,38 @@ const llm = new OpenAI();
 // Create a custom event type
 export class JokeEvent extends WorkflowEvent<{ joke: string }> {}

-const generateJoke = async (_context: Context, ev: StartEvent) => {
-  const prompt = `Write your best joke about ${ev.data.input}.`;
+const generateJoke = async (_: unknown, ev: StartEvent<string>) => {
+  const prompt = `Write your best joke about ${ev.data}.`;
  const response = await llm.complete({ prompt });
  return new JokeEvent({ joke: response.text });
 };

-const critiqueJoke = async (_context: Context, ev: JokeEvent) => {
+const critiqueJoke = async (_: unknown, ev: JokeEvent) => {
  const prompt = `Give a thorough critique of the following joke: ${ev.data.joke}`;
  const response = await llm.complete({ prompt });
-  return new StopEvent({ result: response.text });
+  return new StopEvent(response.text);
 };

-const jokeFlow = new Workflow({ verbose: true });
-jokeFlow.addStep(StartEvent, generateJoke);
-jokeFlow.addStep(JokeEvent, critiqueJoke);
+const jokeFlow = new Workflow<unknown, string, string>();
+jokeFlow.addStep(
+  {
+    inputs: [StartEvent<string>],
+    outputs: [JokeEvent],
+  },
+  generateJoke,
+);
+jokeFlow.addStep(
+  {
+    inputs: [JokeEvent],
+    outputs: [StopEvent<string>],
+  },
+  critiqueJoke,
+);

 // Usage
 async function main() {
  const result = await jokeFlow.run("pirates");
-  console.log(result.data.result);
+  console.log(result.data);
 }

 main().catch(console.error);
@@ -1,10 +1,10 @@
 import {
-  Context,
+  HandlerContext,
  StartEvent,
  StopEvent,
  Workflow,
  WorkflowEvent,
-} from "@llamaindex/core/workflow";
+} from "@llamaindex/workflow";
 import { OpenAI } from "llamaindex";

 // Create LLM instance
@@ -12,38 +12,55 @@ const llm = new OpenAI();

 // Create custom event types
 export class JokeEvent extends WorkflowEvent<{ joke: string }> {}
+
 export class MessageEvent extends WorkflowEvent<{ msg: string }> {}

-const generateJoke = async (context: Context, ev: StartEvent) => {
-  context.writeEventToStream(
-    new MessageEvent({ msg: `Generating a joke about: ${ev.data.input}` }),
+const generateJoke = async (context: HandlerContext, ev: StartEvent) => {
+  context.sendEvent(
+    new MessageEvent({ msg: `Generating a joke about: ${ev.data}` }),
  );
-  const prompt = `Write your best joke about ${ev.data.input}.`;
+  const prompt = `Write your best joke about ${ev.data}.`;
  const response = await llm.complete({ prompt });
  return new JokeEvent({ joke: response.text });
 };

-const critiqueJoke = async (context: Context, ev: JokeEvent) => {
-  context.writeEventToStream(
+const critiqueJoke = async (context: HandlerContext, ev: JokeEvent) => {
+  context.sendEvent(
    new MessageEvent({ msg: `Write a critique of this joke: ${ev.data.joke}` }),
  );
  const prompt = `Give a thorough critique of the following joke: ${ev.data.joke}`;
  const response = await llm.complete({ prompt });
-  return new StopEvent({ result: response.text });
+  return new StopEvent(response.text);
 };

 const jokeFlow = new Workflow();
-jokeFlow.addStep(StartEvent, generateJoke);
-jokeFlow.addStep(JokeEvent, critiqueJoke);
+jokeFlow.addStep(
+  {
+    inputs: [StartEvent<string>],
+    outputs: [JokeEvent],
+  },
+  generateJoke,
+);
+jokeFlow.addStep(
+  {
+    inputs: [JokeEvent],
+    outputs: [StopEvent<string>],
+  },
+  critiqueJoke,
+);

 // Usage
 async function main() {
  const run = jokeFlow.run("pirates");
-  for await (const event of jokeFlow.streamEvents()) {
-    console.log((event as MessageEvent).data.msg);
+  for await (const event of run) {
+    if (event instanceof MessageEvent) {
+      console.log("Message:");
+      console.log((event as MessageEvent).data.msg);
+    } else if (event instanceof StopEvent) {
+      console.log("Result:");
+      console.log((event as StopEvent<string>).data);
+    }
  }
-  const result = await run;
-  console.log(result.data.result);
 }

 main().catch(console.error);
@@ -1,19 +1,21 @@
-import {
-  Context,
-  StartEvent,
-  StopEvent,
-  Workflow,
-} from "@llamaindex/core/workflow";
+import { StartEvent, StopEvent, Workflow } from "@llamaindex/workflow";

-const longRunning = async (_context: Context, ev: StartEvent) => {
+const longRunning = async (_: unknown, ev: StartEvent<string>) => {
  await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait for 2 seconds
-  return new StopEvent({ result: "We waited 2 seconds" });
+  return new StopEvent("We waited 2 seconds");
 };

 async function timeout() {
-  const workflow = new Workflow({ verbose: true, timeout: 1 });
-  workflow.addStep(StartEvent, longRunning);
-  // This will timeout
+  const workflow = new Workflow<unknown, string, string>({
+    timeout: 1,
+  });
+  workflow.addStep(
+    {
+      inputs: [StartEvent<string>],
+      outputs: [StopEvent<string>],
+    },
+    longRunning,
+  );
  try {
    await workflow.run("Let's start");
  } catch (error) {
@@ -23,14 +25,23 @@ async function timeout() {

 async function notimeout() {
  // Increase timeout to 3 seconds - no timeout
-  const workflow = new Workflow({ verbose: true, timeout: 3 });
-  workflow.addStep(StartEvent, longRunning);
+  const workflow = new Workflow<unknown, string, string>({
+    timeout: 3,
+  });
+  workflow.addStep(
+    {
+      inputs: [StartEvent<string>],
+      outputs: [StopEvent<string>],
+    },
+    longRunning,
+  );
  const result = await workflow.run("Let's start");
-  console.log(result.data.result);
+  console.log(result.data);
 }

 async function main() {
  await timeout();
+  console.log("---");
  await notimeout();
 }

@@ -1,10 +1,9 @@
 import {
-  Context,
  StartEvent,
  StopEvent,
  Workflow,
  WorkflowEvent,
-} from "@llamaindex/core/workflow";
+} from "@llamaindex/workflow";
 import { OpenAI } from "llamaindex";

 // Create LLM instance
@@ -13,40 +12,66 @@ const llm = new OpenAI();
 // Create a custom event type
 export class JokeEvent extends WorkflowEvent<{ joke: string }> {}

-const generateJoke = async (_context: Context, ev: StartEvent) => {
-  const prompt = `Write your best joke about ${ev.data.input}.`;
+const generateJoke = async (_: unknown, ev: StartEvent<string>) => {
+  const prompt = `Write your best joke about ${ev.data}.`;
  const response = await llm.complete({ prompt });
  return new JokeEvent({ joke: response.text });
 };

-const critiqueJoke = async (_context: Context, ev: JokeEvent) => {
+const critiqueJoke = async (_: unknown, ev: JokeEvent) => {
  const prompt = `Give a thorough critique of the following joke: ${ev.data.joke}`;
  const response = await llm.complete({ prompt });
-  return new StopEvent({ result: response.text });
+  return new StopEvent(response.text);
 };

 async function validateFails() {
  try {
-    const jokeFlow = new Workflow({ verbose: true, validate: true });
-    jokeFlow.addStep(StartEvent, generateJoke, { outputs: StopEvent });
-    jokeFlow.addStep(JokeEvent, critiqueJoke, { outputs: StopEvent });
-    await jokeFlow.run("pirates");
+    const jokeFlow = new Workflow();
+    jokeFlow.addStep(
+      {
+        inputs: [StartEvent<string>],
+        outputs: [StopEvent<string>],
+      },
+      // @ts-expect-error outputs should be JokeEvent
+      generateJoke,
+    );
+    jokeFlow.addStep(
+      {
+        inputs: [JokeEvent],
+        outputs: [StopEvent],
+      },
+      critiqueJoke,
+    );
+    await jokeFlow.run("pirates").strict();
  } catch (e) {
    console.error("Validation failed:", e);
  }
 }

 async function validate() {
-  const jokeFlow = new Workflow({ verbose: true, validate: true });
-  jokeFlow.addStep(StartEvent, generateJoke, { outputs: JokeEvent });
-  jokeFlow.addStep(JokeEvent, critiqueJoke, { outputs: StopEvent });
-  const result = await jokeFlow.run("pirates");
-  console.log(result.data.result);
+  const jokeFlow = new Workflow();
+  jokeFlow.addStep(
+    {
+      inputs: [StartEvent<string>],
+      outputs: [JokeEvent],
+    },
+    generateJoke,
+  );
+  jokeFlow.addStep(
+    {
+      inputs: [JokeEvent],
+      outputs: [StopEvent<string>],
+    },
+    critiqueJoke,
+  );
+  const result = await jokeFlow.run("pirates").strict();
+  console.log(result.data);
 }

 // Usage
 async function main() {
  await validateFails();
+  console.log("---");
  await validate();
 }

@@ -35,12 +35,6 @@
    "typescript-eslint": "^8.13.0"
  },
  "packageManager": "pnpm@9.12.3",
-  "pnpm": {
-    "overrides": {
-      "trim": "1.0.1",
-      "protobufjs": "7.2.6"
-    }
-  },
  "lint-staged": {
    "(!apps/docs/i18n/**/docusaurus-plugin-content-docs/current/api/*).{js,jsx,ts,tsx,md}": "prettier --write"
  }
@@ -1,5 +1,38 @@
 # @llamaindex/autotool

+## 5.0.13
+
+### Patch Changes
+
+- llamaindex@0.8.13
+
+## 5.0.12
+
+### Patch Changes
+
+- llamaindex@0.8.12
+
+## 5.0.11
+
+### Patch Changes
+
+- llamaindex@0.8.11
+
+## 5.0.10
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 5.0.9
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 5.0.8

 ### Patch Changes
@@ -1,5 +1,43 @@
 # @llamaindex/autotool-01-node-example

+## 0.0.56
+
+### Patch Changes
+
+- llamaindex@0.8.13
+- @llamaindex/autotool@5.0.13
+
+## 0.0.55
+
+### Patch Changes
+
+- llamaindex@0.8.12
+- @llamaindex/autotool@5.0.12
+
+## 0.0.54
+
+### Patch Changes
+
+- llamaindex@0.8.11
+- @llamaindex/autotool@5.0.11
+
+## 0.0.53
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+  - @llamaindex/autotool@5.0.10
+
+## 0.0.52
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+  - @llamaindex/autotool@5.0.9
+
 ## 0.0.51

 ### Patch Changes
@@ -13,5 +13,5 @@
  "scripts": {
    "start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
  },
-  "version": "0.0.51"
+  "version": "0.0.56"
 }
@@ -1,5 +1,43 @@
 # @llamaindex/autotool-02-next-example

+## 0.1.100
+
+### Patch Changes
+
+- llamaindex@0.8.13
+- @llamaindex/autotool@5.0.13
+
+## 0.1.99
+
+### Patch Changes
+
+- llamaindex@0.8.12
+- @llamaindex/autotool@5.0.12
+
+## 0.1.98
+
+### Patch Changes
+
+- llamaindex@0.8.11
+- @llamaindex/autotool@5.0.11
+
+## 0.1.97
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+  - @llamaindex/autotool@5.0.10
+
+## 0.1.96
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+  - @llamaindex/autotool@5.0.9
+
 ## 0.1.95

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/autotool-02-next-example",
  "private": true,
-  "version": "0.1.95",
+  "version": "0.1.100",
  "scripts": {
    "dev": "next dev",
    "build": "next build",
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/autotool",
  "type": "module",
-  "version": "5.0.8",
+  "version": "5.0.13",
  "description": "auto transpile your JS function to LLM Agent compatible",
  "files": [
    "dist",
@@ -1,5 +1,43 @@
 # @llamaindex/cloud

+## 2.0.10
+
+### Patch Changes
+
+- Updated dependencies [a7b0ac3]
+- Updated dependencies [c69605f]
+  - @llamaindex/core@0.4.10
+
+## 2.0.9
+
+### Patch Changes
+
+- Updated dependencies [7ae6eaa]
+  - @llamaindex/core@0.4.9
+
+## 2.0.8
+
+### Patch Changes
+
+- Updated dependencies [f865c98]
+  - @llamaindex/core@0.4.8
+
+## 2.0.7
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 2.0.6
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 2.0.5

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloud",
-  "version": "2.0.5",
+  "version": "2.0.10",
  "type": "module",
  "license": "MIT",
  "scripts": {
@@ -1,5 +1,43 @@
 # @llamaindex/community

+## 0.0.68
+
+### Patch Changes
+
+- Updated dependencies [a7b0ac3]
+- Updated dependencies [c69605f]
+  - @llamaindex/core@0.4.10
+
+## 0.0.67
+
+### Patch Changes
+
+- Updated dependencies [7ae6eaa]
+  - @llamaindex/core@0.4.9
+
+## 0.0.66
+
+### Patch Changes
+
+- Updated dependencies [f865c98]
+  - @llamaindex/core@0.4.8
+
+## 0.0.65
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 0.0.64
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 0.0.63

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/community",
  "description": "Community package for LlamaIndexTS",
-  "version": "0.0.63",
+  "version": "0.0.68",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -1,5 +1,38 @@
 # @llamaindex/core

+## 0.4.10
+
+### Patch Changes
+
+- a7b0ac3: fix: update tool call llm type
+- c69605f: feat: add async support to BaseChatStore and BaseChatStoreMemory
+
+## 0.4.9
+
+### Patch Changes
+
+- 7ae6eaa: feat: allow pass `additionalChatOptions` to agent
+
+## 0.4.8
+
+### Patch Changes
+
+- f865c98: feat: async get message on chat store
+
+## 0.4.7
+
+### Patch Changes
+
+- d89ebe0: feat: better support for zod schema
+- fd8c882: chore: add warning on legacy workflow API
+
+## 0.4.6
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+
 ## 0.4.5

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/core",
  "type": "module",
-  "version": "0.4.5",
+  "version": "0.4.10",
  "description": "LlamaIndex Core Module",
  "exports": {
    "./agent": {
@@ -106,11 +106,17 @@ export type AgentRunnerParams<
  >
    ? AdditionalMessageOptions
    : never,
+  AdditionalChatOptions extends object = object,
 > = {
  llm: AI;
  chatHistory: ChatMessage<AdditionalMessageOptions>[];
  systemPrompt: MessageContent | null;
-  runner: AgentWorker<AI, Store, AdditionalMessageOptions>;
+  runner: AgentWorker<
+    AI,
+    Store,
+    AdditionalMessageOptions,
+    AdditionalChatOptions
+  >;
  tools:
    | BaseToolWithCall[]
    | ((query: MessageContent) => Promise<BaseToolWithCall[]>);
@@ -125,6 +131,7 @@ export type AgentParamsBase<
  >
    ? AdditionalMessageOptions
    : never,
+  AdditionalChatOptions extends object = object,
 > =
  | {
      llm?: AI;
@@ -132,6 +139,7 @@ export type AgentParamsBase<
      systemPrompt?: MessageContent;
      verbose?: boolean;
      tools: BaseToolWithCall[];
+      additionalChatOptions?: AdditionalChatOptions;
    }
  | {
      llm?: AI;
@@ -139,6 +147,7 @@ export type AgentParamsBase<
      systemPrompt?: MessageContent;
      verbose?: boolean;
      toolRetriever: ObjectRetriever<BaseToolWithCall>;
+      additionalChatOptions?: AdditionalChatOptions;
    };

 /**
@@ -153,21 +162,36 @@ export abstract class AgentWorker<
  >
    ? AdditionalMessageOptions
    : never,
+  AdditionalChatOptions extends object = object,
 > {
-  #taskSet = new Set<TaskStep<AI, Store, AdditionalMessageOptions>>();
-  abstract taskHandler: TaskHandler<AI, Store, AdditionalMessageOptions>;
+  #taskSet = new Set<
+    TaskStep<AI, Store, AdditionalMessageOptions, AdditionalChatOptions>
+  >();
+  abstract taskHandler: TaskHandler<
+    AI,
+    Store,
+    AdditionalMessageOptions,
+    AdditionalChatOptions
+  >;

  public createTask(
    query: MessageContent,
-    context: AgentTaskContext<AI, Store, AdditionalMessageOptions>,
-  ): ReadableStream<TaskStepOutput<AI, Store, AdditionalMessageOptions>> {
+    context: AgentTaskContext<
+      AI,
+      Store,
+      AdditionalMessageOptions,
+      AdditionalChatOptions
+    >,
+  ): ReadableStream<
+    TaskStepOutput<AI, Store, AdditionalMessageOptions, AdditionalChatOptions>
+  > {
    context.store.messages.push({
      role: "user",
      content: query,
    });
    const taskOutputStream = createTaskOutputStream(this.taskHandler, context);
    return new ReadableStream<
-      TaskStepOutput<AI, Store, AdditionalMessageOptions>
+      TaskStepOutput<AI, Store, AdditionalMessageOptions, AdditionalChatOptions>
    >({
      start: async (controller) => {
        for await (const stepOutput of taskOutputStream) {
@@ -176,7 +200,8 @@ export abstract class AgentWorker<
            let currentStep: TaskStep<
              AI,
              Store,
-              AdditionalMessageOptions
+              AdditionalMessageOptions,
+              AdditionalChatOptions
            > | null = stepOutput.taskStep;
            while (currentStep) {
              this.#taskSet.delete(currentStep);
@@ -227,6 +252,7 @@ export abstract class AgentRunner<
  >
    ? AdditionalMessageOptions
    : never,
+  AdditionalChatOptions extends object = object,
 > extends BaseChatEngine {
  readonly #llm: AI;
  readonly #tools:
@@ -234,7 +260,12 @@ export abstract class AgentRunner<
    | ((query: MessageContent) => Promise<BaseToolWithCall[]>);
  readonly #systemPrompt: MessageContent | null = null;
  #chatHistory: ChatMessage<AdditionalMessageOptions>[];
-  readonly #runner: AgentWorker<AI, Store, AdditionalMessageOptions>;
+  readonly #runner: AgentWorker<
+    AI,
+    Store,
+    AdditionalMessageOptions,
+    AdditionalChatOptions
+  >;
  readonly #verbose: boolean;

  // create extra store
@@ -245,7 +276,7 @@ export abstract class AgentRunner<
  }

  static defaultTaskHandler: TaskHandler<LLM> = async (step, enqueueOutput) => {
-    const { llm, getTools, stream } = step.context;
+    const { llm, getTools, stream, additionalChatOptions } = step.context;
    const lastMessage = step.context.store.messages.at(-1)!.content;
    const tools = await getTools(lastMessage);
    if (!stream) {
@@ -253,8 +284,9 @@ export abstract class AgentRunner<
        stream,
        tools,
        messages: [...step.context.store.messages],
+        additionalChatOptions,
      });
-      await stepTools<LLM>({
+      await stepTools({
        response,
        tools,
        step,
@@ -265,6 +297,7 @@ export abstract class AgentRunner<
        stream,
        tools,
        messages: [...step.context.store.messages],
+        additionalChatOptions,
      });
      await stepToolsStreaming<LLM>({
        response,
@@ -276,7 +309,12 @@ export abstract class AgentRunner<
  };

  protected constructor(
-    params: AgentRunnerParams<AI, Store, AdditionalMessageOptions>,
+    params: AgentRunnerParams<
+      AI,
+      Store,
+      AdditionalMessageOptions,
+      AdditionalChatOptions
+    >,
  ) {
    super();
    const { llm, chatHistory, systemPrompt, runner, tools, verbose } = params;
@@ -330,6 +368,7 @@ export abstract class AgentRunner<
    stream: boolean = false,
    verbose: boolean | undefined = undefined,
    chatHistory?: ChatMessage<AdditionalMessageOptions>[],
+    additionalChatOptions?: AdditionalChatOptions,
  ) {
    const initialMessages = [...(chatHistory ?? this.#chatHistory)];
    if (this.#systemPrompt !== null) {
@@ -348,6 +387,7 @@ export abstract class AgentRunner<
      stream,
      toolCallCount: 0,
      llm: this.#llm,
+      additionalChatOptions: additionalChatOptions ?? {},
      getTools: (message) => this.getTools(message),
      store: {
        ...this.createStore(),
@@ -365,13 +405,29 @@ export abstract class AgentRunner<
    });
  }

-  async chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
  async chat(
-    params: StreamingChatEngineParams,
+    params: NonStreamingChatEngineParams<
+      AdditionalMessageOptions,
+      AdditionalChatOptions
+    >,
+  ): Promise<EngineResponse>;
+  async chat(
+    params: StreamingChatEngineParams<
+      AdditionalMessageOptions,
+      AdditionalChatOptions
+    >,
  ): Promise<ReadableStream<EngineResponse>>;
  @wrapEventCaller
  async chat(
-    params: NonStreamingChatEngineParams | StreamingChatEngineParams,
+    params:
+      | NonStreamingChatEngineParams<
+          AdditionalMessageOptions,
+          AdditionalChatOptions
+        >
+      | StreamingChatEngineParams<
+          AdditionalMessageOptions,
+          AdditionalChatOptions
+        >,
  ): Promise<EngineResponse | ReadableStream<EngineResponse>> {
    let chatHistory: ChatMessage<AdditionalMessageOptions>[] = [];

@@ -388,6 +444,7 @@ export abstract class AgentRunner<
      !!params.stream,
      false,
      chatHistory,
+      params.chatOptions,
    );
    for await (const stepOutput of task) {
      // update chat history for each round
@@ -398,7 +455,12 @@ export abstract class AgentRunner<
          return output.pipeThrough(
            new TransformStream<EngineResponse>({
              transform(chunk, controller) {
-                controller.enqueue(EngineResponse.fromChatResponseChunk(chunk));
+                controller.enqueue(
+                  EngineResponse.fromChatResponseChunk(
+                    chunk,
+                    chunk.sourceNodes,
+                  ),
+                );
              },
            }),
          );
@@ -4,24 +4,66 @@ import { ObjectRetriever } from "../objects";
 import { AgentRunner, AgentWorker, type AgentParamsBase } from "./base.js";
 import { validateAgentParams } from "./utils.js";

-type LLMParamsBase = AgentParamsBase<LLM>;
+type LLMParamsBase<
+  AI extends LLM,
+  AdditionalMessageOptions extends object = AI extends LLM<
+    object,
+    infer AdditionalMessageOptions
+  >
+    ? AdditionalMessageOptions
+    : never,
+  AdditionalChatOptions extends object = object,
+> = AgentParamsBase<AI, AdditionalMessageOptions, AdditionalChatOptions>;

-type LLMParamsWithTools = LLMParamsBase & {
+type LLMParamsWithTools<
+  AI extends LLM,
+  AdditionalMessageOptions extends object = AI extends LLM<
+    object,
+    infer AdditionalMessageOptions
+  >
+    ? AdditionalMessageOptions
+    : never,
+  AdditionalChatOptions extends object = object,
+> = LLMParamsBase<AI, AdditionalMessageOptions, AdditionalChatOptions> & {
  tools: BaseToolWithCall[];
 };

-type LLMParamsWithToolRetriever = LLMParamsBase & {
+type LLMParamsWithToolRetriever<
+  AI extends LLM,
+  AdditionalMessageOptions extends object = AI extends LLM<
+    object,
+    infer AdditionalMessageOptions
+  >
+    ? AdditionalMessageOptions
+    : never,
+  AdditionalChatOptions extends object = object,
+> = LLMParamsBase<AI, AdditionalMessageOptions, AdditionalChatOptions> & {
  toolRetriever: ObjectRetriever<BaseToolWithCall>;
 };

-export type LLMAgentParams = LLMParamsWithTools | LLMParamsWithToolRetriever;
+export type LLMAgentParams<
+  AI extends LLM,
+  AdditionalMessageOptions extends object = AI extends LLM<
+    object,
+    infer AdditionalMessageOptions
+  >
+    ? AdditionalMessageOptions
+    : never,
+  AdditionalChatOptions extends object = object,
+> =
+  | LLMParamsWithTools<AI, AdditionalMessageOptions, AdditionalChatOptions>
+  | LLMParamsWithToolRetriever<
+      AI,
+      AdditionalMessageOptions,
+      AdditionalChatOptions
+    >;

 export class LLMAgentWorker extends AgentWorker<LLM> {
  taskHandler = AgentRunner.defaultTaskHandler;
 }

 export class LLMAgent extends AgentRunner<LLM> {
-  constructor(params: LLMAgentParams) {
+  constructor(params: LLMAgentParams<LLM>) {
    validateAgentParams(params);
    const llm = params.llm ?? (Settings.llm ? (Settings.llm as LLM) : null);
    if (!llm)
@@ -19,6 +19,7 @@ export type AgentTaskContext<
  >
    ? AdditionalMessageOptions
    : never,
+  AdditionalChatOptions extends object = object,
 > = {
  readonly stream: boolean;
  readonly toolCallCount: number;
@@ -26,6 +27,7 @@ export type AgentTaskContext<
  readonly getTools: (
    input: MessageContent,
  ) => BaseToolWithCall[] | Promise<BaseToolWithCall[]>;
+  readonly additionalChatOptions: Partial<AdditionalChatOptions>;
  shouldContinue: (
    taskStep: Readonly<TaskStep<Model, Store, AdditionalMessageOptions>>,
  ) => boolean;
@@ -45,13 +47,26 @@ export type TaskStep<
  >
    ? AdditionalMessageOptions
    : never,
+  AdditionalChatOptions extends object = object,
 > = {
  id: UUID;
-  context: AgentTaskContext<Model, Store, AdditionalMessageOptions>;
+  context: AgentTaskContext<
+    Model,
+    Store,
+    AdditionalMessageOptions,
+    AdditionalChatOptions
+  >;

  // linked list
-  prevStep: TaskStep<Model, Store, AdditionalMessageOptions> | null;
-  nextSteps: Set<TaskStep<Model, Store, AdditionalMessageOptions>>;
+  prevStep: TaskStep<
+    Model,
+    Store,
+    AdditionalMessageOptions,
+    AdditionalChatOptions
+  > | null;
+  nextSteps: Set<
+    TaskStep<Model, Store, AdditionalMessageOptions, AdditionalChatOptions>
+  >;
 };

 export type TaskStepOutput<
@@ -63,8 +78,14 @@ export type TaskStepOutput<
  >
    ? AdditionalMessageOptions
    : never,
+  AdditionalChatOptions extends object = object,
 > = {
-  taskStep: TaskStep<Model, Store, AdditionalMessageOptions>;
+  taskStep: TaskStep<
+    Model,
+    Store,
+    AdditionalMessageOptions,
+    AdditionalChatOptions
+  >;
  // output shows the response to the user
  output:
    | ChatResponse<AdditionalMessageOptions>
@@ -81,10 +102,16 @@ export type TaskHandler<
  >
    ? AdditionalMessageOptions
    : never,
+  AdditionalChatOptions extends object = object,
 > = (
-  step: TaskStep<Model, Store, AdditionalMessageOptions>,
+  step: TaskStep<Model, Store, AdditionalMessageOptions, AdditionalChatOptions>,
  enqueueOutput: (
-    taskOutput: TaskStepOutput<Model, Store, AdditionalMessageOptions>,
+    taskOutput: TaskStepOutput<
+      Model,
+      Store,
+      AdditionalMessageOptions,
+      AdditionalChatOptions
+    >,
  ) => void,
 ) => Promise<void>;

@@ -16,14 +16,18 @@ export interface BaseChatEngineParams<

 export interface StreamingChatEngineParams<
  AdditionalMessageOptions extends object = object,
+  AdditionalChatOptions extends object = object,
 > extends BaseChatEngineParams<AdditionalMessageOptions> {
  stream: true;
+  chatOptions?: AdditionalChatOptions;
 }

 export interface NonStreamingChatEngineParams<
  AdditionalMessageOptions extends object = object,
+  AdditionalChatOptions extends object = object,
 > extends BaseChatEngineParams<AdditionalMessageOptions> {
  stream?: false;
+  chatOptions?: AdditionalChatOptions;
 }

 export abstract class BaseChatEngine {
@@ -1,5 +1,4 @@
-import { streamConverter } from "../utils";
-import { extractText } from "../utils/llms";
+import { extractText, streamConverter } from "../utils";
 import type {
  ChatResponse,
  ChatResponseChunk,
@@ -67,6 +66,8 @@ export abstract class BaseLLM<

 export abstract class ToolCallLLM<
  AdditionalChatOptions extends object = object,
-> extends BaseLLM<AdditionalChatOptions, ToolCallLLMMessageOptions> {
+  AdditionalMessageOptions extends
+    ToolCallLLMMessageOptions = ToolCallLLMMessageOptions,
+> extends BaseLLM<AdditionalChatOptions, AdditionalMessageOptions> {
  abstract supportToolCall: boolean;
 }
@@ -65,19 +65,21 @@ export abstract class BaseChatStoreMemory<
    super();
  }

-  getAllMessages(): ChatMessage<AdditionalMessageOptions>[] {
+  getAllMessages():
+    | ChatMessage<AdditionalMessageOptions>[]
+    | Promise<ChatMessage<AdditionalMessageOptions>[]> {
    return this.chatStore.getMessages(this.chatStoreKey);
  }

-  put(messages: ChatMessage<AdditionalMessageOptions>) {
+  put(messages: ChatMessage<AdditionalMessageOptions>): void | Promise<void> {
    this.chatStore.addMessage(this.chatStoreKey, messages);
  }

-  set(messages: ChatMessage<AdditionalMessageOptions>[]) {
+  set(messages: ChatMessage<AdditionalMessageOptions>[]): void | Promise<void> {
    this.chatStore.setMessages(this.chatStoreKey, messages);
  }

-  reset() {
+  reset(): void | Promise<void> {
    this.chatStore.deleteMessages(this.chatStoreKey);
  }
 }
@@ -33,11 +33,11 @@ export class ChatMemoryBuffer<
    }
  }

-  getMessages(
+  async getMessages(
    transientMessages?: ChatMessage<AdditionalMessageOptions>[] | undefined,
    initialTokenCount: number = 0,
  ) {
-    const messages = this.getAllMessages();
+    const messages = await this.getAllMessages();

    if (initialTokenCount > this.tokenLimit) {
      throw new Error("Initial token count exceeds token limit");
@@ -7,7 +7,11 @@ export abstract class BaseChatStore<
    key: string,
    messages: ChatMessage<AdditionalMessageOptions>[],
  ): void;
-  abstract getMessages(key: string): ChatMessage<AdditionalMessageOptions>[];
+  abstract getMessages(
+    key: string,
+  ):
+    | ChatMessage<AdditionalMessageOptions>[]
+    | Promise<ChatMessage<AdditionalMessageOptions>[]>;
  abstract addMessage(
    key: string,
    message: ChatMessage<AdditionalMessageOptions>,
@@ -15,5 +19,7 @@ export abstract class BaseChatStore<
  ): void;
  abstract deleteMessages(key: string): void;
  abstract deleteMessage(key: string, idx: number): void;
-  abstract getKeys(): IterableIterator<string>;
+  abstract getKeys():
+    | IterableIterator<string>
+    | Promise<IterableIterator<string>>;
 }
@@ -4,18 +4,12 @@ import { zodToJsonSchema } from "zod-to-json-schema";
 import type { JSONValue } from "../global";
 import type { BaseTool, ToolMetadata } from "../llms";

-const kOriginalFn = Symbol("originalFn");
-
 export class FunctionTool<T, R extends JSONValue | Promise<JSONValue>>
  implements BaseTool<T>
 {
-  [kOriginalFn]?: (input: T) => R;
-
  #fn: (input: T) => R;
-  #metadata: ToolMetadata<JSONSchemaType<T>>;
-  // todo: for the future, we can use zod to validate the input parameters
-  // eslint-disable-next-line no-unused-private-class-members
-  #zodType: z.ZodType<T> | null = null;
+  readonly #metadata: ToolMetadata<JSONSchemaType<T>>;
+  readonly #zodType: z.ZodType<T> | null = null;
  constructor(
    fn: (input: T) => R,
    metadata: ToolMetadata<JSONSchemaType<T>>,
@@ -32,6 +26,12 @@ export class FunctionTool<T, R extends JSONValue | Promise<JSONValue>>
    fn: (input: T) => JSONValue | Promise<JSONValue>,
    schema: ToolMetadata<JSONSchemaType<T>>,
  ): FunctionTool<T, JSONValue | Promise<JSONValue>>;
+  static from<R extends z.ZodType>(
+    fn: (input: z.infer<R>) => JSONValue | Promise<JSONValue>,
+    schema: Omit<ToolMetadata, "parameters"> & {
+      parameters: R;
+    },
+  ): FunctionTool<z.infer<R>, JSONValue | Promise<JSONValue>>;
  static from<T, R extends z.ZodType<T>>(
    fn: (input: T) => JSONValue | Promise<JSONValue>,
    schema: Omit<ToolMetadata, "parameters"> & {
@@ -40,15 +40,15 @@ export class FunctionTool<T, R extends JSONValue | Promise<JSONValue>>
  ): FunctionTool<T, JSONValue>;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  static from(fn: any, schema: any): any {
-    if (schema.parameter instanceof z.ZodSchema) {
-      const jsonSchema = zodToJsonSchema(schema.parameter);
+    if (schema.parameters instanceof z.ZodSchema) {
+      const jsonSchema = zodToJsonSchema(schema.parameters);
      return new FunctionTool(
        fn,
        {
          ...schema,
          parameters: jsonSchema,
        },
-        schema.parameter,
+        schema.parameters,
      );
    }
    return new FunctionTool(fn, schema);
@@ -58,7 +58,15 @@ export class FunctionTool<T, R extends JSONValue | Promise<JSONValue>>
    return this.#metadata as BaseTool<T>["metadata"];
  }

-  call(input: T) {
+  call = (input: T) => {
+    if (this.#zodType) {
+      const result = this.#zodType.safeParse(input);
+      if (result.success) {
+        return this.#fn.call(null, result.data);
+      } else {
+        console.warn(result.error.errors);
+      }
+    }
    return this.#fn.call(null, input);
-  }
+  };
 }
@@ -13,6 +13,8 @@ export type StepFunction<T extends WorkflowEvent = WorkflowEvent> = (

 type EventTypeParam = EventTypes | EventTypes[];

+let once = false;
+
 export class Workflow {
  #steps: Map<
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -29,8 +31,20 @@ export class Workflow {
      verbose?: boolean;
      timeout?: number;
      validate?: boolean;
+      ignoreDeprecatedWarning?: boolean;
    } = {},
  ) {
+    if (!once && !params.ignoreDeprecatedWarning) {
+      console.warn(
+        "@llamaindex/core/workflow is going to use the new workflow API in the next major version.",
+        "Please update your imports to @llamaindex/workflow",
+      );
+      console.warn(
+        "See https://ts.llamaindex.ai/docs/llamaindex/guide/workflow for more information",
+      );
+      once = true;
+    }
+
    this.#verbose = params.verbose ?? false;
    this.#timeout = params.timeout ?? null;
    this.#validate = params.validate ?? false;
@@ -19,7 +19,7 @@ describe("ChatMemoryBuffer", () => {
    expect(buffer.tokenLimit).toBe(500);
  });

-  test("getMessages returns all messages when under token limit", () => {
+  test("getMessages returns all messages when under token limit", async () => {
    const messages: ChatMessage[] = [
      { role: "user", content: "Hello" },
      { role: "assistant", content: "Hi there!" },
@@ -30,11 +30,11 @@ describe("ChatMemoryBuffer", () => {
      chatHistory: messages,
    });

-    const result = buffer.getMessages();
+    const result = await buffer.getMessages();
    expect(result).toEqual(messages);
  });

-  test("getMessages truncates messages when over token limit", () => {
+  test("getMessages truncates messages when over token limit", async () => {
    const messages: ChatMessage[] = [
      { role: "user", content: "This is a long message" },
      { role: "assistant", content: "This is also a long reply" },
@@ -45,11 +45,11 @@ describe("ChatMemoryBuffer", () => {
      chatHistory: messages,
    });

-    const result = buffer.getMessages();
+    const result = await buffer.getMessages();
    expect(result).toEqual([{ role: "user", content: "Short" }]);
  });

-  test("getMessages handles input messages", () => {
+  test("getMessages handles input messages", async () => {
    const storedMessages: ChatMessage[] = [
      { role: "user", content: "Hello" },
      { role: "assistant", content: "Hi there!" },
@@ -62,13 +62,13 @@ describe("ChatMemoryBuffer", () => {
    const inputMessages: ChatMessage[] = [
      { role: "user", content: "New message" },
    ];
-    const result = buffer.getMessages(inputMessages);
+    const result = await buffer.getMessages(inputMessages);
    expect(result).toEqual([...inputMessages, ...storedMessages]);
  });

  test("getMessages throws error when initial token count exceeds limit", () => {
    const buffer = new ChatMemoryBuffer({ tokenLimit: 10 });
-    expect(() => buffer.getMessages(undefined, 20)).toThrow(
+    expect(async () => buffer.getMessages(undefined, 20)).rejects.toThrow(
      "Initial token count exceeds token limit",
    );
  });
@@ -1,5 +1,13 @@
 # @llamaindex/env

+## 0.1.20
+
+### Patch Changes
+
+- 4fc001c: chore: bump `@huggingface/transformers`
+
+  Upgrade to v3; please read https://github.com/huggingface/transformers.js/releases/tag/3.0.0 for more information.
+
 ## 0.1.19

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/env",
  "description": "environment wrapper, supports all JS environment including node, deno, bun, edge runtime, and cloudflare worker",
-  "version": "0.1.19",
+  "version": "0.1.20",
  "type": "module",
  "types": "dist/index.d.ts",
  "module": "dist/index.js",
@@ -124,7 +124,7 @@
  "devDependencies": {
    "@types/node": "^22.9.0",
    "@types/readable-stream": "^4.0.15",
-    "@xenova/transformers": "^2.17.2",
+    "@huggingface/transformers": "^3.0.2",
    "bunchee": "5.6.1",
    "gpt-tokenizer": "^2.6.0",
    "pathe": "^1.1.2",
@@ -132,7 +132,7 @@
  },
  "peerDependencies": {
    "@aws-crypto/sha256-js": "^5.2.0",
-    "@xenova/transformers": "^2.17.2",
+    "@huggingface/transformers": "^3.0.2",
    "gpt-tokenizer": "^2.5.0",
    "js-tiktoken": "^1.0.12",
    "pathe": "^1.1.2"
@@ -141,7 +141,7 @@
    "@aws-crypto/sha256-js": {
      "optional": true
    },
-    "@xenova/transformers": {
+    "@huggingface/transformers": {
      "optional": true
    },
    "pathe": {
@@ -8,8 +8,10 @@ export {
 export async function loadTransformers(onLoad: OnLoad) {
  if (getTransformers() === null) {
    setTransformers(
-      // @ts-expect-error no type
-      await import("https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2"),
+      await import(
+        // @ts-expect-error no type
+        "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.2"
+      ),
    );
  } else {
    return getTransformers()!;
@@ -8,7 +8,7 @@ export {

 export async function loadTransformers(onLoad: OnLoad) {
  if (getTransformers() === null) {
-    setTransformers(await import("@xenova/transformers"));
+    setTransformers(await import("@huggingface/transformers"));
  } else {
    return getTransformers()!;
  }
@@ -9,7 +9,7 @@ export async function loadTransformers(onLoad: OnLoad) {
  if (getTransformers() === null) {
    /**
     * If you see this warning, it means that the current environment does not support the transformer.
-     *  because "@xeonva/transformers" highly depends on Node.js APIs.
+     *  because "@huggingface/transformers" highly depends on Node.js APIs.
     *
     * One possible solution is to fix their implementation to make it work in the non-Node.js environment,
     *  but it's not worth the effort because Edge Runtime and Cloudflare Workers are not meant for heavy Machine Learning tasks.
@@ -17,14 +17,14 @@ export async function loadTransformers(onLoad: OnLoad) {
     * Or you can provide an RPC server that runs the transformer in a Node.js environment.
     * Or you just run the code in a Node.js environment.
     *
-     * Refs: https://github.com/xenova/transformers.js/issues/309
+     * Refs: https://github.com/huggingface/transformers.js/issues/309
     */
    console.warn(
-      '"@xenova/transformers" is not officially supported in this environment, some features may not work as expected.',
+      '"@huggingface/transformers" is not officially supported in this environment, some features may not work as expected.',
    );
    setTransformers(
      // @ts-expect-error no type
-      await import("@xenova/transformers/dist/transformers"),
+      await import("@huggingface/transformers/dist/transformers.js"),
    );
  } else {
    return getTransformers()!;
@@ -1,17 +1,17 @@
-let transformer: typeof import("@xenova/transformers") | null = null;
+// Module-level cache for the transformers module; stays null until setTransformers() stores one.
+let transformer: typeof import("@huggingface/transformers") | null = null;

+/** Returns the cached transformers module, or null when none has been stored yet. */
 export function getTransformers() {
  return transformer;
 }

-export function setTransformers(t: typeof import("@xenova/transformers")) {
+/** Stores the given transformers module in the module-level cache, replacing any previous value. */
+export function setTransformers(t: typeof import("@huggingface/transformers")) {
  transformer = t;
 }

+/** Callback signature that receives the transformers module and returns nothing. */
 export type OnLoad = (
-  transformer: typeof import("@xenova/transformers"),
+  transformer: typeof import("@huggingface/transformers"),
 ) => void;

+// Payload shape carrying the transformers module.
+// NOTE(review): presumably dispatched once the module has been loaded; confirm against the callers.
 export type LoadTransformerEvent = {
-  transformer: typeof import("@xenova/transformers");
+  transformer: typeof import("@huggingface/transformers");
 };
@@ -1,5 +1,38 @@
 # @llamaindex/experimental

+## 0.0.125
+
+### Patch Changes
+
+- llamaindex@0.8.13
+
+## 0.0.124
+
+### Patch Changes
+
+- llamaindex@0.8.12
+
+## 0.0.123
+
+### Patch Changes
+
+- llamaindex@0.8.11
+
+## 0.0.122
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 0.0.121
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.0.120

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/experimental",
  "description": "Experimental package for LlamaIndexTS",
-  "version": "0.0.120",
+  "version": "0.0.125",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -1,5 +1,119 @@
 # llamaindex

+## 0.8.13
+
+### Patch Changes
+
+- Updated dependencies [a7b0ac3]
+- Updated dependencies [a7b0ac3]
+- Updated dependencies [c69605f]
+  - @llamaindex/anthropic@0.0.19
+  - @llamaindex/core@0.4.10
+  - @llamaindex/cloud@2.0.10
+  - @llamaindex/node-parser@0.0.11
+  - @llamaindex/clip@0.0.19
+  - @llamaindex/deepinfra@0.0.19
+  - @llamaindex/huggingface@0.0.19
+  - @llamaindex/ollama@0.0.26
+  - @llamaindex/openai@0.1.35
+  - @llamaindex/portkey-ai@0.0.19
+  - @llamaindex/replicate@0.0.19
+  - @llamaindex/readers@1.0.11
+  - @llamaindex/groq@0.0.34
+  - @llamaindex/vllm@0.0.5
+
+## 0.8.12
+
+### Patch Changes
+
+- Updated dependencies [7ae6eaa]
+  - @llamaindex/core@0.4.9
+  - @llamaindex/openai@0.1.34
+  - @llamaindex/cloud@2.0.9
+  - @llamaindex/node-parser@0.0.10
+  - @llamaindex/anthropic@0.0.18
+  - @llamaindex/clip@0.0.18
+  - @llamaindex/deepinfra@0.0.18
+  - @llamaindex/huggingface@0.0.18
+  - @llamaindex/ollama@0.0.25
+  - @llamaindex/portkey-ai@0.0.18
+  - @llamaindex/replicate@0.0.18
+  - @llamaindex/readers@1.0.10
+  - @llamaindex/groq@0.0.33
+  - @llamaindex/vllm@0.0.4
+
+## 0.8.11
+
+### Patch Changes
+
+- Updated dependencies [f865c98]
+  - @llamaindex/core@0.4.8
+  - @llamaindex/cloud@2.0.8
+  - @llamaindex/node-parser@0.0.9
+  - @llamaindex/anthropic@0.0.17
+  - @llamaindex/clip@0.0.17
+  - @llamaindex/deepinfra@0.0.17
+  - @llamaindex/huggingface@0.0.17
+  - @llamaindex/ollama@0.0.24
+  - @llamaindex/openai@0.1.33
+  - @llamaindex/portkey-ai@0.0.17
+  - @llamaindex/replicate@0.0.17
+  - @llamaindex/readers@1.0.9
+  - @llamaindex/groq@0.0.32
+  - @llamaindex/vllm@0.0.3
+
+## 0.8.10
+
+### Patch Changes
+
+- f066e50: feat: vllm support
+- Updated dependencies [f066e50]
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/vllm@0.0.2
+  - @llamaindex/core@0.4.7
+  - @llamaindex/cloud@2.0.7
+  - @llamaindex/node-parser@0.0.8
+  - @llamaindex/anthropic@0.0.16
+  - @llamaindex/clip@0.0.16
+  - @llamaindex/deepinfra@0.0.16
+  - @llamaindex/huggingface@0.0.16
+  - @llamaindex/ollama@0.0.23
+  - @llamaindex/openai@0.1.32
+  - @llamaindex/portkey-ai@0.0.16
+  - @llamaindex/replicate@0.0.16
+  - @llamaindex/readers@1.0.8
+  - @llamaindex/groq@0.0.31
+
+## 0.8.9
+
+### Patch Changes
+
+- 4fc001c: chore: bump `@huggingface/transformers`
+
+  Upgrade to v3, please read https://github.com/huggingface/transformers.js/releases/tag/3.0.0 for more information.
+
+- 4d4cd8a: feat: support ollama tool call
+
+  Note that `OllamaEmbedding` now is not the subclass of `Ollama`.
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/clip@0.0.15
+  - @llamaindex/huggingface@0.0.15
+  - @llamaindex/ollama@0.0.22
+  - @llamaindex/cloud@2.0.6
+  - @llamaindex/core@0.4.6
+  - @llamaindex/node-parser@0.0.7
+  - @llamaindex/anthropic@0.0.15
+  - @llamaindex/deepinfra@0.0.15
+  - @llamaindex/groq@0.0.30
+  - @llamaindex/openai@0.1.31
+  - @llamaindex/portkey-ai@0.0.15
+  - @llamaindex/replicate@0.0.15
+  - @llamaindex/readers@1.0.7
+
 ## 0.8.8

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "llamaindex",
-  "version": "0.8.8",
+  "version": "0.8.13",
  "license": "MIT",
  "type": "module",
  "keywords": [
@@ -29,7 +29,7 @@
    "@discoveryjs/json-ext": "^0.6.1",
    "@google-cloud/vertexai": "1.2.0",
    "@google/generative-ai": "0.12.0",
-    "@grpc/grpc-js": "^1.11.1",
+    "@grpc/grpc-js": "^1.12.2",
    "@llamaindex/anthropic": "workspace:*",
    "@llamaindex/clip": "workspace:*",
    "@llamaindex/cloud": "workspace:*",
@@ -44,6 +44,7 @@
    "@llamaindex/portkey-ai": "workspace:*",
    "@llamaindex/readers": "workspace:*",
    "@llamaindex/replicate": "workspace:*",
+    "@llamaindex/vllm": "workspace:*",
    "@mistralai/mistralai": "^1.0.4",
    "@mixedbread-ai/sdk": "^2.2.11",
    "@pinecone-database/pinecone": "^3.0.2",
@@ -85,10 +86,10 @@
    }
  },
  "devDependencies": {
+    "@huggingface/transformers": "^3.0.2",
    "@swc/cli": "^0.5.0",
    "@swc/core": "^1.7.22",
    "@vercel/postgres": "^0.10.0",
-    "@xenova/transformers": "^2.17.2",
    "concurrently": "^9.1.0",
    "glob": "^11.0.0",
    "pg": "^8.12.0",
@@ -1,4 +1,9 @@
 export * from "@llamaindex/core/agent";
+export {
+  OllamaAgent,
+  OllamaAgentWorker,
+  type OllamaAgentParams,
+} from "@llamaindex/ollama";
 export {
  AnthropicAgent,
  AnthropicAgentWorker,
@@ -16,7 +21,6 @@ export {
  ReActAgent,
  type ReACTAgentParams,
 } from "./react.js";
-
 // todo: ParallelAgent
 // todo: CustomAgent
 // todo: ReactMultiModal
@@ -1,7 +1 @@
-import type { BaseEmbedding } from "@llamaindex/core/embeddings";
-import { Ollama } from "@llamaindex/ollama";
-
-/**
- * OllamaEmbedding is an alias for Ollama that implements the BaseEmbedding interface.
- */
-export class OllamaEmbedding extends Ollama implements BaseEmbedding {}
+export { OllamaEmbedding } from "@llamaindex/ollama";
@@ -1,3 +1,4 @@
+export { VLLM, type VLLMParams } from "@llamaindex/vllm";
 export {
  ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS,
  ALL_AVAILABLE_ANTHROPIC_MODELS,
@@ -6,7 +7,6 @@ export {
 } from "./anthropic.js";
 export { FireworksLLM } from "./fireworks.js";
 export { Gemini, GeminiSession } from "./gemini/base.js";
-
 export {
  GEMINI_MODEL,
  type GoogleGeminiSessionOptions,
@@ -22,7 +22,7 @@ export default function withLlamaIndex(config: any) {
  config.experimental.serverComponentsExternalPackages =
    config.experimental.serverComponentsExternalPackages ?? [];
  config.experimental.serverComponentsExternalPackages.push(
-    "@xenova/transformers",
+    "@huggingface/transformers",
  );
  const userWebpack = config.webpack;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -1,5 +1,43 @@
 # @llamaindex/node-parser

+## 0.0.11
+
+### Patch Changes
+
+- Updated dependencies [a7b0ac3]
+- Updated dependencies [c69605f]
+  - @llamaindex/core@0.4.10
+
+## 0.0.10
+
+### Patch Changes
+
+- Updated dependencies [7ae6eaa]
+  - @llamaindex/core@0.4.9
+
+## 0.0.9
+
+### Patch Changes
+
+- Updated dependencies [f865c98]
+  - @llamaindex/core@0.4.8
+
+## 0.0.8
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 0.0.7
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 0.0.6

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/node-parser",
-  "version": "0.0.6",
+  "version": "0.0.11",
  "description": "Node parser for LlamaIndex",
  "type": "module",
  "exports": {
@@ -1,5 +1,44 @@
 # @llamaindex/anthropic

+## 0.0.19
+
+### Patch Changes
+
+- a7b0ac3: feat(anthropic): support prompt caching
+- Updated dependencies [a7b0ac3]
+- Updated dependencies [c69605f]
+  - @llamaindex/core@0.4.10
+
+## 0.0.18
+
+### Patch Changes
+
+- Updated dependencies [7ae6eaa]
+  - @llamaindex/core@0.4.9
+
+## 0.0.17
+
+### Patch Changes
+
+- Updated dependencies [f865c98]
+  - @llamaindex/core@0.4.8
+
+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 0.0.15
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 0.0.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/anthropic",
  "description": "Anthropic Adapter for LlamaIndex",
-  "version": "0.0.14",
+  "version": "0.0.19",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -33,7 +33,7 @@
    "bunchee": "5.6.1"
  },
  "dependencies": {
-    "@anthropic-ai/sdk": "0.27.1",
+    "@anthropic-ai/sdk": "0.32.1",
    "@llamaindex/core": "workspace:*",
    "@llamaindex/env": "workspace:*",
    "remeda": "^2.12.0"
@@ -11,7 +11,7 @@ import { Settings } from "@llamaindex/core/global";
 import type { EngineResponse } from "@llamaindex/core/schema";
 import { Anthropic } from "./llm.js";

-export type AnthropicAgentParams = LLMAgentParams;
+// Agent parameters with the LLM type parameter fixed to the Anthropic class.
+export type AnthropicAgentParams = LLMAgentParams<Anthropic>;

+// Anthropic-named agent worker; declares no members of its own on top of LLMAgentWorker.
 export class AnthropicAgentWorker extends LLMAgentWorker {}

@@ -1,5 +1,9 @@
 import type { ClientOptions } from "@anthropic-ai/sdk";
 import { Anthropic as SDKAnthropic } from "@anthropic-ai/sdk";
+import type {
+  BetaCacheControlEphemeral,
+  BetaTextBlockParam,
+} from "@anthropic-ai/sdk/resources/beta/index";
 import type {
  TextBlock,
  TextBlockParam,
@@ -8,6 +12,7 @@ import type {
  ImageBlockParam,
  MessageCreateParamsNonStreaming,
  MessageParam,
+  Model,
  Tool,
  ToolResultBlockParam,
  ToolUseBlock,
@@ -75,6 +80,9 @@ export const ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS = {
  "claude-2.1": {
    contextWindow: 200000,
  },
+  "claude-2.0": {
+    contextWindow: 100000,
+  },
  "claude-instant-1.2": {
    contextWindow: 100000,
  },
@@ -82,18 +90,30 @@ export const ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS = {

+// Claude 3 model ids (undated names, the "-latest" alias and dated snapshots) mapped to their context window size.
 export const ALL_AVAILABLE_V3_MODELS = {
  "claude-3-opus": { contextWindow: 200000 },
+  "claude-3-opus-latest": { contextWindow: 200000 },
+  "claude-3-opus-20240229": { contextWindow: 200000 },
  "claude-3-sonnet": { contextWindow: 200000 },
+  "claude-3-sonnet-20240229": { contextWindow: 200000 },
  "claude-3-haiku": { contextWindow: 200000 },
+  "claude-3-haiku-20240307": { contextWindow: 200000 },
 };

+// Claude 3.5 model ids (undated names, "-latest" aliases and dated snapshots) mapped to their context window size.
 export const ALL_AVAILABLE_V3_5_MODELS = {
  "claude-3-5-sonnet": { contextWindow: 200000 },
+  "claude-3-5-sonnet-20241022": { contextWindow: 200000 },
+  "claude-3-5-sonnet-20240620": { contextWindow: 200000 },
+  "claude-3-5-sonnet-latest": { contextWindow: 200000 },
+  "claude-3-5-haiku": { contextWindow: 200000 },
+  "claude-3-5-haiku-latest": { contextWindow: 200000 },
+  "claude-3-5-haiku-20241022": { contextWindow: 200000 },
 };

+// Merged lookup table of the legacy, v3 and v3.5 model tables.
+// The `satisfies` clause type-checks the merged object against a mapping keyed by the SDK's `Model` type
+// whose values carry a numeric `contextWindow`, without widening the inferred literal keys.
 export const ALL_AVAILABLE_ANTHROPIC_MODELS = {
  ...ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS,
  ...ALL_AVAILABLE_V3_MODELS,
  ...ALL_AVAILABLE_V3_5_MODELS,
+} satisfies {
+  [key in Model]: { contextWindow: number };
 };

 const AVAILABLE_ANTHROPIC_MODELS_WITHOUT_DATE: { [key: string]: string } = {
@@ -104,10 +124,16 @@ const AVAILABLE_ANTHROPIC_MODELS_WITHOUT_DATE: { [key: string]: string } = {
 } as { [key in keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS]: string };

 export type AnthropicAdditionalChatOptions = object;
+/**
+ * Per-message options for the Anthropic LLM: the generic tool-call message options plus an
+ * optional `cache_control` marker. `chat()` copies `cache_control` from system messages onto
+ * the system text blocks it builds and switches to the prompt-caching beta client when any is present.
+ */
+export type AnthropicToolCallLLMMessageOptions = ToolCallLLMMessageOptions & {
+  cache_control?: BetaCacheControlEphemeral | null;
+};

-export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
+export class Anthropic extends ToolCallLLM<
+  AnthropicAdditionalChatOptions,
+  AnthropicToolCallLLMMessageOptions
+> {
  // Per completion Anthropic params
-  model: keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS;
+  model: keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS | ({} & string);
  temperature: number;
  topP: number;
  maxTokens?: number | undefined;
@@ -147,7 +173,12 @@ export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
      temperature: this.temperature,
      topP: this.topP,
      maxTokens: this.maxTokens,
-      contextWindow: ALL_AVAILABLE_ANTHROPIC_MODELS[this.model].contextWindow,
+      contextWindow:
+        this.model in ALL_AVAILABLE_ANTHROPIC_MODELS
+          ? ALL_AVAILABLE_ANTHROPIC_MODELS[
+              this.model as keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS
+            ].contextWindow
+          : 200000,
      tokenizer: undefined,
    };
  }
@@ -291,56 +322,74 @@ export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
  chat(
    params: LLMChatParamsStreaming<
      AnthropicAdditionalChatOptions,
-      ToolCallLLMMessageOptions
+      AnthropicToolCallLLMMessageOptions
    >,
-  ): Promise<AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>>>;
+  ): Promise<
+    AsyncIterable<ChatResponseChunk<AnthropicToolCallLLMMessageOptions>>
+  >;
  chat(
    params: LLMChatParamsNonStreaming<
      AnthropicAdditionalChatOptions,
-      ToolCallLLMMessageOptions
+      AnthropicToolCallLLMMessageOptions
    >,
-  ): Promise<ChatResponse<ToolCallLLMMessageOptions>>;
+  ): Promise<ChatResponse<AnthropicToolCallLLMMessageOptions>>;
  @wrapLLMEvent
  async chat(
    params:
      | LLMChatParamsNonStreaming<
          AnthropicAdditionalChatOptions,
-          ToolCallLLMMessageOptions
+          AnthropicToolCallLLMMessageOptions
        >
      | LLMChatParamsStreaming<
          AnthropicAdditionalChatOptions,
-          ToolCallLLMMessageOptions
+          AnthropicToolCallLLMMessageOptions
        >,
  ): Promise<
-    | ChatResponse<ToolCallLLMMessageOptions>
-    | AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>>
+    | ChatResponse<AnthropicToolCallLLMMessageOptions>
+    | AsyncIterable<ChatResponseChunk<AnthropicToolCallLLMMessageOptions>>
  > {
    let { messages } = params;

    const { stream, tools } = params;

-    let systemPrompt: string | null = null;
+    let systemPrompt: string | Array<BetaTextBlockParam> | null = null;

    const systemMessages = messages.filter(
      (message) => message.role === "system",
    );

    if (systemMessages.length > 0) {
-      systemPrompt = systemMessages
-        .map((message) => message.content)
-        .join("\n");
+      systemPrompt = systemMessages.map((message) =>
+        message.options && "cache_control" in message.options
+          ? {
+              type: "text",
+              text: extractText(message.content),
+              cache_control: message.options.cache_control,
+            }
+          : {
+              type: "text",
+              text: extractText(message.content),
+            },
+      );
      messages = messages.filter((message) => message.role !== "system");
    }
+    const beta =
+      systemPrompt?.find((message) => "cache_control" in message) !== undefined;
+
+    // Pick the client up front: both the streaming and non-streaming paths below use it
+    let anthropic = this.session.anthropic;
+    if (beta) {
+      // @ts-expect-error type casting
+      anthropic = anthropic.beta.promptCaching;
+    }

    // case: Streaming
    if (stream) {
      if (tools) {
        console.error("Tools are not supported in streaming mode");
      }
-      return this.streamChat(messages, systemPrompt);
+      return this.streamChat(messages, systemPrompt, anthropic);
    }
-    // case: Non-streaming
-    const anthropic = this.session.anthropic;

    if (tools) {
      const params: MessageCreateParamsNonStreaming = {
@@ -378,7 +427,10 @@ export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
                  toolCall: toolUseBlock.map((block) => ({
                    id: block.id,
                    name: block.name,
-                    input: block.input,
+                    input:
+                      typeof block.input === "object"
+                        ? JSON.stringify(block.input)
+                        : `${block.input}`,
                  })),
                }
              : {},
@@ -411,10 +463,11 @@ export class Anthropic extends ToolCallLLM<AnthropicAdditionalChatOptions> {
  }

  protected async *streamChat(
-    messages: ChatMessage<ToolCallLLMMessageOptions>[],
-    systemPrompt?: string | null,
-  ): AsyncIterable<ChatResponseChunk<ToolCallLLMMessageOptions>> {
-    const stream = await this.session.anthropic.messages.create({
+    messages: ChatMessage<AnthropicToolCallLLMMessageOptions>[],
+    systemPrompt: string | Array<BetaTextBlockParam> | null,
+    anthropic: SDKAnthropic,
+  ): AsyncIterable<ChatResponseChunk<AnthropicToolCallLLMMessageOptions>> {
+    const stream = await anthropic.messages.create({
      model: this.getModelName(this.model),
      messages: this.formatMessages(messages),
      max_tokens: this.maxTokens ?? 4096,
@@ -1,5 +1,52 @@
 # @llamaindex/clip

+## 0.0.19
+
+### Patch Changes
+
+- Updated dependencies [a7b0ac3]
+- Updated dependencies [c69605f]
+  - @llamaindex/core@0.4.10
+  - @llamaindex/openai@0.1.35
+
+## 0.0.18
+
+### Patch Changes
+
+- Updated dependencies [7ae6eaa]
+  - @llamaindex/core@0.4.9
+  - @llamaindex/openai@0.1.34
+
+## 0.0.17
+
+### Patch Changes
+
+- Updated dependencies [f865c98]
+  - @llamaindex/core@0.4.8
+  - @llamaindex/openai@0.1.33
+
+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+  - @llamaindex/openai@0.1.32
+
+## 0.0.15
+
+### Patch Changes
+
+- 4fc001c: chore: bump `@huggingface/transformers`
+
+  Upgrade to v3, please read https://github.com/huggingface/transformers.js/releases/tag/3.0.0 for more information.
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+  - @llamaindex/openai@0.1.31
+
 ## 0.0.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/clip",
  "description": "Clip Embedding Adapter for LlamaIndex",
-  "version": "0.0.14",
+  "version": "0.0.19",
  "type": "module",
  "types": "dist/index.d.ts",
  "main": "dist/index.cjs",
@@ -45,6 +45,9 @@
    "@llamaindex/core": "workspace:*",
    "@llamaindex/env": "workspace:*",
    "@llamaindex/openai": "workspace:*",
-    "@xenova/transformers": "^2.17.2"
+    "@huggingface/transformers": "^3.0.2"
+  },
+  "peerDependencies": {
+    "@huggingface/transformers": "^3.0.2"
  }
 }
@@ -1,17 +1,17 @@
 import { MultiModalEmbedding } from "@llamaindex/core/embeddings";
 import type { ImageType } from "@llamaindex/core/schema";
 // only import type, to avoid bundling error
-import { Settings } from "@llamaindex/core/global";
-import {
-  type LoadTransformerEvent,
-  loadTransformers,
-} from "@llamaindex/env/multi-model";
 import type {
  CLIPTextModelWithProjection,
  CLIPVisionModelWithProjection,
  PreTrainedTokenizer,
  Processor,
-} from "@xenova/transformers";
+} from "@huggingface/transformers";
+import { Settings } from "@llamaindex/core/global";
+import {
+  type LoadTransformerEvent,
+  loadTransformers,
+} from "@llamaindex/env/multi-model";
 import { ClipEmbeddingModelType } from "./shared";

 declare module "@llamaindex/core/global" {
@@ -1,5 +1,48 @@
 # @llamaindex/deepinfra

+## 0.0.19
+
+### Patch Changes
+
+- Updated dependencies [a7b0ac3]
+- Updated dependencies [c69605f]
+  - @llamaindex/core@0.4.10
+  - @llamaindex/openai@0.1.35
+
+## 0.0.18
+
+### Patch Changes
+
+- Updated dependencies [7ae6eaa]
+  - @llamaindex/core@0.4.9
+  - @llamaindex/openai@0.1.34
+
+## 0.0.17
+
+### Patch Changes
+
+- Updated dependencies [f865c98]
+  - @llamaindex/core@0.4.8
+  - @llamaindex/openai@0.1.33
+
+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+  - @llamaindex/openai@0.1.32
+
+## 0.0.15
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+  - @llamaindex/openai@0.1.31
+
 ## 0.0.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/deepinfra",
  "description": "Deepinfra Adapter for LlamaIndex",
-  "version": "0.0.14",
+  "version": "0.0.19",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,38 @@
 # @llamaindex/groq

+## 0.0.34
+
+### Patch Changes
+
+- @llamaindex/openai@0.1.35
+
+## 0.0.33
+
+### Patch Changes
+
+- Updated dependencies [7ae6eaa]
+  - @llamaindex/openai@0.1.34
+
+## 0.0.32
+
+### Patch Changes
+
+- @llamaindex/openai@0.1.33
+
+## 0.0.31
+
+### Patch Changes
+
+- @llamaindex/openai@0.1.32
+
+## 0.0.30
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/openai@0.1.31
+
 ## 0.0.29

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/groq",
  "description": "Groq Adapter for LlamaIndex",
-  "version": "0.0.29",
+  "version": "0.0.34",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,52 @@
 # @llamaindex/huggingface

+## 0.0.19
+
+### Patch Changes
+
+- Updated dependencies [a7b0ac3]
+- Updated dependencies [c69605f]
+  - @llamaindex/core@0.4.10
+  - @llamaindex/openai@0.1.35
+
+## 0.0.18
+
+### Patch Changes
+
+- Updated dependencies [7ae6eaa]
+  - @llamaindex/core@0.4.9
+  - @llamaindex/openai@0.1.34
+
+## 0.0.17
+
+### Patch Changes
+
+- Updated dependencies [f865c98]
+  - @llamaindex/core@0.4.8
+  - @llamaindex/openai@0.1.33
+
+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+  - @llamaindex/openai@0.1.32
+
+## 0.0.15
+
+### Patch Changes
+
+- 4fc001c: chore: bump `@huggingface/transformers`
+
+  Upgrade to v3, please read https://github.com/huggingface/transformers.js/releases/tag/3.0.0 for more information.
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+  - @llamaindex/openai@0.1.31
+
 ## 0.0.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/huggingface",
  "description": "Huggingface Adapter for LlamaIndex",
-  "version": "0.0.14",
+  "version": "0.0.19",
  "type": "module",
  "types": "dist/index.d.ts",
  "main": "dist/index.cjs",
@@ -46,6 +46,9 @@
    "@llamaindex/core": "workspace:*",
    "@llamaindex/env": "workspace:*",
    "@llamaindex/openai": "workspace:*",
-    "@xenova/transformers": "^2.17.2"
+    "@huggingface/transformers": "^3.0.2"
+  },
+  "peerDependencies": {
+    "@huggingface/transformers": "^3.0.2"
  }
 }
@@ -1,10 +1,10 @@
+import type { pipeline } from "@huggingface/transformers";
 import { BaseEmbedding } from "@llamaindex/core/embeddings";
 import { Settings } from "@llamaindex/core/global";
 import {
  type LoadTransformerEvent,
  loadTransformers,
 } from "@llamaindex/env/multi-model";
-import type { pipeline } from "@xenova/transformers";
 import { HuggingFaceEmbeddingModelType } from "./shared";

 declare module "@llamaindex/core/global" {
@@ -13,6 +13,11 @@ declare module "@llamaindex/core/global" {
  }
 }

+/**
+ * Constructor parameters for HuggingFaceEmbedding.
+ * - `modelType`: model identifier handed to `pipeline("feature-extraction", ...)`; the class default is kept when omitted.
+ * - `modelOptions`: forwarded unchanged as the third argument of `pipeline`, so it has exactly that parameter's type.
+ */
+export type HuggingFaceEmbeddingParams = {
+  modelType?: string;
+  modelOptions?: Parameters<typeof pipeline<"feature-extraction">>[2];
+};
+
 /**
 * Uses feature extraction from '@huggingface/transformers' to generate embeddings.
 * Per default the model [XENOVA_ALL_MINILM_L6_V2](https://huggingface.co/Xenova/all-MiniLM-L6-v2) is used.
@@ -28,15 +33,20 @@ declare module "@llamaindex/core/global" {
 */
 export class HuggingFaceEmbedding extends BaseEmbedding {
  modelType: string = HuggingFaceEmbeddingModelType.XENOVA_ALL_MINILM_L6_V2;
-  quantized: boolean = true;
+  modelOptions: Parameters<typeof pipeline<"feature-extraction">>[2] = {};

  private extractor: Awaited<
    ReturnType<typeof pipeline<"feature-extraction">>
  > | null = null;

-  constructor(init?: Partial<HuggingFaceEmbedding>) {
+  constructor(params: HuggingFaceEmbeddingParams = {}) {
    super();
-    Object.assign(this, init);
+    if (params.modelType) {
+      this.modelType = params.modelType;
+    }
+    if (params.modelOptions) {
+      this.modelOptions = params.modelOptions;
+    }
  }

  async getExtractor() {
@@ -50,9 +60,11 @@ export class HuggingFaceEmbedding extends BaseEmbedding {
          true,
        );
      });
-      this.extractor = await pipeline("feature-extraction", this.modelType, {
-        quantized: this.quantized,
-      });
+      this.extractor = await pipeline(
+        "feature-extraction",
+        this.modelType,
+        this.modelOptions,
+      );
    }
    return this.extractor;
  }
@@ -1,4 +1,7 @@
-export { HuggingFaceEmbedding } from "./embedding";
+export {
+  HuggingFaceEmbedding,
+  type HuggingFaceEmbeddingParams,
+} from "./embedding";
 export { HuggingFaceLLM, type HFLLMConfig } from "./llm";
 export {
  HuggingFaceEmbeddingModelType,
@@ -1,3 +1,8 @@
+import type {
+  PreTrainedModel,
+  PreTrainedTokenizer,
+  Tensor,
+} from "@huggingface/transformers";
 import { wrapLLMEvent } from "@llamaindex/core/decorator";
 import { Settings } from "@llamaindex/core/global";
 import "@llamaindex/core/llms";
@@ -10,11 +15,6 @@ import {
  type LLMMetadata,
 } from "@llamaindex/core/llms";
 import { loadTransformers } from "@llamaindex/env/multi-model";
-import type {
-  PreTrainedModel,
-  PreTrainedTokenizer,
-  Tensor,
-} from "@xenova/transformers";
 import { DEFAULT_PARAMS } from "./shared";

 const DEFAULT_HUGGINGFACE_MODEL = "stabilityai/stablelm-tuned-alpha-3b";
@@ -122,7 +122,10 @@ export class HuggingFaceLLM extends BaseLLM {
    // TODO: the input for model.generate should be updated when using @xenova/transformers v3
    // We should add `stopping_criteria` also when it's supported in v3
    // See: https://github.com/xenova/transformers.js/blob/3260640b192b3e06a10a1f4dc004b1254fdf1b80/src/models.js#L1248C9-L1248C27
-    const outputs = await model.generate(inputs, this.metadata);
+    const outputs = (await model.generate({
+      inputs,
+      ...this.metadata,
+    })) as Tensor;
    const outputText = tokenizer.batch_decode(outputs, {
      skip_special_tokens: false,
    });
@@ -1,5 +1,47 @@
 # @llamaindex/ollama

+## 0.0.26
+
+### Patch Changes
+
+- Updated dependencies [a7b0ac3]
+- Updated dependencies [c69605f]
+  - @llamaindex/core@0.4.10
+
+## 0.0.25
+
+### Patch Changes
+
+- Updated dependencies [7ae6eaa]
+  - @llamaindex/core@0.4.9
+
+## 0.0.24
+
+### Patch Changes
+
+- Updated dependencies [f865c98]
+  - @llamaindex/core@0.4.8
+
+## 0.0.23
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 0.0.22
+
+### Patch Changes
+
+- 4d4cd8a: feat: support ollama tool call
+
+  Note that `OllamaEmbedding` now is not the subclass of `Ollama`.
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 0.0.21

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/ollama",
  "description": "Ollama Adapter for LlamaIndex",
-  "version": "0.0.21",
+  "version": "0.0.26",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -0,0 +1,33 @@
+import {
+  LLMAgent,
+  LLMAgentWorker,
+  type LLMAgentParams,
+} from "@llamaindex/core/agent";
+import { Settings } from "@llamaindex/core/global";
+import { Ollama } from "./llm";
+
+// This is likely not necessary anymore but leaving it here just in case it's in use elsewhere
+
+/**
+ * Parameters accepted by OllamaAgent: the generic agent parameters typed for Ollama, plus an
+ * optional `model` name that is only used to construct an Ollama instance when no `llm` is
+ * passed and `Settings.llm` is not an Ollama instance.
+ */
+export type OllamaAgentParams = LLMAgentParams<Ollama> & {
+  model?: string;
+};
+
+// Ollama-named agent worker; declares no members of its own on top of LLMAgentWorker.
+export class OllamaAgentWorker extends LLMAgentWorker {}
+
+/**
+ * Agent bound to an Ollama LLM.
+ *
+ * The LLM is resolved in this order:
+ *  1. `params.llm` when it is provided,
+ *  2. `Settings.llm` when that is an Ollama instance,
+ *  3. a new Ollama built from `params.model`.
+ *
+ * @throws Error("No model provided") when none of the three sources is available.
+ */
+export class OllamaAgent extends LLMAgent {
+  constructor(params: OllamaAgentParams) {
+    let llm = params.llm;
+    // Only fall back when the caller passed no LLM (null or undefined), mirroring `??`.
+    if (llm === null || llm === undefined) {
+      if (Settings.llm instanceof Ollama) {
+        llm = Settings.llm as Ollama;
+      } else if (params.model) {
+        llm = new Ollama({ model: params.model });
+      } else {
+        throw new Error("No model provided");
+      }
+    }
+    super({
+      ...params,
+      llm,
+    });
+  }
+}
@@ -0,0 +1,29 @@
+import { BaseEmbedding } from "@llamaindex/core/embeddings";
+import { Ollama, type OllamaParams } from "./llm";
+
+/**
+ * Embedding model that delegates to an internally owned Ollama instance.
+ * It wraps an Ollama object rather than extending the Ollama class.
+ */
+export class OllamaEmbedding extends BaseEmbedding {
+  private readonly llm: Ollama;
+
+  /** Builds the wrapped Ollama instance from the same parameters Ollama itself accepts. */
+  constructor(params: OllamaParams) {
+    super();
+    this.llm = new Ollama(params);
+  }
+
+  /** Requests an embedding for `prompt` using the wrapped instance's model and a copy of its options. */
+  private async getEmbedding(prompt: string): Promise<number[]> {
+    const { model, options, ollama } = this.llm;
+    const { embedding } = await ollama.embeddings({
+      model,
+      prompt,
+      options: { ...options },
+    });
+    return embedding;
+  }
+
+  /** Returns the embedding vector for a single piece of text. */
+  async getTextEmbedding(text: string): Promise<number[]> {
+    return this.getEmbedding(text);
+  }
+}
@@ -1,172 +1,7 @@
-import { BaseEmbedding } from "@llamaindex/core/embeddings";
-import type {
-  ChatResponse,
-  ChatResponseChunk,
-  CompletionResponse,
-  LLM,
-  LLMChatParamsNonStreaming,
-  LLMChatParamsStreaming,
-  LLMCompletionParamsNonStreaming,
-  LLMCompletionParamsStreaming,
-  LLMMetadata,
-} from "@llamaindex/core/llms";
-import { extractText, streamConverter } from "@llamaindex/core/utils";
-import {
-  Ollama as OllamaBase,
-  type Config,
-  type ChatResponse as OllamaChatResponse,
-  type GenerateResponse as OllamaGenerateResponse,
-  type Options,
-} from "ollama/browser";
-
-const messageAccessor = (part: OllamaChatResponse): ChatResponseChunk => {
-  return {
-    raw: part,
-    delta: part.message.content,
-  };
-};
-
-const completionAccessor = (
-  part: OllamaGenerateResponse,
-): CompletionResponse => {
-  return { text: part.response, raw: part };
-};
-
-export type OllamaParams = {
-  model: string;
-  config?: Partial<Config>;
-  options?: Partial<Options>;
-};
-
-export class Ollama extends BaseEmbedding implements LLM {
-  public readonly ollama: OllamaBase;
-
-  // https://ollama.ai/library
-  model: string;
-
-  options: Partial<Omit<Options, "num_ctx" | "top_p" | "temperature">> &
-    Pick<Options, "num_ctx" | "top_p" | "temperature"> = {
-    num_ctx: 4096,
-    top_p: 0.9,
-    temperature: 0.7,
-  };
-
-  constructor(params: OllamaParams) {
-    super();
-    this.model = params.model;
-    this.ollama = new OllamaBase(params.config);
-    if (params.options) {
-      this.options = {
-        ...this.options,
-        ...params.options,
-      };
-    }
-  }
-
-  get metadata(): LLMMetadata {
-    const { temperature, top_p, num_ctx } = this.options;
-    return {
-      model: this.model,
-      temperature: temperature,
-      topP: top_p,
-      maxTokens: this.options.num_ctx,
-      contextWindow: num_ctx,
-      tokenizer: undefined,
-    };
-  }
-
-  chat(
-    params: LLMChatParamsStreaming,
-  ): Promise<AsyncIterable<ChatResponseChunk>>;
-  chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
-  async chat(
-    params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
-  ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
-    const { messages, stream } = params;
-    const payload = {
-      model: this.model,
-      messages: messages.map((message) => ({
-        role: message.role,
-        content: extractText(message.content),
-      })),
-      stream: !!stream,
-      options: {
-        ...this.options,
-      },
-    };
-    if (!stream) {
-      const chatResponse = await this.ollama.chat({
-        ...payload,
-        stream: false,
-      });
-
-      return {
-        message: {
-          role: "assistant",
-          content: chatResponse.message.content,
-        },
-        raw: chatResponse,
-      };
-    } else {
-      const stream = await this.ollama.chat({
-        ...payload,
-        stream: true,
-      });
-      return streamConverter(stream, messageAccessor);
-    }
-  }
-
-  complete(
-    params: LLMCompletionParamsStreaming,
-  ): Promise<AsyncIterable<CompletionResponse>>;
-  complete(
-    params: LLMCompletionParamsNonStreaming,
-  ): Promise<CompletionResponse>;
-  async complete(
-    params: LLMCompletionParamsStreaming | LLMCompletionParamsNonStreaming,
-  ): Promise<CompletionResponse | AsyncIterable<CompletionResponse>> {
-    const { prompt, stream } = params;
-    const payload = {
-      model: this.model,
-      prompt: extractText(prompt),
-      stream: !!stream,
-      options: {
-        ...this.options,
-      },
-    };
-    if (!stream) {
-      const response = await this.ollama.generate({
-        ...payload,
-        stream: false,
-      });
-      return {
-        text: response.response,
-        raw: response,
-      };
-    } else {
-      const stream = await this.ollama.generate({
-        ...payload,
-        stream: true,
-      });
-      return streamConverter(stream, completionAccessor);
-    }
-  }
-
-  private async getEmbedding(prompt: string): Promise<number[]> {
-    const payload = {
-      model: this.model,
-      prompt,
-      options: {
-        ...this.options,
-      },
-    };
-    const response = await this.ollama.embeddings({
-      ...payload,
-    });
-    return response.embedding;
-  }
-
-  async getTextEmbedding(text: string): Promise<number[]> {
-    return this.getEmbedding(text);
-  }
-}
+export {
+  OllamaAgent,
+  OllamaAgentWorker,
+  type OllamaAgentParams,
+} from "./agent";
+export { OllamaEmbedding } from "./embedding";
+export { Ollama, type OllamaParams } from "./llm";
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
github-actions[bot]	4c7b891446	Release 0.8.13 (#1480 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: himself65 <himself65@users.noreply.github.com>	2024-11-14 18:25:19 -08:00
Alex Yang	a9c5b4899b	docs: update docusaurus.config.js (#1489 )	2024-11-14 17:36:52 -08:00
Alex Yang	a7b0ac3cb7	feat(anthropic): support prompt caching (#1488 )	2024-11-14 17:35:21 -08:00
Alex Yang	a7540ff47b	docs: update (#1486 )	2024-11-14 15:46:48 -08:00
Aman Rao	c69605f406	feat: add async support to BaseChatStore and BaseChatStoreMemory (#1483 ) Co-authored-by: Alex Yang <himself65@outlook.com>	2024-11-14 13:45:50 -08:00
Alex Yang	ee20c44d9b	feat(workflow): allow send event with no output (#1479 )	2024-11-14 00:17:53 -08:00
Alex Yang	1d470363df	docs: add banner for legacy (#1478 )	2024-11-13 18:16:13 -08:00
github-actions[bot]	b39f40dbd8	Release (#1477 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: himself65 <himself65@users.noreply.github.com>	2024-11-13 01:25:40 -08:00
Alex Yang	fadc8b8ea0	feat: recoverable data with error handling (#1476 )	2024-11-13 01:15:50 -08:00
Alex Yang	ea92b6986d	chore: update changeset	2024-11-13 01:15:28 -08:00
Alex Yang	17f9022d22	fix: output event check (#1475 )	2024-11-13 00:46:35 -08:00
github-actions[bot]	14792cd8b4	Release 0.8.12 (#1473 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-11-12 16:20:25 -08:00
Alex Yang	7ae6eaa0a2	chore: update changeset	2024-11-12 12:49:17 -08:00
Alex Yang	dbb5bd9f23	feat: allow `tool_choice` for OpenAIAgent (#1472 )	2024-11-12 12:46:57 -08:00
github-actions[bot]	aacd606204	Release 0.8.11 (#1471 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-11-12 11:49:22 -08:00
Alex Yang	f865c984d3	feat: async get message on chat store (#1470 )	2024-11-12 10:59:44 -08:00
github-actions[bot]	7b10882d06	Release 0.8.10 (#1466 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: himself65 <himself65@users.noreply.github.com>	2024-11-11 14:19:46 -08:00
Alex Yang	f066e50482	feat: vllm support (#1468 )	2024-11-11 13:14:08 -08:00
Alex Yang	fd8c882792	refactor: migrate example to new workflow API (#1467 )	2024-11-11 12:03:38 -08:00
Alex Yang	d89ebe0261	chore: update changeset	2024-11-11 10:11:04 -08:00
Alex Yang	968feb32cd	feat: better input type for function tool with `zod` (#1464 )	2024-11-11 10:10:03 -08:00
Alex Yang	43f6f56c5b	docs(next): fix turbo.json (#1465 )	2024-11-11 10:07:12 -08:00
github-actions[bot]	b2364dc5ba	Release 0.8.9 (#1460 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-11-10 23:32:41 -08:00
Alex Yang	67f4db8501	fix: steaming chat in ollama (#1463 )	2024-11-10 23:27:09 -08:00
Alex Yang	e4151a8b02	feat: support ollama agent (#1462 )	2024-11-10 22:38:40 -08:00
Alex Yang	4d4cd8ac6b	feat: support ollama tool call (#1461 )	2024-11-10 20:46:46 -08:00
Alex Yang	4fc001c8de	chore: bump `@huggingface/transformers` (#1459 )	2024-11-10 20:14:44 -08:00
Alex Yang	cf675bdc7a	chore: bump version (#1458 )	2024-11-10 16:43:45 -08:00