Release 0.8.10 (#1466 )

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: himself65 <himself65@users.noreply.github.com>
feat: vllm support (#1468 )
2026-07-01 22:14:03 -04:00 · 2024-11-11 14:19:46 -08:00 · 2024-11-11 13:14:08 -08:00 · 2024-11-11 12:03:38 -08:00 · 2024-11-11 10:11:04 -08:00 · 2024-11-11 10:10:03 -08:00
110 changed files with 1645 additions and 435 deletions
@@ -1,5 +1,21 @@
 # docs

+## 0.0.114
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+  - @llamaindex/examples@0.0.14
+
+## 0.0.113
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.0.112

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "docs",
-  "version": "0.0.112",
+  "version": "0.0.114",
  "private": true,
  "scripts": {
    "docusaurus": "docusaurus",
@@ -1,5 +1,34 @@
 # @llamaindex/doc

+## 0.0.12
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+- Updated dependencies [fd8c882]
+  - llamaindex@0.8.10
+  - @llamaindex/core@0.4.7
+  - @llamaindex/workflow@0.0.4
+  - @llamaindex/cloud@2.0.7
+  - @llamaindex/node-parser@0.0.8
+  - @llamaindex/openai@0.1.32
+  - @llamaindex/readers@1.0.8
+
+## 0.0.11
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+  - @llamaindex/cloud@2.0.6
+  - @llamaindex/core@0.4.6
+  - @llamaindex/node-parser@0.0.7
+  - @llamaindex/openai@0.1.31
+  - @llamaindex/readers@1.0.7
+
 ## 0.0.10

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/doc",
-  "version": "0.0.10",
+  "version": "0.0.12",
  "private": true,
  "scripts": {
    "build": "pnpm run build:docs && next build",
@@ -1,6 +1,14 @@
 {
  "extends": ["//"],
  "tasks": {
+    "build": {
+      "outputs": [
+        ".next",
+        ".source",
+        "next-env.d.ts",
+        "src/content/docs/cloud/api/**"
+      ]
+    },
    "dev": {
      "dependsOn": ["^build"]
    }
@@ -1,5 +1,20 @@
 # @llamaindex/cloudflare-worker-agent-test

+## 0.0.106
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 0.0.105
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.0.104

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloudflare-worker-agent-test",
-  "version": "0.0.104",
+  "version": "0.0.106",
  "type": "module",
  "private": true,
  "scripts": {
@@ -1,5 +1,17 @@
 # @llamaindex/llama-parse-browser-test

+## 0.0.27
+
+### Patch Changes
+
+- @llamaindex/cloud@2.0.7
+
+## 0.0.26
+
+### Patch Changes
+
+- @llamaindex/cloud@2.0.6
+
 ## 0.0.25

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/llama-parse-browser-test",
  "private": true,
-  "version": "0.0.25",
+  "version": "0.0.27",
  "type": "module",
  "scripts": {
    "dev": "vite",
@@ -1,5 +1,20 @@
 # @llamaindex/next-agent-test

+## 0.1.106
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 0.1.105
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.1.104

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-agent-test",
-  "version": "0.1.104",
+  "version": "0.1.106",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,20 @@
 # test-edge-runtime

+## 0.1.105
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 0.1.104
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.1.103

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/nextjs-edge-runtime-test",
-  "version": "0.1.103",
+  "version": "0.1.105",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,20 @@
 # @llamaindex/next-node-runtime

+## 0.0.87
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 0.0.86
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.0.85

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-node-runtime-test",
-  "version": "0.0.85",
+  "version": "0.0.87",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -15,7 +15,6 @@ Settings.llm = new OpenAI({
 });
 Settings.embedModel = new HuggingFaceEmbedding({
  modelType: "BAAI/bge-small-en-v1.5",
-  quantized: false,
 });
 Settings.callbackManager.on("llm-tool-call", (event) => {
  console.log(event.detail);
@@ -1,5 +1,20 @@
 # @llamaindex/waku-query-engine-test

+## 0.0.106
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 0.0.105
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.0.104

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/waku-query-engine-test",
-  "version": "0.0.104",
+  "version": "0.0.106",
  "type": "module",
  "private": true,
  "scripts": {
@@ -0,0 +1,3 @@
+import { OpenAI } from "./openai.js";
+
+export class Ollama extends OpenAI {}
@@ -15,7 +15,17 @@ export async function resolve(specifier, context, nextResolve) {
  const targetUrl = fileURLToPath(result.url).replace(/\.js$/, ".ts");
  let relativePath = relative(packageDistDir, targetUrl);
  // todo: make it more generic if we have more sub modules fixtures in the future
-  if (relativePath.startsWith("../../llm/openai")) {
+  if (relativePath.startsWith("../../llm/anthropic")) {
+    relativePath = relativePath.replace(
+      "../../llm/anthropic/dist/index.ts",
+      "llm/anthropic.ts",
+    );
+  } else if (relativePath.startsWith("../../llm/ollama")) {
+    relativePath = relativePath.replace(
+      "../../llm/ollama/dist/index.ts",
+      "llm/ollama.ts",
+    );
+  } else if (relativePath.startsWith("../../llm/openai")) {
    relativePath = relativePath.replace(
      "../../llm/openai/dist/index.ts",
      "llm/openai.ts",
@@ -64,7 +64,7 @@ await test("clip embedding", async (t) => {
  });

  await t.test("custom transformer", async () => {
-    const transformers = await import("@xenova/transformers");
+    const transformers = await import("@huggingface/transformers");
    const getter = test.mock.fn((t, k, r) => {
      return Reflect.get(t, k, r);
    });
@@ -0,0 +1,35 @@
+import { Ollama } from "@llamaindex/ollama";
+import assert from "node:assert";
+import { test } from "node:test";
+import { getWeatherTool } from "./fixtures/tools.js";
+import { mockLLMEvent } from "./utils.js";
+
+await test("ollama", async (t) => {
+  await mockLLMEvent(t, "ollama");
+  await t.test("ollama function call", async (t) => {
+    const llm = new Ollama({
+      model: "llama3.2",
+    });
+    const chatResponse = await llm.chat({
+      messages: [
+        {
+          role: "user",
+          content: "What is the weather in Paris?",
+        },
+      ],
+      tools: [getWeatherTool],
+    });
+    if (
+      chatResponse.message.options &&
+      "toolCall" in chatResponse.message.options
+    ) {
+      assert.equal(chatResponse.message.options.toolCall.length, 1);
+      assert.equal(
+        chatResponse.message.options.toolCall[0]!.name,
+        getWeatherTool.metadata.name,
+      );
+    } else {
+      throw new Error("Expected tool calls in response");
+    }
+  });
+});
@@ -167,6 +167,7 @@ For questions about more specific sections, please use the vector_tool.`,
  const mockCall = t.mock.fn(({ query }: { query: string }) => {
    return originalCall({ query });
  });
+  // @ts-expect-error what?
  queryEngineTools[1]!.call = mockCall;

  const toolMapping = SimpleToolNodeMapping.fromObjects(queryEngineTools);
@@ -0,0 +1,37 @@
+{
+  "llmEventStart": [
+    {
+      "id": "PRESERVE_0",
+      "messages": [
+        {
+          "role": "user",
+          "content": "What is the weather in Paris?"
+        }
+      ]
+    }
+  ],
+  "llmEventEnd": [
+    {
+      "id": "PRESERVE_0",
+      "response": {
+        "message": {
+          "role": "assistant",
+          "content": "",
+          "options": {
+            "toolCall": [
+              {
+                "name": "getWeather",
+                "input": {
+                  "city": "Paris"
+                },
+                "id": "5d198775-5268-4552-993b-9ecb4425385b"
+              }
+            ]
+          }
+        },
+        "raw": null
+      }
+    }
+  ],
+  "llmEventStream": []
+}
@@ -12,10 +12,11 @@
    "@faker-js/faker": "^9.2.0",
    "@llamaindex/core": "workspace:*",
    "@llamaindex/env": "workspace:*",
+    "@llamaindex/ollama": "workspace:*",
    "@llamaindex/openai": "workspace:*",
    "@types/node": "^22.9.0",
    "@types/pg": "^8.11.8",
-    "@xenova/transformers": "^2.17.2",
+    "@huggingface/transformers": "^3.0.2",
    "consola": "^3.2.3",
    "dotenv": "^16.4.5",
    "llamaindex": "workspace:*",
@@ -1,5 +1,18 @@
 # examples

+## 0.0.14
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+- Updated dependencies [fd8c882]
+  - llamaindex@0.8.10
+  - @llamaindex/core@0.4.7
+  - @llamaindex/workflow@0.0.4
+  - @llamaindex/readers@1.0.8
+
 ## 0.0.13

 ### Patch Changes
@@ -1,15 +1,15 @@
 {
  "name": "@llamaindex/examples",
  "private": true,
-  "version": "0.0.13",
+  "version": "0.0.14",
  "dependencies": {
    "@aws-crypto/sha256-js": "^5.2.0",
    "@azure/cosmos": "^4.1.1",
    "@azure/identity": "^4.4.1",
    "@datastax/astra-db-ts": "^1.4.1",
-    "@llamaindex/core": "^0.4.5",
-    "@llamaindex/readers": "^1.0.6",
-    "@llamaindex/workflow": "^0.0.3",
+    "@llamaindex/core": "^0.4.7",
+    "@llamaindex/readers": "^1.0.8",
+    "@llamaindex/workflow": "^0.0.4",
    "@notionhq/client": "^2.2.15",
    "@pinecone-database/pinecone": "^3.0.2",
    "@vercel/postgres": "^0.10.0",
@@ -18,7 +18,7 @@
    "commander": "^12.1.0",
    "dotenv": "^16.4.5",
    "js-tiktoken": "^1.0.14",
-    "llamaindex": "^0.8.8",
+    "llamaindex": "^0.8.10",
    "mongodb": "^6.7.0",
    "pathe": "^1.1.2",
    "postgres": "^3.4.4"
@@ -14,7 +14,6 @@ Settings.llm = new Ollama({

 Settings.embedModel = new HuggingFaceEmbedding({
  modelType: "BAAI/bge-small-en-v1.5",
-  quantized: false,
 });

 async function main() {
@@ -0,0 +1,16 @@
+import { VLLM } from "llamaindex";
+
+const llm = new VLLM({
+  model: "NousResearch/Meta-Llama-3-8B-Instruct",
+});
+
+const response = await llm.chat({
+  messages: [
+    {
+      role: "user",
+      content: "Hello?",
+    },
+  ],
+});
+
+console.log(response.message.content);
@@ -1,14 +1,19 @@
 import {
-  Context,
+  HandlerContext,
  StartEvent,
  StopEvent,
  Workflow,
  WorkflowEvent,
-} from "@llamaindex/core/workflow";
+} from "@llamaindex/workflow";
 import { OpenAI } from "llamaindex";

 const MAX_REVIEWS = 3;

+type Context = {
+  specification: string;
+  numberReviews: number;
+};
+
 // Using the o1-preview model (see https://platform.openai.com/docs/guides/reasoning?reasoning-prompt-examples=coding-planning)
 const llm = new OpenAI({ model: "o1-preview", temperature: 1 });

@@ -20,7 +25,9 @@ stores the question/answer pair in the database.`;

 // Create custom event types
 export class MessageEvent extends WorkflowEvent<{ msg: string }> {}
+
 export class CodeEvent extends WorkflowEvent<{ code: string }> {}
+
 export class ReviewEvent extends WorkflowEvent<{
  review: string;
  code: string;
@@ -34,12 +41,13 @@ const truncate = (str: string) => {
 };

 // the architect is responsible for writing the structure and the initial code based on the specification
-const architect = async (context: Context, ev: StartEvent) => {
-  // get the specification from the start event and save it to context
-  context.set("specification", ev.data.input);
-  const spec = context.get("specification");
+const architect = async (
+  context: HandlerContext<Context>,
+  _: StartEvent<string>,
+) => {
+  const spec = context.data.specification;
  // write a message to send an update to the user
-  context.writeEventToStream(
+  context.sendEvent(
    new MessageEvent({
      msg: `Writing app using this specification: ${truncate(spec)}`,
    }),
@@ -50,13 +58,13 @@ const architect = async (context: Context, ev: StartEvent) => {
 };

 // the coder is responsible for updating the code based on the review
-const coder = async (context: Context, ev: ReviewEvent) => {
+const coder = async (context: HandlerContext<Context>, ev: ReviewEvent) => {
  // get the specification from the context
-  const spec = context.get("specification");
+  const spec = context.data.specification;
  // get the latest review and code
  const { review, code } = ev.data;
  // write a message to send an update to the user
-  context.writeEventToStream(
+  context.sendEvent(
    new MessageEvent({
      msg: `Update code based on review: ${truncate(review)}`,
    }),
@@ -67,32 +75,35 @@ const coder = async (context: Context, ev: ReviewEvent) => {
 };

 // the reviewer is responsible for reviewing the code and providing feedback
-const reviewer = async (context: Context, ev: CodeEvent) => {
+const reviewer = async (context: HandlerContext<Context>, ev: CodeEvent) => {
  // get the specification from the context
-  const spec = context.get("specification");
+  const spec = context.data.specification;
  // get latest code from the event
  const { code } = ev.data;
  // update and check the number of reviews
-  const numberReviews = context.get("numberReviews", 0) + 1;
-  context.set("numberReviews", numberReviews);
-  if (numberReviews > MAX_REVIEWS) {
+  context.data.numberReviews++;
+  if (context.data.numberReviews > MAX_REVIEWS) {
    // the we've done this too many times - return the code
-    context.writeEventToStream(
+    context.sendEvent(
      new MessageEvent({
-        msg: `Already reviewed ${numberReviews - 1} times, stopping!`,
+        msg: `Already reviewed ${
+          context.data.numberReviews - 1
+        } times, stopping!`,
      }),
    );
    return new StopEvent({ result: code });
  }
  // write a message to send an update to the user
-  context.writeEventToStream(
-    new MessageEvent({ msg: `Review #${numberReviews}: ${truncate(code)}` }),
+  context.sendEvent(
+    new MessageEvent({
+      msg: `Review #${context.data.numberReviews}: ${truncate(code)}`,
+    }),
  );
  const prompt = `Review this code: <code>${code}</code>. Check if the code quality and whether it correctly implements this specification: <spec>${spec}</spec>. If you're satisfied, just return 'Looks great', nothing else. If not, return a review with a list of changes you'd like to see.`;
  const review = (await llm.complete({ prompt })).text;
  if (review.includes("Looks great")) {
    // the reviewer is satisfied with the code, let's return the review
-    context.writeEventToStream(
+    context.sendEvent(
      new MessageEvent({
        msg: `Reviewer says: ${review}`,
      }),
@@ -103,20 +114,44 @@ const reviewer = async (context: Context, ev: CodeEvent) => {
  return new ReviewEvent({ review, code });
 };

-const codeAgent = new Workflow({ validate: true });
-codeAgent.addStep(StartEvent, architect, { outputs: CodeEvent });
-codeAgent.addStep(ReviewEvent, coder, { outputs: CodeEvent });
-codeAgent.addStep(CodeEvent, reviewer, { outputs: ReviewEvent });
+const codeAgent = new Workflow<Context, string, string>();
+codeAgent.addStep(
+  {
+    inputs: [StartEvent<string>],
+    outputs: [CodeEvent],
+  },
+  architect,
+);
+codeAgent.addStep(
+  {
+    inputs: [ReviewEvent],
+    outputs: [CodeEvent],
+  },
+  coder,
+);
+codeAgent.addStep(
+  {
+    inputs: [CodeEvent],
+    outputs: [ReviewEvent, StopEvent],
+  },
+  reviewer,
+);

 // Usage
 async function main() {
-  const run = codeAgent.run(specification);
-  for await (const event of codeAgent.streamEvents()) {
-    const msg = (event as MessageEvent).data.msg;
-    console.log(`${msg}\n`);
+  const run = codeAgent.run(specification).with({
+    specification,
+    numberReviews: 0,
+  });
+  for await (const event of run) {
+    if (event instanceof MessageEvent) {
+      const msg = (event as MessageEvent).data.msg;
+      console.log(`${msg}\n`);
+    } else if (event instanceof StopEvent) {
+      const result = (event as StopEvent<string>).data;
+      console.log("Final code:\n", result);
+    }
  }
-  const result = await run;
-  console.log("Final code:\n", result.data.result);
 }

 main().catch(console.error);
@@ -1,10 +1,10 @@
 import {
-  Context,
+  HandlerContext,
  StartEvent,
  StopEvent,
  Workflow,
  WorkflowEvent,
-} from "@llamaindex/core/workflow";
+} from "@llamaindex/workflow";
 import { OpenAI } from "llamaindex";

 // Create LLM instance
@@ -12,59 +12,77 @@ const llm = new OpenAI();

 // Create custom event types
 export class JokeEvent extends WorkflowEvent<{ joke: string }> {}
+
 export class CritiqueEvent extends WorkflowEvent<{ critique: string }> {}
+
 export class AnalysisEvent extends WorkflowEvent<{ analysis: string }> {}

-const generateJoke = async (_context: Context, ev: StartEvent) => {
-  const prompt = `Write your best joke about ${ev.data.input}.`;
+const generateJoke = async (_: unknown, ev: StartEvent<string>) => {
+  const prompt = `Write your best joke about ${ev.data}.`;
  const response = await llm.complete({ prompt });
  return new JokeEvent({ joke: response.text });
 };

-const critiqueJoke = async (_context: Context, ev: JokeEvent) => {
+const critiqueJoke = async (_: unknown, ev: JokeEvent) => {
  const prompt = `Give a thorough critique of the following joke: ${ev.data.joke}`;
  const response = await llm.complete({ prompt });
  return new CritiqueEvent({ critique: response.text });
 };

-const analyzeJoke = async (_context: Context, ev: JokeEvent) => {
+const analyzeJoke = async (_: unknown, ev: JokeEvent) => {
  const prompt = `Give a thorough analysis of the following joke: ${ev.data.joke}`;
  const response = await llm.complete({ prompt });
  return new AnalysisEvent({ analysis: response.text });
 };

 const reportJoke = async (
-  context: Context,
-  ev: AnalysisEvent | CritiqueEvent,
+  context: HandlerContext,
+  ev1: AnalysisEvent,
+  ev2: CritiqueEvent,
 ) => {
-  const events = context.collectEvents(ev, [AnalysisEvent, CritiqueEvent]);
-  if (!events) {
-    return;
-  }
-  const subPrompts = events.map((event) => {
-    if (event instanceof AnalysisEvent) {
-      return `Analysis: ${event.data.analysis}`;
-    } else if (event instanceof CritiqueEvent) {
-      return `Critique: ${event.data.critique}`;
-    }
-    return "";
-  });
+  const subPrompts = [ev1.data.analysis, ev2.data.critique];

-  const prompt = `Based on the following information about a joke:\n${subPrompts.join("\n")}\nProvide a comprehensive report on the joke's quality and impact.`;
+  const prompt = `Based on the following information about a joke:\n${subPrompts.join(
+    "\n",
+  )}\nProvide a comprehensive report on the joke's quality and impact.`;
  const response = await llm.complete({ prompt });
-  return new StopEvent({ result: response.text });
+  return new StopEvent(response.text);
 };

-const jokeFlow = new Workflow();
-jokeFlow.addStep(StartEvent, generateJoke);
-jokeFlow.addStep(JokeEvent, critiqueJoke);
-jokeFlow.addStep(JokeEvent, analyzeJoke);
-jokeFlow.addStep([AnalysisEvent, CritiqueEvent], reportJoke);
+const jokeFlow = new Workflow<unknown, string, string>();
+jokeFlow.addStep(
+  {
+    inputs: [StartEvent<string>],
+    outputs: [JokeEvent],
+  },
+  generateJoke,
+);
+jokeFlow.addStep(
+  {
+    inputs: [JokeEvent],
+    outputs: [CritiqueEvent],
+  },
+  critiqueJoke,
+);
+jokeFlow.addStep(
+  {
+    inputs: [JokeEvent],
+    outputs: [AnalysisEvent],
+  },
+  analyzeJoke,
+);
+jokeFlow.addStep(
+  {
+    inputs: [AnalysisEvent, CritiqueEvent],
+    outputs: [StopEvent<string>],
+  },
+  reportJoke,
+);

 // Usage
 async function main() {
  const result = await jokeFlow.run("pirates");
-  console.log(result.data.result);
+  console.log(result.data);
 }

 main().catch(console.error);
@@ -1,10 +1,9 @@
 import {
-  Context,
  StartEvent,
  StopEvent,
  Workflow,
  WorkflowEvent,
-} from "@llamaindex/core/workflow";
+} from "@llamaindex/workflow";
 import { OpenAI } from "llamaindex";

 // Create LLM instance
@@ -13,26 +12,38 @@ const llm = new OpenAI();
 // Create a custom event type
 export class JokeEvent extends WorkflowEvent<{ joke: string }> {}

-const generateJoke = async (_context: Context, ev: StartEvent) => {
-  const prompt = `Write your best joke about ${ev.data.input}.`;
+const generateJoke = async (_: unknown, ev: StartEvent<string>) => {
+  const prompt = `Write your best joke about ${ev.data}.`;
  const response = await llm.complete({ prompt });
  return new JokeEvent({ joke: response.text });
 };

-const critiqueJoke = async (_context: Context, ev: JokeEvent) => {
+const critiqueJoke = async (_: unknown, ev: JokeEvent) => {
  const prompt = `Give a thorough critique of the following joke: ${ev.data.joke}`;
  const response = await llm.complete({ prompt });
-  return new StopEvent({ result: response.text });
+  return new StopEvent(response.text);
 };

-const jokeFlow = new Workflow({ verbose: true });
-jokeFlow.addStep(StartEvent, generateJoke);
-jokeFlow.addStep(JokeEvent, critiqueJoke);
+const jokeFlow = new Workflow<unknown, string, string>();
+jokeFlow.addStep(
+  {
+    inputs: [StartEvent<string>],
+    outputs: [JokeEvent],
+  },
+  generateJoke,
+);
+jokeFlow.addStep(
+  {
+    inputs: [JokeEvent],
+    outputs: [StopEvent<string>],
+  },
+  critiqueJoke,
+);

 // Usage
 async function main() {
  const result = await jokeFlow.run("pirates");
-  console.log(result.data.result);
+  console.log(result.data);
 }

 main().catch(console.error);
@@ -1,10 +1,10 @@
 import {
-  Context,
+  HandlerContext,
  StartEvent,
  StopEvent,
  Workflow,
  WorkflowEvent,
-} from "@llamaindex/core/workflow";
+} from "@llamaindex/workflow";
 import { OpenAI } from "llamaindex";

 // Create LLM instance
@@ -12,38 +12,55 @@ const llm = new OpenAI();

 // Create custom event types
 export class JokeEvent extends WorkflowEvent<{ joke: string }> {}
+
 export class MessageEvent extends WorkflowEvent<{ msg: string }> {}

-const generateJoke = async (context: Context, ev: StartEvent) => {
-  context.writeEventToStream(
-    new MessageEvent({ msg: `Generating a joke about: ${ev.data.input}` }),
+const generateJoke = async (context: HandlerContext, ev: StartEvent) => {
+  context.sendEvent(
+    new MessageEvent({ msg: `Generating a joke about: ${ev.data}` }),
  );
-  const prompt = `Write your best joke about ${ev.data.input}.`;
+  const prompt = `Write your best joke about ${ev.data}.`;
  const response = await llm.complete({ prompt });
  return new JokeEvent({ joke: response.text });
 };

-const critiqueJoke = async (context: Context, ev: JokeEvent) => {
-  context.writeEventToStream(
+const critiqueJoke = async (context: HandlerContext, ev: JokeEvent) => {
+  context.sendEvent(
    new MessageEvent({ msg: `Write a critique of this joke: ${ev.data.joke}` }),
  );
  const prompt = `Give a thorough critique of the following joke: ${ev.data.joke}`;
  const response = await llm.complete({ prompt });
-  return new StopEvent({ result: response.text });
+  return new StopEvent(response.text);
 };

 const jokeFlow = new Workflow();
-jokeFlow.addStep(StartEvent, generateJoke);
-jokeFlow.addStep(JokeEvent, critiqueJoke);
+jokeFlow.addStep(
+  {
+    inputs: [StartEvent<string>],
+    outputs: [JokeEvent],
+  },
+  generateJoke,
+);
+jokeFlow.addStep(
+  {
+    inputs: [JokeEvent],
+    outputs: [StopEvent<string>],
+  },
+  critiqueJoke,
+);

 // Usage
 async function main() {
  const run = jokeFlow.run("pirates");
-  for await (const event of jokeFlow.streamEvents()) {
-    console.log((event as MessageEvent).data.msg);
+  for await (const event of run) {
+    if (event instanceof MessageEvent) {
+      console.log("Message:");
+      console.log((event as MessageEvent).data.msg);
+    } else if (event instanceof StopEvent) {
+      console.log("Result:");
+      console.log((event as StopEvent<string>).data);
+    }
  }
-  const result = await run;
-  console.log(result.data.result);
 }

 main().catch(console.error);
@@ -1,19 +1,21 @@
-import {
-  Context,
-  StartEvent,
-  StopEvent,
-  Workflow,
-} from "@llamaindex/core/workflow";
+import { StartEvent, StopEvent, Workflow } from "@llamaindex/workflow";

-const longRunning = async (_context: Context, ev: StartEvent) => {
+const longRunning = async (_: unknown, ev: StartEvent<string>) => {
  await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait for 2 seconds
-  return new StopEvent({ result: "We waited 2 seconds" });
+  return new StopEvent("We waited 2 seconds");
 };

 async function timeout() {
-  const workflow = new Workflow({ verbose: true, timeout: 1 });
-  workflow.addStep(StartEvent, longRunning);
-  // This will timeout
+  const workflow = new Workflow<unknown, string, string>({
+    timeout: 1,
+  });
+  workflow.addStep(
+    {
+      inputs: [StartEvent<string>],
+      outputs: [StopEvent<string>],
+    },
+    longRunning,
+  );
  try {
    await workflow.run("Let's start");
  } catch (error) {
@@ -23,14 +25,23 @@ async function timeout() {

 async function notimeout() {
  // Increase timeout to 3 seconds - no timeout
-  const workflow = new Workflow({ verbose: true, timeout: 3 });
-  workflow.addStep(StartEvent, longRunning);
+  const workflow = new Workflow<unknown, string, string>({
+    timeout: 3,
+  });
+  workflow.addStep(
+    {
+      inputs: [StartEvent<string>],
+      outputs: [StopEvent<string>],
+    },
+    longRunning,
+  );
  const result = await workflow.run("Let's start");
-  console.log(result.data.result);
+  console.log(result.data);
 }

 async function main() {
  await timeout();
+  console.log("---");
  await notimeout();
 }

@@ -1,10 +1,9 @@
 import {
-  Context,
  StartEvent,
  StopEvent,
  Workflow,
  WorkflowEvent,
-} from "@llamaindex/core/workflow";
+} from "@llamaindex/workflow";
 import { OpenAI } from "llamaindex";

 // Create LLM instance
@@ -13,40 +12,66 @@ const llm = new OpenAI();
 // Create a custom event type
 export class JokeEvent extends WorkflowEvent<{ joke: string }> {}

-const generateJoke = async (_context: Context, ev: StartEvent) => {
-  const prompt = `Write your best joke about ${ev.data.input}.`;
+const generateJoke = async (_: unknown, ev: StartEvent<string>) => {
+  const prompt = `Write your best joke about ${ev.data}.`;
  const response = await llm.complete({ prompt });
  return new JokeEvent({ joke: response.text });
 };

-const critiqueJoke = async (_context: Context, ev: JokeEvent) => {
+const critiqueJoke = async (_: unknown, ev: JokeEvent) => {
  const prompt = `Give a thorough critique of the following joke: ${ev.data.joke}`;
  const response = await llm.complete({ prompt });
-  return new StopEvent({ result: response.text });
+  return new StopEvent(response.text);
 };

 async function validateFails() {
  try {
-    const jokeFlow = new Workflow({ verbose: true, validate: true });
-    jokeFlow.addStep(StartEvent, generateJoke, { outputs: StopEvent });
-    jokeFlow.addStep(JokeEvent, critiqueJoke, { outputs: StopEvent });
-    await jokeFlow.run("pirates");
+    const jokeFlow = new Workflow();
+    jokeFlow.addStep(
+      {
+        inputs: [StartEvent<string>],
+        outputs: [StopEvent<string>],
+      },
+      // @ts-expect-error outputs should be JokeEvent
+      generateJoke,
+    );
+    jokeFlow.addStep(
+      {
+        inputs: [JokeEvent],
+        outputs: [StopEvent],
+      },
+      critiqueJoke,
+    );
+    await jokeFlow.run("pirates").strict();
  } catch (e) {
    console.error("Validation failed:", e);
  }
 }

 async function validate() {
-  const jokeFlow = new Workflow({ verbose: true, validate: true });
-  jokeFlow.addStep(StartEvent, generateJoke, { outputs: JokeEvent });
-  jokeFlow.addStep(JokeEvent, critiqueJoke, { outputs: StopEvent });
-  const result = await jokeFlow.run("pirates");
-  console.log(result.data.result);
+  const jokeFlow = new Workflow();
+  jokeFlow.addStep(
+    {
+      inputs: [StartEvent<string>],
+      outputs: [JokeEvent],
+    },
+    generateJoke,
+  );
+  jokeFlow.addStep(
+    {
+      inputs: [JokeEvent],
+      outputs: [StopEvent<string>],
+    },
+    critiqueJoke,
+  );
+  const result = await jokeFlow.run("pirates").strict();
+  console.log(result.data);
 }

 // Usage
 async function main() {
  await validateFails();
+  console.log("---");
  await validate();
 }

@@ -35,12 +35,6 @@
    "typescript-eslint": "^8.13.0"
  },
  "packageManager": "pnpm@9.12.3",
-  "pnpm": {
-    "overrides": {
-      "trim": "1.0.1",
-      "protobufjs": "7.2.6"
-    }
-  },
  "lint-staged": {
    "(!apps/docs/i18n/**/docusaurus-plugin-content-docs/current/api/*).{js,jsx,ts,tsx,md}": "prettier --write"
  }
@@ -1,5 +1,20 @@
 # @llamaindex/autotool

+## 5.0.10
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 5.0.9
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 5.0.8

 ### Patch Changes
@@ -1,5 +1,22 @@
 # @llamaindex/autotool-01-node-example

+## 0.0.53
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+  - @llamaindex/autotool@5.0.10
+
+## 0.0.52
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+  - @llamaindex/autotool@5.0.9
+
 ## 0.0.51

 ### Patch Changes
@@ -13,5 +13,5 @@
  "scripts": {
    "start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
  },
-  "version": "0.0.51"
+  "version": "0.0.53"
 }
@@ -1,5 +1,22 @@
 # @llamaindex/autotool-02-next-example

+## 0.1.97
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+  - @llamaindex/autotool@5.0.10
+
+## 0.1.96
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+  - @llamaindex/autotool@5.0.9
+
 ## 0.1.95

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/autotool-02-next-example",
  "private": true,
-  "version": "0.1.95",
+  "version": "0.1.97",
  "scripts": {
    "dev": "next dev",
    "build": "next build",
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/autotool",
  "type": "module",
-  "version": "5.0.8",
+  "version": "5.0.10",
  "description": "auto transpile your JS function to LLM Agent compatible",
  "files": [
    "dist",
@@ -1,5 +1,21 @@
 # @llamaindex/cloud

+## 2.0.7
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 2.0.6
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 2.0.5

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloud",
-  "version": "2.0.5",
+  "version": "2.0.7",
  "type": "module",
  "license": "MIT",
  "scripts": {
@@ -1,5 +1,21 @@
 # @llamaindex/community

+## 0.0.65
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 0.0.64
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 0.0.63

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/community",
  "description": "Community package for LlamaIndexTS",
-  "version": "0.0.63",
+  "version": "0.0.65",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -1,5 +1,19 @@
 # @llamaindex/core

+## 0.4.7
+
+### Patch Changes
+
+- d89ebe0: feat: better support for zod schema
+- fd8c882: chore: add warning on legacy workflow API
+
+## 0.4.6
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+
 ## 0.4.5

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/core",
  "type": "module",
-  "version": "0.4.5",
+  "version": "0.4.7",
  "description": "LlamaIndex Core Module",
  "exports": {
    "./agent": {
@@ -398,7 +398,12 @@ export abstract class AgentRunner<
          return output.pipeThrough(
            new TransformStream<EngineResponse>({
              transform(chunk, controller) {
-                controller.enqueue(EngineResponse.fromChatResponseChunk(chunk));
+                controller.enqueue(
+                  EngineResponse.fromChatResponseChunk(
+                    chunk,
+                    chunk.sourceNodes,
+                  ),
+                );
              },
            }),
          );
@@ -1,5 +1,4 @@
-import { streamConverter } from "../utils";
-import { extractText } from "../utils/llms";
+import { extractText, streamConverter } from "../utils";
 import type {
  ChatResponse,
  ChatResponseChunk,
@@ -4,18 +4,12 @@ import { zodToJsonSchema } from "zod-to-json-schema";
 import type { JSONValue } from "../global";
 import type { BaseTool, ToolMetadata } from "../llms";

-const kOriginalFn = Symbol("originalFn");
-
 export class FunctionTool<T, R extends JSONValue | Promise<JSONValue>>
  implements BaseTool<T>
 {
-  [kOriginalFn]?: (input: T) => R;
-
  #fn: (input: T) => R;
-  #metadata: ToolMetadata<JSONSchemaType<T>>;
-  // todo: for the future, we can use zod to validate the input parameters
-  // eslint-disable-next-line no-unused-private-class-members
-  #zodType: z.ZodType<T> | null = null;
+  readonly #metadata: ToolMetadata<JSONSchemaType<T>>;
+  readonly #zodType: z.ZodType<T> | null = null;
  constructor(
    fn: (input: T) => R,
    metadata: ToolMetadata<JSONSchemaType<T>>,
@@ -32,6 +26,12 @@ export class FunctionTool<T, R extends JSONValue | Promise<JSONValue>>
    fn: (input: T) => JSONValue | Promise<JSONValue>,
    schema: ToolMetadata<JSONSchemaType<T>>,
  ): FunctionTool<T, JSONValue | Promise<JSONValue>>;
+  static from<R extends z.ZodType>(
+    fn: (input: z.infer<R>) => JSONValue | Promise<JSONValue>,
+    schema: Omit<ToolMetadata, "parameters"> & {
+      parameters: R;
+    },
+  ): FunctionTool<z.infer<R>, JSONValue | Promise<JSONValue>>;
  static from<T, R extends z.ZodType<T>>(
    fn: (input: T) => JSONValue | Promise<JSONValue>,
    schema: Omit<ToolMetadata, "parameters"> & {
@@ -40,15 +40,15 @@ export class FunctionTool<T, R extends JSONValue | Promise<JSONValue>>
  ): FunctionTool<T, JSONValue>;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  static from(fn: any, schema: any): any {
-    if (schema.parameter instanceof z.ZodSchema) {
-      const jsonSchema = zodToJsonSchema(schema.parameter);
+    if (schema.parameters instanceof z.ZodSchema) {
+      const jsonSchema = zodToJsonSchema(schema.parameters);
      return new FunctionTool(
        fn,
        {
          ...schema,
          parameters: jsonSchema,
        },
-        schema.parameter,
+        schema.parameters,
      );
    }
    return new FunctionTool(fn, schema);
@@ -58,7 +58,15 @@ export class FunctionTool<T, R extends JSONValue | Promise<JSONValue>>
    return this.#metadata as BaseTool<T>["metadata"];
  }

-  call(input: T) {
+  call = (input: T) => {
+    if (this.#zodType) {
+      const result = this.#zodType.safeParse(input);
+      if (result.success) {
+        return this.#fn.call(null, result.data);
+      } else {
+        console.warn(result.error.errors);
+      }
+    }
    return this.#fn.call(null, input);
-  }
+  };
 }
@@ -13,6 +13,8 @@ export type StepFunction<T extends WorkflowEvent = WorkflowEvent> = (

 type EventTypeParam = EventTypes | EventTypes[];

+let once = false;
+
 export class Workflow {
  #steps: Map<
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -29,8 +31,20 @@ export class Workflow {
      verbose?: boolean;
      timeout?: number;
      validate?: boolean;
+      ignoreDeprecatedWarning?: boolean;
    } = {},
  ) {
+    if (!once && !params.ignoreDeprecatedWarning) {
+      console.warn(
+        "@llamaindex/core/workflow is going to use the new workflow API in the next major version.",
+        "Please update your imports to @llamaindex/workflow",
+      );
+      console.warn(
+        "See https://ts.llamaindex.ai/docs/llamaindex/guide/workflow for more information",
+      );
+      once = true;
+    }
+
    this.#verbose = params.verbose ?? false;
    this.#timeout = params.timeout ?? null;
    this.#validate = params.validate ?? false;
@@ -1,5 +1,13 @@
 # @llamaindex/env

+## 0.1.20
+
+### Patch Changes
+
+- 4fc001c: chore: bump `@huggingface/transformers`
+
+  Upgrade to v3, please read https://github.com/huggingface/transformers.js/releases/tag/3.0.0 for more information.
+
 ## 0.1.19

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/env",
  "description": "environment wrapper, supports all JS environment including node, deno, bun, edge runtime, and cloudflare worker",
-  "version": "0.1.19",
+  "version": "0.1.20",
  "type": "module",
  "types": "dist/index.d.ts",
  "module": "dist/index.js",
@@ -124,7 +124,7 @@
  "devDependencies": {
    "@types/node": "^22.9.0",
    "@types/readable-stream": "^4.0.15",
-    "@xenova/transformers": "^2.17.2",
+    "@huggingface/transformers": "^3.0.2",
    "bunchee": "5.6.1",
    "gpt-tokenizer": "^2.6.0",
    "pathe": "^1.1.2",
@@ -132,7 +132,7 @@
  },
  "peerDependencies": {
    "@aws-crypto/sha256-js": "^5.2.0",
-    "@xenova/transformers": "^2.17.2",
+    "@huggingface/transformers": "^3.0.2",
    "gpt-tokenizer": "^2.5.0",
    "js-tiktoken": "^1.0.12",
    "pathe": "^1.1.2"
@@ -141,7 +141,7 @@
    "@aws-crypto/sha256-js": {
      "optional": true
    },
-    "@xenova/transformers": {
+    "@huggingface/transformers": {
      "optional": true
    },
    "pathe": {
@@ -8,8 +8,10 @@ export {
 export async function loadTransformers(onLoad: OnLoad) {
  if (getTransformers() === null) {
    setTransformers(
-      // @ts-expect-error no type
-      await import("https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2"),
+      await import(
+        // @ts-expect-error no type
+        "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.2"
+      ),
    );
  } else {
    return getTransformers()!;
@@ -8,7 +8,7 @@ export {

 export async function loadTransformers(onLoad: OnLoad) {
  if (getTransformers() === null) {
-    setTransformers(await import("@xenova/transformers"));
+    setTransformers(await import("@huggingface/transformers"));
  } else {
    return getTransformers()!;
  }
@@ -9,7 +9,7 @@ export async function loadTransformers(onLoad: OnLoad) {
  if (getTransformers() === null) {
    /**
     * If you see this warning, it means that the current environment does not support the transformer.
-     *  because "@xeonva/transformers" highly depends on Node.js APIs.
+     *  because "@huggingface/transformers" highly depends on Node.js APIs.
     *
     * One possible solution is to fix their implementation to make it work in the non-Node.js environment,
     *  but it's not worth the effort because Edge Runtime and Cloudflare Workers are not meant for heavy machine learning tasks.
@@ -17,14 +17,14 @@ export async function loadTransformers(onLoad: OnLoad) {
     * Or you can provide an RPC server that runs the transformer in a Node.js environment.
     * Or you just run the code in a Node.js environment.
     *
-     * Refs: https://github.com/xenova/transformers.js/issues/309
+     * Refs: https://github.com/huggingface/transformers.js/issues/309
     */
    console.warn(
-      '"@xenova/transformers" is not officially supported in this environment, some features may not work as expected.',
+      '"@huggingface/transformers" is not officially supported in this environment, some features may not work as expected.',
    );
    setTransformers(
      // @ts-expect-error no type
-      await import("@xenova/transformers/dist/transformers"),
+      await import("@huggingface/transformers/dist/transformers.js"),
    );
  } else {
    return getTransformers()!;
@@ -1,17 +1,17 @@
-let transformer: typeof import("@xenova/transformers") | null = null;
+let transformer: typeof import("@huggingface/transformers") | null = null;

 export function getTransformers() {
  return transformer;
 }

-export function setTransformers(t: typeof import("@xenova/transformers")) {
+export function setTransformers(t: typeof import("@huggingface/transformers")) {
  transformer = t;
 }

 export type OnLoad = (
-  transformer: typeof import("@xenova/transformers"),
+  transformer: typeof import("@huggingface/transformers"),
 ) => void;

 export type LoadTransformerEvent = {
-  transformer: typeof import("@xenova/transformers");
+  transformer: typeof import("@huggingface/transformers");
 };
@@ -1,5 +1,20 @@
 # @llamaindex/experimental

+## 0.0.122
+
+### Patch Changes
+
+- Updated dependencies [f066e50]
+  - llamaindex@0.8.10
+
+## 0.0.121
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - llamaindex@0.8.9
+
 ## 0.0.120

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/experimental",
  "description": "Experimental package for LlamaIndexTS",
-  "version": "0.0.120",
+  "version": "0.0.122",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -1,5 +1,57 @@
 # llamaindex

+## 0.8.10
+
+### Patch Changes
+
+- f066e50: feat: vllm support
+- Updated dependencies [f066e50]
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/vllm@0.0.2
+  - @llamaindex/core@0.4.7
+  - @llamaindex/cloud@2.0.7
+  - @llamaindex/node-parser@0.0.8
+  - @llamaindex/anthropic@0.0.16
+  - @llamaindex/clip@0.0.16
+  - @llamaindex/deepinfra@0.0.16
+  - @llamaindex/huggingface@0.0.16
+  - @llamaindex/ollama@0.0.23
+  - @llamaindex/openai@0.1.32
+  - @llamaindex/portkey-ai@0.0.16
+  - @llamaindex/replicate@0.0.16
+  - @llamaindex/readers@1.0.8
+  - @llamaindex/groq@0.0.31
+
+## 0.8.9
+
+### Patch Changes
+
+- 4fc001c: chore: bump `@huggingface/transformers`
+
+  Upgrade to v3, please read https://github.com/huggingface/transformers.js/releases/tag/3.0.0 for more information.
+
+- 4d4cd8a: feat: support ollama tool call
+
+  Note that `OllamaEmbedding` is no longer a subclass of `Ollama`.
+
+- Updated dependencies [4fc001c]
+- Updated dependencies [4d4cd8a]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/clip@0.0.15
+  - @llamaindex/huggingface@0.0.15
+  - @llamaindex/ollama@0.0.22
+  - @llamaindex/cloud@2.0.6
+  - @llamaindex/core@0.4.6
+  - @llamaindex/node-parser@0.0.7
+  - @llamaindex/anthropic@0.0.15
+  - @llamaindex/deepinfra@0.0.15
+  - @llamaindex/groq@0.0.30
+  - @llamaindex/openai@0.1.31
+  - @llamaindex/portkey-ai@0.0.15
+  - @llamaindex/replicate@0.0.15
+  - @llamaindex/readers@1.0.7
+
 ## 0.8.8

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "llamaindex",
-  "version": "0.8.8",
+  "version": "0.8.10",
  "license": "MIT",
  "type": "module",
  "keywords": [
@@ -29,7 +29,7 @@
    "@discoveryjs/json-ext": "^0.6.1",
    "@google-cloud/vertexai": "1.2.0",
    "@google/generative-ai": "0.12.0",
-    "@grpc/grpc-js": "^1.11.1",
+    "@grpc/grpc-js": "^1.12.2",
    "@llamaindex/anthropic": "workspace:*",
    "@llamaindex/clip": "workspace:*",
    "@llamaindex/cloud": "workspace:*",
@@ -44,6 +44,7 @@
    "@llamaindex/portkey-ai": "workspace:*",
    "@llamaindex/readers": "workspace:*",
    "@llamaindex/replicate": "workspace:*",
+    "@llamaindex/vllm": "workspace:*",
    "@mistralai/mistralai": "^1.0.4",
    "@mixedbread-ai/sdk": "^2.2.11",
    "@pinecone-database/pinecone": "^3.0.2",
@@ -85,10 +86,10 @@
    }
  },
  "devDependencies": {
+    "@huggingface/transformers": "^3.0.2",
    "@swc/cli": "^0.5.0",
    "@swc/core": "^1.7.22",
    "@vercel/postgres": "^0.10.0",
-    "@xenova/transformers": "^2.17.2",
    "concurrently": "^9.1.0",
    "glob": "^11.0.0",
    "pg": "^8.12.0",
@@ -1,4 +1,9 @@
 export * from "@llamaindex/core/agent";
+export {
+  OllamaAgent,
+  OllamaAgentWorker,
+  type OllamaAgentParams,
+} from "@llamaindex/ollama";
 export {
  AnthropicAgent,
  AnthropicAgentWorker,
@@ -16,7 +21,6 @@ export {
  ReActAgent,
  type ReACTAgentParams,
 } from "./react.js";
-
 // todo: ParallelAgent
 // todo: CustomAgent
 // todo: ReactMultiModal
@@ -1,7 +1 @@
-import type { BaseEmbedding } from "@llamaindex/core/embeddings";
-import { Ollama } from "@llamaindex/ollama";
-
-/**
- * OllamaEmbedding is an alias for Ollama that implements the BaseEmbedding interface.
- */
-export class OllamaEmbedding extends Ollama implements BaseEmbedding {}
+export { OllamaEmbedding } from "@llamaindex/ollama";
@@ -1,3 +1,4 @@
+export { VLLM, type VLLMParams } from "@llamaindex/vllm";
 export {
  ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS,
  ALL_AVAILABLE_ANTHROPIC_MODELS,
@@ -6,7 +7,6 @@ export {
 } from "./anthropic.js";
 export { FireworksLLM } from "./fireworks.js";
 export { Gemini, GeminiSession } from "./gemini/base.js";
-
 export {
  GEMINI_MODEL,
  type GoogleGeminiSessionOptions,
@@ -22,7 +22,7 @@ export default function withLlamaIndex(config: any) {
  config.experimental.serverComponentsExternalPackages =
    config.experimental.serverComponentsExternalPackages ?? [];
  config.experimental.serverComponentsExternalPackages.push(
-    "@xenova/transformers",
+    "@huggingface/transformers",
  );
  const userWebpack = config.webpack;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -1,5 +1,21 @@
 # @llamaindex/node-parser

+## 0.0.8
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 0.0.7
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 0.0.6

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/node-parser",
-  "version": "0.0.6",
+  "version": "0.0.8",
  "description": "Node parser for LlamaIndex",
  "type": "module",
  "exports": {
@@ -1,5 +1,21 @@
 # @llamaindex/anthropic

+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 0.0.15
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 0.0.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/anthropic",
  "description": "Anthropic Adapter for LlamaIndex",
-  "version": "0.0.14",
+  "version": "0.0.16",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,27 @@
 # @llamaindex/clip

+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+  - @llamaindex/openai@0.1.32
+
+## 0.0.15
+
+### Patch Changes
+
+- 4fc001c: chore: bump `@huggingface/transformers`
+
+  Upgrade to v3, please read https://github.com/huggingface/transformers.js/releases/tag/3.0.0 for more information.
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+  - @llamaindex/openai@0.1.31
+
 ## 0.0.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/clip",
  "description": "Clip Embedding Adapter for LlamaIndex",
-  "version": "0.0.14",
+  "version": "0.0.16",
  "type": "module",
  "types": "dist/index.d.ts",
  "main": "dist/index.cjs",
@@ -45,6 +45,9 @@
    "@llamaindex/core": "workspace:*",
    "@llamaindex/env": "workspace:*",
    "@llamaindex/openai": "workspace:*",
-    "@xenova/transformers": "^2.17.2"
+    "@huggingface/transformers": "^3.0.2"
+  },
+  "peerDependencies": {
+    "@huggingface/transformers": "^3.0.2"
  }
 }
@@ -1,17 +1,17 @@
 import { MultiModalEmbedding } from "@llamaindex/core/embeddings";
 import type { ImageType } from "@llamaindex/core/schema";
 // only import type, to avoid bundling error
-import { Settings } from "@llamaindex/core/global";
-import {
-  type LoadTransformerEvent,
-  loadTransformers,
-} from "@llamaindex/env/multi-model";
 import type {
  CLIPTextModelWithProjection,
  CLIPVisionModelWithProjection,
  PreTrainedTokenizer,
  Processor,
-} from "@xenova/transformers";
+} from "@huggingface/transformers";
+import { Settings } from "@llamaindex/core/global";
+import {
+  type LoadTransformerEvent,
+  loadTransformers,
+} from "@llamaindex/env/multi-model";
 import { ClipEmbeddingModelType } from "./shared";

 declare module "@llamaindex/core/global" {
@@ -1,5 +1,23 @@
 # @llamaindex/deepinfra

+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+  - @llamaindex/openai@0.1.32
+
+## 0.0.15
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+  - @llamaindex/openai@0.1.31
+
 ## 0.0.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/deepinfra",
  "description": "Deepinfra Adapter for LlamaIndex",
-  "version": "0.0.14",
+  "version": "0.0.16",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,19 @@
 # @llamaindex/groq

+## 0.0.31
+
+### Patch Changes
+
+- @llamaindex/openai@0.1.32
+
+## 0.0.30
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/openai@0.1.31
+
 ## 0.0.29

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/groq",
  "description": "Groq Adapter for LlamaIndex",
-  "version": "0.0.29",
+  "version": "0.0.31",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,27 @@
 # @llamaindex/huggingface

+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+  - @llamaindex/openai@0.1.32
+
+## 0.0.15
+
+### Patch Changes
+
+- 4fc001c: chore: bump `@huggingface/transformers`
+
+  Upgrade to v3, please read https://github.com/huggingface/transformers.js/releases/tag/3.0.0 for more information.
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+  - @llamaindex/openai@0.1.31
+
 ## 0.0.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/huggingface",
  "description": "Huggingface Adapter for LlamaIndex",
-  "version": "0.0.14",
+  "version": "0.0.16",
  "type": "module",
  "types": "dist/index.d.ts",
  "main": "dist/index.cjs",
@@ -46,6 +46,9 @@
    "@llamaindex/core": "workspace:*",
    "@llamaindex/env": "workspace:*",
    "@llamaindex/openai": "workspace:*",
-    "@xenova/transformers": "^2.17.2"
+    "@huggingface/transformers": "^3.0.2"
+  },
+  "peerDependencies": {
+    "@huggingface/transformers": "^3.0.2"
  }
 }
@@ -1,10 +1,10 @@
+import type { pipeline } from "@huggingface/transformers";
 import { BaseEmbedding } from "@llamaindex/core/embeddings";
 import { Settings } from "@llamaindex/core/global";
 import {
  type LoadTransformerEvent,
  loadTransformers,
 } from "@llamaindex/env/multi-model";
-import type { pipeline } from "@xenova/transformers";
 import { HuggingFaceEmbeddingModelType } from "./shared";

 declare module "@llamaindex/core/global" {
@@ -13,6 +13,11 @@ declare module "@llamaindex/core/global" {
  }
 }

+export type HuggingFaceEmbeddingParams = {
+  modelType?: string;
+  modelOptions?: Parameters<typeof pipeline<"feature-extraction">>[2];
+};
+
 /**
 * Uses feature extraction from '@huggingface/transformers' to generate embeddings.
 * Per default the model [XENOVA_ALL_MINILM_L6_V2](https://huggingface.co/Xenova/all-MiniLM-L6-v2) is used.
@@ -28,15 +33,20 @@ declare module "@llamaindex/core/global" {
 */
 export class HuggingFaceEmbedding extends BaseEmbedding {
  modelType: string = HuggingFaceEmbeddingModelType.XENOVA_ALL_MINILM_L6_V2;
-  quantized: boolean = true;
+  modelOptions: Parameters<typeof pipeline<"feature-extraction">>[2] = {};

  private extractor: Awaited<
    ReturnType<typeof pipeline<"feature-extraction">>
  > | null = null;

-  constructor(init?: Partial<HuggingFaceEmbedding>) {
+  constructor(params: HuggingFaceEmbeddingParams = {}) {
    super();
-    Object.assign(this, init);
+    if (params.modelType) {
+      this.modelType = params.modelType;
+    }
+    if (params.modelOptions) {
+      this.modelOptions = params.modelOptions;
+    }
  }

  async getExtractor() {
@@ -50,9 +60,11 @@ export class HuggingFaceEmbedding extends BaseEmbedding {
          true,
        );
      });
-      this.extractor = await pipeline("feature-extraction", this.modelType, {
-        quantized: this.quantized,
-      });
+      this.extractor = await pipeline(
+        "feature-extraction",
+        this.modelType,
+        this.modelOptions,
+      );
    }
    return this.extractor;
  }
@@ -1,4 +1,7 @@
-export { HuggingFaceEmbedding } from "./embedding";
+export {
+  HuggingFaceEmbedding,
+  type HuggingFaceEmbeddingParams,
+} from "./embedding";
 export { HuggingFaceLLM, type HFLLMConfig } from "./llm";
 export {
  HuggingFaceEmbeddingModelType,
@@ -1,3 +1,8 @@
+import type {
+  PreTrainedModel,
+  PreTrainedTokenizer,
+  Tensor,
+} from "@huggingface/transformers";
 import { wrapLLMEvent } from "@llamaindex/core/decorator";
 import { Settings } from "@llamaindex/core/global";
 import "@llamaindex/core/llms";
@@ -10,11 +15,6 @@ import {
  type LLMMetadata,
 } from "@llamaindex/core/llms";
 import { loadTransformers } from "@llamaindex/env/multi-model";
-import type {
-  PreTrainedModel,
-  PreTrainedTokenizer,
-  Tensor,
-} from "@xenova/transformers";
 import { DEFAULT_PARAMS } from "./shared";

 const DEFAULT_HUGGINGFACE_MODEL = "stabilityai/stablelm-tuned-alpha-3b";
@@ -122,7 +122,10 @@ export class HuggingFaceLLM extends BaseLLM {
    // TODO: the input for model.generate should be updated when using @xenova/transformers v3
    // We should add `stopping_criteria` also when it's supported in v3
    // See: https://github.com/xenova/transformers.js/blob/3260640b192b3e06a10a1f4dc004b1254fdf1b80/src/models.js#L1248C9-L1248C27
-    const outputs = await model.generate(inputs, this.metadata);
+    const outputs = (await model.generate({
+      inputs,
+      ...this.metadata,
+    })) as Tensor;
    const outputText = tokenizer.batch_decode(outputs, {
      skip_special_tokens: false,
    });
@@ -1,5 +1,25 @@
 # @llamaindex/ollama

+## 0.0.23
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 0.0.22
+
+### Patch Changes
+
+- 4d4cd8a: feat: support ollama tool call
+
+  Note that `OllamaEmbedding` is no longer a subclass of `Ollama`.
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 0.0.21

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/ollama",
  "description": "Ollama Adapter for LlamaIndex",
-  "version": "0.0.21",
+  "version": "0.0.23",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -0,0 +1,33 @@
+import {
+  LLMAgent,
+  LLMAgentWorker,
+  type LLMAgentParams,
+} from "@llamaindex/core/agent";
+import { Settings } from "@llamaindex/core/global";
+import { Ollama } from "./llm";
+
+// This is likely not necessary anymore, but leaving it here just in case it's in use elsewhere
+
+export type OllamaAgentParams = LLMAgentParams & {
+  model?: string;
+};
+
+export class OllamaAgentWorker extends LLMAgentWorker {}
+
+export class OllamaAgent extends LLMAgent {
+  constructor(params: OllamaAgentParams) {
+    const llm =
+      params.llm ??
+      (Settings.llm instanceof Ollama
+        ? (Settings.llm as Ollama)
+        : !params.model
+          ? (() => {
+              throw new Error("No model provided");
+            })()
+          : new Ollama({ model: params.model }));
+    super({
+      ...params,
+      llm,
+    });
+  }
+}
@@ -0,0 +1,29 @@
+import { BaseEmbedding } from "@llamaindex/core/embeddings";
+import { Ollama, type OllamaParams } from "./llm";
+
+export class OllamaEmbedding extends BaseEmbedding {
+  private readonly llm: Ollama;
+
+  constructor(params: OllamaParams) {
+    super();
+    this.llm = new Ollama(params);
+  }
+
+  private async getEmbedding(prompt: string): Promise<number[]> {
+    const payload = {
+      model: this.llm.model,
+      prompt,
+      options: {
+        ...this.llm.options,
+      },
+    };
+    const response = await this.llm.ollama.embeddings({
+      ...payload,
+    });
+    return response.embedding;
+  }
+
+  async getTextEmbedding(text: string): Promise<number[]> {
+    return this.getEmbedding(text);
+  }
+}
@@ -1,172 +1,7 @@
-import { BaseEmbedding } from "@llamaindex/core/embeddings";
-import type {
-  ChatResponse,
-  ChatResponseChunk,
-  CompletionResponse,
-  LLM,
-  LLMChatParamsNonStreaming,
-  LLMChatParamsStreaming,
-  LLMCompletionParamsNonStreaming,
-  LLMCompletionParamsStreaming,
-  LLMMetadata,
-} from "@llamaindex/core/llms";
-import { extractText, streamConverter } from "@llamaindex/core/utils";
-import {
-  Ollama as OllamaBase,
-  type Config,
-  type ChatResponse as OllamaChatResponse,
-  type GenerateResponse as OllamaGenerateResponse,
-  type Options,
-} from "ollama/browser";
-
-const messageAccessor = (part: OllamaChatResponse): ChatResponseChunk => {
-  return {
-    raw: part,
-    delta: part.message.content,
-  };
-};
-
-const completionAccessor = (
-  part: OllamaGenerateResponse,
-): CompletionResponse => {
-  return { text: part.response, raw: part };
-};
-
-export type OllamaParams = {
-  model: string;
-  config?: Partial<Config>;
-  options?: Partial<Options>;
-};
-
-export class Ollama extends BaseEmbedding implements LLM {
-  public readonly ollama: OllamaBase;
-
-  // https://ollama.ai/library
-  model: string;
-
-  options: Partial<Omit<Options, "num_ctx" | "top_p" | "temperature">> &
-    Pick<Options, "num_ctx" | "top_p" | "temperature"> = {
-    num_ctx: 4096,
-    top_p: 0.9,
-    temperature: 0.7,
-  };
-
-  constructor(params: OllamaParams) {
-    super();
-    this.model = params.model;
-    this.ollama = new OllamaBase(params.config);
-    if (params.options) {
-      this.options = {
-        ...this.options,
-        ...params.options,
-      };
-    }
-  }
-
-  get metadata(): LLMMetadata {
-    const { temperature, top_p, num_ctx } = this.options;
-    return {
-      model: this.model,
-      temperature: temperature,
-      topP: top_p,
-      maxTokens: this.options.num_ctx,
-      contextWindow: num_ctx,
-      tokenizer: undefined,
-    };
-  }
-
-  chat(
-    params: LLMChatParamsStreaming,
-  ): Promise<AsyncIterable<ChatResponseChunk>>;
-  chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
-  async chat(
-    params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
-  ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
-    const { messages, stream } = params;
-    const payload = {
-      model: this.model,
-      messages: messages.map((message) => ({
-        role: message.role,
-        content: extractText(message.content),
-      })),
-      stream: !!stream,
-      options: {
-        ...this.options,
-      },
-    };
-    if (!stream) {
-      const chatResponse = await this.ollama.chat({
-        ...payload,
-        stream: false,
-      });
-
-      return {
-        message: {
-          role: "assistant",
-          content: chatResponse.message.content,
-        },
-        raw: chatResponse,
-      };
-    } else {
-      const stream = await this.ollama.chat({
-        ...payload,
-        stream: true,
-      });
-      return streamConverter(stream, messageAccessor);
-    }
-  }
-
-  complete(
-    params: LLMCompletionParamsStreaming,
-  ): Promise<AsyncIterable<CompletionResponse>>;
-  complete(
-    params: LLMCompletionParamsNonStreaming,
-  ): Promise<CompletionResponse>;
-  async complete(
-    params: LLMCompletionParamsStreaming | LLMCompletionParamsNonStreaming,
-  ): Promise<CompletionResponse | AsyncIterable<CompletionResponse>> {
-    const { prompt, stream } = params;
-    const payload = {
-      model: this.model,
-      prompt: extractText(prompt),
-      stream: !!stream,
-      options: {
-        ...this.options,
-      },
-    };
-    if (!stream) {
-      const response = await this.ollama.generate({
-        ...payload,
-        stream: false,
-      });
-      return {
-        text: response.response,
-        raw: response,
-      };
-    } else {
-      const stream = await this.ollama.generate({
-        ...payload,
-        stream: true,
-      });
-      return streamConverter(stream, completionAccessor);
-    }
-  }
-
-  private async getEmbedding(prompt: string): Promise<number[]> {
-    const payload = {
-      model: this.model,
-      prompt,
-      options: {
-        ...this.options,
-      },
-    };
-    const response = await this.ollama.embeddings({
-      ...payload,
-    });
-    return response.embedding;
-  }
-
-  async getTextEmbedding(text: string): Promise<number[]> {
-    return this.getEmbedding(text);
-  }
-}
+export {
+  OllamaAgent,
+  OllamaAgentWorker,
+  type OllamaAgentParams,
+} from "./agent";
+export { OllamaEmbedding } from "./embedding";
+export { Ollama, type OllamaParams } from "./llm";
@@ -0,0 +1,224 @@
+import { wrapLLMEvent } from "@llamaindex/core/decorator";
+import {
+  ToolCallLLM,
+  type BaseTool,
+  type ChatResponse,
+  type ChatResponseChunk,
+  type CompletionResponse,
+  type LLMChatParamsNonStreaming,
+  type LLMChatParamsStreaming,
+  type LLMCompletionParamsNonStreaming,
+  type LLMCompletionParamsStreaming,
+  type LLMMetadata,
+  type ToolCallLLMMessageOptions,
+} from "@llamaindex/core/llms";
+import { extractText, streamConverter } from "@llamaindex/core/utils";
+import { randomUUID } from "@llamaindex/env";
+import type { ChatRequest, GenerateRequest, Tool } from "ollama";
+import {
+  Ollama as OllamaBase,
+  type Config,
+  type ChatResponse as OllamaChatResponse,
+  type GenerateResponse as OllamaGenerateResponse,
+  type Options,
+} from "ollama/browser";
+
+/**
+ * Maps one streamed Ollama chat part onto a chat response chunk.
+ *
+ * The part's message content becomes the chunk `delta`. When the part carries
+ * tool calls, they are exposed under `options.toolCall`, each with a freshly
+ * generated id.
+ */
+const messageAccessor = (
+  part: OllamaChatResponse,
+): ChatResponseChunk<ToolCallLLMMessageOptions> => {
+  const { content, tool_calls: requestedCalls } = part.message;
+
+  // Plain text part: nothing but the delta to report.
+  if (!requestedCalls) {
+    return { raw: part, delta: content };
+  }
+
+  const toolCall = requestedCalls.map(({ function: fn }) => ({
+    name: fn.name,
+    input: fn.arguments,
+    id: randomUUID(),
+  }));
+  return {
+    raw: part,
+    delta: content,
+    options: { toolCall },
+  };
+};
+
+/**
+ * Maps one streamed Ollama generate part onto a completion response: the
+ * generated text goes to `text` and the untouched part is kept as `raw`.
+ */
+const completionAccessor = (
+  part: OllamaGenerateResponse,
+): CompletionResponse => ({
+  text: part.response,
+  raw: part,
+});
+
+/**
+ * Constructor parameters for the `Ollama` class.
+ */
+export type OllamaParams = {
+  // Model name sent with every chat/generate request.
+  model: string;
+  // Passed as-is to the underlying `ollama` client constructor.
+  config?: Partial<Config>;
+  // Merged over the class defaults (num_ctx, top_p, temperature).
+  options?: Partial<Options>;
+};
+
+/**
+ * Ollama-backed LLM with chat, completion and tool-call support.
+ *
+ * Requests go through an `ollama` client instance that is created in the
+ * constructor and exposed as the read-only `ollama` property.
+ */
+export class Ollama extends ToolCallLLM {
+  supportToolCall: boolean = true;
+  public readonly ollama: OllamaBase;
+
+  // https://ollama.ai/library
+  model: string;
+
+  // Options sent with every chat/generate request. `num_ctx`, `top_p` and
+  // `temperature` always have a value; every other option is optional.
+  options: Partial<Omit<Options, "num_ctx" | "top_p" | "temperature">> &
+    Pick<Options, "num_ctx" | "top_p" | "temperature"> = {
+    num_ctx: 4096,
+    top_p: 0.9,
+    temperature: 0.7,
+  };
+
+  /**
+   * @param params - model name, optional client config and optional request
+   *   options; provided options override the defaults key by key.
+   */
+  constructor(params: OllamaParams) {
+    super();
+    this.model = params.model;
+    this.ollama = new OllamaBase(params.config);
+    if (params.options) {
+      this.options = {
+        ...this.options,
+        ...params.options,
+      };
+    }
+  }
+
+  /**
+   * Describes the current model and sampling configuration.
+   */
+  get metadata(): LLMMetadata {
+    const { temperature, top_p, num_ctx } = this.options;
+    return {
+      model: this.model,
+      temperature: temperature,
+      topP: top_p,
+      // NOTE(review): maxTokens mirrors num_ctx (the context window), not an
+      // output-token limit such as `num_predict` — confirm this is intended.
+      maxTokens: this.options.num_ctx,
+      contextWindow: num_ctx,
+      tokenizer: undefined,
+    };
+  }
+
+  /**
+   * Sends a chat request to Ollama.
+   *
+   * Messages carrying `options.toolResult` are forwarded with role "tool";
+   * all other messages are forwarded with their own role and text content.
+   * When `tools` is provided, each tool is converted with `Ollama.toTool`.
+   *
+   * @param params - messages plus optional `tools` and `stream` flag.
+   * @returns the assistant message (with `options.toolCall` when the model
+   *   requested tool calls), or an async iterable of chunks when streaming.
+   */
+  chat(
+    params: LLMChatParamsStreaming<ToolCallLLMMessageOptions>,
+  ): Promise<AsyncIterable<ChatResponseChunk>>;
+  chat(
+    params: LLMChatParamsNonStreaming<ToolCallLLMMessageOptions>,
+  ): Promise<ChatResponse<ToolCallLLMMessageOptions>>;
+  @wrapLLMEvent
+  async chat(
+    params:
+      | LLMChatParamsNonStreaming<object, ToolCallLLMMessageOptions>
+      | LLMChatParamsStreaming<object, ToolCallLLMMessageOptions>,
+  ): Promise<
+    ChatResponse<ToolCallLLMMessageOptions> | AsyncIterable<ChatResponseChunk>
+  > {
+    const { messages, stream, tools } = params;
+    const payload: ChatRequest = {
+      model: this.model,
+      messages: messages.map((message) => {
+        if (message.options && "toolResult" in message.options) {
+          return {
+            role: "tool",
+            content: message.options.toolResult.result,
+          };
+        }
+
+        return {
+          role: message.role,
+          content: extractText(message.content),
+        };
+      }),
+      stream: !!stream,
+      options: {
+        ...this.options,
+      },
+    };
+    if (tools) {
+      payload.tools = tools.map((tool) => Ollama.toTool(tool));
+    }
+    if (!stream) {
+      // `stream` is restated as a literal in each branch so the matching
+      // overload of the client's `chat` is selected.
+      const chatResponse = await this.ollama.chat({
+        ...payload,
+        stream: false,
+      });
+      if (chatResponse.message.tool_calls) {
+        return {
+          message: {
+            role: "assistant",
+            content: chatResponse.message.content,
+            options: {
+              // Ids are generated locally for each requested tool call.
+              toolCall: chatResponse.message.tool_calls.map((toolCall) => ({
+                name: toolCall.function.name,
+                input: toolCall.function.arguments,
+                id: randomUUID(),
+              })),
+            },
+          },
+          raw: chatResponse,
+        };
+      }
+
+      return {
+        message: {
+          role: "assistant",
+          content: chatResponse.message.content,
+        },
+        raw: chatResponse,
+      };
+    } else {
+      const stream = await this.ollama.chat({
+        ...payload,
+        stream: true,
+      });
+      return streamConverter(stream, messageAccessor);
+    }
+  }
+
+  /**
+   * Sends a text-completion (generate) request to Ollama.
+   *
+   * @param params - the prompt and optional `stream` flag.
+   * @returns the generated text with the raw response, or an async iterable
+   *   of completion responses when streaming.
+   */
+  complete(
+    params: LLMCompletionParamsStreaming,
+  ): Promise<AsyncIterable<CompletionResponse>>;
+  complete(
+    params: LLMCompletionParamsNonStreaming,
+  ): Promise<CompletionResponse>;
+  async complete(
+    params: LLMCompletionParamsStreaming | LLMCompletionParamsNonStreaming,
+  ): Promise<CompletionResponse | AsyncIterable<CompletionResponse>> {
+    const { prompt, stream } = params;
+    const payload: GenerateRequest = {
+      model: this.model,
+      prompt: extractText(prompt),
+      stream: !!stream,
+      options: {
+        ...this.options,
+      },
+    };
+    if (!stream) {
+      const response = await this.ollama.generate({
+        ...payload,
+        stream: false,
+      });
+      return {
+        text: response.response,
+        raw: response,
+      };
+    } else {
+      const stream = await this.ollama.generate({
+        ...payload,
+        stream: true,
+      });
+      return streamConverter(stream, completionAccessor);
+    }
+  }
+
+  /**
+   * Converts a LlamaIndex tool into the function-tool shape sent to Ollama.
+   *
+   * A tool without `parameters`, or whose schema omits `type`, `required` or
+   * `properties`, is described as an object schema with no properties and no
+   * required keys instead of leaving those fields `undefined`.
+   *
+   * @param tool - the tool whose metadata is converted.
+   * @returns the Ollama tool definition.
+   */
+  static toTool(tool: BaseTool): Tool {
+    const { name, description, parameters } = tool.metadata;
+    return {
+      type: "function",
+      function: {
+        name,
+        description,
+        parameters: {
+          type: parameters?.type ?? "object",
+          required: parameters?.required ?? [],
+          properties: parameters?.properties ?? {},
+        },
+      },
+    };
+  }
+}
@@ -1,5 +1,21 @@
 # @llamaindex/openai

+## 0.1.32
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 0.1.31
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 0.1.30

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/openai",
  "description": "OpenAI Adapter for LlamaIndex",
-  "version": "0.1.30",
+  "version": "0.1.32",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,21 @@
 # @llamaindex/portkey-ai

+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 0.0.15
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 0.0.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/portkey-ai",
  "description": "Portkey Adapter for LlamaIndex",
-  "version": "0.0.14",
+  "version": "0.0.16",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -1,5 +1,21 @@
 # @llamaindex/replicate

+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 0.0.15
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 0.0.14

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/replicate",
  "description": "Replicate Adapter for LlamaIndex",
-  "version": "0.0.14",
+  "version": "0.0.16",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -0,0 +1,8 @@
+# @llamaindex/vllm
+
+## 0.0.2
+
+### Patch Changes
+
+- f066e50: feat: vllm support
+  - @llamaindex/openai@0.1.32
@@ -0,0 +1,38 @@
+{
+  "name": "@llamaindex/vllm",
+  "description": "vLLM Adapter for LlamaIndex",
+  "version": "0.0.2",
+  "type": "module",
+  "main": "./dist/index.cjs",
+  "module": "./dist/index.js",
+  "exports": {
+    ".": {
+      "require": {
+        "types": "./dist/index.d.cts",
+        "default": "./dist/index.cjs"
+      },
+      "import": {
+        "types": "./dist/index.d.ts",
+        "default": "./dist/index.js"
+      }
+    }
+  },
+  "files": [
+    "dist"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/run-llama/LlamaIndexTS.git",
+    "directory": "packages/providers/vllm"
+  },
+  "scripts": {
+    "build": "bunchee",
+    "dev": "bunchee --watch"
+  },
+  "devDependencies": {
+    "bunchee": "5.6.1"
+  },
+  "dependencies": {
+    "@llamaindex/openai": "workspace:*"
+  }
+}
@@ -0,0 +1 @@
+export { VLLM, type VLLMParams } from "./llm";
@@ -0,0 +1,25 @@
+/**
+ * vLLM
+ *
+ * https://docs.vllm.ai/en/latest/index.html
+ *
+ * @module
+ */
+import { OpenAI } from "@llamaindex/openai";
+
+/**
+ * Constructor parameters for {@link VLLM}.
+ */
+export type VLLMParams = {
+  // Name of the model to request.
+  model: string;
+  // Base URL of the OpenAI-compatible endpoint.
+  // Defaults to "http://localhost:8000/v1" when omitted.
+  baseURL?: string;
+};
+
+/**
+ * vLLM adapter built on the OpenAI client: it configures the OpenAI adapter
+ * to talk to the given base URL instead of the default OpenAI endpoint.
+ */
+export class VLLM extends OpenAI {
+  /**
+   * @param params - model name and optional base URL of the server.
+   */
+  constructor(params: VLLMParams) {
+    super({
+      additionalSessionOptions: {
+        // Honor the caller-supplied endpoint; it used to be accepted by the
+        // type but ignored in favor of the hard-coded local URL.
+        baseURL: params.baseURL ?? "http://localhost:8000/v1",
+      },
+      model: params.model,
+      // NOTE(review): hard-coded placeholder key — presumably the server is
+      // expected to accept it or not check keys; confirm against deployment.
+      apiKey: "token-abc123",
+    });
+  }
+}
@@ -0,0 +1,16 @@
+{
+  "extends": "../../../tsconfig.json",
+  "compilerOptions": {
+    "target": "ESNext",
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "outDir": "./lib",
+    "tsBuildInfoFile": "./lib/.tsbuildinfo"
+  },
+  "include": ["./src", "package.json"],
+  "references": [
+    {
+      "path": "../openai/tsconfig.json"
+    }
+  ]
+}
@@ -1,5 +1,21 @@
 # @llamaindex/readers

+## 1.0.8
+
+### Patch Changes
+
+- Updated dependencies [d89ebe0]
+- Updated dependencies [fd8c882]
+  - @llamaindex/core@0.4.7
+
+## 1.0.7
+
+### Patch Changes
+
+- Updated dependencies [4fc001c]
+  - @llamaindex/env@0.1.20
+  - @llamaindex/core@0.4.6
+
 ## 1.0.6

 ### Patch Changes
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
github-actions[bot]	7b10882d06	Release 0.8.10 (#1466 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: himself65 <himself65@users.noreply.github.com>	2024-11-11 14:19:46 -08:00
Alex Yang	f066e50482	feat: vllm support (#1468 )	2024-11-11 13:14:08 -08:00
Alex Yang	fd8c882792	refactor: migrate example to new workflow API (#1467 )	2024-11-11 12:03:38 -08:00
Alex Yang	d89ebe0261	chore: update changeset	2024-11-11 10:11:04 -08:00
Alex Yang	968feb32cd	feat: better input type for function tool with `zod` (#1464 )	2024-11-11 10:10:03 -08:00
Alex Yang	43f6f56c5b	docs(next): fix turbo.json (#1465 )	2024-11-11 10:07:12 -08:00
github-actions[bot]	b2364dc5ba	Release 0.8.9 (#1460 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-11-10 23:32:41 -08:00
Alex Yang	67f4db8501	fix: steaming chat in ollama (#1463 )	2024-11-10 23:27:09 -08:00
Alex Yang	e4151a8b02	feat: support ollama agent (#1462 )	2024-11-10 22:38:40 -08:00
Alex Yang	4d4cd8ac6b	feat: support ollama tool call (#1461 )	2024-11-10 20:46:46 -08:00
Alex Yang	4fc001c8de	chore: bump `@huggingface/transformers` (#1459 )	2024-11-10 20:14:44 -08:00
Alex Yang	cf675bdc7a	chore: bump version (#1458 )	2024-11-10 16:43:45 -08:00
				`@@ -0,0 +1 @@`
				`export { VLLM, type VLLMParams } from "./llm";`