release

update mistral (#700 )
update example packages
2026-07-04 03:40:26 -04:00 · 2024-04-09 16:23:29 -07:00 · 2024-04-09 16:19:51 -07:00 · 2024-04-09 13:22:03 -07:00 · 2024-04-09 13:17:44 -07:00 · 2024-04-09 13:04:43 -07:00
141 changed files with 4482 additions and 3415 deletions
@@ -2,8 +2,32 @@ name: Run Tests

 on: [push, pull_request]

+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
+  e2e:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: pnpm/action-setup@v2
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version-file: ".nvmrc"
+          cache: "pnpm"
+      - name: Install dependencies
+        run: pnpm install
+      - name: Run E2E Tests
+        run: pnpm run e2e
+
  test:
+    strategy:
+      fail-fast: false
+      matrix:
+        node-version: [18.x, 20.x, 21.x]
+    name: Test on Node.js ${{ matrix.node-version }}
    runs-on: ubuntu-latest

    steps:
@@ -12,7 +36,7 @@ jobs:
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
-          node-version-file: ".nvmrc"
+          node-version: ${{ matrix.node-version }}
          cache: "pnpm"
      - name: Install dependencies
        run: pnpm install
@@ -15,12 +15,12 @@
    "typecheck": "tsc"
  },
  "dependencies": {
-    "@docusaurus/core": "^3.2.0",
-    "@docusaurus/remark-plugin-npm2yarn": "^3.2.0",
+    "@docusaurus/core": "^3.2.1",
+    "@docusaurus/remark-plugin-npm2yarn": "^3.2.1",
    "@llamaindex/examples": "workspace:*",
-    "@mdx-js/react": "^3.0.0",
+    "@mdx-js/react": "^3.0.1",
    "clsx": "^2.1.0",
-    "postcss": "^8.4.33",
+    "postcss": "^8.4.38",
    "prism-react-renderer": "^2.3.1",
    "raw-loader": "^4.0.2",
    "react": "^18.2.0",
@@ -28,15 +28,15 @@
  },
  "devDependencies": {
    "@docusaurus/module-type-aliases": "3.2.0",
-    "@docusaurus/preset-classic": "^3.2.0",
-    "@docusaurus/theme-classic": "^3.2.0",
-    "@docusaurus/types": "^3.2.0",
+    "@docusaurus/preset-classic": "^3.2.1",
+    "@docusaurus/theme-classic": "^3.2.1",
+    "@docusaurus/types": "^3.2.1",
    "@tsconfig/docusaurus": "^2.0.3",
-    "@types/node": "^18.19.10",
+    "@types/node": "^18.19.31",
    "docusaurus-plugin-typedoc": "^0.22.0",
-    "typedoc": "^0.25.12",
+    "typedoc": "^0.25.13",
    "typedoc-plugin-markdown": "^3.17.1",
-    "typescript": "^5.4.3"
+    "typescript": "^5.4.4"
  },
  "browserslist": {
    "production": [
@@ -86,7 +86,6 @@ async function main() {
    const agent = new OpenAIAgent({
      tools: queryEngineTools,
      llm: new OpenAI({ model: "gpt-4" }),
-      verbose: true,
    });

    documentAgents[title] = agent;
@@ -126,7 +125,6 @@ async function main() {
  const topAgent = new OpenAIAgent({
    toolRetriever: await objectIndex.asRetriever({}),
    llm: new OpenAI({ model: "gpt-4" }),
-    verbose: true,
    prefixMessages: [
      {
        content:
@@ -145,4 +143,4 @@ async function main() {
  });
 }

-main();
+void main();
@@ -59,7 +59,6 @@ async function main() {
  // Create an OpenAIAgent with the function tools
  const agent = new OpenAIAgent({
    tools: [functionTool, functionTool2],
-    verbose: true,
  });

  // Chat with the agent
@@ -71,6 +70,6 @@ async function main() {
  console.log(String(response));
 }

-main().then(() => {
+void main().then(() => {
  console.log("Done");
 });
@@ -29,7 +29,6 @@ async function main() {
  // Create an OpenAIAgent with the function tools
  const agent = new OpenAIAgent({
    tools: [queryEngineTool],
-    verbose: true,
  });

  // Chat with the agent
@@ -41,6 +40,6 @@ async function main() {
  console.log(String(response));
 }

-main().then(() => {
+void main().then(() => {
  console.log("Done");
 });
@@ -65,7 +65,6 @@ async function main() {
  const agent = new ReActAgent({
    llm: anthropic,
    tools: [functionTool, functionTool2],
-    verbose: true,
  });

  // Chat with the agent
@@ -77,6 +76,6 @@ async function main() {
  console.log(String(response));
 }

-main().then(() => {
+void main().then(() => {
  console.log("Done");
 });
@@ -59,7 +59,6 @@ async function main() {
  // Create an OpenAIAgent with the function tools
  const agent = new OpenAIAgent({
    tools: [functionTool, functionTool2],
-    verbose: true,
  });

  // Create a task to sum and divide numbers
@@ -90,6 +89,6 @@ async function main() {
  }
 }

-main().then(() => {
+void main().then(() => {
  console.log("Done");
 });
@@ -29,7 +29,6 @@ async function main() {
  // Create an OpenAIAgent with the function tools
  const agent = new OpenAIAgent({
    tools: [queryEngineTool],
-    verbose: true,
  });

  const task = agent.createTask("What was his salary?");
@@ -59,6 +58,6 @@ async function main() {
  }
 }

-main().then(() => {
+void main().then(() => {
  console.log("Done");
 });
@@ -59,7 +59,6 @@ async function main() {
  // Create an OpenAIAgent with the function tools
  const agent = new ReActAgent({
    tools: [functionTool, functionTool2],
-    verbose: true,
  });

  const task = agent.createTask("Divide 16 by 2 then add 20");
@@ -85,6 +84,6 @@ async function main() {
  }
 }

-main().then(() => {
+void main().then(() => {
  console.log("Done");
 });
@@ -59,7 +59,6 @@ async function main() {
  // Create an OpenAIAgent with the function tools
  const agent = new OpenAIAgent({
    tools: [functionTool, functionTool2],
-    verbose: false,
  });

  const stream = await agent.chat({
@@ -72,6 +71,6 @@ async function main() {
  }
 }

-main().then(() => {
+void main().then(() => {
  console.log("\nDone");
 });
@@ -0,0 +1,27 @@
+import { OpenAI, OpenAIAgent, WikipediaTool } from "llamaindex";
+
+async function main() {
+  const llm = new OpenAI({ model: "gpt-4-turbo" });
+  const wikiTool = new WikipediaTool();
+
+  // Create an OpenAIAgent with the Wikipedia tool
+  const agent = new OpenAIAgent({
+    llm,
+    tools: [wikiTool],
+  });
+
+  // Chat with the agent
+  const response = await agent.chat({
+    message: "Who was Goethe?",
+    stream: true,
+  });
+
+  for await (const chunk of response.response) {
+    process.stdout.write(chunk.response);
+  }
+}
+
+(async function () {
+  await main();
+  console.log("\nDone");
+})();
@@ -1,23 +0,0 @@
-import { OpenAIAgent, WikipediaTool } from "llamaindex";
-
-async function main() {
-  const wikipediaTool = new WikipediaTool();
-
-  // Create an OpenAIAgent with the function tools
-  const agent = new OpenAIAgent({
-    tools: [wikipediaTool],
-    verbose: true,
-  });
-
-  // Chat with the agent
-  const response = await agent.chat({
-    message: "Where is Ho Chi Minh City?",
-  });
-
-  // Print the response
-  console.log(response);
-}
-
-main().then(() => {
-  console.log("Done");
-});
@@ -55,4 +55,4 @@ async function main() {
  }
 }

-main();
+void main();
@@ -27,4 +27,4 @@ async function main() {
  }
 }

-main();
+void main();
@@ -23,4 +23,4 @@ async function main() {
  }
 }

-main();
+void main();
@@ -1,7 +1,18 @@
 import { stdin as input, stdout as output } from "node:process";
 import readline from "node:readline/promises";

-import { OpenAI, SimpleChatEngine, SummaryChatHistory } from "llamaindex";
+import {
+  OpenAI,
+  Settings,
+  SimpleChatEngine,
+  SummaryChatHistory,
+} from "llamaindex";
+
+if (process.env.NODE_ENV === "development") {
+  Settings.callbackManager.on("llm-end", (event) => {
+    console.log("callers chain", event.reason?.computedCallers);
+  });
+}

 async function main() {
  // Set maxTokens to 75% of the context window size of 4096
@@ -54,4 +54,4 @@ async function main() {
  }
 }

-main();
+void main();
@@ -37,4 +37,4 @@ async function main() {
  }
 }

-main();
+void main();
@@ -0,0 +1,44 @@
+import fs from "node:fs/promises";
+
+import { stdin as input, stdout as output } from "node:process";
+
+import readline from "node:readline/promises";
+
+import { Document, LlamaCloudIndex } from "llamaindex";
+
+async function main() {
+  const path = "node_modules/llamaindex/examples/abramov.txt";
+
+  const essay = await fs.readFile(path, "utf-8");
+
+  // Create Document object with essay
+  const document = new Document({ text: essay, id_: path });
+
+  const index = await LlamaCloudIndex.fromDocuments({
+    documents: [document],
+    name: "test",
+    projectName: "default",
+    apiKey: process.env.LLAMA_CLOUD_API_KEY,
+    baseUrl: process.env.LLAMA_CLOUD_BASE_URL,
+  });
+
+  const queryEngine = index.asQueryEngine({
+    denseSimilarityTopK: 5,
+  });
+
+  const rl = readline.createInterface({ input, output });
+
+  while (true) {
+    const query = await rl.question("Query: ");
+    const stream = await queryEngine.query({
+      query,
+      stream: true,
+    });
+    console.log();
+    for await (const chunk of stream) {
+      process.stdout.write(chunk.response);
+    }
+  }
+}
+
+main().catch(console.error);
@@ -22,4 +22,4 @@ However, general relativity, published in 1915, extended these ideas to include
  console.log(result);
 }

-main();
+void main();
@@ -36,4 +36,4 @@ async function main() {
  console.log(result);
 }

-main();
+void main();
@@ -37,4 +37,4 @@ async function main() {
  console.log(result);
 }

-main();
+void main();
@@ -36,9 +36,7 @@ async function main() {
    ],
  });

-  const json = JSON.parse(response.message.content);
-
-  console.log(json);
+  console.log(response.message.content);
 }

 main().catch(console.error);
@@ -23,4 +23,4 @@ async function main() {
  }
 }

-main();
+void main();
@@ -22,4 +22,4 @@ async function main() {
  }
 }

-main();
+void main();
@@ -61,4 +61,4 @@ async function main() {
  }
 }

-main();
+void main();
@@ -31,4 +31,4 @@ async function importJsonToMongo() {
 }

 // Run the import function
-importJsonToMongo();
+void importJsonToMongo();
@@ -27,4 +27,4 @@ async function query() {
  await client.close();
 }

-query();
+void query();
@@ -30,4 +30,4 @@ async function main() {
  console.log(`Similarity between "${text2}" and the image is ${sim2}`);
 }

-main();
+void main();
@@ -13,7 +13,7 @@ Settings.chunkSize = 512;
 Settings.chunkOverlap = 20;

 // Update llm
-Settings.llm = new OpenAI({ model: "gpt-4-vision-preview", maxTokens: 512 });
+Settings.llm = new OpenAI({ model: "gpt-4-turbo", maxTokens: 512 });

 // Update callbackManager
 Settings.callbackManager = new CallbackManager({
@@ -21,4 +21,4 @@ Sub-header content
  console.log(splits);
 }

-main();
+void main();
@@ -10,16 +10,16 @@
    "@zilliz/milvus2-sdk-node": "^2.3.5",
    "chromadb": "^1.8.1",
    "commander": "^11.1.0",
-    "dotenv": "^16.4.1",
+    "dotenv": "^16.4.5",
    "js-tiktoken": "^1.0.10",
-    "llamaindex": "latest",
-    "mongodb": "^6.2.0",
+    "llamaindex": "workspace:latest",
+    "mongodb": "^6.5.0",
    "pathe": "^1.1.2"
  },
  "devDependencies": {
-    "@types/node": "^18.19.10",
+    "@types/node": "^18.19.31",
    "ts-node": "^10.9.2",
-    "typescript": "^5.4.3"
+    "typescript": "^5.4.4"
  },
  "scripts": {
    "lint": "eslint ."
@@ -32,7 +32,7 @@ async function main(args: any) {
  console.log(`Found ${count} files`);

  console.log(`Importing contents from ${count} files in ${sourceDir}`);
-  var fileName = "";
+  const fileName = "";
  try {
    // Passing callback fn to the ctor here
    // will enable looging to console.
@@ -42,7 +42,7 @@ async function main(args: any) {

    const pgvs = new PGVectorStore();
    pgvs.setCollection(sourceDir);
-    pgvs.clearCollection();
+    await pgvs.clearCollection();

    const ctx = await storageContextFromDefaults({ vectorStore: pgvs });

@@ -65,4 +65,4 @@ async function main(args: any) {
  process.exit(0);
 }

-main(process.argv).catch((err) => console.error(err));
+void main(process.argv).catch((err) => console.error(err));
@@ -32,7 +32,7 @@ async function main(args: any) {
  console.log(`Found ${count} files`);

  console.log(`Importing contents from ${count} files in ${sourceDir}`);
-  var fileName = "";
+  const fileName = "";
  try {
    // Passing callback fn to the ctor here
    // will enable looging to console.
@@ -63,4 +63,4 @@ async function main(args: any) {
  process.exit(0);
 }

-main(process.argv).catch((err) => console.error(err));
+void main(process.argv).catch((err) => console.error(err));
@@ -45,4 +45,4 @@ async function main() {
  await queryEngine.query({ query });
 }

-main();
+void main();
@@ -79,4 +79,4 @@ async function main() {
  }
 }

-main();
+void main();
@@ -20,4 +20,4 @@ async function main() {
  console.log(`Test query > ${SAMPLE_QUERY}:\n`, response.toString());
 }

-main();
+void main();
@@ -20,4 +20,4 @@ async function main() {
  console.log(`Test query > ${SAMPLE_QUERY}:\n`, response.toString());
 }

-main();
+void main();
@@ -1,6 +1,7 @@
 import { encodingForModel } from "js-tiktoken";
 import { OpenAI } from "llamaindex";
 import { Settings } from "llamaindex/Settings";
+import { extractText } from "llamaindex/llm/utils";

 const encoding = encodingForModel("gpt-4-0125-preview");

@@ -13,7 +14,7 @@ let tokenCount = 0;
 Settings.callbackManager.on("llm-start", (event) => {
  const { messages } = event.detail.payload;
  tokenCount += messages.reduce((count, message) => {
-    return count + encoding.encode(message.content).length;
+    return count + encoding.encode(extractText(message.content)).length;
  }, 0);
  console.log("Token count:", tokenCount);
  // https://openai.com/pricing
@@ -22,7 +23,7 @@ Settings.callbackManager.on("llm-start", (event) => {
 });
 Settings.callbackManager.on("llm-end", (event) => {
  const { response } = event.detail.payload;
-  tokenCount += encoding.encode(response.message.content).length;
+  tokenCount += encoding.encode(extractText(response.message.content)).length;
  console.log("Token count:", tokenCount);
  // https://openai.com/pricing
  // $30.00 / 1M tokens
@@ -31,7 +32,7 @@ Settings.callbackManager.on("llm-end", (event) => {

 const question = "Hello, how are you?";
 console.log("Question:", question);
-llm
+void llm
  .chat({
    stream: true,
    messages: [
@@ -65,4 +65,4 @@ async function main() {
  });
 }

-main().then(() => console.log("Done"));
+void main().then(() => console.log("Done"));
@@ -13,4 +13,4 @@ async function main() {
  console.log(chunks);
 }

-main();
+void main();
@@ -0,0 +1,47 @@
+import { ChatResponseChunk, OpenAI } from "llamaindex";
+
+async function main() {
+  const llm = new OpenAI({ model: "gpt-4-turbo" });
+
+  const args: Parameters<typeof llm.chat>[0] = {
+    additionalChatOptions: {
+      tool_choice: "auto",
+    },
+    messages: [
+      {
+        content: "Who was Goethe?",
+        role: "user",
+      },
+    ],
+    tools: [
+      {
+        metadata: {
+          name: "wikipedia_tool",
+          description: "A tool that uses a query engine to search Wikipedia.",
+          parameters: {
+            type: "object",
+            properties: {
+              query: {
+                type: "string",
+                description: "The query to search for",
+              },
+            },
+            required: ["query"],
+          },
+        },
+      },
+    ],
+  };
+
+  const stream = await llm.chat({ ...args, stream: true });
+  let chunk: ChatResponseChunk | null = null;
+  for await (chunk of stream) {
+    process.stdout.write(chunk.delta);
+  }
+  console.log(chunk?.additionalKwargs?.toolCalls[0]);
+}
+
+(async function () {
+  await main();
+  console.log("Done");
+})();
@@ -1,7 +1,7 @@
 import { OpenAI } from "llamaindex";

 (async () => {
-  const llm = new OpenAI({ model: "gpt-4-vision-preview", temperature: 0.1 });
+  const llm = new OpenAI({ model: "gpt-4-turbo", temperature: 0.1 });

  // complete api
  const response1 = await llm.complete({ prompt: "How are you?" });
@@ -9,27 +9,27 @@
    "format:write": "prettier --ignore-unknown --write .",
    "lint": "turbo run lint",
    "prepare": "husky",
+    "e2e": "turbo run e2e",
    "test": "turbo run test",
    "type-check": "tsc -b --diagnostics",
    "release": "pnpm run check-minor-version && pnpm run build:release && changeset publish",
    "release-snapshot": "pnpm run check-minor-version && pnpm run build:release && changeset publish --tag snapshot",
    "check-minor-version": "node ./scripts/check-minor-version",
-    "update-version": "node ./scripts/update-version",
-    "new-version": "pnpm run build:release && changeset version && pnpm run check-minor-version && pnpm run update-version",
-    "new-snapshot": "pnpm run build:release && changeset version --snapshot && pnpm run update-version"
+    "new-version": "pnpm run build:release && changeset version && pnpm run check-minor-version",
+    "new-snapshot": "pnpm run build:release && changeset version --snapshot"
  },
  "devDependencies": {
    "@changesets/cli": "^2.27.1",
-    "eslint": "^8.56.0",
+    "eslint": "^8.57.0",
    "eslint-config-custom": "workspace:*",
-    "husky": "^9.0.10",
+    "husky": "^9.0.11",
    "lint-staged": "^15.2.2",
    "prettier": "^3.2.5",
    "prettier-plugin-organize-imports": "^3.2.4",
-    "turbo": "^1.12.3",
-    "typescript": "^5.4.3"
+    "turbo": "^1.13.2",
+    "typescript": "^5.4.4"
  },
-  "packageManager": "pnpm@8.15.1",
+  "packageManager": "pnpm@8.15.6+sha256.01c01eeb990e379b31ef19c03e9d06a14afa5250b82e81303f88721c99ff2e6f",
  "pnpm": {
    "overrides": {
      "trim": "1.0.1",
@@ -1,5 +1,20 @@
 # llamaindex

+## 0.2.4
+
+### Patch Changes
+
+- 3bc77f7: gpt-4-turbo GA
+- 8d2b21e: Mistral 0.1.3
+
+## 0.2.3
+
+### Patch Changes
+
+- f0704ec: Support streaming for OpenAI agent (and OpenAI tool calls)
+- Removed 'parentEvent' - Use 'event.reason?.computedCallers' instead
+- 3cbfa98: Added LlamaCloudIndex.fromDocuments
+
 ## 0.2.2

 ### Patch Changes
@@ -0,0 +1 @@
+logs
@@ -0,0 +1,38 @@
+# LlamaIndexTS Core E2E Tests
+
+## Overview
+
+We are using Node.js Test Runner to run E2E tests for LlamaIndexTS Core.
+
+It supports the following features:
+
+- Run tests in parallel
+- Pure Node.js Environment
+- Switch between mock and real LLM API
+- Customizable logics
+
+## Usage
+
+- Run with mock register:
+
+```shell
+node --import tsx --import ./mock-register.js --test ./node/basic.e2e.ts
+```
+
+- Run without mock register:
+
+```shell
+node --import tsx --test ./node/basic.e2e.ts
+```
+
+- Run with specific test:
+
+```shell
+node --import tsx --import ./mock-register.js --test-name-pattern=agent --test ./node/basic.e2e.ts
+```
+
+- Run with debug logs:
+
+```shell
+CONSOLA_LEVEL=5 node --import tsx --import ./mock-register.js --test-name-pattern=agent --test ./node/basic.e2e.ts
+```
@@ -0,0 +1,68 @@
+import { faker } from "@faker-js/faker";
+import type {
+  ChatResponse,
+  ChatResponseChunk,
+  CompletionResponse,
+  LLM,
+  LLMChatParamsNonStreaming,
+  LLMChatParamsStreaming,
+  LLMCompletionParamsNonStreaming,
+  LLMCompletionParamsStreaming,
+} from "llamaindex/llm/types";
+
+export function getOpenAISession() {
+  return {};
+}
+
+export function isFunctionCallingModel() {
+  return true;
+}
+
+export class OpenAI implements LLM {
+  get metadata() {
+    return {
+      model: "mock-model",
+      temperature: 0.1,
+      topP: 1,
+      contextWindow: 2048,
+      tokenizer: undefined,
+      isFunctionCallingModel: true,
+    };
+  }
+  chat(
+    params: LLMChatParamsStreaming<Record<string, unknown>>,
+  ): Promise<AsyncIterable<ChatResponseChunk>>;
+  chat(
+    params: LLMChatParamsNonStreaming<Record<string, unknown>>,
+  ): Promise<ChatResponse>;
+  chat(
+    params:
+      | LLMChatParamsStreaming<Record<string, unknown>>
+      | LLMChatParamsNonStreaming<Record<string, unknown>>,
+  ): unknown {
+    if (params.stream) {
+      return {
+        [Symbol.asyncIterator]: async function* () {
+          yield {
+            delta: faker.word.words(),
+          } satisfies ChatResponseChunk;
+        },
+      };
+    }
+    return {
+      message: {
+        content: faker.lorem.paragraph(),
+        role: "assistant",
+      },
+    } satisfies ChatResponse;
+  }
+  complete(
+    params: LLMCompletionParamsStreaming,
+  ): Promise<AsyncIterable<CompletionResponse>>;
+  complete(
+    params: LLMCompletionParamsNonStreaming,
+  ): Promise<CompletionResponse>;
+  async complete(params: unknown): Promise<unknown> {
+    throw new Error("Method not implemented.");
+  }
+}
@@ -0,0 +1,36 @@
+/**
+ * This script will replace the resolved module with the corresponding fixture file.
+ */
+import { stat } from "node:fs/promises";
+import { join, relative } from "node:path";
+import { fileURLToPath, pathToFileURL } from "node:url";
+const packageDistDir = fileURLToPath(new URL("../dist", import.meta.url));
+const fixturesDir = fileURLToPath(new URL("./fixtures", import.meta.url));
+
+export async function resolve(specifier, context, nextResolve) {
+  const result = await nextResolve(specifier, context);
+  if (result.format === "builtin" || result.url.startsWith("node:")) {
+    return result;
+  }
+  const targetUrl = fileURLToPath(result.url).replace(/\.js$/, ".ts");
+  const relativePath = relative(packageDistDir, targetUrl);
+  if (relativePath.startsWith(".") || relativePath.startsWith("/")) {
+    return result;
+  }
+  const url = pathToFileURL(join(fixturesDir, relativePath)).toString();
+  const exist = await stat(fileURLToPath(url))
+    .then((stat) => stat.isFile())
+    .catch((err) => {
+      if (err.code === "ENOENT") {
+        return false;
+      }
+      throw err;
+    });
+  if (!exist) {
+    return result;
+  }
+  return {
+    url,
+    format: "module",
+  };
+}
@@ -0,0 +1,3 @@
+import { register } from "node:module";
+
+register("./mock-module.js", import.meta.url);
@@ -0,0 +1,139 @@
+/* eslint-disable @typescript-eslint/no-floating-promises */
+import { consola } from "consola";
+import {
+  OpenAI,
+  OpenAIAgent,
+  Settings,
+  type LLM,
+  type LLMEndEvent,
+  type LLMStartEvent,
+} from "llamaindex";
+import { ok } from "node:assert";
+import type { WriteStream } from "node:fs";
+import { createWriteStream } from "node:fs";
+import { mkdir } from "node:fs/promises";
+import { join } from "node:path";
+import { after, before, beforeEach, describe, test } from "node:test";
+import { inspect } from "node:util";
+
+let llm: LLM;
+let fsStream: WriteStream;
+before(async () => {
+  const logUrl = new URL(
+    join(
+      "..",
+      "logs",
+      `basic.e2e.${new Date().toISOString().replace(/:/g, "-").replace(/\./g, "-")}.log`,
+    ),
+    import.meta.url,
+  );
+  await mkdir(new URL(".", logUrl), { recursive: true });
+  fsStream = createWriteStream(logUrl, {
+    encoding: "utf-8",
+  });
+});
+
+after(() => {
+  fsStream.end();
+});
+
+beforeEach((s) => {
+  fsStream.write("start: " + s.name + "\n");
+});
+
+const llmEventStartHandler = (event: LLMStartEvent) => {
+  const { payload } = event.detail;
+  fsStream.write(
+    "llmEventStart: " +
+      inspect(payload, {
+        depth: Infinity,
+      }) +
+      "\n",
+  );
+};
+
+const llmEventEndHandler = (event: LLMEndEvent) => {
+  const { payload } = event.detail;
+  fsStream.write(
+    "llmEventEnd: " +
+      inspect(payload, {
+        depth: Infinity,
+      }) +
+      "\n",
+  );
+};
+
+before(() => {
+  Settings.llm = new OpenAI({
+    model: "gpt-3.5-turbo",
+  });
+  llm = Settings.llm;
+  Settings.callbackManager.on("llm-start", llmEventStartHandler);
+  Settings.callbackManager.on("llm-end", llmEventEndHandler);
+});
+
+after(() => {
+  Settings.callbackManager.off("llm-start", llmEventStartHandler);
+  Settings.callbackManager.off("llm-end", llmEventEndHandler);
+});
+
+describe("llm", () => {
+  test("llm.chat", async () => {
+    const response = await llm.chat({
+      messages: [
+        {
+          content: "Hello",
+          role: "user",
+        },
+      ],
+    });
+    consola.debug("response:", response);
+    ok(typeof response.message.content === "string");
+  });
+
+  test("stream llm.chat", async () => {
+    const iter = await llm.chat({
+      stream: true,
+      messages: [
+        {
+          content: "hello",
+          role: "user",
+        },
+      ],
+    });
+    for await (const chunk of iter) {
+      consola.debug("chunk:", chunk);
+      ok(typeof chunk.delta === "string");
+    }
+  });
+});
+
+describe("agent", () => {
+  test("agent.chat", async () => {
+    const agent = new OpenAIAgent({
+      tools: [
+        {
+          call: async () => {
+            return "35 degrees and sunny in San Francisco";
+          },
+          metadata: {
+            name: "Weather",
+            description: "Get the weather",
+            parameters: {
+              type: "object",
+              properties: {
+                location: { type: "string" },
+              },
+              required: ["location"],
+            },
+          },
+        },
+      ],
+    });
+    const result = await agent.chat({
+      message: "What is the weather in San Francisco?",
+    });
+    consola.debug("response:", result.response);
+    ok(typeof result.response === "string");
+  });
+});
@@ -0,0 +1,16 @@
+{
+  "name": "@llamaindex/core-e2e",
+  "private": true,
+  "version": "0.0.2",
+  "type": "module",
+  "scripts": {
+    "e2e": "node --import tsx --import ./mock-register.js --test ./node/*.e2e.ts",
+    "e2e:nomock": "node --import tsx --test ./node/*.e2e.ts"
+  },
+  "devDependencies": {
+    "@faker-js/faker": "^8.4.1",
+    "consola": "^3.2.3",
+    "llamaindex": "workspace:*",
+    "tsx": "^4.7.2"
+  }
+}
@@ -0,0 +1,23 @@
+{
+  "extends": "../../../tsconfig.json",
+  "compilerOptions": {
+    "outDir": "./lib",
+    "module": "node16",
+    "moduleResolution": "node16",
+    "target": "ESNext"
+  },
+  "include": [
+    "./**/*.ts",
+    "./mock-module.js",
+    "./mock-register.js",
+    "./fixtures"
+  ],
+  "references": [
+    {
+      "path": "../../core/tsconfig.json"
+    },
+    {
+      "path": "../../env/tsconfig.json"
+    }
+  ]
+}
@@ -1,8 +1,8 @@
 {
  "name": "@llamaindex/core",
-  "version": "0.1.21",
+  "version": "0.2.3",
  "exports": "./src/index.ts",
  "imports": {
-    "@llamaindex/env": "jsr:@llamaindex/env@0.0.5"
+    "@llamaindex/env": "jsr:@llamaindex/env@0.0.6"
  }
 }
@@ -1,6 +1,6 @@
 {
  "name": "llamaindex",
-  "version": "0.2.2",
+  "version": "0.2.4",
  "expectedMinorVersion": "2",
  "license": "MIT",
  "type": "module",
@@ -8,49 +8,49 @@
    "@anthropic-ai/sdk": "^0.18.0",
    "@aws-crypto/sha256-js": "^5.2.0",
    "@datastax/astra-db-ts": "^0.1.4",
-    "@grpc/grpc-js": "^1.10.2",
-    "@llamaindex/cloud": "0.0.4",
+    "@grpc/grpc-js": "^1.10.6",
+    "@llamaindex/cloud": "0.0.5",
    "@llamaindex/env": "workspace:*",
-    "@mistralai/mistralai": "^0.0.10",
+    "@mistralai/mistralai": "^0.1.3",
    "@notionhq/client": "^2.2.14",
-    "@pinecone-database/pinecone": "^2.0.1",
-    "@qdrant/js-client-rest": "^1.7.0",
-    "@types/lodash": "^4.14.202",
-    "@types/node": "^18.19.14",
+    "@pinecone-database/pinecone": "^2.2.0",
+    "@qdrant/js-client-rest": "^1.8.2",
+    "@types/lodash": "^4.17.0",
+    "@types/node": "^18.19.31",
    "@types/papaparse": "^5.3.14",
-    "@types/pg": "^8.11.0",
-    "@xenova/transformers": "^2.15.0",
+    "@types/pg": "^8.11.5",
+    "@xenova/transformers": "^2.16.1",
    "@zilliz/milvus2-sdk-node": "^2.3.5",
-    "assemblyai": "^4.2.2",
+    "assemblyai": "^4.3.4",
    "chromadb": "~1.7.3",
-    "cohere-ai": "^7.7.5",
+    "cohere-ai": "^7.9.2",
    "js-tiktoken": "^1.0.10",
    "lodash": "^4.17.21",
    "magic-bytes.js": "^1.10.0",
-    "mammoth": "^1.6.0",
+    "mammoth": "^1.7.1",
    "md-utils-ts": "^2.0.0",
-    "mongodb": "^6.3.0",
+    "mongodb": "^6.5.0",
    "notion-md-crawler": "^0.0.2",
-    "openai": "^4.26.1",
+    "openai": "^4.33.0",
    "papaparse": "^5.4.1",
    "pathe": "^1.1.2",
    "pdf2json": "^3.0.5",
-    "pg": "^8.11.3",
-    "pgvector": "^0.1.7",
+    "pg": "^8.11.5",
+    "pgvector": "^0.1.8",
    "portkey-ai": "^0.1.16",
    "rake-modified": "^1.0.8",
    "replicate": "^0.25.2",
-    "string-strip-html": "^13.4.6",
-    "wink-nlp": "^1.14.3",
-    "wikipedia": "^2.1.2"
+    "string-strip-html": "^13.4.8",
+    "wikipedia": "^2.1.2",
+    "wink-nlp": "^1.14.3"
  },
  "devDependencies": {
-    "@swc/cli": "^0.3.9",
-    "@swc/core": "^1.4.2",
+    "@swc/cli": "^0.3.12",
+    "@swc/core": "^1.4.13",
    "concurrently": "^8.2.2",
-    "glob": "^10.3.10",
+    "glob": "^10.3.12",
    "madge": "^6.1.0",
-    "typescript": "^5.3.3"
+    "typescript": "^5.4.4"
  },
  "engines": {
    "node": ">=18.0.0"
@@ -1,8 +1,9 @@
 import { globalsHelper } from "./GlobalsHelper.js";
 import type { SummaryPrompt } from "./Prompt.js";
 import { defaultSummaryPrompt, messagesToHistoryStr } from "./Prompt.js";
-import { OpenAI } from "./llm/LLM.js";
+import { OpenAI } from "./llm/open_ai.js";
 import type { ChatMessage, LLM, MessageType } from "./llm/types.js";
+import { extractText } from "./llm/utils.js";

 /**
 * A ChatHistory is used to keep the state of back and forth chat messages
@@ -188,7 +189,8 @@ export class SummaryChatHistory extends ChatHistory {

    // get tokens of current request messages and the transient messages
    const tokens = requestMessages.reduce(
-      (count, message) => count + this.tokenizer(message.content).length,
+      (count, message) =>
+        count + this.tokenizer(extractText(message.content)).length,
      0,
    );
    if (tokens > this.tokensToSummarize) {
@@ -1,12 +1,5 @@
 import { encodingForModel } from "js-tiktoken";

-import { randomUUID } from "@llamaindex/env";
-import type {
-  Event,
-  EventTag,
-  EventType,
-} from "./callbacks/CallbackManager.js";
-
 export enum Tokenizers {
  CL100K_BASE = "cl100k_base",
 }
@@ -51,39 +44,6 @@ class GlobalsHelper {

    return this.defaultTokenizer!.decode.bind(this.defaultTokenizer);
  }
-
-  /**
-   * @deprecated createEvent will be removed in the future,
-   *  please use `new CustomEvent(eventType, { detail: payload })` instead.
-   *
-   *  Also, `parentEvent` will not be used in the future,
-   *    use `AsyncLocalStorage` to track parent events instead.
-   *    @example - Usage of `AsyncLocalStorage`:
-   *    let id = 0;
-   *    const asyncLocalStorage = new AsyncLocalStorage<number>();
-   *    asyncLocalStorage.run(++id, async () => {
-   *      setTimeout(() => {
-   *        console.log('parent event id:', asyncLocalStorage.getStore()); // 1
-   *      }, 1000)
-   *    });
-   */
-  createEvent({
-    parentEvent,
-    type,
-    tags,
-  }: {
-    parentEvent?: Event;
-    type: EventType;
-    tags?: EventTag[];
-  }): Event {
-    return {
-      id: randomUUID(),
-      type,
-      // inherit parent tags if tags not set
-      tags: tags || parentEvent?.tags,
-      parentId: parentEvent?.id,
-    };
-  }
 }

 export const globalsHelper = new GlobalsHelper();
@@ -5,7 +5,7 @@ import type {
  BaseQuestionGenerator,
  SubQuestion,
 } from "./engines/query/types.js";
-import { OpenAI } from "./llm/LLM.js";
+import { OpenAI } from "./llm/open_ai.js";
 import type { LLM } from "./llm/types.js";
 import { PromptMixin } from "./prompts/index.js";
 import type {
@@ -1,13 +1,8 @@
-import type { Event } from "./callbacks/CallbackManager.js";
 import type { NodeWithScore } from "./Node.js";
 import type { ServiceContext } from "./ServiceContext.js";

 export type RetrieveParams = {
  query: string;
-  /**
-   * @deprecated will be removed in the next major version
-   */
-  parentEvent?: Event;
  preFilters?: unknown;
 };

@@ -1,7 +1,7 @@
 import { PromptHelper } from "./PromptHelper.js";
 import { OpenAIEmbedding } from "./embeddings/OpenAIEmbedding.js";
 import type { BaseEmbedding } from "./embeddings/types.js";
-import { OpenAI } from "./llm/LLM.js";
+import { OpenAI } from "./llm/open_ai.js";
 import type { LLM } from "./llm/types.js";
 import { SimpleNodeParser } from "./nodeParsers/SimpleNodeParser.js";
 import type { NodeParser } from "./nodeParsers/types.js";
@@ -1,11 +1,11 @@
 import { CallbackManager } from "./callbacks/CallbackManager.js";
 import { OpenAIEmbedding } from "./embeddings/OpenAIEmbedding.js";
-import { OpenAI } from "./llm/LLM.js";
+import { OpenAI } from "./llm/open_ai.js";

 import { PromptHelper } from "./PromptHelper.js";
 import { SimpleNodeParser } from "./nodeParsers/SimpleNodeParser.js";

-import { AsyncLocalStorage } from "@llamaindex/env";
+import { AsyncLocalStorage, getEnv } from "@llamaindex/env";
 import type { ServiceContext } from "./ServiceContext.js";
 import type { BaseEmbedding } from "./embeddings/types.js";
 import {
@@ -52,6 +52,15 @@ class GlobalSettings implements Config {
  #chunkOverlapAsyncLocalStorage = new AsyncLocalStorage<number>();
  #promptAsyncLocalStorage = new AsyncLocalStorage<PromptConfig>();

+  get debug() {
+    const debug = getEnv("DEBUG");
+    return (
+      getEnv("NODE_ENV") === "development" &&
+      Boolean(debug) &&
+      debug?.includes("llamaindex")
+    );
+  }
+
  get llm(): LLM {
    if (this.#llm === null) {
      this.#llm = new OpenAI();
@@ -66,8 +66,9 @@ export const defaultParagraphSeparator = EOL + EOL + EOL;
 * One of the advantages of SentenceSplitter is that even in the fixed length chunks it will try to keep sentences together.
 */
 export class SentenceSplitter {
-  private chunkSize: number;
-  private chunkOverlap: number;
+  public chunkSize: number;
+  public chunkOverlap: number;
+
  private tokenizer: any;
  private tokenizerDecoder: any;
  private paragraphSeparator: string;
@@ -1,3 +1,4 @@
+import { Settings } from "../../Settings.js";
 import type { ChatMessage } from "../../llm/index.js";
 import { OpenAI } from "../../llm/index.js";
 import type { ObjectRetriever } from "../../objects/base.js";
@@ -10,7 +11,6 @@ type OpenAIAgentParams = {
  llm?: OpenAI;
  memory?: any;
  prefixMessages?: ChatMessage[];
-  verbose?: boolean;
  maxFunctionCalls?: number;
  defaultToolChoice?: string;
  toolRetriever?: ObjectRetriever;
@@ -28,13 +28,19 @@ export class OpenAIAgent extends AgentRunner {
    llm,
    memory,
    prefixMessages,
-    verbose,
    maxFunctionCalls = 5,
    defaultToolChoice = "auto",
    toolRetriever,
    systemPrompt,
  }: OpenAIAgentParams) {
-    llm = llm ?? new OpenAI({ model: "gpt-3.5-turbo-0613" });
+    if (!llm) {
+      if (Settings.llm instanceof OpenAI) {
+        llm = Settings.llm;
+      } else {
+        console.warn("No OpenAI model provided, creating a new one");
+        llm = new OpenAI({ model: "gpt-3.5-turbo-0613" });
+      }
+    }

    if (systemPrompt) {
      if (prefixMessages) {
@@ -59,11 +65,11 @@ export class OpenAIAgent extends AgentRunner {
      prefixMessages,
      maxFunctionCalls,
      toolRetriever,
-      verbose,
    });

    super({
      agentWorker: stepEngine,
+      llm,
      memory,
      defaultToolChoice,
      chatHistory: prefixMessages,
@@ -1,27 +0,0 @@
-import type { ToolMetadata } from "../../types.js";
-
-export type OpenAIFunction = {
-  type: "function";
-  function: ToolMetadata;
-};
-
-type OpenAiTool = {
-  name: string;
-  description: string;
-  parameters: ToolMetadata["parameters"];
-};
-
-export const toOpenAiTool = ({
-  name,
-  description,
-  parameters,
-}: OpenAiTool): OpenAIFunction => {
-  return {
-    type: "function",
-    function: {
-      name: name,
-      description: description,
-      parameters,
-    },
-  };
-};
@@ -1,17 +1,25 @@
 import { randomUUID } from "@llamaindex/env";
+import type { ChatCompletionToolChoiceOption } from "openai/resources/chat/completions";
 import { Response } from "../../Response.js";
+import { Settings } from "../../Settings.js";
 import {
  AgentChatResponse,
  ChatResponseMode,
  StreamingAgentChatResponse,
 } from "../../engines/chat/types.js";
-import type {
-  ChatMessage,
-  ChatResponse,
-  ChatResponseChunk,
+import {
+  OpenAI,
+  isFunctionCallingModel,
+  type ChatMessage,
+  type ChatResponseChunk,
+  type LLMChatParamsBase,
+  type OpenAIAdditionalChatOptions,
 } from "../../llm/index.js";
-import { OpenAI } from "../../llm/index.js";
-import { streamConverter, streamReducer } from "../../llm/utils.js";
+import {
+  extractText,
+  streamConverter,
+  streamReducer,
+} from "../../llm/utils.js";
 import { ChatMemoryBuffer } from "../../memory/ChatMemoryBuffer.js";
 import type { ObjectRetriever } from "../../objects/base.js";
 import type { ToolOutput } from "../../tools/types.js";
@@ -21,28 +29,17 @@ import type { AgentWorker, Task } from "../types.js";
 import { TaskStep, TaskStepOutput } from "../types.js";
 import { addUserStepToMemory, getFunctionByName } from "../utils.js";
 import type { OpenAIToolCall } from "./types/chat.js";
-import { toOpenAiTool } from "./utils.js";

-const DEFAULT_MAX_FUNCTION_CALLS = 5;
-
-/**
- * Call function.
- * @param tools: tools
- * @param toolCall: tool call
- * @param verbose: verbose
- * @returns: void
- */
 async function callFunction(
  tools: BaseTool[],
  toolCall: OpenAIToolCall,
-  verbose: boolean = false,
 ): Promise<[ChatMessage, ToolOutput]> {
  const id_ = toolCall.id;
  const functionCall = toolCall.function;
  const name = toolCall.function.name;
  const argumentsStr = toolCall.function.arguments;

-  if (verbose) {
+  if (Settings.debug) {
    console.log("=== Calling Function ===");
    console.log(`Calling function: ${name} with args: ${argumentsStr}`);
  }
@@ -54,7 +51,7 @@ async function callFunction(
  // Use default error message
  const output = await callToolWithErrorHandling(tool, argumentDict, null);

-  if (verbose) {
+  if (Settings.debug) {
    console.log(`Got output ${output}`);
    console.log("==========================");
  }
@@ -76,7 +73,6 @@ type OpenAIAgentWorkerParams = {
  tools?: BaseTool[];
  llm?: OpenAI;
  prefixMessages?: ChatMessage[];
-  verbose?: boolean;
  maxFunctionCalls?: number;
  toolRetriever?: ObjectRetriever;
 };
@@ -86,40 +82,40 @@ type CallFunctionOutput = {
  toolOutput: ToolOutput;
 };

-/**
- * OpenAI agent worker.
- * This class is responsible for running the agent.
- */
-export class OpenAIAgentWorker implements AgentWorker {
+export class OpenAIAgentWorker
+  implements AgentWorker<LLMChatParamsBase<OpenAIAdditionalChatOptions>>
+{
  private llm: OpenAI;
-  private verbose: boolean;
-  private maxFunctionCalls: number;
+  private maxFunctionCalls: number = 5;

  public prefixMessages: ChatMessage[];

  private _getTools: (input: string) => Promise<BaseTool[]>;

-  /**
-   * Initialize.
-   */
  constructor({
    tools = [],
    llm,
    prefixMessages,
-    verbose,
-    maxFunctionCalls = DEFAULT_MAX_FUNCTION_CALLS,
+    maxFunctionCalls,
    toolRetriever,
  }: OpenAIAgentWorkerParams) {
-    this.llm = llm ?? new OpenAI({ model: "gpt-3.5-turbo-0613" });
-    this.verbose = verbose || false;
-    this.maxFunctionCalls = maxFunctionCalls;
+    this.llm =
+      llm ?? isFunctionCallingModel(Settings.llm)
+        ? (Settings.llm as OpenAI)
+        : new OpenAI({
+            model: "gpt-3.5-turbo-0613",
+          });
+    if (maxFunctionCalls) {
+      this.maxFunctionCalls = maxFunctionCalls;
+    }
    this.prefixMessages = prefixMessages || [];

-    if (tools.length > 0 && toolRetriever) {
+    if (Array.isArray(tools) && tools.length > 0 && toolRetriever) {
      throw new Error("Cannot specify both tools and tool_retriever");
-    } else if (tools.length > 0) {
+    } else if (Array.isArray(tools)) {
      this._getTools = async () => tools;
    } else if (toolRetriever) {
+      // fixme: this won't work, type mismatch
      this._getTools = async (message: string) =>
        toolRetriever.retrieve(message);
    } else {
@@ -127,11 +123,6 @@ export class OpenAIAgentWorker implements AgentWorker {
    }
  }

-  /**
-   * Get all messages.
-   * @param task: task
-   * @returns: messages
-   */
  public getAllMessages(task: Task): ChatMessage[] {
    return [
      ...this.prefixMessages,
@@ -140,11 +131,6 @@ export class OpenAIAgentWorker implements AgentWorker {
    ];
  }

-  /**
-   * Get latest tool calls.
-   * @param task: task
-   * @returns: tool calls
-   */
  public getLatestToolCalls(task: Task): OpenAIToolCall[] | null {
    const chatHistory: ChatMessage[] = task.extraState.newMemory.getAll();

@@ -155,59 +141,67 @@ export class OpenAIAgentWorker implements AgentWorker {
    return chatHistory[chatHistory.length - 1].additionalKwargs?.toolCalls;
  }

-  /**
-   *
-   * @param task
-   * @param openaiTools
-   * @param toolChoice
-   * @returns
-   */
-  private _getLlmChatKwargs(
+  private _getLlmChatParams(
    task: Task,
-    openaiTools: { [key: string]: any }[],
-    toolChoice: string | { [key: string]: any } = "auto",
-  ): { [key: string]: any } {
-    const llmChatKwargs: { [key: string]: any } = {
+    openaiTools: BaseTool[],
+    toolChoice: ChatCompletionToolChoiceOption = "auto",
+  ): LLMChatParamsBase<OpenAIAdditionalChatOptions> {
+    const llmChatParams = {
      messages: this.getAllMessages(task),
-    };
+      tools: [] as BaseTool[],
+      additionalChatOptions: {} as OpenAIAdditionalChatOptions,
+    } satisfies LLMChatParamsBase<OpenAIAdditionalChatOptions>;

    if (openaiTools.length > 0) {
-      llmChatKwargs.tools = openaiTools;
-      llmChatKwargs.toolChoice = toolChoice;
+      llmChatParams.tools = openaiTools;
+      llmChatParams.additionalChatOptions.tool_choice = toolChoice;
    }

-    return llmChatKwargs;
+    return llmChatParams;
  }

-  /**
-   * Process message.
-   * @param task: task
-   * @param chatResponse: chat response
-   * @returns: agent chat response
-   */
  private _processMessage(
    task: Task,
-    chatResponse: ChatResponse,
+    aiMessage: ChatMessage,
  ): AgentChatResponse {
-    const aiMessage = chatResponse.message;
    task.extraState.newMemory.put(aiMessage);

-    return new AgentChatResponse(aiMessage.content, task.extraState.sources);
+    return new AgentChatResponse(
+      extractText(aiMessage.content),
+      task.extraState.sources,
+    );
  }

  private async _getStreamAiResponse(
    task: Task,
-    llmChatKwargs: any,
-  ): Promise<StreamingAgentChatResponse> {
+    llmChatParams: LLMChatParamsBase<OpenAIAdditionalChatOptions>,
+  ): Promise<StreamingAgentChatResponse | AgentChatResponse> {
    const stream = await this.llm.chat({
      stream: true,
-      ...llmChatKwargs,
+      ...llmChatParams,
    });
+    // read first chunk from stream to find out if we need to call tools
+    const iterator = stream[Symbol.asyncIterator]();
+    let { value } = await iterator.next();
+    let content = value.delta;
+    const hasToolCalls = value.additionalKwargs?.toolCalls.length > 0;

-    const iterator = streamConverter(
+    if (hasToolCalls) {
+      // consume stream until we have all the tool calls and return a non-streamed response
+      for await (value of stream) {
+        content += value.delta;
+      }
+      return this._processMessage(task, {
+        content,
+        role: "assistant",
+        additionalKwargs: value.additionalKwargs,
+      });
+    }
+
+    const newStream = streamConverter.bind(this)(
      streamReducer({
        stream,
-        initialValue: "",
+        initialValue: content,
        reducer: (accumulator, part) => (accumulator += part.delta),
        finished: (accumulator) => {
          task.extraState.newMemory.put({
@@ -219,43 +213,31 @@ export class OpenAIAgentWorker implements AgentWorker {
      (r: ChatResponseChunk) => new Response(r.delta),
    );

-    return new StreamingAgentChatResponse(iterator, task.extraState.sources);
+    return new StreamingAgentChatResponse(newStream, task.extraState.sources);
  }

-  /**
-   * Get agent response.
-   * @param task: task
-   * @param mode: mode
-   * @param llmChatKwargs: llm chat kwargs
-   * @returns: agent chat response
-   */
  private async _getAgentResponse(
    task: Task,
    mode: ChatResponseMode,
-    llmChatKwargs: any,
+    llmChatParams: LLMChatParamsBase<OpenAIAdditionalChatOptions>,
  ): Promise<AgentChatResponse | StreamingAgentChatResponse> {
    if (mode === ChatResponseMode.WAIT) {
-      const chatResponse = (await this.llm.chat({
+      const chatResponse = await this.llm.chat({
        stream: false,
-        ...llmChatKwargs,
-      })) as unknown as ChatResponse;
+        ...llmChatParams,
+      });

-      return this._processMessage(task, chatResponse) as AgentChatResponse;
+      return this._processMessage(
+        task,
+        chatResponse.message,
+      ) as AgentChatResponse;
    } else if (mode === ChatResponseMode.STREAM) {
-      return this._getStreamAiResponse(task, llmChatKwargs);
+      return this._getStreamAiResponse(task, llmChatParams);
    }

    throw new Error("Invalid mode");
  }

-  /**
-   * Call function.
-   * @param tools: tools
-   * @param toolCall: tool call
-   * @param memory: memory
-   * @param sources: sources
-   * @returns: void
-   */
  async callFunction(
    tools: BaseTool[],
    toolCall: OpenAIToolCall,
@@ -266,7 +248,7 @@ export class OpenAIAgentWorker implements AgentWorker {
      throw new Error("Invalid tool_call object");
    }

-    const functionMessage = await callFunction(tools, toolCall, this.verbose);
+    const functionMessage = await callFunction(tools, toolCall);

    const message = functionMessage[0];
    const toolOutput = functionMessage[1];
@@ -277,16 +259,12 @@ export class OpenAIAgentWorker implements AgentWorker {
    };
  }

-  /**
-   * Initialize step.
-   * @param task: task
-   * @param kwargs: kwargs
-   * @returns: task step
-   */
-  initializeStep(task: Task, kwargs?: any): TaskStep {
+  initializeStep(task: Task): TaskStep {
    const sources: ToolOutput[] = [];

-    const newMemory = new ChatMemoryBuffer();
+    const newMemory = new ChatMemoryBuffer({
+      tokenLimit: task.memory.tokenLimit,
+    });

    const taskState = {
      sources,
@@ -302,12 +280,6 @@ export class OpenAIAgentWorker implements AgentWorker {
    return new TaskStep(task.taskId, randomUUID(), task.input);
  }

-  /**
-   * Should continue.
-   * @param toolCalls: tool calls
-   * @param nFunctionCalls: number of function calls
-   * @returns: boolean
-   */
  private _shouldContinue(
    toolCalls: OpenAIToolCall[] | null,
    nFunctionCalls: number,
@@ -323,11 +295,6 @@ export class OpenAIAgentWorker implements AgentWorker {
    return true;
  }

-  /**
-   * Get tools.
-   * @param input: input
-   * @returns: tools
-   */
  async getTools(input: string): Promise<BaseTool[]> {
    return this._getTools(input);
  }
@@ -336,28 +303,20 @@ export class OpenAIAgentWorker implements AgentWorker {
    step: TaskStep,
    task: Task,
    mode: ChatResponseMode = ChatResponseMode.WAIT,
-    toolChoice: string | { [key: string]: any } = "auto",
+    toolChoice: ChatCompletionToolChoiceOption = "auto",
  ): Promise<TaskStepOutput> {
    const tools = await this.getTools(task.input);

    if (step.input) {
-      addUserStepToMemory(step, task.extraState.newMemory, this.verbose);
+      addUserStepToMemory(step, task.extraState.newMemory);
    }

-    const openaiTools = tools.map((tool) =>
-      toOpenAiTool({
-        name: tool.metadata.name,
-        description: tool.metadata.description,
-        parameters: tool.metadata.parameters,
-      }),
-    );
-
-    const llmChatKwargs = this._getLlmChatKwargs(task, openaiTools, toolChoice);
+    const llmChatParams = this._getLlmChatParams(task, tools, toolChoice);

    const agentChatResponse = await this._getAgentResponse(
      task,
      mode,
-      llmChatKwargs,
+      llmChatParams,
    );

    const latestToolCalls = this.getLatestToolCalls(task) || [];
@@ -390,45 +349,25 @@ export class OpenAIAgentWorker implements AgentWorker {
    return new TaskStepOutput(agentChatResponse, step, newSteps, isDone);
  }

-  /**
-   * Run step.
-   * @param step: step
-   * @param task: task
-   * @param kwargs: kwargs
-   * @returns: task step output
-   */
  async runStep(
    step: TaskStep,
    task: Task,
-    kwargs?: any,
+    chatParams: LLMChatParamsBase<OpenAIAdditionalChatOptions>,
  ): Promise<TaskStepOutput> {
-    const toolChoice = kwargs?.toolChoice || "auto";
+    const toolChoice = chatParams?.additionalChatOptions?.tool_choice ?? "auto";
    return this._runStep(step, task, ChatResponseMode.WAIT, toolChoice);
  }

-  /**
-   * Stream step.
-   * @param step: step
-   * @param task: task
-   * @param kwargs: kwargs
-   * @returns: task step output
-   */
  async streamStep(
    step: TaskStep,
    task: Task,
-    kwargs?: any,
+    chatParams: LLMChatParamsBase<OpenAIAdditionalChatOptions>,
  ): Promise<TaskStepOutput> {
-    const toolChoice = kwargs?.toolChoice || "auto";
+    const toolChoice = chatParams?.additionalChatOptions?.tool_choice ?? "auto";
    return this._runStep(step, task, ChatResponseMode.STREAM, toolChoice);
  }

-  /**
-   * Finalize task.
-   * @param task: task
-   * @param kwargs: kwargs
-   * @returns: void
-   */
-  finalizeTask(task: Task, kwargs?: any): void {
+  finalizeTask(task: Task): void {
    task.memory.set(task.memory.get().concat(task.extraState.newMemory.get()));
    task.extraState.newMemory.reset();
  }
@@ -9,7 +9,6 @@ type ReActAgentParams = {
  llm?: LLM;
  memory?: any;
  prefixMessages?: ChatMessage[];
-  verbose?: boolean;
  maxInteractions?: number;
  defaultToolChoice?: string;
  toolRetriever?: ObjectRetriever;
@@ -26,7 +25,6 @@ export class ReActAgent extends AgentRunner {
    llm,
    memory,
    prefixMessages,
-    verbose,
    maxInteractions = 10,
    defaultToolChoice = "auto",
    toolRetriever,
@@ -36,7 +34,6 @@ export class ReActAgent extends AgentRunner {
      llm,
      maxInteractions,
      toolRetriever,
-      verbose,
    });

    super({
@@ -1,4 +1,5 @@
 import type { ChatMessage } from "../../llm/index.js";
+import { extractText } from "../../llm/utils.js";

 export interface BaseReasoningStep {
  getContent(): string;
@@ -51,10 +52,12 @@ export abstract class BaseOutputParser {
  formatMessages(messages: ChatMessage[]): ChatMessage[] {
    if (messages) {
      if (messages[0].role === "system") {
-        messages[0].content = this.format(messages[0].content || "");
+        messages[0].content = this.format(
+          extractText(messages[0].content) || "",
+        );
      } else {
        messages[messages.length - 1].content = this.format(
-          messages[messages.length - 1].content || "",
+          extractText(messages[messages.length - 1].content) || "",
        );
      }
    }
@@ -1,7 +1,9 @@
 import { randomUUID } from "@llamaindex/env";
+import type { ChatMessage } from "cohere-ai/api";
+import { Settings } from "../../Settings.js";
 import { AgentChatResponse } from "../../engines/chat/index.js";
-import type { ChatResponse, LLM } from "../../llm/index.js";
-import { OpenAI } from "../../llm/index.js";
+import { type ChatResponse, type LLM } from "../../llm/index.js";
+import { extractText } from "../../llm/utils.js";
 import { ChatMemoryBuffer } from "../../memory/ChatMemoryBuffer.js";
 import type { ObjectRetriever } from "../../objects/base.js";
 import { ToolOutput } from "../../tools/index.js";
@@ -16,32 +18,24 @@ import {
  ObservationReasoningStep,
  ResponseReasoningStep,
 } from "./types.js";
+
 type ReActAgentWorkerParams = {
  tools: BaseTool[];
  llm?: LLM;
  maxInteractions?: number;
  reactChatFormatter?: ReActChatFormatter | undefined;
  outputParser?: ReActOutputParser | undefined;
-  verbose?: boolean | undefined;
  toolRetriever?: ObjectRetriever | undefined;
 };

-/**
- *
- * @param step
- * @param memory
- * @param currentReasoning
- * @param verbose
- */
 function addUserStepToReasoning(
  step: TaskStep,
  memory: ChatMemoryBuffer,
  currentReasoning: BaseReasoningStep[],
-  verbose: boolean = false,
 ): void {
  if (step.stepState.isFirst) {
    memory.put({
-      content: step.input,
+      content: step.input ?? "",
      role: "user",
    });
    step.stepState.isFirst = false;
@@ -50,18 +44,22 @@ function addUserStepToReasoning(
      observation: step.input ?? undefined,
    });
    currentReasoning.push(reasoningStep);
-    if (verbose) {
+    if (Settings.debug) {
      console.log(`Added user message to memory: ${step.input}`);
    }
  }
 }

+type ChatParams = {
+  messages: ChatMessage[];
+  tools?: BaseTool[];
+};
+
 /**
 * ReAct agent worker.
 */
-export class ReActAgentWorker implements AgentWorker {
+export class ReActAgentWorker implements AgentWorker<ChatParams> {
  llm: LLM;
-  verbose: boolean;

  maxInteractions: number = 10;
  reactChatFormatter: ReActChatFormatter;
@@ -75,15 +73,13 @@ export class ReActAgentWorker implements AgentWorker {
    maxInteractions,
    reactChatFormatter,
    outputParser,
-    verbose,
    toolRetriever,
  }: ReActAgentWorkerParams) {
-    this.llm = llm ?? new OpenAI({ model: "gpt-3.5-turbo-0613" });
+    this.llm = llm ?? Settings.llm;

    this.maxInteractions = maxInteractions ?? 10;
    this.reactChatFormatter = reactChatFormatter ?? new ReActChatFormatter();
    this.outputParser = outputParser ?? new ReActOutputParser();
-    this.verbose = verbose || false;

    if (tools.length > 0 && toolRetriever) {
      throw new Error("Cannot specify both tools and tool_retriever");
@@ -97,16 +93,12 @@ export class ReActAgentWorker implements AgentWorker {
    }
  }

-  /**
-   * Initialize a task step.
-   * @param task - task
-   * @param kwargs - keyword arguments
-   * @returns - task step
-   */
-  initializeStep(task: Task, kwargs?: any): TaskStep {
+  initializeStep(task: Task): TaskStep {
    const sources: ToolOutput[] = [];
    const currentReasoning: BaseReasoningStep[] = [];
-    const newMemory = new ChatMemoryBuffer();
+    const newMemory = new ChatMemoryBuffer({
+      tokenLimit: task.memory.tokenLimit,
+    });

    const taskState = {
      sources,
@@ -124,12 +116,6 @@ export class ReActAgentWorker implements AgentWorker {
    });
  }

-  /**
-   * Extract reasoning step from chat response.
-   * @param output - chat response
-   * @param isStreaming - whether the chat response is streaming
-   * @returns - [message content, reasoning steps, is done]
-   */
  extractReasoningStep(
    output: ChatResponse,
    isStreaming: boolean,
@@ -145,21 +131,21 @@ export class ReActAgentWorker implements AgentWorker {

    try {
      reasoningStep = this.outputParser.parse(
-        messageContent,
+        extractText(messageContent),
        isStreaming,
      ) as ActionReasoningStep;
    } catch (e) {
      throw new Error(`Could not parse output: ${e}`);
    }

-    if (this.verbose) {
+    if (Settings.debug) {
      console.log(`${reasoningStep.getContent()}\n`);
    }

    currentReasoning.push(reasoningStep);

    if (reasoningStep.isDone()) {
-      return [messageContent, currentReasoning, true];
+      return [extractText(messageContent), currentReasoning, true];
    }

    const actionReasoningStep = new ActionReasoningStep({
@@ -172,17 +158,9 @@ export class ReActAgentWorker implements AgentWorker {
      throw new Error(`Expected ActionReasoningStep, got ${reasoningStep}`);
    }

-    return [messageContent, currentReasoning, false];
+    return [extractText(messageContent), currentReasoning, false];
  }

-  /**
-   * Process actions.
-   * @param task - task
-   * @param tools - tools
-   * @param output - chat response
-   * @param isStreaming - whether the chat response is streaming
-   * @returns - [reasoning steps, is done]
-   */
  async _processActions(
    task: Task,
    tools: BaseTool[],
@@ -233,19 +211,13 @@ export class ReActAgentWorker implements AgentWorker {

    currentReasoning.push(observationStep);

-    if (this.verbose) {
+    if (Settings.debug) {
      console.log(`${observationStep.getContent()}`);
    }

    return [currentReasoning, false];
  }

-  /**
-   * Get response.
-   * @param currentReasoning - current reasoning steps
-   * @param sources - tool outputs
-   * @returns - agent chat response
-   */
  _getResponse(
    currentReasoning: BaseReasoningStep[],
    sources: ToolOutput[],
@@ -269,13 +241,6 @@ export class ReActAgentWorker implements AgentWorker {
    return new AgentChatResponse(responseStr, sources);
  }

-  /**
-   * Get task step response.
-   * @param agentResponse - agent chat response
-   * @param step - task step
-   * @param isDone - whether the task is done
-   * @returns - task step output
-   */
  _getTaskStepResponse(
    agentResponse: AgentChatResponse,
    step: TaskStep,
@@ -292,24 +257,12 @@ export class ReActAgentWorker implements AgentWorker {
    return new TaskStepOutput(agentResponse, step, newSteps, isDone);
  }

-  /**
-   * Run a task step.
-   * @param step - task step
-   * @param task - task
-   * @param kwargs - keyword arguments
-   * @returns - task step output
-   */
-  async _runStep(
-    step: TaskStep,
-    task: Task,
-    kwargs?: any,
-  ): Promise<TaskStepOutput> {
+  async _runStep(step: TaskStep, task: Task): Promise<TaskStepOutput> {
    if (step.input) {
      addUserStepToReasoning(
        step,
        task.extraState.newMemory,
        task.extraState.currentReasoning,
-        this.verbose,
      );
    }

@@ -348,42 +301,15 @@ export class ReActAgentWorker implements AgentWorker {
    return this._getTaskStepResponse(agentResponse, step, isDone);
  }

-  /**
-   * Run a task step.
-   * @param step - task step
-   * @param task - task
-   * @param kwargs - keyword arguments
-   * @returns - task step output
-   */
-  async runStep(
-    step: TaskStep,
-    task: Task,
-    kwargs?: any,
-  ): Promise<TaskStepOutput> {
+  async runStep(step: TaskStep, task: Task): Promise<TaskStepOutput> {
    return await this._runStep(step, task);
  }

-  /**
-   * Run a task step.
-   * @param step - task step
-   * @param task - task
-   * @param kwargs - keyword arguments
-   * @returns - task step output
-   */
-  streamStep(
-    step: TaskStep,
-    task: Task,
-    kwargs?: any,
-  ): Promise<TaskStepOutput> {
+  streamStep(): Promise<TaskStepOutput> {
    throw new Error("Method not implemented.");
  }

-  /**
-   * Finalize a task.
-   * @param task - task
-   * @param kwargs - keyword arguments
-   */
-  finalizeTask(task: Task, kwargs?: any): void {
+  finalizeTask(task: Task): void {
    task.memory.set(task.memory.get() + task.extraState.newMemory.get());
    task.extraState.newMemory.reset();
  }
@@ -58,6 +58,7 @@ export class AgentRunner extends BaseAgentRunner {
    this.memory =
      params.memory ??
      new ChatMemoryBuffer({
+        llm: params.llm,
        chatHistory: params.chatHistory,
      });
    this.initTaskStateKwargs = params.initTaskStateKwargs ?? {};
@@ -6,11 +6,19 @@ import type {

 import type { QueryEngineParamsNonStreaming } from "../types.js";

-export interface AgentWorker {
-  initializeStep(task: Task, kwargs?: any): TaskStep;
-  runStep(step: TaskStep, task: Task, kwargs?: any): Promise<TaskStepOutput>;
-  streamStep(step: TaskStep, task: Task, kwargs?: any): Promise<TaskStepOutput>;
-  finalizeTask(task: Task, kwargs?: any): void;
+export interface AgentWorker<ExtraParams extends object = object> {
+  initializeStep(task: Task, params?: ExtraParams): TaskStep;
+  runStep(
+    step: TaskStep,
+    task: Task,
+    params?: ExtraParams,
+  ): Promise<TaskStepOutput>;
+  streamStep(
+    step: TaskStep,
+    task: Task,
+    params?: ExtraParams,
+  ): Promise<TaskStepOutput>;
+  finalizeTask(task: Task, params?: ExtraParams): void;
 }

 interface BaseChatEngine {
@@ -170,13 +178,13 @@ export class TaskStep implements ITaskStep {
 * @param isLast: isLast
 */
 export class TaskStepOutput {
-  output: any;
+  output: AgentChatResponse | StreamingAgentChatResponse;
  taskStep: TaskStep;
  nextSteps: TaskStep[];
  isLast: boolean;

  constructor(
-    output: any,
+    output: AgentChatResponse | StreamingAgentChatResponse,
    taskStep: TaskStep,
    nextSteps: TaskStep[],
    isLast: boolean = false,
@@ -1,19 +1,12 @@
+import { Settings } from "../Settings.js";
 import type { ChatMessage } from "../llm/index.js";
 import type { ChatMemoryBuffer } from "../memory/ChatMemoryBuffer.js";
 import type { BaseTool } from "../types.js";
 import type { TaskStep } from "./types.js";

-/**
- * Adds the user's input to the memory.
- *
- * @param step - The step to add to the memory.
- * @param memory - The memory to add the step to.
- * @param verbose - Whether to print debug messages.
- */
 export function addUserStepToMemory(
  step: TaskStep,
  memory: ChatMemoryBuffer,
-  verbose: boolean = false,
 ): void {
  if (!step.input) {
    return;
@@ -26,26 +19,17 @@ export function addUserStepToMemory(

  memory.put(userMessage);

-  if (verbose) {
+  if (Settings.debug) {
    console.log(`Added user message to memory!: ${userMessage.content}`);
  }
 }

-/**
- * Get function by name.
- * @param tools: tools
- * @param name: name
- * @returns: tool
- */
 export function getFunctionByName(tools: BaseTool[], name: string): BaseTool {
-  const nameToTool: { [key: string]: BaseTool } = {};
-  tools.forEach((tool) => {
-    nameToTool[tool.metadata.name] = tool;
-  });
+  const exist = tools.find((tool) => tool.metadata.name === name);

-  if (!(name in nameToTool)) {
+  if (!exist) {
    throw new Error(`Tool with name ${name} not found`);
  }

-  return nameToTool[name];
+  return exist;
 }
@@ -1,46 +1,52 @@
 import type { Anthropic } from "@anthropic-ai/sdk";
 import { CustomEvent } from "@llamaindex/env";
 import type { NodeWithScore } from "../Node.js";
+import {
+  EventCaller,
+  getEventCaller,
+} from "../internal/context/EventCaller.js";
+import type { LLMEndEvent, LLMStartEvent } from "../llm/types.js";

-/**
- * This type is used to define the event maps for the Llamaindex package.
- */
-export interface LlamaIndexEventMaps {}
+export class LlamaIndexCustomEvent<T = any> extends CustomEvent<T> {
+  reason: EventCaller | null;
+  private constructor(
+    event: string,
+    options?: CustomEventInit & {
+      reason?: EventCaller | null;
+    },
+  ) {
+    super(event, options);
+    this.reason = options?.reason ?? null;
+  }

-declare module "llamaindex" {
-  interface LlamaIndexEventMaps {
-    /**
-     * @deprecated
-     */
-    retrieve: CustomEvent<RetrievalCallbackResponse>;
-    /**
-     * @deprecated
-     */
-    stream: CustomEvent<StreamCallbackResponse>;
+  static fromEvent<Type extends keyof LlamaIndexEventMaps>(
+    type: Type,
+    detail: LlamaIndexEventMaps[Type]["detail"],
+  ) {
+    return new LlamaIndexCustomEvent(type, {
+      detail: detail,
+      reason: getEventCaller(),
+    });
  }
 }

+/**
+ * This type is used to define the event maps.
+ */
+export interface LlamaIndexEventMaps {
+  /**
+   * @deprecated
+   */
+  retrieve: CustomEvent<RetrievalCallbackResponse>;
+  /**
+   * @deprecated
+   */
+  stream: CustomEvent<StreamCallbackResponse>;
+  "llm-start": LLMStartEvent;
+  "llm-end": LLMEndEvent;
+}
+
 //#region @deprecated remove in the next major version
-/*
-  An event is a wrapper that groups related operations.
-  For example, during retrieve and synthesize,
-  a parent event wraps both operations, and each operation has it's own
-  event. In this case, both sub-events will share a parentId.
-*/
-
-export type EventTag = "intermediate" | "final";
-export type EventType = "retrieve" | "llmPredict" | "wrapper";
-export interface Event {
-  id: string;
-  type: EventType;
-  tags?: EventTag[];
-  parentId?: string;
-}
-
-interface BaseCallbackResponse {
-  event: Event;
-}
-
 //Specify StreamToken per mainstream LLM
 export interface DefaultStreamToken {
  id: string;
@@ -68,13 +74,13 @@ export type AnthropicStreamToken = Anthropic.Completion;

 //StreamCallbackResponse should let practitioners implement callbacks out of the box...
 //When custom streaming LLMs are involved, people are expected to write their own StreamCallbackResponses
-export interface StreamCallbackResponse extends BaseCallbackResponse {
+export interface StreamCallbackResponse {
  index: number;
  isDone?: boolean;
  token?: DefaultStreamToken;
 }

-export interface RetrievalCallbackResponse extends BaseCallbackResponse {
+export interface RetrievalCallbackResponse {
  query: string;
  nodes: NodeWithScore[];
 }
@@ -98,7 +104,11 @@ interface CallbackManagerMethods {

 const noop: (...args: any[]) => any = () => void 0;

-type EventHandler<Event extends CustomEvent> = (event: Event) => void;
+type EventHandler<Event extends CustomEvent> = (
+  event: Event & {
+    reason: EventCaller | null;
+  },
+) => void;

 export class CallbackManager implements CallbackManagerMethods {
  /**
@@ -110,7 +120,7 @@ export class CallbackManager implements CallbackManagerMethods {
        this.#handlers
          .get("stream")!
          .map((handler) =>
-            handler(new CustomEvent("stream", { detail: response })),
+            handler(LlamaIndexCustomEvent.fromEvent("stream", response)),
          ),
      );
    };
@@ -125,7 +135,7 @@ export class CallbackManager implements CallbackManagerMethods {
        this.#handlers
          .get("retrieve")!
          .map((handler) =>
-            handler(new CustomEvent("retrieve", { detail: response })),
+            handler(LlamaIndexCustomEvent.fromEvent("retrieve", response)),
          ),
      );
    };
@@ -188,6 +198,8 @@ export class CallbackManager implements CallbackManagerMethods {
    if (!handlers) {
      return;
    }
-    handlers.forEach((handler) => handler(new CustomEvent(event, { detail })));
+    handlers.forEach((handler) =>
+      handler(LlamaIndexCustomEvent.fromEvent(event, detail)),
+    );
  }
 }
@@ -1,11 +1,20 @@
+import { PlatformApi } from "@llamaindex/cloud";
+import type { Document } from "../Node.js";
 import type { BaseRetriever } from "../Retriever.js";
 import { RetrieverQueryEngine } from "../engines/query/RetrieverQueryEngine.js";
+import type { TransformComponent } from "../ingestion/types.js";
 import type { BaseNodePostprocessor } from "../postprocessors/types.js";
 import type { BaseSynthesizer } from "../synthesizers/types.js";
 import type { BaseQueryEngine } from "../types.js";
 import type { CloudRetrieveParams } from "./LlamaCloudRetriever.js";
 import { LlamaCloudRetriever } from "./LlamaCloudRetriever.js";
+import { getPipelineCreate } from "./config.js";
 import type { CloudConstructorParams } from "./types.js";
+import { getAppBaseUrl, getClient } from "./utils.js";
+
+import { getEnv } from "@llamaindex/env";
+import { OpenAIEmbedding } from "../embeddings/OpenAIEmbedding.js";
+import { SimpleNodeParser } from "../nodeParsers/SimpleNodeParser.js";

 export class LlamaCloudIndex {
  params: CloudConstructorParams;
@@ -14,6 +23,151 @@ export class LlamaCloudIndex {
    this.params = params;
  }

+  static async fromDocuments(
+    params: {
+      documents: Document[];
+      transformations?: TransformComponent[];
+      verbose?: boolean;
+    } & CloudConstructorParams,
+  ): Promise<LlamaCloudIndex> {
+    const defaultTransformations: TransformComponent[] = [
+      new OpenAIEmbedding({
+        apiKey: getEnv("OPENAI_API_KEY"),
+      }),
+      new SimpleNodeParser(),
+    ];
+
+    const appUrl = getAppBaseUrl(params.baseUrl);
+
+    const client = await getClient({ ...params, baseUrl: appUrl });
+
+    const pipelineCreateParams = await getPipelineCreate({
+      pipelineName: params.name,
+      pipelineType: "MANAGED",
+      inputNodes: params.documents,
+      transformations: params.transformations ?? defaultTransformations,
+    });
+
+    const project = await client.project.upsertProject({
+      name: params.projectName ?? "default",
+    });
+
+    if (!project.id) {
+      throw new Error("Project ID should be defined");
+    }
+
+    const pipeline = await client.project.upsertPipelineForProject(
+      project.id,
+      pipelineCreateParams,
+    );
+
+    if (!pipeline.id) {
+      throw new Error("Pipeline ID must be defined");
+    }
+
+    if (params.verbose) {
+      console.log(`Created pipeline ${pipeline.id} with name ${params.name}`);
+    }
+
+    const executionsIds: {
+      exectionId: string;
+      dataSourceId: string;
+    }[] = [];
+
+    for (const dataSource of pipeline.dataSources) {
+      const dataSourceExection =
+        await client.dataSource.createDataSourceExecution(dataSource.id);
+
+      if (!dataSourceExection.id) {
+        throw new Error("Data Source Execution ID must be defined");
+      }
+
+      executionsIds.push({
+        exectionId: dataSourceExection.id,
+        dataSourceId: dataSource.id,
+      });
+    }
+
+    let isDone = false;
+
+    while (!isDone) {
+      const statuses = [];
+
+      for await (const execution of executionsIds) {
+        const dataSourceExecution =
+          await client.dataSource.getDataSourceExecution(
+            execution.dataSourceId,
+            execution.exectionId,
+          );
+
+        statuses.push(dataSourceExecution.status);
+
+        if (
+          statuses.every((status) => status === PlatformApi.StatusEnum.Success)
+        ) {
+          isDone = true;
+          if (params.verbose) {
+            console.info("Data Source Execution completed");
+          }
+          break;
+        } else if (
+          statuses.some((status) => status === PlatformApi.StatusEnum.Error)
+        ) {
+          throw new Error("Data Source Execution failed");
+        } else {
+          await new Promise((resolve) => setTimeout(resolve, 1000));
+          if (params.verbose) {
+            process.stdout.write(".");
+          }
+        }
+      }
+    }
+
+    isDone = false;
+
+    const execution = await client.pipeline.runManagedPipelineIngestion(
+      pipeline.id,
+    );
+
+    const ingestionId = execution.id;
+
+    if (!ingestionId) {
+      throw new Error("Ingestion ID must be defined");
+    }
+
+    while (!isDone) {
+      const pipelineStatus = await client.pipeline.getManagedIngestionExecution(
+        pipeline.id,
+        ingestionId,
+      );
+
+      if (pipelineStatus.status === PlatformApi.StatusEnum.Success) {
+        isDone = true;
+
+        if (params.verbose) {
+          console.info("Ingestion completed");
+        }
+
+        break;
+      } else if (pipelineStatus.status === PlatformApi.StatusEnum.Error) {
+        throw new Error("Ingestion failed");
+      } else {
+        await new Promise((resolve) => setTimeout(resolve, 1000));
+        if (params.verbose) {
+          process.stdout.write(".");
+        }
+      }
+    }
+
+    if (params.verbose) {
+      console.info(
+        `Ingestion completed, find your index at ${appUrl}/project/${project.id}/deploy/${pipeline.id}`,
+      );
+    }
+
+    return new LlamaCloudIndex({ ...params });
+  }
+
  asRetriever(params: CloudRetrieveParams = {}): BaseRetriever {
    return new LlamaCloudRetriever({ ...this.params, ...params });
  }
@@ -1,13 +1,12 @@
 import type { PlatformApi, PlatformApiClient } from "@llamaindex/cloud";
-import { globalsHelper } from "../GlobalsHelper.js";
 import type { NodeWithScore } from "../Node.js";
 import { ObjectType, jsonToNode } from "../Node.js";
 import type { BaseRetriever, RetrieveParams } from "../Retriever.js";
 import { Settings } from "../Settings.js";
+import { wrapEventCaller } from "../internal/context/EventCaller.js";
 import type { ClientParams, CloudConstructorParams } from "./types.js";
 import { DEFAULT_PROJECT_NAME } from "./types.js";
 import { getClient } from "./utils.js";
-
 export type CloudRetrieveParams = Omit<
  PlatformApi.RetrievalParams,
  "query" | "searchFilters" | "pipelineId" | "className"
@@ -51,9 +50,9 @@ export class LlamaCloudRetriever implements BaseRetriever {
    return this.client;
  }

+  @wrapEventCaller
  async retrieve({
    query,
-    parentEvent,
    preFilters,
  }: RetrieveParams): Promise<NodeWithScore[]> {
    const pipelines = await (
@@ -77,13 +76,9 @@ export class LlamaCloudRetriever implements BaseRetriever {

    const nodes = this.resultNodesToNodeWithScore(results.retrievalNodes);

-    Settings.callbackManager.onRetrieve({
+    Settings.callbackManager.dispatchEvent("retrieve", {
      query,
      nodes,
-      event: globalsHelper.createEvent({
-        parentEvent,
-        type: "retrieve",
-      }),
    });

    return nodes;
@@ -18,11 +18,11 @@ function getTransformationConfig(
    return {
      configurableTransformationType: "SENTENCE_AWARE_NODE_PARSER",
      component: {
-        // TODO: API returns 422 if these parameters are included
-        // chunkSize: transformation.textSplitter.chunkSize, // TODO: set to public in SentenceSplitter
-        // chunkOverlap: transformation.textSplitter.chunkOverlap, // TODO: set to public in SentenceSplitter
-        // includeMetadata: transformation.includeMetadata,
-        // includePrevNextRel: transformation.includePrevNextRel,
+        // TODO: API doesnt accept camelCase
+        chunk_size: transformation.textSplitter.chunkSize, // TODO: set to public in SentenceSplitter
+        chunk_overlap: transformation.textSplitter.chunkOverlap, // TODO: set to public in SentenceSplitter
+        include_metadata: transformation.includeMetadata,
+        include_prev_next_rel: transformation.includePrevNextRel,
      },
    };
  }
@@ -30,9 +30,10 @@ function getTransformationConfig(
    return {
      configurableTransformationType: "OPENAI_EMBEDDING",
      component: {
-        modelName: transformation.model,
-        apiKey: transformation.apiKey,
-        embedBatchSize: transformation.embedBatchSize,
+        // TODO: API doesnt accept camelCase
+        model: transformation.model,
+        api_key: transformation.apiKey,
+        embed_batch_size: transformation.embedBatchSize,
        dimensions: transformation.dimensions,
      },
    };
@@ -71,10 +72,12 @@ export async function getPipelineCreate(
    inputNodes = [],
  } = params;

+  const dataSources = inputNodes.map(getDataSourceConfig);
+
  return {
    name: pipelineName,
    configuredTransformations: transformations.map(getTransformationConfig),
-    dataSources: inputNodes.map(getDataSourceConfig),
+    dataSources,
    dataSinks: [],
    pipelineType,
  };
@@ -8,6 +8,7 @@ import {
 import type { Response } from "../../Response.js";
 import type { ServiceContext } from "../../ServiceContext.js";
 import { llmFromSettingsOrContext } from "../../Settings.js";
+import { wrapEventCaller } from "../../internal/context/EventCaller.js";
 import type { ChatMessage, LLM } from "../../llm/index.js";
 import { extractText, streamReducer } from "../../llm/utils.js";
 import { PromptMixin } from "../../prompts/index.js";
@@ -17,7 +18,6 @@ import type {
  ChatEngineParamsNonStreaming,
  ChatEngineParamsStreaming,
 } from "./types.js";
-
 /**
 * CondenseQuestionChatEngine is used in conjunction with a Index (for example VectorStoreIndex).
 * It does two steps on taking a user's chat message: first, it condenses the chat message
@@ -82,6 +82,7 @@ export class CondenseQuestionChatEngine

  chat(params: ChatEngineParamsStreaming): Promise<AsyncIterable<Response>>;
  chat(params: ChatEngineParamsNonStreaming): Promise<Response>;
+  @wrapEventCaller
  async chat(
    params: ChatEngineParamsStreaming | ChatEngineParamsNonStreaming,
  ): Promise<Response | AsyncIterable<Response>> {
@@ -1,10 +1,9 @@
-import { randomUUID } from "@llamaindex/env";
 import type { ChatHistory } from "../../ChatHistory.js";
 import { getHistory } from "../../ChatHistory.js";
 import type { ContextSystemPrompt } from "../../Prompt.js";
 import { Response } from "../../Response.js";
 import type { BaseRetriever } from "../../Retriever.js";
-import type { Event } from "../../callbacks/CallbackManager.js";
+import { wrapEventCaller } from "../../internal/context/EventCaller.js";
 import type { ChatMessage, ChatResponseChunk, LLM } from "../../llm/index.js";
 import { OpenAI } from "../../llm/index.js";
 import type { MessageContent } from "../../llm/types.js";
@@ -60,6 +59,7 @@ export class ContextChatEngine extends PromptMixin implements ChatEngine {

  chat(params: ChatEngineParamsStreaming): Promise<AsyncIterable<Response>>;
  chat(params: ChatEngineParamsNonStreaming): Promise<Response>;
+  @wrapEventCaller
  async chat(
    params: ChatEngineParamsStreaming | ChatEngineParamsNonStreaming,
  ): Promise<Response | AsyncIterable<Response>> {
@@ -67,21 +67,14 @@ export class ContextChatEngine extends PromptMixin implements ChatEngine {
    const chatHistory = params.chatHistory
      ? getHistory(params.chatHistory)
      : this.chatHistory;
-    const parentEvent: Event = {
-      id: randomUUID(),
-      type: "wrapper",
-      tags: ["final"],
-    };
    const requestMessages = await this.prepareRequestMessages(
      message,
      chatHistory,
-      parentEvent,
    );

    if (stream) {
      const stream = await this.chatModel.chat({
        messages: requestMessages.messages,
-        parentEvent,
        stream: true,
      });
      return streamConverter(
@@ -98,10 +91,12 @@ export class ContextChatEngine extends PromptMixin implements ChatEngine {
    }
    const response = await this.chatModel.chat({
      messages: requestMessages.messages,
-      parentEvent,
    });
    chatHistory.addMessage(response.message);
-    return new Response(response.message.content, requestMessages.nodes);
+    return new Response(
+      extractText(response.message.content),
+      requestMessages.nodes,
+    );
  }

  reset() {
@@ -111,14 +106,13 @@ export class ContextChatEngine extends PromptMixin implements ChatEngine {
  private async prepareRequestMessages(
    message: MessageContent,
    chatHistory: ChatHistory,
-    parentEvent?: Event,
  ) {
    chatHistory.addMessage({
      content: message,
      role: "user",
    });
    const textOnly = extractText(message);
-    const context = await this.contextGenerator.generate(textOnly, parentEvent);
+    const context = await this.contextGenerator.generate(textOnly);
    const nodes = context.nodes.map((r) => r.node);
    const messages = await chatHistory.requestMessages(
      context ? [context.message] : undefined,
@@ -1,9 +1,7 @@
-import { randomUUID } from "@llamaindex/env";
 import type { NodeWithScore, TextNode } from "../../Node.js";
 import type { ContextSystemPrompt } from "../../Prompt.js";
 import { defaultContextSystemPrompt } from "../../Prompt.js";
 import type { BaseRetriever } from "../../Retriever.js";
-import type { Event } from "../../callbacks/CallbackManager.js";
 import type { BaseNodePostprocessor } from "../../postprocessors/index.js";
 import { PromptMixin } from "../../prompts/index.js";
 import type { Context, ContextGenerator } from "./types.js";
@@ -56,17 +54,9 @@ export class DefaultContextGenerator
    return nodesWithScore;
  }

-  async generate(message: string, parentEvent?: Event): Promise<Context> {
-    if (!parentEvent) {
-      parentEvent = {
-        id: randomUUID(),
-        type: "wrapper",
-        tags: ["final"],
-      };
-    }
+  async generate(message: string): Promise<Context> {
    const sourceNodesWithScore = await this.retriever.retrieve({
      query: message,
-      parentEvent,
    });

    const nodes = await this.applyNodePostprocessors(
@@ -1,9 +1,14 @@
 import type { ChatHistory } from "../../ChatHistory.js";
 import { getHistory } from "../../ChatHistory.js";
 import { Response } from "../../Response.js";
+import { wrapEventCaller } from "../../internal/context/EventCaller.js";
 import type { ChatResponseChunk, LLM } from "../../llm/index.js";
 import { OpenAI } from "../../llm/index.js";
-import { streamConverter, streamReducer } from "../../llm/utils.js";
+import {
+  extractText,
+  streamConverter,
+  streamReducer,
+} from "../../llm/utils.js";
 import type {
  ChatEngine,
  ChatEngineParamsNonStreaming,
@@ -25,6 +30,7 @@ export class SimpleChatEngine implements ChatEngine {

  chat(params: ChatEngineParamsStreaming): Promise<AsyncIterable<Response>>;
  chat(params: ChatEngineParamsNonStreaming): Promise<Response>;
+  @wrapEventCaller
  async chat(
    params: ChatEngineParamsStreaming | ChatEngineParamsNonStreaming,
  ): Promise<Response | AsyncIterable<Response>> {
@@ -44,7 +50,7 @@ export class SimpleChatEngine implements ChatEngine {
        streamReducer({
          stream,
          initialValue: "",
-          reducer: (accumulator, part) => (accumulator += part.delta),
+          reducer: (accumulator, part) => accumulator + part.delta,
          finished: (accumulator) => {
            chatHistory.addMessage({ content: accumulator, role: "assistant" });
          },
@@ -57,7 +63,7 @@ export class SimpleChatEngine implements ChatEngine {
      messages: await chatHistory.requestMessages(),
    });
    chatHistory.addMessage(response.message);
-    return new Response(response.message.content);
+    return new Response(extractText(response.message.content));
  }

  reset() {
@@ -1,7 +1,6 @@
 import type { ChatHistory } from "../../ChatHistory.js";
 import type { BaseNode, NodeWithScore } from "../../Node.js";
 import type { Response } from "../../Response.js";
-import type { Event } from "../../callbacks/CallbackManager.js";
 import type { ChatMessage } from "../../llm/index.js";
 import type { MessageContent } from "../../llm/types.js";
 import type { ToolOutput } from "../../tools/types.js";
@@ -56,7 +55,7 @@ export interface Context {
 * A ContextGenerator is used to generate a context based on a message's text content
 */
 export interface ContextGenerator {
-  generate(message: string, parentEvent?: Event): Promise<Context>;
+  generate(message: string): Promise<Context>;
 }

 export enum ChatResponseMode {
@@ -1,8 +1,7 @@
-import { randomUUID } from "@llamaindex/env";
 import type { NodeWithScore } from "../../Node.js";
 import type { Response } from "../../Response.js";
 import type { BaseRetriever } from "../../Retriever.js";
-import type { Event } from "../../callbacks/CallbackManager.js";
+import { wrapEventCaller } from "../../internal/context/EventCaller.js";
 import type { BaseNodePostprocessor } from "../../postprocessors/index.js";
 import { PromptMixin } from "../../prompts/Mixin.js";
 import type { BaseSynthesizer } from "../../synthesizers/index.js";
@@ -62,10 +61,9 @@ export class RetrieverQueryEngine
    return nodesWithScore;
  }

-  private async retrieve(query: string, parentEvent: Event) {
+  private async retrieve(query: string) {
    const nodes = await this.retriever.retrieve({
      query,
-      parentEvent,
      preFilters: this.preFilters,
    });

@@ -74,28 +72,22 @@ export class RetrieverQueryEngine

  query(params: QueryEngineParamsStreaming): Promise<AsyncIterable<Response>>;
  query(params: QueryEngineParamsNonStreaming): Promise<Response>;
+  @wrapEventCaller
  async query(
    params: QueryEngineParamsStreaming | QueryEngineParamsNonStreaming,
  ): Promise<Response | AsyncIterable<Response>> {
    const { query, stream } = params;
-    const parentEvent: Event = params.parentEvent || {
-      id: randomUUID(),
-      type: "wrapper",
-      tags: ["final"],
-    };
-    const nodesWithScore = await this.retrieve(query, parentEvent);
+    const nodesWithScore = await this.retrieve(query);
    if (stream) {
      return this.responseSynthesizer.synthesize({
        query,
        nodesWithScore,
-        parentEvent,
        stream: true,
      });
    }
    return this.responseSynthesizer.synthesize({
      query,
      nodesWithScore,
-      parentEvent,
    });
  }
 }
@@ -1,10 +1,8 @@
-import { randomUUID } from "@llamaindex/env";
 import type { NodeWithScore } from "../../Node.js";
 import { TextNode } from "../../Node.js";
 import { LLMQuestionGenerator } from "../../QuestionGenerator.js";
 import type { Response } from "../../Response.js";
 import type { ServiceContext } from "../../ServiceContext.js";
-import type { Event } from "../../callbacks/CallbackManager.js";
 import { PromptMixin } from "../../prompts/Mixin.js";
 import type { BaseSynthesizer } from "../../synthesizers/index.js";
 import {
@@ -20,6 +18,7 @@ import type {
  ToolMetadata,
 } from "../../types.js";

+import { wrapEventCaller } from "../../internal/context/EventCaller.js";
 import type { BaseQuestionGenerator, SubQuestion } from "./types.js";

 /**
@@ -80,29 +79,15 @@ export class SubQuestionQueryEngine

  query(params: QueryEngineParamsStreaming): Promise<AsyncIterable<Response>>;
  query(params: QueryEngineParamsNonStreaming): Promise<Response>;
+  @wrapEventCaller
  async query(
    params: QueryEngineParamsStreaming | QueryEngineParamsNonStreaming,
  ): Promise<Response | AsyncIterable<Response>> {
    const { query, stream } = params;
    const subQuestions = await this.questionGen.generate(this.metadatas, query);

-    // groups final retrieval+synthesis operation
-    const parentEvent: Event = params.parentEvent || {
-      id: randomUUID(),
-      type: "wrapper",
-      tags: ["final"],
-    };
-
-    // groups all sub-queries
-    const subQueryParentEvent: Event = {
-      id: randomUUID(),
-      parentId: parentEvent.id,
-      type: "wrapper",
-      tags: ["intermediate"],
-    };
-
    const subQNodes = await Promise.all(
-      subQuestions.map((subQ) => this.querySubQ(subQ, subQueryParentEvent)),
+      subQuestions.map((subQ) => this.querySubQ(subQ)),
    );

    const nodesWithScore = subQNodes
@@ -112,21 +97,16 @@ export class SubQuestionQueryEngine
      return this.responseSynthesizer.synthesize({
        query,
        nodesWithScore,
-        parentEvent,
        stream: true,
      });
    }
    return this.responseSynthesizer.synthesize({
      query,
      nodesWithScore,
-      parentEvent,
    });
  }

-  private async querySubQ(
-    subQ: SubQuestion,
-    parentEvent?: Event,
-  ): Promise<NodeWithScore | null> {
+  private async querySubQ(subQ: SubQuestion): Promise<NodeWithScore | null> {
    try {
      const question = subQ.subQuestion;

@@ -140,7 +120,6 @@ export class SubQuestionQueryEngine

      const responseText = await queryEngine?.call?.({
        query: question,
-        parentEvent,
      });

      if (!responseText) {
@@ -2,6 +2,7 @@ import { MetadataMode } from "../Node.js";
 import type { ServiceContext } from "../ServiceContext.js";
 import { llmFromSettingsOrContext } from "../Settings.js";
 import type { ChatMessage, LLM } from "../llm/types.js";
+import { extractText } from "../llm/utils.js";
 import { PromptMixin } from "../prompts/Mixin.js";
 import type { CorrectnessSystemPrompt } from "./prompts.js";
 import {
@@ -85,7 +86,7 @@ export class CorrectnessEvaluator extends PromptMixin implements BaseEvaluator {
    });

    const [score, reasoning] = this.parserFunction(
-      evalResponse.message.content,
+      extractText(evalResponse.message.content),
    );

    return {
@@ -278,7 +278,7 @@ export class KeywordTableIndex extends BaseIndex<KeywordTable> {
    serviceContext = serviceContext ?? serviceContextFromDefaults({});
    const docStore = storageContext.docStore;

-    docStore.addDocuments(documents, true);
+    await docStore.addDocuments(documents, true);
    for (const doc of documents) {
      docStore.setDocumentHash(doc.id_, doc.hash);
    }
@@ -1,5 +1,4 @@
 import _ from "lodash";
-import { globalsHelper } from "../../GlobalsHelper.js";
 import type { BaseNode, Document, NodeWithScore } from "../../Node.js";
 import type { ChoiceSelectPrompt } from "../../Prompt.js";
 import { defaultChoiceSelectPrompt } from "../../Prompt.js";
@@ -11,6 +10,7 @@ import {
  nodeParserFromSettingsOrContext,
 } from "../../Settings.js";
 import { RetrieverQueryEngine } from "../../engines/query/index.js";
+import { wrapEventCaller } from "../../internal/context/EventCaller.js";
 import type { BaseNodePostprocessor } from "../../postprocessors/index.js";
 import type { StorageContext } from "../../storage/StorageContext.js";
 import { storageContextFromDefaults } from "../../storage/StorageContext.js";
@@ -135,7 +135,7 @@ export class SummaryIndex extends BaseIndex<IndexList> {
    serviceContext = serviceContext;
    const docStore = storageContext.docStore;

-    docStore.addDocuments(documents, true);
+    await docStore.addDocuments(documents, true);
    for (const doc of documents) {
      docStore.setDocumentHash(doc.id_, doc.hash);
    }
@@ -287,10 +287,8 @@ export class SummaryIndexRetriever implements BaseRetriever {
    this.index = index;
  }

-  async retrieve({
-    query,
-    parentEvent,
-  }: RetrieveParams): Promise<NodeWithScore[]> {
+  @wrapEventCaller
+  async retrieve({ query }: RetrieveParams): Promise<NodeWithScore[]> {
    const nodeIds = this.index.indexStruct.nodes;
    const nodes = await this.index.docStore.getNodes(nodeIds);
    const result = nodes.map((node) => ({
@@ -298,13 +296,9 @@ export class SummaryIndexRetriever implements BaseRetriever {
      score: 1,
    }));

-    Settings.callbackManager.onRetrieve({
+    Settings.callbackManager.dispatchEvent("retrieve", {
      query,
      nodes: result,
-      event: globalsHelper.createEvent({
-        parentEvent,
-        type: "retrieve",
-      }),
    });

    return result;
@@ -340,10 +334,7 @@ export class SummaryIndexLLMRetriever implements BaseRetriever {
    this.serviceContext = serviceContext || index.serviceContext;
  }

-  async retrieve({
-    query,
-    parentEvent,
-  }: RetrieveParams): Promise<NodeWithScore[]> {
+  async retrieve({ query }: RetrieveParams): Promise<NodeWithScore[]> {
    const nodeIds = this.index.indexStruct.nodes;
    const results: NodeWithScore[] = [];

@@ -380,13 +371,9 @@ export class SummaryIndexLLMRetriever implements BaseRetriever {
      results.push(...nodeWithScores);
    }

-    Settings.callbackManager.onRetrieve({
+    Settings.callbackManager.dispatchEvent("retrieve", {
      query,
      nodes: results,
-      event: globalsHelper.createEvent({
-        parentEvent,
-        type: "retrieve",
-      }),
    });

    return results;
@@ -1,4 +1,3 @@
-import { globalsHelper } from "../../GlobalsHelper.js";
 import type {
  BaseNode,
  Document,
@@ -18,7 +17,6 @@ import {
  embedModelFromSettingsOrContext,
  nodeParserFromSettingsOrContext,
 } from "../../Settings.js";
-import { type Event } from "../../callbacks/CallbackManager.js";
 import { DEFAULT_SIMILARITY_TOP_K } from "../../constants.js";
 import type {
  BaseEmbedding,
@@ -31,6 +29,7 @@ import {
  DocStoreStrategy,
  createDocStoreStrategy,
 } from "../../ingestion/strategies/index.js";
+import { wrapEventCaller } from "../../internal/context/EventCaller.js";
 import type { BaseNodePostprocessor } from "../../postprocessors/types.js";
 import type { StorageContext } from "../../storage/StorageContext.js";
 import { storageContextFromDefaults } from "../../storage/StorageContext.js";
@@ -365,7 +364,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
    vectorStore: VectorStore,
    refDocId: string,
  ): Promise<void> {
-    vectorStore.delete(refDocId);
+    await vectorStore.delete(refDocId);

    if (!vectorStore.storesText) {
      const refDocInfo = await this.docStore.getRefDocInfo(refDocId);
@@ -373,7 +372,7 @@ export class VectorStoreIndex extends BaseIndex<IndexDict> {
      if (refDocInfo) {
        for (const nodeId of refDocInfo.nodeIds) {
          this.indexStruct.delete(nodeId);
-          vectorStore.delete(nodeId);
+          await vectorStore.delete(nodeId);
        }
      }
      await this.indexStore.addIndexStruct(this.indexStruct);
@@ -440,7 +439,6 @@ export class VectorIndexRetriever implements BaseRetriever {

  async retrieve({
    query,
-    parentEvent,
    preFilters,
  }: RetrieveParams): Promise<NodeWithScore[]> {
    let nodesWithScores = await this.textRetrieve(
@@ -450,7 +448,7 @@ export class VectorIndexRetriever implements BaseRetriever {
    nodesWithScores = nodesWithScores.concat(
      await this.textToImageRetrieve(query, preFilters as MetadataFilters),
    );
-    this.sendEvent(query, nodesWithScores, parentEvent);
+    this.sendEvent(query, nodesWithScores);
    return nodesWithScores;
  }

@@ -487,18 +485,14 @@ export class VectorIndexRetriever implements BaseRetriever {
    return this.buildNodeListFromQueryResult(result);
  }

+  @wrapEventCaller
  protected sendEvent(
    query: string,
    nodesWithScores: NodeWithScore<Metadata>[],
-    parentEvent: Event | undefined,
  ) {
-    Settings.callbackManager.onRetrieve({
+    Settings.callbackManager.dispatchEvent("retrieve", {
      query,
      nodes: nodesWithScores,
-      event: globalsHelper.createEvent({
-        parentEvent,
-        type: "retrieve",
-      }),
    });
  }

@@ -25,7 +25,7 @@ export class DuplicatesStrategy implements TransformComponent {
      }
    }

-    this.docStore.addDocuments(nodesToRun, true);
+    await this.docStore.addDocuments(nodesToRun, true);

    return nodesToRun;
  }
@@ -26,7 +26,7 @@ export class UpsertsStrategy implements TransformComponent {
      }
    }
    // add non-duplicate docs
-    this.docStore.addDocuments(dedupedNodes, true);
+    await this.docStore.addDocuments(dedupedNodes, true);
    return dedupedNodes;
  }
 }
@@ -5,9 +5,16 @@ import { DuplicatesStrategy } from "./DuplicatesStrategy.js";
 import { UpsertsAndDeleteStrategy } from "./UpsertsAndDeleteStrategy.js";
 import { UpsertsStrategy } from "./UpsertsStrategy.js";

+/**
+ * Document de-deduplication strategies work by comparing the hashes or ids stored in the document store.
+ * They require a document store to be set which must be persisted across pipeline runs.
+ */
 export enum DocStoreStrategy {
+  // Use upserts to handle duplicates. Checks if the a document is already in the doc store based on its id. If it is not, or if the hash of the document is updated, it will update the document in the doc store and run the transformations.
  UPSERTS = "upserts",
+  // Only handle duplicates. Checks if the hash of a document is already in the doc store. Only then it will add the document to the doc store and run the transformations
  DUPLICATES_ONLY = "duplicates_only",
+  // Use upserts and delete to handle duplicates. Like the upsert strategy but it will also delete non-existing documents from the doc store
  UPSERTS_AND_DELETE = "upserts_and_delete",
  NONE = "none", // no-op strategy
 }
@@ -0,0 +1,99 @@
+import { AsyncLocalStorage, randomUUID } from "@llamaindex/env";
+import { isAsyncGenerator, isGenerator } from "../utils.js";
+
+const eventReasonAsyncLocalStorage = new AsyncLocalStorage<EventCaller>();
+
+/**
+ * EventCaller is used to track the caller of an event.
+ */
+export class EventCaller {
+  public readonly id = randomUUID();
+
+  private constructor(
+    public readonly caller: unknown,
+    public readonly parent: EventCaller | null,
+  ) {}
+
+  #computedCallers: unknown[] | null = null;
+
+  public get computedCallers(): unknown[] {
+    if (this.#computedCallers != null) {
+      return this.#computedCallers;
+    }
+    const callers = [this.caller];
+    let parent = this.parent;
+    while (parent != null) {
+      callers.push(parent.caller);
+      parent = parent.parent;
+    }
+    this.#computedCallers = callers;
+    return callers;
+  }
+
+  public static create(
+    caller: unknown,
+    parent: EventCaller | null,
+  ): EventCaller {
+    return new EventCaller(caller, parent);
+  }
+}
+
+export function getEventCaller(): EventCaller | null {
+  return eventReasonAsyncLocalStorage.getStore() ?? null;
+}
+
+/**
+ * @param caller who is calling this function, pass in `this` if it's a class method
+ * @param fn
+ */
+function withEventCaller<T>(caller: unknown, fn: () => T) {
+  // create a chain of event callers
+  const parentCaller = getEventCaller();
+  return eventReasonAsyncLocalStorage.run(
+    EventCaller.create(caller, parentCaller),
+    fn,
+  );
+}
+
+export function wrapEventCaller<This, Result, Args extends unknown[]>(
+  originalMethod: (this: This, ...args: Args) => Result,
+  context: ClassMethodDecoratorContext<object>,
+) {
+  const name = context.name;
+  context.addInitializer(function () {
+    // @ts-expect-error
+    const fn = this[name].bind(this);
+    // @ts-expect-error
+    this[name] = (...args: unknown[]) => {
+      return withEventCaller(this, () => fn(...args));
+    };
+  });
+  return function (this: This, ...args: Args): Result {
+    const result = originalMethod.call(this, ...args);
+    // patch for iterators because AsyncLocalStorage doesn't work with them
+    if (isAsyncGenerator(result)) {
+      const snapshot = AsyncLocalStorage.snapshot();
+      return (async function* asyncGeneratorWrapper() {
+        while (true) {
+          const { value, done } = await snapshot(() => result.next());
+          if (done) {
+            break;
+          }
+          yield value;
+        }
+      })() as Result;
+    } else if (isGenerator(result)) {
+      const snapshot = AsyncLocalStorage.snapshot();
+      return (function* generatorWrapper() {
+        while (true) {
+          const { value, done } = snapshot(() => result.next());
+          if (done) {
+            break;
+          }
+          yield value;
+        }
+      })() as Result;
+    }
+    return result;
+  };
+}
@@ -0,0 +1,7 @@
+export const isAsyncGenerator = (obj: unknown): obj is AsyncGenerator => {
+  return obj != null && typeof obj === "object" && Symbol.asyncIterator in obj;
+};
+
+export const isGenerator = (obj: unknown): obj is Generator => {
+  return obj != null && typeof obj === "object" && Symbol.iterator in obj;
+};
@@ -1,28 +1,8 @@
-import type OpenAILLM from "openai";
-import type { ClientOptions as OpenAIClientOptions } from "openai";
-import {
-  type Event,
-  type EventType,
-  type OpenAIStreamToken,
-  type StreamCallbackResponse,
-} from "../callbacks/CallbackManager.js";
+import { type StreamCallbackResponse } from "../callbacks/CallbackManager.js";

-import type { ChatCompletionMessageParam } from "openai/resources/index.js";
 import type { LLMOptions } from "portkey-ai";
-import { Tokenizers } from "../GlobalsHelper.js";
 import { getCallbackManager } from "../internal/settings/CallbackManager.js";
-import type { AnthropicSession } from "./anthropic.js";
-import { getAnthropicSession } from "./anthropic.js";
-import type { AzureOpenAIConfig } from "./azure.js";
-import {
-  getAzureBaseUrl,
-  getAzureConfigFromEnv,
-  getAzureModel,
-  shouldUseAzure,
-} from "./azure.js";
 import { BaseLLM } from "./base.js";
-import type { OpenAISession } from "./open_ai.js";
-import { getOpenAISession } from "./open_ai.js";
 import type { PortkeySession } from "./portkey.js";
 import { getPortkeySession } from "./portkey.js";
 import { ReplicateSession } from "./replicate_ai.js";
@@ -35,290 +15,7 @@ import type {
  LLMMetadata,
  MessageType,
 } from "./types.js";
-import { llmEvent } from "./utils.js";
-
-export const GPT4_MODELS = {
-  "gpt-4": { contextWindow: 8192 },
-  "gpt-4-32k": { contextWindow: 32768 },
-  "gpt-4-32k-0613": { contextWindow: 32768 },
-  "gpt-4-turbo-preview": { contextWindow: 128000 },
-  "gpt-4-1106-preview": { contextWindow: 128000 },
-  "gpt-4-0125-preview": { contextWindow: 128000 },
-  "gpt-4-vision-preview": { contextWindow: 128000 },
-};
-
-// NOTE we don't currently support gpt-3.5-turbo-instruct and don't plan to in the near future
-export const GPT35_MODELS = {
-  "gpt-3.5-turbo": { contextWindow: 4096 },
-  "gpt-3.5-turbo-0613": { contextWindow: 4096 },
-  "gpt-3.5-turbo-16k": { contextWindow: 16384 },
-  "gpt-3.5-turbo-16k-0613": { contextWindow: 16384 },
-  "gpt-3.5-turbo-1106": { contextWindow: 16384 },
-  "gpt-3.5-turbo-0125": { contextWindow: 16384 },
-};
-
-/**
- * We currently support GPT-3.5 and GPT-4 models
- */
-export const ALL_AVAILABLE_OPENAI_MODELS = {
-  ...GPT4_MODELS,
-  ...GPT35_MODELS,
-};
-
-export const isFunctionCallingModel = (model: string): boolean => {
-  const isChatModel = Object.keys(ALL_AVAILABLE_OPENAI_MODELS).includes(model);
-  const isOld = model.includes("0314") || model.includes("0301");
-  return isChatModel && !isOld;
-};
-
-/**
- * OpenAI LLM implementation
- */
-export class OpenAI extends BaseLLM {
-  // Per completion OpenAI params
-  model: keyof typeof ALL_AVAILABLE_OPENAI_MODELS | string;
-  temperature: number;
-  topP: number;
-  maxTokens?: number;
-  additionalChatOptions?: Omit<
-    Partial<OpenAILLM.Chat.ChatCompletionCreateParams>,
-    | "max_tokens"
-    | "messages"
-    | "model"
-    | "temperature"
-    | "top_p"
-    | "stream"
-    | "tools"
-    | "toolChoice"
-  >;
-
-  // OpenAI session params
-  apiKey?: string = undefined;
-  maxRetries: number;
-  timeout?: number;
-  session: OpenAISession;
-  additionalSessionOptions?: Omit<
-    Partial<OpenAIClientOptions>,
-    "apiKey" | "maxRetries" | "timeout"
-  >;
-
-  constructor(
-    init?: Partial<OpenAI> & {
-      azure?: AzureOpenAIConfig;
-    },
-  ) {
-    super();
-    this.model = init?.model ?? "gpt-3.5-turbo";
-    this.temperature = init?.temperature ?? 0.1;
-    this.topP = init?.topP ?? 1;
-    this.maxTokens = init?.maxTokens ?? undefined;
-
-    this.maxRetries = init?.maxRetries ?? 10;
-    this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds
-    this.additionalChatOptions = init?.additionalChatOptions;
-    this.additionalSessionOptions = init?.additionalSessionOptions;
-
-    if (init?.azure || shouldUseAzure()) {
-      const azureConfig = getAzureConfigFromEnv({
-        ...init?.azure,
-        model: getAzureModel(this.model),
-      });
-
-      if (!azureConfig.apiKey) {
-        throw new Error(
-          "Azure API key is required for OpenAI Azure models. Please set the AZURE_OPENAI_KEY environment variable.",
-        );
-      }
-
-      this.apiKey = azureConfig.apiKey;
-      this.session =
-        init?.session ??
-        getOpenAISession({
-          azure: true,
-          apiKey: this.apiKey,
-          baseURL: getAzureBaseUrl(azureConfig),
-          maxRetries: this.maxRetries,
-          timeout: this.timeout,
-          defaultQuery: { "api-version": azureConfig.apiVersion },
-          ...this.additionalSessionOptions,
-        });
-    } else {
-      this.apiKey = init?.apiKey ?? undefined;
-      this.session =
-        init?.session ??
-        getOpenAISession({
-          apiKey: this.apiKey,
-          maxRetries: this.maxRetries,
-          timeout: this.timeout,
-          ...this.additionalSessionOptions,
-        });
-    }
-  }
-
-  get metadata() {
-    const contextWindow =
-      ALL_AVAILABLE_OPENAI_MODELS[
-        this.model as keyof typeof ALL_AVAILABLE_OPENAI_MODELS
-      ]?.contextWindow ?? 1024;
-    return {
-      model: this.model,
-      temperature: this.temperature,
-      topP: this.topP,
-      maxTokens: this.maxTokens,
-      contextWindow,
-      tokenizer: Tokenizers.CL100K_BASE,
-      isFunctionCallingModel: isFunctionCallingModel(this.model),
-    };
-  }
-
-  mapMessageType(
-    messageType: MessageType,
-  ): "user" | "assistant" | "system" | "function" | "tool" {
-    switch (messageType) {
-      case "user":
-        return "user";
-      case "assistant":
-        return "assistant";
-      case "system":
-        return "system";
-      case "function":
-        return "function";
-      case "tool":
-        return "tool";
-      default:
-        return "user";
-    }
-  }
-
-  toOpenAIMessage(messages: ChatMessage[]) {
-    return messages.map((message) => {
-      const additionalKwargs = message.additionalKwargs ?? {};
-
-      if (message.additionalKwargs?.toolCalls) {
-        additionalKwargs.tool_calls = message.additionalKwargs.toolCalls;
-        delete additionalKwargs.toolCalls;
-      }
-
-      return {
-        role: this.mapMessageType(message.role),
-        content: message.content,
-        ...additionalKwargs,
-      };
-    });
-  }
-
-  chat(
-    params: LLMChatParamsStreaming,
-  ): Promise<AsyncIterable<ChatResponseChunk>>;
-  chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
-  @llmEvent
-  async chat(
-    params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
-  ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
-    const { messages, parentEvent, stream, tools, toolChoice } = params;
-    const baseRequestParams: OpenAILLM.Chat.ChatCompletionCreateParams = {
-      model: this.model,
-      temperature: this.temperature,
-      max_tokens: this.maxTokens,
-      tools: tools,
-      tool_choice: toolChoice,
-      messages: this.toOpenAIMessage(messages) as ChatCompletionMessageParam[],
-      top_p: this.topP,
-      ...this.additionalChatOptions,
-    };
-
-    // Streaming
-    if (stream) {
-      return this.streamChat(params);
-    }
-
-    // Non-streaming
-    const response = await this.session.openai.chat.completions.create({
-      ...baseRequestParams,
-      stream: false,
-    });
-
-    const content = response.choices[0].message?.content ?? null;
-
-    const kwargsOutput: Record<string, any> = {};
-
-    if (response.choices[0].message?.tool_calls) {
-      kwargsOutput.toolCalls = response.choices[0].message.tool_calls;
-    }
-
-    return {
-      message: {
-        content,
-        role: response.choices[0].message.role,
-        additionalKwargs: kwargsOutput,
-      },
-    };
-  }
-
-  protected async *streamChat({
-    messages,
-    parentEvent,
-  }: LLMChatParamsStreaming): AsyncIterable<ChatResponseChunk> {
-    const baseRequestParams: OpenAILLM.Chat.ChatCompletionCreateParams = {
-      model: this.model,
-      temperature: this.temperature,
-      max_tokens: this.maxTokens,
-      messages: messages.map(
-        (message) =>
-          ({
-            role: this.mapMessageType(message.role),
-            content: message.content,
-          }) as ChatCompletionMessageParam,
-      ),
-      top_p: this.topP,
-      ...this.additionalChatOptions,
-    };
-
-    //Now let's wrap our stream in a callback
-    const onLLMStream = getCallbackManager().onLLMStream;
-
-    const chunk_stream: AsyncIterable<OpenAIStreamToken> =
-      await this.session.openai.chat.completions.create({
-        ...baseRequestParams,
-        stream: true,
-      });
-
-    const event: Event = parentEvent
-      ? parentEvent
-      : {
-          id: "unspecified",
-          type: "llmPredict" as EventType,
-        };
-
-    // TODO: add callback to streamConverter and use streamConverter here
-    //Indices
-    let idx_counter: number = 0;
-    for await (const part of chunk_stream) {
-      if (!part.choices.length) continue;
-
-      //Increment
-      part.choices[0].index = idx_counter;
-      const is_done: boolean =
-        part.choices[0].finish_reason === "stop" ? true : false;
-      //onLLMStream Callback
-
-      const stream_callback: StreamCallbackResponse = {
-        event: event,
-        index: idx_counter,
-        isDone: is_done,
-        token: part,
-      };
-      onLLMStream(stream_callback);
-
-      idx_counter++;
-
-      yield {
-        delta: part.choices[0].delta.content ?? "",
-      };
-    }
-    return;
-  }
-}
+import { extractText, wrapLLMEvent } from "./utils.js";

 export const ALL_AVAILABLE_LLAMADEUCE_MODELS = {
  "Llama-2-70b-chat-old": {
@@ -518,16 +215,15 @@ If a question does not make any sense, or is not factually coherent, explain why

    return {
      prompt: messages.reduce((acc, message, index) => {
+        const content = extractText(message.content);
        if (index % 2 === 0) {
          return (
-            `${acc}${
-              withBos ? BOS : ""
-            }${B_INST} ${message.content.trim()} ${E_INST}` +
+            `${acc}${withBos ? BOS : ""}${B_INST} ${content.trim()} ${E_INST}` +
            (withNewlines ? "\n" : "")
          );
        } else {
          return (
-            `${acc} ${message.content.trim()}` +
+            `${acc} ${content.trim()}` +
            (withNewlines ? "\n" : " ") +
            (withBos ? EOS : "")
          ); // Yes, the EOS comes after the space. This is not a mistake.
@@ -541,11 +237,11 @@ If a question does not make any sense, or is not factually coherent, explain why
    params: LLMChatParamsStreaming,
  ): Promise<AsyncIterable<ChatResponseChunk>>;
  chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
-  @llmEvent
+  @wrapLLMEvent
  async chat(
    params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
  ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
-    const { messages, parentEvent, stream } = params;
+    const { messages, stream } = params;
    const api = ALL_AVAILABLE_LLAMADEUCE_MODELS[this.model]
      .replicateApi as `${string}/${string}:${string}`;

@@ -586,173 +282,6 @@ If a question does not make any sense, or is not factually coherent, explain why
  }
 }

-export const ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS = {
-  "claude-2.1": {
-    contextWindow: 200000,
-  },
-  "claude-instant-1.2": {
-    contextWindow: 100000,
-  },
-};
-
-export const ALL_AVAILABLE_V3_MODELS = {
-  "claude-3-opus": { contextWindow: 200000 },
-  "claude-3-sonnet": { contextWindow: 200000 },
-  "claude-3-haiku": { contextWindow: 200000 },
-};
-
-export const ALL_AVAILABLE_ANTHROPIC_MODELS = {
-  ...ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS,
-  ...ALL_AVAILABLE_V3_MODELS,
-};
-
-const AVAILABLE_ANTHROPIC_MODELS_WITHOUT_DATE: { [key: string]: string } = {
-  "claude-3-opus": "claude-3-opus-20240229",
-  "claude-3-sonnet": "claude-3-sonnet-20240229",
-  "claude-3-haiku": "claude-3-haiku-20240307",
-} as { [key in keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS]: string };
-
-/**
- * Anthropic LLM implementation
- */
-
-export class Anthropic extends BaseLLM {
-  // Per completion Anthropic params
-  model: keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS;
-  temperature: number;
-  topP: number;
-  maxTokens?: number;
-
-  // Anthropic session params
-  apiKey?: string = undefined;
-  maxRetries: number;
-  timeout?: number;
-  session: AnthropicSession;
-
-  constructor(init?: Partial<Anthropic>) {
-    super();
-    this.model = init?.model ?? "claude-3-opus";
-    this.temperature = init?.temperature ?? 0.1;
-    this.topP = init?.topP ?? 0.999; // Per Ben Mann
-    this.maxTokens = init?.maxTokens ?? undefined;
-
-    this.apiKey = init?.apiKey ?? undefined;
-    this.maxRetries = init?.maxRetries ?? 10;
-    this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds
-    this.session =
-      init?.session ??
-      getAnthropicSession({
-        apiKey: this.apiKey,
-        maxRetries: this.maxRetries,
-        timeout: this.timeout,
-      });
-  }
-
-  get metadata() {
-    return {
-      model: this.model,
-      temperature: this.temperature,
-      topP: this.topP,
-      maxTokens: this.maxTokens,
-      contextWindow: ALL_AVAILABLE_ANTHROPIC_MODELS[this.model].contextWindow,
-      tokenizer: undefined,
-    };
-  }
-
-  getModelName = (model: string): string => {
-    if (Object.keys(AVAILABLE_ANTHROPIC_MODELS_WITHOUT_DATE).includes(model)) {
-      return AVAILABLE_ANTHROPIC_MODELS_WITHOUT_DATE[model];
-    }
-    return model;
-  };
-
-  formatMessages(messages: ChatMessage[]) {
-    return messages.map((message) => {
-      if (message.role !== "user" && message.role !== "assistant") {
-        throw new Error("Unsupported Anthropic role");
-      }
-
-      return {
-        content: message.content,
-        role: message.role,
-      };
-    });
-  }
-
-  chat(
-    params: LLMChatParamsStreaming,
-  ): Promise<AsyncIterable<ChatResponseChunk>>;
-  chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
-  @llmEvent
-  async chat(
-    params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
-  ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
-    let { messages } = params;
-
-    const { parentEvent, stream } = params;
-
-    let systemPrompt: string | null = null;
-
-    const systemMessages = messages.filter(
-      (message) => message.role === "system",
-    );
-
-    if (systemMessages.length > 0) {
-      systemPrompt = systemMessages
-        .map((message) => message.content)
-        .join("\n");
-      messages = messages.filter((message) => message.role !== "system");
-    }
-
-    //Streaming
-    if (stream) {
-      return this.streamChat(messages, parentEvent, systemPrompt);
-    }
-
-    //Non-streaming
-    const response = await this.session.anthropic.messages.create({
-      model: this.getModelName(this.model),
-      messages: this.formatMessages(messages),
-      max_tokens: this.maxTokens ?? 4096,
-      temperature: this.temperature,
-      top_p: this.topP,
-      ...(systemPrompt && { system: systemPrompt }),
-    });
-
-    return {
-      message: { content: response.content[0].text, role: "assistant" },
-    };
-  }
-
-  protected async *streamChat(
-    messages: ChatMessage[],
-    parentEvent?: Event | undefined,
-    systemPrompt?: string | null,
-  ): AsyncIterable<ChatResponseChunk> {
-    const stream = await this.session.anthropic.messages.create({
-      model: this.getModelName(this.model),
-      messages: this.formatMessages(messages),
-      max_tokens: this.maxTokens ?? 4096,
-      temperature: this.temperature,
-      top_p: this.topP,
-      stream: true,
-      ...(systemPrompt && { system: systemPrompt }),
-    });
-
-    let idx_counter: number = 0;
-    for await (const part of stream) {
-      const content =
-        part.type === "content_block_delta" ? part.delta.text : null;
-
-      if (typeof content !== "string") continue;
-
-      idx_counter++;
-      yield { delta: content };
-    }
-    return;
-  }
-}
-
 export class Portkey extends BaseLLM {
  apiKey?: string = undefined;
  baseURL?: string = undefined;
@@ -782,17 +311,20 @@ export class Portkey extends BaseLLM {
    params: LLMChatParamsStreaming,
  ): Promise<AsyncIterable<ChatResponseChunk>>;
  chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
-  @llmEvent
+  @wrapLLMEvent
  async chat(
    params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
  ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
-    const { messages, parentEvent, stream, extraParams } = params;
+    const { messages, stream, additionalChatOptions } = params;
    if (stream) {
-      return this.streamChat(messages, parentEvent, extraParams);
+      return this.streamChat(messages, additionalChatOptions);
    } else {
-      const bodyParams = extraParams || {};
+      const bodyParams = additionalChatOptions || {};
      const response = await this.session.portkey.chatCompletions.create({
-        messages,
+        messages: messages.map((message) => ({
+          content: extractText(message.content),
+          role: message.role,
+        })),
        ...bodyParams,
      });

@@ -804,25 +336,17 @@ export class Portkey extends BaseLLM {

  async *streamChat(
    messages: ChatMessage[],
-    parentEvent?: Event,
    params?: Record<string, any>,
  ): AsyncIterable<ChatResponseChunk> {
-    // Wrapping the stream in a callback.
-    const onLLMStream = getCallbackManager().onLLMStream;
-
    const chunkStream = await this.session.portkey.chatCompletions.create({
-      messages,
+      messages: messages.map((message) => ({
+        content: extractText(message.content),
+        role: message.role,
+      })),
      ...params,
      stream: true,
    });

-    const event: Event = parentEvent
-      ? parentEvent
-      : {
-          id: "unspecified",
-          type: "llmPredict" as EventType,
-        };
-
    //Indices
    let idx_counter: number = 0;
    for await (const part of chunkStream) {
@@ -833,12 +357,11 @@ export class Portkey extends BaseLLM {
      //onLLMStream Callback

      const stream_callback: StreamCallbackResponse = {
-        event: event,
        index: idx_counter,
        isDone: is_done,
        // token: part,
      };
-      onLLMStream(stream_callback);
+      getCallbackManager().dispatchEvent("stream", stream_callback);

      idx_counter++;

@@ -1,10 +1,19 @@
 import type { ClientOptions } from "@anthropic-ai/sdk";
-import Anthropic, { AI_PROMPT, HUMAN_PROMPT } from "@anthropic-ai/sdk";
+import { Anthropic as SDKAnthropic } from "@anthropic-ai/sdk";
 import { getEnv } from "@llamaindex/env";
+import type {
+  ChatMessage,
+  ChatResponse,
+  ChatResponseChunk,
+  LLMChatParamsNonStreaming,
+  LLMChatParamsStreaming,
+} from "llamaindex";
 import _ from "lodash";
+import { BaseLLM } from "./base.js";
+import { extractText, wrapLLMEvent } from "./utils.js";

 export class AnthropicSession {
-  anthropic: Anthropic;
+  anthropic: SDKAnthropic;

  constructor(options: ClientOptions = {}) {
    if (!options.apiKey) {
@@ -12,10 +21,10 @@ export class AnthropicSession {
    }

    if (!options.apiKey) {
-      throw new Error("Set Anthropic Key in ANTHROPIC_API_KEY env variable"); // Overriding Anthropic package's error message
+      throw new Error("Set Anthropic Key in ANTHROPIC_API_KEY env variable");
    }

-    this.anthropic = new Anthropic(options);
+    this.anthropic = new SDKAnthropic(options);
  }
 }

@@ -46,5 +55,164 @@ export function getAnthropicSession(options: ClientOptions = {}) {
  return session;
 }

-export const ANTHROPIC_HUMAN_PROMPT = HUMAN_PROMPT;
-export const ANTHROPIC_AI_PROMPT = AI_PROMPT;
+export const ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS = {
+  "claude-2.1": {
+    contextWindow: 200000,
+  },
+  "claude-instant-1.2": {
+    contextWindow: 100000,
+  },
+};
+
+export const ALL_AVAILABLE_V3_MODELS = {
+  "claude-3-opus": { contextWindow: 200000 },
+  "claude-3-sonnet": { contextWindow: 200000 },
+  "claude-3-haiku": { contextWindow: 200000 },
+};
+
+export const ALL_AVAILABLE_ANTHROPIC_MODELS = {
+  ...ALL_AVAILABLE_ANTHROPIC_LEGACY_MODELS,
+  ...ALL_AVAILABLE_V3_MODELS,
+};
+
+const AVAILABLE_ANTHROPIC_MODELS_WITHOUT_DATE: { [key: string]: string } = {
+  "claude-3-opus": "claude-3-opus-20240229",
+  "claude-3-sonnet": "claude-3-sonnet-20240229",
+  "claude-3-haiku": "claude-3-haiku-20240307",
+} as { [key in keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS]: string };
+
+export class Anthropic extends BaseLLM {
+  // Per completion Anthropic params
+  model: keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS;
+  temperature: number;
+  topP: number;
+  maxTokens?: number;
+
+  // Anthropic session params
+  apiKey?: string = undefined;
+  maxRetries: number;
+  timeout?: number;
+  session: AnthropicSession;
+
+  constructor(init?: Partial<Anthropic>) {
+    super();
+    this.model = init?.model ?? "claude-3-opus";
+    this.temperature = init?.temperature ?? 0.1;
+    this.topP = init?.topP ?? 0.999; // Per Ben Mann
+    this.maxTokens = init?.maxTokens ?? undefined;
+
+    this.apiKey = init?.apiKey ?? undefined;
+    this.maxRetries = init?.maxRetries ?? 10;
+    this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds
+    this.session =
+      init?.session ??
+      getAnthropicSession({
+        apiKey: this.apiKey,
+        maxRetries: this.maxRetries,
+        timeout: this.timeout,
+      });
+  }
+
+  get metadata() {
+    return {
+      model: this.model,
+      temperature: this.temperature,
+      topP: this.topP,
+      maxTokens: this.maxTokens,
+      contextWindow: ALL_AVAILABLE_ANTHROPIC_MODELS[this.model].contextWindow,
+      tokenizer: undefined,
+    };
+  }
+
+  getModelName = (model: string): string => {
+    if (Object.keys(AVAILABLE_ANTHROPIC_MODELS_WITHOUT_DATE).includes(model)) {
+      return AVAILABLE_ANTHROPIC_MODELS_WITHOUT_DATE[model];
+    }
+    return model;
+  };
+
+  formatMessages(messages: ChatMessage[]) {
+    return messages.map((message) => {
+      if (message.role !== "user" && message.role !== "assistant") {
+        throw new Error("Unsupported Anthropic role");
+      }
+
+      return {
+        content: extractText(message.content),
+        role: message.role,
+      };
+    });
+  }
+
+  chat(
+    params: LLMChatParamsStreaming,
+  ): Promise<AsyncIterable<ChatResponseChunk>>;
+  chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
+  @wrapLLMEvent
+  async chat(
+    params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
+  ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
+    let { messages } = params;
+
+    const { stream } = params;
+
+    let systemPrompt: string | null = null;
+
+    const systemMessages = messages.filter(
+      (message) => message.role === "system",
+    );
+
+    if (systemMessages.length > 0) {
+      systemPrompt = systemMessages
+        .map((message) => message.content)
+        .join("\n");
+      messages = messages.filter((message) => message.role !== "system");
+    }
+
+    //Streaming
+    if (stream) {
+      return this.streamChat(messages, systemPrompt);
+    }
+
+    //Non-streaming
+    const response = await this.session.anthropic.messages.create({
+      model: this.getModelName(this.model),
+      messages: this.formatMessages(messages),
+      max_tokens: this.maxTokens ?? 4096,
+      temperature: this.temperature,
+      top_p: this.topP,
+      ...(systemPrompt && { system: systemPrompt }),
+    });
+
+    return {
+      message: { content: response.content[0].text, role: "assistant" },
+    };
+  }
+
+  protected async *streamChat(
+    messages: ChatMessage[],
+    systemPrompt?: string | null,
+  ): AsyncIterable<ChatResponseChunk> {
+    const stream = await this.session.anthropic.messages.create({
+      model: this.getModelName(this.model),
+      messages: this.formatMessages(messages),
+      max_tokens: this.maxTokens ?? 4096,
+      temperature: this.temperature,
+      top_p: this.topP,
+      stream: true,
+      ...(systemPrompt && { system: systemPrompt }),
+    });
+
+    let idx_counter: number = 0;
+    for await (const part of stream) {
+      const content =
+        part.type === "content_block_delta" ? part.delta.text : null;
+
+      if (typeof content !== "string") continue;
+
+      idx_counter++;
+      yield { delta: content };
+    }
+    return;
+  }
+}
@@ -19,6 +19,10 @@ const ALL_AZURE_OPENAI_CHAT_MODELS = {
  },
  "gpt-4": { contextWindow: 8192, openAIModel: "gpt-4" },
  "gpt-4-32k": { contextWindow: 32768, openAIModel: "gpt-4-32k" },
+  "gpt-4-turbo": {
+    contextWindow: 128000,
+    openAIModel: "gpt-4-turbo",
+  },
  "gpt-4-vision-preview": {
    contextWindow: 128000,
    openAIModel: "gpt-4-vision-preview",
@@ -9,9 +9,15 @@ import type {
  LLMCompletionParamsStreaming,
  LLMMetadata,
 } from "./types.js";
-import { streamConverter } from "./utils.js";
+import { extractText, streamConverter } from "./utils.js";

-export abstract class BaseLLM implements LLM {
+export abstract class BaseLLM<
+  AdditionalChatOptions extends Record<string, unknown> = Record<
+    string,
+    unknown
+  >,
+> implements LLM<AdditionalChatOptions>
+{
  abstract metadata: LLMMetadata;

  complete(
@@ -23,11 +29,10 @@ export abstract class BaseLLM implements LLM {
  async complete(
    params: LLMCompletionParamsStreaming | LLMCompletionParamsNonStreaming,
  ): Promise<CompletionResponse | AsyncIterable<CompletionResponse>> {
-    const { prompt, parentEvent, stream } = params;
+    const { prompt, stream } = params;
    if (stream) {
      const stream = await this.chat({
        messages: [{ content: prompt, role: "user" }],
-        parentEvent,
        stream: true,
      });
      return streamConverter(stream, (chunk) => {
@@ -38,13 +43,17 @@ export abstract class BaseLLM implements LLM {
    }
    const chatResponse = await this.chat({
      messages: [{ content: prompt, role: "user" }],
-      parentEvent,
    });
-    return { text: chatResponse.message.content as string };
+    return {
+      text: extractText(chatResponse.message.content),
+      raw: chatResponse.raw,
+    };
  }

  abstract chat(
-    params: LLMChatParamsStreaming,
+    params: LLMChatParamsStreaming<AdditionalChatOptions>,
  ): Promise<AsyncIterable<ChatResponseChunk>>;
-  abstract chat(params: LLMChatParamsNonStreaming): Promise<ChatResponse>;
+  abstract chat(
+    params: LLMChatParamsNonStreaming<AdditionalChatOptions>,
+  ): Promise<ChatResponse>;
 }
@@ -1,5 +1,5 @@
 import { getEnv } from "@llamaindex/env";
-import { OpenAI } from "./LLM.js";
+import { OpenAI } from "./open_ai.js";

 export class FireworksLLM extends OpenAI {
  constructor(init?: Partial<OpenAI>) {
@@ -1,5 +1,5 @@
 import { getEnv } from "@llamaindex/env";
-import { OpenAI } from "./LLM.js";
+import { OpenAI } from "./open_ai.js";

 export class Groq extends OpenAI {
  constructor(init?: Partial<OpenAI>) {
@@ -1,4 +1,5 @@
 export * from "./LLM.js";
+export { Anthropic } from "./anthropic.js";
 export { FireworksLLM } from "./fireworks.js";
 export { Groq } from "./groq.js";
 export {
@@ -1,10 +1,6 @@
 import { getEnv } from "@llamaindex/env";
 import { Settings } from "../Settings.js";
-import {
-  type Event,
-  type EventType,
-  type StreamCallbackResponse,
-} from "../callbacks/CallbackManager.js";
+import { type StreamCallbackResponse } from "../callbacks/CallbackManager.js";
 import { BaseLLM } from "./base.js";
 import type {
  ChatMessage,
@@ -116,21 +112,10 @@ export class MistralAI extends BaseLLM {

  protected async *streamChat({
    messages,
-    parentEvent,
  }: LLMChatParamsStreaming): AsyncIterable<ChatResponseChunk> {
-    //Now let's wrap our stream in a callback
-    const onLLMStream = Settings.callbackManager.onLLMStream;
-
    const client = await this.session.getClient();
    const chunkStream = await client.chatStream(this.buildParams(messages));

-    const event: Event = parentEvent
-      ? parentEvent
-      : {
-          id: "unspecified",
-          type: "llmPredict" as EventType,
-        };
-
    //Indices
    let idx_counter: number = 0;
    for await (const part of chunkStream) {
@@ -141,12 +126,12 @@ export class MistralAI extends BaseLLM {
        part.choices[0].finish_reason === "stop" ? true : false;

      const stream_callback: StreamCallbackResponse = {
-        event: event,
        index: idx_counter,
        isDone: isDone,
        token: part,
      };
-      onLLMStream(stream_callback);
+
+      Settings.callbackManager.dispatchEvent("stream", stream_callback);

      idx_counter++;

@@ -1,5 +1,4 @@
 import { ok } from "@llamaindex/env";
-import type { Event } from "../callbacks/CallbackManager.js";
 import { BaseEmbedding } from "../embeddings/types.js";
 import type {
  ChatResponse,
@@ -69,7 +68,7 @@ export class Ollama extends BaseEmbedding implements LLM {
  async chat(
    params: LLMChatParamsNonStreaming | LLMChatParamsStreaming,
  ): Promise<ChatResponse | AsyncIterable<ChatResponseChunk>> {
-    const { messages, parentEvent, stream } = params;
+    const { messages, stream } = params;
    const payload = {
      model: this.model,
      messages: messages.map((message) => ({
@@ -106,14 +105,13 @@ export class Ollama extends BaseEmbedding implements LLM {
      const stream = response.body;
      ok(stream, "stream is null");
      ok(stream instanceof ReadableStream, "stream is not readable");
-      return this.streamChat(stream, messageAccessor, parentEvent);
+      return this.streamChat(stream, messageAccessor);
    }
  }

  private async *streamChat<T>(
    stream: ReadableStream<Uint8Array>,
    accessor: (data: any) => T,
-    parentEvent?: Event,
  ): AsyncIterable<T> {
    const reader = stream.getReader();
    while (true) {
@@ -147,7 +145,7 @@ export class Ollama extends BaseEmbedding implements LLM {
  async complete(
    params: LLMCompletionParamsStreaming | LLMCompletionParamsNonStreaming,
  ): Promise<CompletionResponse | AsyncIterable<CompletionResponse>> {
-    const { prompt, parentEvent, stream } = params;
+    const { prompt, stream } = params;
    const payload = {
      model: this.model,
      prompt: prompt,
@@ -177,7 +175,7 @@ export class Ollama extends BaseEmbedding implements LLM {
      const stream = response.body;
      ok(stream, "stream is null");
      ok(stream instanceof ReadableStream, "stream is not readable");
-      return this.streamChat(stream, completionAccessor, parentEvent);
+      return this.streamChat(stream, completionAccessor);
    }
  }

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Yi Ding	a87f13b9d2	release	2024-04-09 16:23:29 -07:00
Yi Ding	8d2b21ee75	update mistral (#700 )	2024-04-09 16:19:51 -07:00
Yi Ding	87741c9be8	update example packages	2024-04-09 13:22:03 -07:00
Yi Ding	171cb89170	security update (docs)	2024-04-09 13:17:44 -07:00
Yi Ding	5dad867bbe	update packages	2024-04-09 13:04:43 -07:00
Yi Ding	13f26fd84d	pnpm version	2024-04-09 12:45:12 -07:00
Yi Ding	3bc77f7d7f	gpt-4-turbo GA (#698 )	2024-04-09 12:42:16 -07:00
Alex Yang	aac1ee3af3	e2e: init llamaindex e2e test (#697 )	2024-04-06 23:57:21 -05:00
Alex Yang	e85893ac0f	fix: message content type (#696 )	2024-04-06 18:59:12 -05:00
Alex Yang	315947ee6f	refactor: move anthropic class (#695 )	2024-04-06 17:13:53 -05:00
Alex Yang	23a0d44b11	fix: jsr disallow global type	2024-04-06 17:09:39 -05:00
Alex Yang	3b501de057	chore: jsr release	2024-04-06 17:04:20 -05:00
Alex Yang	6cc645aa2a	refactor: improve agent type (#694 )	2024-04-05 15:21:49 -05:00
Marcus Schiesser	0b37207adc	Release llamaindex@0.2.3	2024-04-05 15:15:39 +08:00
Marcus Schiesser	f0704ec705	Add streaming for OpenAI agents (#693 )	2024-04-05 12:53:26 +07:00
Thuc Pham	4fcbdf710e	Add tool calls for openai streaming (#682 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de> Co-authored-by: Alex Yang <himself65@outlook.com>	2024-04-05 08:33:23 +07:00
Marcus Schiesser	866149193a	fix: use LLM's context window to specify agent's token limit (#689 )	2024-04-03 17:04:35 -05:00
Thuc Pham	6ffb161618	feat: add ts eslint plugin (#688 )	2024-04-03 14:21:13 +07:00
Marcus Schiesser	8e4b49824b	doc: document docstore strategies (#690 )	2024-04-03 13:26:38 +07:00
Alex Yang	5263576de1	ci: test matrix on nodejs 18/20/21 (#687 )	2024-04-02 17:23:11 -05:00
WarlaxZ	6d4e2ea0e9	fix: dynamic import cjs module pg (#685 ) Co-authored-by: Alex Yang <himself65@outlook.com>	2024-04-02 16:07:13 -05:00
Emanuel Ferreira	3cbfa98e6b	feat: LlamaCloudIndex from documents (#677 )	2024-04-02 14:03:45 -03:00
Alex Yang	d256cbe0e0	refactor: use `event.reason`, remove `parentEvent` (#681 )	2024-04-01 17:03:39 -07:00