Release 0.6.9 (#1252 )

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
chore: bump cloud sdk version (#1251 )
2026-07-01 22:14:03 -04:00 · 2024-09-24 10:31:54 -07:00 · 2024-09-24 09:43:45 -07:00 · 2024-09-23 18:41:44 -07:00 · 2024-09-23 18:35:36 -07:00 · 2024-09-23 15:11:53 -07:00
108 changed files with 11133 additions and 5410 deletions
@@ -14,6 +14,9 @@ concurrency:

 env:
  POSTGRES_HOST_AUTH_METHOD: trust
+  TURBO_TOKEN: ${{ secrets.TURBO_TOKEN }}
+  TURBO_TEAM: ${{ vars.TURBO_TEAM }}
+  TURBO_REMOTE_ONLY: true

 jobs:
  e2e:
@@ -167,11 +167,13 @@ export async function chatWithAgent(
      // ... adding your tools here
    ],
  });
-  const responseStream = await agent.chat({
-    stream: true,
-    message: question,
-    chatHistory: prevMessages,
-  });
+  const responseStream = await agent.chat(
+    {
+      message: question,
+      chatHistory: prevMessages,
+    },
+    true,
+  );
  const uiStream = createStreamableUI(<div>loading...</div>);
  responseStream
    .pipeTo(
@@ -1,5 +1,48 @@
 # docs

+## 0.0.78
+
+### Patch Changes
+
+- llamaindex@0.6.9
+
+## 0.0.77
+
+### Patch Changes
+
+- Updated dependencies [8b7fdba]
+  - llamaindex@0.6.8
+
+## 0.0.76
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.0.75
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.0.74
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.0.73
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
 ## 0.0.72

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "docs",
-  "version": "0.0.72",
+  "version": "0.0.78",
  "private": true,
  "scripts": {
    "docusaurus": "docusaurus",
@@ -18,7 +18,7 @@ import readline from "node:readline/promises";
  });
  const chatEngine = new SimpleChatEngine({
    llm,
-    chatHistory,
+    memory: chatHistory,
  });
  const rl = readline.createInterface({ input, output });

@@ -27,12 +27,10 @@ async function main() {

  // Query the index
  const queryEngine = index.asQueryEngine();
-  const stream = await queryEngine.query(
-    {
-      query: "What did the author do in college?",
-    },
-    true,
-  );
+  const stream = await queryEngine.query({
+    query: "What did the author do in college?",
+    stream: true,
+  });

  // Output response
  for await (const chunk of stream) {
@@ -37,12 +37,10 @@ async function main() {

  // Query the index
  const queryEngine = index.asQueryEngine();
-  const stream = await queryEngine.query(
-    {
-      query: "What did the author do in college?",
-    },
-    true,
-  );
+  const stream = await queryEngine.query({
+    query: "What did the author do in college?",
+    stream: true,
+  });

  // Output response
  for await (const chunk of stream) {
@@ -1,4 +1,5 @@
 // call pnpm tsx multimodal/load.ts first to init the storage
+import { extractText } from "@llamaindex/core/utils";
 import {
  ContextChatEngine,
  NodeWithScore,
@@ -25,8 +26,9 @@ Settings.callbackManager.on("retrieve-end", (event) => {
  const textNodes = nodes.filter(
    (node: NodeWithScore) => node.node.type === ObjectType.TEXT,
  );
+  const text = extractText(query);
  console.log(
-    `Retrieved ${textNodes.length} text nodes and ${imageNodes.length} image nodes for query: ${query}`,
+    `Retrieved ${textNodes.length} text nodes and ${imageNodes.length} image nodes for query: ${text}`,
  );
 });

@@ -1,3 +1,4 @@
+import { extractText } from "@llamaindex/core/utils";
 import {
  getResponseSynthesizer,
  OpenAI,
@@ -16,7 +17,8 @@ Settings.llm = new OpenAI({ model: "gpt-4-turbo", maxTokens: 512 });
 // Update callbackManager
 Settings.callbackManager.on("retrieve-end", (event) => {
  const { nodes, query } = event.detail;
-  console.log(`Retrieved ${nodes.length} nodes for query: ${query}`);
+  const text = extractText(query);
+  console.log(`Retrieved ${nodes.length} nodes for query: ${text}`);
 });

 async function main() {
@@ -30,12 +32,10 @@ async function main() {
    responseSynthesizer: getResponseSynthesizer("multi_modal"),
    retriever: index.asRetriever({ topK: { TEXT: 3, IMAGE: 1 } }),
  });
-  const stream = await queryEngine.query(
-    {
-      query: "Tell me more about Vincent van Gogh's famous paintings",
-    },
-    true,
-  );
+  const stream = await queryEngine.query({
+    query: "Tell me more about Vincent van Gogh's famous paintings",
+    stream: true,
+  });
  for await (const chunk of stream) {
    process.stdout.write(chunk.response);
  }
@@ -40,7 +40,11 @@ async function main(args: any) {
    const rdr = new SimpleDirectoryReader(callback);
    const docs = await rdr.loadData({ directoryPath: sourceDir });

-    const pgvs = new PGVectorStore();
+    const pgvs = new PGVectorStore({
+      clientConfig: {
+        connectionString: process.env.PG_CONNECTION_STRING,
+      },
+    });
    pgvs.setCollection(sourceDir);
    await pgvs.clearCollection();

@@ -7,7 +7,11 @@ async function main() {
  });

  try {
-    const pgvs = new PGVectorStore();
+    const pgvs = new PGVectorStore({
+      clientConfig: {
+        connectionString: process.env.PG_CONNECTION_STRING,
+      },
+    });
    // Optional - set your collection name, default is no filter on this field.
    // pgvs.setCollection();

@@ -1,5 +1,48 @@
 # @llamaindex/autotool

+## 3.0.9
+
+### Patch Changes
+
+- llamaindex@0.6.9
+
+## 3.0.8
+
+### Patch Changes
+
+- Updated dependencies [8b7fdba]
+  - llamaindex@0.6.8
+
+## 3.0.7
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 3.0.6
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 3.0.5
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 3.0.4
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
 ## 3.0.3

 ### Patch Changes
@@ -1,5 +1,54 @@
 # @llamaindex/autotool-01-node-example

+## 0.0.18
+
+### Patch Changes
+
+- llamaindex@0.6.9
+- @llamaindex/autotool@3.0.9
+
+## 0.0.17
+
+### Patch Changes
+
+- Updated dependencies [8b7fdba]
+  - llamaindex@0.6.8
+  - @llamaindex/autotool@3.0.8
+
+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+  - @llamaindex/autotool@3.0.7
+
+## 0.0.15
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+  - @llamaindex/autotool@3.0.6
+
+## 0.0.14
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+  - @llamaindex/autotool@3.0.5
+
+## 0.0.13
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+  - @llamaindex/autotool@3.0.4
+
 ## 0.0.12

 ### Patch Changes
@@ -13,5 +13,5 @@
  "scripts": {
    "start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
  },
-  "version": "0.0.12"
+  "version": "0.0.18"
 }
@@ -1,5 +1,54 @@
 # @llamaindex/autotool-02-next-example

+## 0.1.62
+
+### Patch Changes
+
+- llamaindex@0.6.9
+- @llamaindex/autotool@3.0.9
+
+## 0.1.61
+
+### Patch Changes
+
+- Updated dependencies [8b7fdba]
+  - llamaindex@0.6.8
+  - @llamaindex/autotool@3.0.8
+
+## 0.1.60
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+  - @llamaindex/autotool@3.0.7
+
+## 0.1.59
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+  - @llamaindex/autotool@3.0.6
+
+## 0.1.58
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+  - @llamaindex/autotool@3.0.5
+
+## 0.1.57
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+  - @llamaindex/autotool@3.0.4
+
 ## 0.1.56

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/autotool-02-next-example",
  "private": true,
-  "version": "0.1.56",
+  "version": "0.1.62",
  "scripts": {
    "dev": "next dev",
    "build": "next build",
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/autotool",
  "type": "module",
-  "version": "3.0.3",
+  "version": "3.0.9",
  "description": "auto transpile your JS function to LLM Agent compatible",
  "files": [
    "dist",
@@ -1,5 +1,11 @@
 # @llamaindex/cloud

+## 0.2.8
+
+### Patch Changes
+
+- ac41ed3: feat: bump cloud sdk version
+
 ## 0.2.7

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloud",
-  "version": "0.2.7",
+  "version": "0.2.8",
  "type": "module",
  "license": "MIT",
  "scripts": {
@@ -170,6 +170,15 @@ export class LlamaParseReader extends FileReader {
  vendorMultimodalModelName?: string | undefined;
  // The API key for the multimodal API. Can also be set as an env variable: LLAMA_CLOUD_VENDOR_MULTIMODAL_API_KEY
  vendorMultimodalApiKey?: string | undefined;
+
+  webhookUrl?: string | undefined;
+  premiumMode?: boolean | undefined;
+  takeScreenshot?: boolean | undefined;
+  disableOcr?: boolean | undefined;
+  disableReconstruction?: boolean | undefined;
+  inputS3Path?: string | undefined;
+  outputS3PathPrefix?: string | undefined;
+
  // numWorkers is implemented in SimpleDirectoryReader
  stdout?: WriteStream | undefined;

@@ -258,13 +267,13 @@ export class LlamaParseReader extends FileReader {
      use_vendor_multimodal_model: this.useVendorMultimodalModel,
      vendor_multimodal_model_name: this.vendorMultimodalModelName,
      vendor_multimodal_api_key: this.vendorMultimodalApiKey,
-      // fixme: does these fields need to be set?
-      webhook_url: undefined,
-      take_screenshot: undefined,
-      disable_ocr: undefined,
-      disable_reconstruction: undefined,
-      input_s3_path: undefined,
-      output_s3_path_prefix: undefined,
+      premium_mode: this.premiumMode,
+      webhook_url: this.webhookUrl,
+      take_screenshot: this.takeScreenshot,
+      disable_ocr: this.disableOcr,
+      disable_reconstruction: this.disableReconstruction,
+      input_s3_path: this.inputS3Path,
+      output_s3_path_prefix: this.outputS3PathPrefix,
    } satisfies {
      [Key in keyof Body_upload_file_api_v1_parsing_upload_post]-?:
        | Body_upload_file_api_v1_parsing_upload_post[Key]
@@ -0,0 +1,8 @@
+{
+  "extends": ["//"],
+  "tasks": {
+    "build": {
+      "outputs": ["dist/**", "src/client/**"]
+    }
+  }
+}
@@ -1,5 +1,28 @@
 # @llamaindex/community

+## 0.0.40
+
+### Patch Changes
+
+- 50e6b57: feat: add Amazon Bedrock Retriever
+- Updated dependencies [8b7fdba]
+  - @llamaindex/core@0.2.6
+
+## 0.0.39
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+  - @llamaindex/core@0.2.5
+
+## 0.0.38
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - @llamaindex/core@0.2.4
+  - @llamaindex/env@0.1.12
+
 ## 0.0.37

 ### Patch Changes
@@ -7,6 +7,7 @@
 - Bedrock support for the Anthropic Claude Models [usage](https://ts.llamaindex.ai/modules/llms/available_llms/bedrock)
 - Bedrock support for the Meta LLama 2, 3 and 3.1 Models [usage](https://ts.llamaindex.ai/modules/llms/available_llms/bedrock)
 - Meta LLama3.1 405b tool call support
+- Bedrock support for querying Knowledge Base

 ## LICENSE

@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/community",
  "description": "Community package for LlamaIndexTS",
-  "version": "0.0.37",
+  "version": "0.0.40",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -47,6 +47,7 @@
  },
  "dependencies": {
    "@aws-sdk/client-bedrock-runtime": "^3.642.0",
+    "@aws-sdk/client-bedrock-agent-runtime": "^3.642.0",
    "@llamaindex/core": "workspace:*",
    "@llamaindex/env": "workspace:*"
  }
@@ -3,3 +3,4 @@ export {
  BEDROCK_MODEL_MAX_TOKENS,
  Bedrock,
 } from "./llm/bedrock/index.js";
+export { AmazonKnowledgeBaseRetriever } from "./retrievers/bedrock.js";
@@ -0,0 +1,165 @@
+import type { KnowledgeBaseVectorSearchConfiguration } from "@aws-sdk/client-bedrock-agent-runtime";
+import {
+  BedrockAgentRuntimeClient,
+  type BedrockAgentRuntimeClientConfig,
+  type RetrievalFilter,
+  RetrieveCommand,
+  type SearchType,
+} from "@aws-sdk/client-bedrock-agent-runtime";
+import type { QueryBundle } from "@llamaindex/core/query-engine";
+import { BaseRetriever } from "@llamaindex/core/retriever";
+import { Document, type NodeWithScore } from "@llamaindex/core/schema";
+import { extractText } from "@llamaindex/core/utils";
+
+/**
+ * Interface for the arguments required to initialize an
+ * AmazonKnowledgeBaseRetriever instance.
+ */
+export interface AmazonKnowledgeBaseRetrieverArgs {
+  /** Identifier of the Bedrock knowledge base to query. */
+  knowledgeBaseId: string;
+  /**
+   * Number of results requested per query.
+   * Optional: the retriever's constructor falls back to 10 when omitted.
+   */
+  topK?: number;
+  /** AWS region passed to the Bedrock Agent Runtime client. */
+  region: string;
+  /**
+   * Extra client configuration, spread after `region` when the client is
+   * constructed, so a `region` given here takes precedence.
+   */
+  clientOptions?: BedrockAgentRuntimeClientConfig;
+  /** Optional retrieval filter forwarded in the vector search configuration. */
+  filter?: RetrievalFilter;
+  /** Optional search type override forwarded in the vector search configuration. */
+  overrideSearchType?: SearchType;
+}
+
+/**
+ * Retriever backed by Amazon Bedrock Knowledge Bases, a RAG-workflow-oriented service.
+ * Extends the BaseRetriever class.
+ * @example
+ * ```typescript
+ * const retriever = new AmazonKnowledgeBaseRetriever({
+ *   topK: 10,
+ *   knowledgeBaseId: "YOUR_KNOWLEDGE_BASE_ID",
+ *   region: "us-east-2",
+ *   clientOptions: {
+ *     credentials: {
+ *       accessKeyId: "YOUR_ACCESS_KEY_ID",
+ *       secretAccessKey: "YOUR_SECRET_ACCESS_KEY",
+ *     },
+ *   },
+ * });
+ *
+ * const docs = await retriever.retrieve({query: "How are clouds formed?"});
+ * ```
+ */
+export class AmazonKnowledgeBaseRetriever extends BaseRetriever {
+  static lc_name() {
+    return "AmazonKnowledgeBaseRetriever";
+  }
+
+  lc_namespace = ["llamaindex", "retrievers", "amazon_bedrock_knowledge_base"];
+
+  knowledgeBaseId: string;
+
+  topK: number;
+
+  bedrockAgentRuntimeClient: BedrockAgentRuntimeClient;
+
+  filter: RetrievalFilter | undefined;
+
+  overrideSearchType: SearchType | undefined;
+
+  /**
+   * Stores the retrieval settings and creates the Bedrock Agent Runtime client.
+   * `clientOptions` is spread after `region`, so it can override the region.
+   */
+  constructor({
+    knowledgeBaseId,
+    topK = 10,
+    clientOptions,
+    region,
+    filter,
+    overrideSearchType,
+  }: AmazonKnowledgeBaseRetrieverArgs) {
+    super();
+
+    this.knowledgeBaseId = knowledgeBaseId;
+    this.topK = topK;
+    this.filter = filter;
+    this.overrideSearchType = overrideSearchType;
+    this.bedrockAgentRuntimeClient = new BedrockAgentRuntimeClient({
+      region,
+      ...clientOptions,
+    });
+  }
+
+  /**
+   * Normalises result text: every run of whitespace becomes a single space
+   * and every literal "..." is dropped.
+   * @param resText The result text to clean.
+   * @returns The cleaned result text.
+   */
+  cleanResult(resText: string) {
+    return resText.replace(/\s+/g, " ").replace(/\.\.\./g, "");
+  }
+
+  /**
+   * Sends a `RetrieveCommand` to the knowledge base and converts each result
+   * into a scored `Document`.
+   * @param query Query bundle; its text is obtained with `extractText`.
+   * @param topK Number of results to request.
+   * @param filter Optional retrieval filter.
+   * @param overrideSearchType Optional search type override.
+   * @returns One node per retrieval result, or an empty array when the
+   * response carries no results.
+   */
+  async queryKnowledgeBase(
+    query: QueryBundle,
+    topK: number,
+    filter?: RetrievalFilter,
+    overrideSearchType?: SearchType,
+  ): Promise<NodeWithScore[]> {
+    const vectorSearchConfiguration = {
+      numberOfResults: topK,
+      overrideSearchType,
+      filter,
+    } as KnowledgeBaseVectorSearchConfiguration;
+
+    const command = new RetrieveCommand({
+      knowledgeBaseId: this.knowledgeBaseId,
+      retrievalQuery: {
+        text: extractText(query),
+      },
+      retrievalConfiguration: { vectorSearchConfiguration },
+    });
+
+    const response = await this.bedrockAgentRuntimeClient.send(command);
+    const results = response.retrievalResults;
+    if (results == null) {
+      return [];
+    }
+
+    return results.map((result) => {
+      const location = result.location;
+
+      // Pick the URL/URI field that matches the reported location type.
+      let source: string | undefined;
+      if (location?.type === "CONFLUENCE") {
+        source = location.confluenceLocation?.url;
+      } else if (location?.type === "SALESFORCE") {
+        source = location.salesforceLocation?.url;
+      } else if (location?.type === "SHAREPOINT") {
+        source = location.sharePointLocation?.url;
+      } else if (location?.type === "WEB") {
+        source = location.webLocation?.url;
+      } else {
+        // "S3" as well as any missing or unrecognised type use the S3 URI.
+        source = location?.s3Location?.uri;
+      }
+
+      const node = new Document({
+        text: this.cleanResult(result.content?.text || ""),
+        metadata: {
+          source,
+          score: result.score,
+          // Spread last: keys in the result's own metadata win on collision.
+          ...result.metadata,
+        },
+      });
+
+      return { node, score: result.score ?? 1.0 };
+    });
+  }
+
+  /**
+   * BaseRetriever hook: queries the knowledge base with the settings stored
+   * on this instance.
+   */
+  async _retrieve(query: QueryBundle): Promise<NodeWithScore[]> {
+    return await this.queryKnowledgeBase(
+      query,
+      this.topK,
+      this.filter,
+      this.overrideSearchType,
+    );
+  }
+}
@@ -1,5 +1,31 @@
 # @llamaindex/core

+## 0.2.6
+
+### Patch Changes
+
+- 8b7fdba: refactor: move chat engine & retriever into core.
+
+  - `chatHistory` in BaseChatEngine now returns `ChatMessage[] | Promise<ChatMessage[]>`, instead of `BaseMemory`
+  - update `retrieve-end` type
+
+## 0.2.5
+
+### Patch Changes
+
+- d902cc3: Fix context not being sent using ContextChatEngine
+
+## 0.2.4
+
+### Patch Changes
+
+- b48bcc3: feat: add `load-transformers` event type when loading `@xenova/transformers` module
+
+  This benefits users who want to customize the transformer env.
+
+- Updated dependencies [b48bcc3]
+  - @llamaindex/env@0.1.12
+
 ## 0.2.3

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/core",
  "type": "module",
-  "version": "0.2.3",
+  "version": "0.2.6",
  "description": "LlamaIndex Core Module",
  "exports": {
    "./node-parser": {
@@ -199,6 +199,34 @@
        "types": "./dist/response-synthesizers/index.d.ts",
        "default": "./dist/response-synthesizers/index.js"
      }
+    },
+    "./chat-engine": {
+      "require": {
+        "types": "./dist/chat-engine/index.d.cts",
+        "default": "./dist/chat-engine/index.cjs"
+      },
+      "import": {
+        "types": "./dist/chat-engine/index.d.ts",
+        "default": "./dist/chat-engine/index.js"
+      },
+      "default": {
+        "types": "./dist/chat-engine/index.d.ts",
+        "default": "./dist/chat-engine/index.js"
+      }
+    },
+    "./retriever": {
+      "require": {
+        "types": "./dist/retriever/index.d.cts",
+        "default": "./dist/retriever/index.cjs"
+      },
+      "import": {
+        "types": "./dist/retriever/index.d.ts",
+        "default": "./dist/retriever/index.js"
+      },
+      "default": {
+        "types": "./dist/retriever/index.d.ts",
+        "default": "./dist/retriever/index.js"
+      }
    }
  },
  "files": [
@@ -0,0 +1,36 @@
+import type { ChatMessage, MessageContent } from "../llms";
+import type { BaseMemory } from "../memory";
+import { EngineResponse } from "../schema";
+
+/**
+ * Fields shared by the streaming and non-streaming parameter shapes of
+ * `BaseChatEngine.chat`.
+ *
+ * `AdditionalMessageOptions` is forwarded to the `ChatMessage` / `BaseMemory`
+ * types used for `chatHistory`.
+ */
+export interface BaseChatEngineParams<
+  AdditionalMessageOptions extends object = object,
+> {
+  /** The message content to send to the chat engine. */
+  message: MessageContent;
+  /**
+   * Optional chat history if you want to customize the chat history.
+   */
+  chatHistory?:
+    | ChatMessage<AdditionalMessageOptions>[]
+    | BaseMemory<AdditionalMessageOptions>;
+}
+
+/**
+ * Chat parameters with `stream: true`; this shape selects the `chat`
+ * overload that resolves to an `AsyncIterable<EngineResponse>`.
+ */
+export interface StreamingChatEngineParams<
+  AdditionalMessageOptions extends object = object,
+> extends BaseChatEngineParams<AdditionalMessageOptions> {
+  stream: true;
+}
+
+/**
+ * Chat parameters with `stream` omitted or `false`; this shape selects the
+ * `chat` overload that resolves to a single `EngineResponse`.
+ */
+export interface NonStreamingChatEngineParams<
+  AdditionalMessageOptions extends object = object,
+> extends BaseChatEngineParams<AdditionalMessageOptions> {
+  stream?: false;
+}
+
+/**
+ * Abstract contract for chat engines.
+ *
+ * Implementations provide both `chat` overloads: the non-streaming one
+ * resolves to a single `EngineResponse`, the streaming one (`stream: true`)
+ * resolves to an async iterable of `EngineResponse` chunks.
+ */
+export abstract class BaseChatEngine {
+  abstract chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
+  abstract chat(
+    params: StreamingChatEngineParams,
+  ): Promise<AsyncIterable<EngineResponse>>;
+
+  /** The engine's chat history, available synchronously or as a Promise. */
+  abstract chatHistory: ChatMessage[] | Promise<ChatMessage[]>;
+}
@@ -11,6 +11,7 @@ import type {
  SynthesizeEndEvent,
  SynthesizeStartEvent,
 } from "../../response-synthesizers";
+import type { RetrieveEndEvent, RetrieveStartEvent } from "../../retriever";
 import { TextNode } from "../../schema";
 import { EventCaller, getEventCaller } from "../../utils";
 import type { UUID } from "../type";
@@ -69,6 +70,8 @@ export interface LlamaIndexEventMaps {
  "query-end": QueryEndEvent;
  "synthesize-start": SynthesizeStartEvent;
  "synthesize-end": SynthesizeEndEvent;
+  "retrieve-start": RetrieveStartEvent;
+  "retrieve-end": RetrieveEndEvent;
 }

 export class LlamaIndexCustomEvent<T = any> extends CustomEvent<T> {
@@ -128,16 +131,29 @@ export class CallbackManager {
  dispatchEvent<K extends keyof LlamaIndexEventMaps>(
    event: K,
    detail: LlamaIndexEventMaps[K],
+    sync = false,
  ) {
    const cbs = this.#handlers.get(event);
    if (!cbs) {
      return;
    }
-    queueMicrotask(() => {
+    if (typeof queueMicrotask === "undefined") {
+      console.warn(
+        "queueMicrotask is not available, dispatching synchronously",
+      );
+      sync = true;
+    }
+    if (sync) {
      cbs.forEach((handler) =>
        handler(LlamaIndexCustomEvent.fromEvent(event, { ...detail })),
      );
-    });
+    } else {
+      queueMicrotask(() => {
+        cbs.forEach((handler) =>
+          handler(LlamaIndexCustomEvent.fromEvent(event, { ...detail })),
+        );
+      });
+    }
  }
 }

@@ -1,5 +1,5 @@
 import { Settings } from "../global";
-import type { ChatMessage, MessageContent } from "../llms";
+import type { ChatMessage } from "../llms";
 import { type BaseChatStore, SimpleChatStore } from "../storage/chat-store";
 import { extractText } from "../utils";

@@ -12,15 +12,36 @@ export const DEFAULT_CHAT_STORE_KEY = "chat_history";
 export abstract class BaseMemory<
  AdditionalMessageOptions extends object = object,
 > {
+  /**
+   * Retrieves messages from the memory, optionally including transient messages.
+   * Compared to getAllMessages, this method a) allows for transient messages to be included in the retrieval and b) may return a subset of the total messages by applying a token limit.
+   * @param transientMessages Optional array of temporary messages to be included in the retrieval.
+   * These messages are not stored in the memory but are considered for the current interaction.
+   * @returns An array of chat messages, either synchronously or as a Promise.
+   */
  abstract getMessages(
-    input?: MessageContent | undefined,
+    transientMessages?: ChatMessage<AdditionalMessageOptions>[] | undefined,
  ):
    | ChatMessage<AdditionalMessageOptions>[]
    | Promise<ChatMessage<AdditionalMessageOptions>[]>;
+
+  /**
+   * Retrieves all messages stored in the memory.
+   * @returns An array of all chat messages, either synchronously or as a Promise.
+   */
  abstract getAllMessages():
    | ChatMessage<AdditionalMessageOptions>[]
    | Promise<ChatMessage<AdditionalMessageOptions>[]>;
+
+  /**
+   * Adds a new message to the memory.
+   * @param messages The chat message to be added to the memory.
+   */
  abstract put(messages: ChatMessage<AdditionalMessageOptions>): void;
+
+  /**
+   * Clears all messages from the memory.
+   */
  abstract reset(): void;

  protected _tokenCountForMessages(messages: ChatMessage[]): number {
@@ -1,5 +1,5 @@
 import { Settings } from "../global";
-import type { ChatMessage, LLM, MessageContent } from "../llms";
+import type { ChatMessage, LLM } from "../llms";
 import { type BaseChatStore } from "../storage/chat-store";
 import { BaseChatStoreMemory, DEFAULT_TOKEN_LIMIT_RATIO } from "./base";

@@ -34,7 +34,7 @@ export class ChatMemoryBuffer<
  }

  getMessages(
-    input?: MessageContent | undefined,
+    transientMessages?: ChatMessage<AdditionalMessageOptions>[] | undefined,
    initialTokenCount: number = 0,
  ) {
    const messages = this.getAllMessages();
@@ -43,16 +43,22 @@ export class ChatMemoryBuffer<
      throw new Error("Initial token count exceeds token limit");
    }

-    let messageCount = messages.length;
-    let currentMessages = messages.slice(-messageCount);
-    let tokenCount = this._tokenCountForMessages(messages) + initialTokenCount;
+    // Add input messages as transient messages
+    const messagesWithInput = transientMessages
+      ? [...transientMessages, ...messages]
+      : messages;
+
+    let messageCount = messagesWithInput.length;
+    let currentMessages = messagesWithInput.slice(-messageCount);
+    let tokenCount =
+      this._tokenCountForMessages(messagesWithInput) + initialTokenCount;

    while (tokenCount > this.tokenLimit && messageCount > 1) {
      messageCount -= 1;
-      if (messages.at(-messageCount)!.role === "assistant") {
+      if (messagesWithInput.at(-messageCount)!.role === "assistant") {
        messageCount -= 1;
      }
-      currentMessages = messages.slice(-messageCount);
+      currentMessages = messagesWithInput.slice(-messageCount);
      tokenCount =
        this._tokenCountForMessages(currentMessages) + initialTokenCount;
    }
@@ -60,6 +66,6 @@ export class ChatMemoryBuffer<
    if (tokenCount > this.tokenLimit && messageCount <= 0) {
      return [];
    }
-    return messages.slice(-messageCount);
+    return messagesWithInput.slice(-messageCount);
  }
 }
@@ -114,18 +114,22 @@ export class ChatSummaryMemoryBuffer extends BaseMemory {
    }
  }

-  private calcCurrentRequestMessages() {
-    // TODO: check order: currently, we're sending:
+  private calcCurrentRequestMessages(transientMessages?: ChatMessage[]) {
+    // currently, we're sending:
    // system messages first, then transient messages and then the messages that describe the conversation so far
-    return [...this.systemMessages, ...this.calcConversationMessages(true)];
+    return [
+      ...this.systemMessages,
+      ...(transientMessages ? transientMessages : []),
+      ...this.calcConversationMessages(true),
+    ];
  }

  reset() {
    this.messages = [];
  }

-  async getMessages(): Promise<ChatMessage[]> {
-    const requestMessages = this.calcCurrentRequestMessages();
+  async getMessages(transientMessages?: ChatMessage[]): Promise<ChatMessage[]> {
+    const requestMessages = this.calcCurrentRequestMessages(transientMessages);

    // get tokens of current request messages and the transient messages
    const tokens = requestMessages.reduce(
@@ -149,7 +153,7 @@ export class ChatSummaryMemoryBuffer extends BaseMemory {
      // TODO: we still might have too many tokens
      // e.g. too large system messages or transient messages
      // how should we deal with that?
-      return this.calcCurrentRequestMessages();
+      return this.calcCurrentRequestMessages(transientMessages);
    }
    return requestMessages;
  }
@@ -2,7 +2,7 @@ import { randomUUID } from "@llamaindex/env";
 import { Settings } from "../global";
 import type { MessageContent } from "../llms";
 import { PromptMixin } from "../prompts";
-import { EngineResponse } from "../schema";
+import { EngineResponse, type NodeWithScore } from "../schema";
 import { wrapEventCaller } from "../utils";

 /**
@@ -18,6 +18,18 @@ export type QueryBundle = {

 export type QueryType = string | QueryBundle;

+export type BaseQueryParams = {
+  query: QueryType;
+};
+
+export interface StreamingQueryParams extends BaseQueryParams {
+  stream: true;
+}
+
+export interface NonStreamingQueryParams extends BaseQueryParams {
+  stream?: false;
+}
+
 export type QueryFn = (
  strOrQueryBundle: QueryType,
  stream?: boolean,
@@ -28,23 +40,26 @@ export abstract class BaseQueryEngine extends PromptMixin {
    super();
  }

-  query(
-    strOrQueryBundle: QueryType,
-    stream: true,
-  ): Promise<AsyncIterable<EngineResponse>>;
-  query(strOrQueryBundle: QueryType, stream?: false): Promise<EngineResponse>;
+  async retrieve(params: QueryType): Promise<NodeWithScore[]> {
+    throw new Error(
+      "This query engine does not support retrieve, use query directly",
+    );
+  }
+
+  query(params: StreamingQueryParams): Promise<AsyncIterable<EngineResponse>>;
+  query(params: NonStreamingQueryParams): Promise<EngineResponse>;
  @wrapEventCaller
  async query(
-    strOrQueryBundle: QueryType,
-    stream = false,
+    params: StreamingQueryParams | NonStreamingQueryParams,
  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
+    const { stream, query } = params;
    const id = randomUUID();
    const callbackManager = Settings.callbackManager;
    callbackManager.dispatchEvent("query-start", {
      id,
-      query: strOrQueryBundle,
+      query,
    });
-    const response = await this._query(strOrQueryBundle, stream);
+    const response = await this._query(query, stream);
    callbackManager.dispatchEvent("query-end", {
      id,
      response,
@@ -0,0 +1,112 @@
+import { randomUUID } from "@llamaindex/env";
+import { Settings } from "../global";
+import type { MessageContent } from "../llms";
+import { PromptMixin } from "../prompts";
+import type { QueryBundle, QueryType } from "../query-engine";
+import { BaseNode, IndexNode, type NodeWithScore, ObjectType } from "../schema";
+
+/**
+ * A query plus optional pre-filters.
+ * NOTE(review): nothing in the visible part of this module consumes this
+ * type (`BaseRetriever.retrieve` takes `QueryType`) — confirm its intended users.
+ */
+export type RetrieveParams = {
+  query: MessageContent;
+  preFilters?: unknown;
+};
+
+/**
+ * Payload of the `retrieve-start` event dispatched by `BaseRetriever.retrieve`
+ * before retrieval begins.
+ */
+export type RetrieveStartEvent = {
+  // Random UUID generated per `retrieve` call; the matching end event reuses it.
+  id: string;
+  // The query, already normalised to a bundle (a plain string is wrapped).
+  query: QueryBundle;
+};
+
+/**
+ * Payload of the `retrieve-end` event dispatched by `BaseRetriever.retrieve`
+ * once retrieval has finished.
+ */
+export type RetrieveEndEvent = {
+  // Same UUID as the corresponding `retrieve-start` event.
+  id: string;
+  // The query bundle that was retrieved against.
+  query: QueryBundle;
+  // The nodes that `retrieve` returns to its caller.
+  nodes: NodeWithScore[];
+};
+
+/**
+ * Common base for retrievers.
+ *
+ * Subclasses implement `_retrieve`; the public `retrieve` wraps it with
+ * `retrieve-start` / `retrieve-end` events and resolves index nodes through
+ * `objectMap`.
+ */
+export abstract class BaseRetriever extends PromptMixin {
+  /** Objects an index node can resolve to, keyed by `IndexNode.indexId`. */
+  objectMap: Map<string, unknown> = new Map();
+
+  // The base retriever exposes no prompts and no prompt sub-modules.
+  protected _updatePrompts() {}
+  protected _getPrompts() {
+    return {};
+  }
+
+  protected _getPromptModules() {
+    return {};
+  }
+
+  protected constructor() {
+    super();
+  }
+
+  /**
+   * Retrieves nodes for a query.
+   *
+   * A plain string is wrapped into `{ query }`. A `retrieve-start` event is
+   * dispatched before `_retrieve` runs and a `retrieve-end` event (with the
+   * same id and the final nodes) after recursive retrieval has been applied.
+   *
+   * @param params Query string or query bundle.
+   * @returns The retrieved nodes after index nodes have been resolved.
+   */
+  public async retrieve(params: QueryType): Promise<NodeWithScore[]> {
+    const cb = Settings.callbackManager;
+    const queryBundle = typeof params === "string" ? { query: params } : params;
+    const id = randomUUID();
+    cb.dispatchEvent("retrieve-start", { id, query: queryBundle });
+    let response = await this._retrieve(queryBundle);
+    response = await this._handleRecursiveRetrieval(queryBundle, response);
+    cb.dispatchEvent("retrieve-end", {
+      id,
+      query: queryBundle,
+      nodes: response,
+    });
+    return response;
+  }
+
+  /** Retrieval logic supplied by each concrete retriever. */
+  abstract _retrieve(params: QueryBundle): Promise<NodeWithScore[]>;
+
+  /**
+   * Replaces index nodes that have an entry in `objectMap` with the node(s)
+   * obtained from that entry; every other node is passed through. A missing
+   * score defaults to 1.0.
+   *
+   * @param params Query bundle, forwarded to `_retrieveFromObject`.
+   * @param nodes Nodes returned by `_retrieve`.
+   * @returns The list with resolvable index nodes expanded.
+   * @throws TypeError when a mapped object is not retrievable.
+   */
+  async _handleRecursiveRetrieval(
+    params: QueryBundle,
+    nodes: NodeWithScore[],
+  ): Promise<NodeWithScore[]> {
+    const retrievedNodes: NodeWithScore[] = [];
+    for (const { node, score = 1.0 } of nodes) {
+      if (node.type === ObjectType.INDEX) {
+        const indexNode = node as IndexNode;
+        const object = this.objectMap.get(indexNode.indexId);
+        if (object !== undefined) {
+          retrievedNodes.push(
+            ...this._retrieveFromObject(object, params, score),
+          );
+        } else {
+          retrievedNodes.push({ node, score });
+        }
+      } else {
+        retrievedNodes.push({ node, score });
+      }
+    }
+    // Return the expanded list. Returning the input `nodes` here would
+    // silently discard everything the loop above resolved.
+    return retrievedNodes;
+  }
+
+  /**
+   * Converts an `objectMap` entry into scored nodes.
+   *
+   * Supported entries: an object with a `node` property that is a `BaseNode`
+   * (its own numeric `score` is used when present, otherwise the fallback),
+   * or a `BaseNode` itself.
+   *
+   * @param object The mapped object.
+   * @param queryBundle The current query (not used by the supported cases).
+   * @param score Fallback score when the object carries none.
+   * @throws TypeError when the object is none of the supported kinds.
+   */
+  _retrieveFromObject(
+    object: unknown,
+    queryBundle: QueryBundle,
+    score: number,
+  ): NodeWithScore[] {
+    if (object == null || typeof object !== "object") {
+      throw new TypeError("Object is not retrievable");
+    }
+    if ("node" in object && object.node instanceof BaseNode) {
+      return [
+        {
+          node: object.node,
+          score:
+            "score" in object && typeof object.score === "number"
+              ? object.score
+              : score,
+        },
+      ];
+    }
+    if (object instanceof BaseNode) {
+      return [{ node: object, score }];
+    } else {
+      // todo: support other types
+      // BaseQueryEngine
+      // BaseRetriever
+      // QueryComponent
+      throw new TypeError("Object is not retrievable");
+    }
+  }
+}
@@ -0,0 +1,74 @@
+import { Settings } from "@llamaindex/core/global";
+import type { ChatMessage } from "@llamaindex/core/llms";
+import { ChatMemoryBuffer } from "@llamaindex/core/memory";
+import { beforeEach, describe, expect, test } from "vitest";
+
+// Unit tests for the ChatMemoryBuffer constructor and getMessages().
+describe("ChatMemoryBuffer", () => {
+  beforeEach(() => {
+    // Mock the Settings.llm
+    // The stub only exposes metadata.contextWindow.
+    (Settings.llm as any) = {
+      metadata: {
+        contextWindow: 1000,
+      },
+    };
+  });
+
+  // An explicit tokenLimit passed to the constructor is exposed unchanged.
+  test("constructor initializes with custom token limit", () => {
+    const buffer = new ChatMemoryBuffer({ tokenLimit: 500 });
+    expect(buffer.tokenLimit).toBe(500);
+  });
+
+  // With a generous limit the stored history comes back unchanged.
+  test("getMessages returns all messages when under token limit", () => {
+    const messages: ChatMessage[] = [
+      { role: "user", content: "Hello" },
+      { role: "assistant", content: "Hi there!" },
+      { role: "user", content: "How are you?" },
+    ];
+    const buffer = new ChatMemoryBuffer({
+      tokenLimit: 1000,
+      chatHistory: messages,
+    });
+
+    const result = buffer.getMessages();
+    expect(result).toEqual(messages);
+  });
+
+  // With a tight limit only the most recent message is expected back.
+  test("getMessages truncates messages when over token limit", () => {
+    const messages: ChatMessage[] = [
+      { role: "user", content: "This is a long message" },
+      { role: "assistant", content: "This is also a long reply" },
+      { role: "user", content: "Short" },
+    ];
+    const buffer = new ChatMemoryBuffer({
+      tokenLimit: 5, // limit to only allow the last message
+      chatHistory: messages,
+    });
+
+    const result = buffer.getMessages();
+    expect(result).toEqual([{ role: "user", content: "Short" }]);
+  });
+
+  // Messages passed to getMessages() are expected ahead of the stored history.
+  test("getMessages handles input messages", () => {
+    const storedMessages: ChatMessage[] = [
+      { role: "user", content: "Hello" },
+      { role: "assistant", content: "Hi there!" },
+    ];
+    const buffer = new ChatMemoryBuffer({
+      tokenLimit: 50,
+      chatHistory: storedMessages,
+    });
+
+    const inputMessages: ChatMessage[] = [
+      { role: "user", content: "New message" },
+    ];
+    const result = buffer.getMessages(inputMessages);
+    expect(result).toEqual([...inputMessages, ...storedMessages]);
+  });
+
+  // The second argument (20) is above the tokenLimit (10); judging by the
+  // expected error message it is the initial token count.
+  test("getMessages throws error when initial token count exceeds limit", () => {
+    const buffer = new ChatMemoryBuffer({ tokenLimit: 10 });
+    expect(() => buffer.getMessages(undefined, 20)).toThrow(
+      "Initial token count exceeds token limit",
+    );
+  });
+});
@@ -1,5 +1,13 @@
 # @llamaindex/env

+## 0.1.12
+
+### Patch Changes
+
+- b48bcc3: feat: add `load-transformers` event type when loading `@xenova/transformers` module
+
+  This benefits users who want to customize the transformer env.
+
 ## 0.1.11

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/env",
  "description": "environment wrapper, supports all JS environment including node, deno, bun, edge runtime, and cloudflare worker",
-  "version": "0.1.11",
+  "version": "0.1.12",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -74,16 +74,18 @@
    "@aws-crypto/sha256-js": "^5.2.0",
    "@swc/cli": "^0.4.0",
    "@swc/core": "^1.7.22",
+    "@xenova/transformers": "^2.17.2",
    "concurrently": "^8.2.2",
    "pathe": "^1.1.2",
+    "tiktoken": "^1.0.16",
    "vitest": "^2.0.5"
  },
  "dependencies": {
-    "@types/lodash": "^4.17.7",
    "@types/node": "^22.5.1"
  },
  "peerDependencies": {
    "@aws-crypto/sha256-js": "^5.2.0",
+    "@xenova/transformers": "^2.17.2",
    "js-tiktoken": "^1.0.12",
    "pathe": "^1.1.2",
    "tiktoken": "^1.0.15"
@@ -92,8 +94,17 @@
    "@aws-crypto/sha256-js": {
      "optional": true
    },
+    "@xenova/transformers": {
+      "optional": true
+    },
    "pathe": {
      "optional": true
+    },
+    "tiktoken": {
+      "optional": true
+    },
+    "js-tiktoken": {
+      "optional": true
    }
  }
 }
@@ -6,6 +6,12 @@
 import "./global-check.js";
 export * from "./web-polyfill.js";

+export {
+  loadTransformers,
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./multi-model/index.browser.js";
 export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/js.js";

 // @ts-expect-error
@@ -6,4 +6,10 @@
 import "./global-check.js";
 export * from "./node-polyfill.js";

+export {
+  loadTransformers,
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./multi-model/index.non-nodejs.js";
 export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/js.js";
@@ -33,6 +33,12 @@ export function createSHA256(): SHA256 {
  };
 }

+export {
+  loadTransformers,
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./multi-model/index.js";
 export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/node.js";
 export {
  AsyncLocalStorage,
@@ -13,4 +13,10 @@ export function getEnv(name: string): string | undefined {
  return INTERNAL_ENV[name];
 }

+export {
+  loadTransformers,
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./multi-model/index.non-nodejs.js";
 export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/js.js";
@@ -0,0 +1,20 @@
+import { getTransformers, setTransformers, type OnLoad } from "./shared.js";
+
+export {
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./shared.js";
+/**
+ * Loads `@xenova/transformers` from the jsDelivr CDN on first use and
+ * registers it as the shared transformers module.
+ *
+ * `onLoad` is invoked only by a call that performs the import; when a module
+ * is already registered it is returned as-is and `onLoad` is not invoked.
+ *
+ * @param onLoad - Callback that receives the freshly loaded module.
+ * @returns The registered transformers module.
+ */
+export async function loadTransformers(onLoad: OnLoad) {
+  const cached = getTransformers();
+  if (cached !== null) {
+    return cached;
+  }
+  setTransformers(
+    // @ts-expect-error
+    await import("https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2"),
+  );
+  const loaded = getTransformers()!;
+  onLoad(loaded);
+  return loaded;
+}
@@ -0,0 +1,35 @@
+import { getTransformers, setTransformers, type OnLoad } from "./shared.js";
+export {
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./shared.js";
+
+/**
+ * Loads `@xenova/transformers` in a non-Node.js runtime on first use and
+ * registers it as the shared transformers module.
+ *
+ * A warning is logged before the import because "@xenova/transformers"
+ * depends heavily on Node.js APIs and is not officially supported here.
+ * Making it work would mean fixing the upstream implementation, which is not
+ * worth the effort: Edge Runtime and Cloudflare Workers are not meant for
+ * heavy machine-learning tasks.
+ *
+ * If you see the warning, either provide an RPC server that runs the
+ * transformer in a Node.js environment, or run the code in Node.js directly.
+ *
+ * Refs: https://github.com/xenova/transformers.js/issues/309
+ *
+ * @param onLoad - Callback that receives the freshly loaded module; it is not
+ *   invoked when a module is already registered.
+ * @returns The registered transformers module.
+ */
+export async function loadTransformers(onLoad: OnLoad) {
+  const cached = getTransformers();
+  if (cached !== null) {
+    return cached;
+  }
+  console.warn(
+    '"@xenova/transformers" is not officially supported in this environment, some features may not work as expected.',
+  );
+  setTransformers(
+    // @ts-expect-error
+    await import("@xenova/transformers/dist/transformers"),
+  );
+  const loaded = getTransformers()!;
+  onLoad(loaded);
+  return loaded;
+}
@@ -0,0 +1,20 @@
+import { getTransformers, setTransformers, type OnLoad } from "./shared.js";
+
+export {
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./shared.js";
+
+/**
+ * Imports `@xenova/transformers` on first use and registers it as the shared
+ * transformers module.
+ *
+ * `onLoad` is invoked only by a call that performs the import; when a module
+ * is already registered it is returned as-is and `onLoad` is not invoked.
+ *
+ * @param onLoad - Callback that receives the freshly loaded module.
+ * @returns The registered transformers module.
+ */
+export async function loadTransformers(onLoad: OnLoad) {
+  const cached = getTransformers();
+  if (cached !== null) {
+    return cached;
+  }
+  setTransformers(await import("@xenova/transformers"));
+  const loaded = getTransformers()!;
+  onLoad(loaded);
+  return loaded;
+}
@@ -0,0 +1,17 @@
+/** Type of the `@xenova/transformers` module namespace. */
+type Transformers = typeof import("@xenova/transformers");
+
+/** The registered transformers module, or `null` until one has been set. */
+let transformer: Transformers | null = null;
+
+/** Returns the registered transformers module, or `null` if none is set. */
+export function getTransformers() {
+  return transformer;
+}
+
+/** Registers `t` as the transformers module, replacing any previous one. */
+export function setTransformers(t: Transformers) {
+  transformer = t;
+}
+
+/** Callback that receives a transformers module. */
+export type OnLoad = (transformer: Transformers) => void;
+
+/** Event payload carrying a transformers module. */
+export type LoadTransformerEvent = {
+  transformer: Transformers;
+};
@@ -1,5 +1,48 @@
 # @llamaindex/experimental

+## 0.0.87
+
+### Patch Changes
+
+- llamaindex@0.6.9
+
+## 0.0.86
+
+### Patch Changes
+
+- Updated dependencies [8b7fdba]
+  - llamaindex@0.6.8
+
+## 0.0.85
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.0.84
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.0.83
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.0.82
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
 ## 0.0.81

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/experimental",
  "description": "Experimental package for LlamaIndexTS",
-  "version": "0.0.81",
+  "version": "0.0.87",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -1,5 +1,70 @@
 # llamaindex

+## 0.6.9
+
+### Patch Changes
+
+- Updated dependencies [ac41ed3]
+  - @llamaindex/cloud@0.2.8
+
+## 0.6.8
+
+### Patch Changes
+
+- 8b7fdba: refactor: move chat engine & retriever into core.
+
+  - `chatHistory` in BaseChatEngine now returns `ChatMessage[] | Promise<ChatMessage[]>`, instead of `BaseMemory`
+  - update `retrieve-end` type
+
+- Updated dependencies [8b7fdba]
+  - @llamaindex/core@0.2.6
+  - @llamaindex/openai@0.1.8
+  - @llamaindex/groq@0.0.7
+
+## 0.6.7
+
+### Patch Changes
+
+- 23bcc37: fix: add `serializer` in doc store
+
+  `PostgresDocumentStore` no longer uses `JSON.stringify`, for better performance
+
+## 0.6.6
+
+### Patch Changes
+
+- d902cc3: Fix context not being sent using ContextChatEngine
+- 025ffe6: fix: update `PostgresKVStore` constructor params
+- a659574: Adds upstash vector store as a storage
+- Updated dependencies [d902cc3]
+  - @llamaindex/core@0.2.5
+  - @llamaindex/openai@0.1.7
+  - @llamaindex/groq@0.0.6
+
+## 0.6.5
+
+### Patch Changes
+
+- e9714db: feat: update `PGVectorStore`
+
+  - move constructor parameter `config.user` | `config.database` | `config.password` | `config.connectionString` into `config.clientConfig`
+  - if you pass `pg.Client` or `pg.Pool` instance to `PGVectorStore`, move it to `config.client`, setting `config.shouldConnect` to false if it's already connected
+  - default value of `PGVectorStore.collection` is now `"data"` instead of `""` (empty string)
+
+## 0.6.4
+
+### Patch Changes
+
+- b48bcc3: feat: add `load-transformers` event type when loading `@xenova/transformers` module
+
+  This benefits users who want to customize the transformer env.
+
+- Updated dependencies [b48bcc3]
+  - @llamaindex/core@0.2.4
+  - @llamaindex/env@0.1.12
+  - @llamaindex/openai@0.1.6
+  - @llamaindex/groq@0.0.5
+
 ## 0.6.3

 ### Patch Changes
@@ -1,5 +1,48 @@
 # @llamaindex/cloudflare-worker-agent-test

+## 0.0.71
+
+### Patch Changes
+
+- llamaindex@0.6.9
+
+## 0.0.70
+
+### Patch Changes
+
+- Updated dependencies [8b7fdba]
+  - llamaindex@0.6.8
+
+## 0.0.69
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.0.68
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.0.67
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.0.66
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
 ## 0.0.65

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloudflare-worker-agent-test",
-  "version": "0.0.65",
+  "version": "0.0.71",
  "type": "module",
  "private": true,
  "scripts": {
@@ -1,5 +1,12 @@
 # @llamaindex/llama-parse-browser-test

+## 0.0.4
+
+### Patch Changes
+
+- Updated dependencies [ac41ed3]
+  - @llamaindex/cloud@0.2.8
+
 ## 0.0.3

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/llama-parse-browser-test",
  "private": true,
-  "version": "0.0.3",
+  "version": "0.0.4",
  "type": "module",
  "scripts": {
    "dev": "vite",
@@ -1,5 +1,48 @@
 # @llamaindex/next-agent-test

+## 0.1.71
+
+### Patch Changes
+
+- llamaindex@0.6.9
+
+## 0.1.70
+
+### Patch Changes
+
+- Updated dependencies [8b7fdba]
+  - llamaindex@0.6.8
+
+## 0.1.69
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.1.68
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.1.67
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.1.66
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
 ## 0.1.65

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-agent-test",
-  "version": "0.1.65",
+  "version": "0.1.71",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,48 @@
 # test-edge-runtime

+## 0.1.70
+
+### Patch Changes
+
+- llamaindex@0.6.9
+
+## 0.1.69
+
+### Patch Changes
+
+- Updated dependencies [8b7fdba]
+  - llamaindex@0.6.8
+
+## 0.1.68
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.1.67
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.1.66
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.1.65
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
 ## 0.1.64

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/nextjs-edge-runtime-test",
-  "version": "0.1.64",
+  "version": "0.1.70",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,107 +0,0 @@
-:root {
-  --max-width: 1100px;
-  --border-radius: 12px;
-  --font-mono: ui-monospace, Menlo, Monaco, "Cascadia Mono", "Segoe UI Mono",
-    "Roboto Mono", "Oxygen Mono", "Ubuntu Monospace", "Source Code Pro",
-    "Fira Mono", "Droid Sans Mono", "Courier New", monospace;
-
-  --foreground-rgb: 0, 0, 0;
-  --background-start-rgb: 214, 219, 220;
-  --background-end-rgb: 255, 255, 255;
-
-  --primary-glow: conic-gradient(
-    from 180deg at 50% 50%,
-    #16abff33 0deg,
-    #0885ff33 55deg,
-    #54d6ff33 120deg,
-    #0071ff33 160deg,
-    transparent 360deg
-  );
-  --secondary-glow: radial-gradient(
-    rgba(255, 255, 255, 1),
-    rgba(255, 255, 255, 0)
-  );
-
-  --tile-start-rgb: 239, 245, 249;
-  --tile-end-rgb: 228, 232, 233;
-  --tile-border: conic-gradient(
-    #00000080,
-    #00000040,
-    #00000030,
-    #00000020,
-    #00000010,
-    #00000010,
-    #00000080
-  );
-
-  --callout-rgb: 238, 240, 241;
-  --callout-border-rgb: 172, 175, 176;
-  --card-rgb: 180, 185, 188;
-  --card-border-rgb: 131, 134, 135;
-}
-
-@media (prefers-color-scheme: dark) {
-  :root {
-    --foreground-rgb: 255, 255, 255;
-    --background-start-rgb: 0, 0, 0;
-    --background-end-rgb: 0, 0, 0;
-
-    --primary-glow: radial-gradient(rgba(1, 65, 255, 0.4), rgba(1, 65, 255, 0));
-    --secondary-glow: linear-gradient(
-      to bottom right,
-      rgba(1, 65, 255, 0),
-      rgba(1, 65, 255, 0),
-      rgba(1, 65, 255, 0.3)
-    );
-
-    --tile-start-rgb: 2, 13, 46;
-    --tile-end-rgb: 2, 5, 19;
-    --tile-border: conic-gradient(
-      #ffffff80,
-      #ffffff40,
-      #ffffff30,
-      #ffffff20,
-      #ffffff10,
-      #ffffff10,
-      #ffffff80
-    );
-
-    --callout-rgb: 20, 20, 20;
-    --callout-border-rgb: 108, 108, 108;
-    --card-rgb: 100, 100, 100;
-    --card-border-rgb: 200, 200, 200;
-  }
-}
-
-* {
-  box-sizing: border-box;
-  padding: 0;
-  margin: 0;
-}
-
-html,
-body {
-  max-width: 100vw;
-  overflow-x: hidden;
-}
-
-body {
-  color: rgb(var(--foreground-rgb));
-  background: linear-gradient(
-      to bottom,
-      transparent,
-      rgb(var(--background-end-rgb))
-    )
-    rgb(var(--background-start-rgb));
-}
-
-a {
-  color: inherit;
-  text-decoration: none;
-}
-
-@media (prefers-color-scheme: dark) {
-  html {
-    color-scheme: dark;
-  }
-}
@@ -1,6 +1,6 @@
 // test runtime
 import "llamaindex";
-import { ClipEmbedding } from "llamaindex/embeddings/ClipEmbedding";
+import { ClipEmbedding } from "llamaindex";
 import "llamaindex/readers/SimpleDirectoryReader";

 // @ts-expect-error
@@ -1,5 +1,48 @@
 # @llamaindex/next-node-runtime

+## 0.0.52
+
+### Patch Changes
+
+- llamaindex@0.6.9
+
+## 0.0.51
+
+### Patch Changes
+
+- Updated dependencies [8b7fdba]
+  - llamaindex@0.6.8
+
+## 0.0.50
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.0.49
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.0.48
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.0.47
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
 ## 0.0.46

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-node-runtime-test",
-  "version": "0.0.46",
+  "version": "0.0.52",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,48 @@
 # @llamaindex/waku-query-engine-test

+## 0.0.71
+
+### Patch Changes
+
+- llamaindex@0.6.9
+
+## 0.0.70
+
+### Patch Changes
+
+- Updated dependencies [8b7fdba]
+  - llamaindex@0.6.8
+
+## 0.0.69
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.0.68
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.0.67
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.0.66
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
 ## 0.0.65

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/waku-query-engine-test",
-  "version": "0.0.65",
+  "version": "0.0.71",
  "type": "module",
  "private": true,
  "scripts": {
@@ -1,8 +1,41 @@
-import { ClipEmbedding, ImageNode } from "llamaindex";
+import type { LoadTransformerEvent } from "@llamaindex/env";
+import { setTransformers } from "@llamaindex/env";
+import { ClipEmbedding, ImageNode, Settings } from "llamaindex";
 import assert from "node:assert";
-import { test } from "node:test";
+import { type Mock, test } from "node:test";
+
+// Mock listener for the "load-transformers" event, shared by the tests below.
+let callback: Mock<(event: any) => void>;
+// Create the listener and subscribe it on the global callback manager.
+test.before(() => {
+  callback = test.mock.fn((event: any) => {
+    // The event detail must carry a transformers module that exposes `env`.
+    const { transformer } = event.detail as LoadTransformerEvent;
+    assert.ok(transformer);
+    assert.ok(transformer.env);
+  });
+  Settings.callbackManager.on("load-transformers", callback);
+});
+
+// Clear the recorded calls so every test starts counting from zero.
+test.beforeEach(() => {
+  callback.mock.resetCalls();
+});

 await test("clip embedding", async (t) => {
+  await t.test("should trigger load transformer event", async () => {
+    const nodes = [
+      new ImageNode({
+        image: new URL(
+          "../../fixtures/img/llamaindex-white.png",
+          import.meta.url,
+        ),
+      }),
+    ];
+    assert.equal(callback.mock.callCount(), 0);
+    const clipEmbedding = new ClipEmbedding();
+    assert.equal(callback.mock.callCount(), 0);
+    const result = await clipEmbedding(nodes);
+    assert.strictEqual(result.length, 1);
+    assert.equal(callback.mock.callCount(), 1);
+  });
+
  await t.test("init & get image embedding", async () => {
    const clipEmbedding = new ClipEmbedding();
    const imgUrl = new URL(
@@ -27,4 +60,25 @@ await test("clip embedding", async (t) => {
    assert.strictEqual(result.length, 1);
    assert.ok(result[0]!.embedding);
  });
+
+  await t.test("custom transformer", async () => {
+    const transformers = await import("@xenova/transformers");
+    const getter = test.mock.fn((t, k, r) => {
+      return Reflect.get(t, k, r);
+    });
+    setTransformers(
+      new Proxy(transformers, {
+        get: getter,
+      }),
+    );
+    const clipEmbedding = new ClipEmbedding();
+    const imgUrl = new URL(
+      "../../fixtures/img/llamaindex-white.png",
+      import.meta.url,
+    );
+    assert.equal(getter.mock.callCount(), 0);
+    const vec = await clipEmbedding.getImageEmbedding(imgUrl);
+    assert.ok(vec);
+    assert.ok(getter.mock.callCount() > 0);
+  });
 });
@@ -9,43 +9,54 @@ import { registerTypes } from "pgvector/pg";

 config({ path: [".env.local", ".env", ".env.ci"] });

-let pgClient: pg.Client | pg.Pool;
-test.afterEach(async () => {
-  await pgClient.end();
-});
-
 const pgConfig = {
  user: process.env.POSTGRES_USER ?? "user",
  password: process.env.POSTGRES_PASSWORD ?? "password",
  database: "llamaindex_node_test",
 };

-await test("init with client", async () => {
-  pgClient = new pg.Client(pgConfig);
+await test("init with client", async (t) => {
+  const pgClient = new pg.Client(pgConfig);
  await pgClient.connect();
  await pgClient.query("CREATE EXTENSION IF NOT EXISTS vector");
  await registerTypes(pgClient);
-  const vectorStore = new PGVectorStore(pgClient);
+  t.after(async () => {
+    await pgClient.end();
+  });
+  const vectorStore = new PGVectorStore({
+    client: pgClient,
+    shouldConnect: false,
+  });
  assert.deepStrictEqual(await vectorStore.client(), pgClient);
 });

-await test("init with pool", async () => {
-  pgClient = new pg.Pool(pgConfig);
+await test("init with pool", async (t) => {
+  const pgClient = new pg.Pool(pgConfig);
  await pgClient.query("CREATE EXTENSION IF NOT EXISTS vector");
  const client = await pgClient.connect();
+  await client.query("CREATE EXTENSION IF NOT EXISTS vector");
  await registerTypes(client);
-  const vectorStore = new PGVectorStore(client);
+  t.after(async () => {
+    client.release();
+    await pgClient.end();
+  });
+  const vectorStore = new PGVectorStore({
+    shouldConnect: false,
+    client,
+  });
  assert.deepStrictEqual(await vectorStore.client(), client);
-  client.release();
 });

-await test("init without client", async () => {
-  const vectorStore = new PGVectorStore(pgConfig);
-  pgClient = (await vectorStore.client()) as pg.Client;
+await test("init without client", async (t) => {
+  const vectorStore = new PGVectorStore({ clientConfig: pgConfig });
+  const pgClient = (await vectorStore.client()) as pg.Client;
+  t.after(async () => {
+    await pgClient.end();
+  });
  assert.notDeepStrictEqual(pgClient, undefined);
 });

-await test("simple node", async () => {
+await test("simple node", async (t) => {
  const dimensions = 3;
  const schemaName =
    "llamaindex_vector_store_test_" + Math.random().toString(36).substring(7);
@@ -56,10 +67,14 @@ await test("simple node", async () => {
    embedding: [0.1, 0.2, 0.3],
  });
  const vectorStore = new PGVectorStore({
-    ...pgConfig,
+    clientConfig: pgConfig,
    dimensions,
    schemaName,
  });
+  const pgClient = (await vectorStore.client()) as pg.Client;
+  t.after(async () => {
+    await pgClient.end();
+  });

  await vectorStore.add([node]);

@@ -89,6 +104,4 @@ await test("simple node", async () => {
    });
    assert.deepStrictEqual(result.nodes, []);
  }
-
-  pgClient = (await vectorStore.client()) as pg.Client;
 });
@@ -1,6 +1,6 @@
 {
  "name": "llamaindex",
-  "version": "0.6.3",
+  "version": "0.6.9",
  "license": "MIT",
  "type": "module",
  "keywords": [
@@ -33,8 +33,8 @@
    "@llamaindex/cloud": "workspace:*",
    "@llamaindex/core": "workspace:*",
    "@llamaindex/env": "workspace:*",
-    "@llamaindex/openai": "workspace:*",
    "@llamaindex/groq": "workspace:*",
+    "@llamaindex/openai": "workspace:*",
    "@mistralai/mistralai": "^1.0.4",
    "@mixedbread-ai/sdk": "^2.2.11",
    "@pinecone-database/pinecone": "^3.0.2",
@@ -43,7 +43,7 @@
    "@types/node": "^22.5.1",
    "@types/papaparse": "^5.3.14",
    "@types/pg": "^8.11.8",
-    "@xenova/transformers": "^2.17.2",
+    "@upstash/vector": "^1.1.5",
    "@zilliz/milvus2-sdk-node": "^2.4.6",
    "ajv": "^8.17.1",
    "assemblyai": "^4.7.0",
@@ -91,6 +91,7 @@
    "@notionhq/client": "^2.2.15",
    "@swc/cli": "^0.4.0",
    "@swc/core": "^1.7.22",
+    "@xenova/transformers": "^2.17.2",
    "concurrently": "^8.2.2",
    "glob": "^11.0.0",
    "pg": "^8.12.0",
@@ -1,20 +0,0 @@
-import type { NodeWithScore } from "@llamaindex/core/schema";
-import type { ServiceContext } from "./ServiceContext.js";
-import type { MessageContent } from "./index.edge.js";
-
-export type RetrieveParams = {
-  query: MessageContent;
-  preFilters?: unknown;
-};
-
-/**
- * Retrievers retrieve the nodes that most closely match our query in similarity.
- */
-export interface BaseRetriever {
-  retrieve(params: RetrieveParams): Promise<NodeWithScore[]>;
-
-  /**
-   * @deprecated to be deprecated soon
-   */
-  serviceContext?: ServiceContext | undefined;
-}
@@ -12,6 +12,7 @@ import {
  type NodeParser,
  SentenceSplitter,
 } from "@llamaindex/core/node-parser";
+import type { LoadTransformerEvent } from "@llamaindex/env";
 import { AsyncLocalStorage, getEnv } from "@llamaindex/env";
 import type { ServiceContext } from "./ServiceContext.js";
 import {
@@ -20,6 +21,12 @@ import {
  withEmbeddedModel,
 } from "./internal/settings/EmbedModel.js";

+// Adds the "load-transformers" event, with a LoadTransformerEvent payload, to
+// the LlamaIndexEventMaps interface of "@llamaindex/core/global".
+declare module "@llamaindex/core/global" {
+  interface LlamaIndexEventMaps {
+    "load-transformers": LoadTransformerEvent;
+  }
+}
+
 export type PromptConfig = {
  llm?: string;
  lang?: string;
@@ -1,9 +1,9 @@
-import { Settings } from "../Settings.js";
 import type {
-  ChatEngineParamsNonStreaming,
-  ChatEngineParamsStreaming,
-  EngineResponse,
-} from "../index.edge.js";
+  NonStreamingChatEngineParams,
+  StreamingChatEngineParams,
+} from "@llamaindex/core/chat-engine";
+import type { EngineResponse } from "@llamaindex/core/schema";
+import { Settings } from "../Settings.js";
 import { Anthropic } from "../llm/anthropic.js";
 import { LLMAgent, LLMAgentWorker, type LLMAgentParams } from "./llm.js";

@@ -24,12 +24,13 @@ export class AnthropicAgent extends LLMAgent {
    });
  }

-  async chat(params: ChatEngineParamsNonStreaming): Promise<EngineResponse>;
-  async chat(params: ChatEngineParamsStreaming): Promise<never>;
+  async chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
+  async chat(params: StreamingChatEngineParams): Promise<never>;
  override async chat(
-    params: ChatEngineParamsNonStreaming | ChatEngineParamsStreaming,
+    params: NonStreamingChatEngineParams | StreamingChatEngineParams,
  ) {
-    if (params.stream) {
+    const { stream } = params;
+    if (stream) {
      // Anthropic does support this, but looks like it's not supported in the LITS LLM
      throw new Error("Anthropic does not support streaming");
    }
@@ -1,3 +1,8 @@
+import {
+  BaseChatEngine,
+  type NonStreamingChatEngineParams,
+  type StreamingChatEngineParams,
+} from "@llamaindex/core/chat-engine";
 import type {
  BaseToolWithCall,
  ChatMessage,
@@ -10,11 +15,6 @@ import { EngineResponse } from "@llamaindex/core/schema";
 import { wrapEventCaller } from "@llamaindex/core/utils";
 import { randomUUID } from "@llamaindex/env";
 import { Settings } from "../Settings.js";
-import {
-  type ChatEngine,
-  type ChatEngineParamsNonStreaming,
-  type ChatEngineParamsStreaming,
-} from "../engines/chat/index.js";
 import { consoleLogger, emptyLogger } from "../internal/logger.js";
 import { isReadableStream } from "../internal/utils.js";
 import { ObjectRetriever } from "../objects/index.js";
@@ -207,8 +207,7 @@ export abstract class AgentRunner<
  >
    ? AdditionalMessageOptions
    : never,
-> implements ChatEngine
-{
+> extends BaseChatEngine {
  readonly #llm: AI;
  readonly #tools:
    | BaseToolWithCall[]
@@ -259,6 +258,7 @@ export abstract class AgentRunner<
  protected constructor(
    params: AgentRunnerParams<AI, Store, AdditionalMessageOptions>,
  ) {
+    super();
    const { llm, chatHistory, systemPrompt, runner, tools, verbose } = params;
    this.#llm = llm;
    this.#chatHistory = chatHistory;
@@ -345,20 +345,19 @@ export abstract class AgentRunner<
    });
  }

-  async chat(params: ChatEngineParamsNonStreaming): Promise<EngineResponse>;
+  async chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
  async chat(
-    params: ChatEngineParamsStreaming,
+    params: StreamingChatEngineParams,
  ): Promise<ReadableStream<EngineResponse>>;
  @wrapEventCaller
  async chat(
-    params: ChatEngineParamsNonStreaming | ChatEngineParamsStreaming,
+    params: NonStreamingChatEngineParams | StreamingChatEngineParams,
  ): Promise<EngineResponse | ReadableStream<EngineResponse>> {
    let chatHistory: ChatMessage<AdditionalMessageOptions>[] = [];

    if (params.chatHistory instanceof BaseMemory) {
-      chatHistory = (await params.chatHistory.getMessages(
-        params.message,
-      )) as ChatMessage<AdditionalMessageOptions>[];
+      chatHistory =
+        (await params.chatHistory.getMessages()) as ChatMessage<AdditionalMessageOptions>[];
    } else {
      chatHistory =
        params.chatHistory as ChatMessage<AdditionalMessageOptions>[];
@@ -1,7 +1,6 @@
 import type { BaseQueryEngine } from "@llamaindex/core/query-engine";
 import type { BaseSynthesizer } from "@llamaindex/core/response-synthesizers";
 import type { Document, TransformComponent } from "@llamaindex/core/schema";
-import type { BaseRetriever } from "../Retriever.js";
 import { RetrieverQueryEngine } from "../engines/query/RetrieverQueryEngine.js";
 import type { BaseNodePostprocessor } from "../postprocessors/types.js";
 import type { CloudRetrieveParams } from "./LlamaCloudRetriever.js";
@@ -12,6 +11,7 @@ import { getAppBaseUrl, getProjectId, initService } from "./utils.js";

 import { PipelinesService, ProjectsService } from "@llamaindex/cloud/api";
 import { SentenceSplitter } from "@llamaindex/core/node-parser";
+import type { BaseRetriever } from "@llamaindex/core/retriever";
 import { getEnv } from "@llamaindex/env";
 import { OpenAIEmbedding } from "@llamaindex/openai";
 import { Settings } from "../Settings.js";
@@ -4,11 +4,12 @@ import {
  type RetrievalParams,
  type TextNodeWithScore,
 } from "@llamaindex/cloud/api";
-import { DEFAULT_PROJECT_NAME, Settings } from "@llamaindex/core/global";
+import { DEFAULT_PROJECT_NAME } from "@llamaindex/core/global";
+import type { QueryBundle } from "@llamaindex/core/query-engine";
+import { BaseRetriever } from "@llamaindex/core/retriever";
 import type { NodeWithScore } from "@llamaindex/core/schema";
 import { jsonToNode, ObjectType } from "@llamaindex/core/schema";
-import { extractText, wrapEventCaller } from "@llamaindex/core/utils";
-import type { BaseRetriever, RetrieveParams } from "../Retriever.js";
+import { extractText } from "@llamaindex/core/utils";
 import type { ClientParams, CloudConstructorParams } from "./type.js";
 import { getProjectId, initService } from "./utils.js";

@@ -17,7 +18,7 @@ export type CloudRetrieveParams = Omit<
  "query" | "search_filters" | "dense_similarity_top_k"
 > & { similarityTopK?: number; filters?: MetadataFilters };

-export class LlamaCloudRetriever implements BaseRetriever {
+export class LlamaCloudRetriever extends BaseRetriever {
  clientParams: ClientParams;
  retrieveParams: CloudRetrieveParams;
  organizationId?: string;
@@ -36,12 +37,13 @@ export class LlamaCloudRetriever implements BaseRetriever {
      return {
        // Currently LlamaCloud only supports text nodes
        node: textNode,
-        score: node.score,
+        score: node.score ?? undefined,
      };
    });
  }

  constructor(params: CloudConstructorParams & CloudRetrieveParams) {
+    super();
    this.clientParams = { apiKey: params.apiKey, baseUrl: params.baseUrl };
    initService(this.clientParams);
    this.retrieveParams = params;
@@ -54,11 +56,7 @@ export class LlamaCloudRetriever implements BaseRetriever {
    }
  }

-  @wrapEventCaller
-  async retrieve({
-    query,
-    preFilters,
-  }: RetrieveParams): Promise<NodeWithScore[]> {
+  async _retrieve(query: QueryBundle): Promise<NodeWithScore[]> {
    const { data: pipelines } =
      await PipelinesService.searchPipelinesApiV1PipelinesGet({
        query: {
@@ -97,19 +95,11 @@ export class LlamaCloudRetriever implements BaseRetriever {
        body: {
          ...this.retrieveParams,
          query: extractText(query),
-          search_filters:
-            this.retrieveParams.filters ?? (preFilters as MetadataFilters),
+          search_filters: this.retrieveParams.filters as MetadataFilters,
          dense_similarity_top_k: this.retrieveParams.similarityTopK!,
        },
      });

-    const nodesWithScores = this.resultNodesToNodeWithScore(
-      results.retrieval_nodes,
-    );
-    Settings.callbackManager.dispatchEvent("retrieve-end", {
-      query,
-      nodes: nodesWithScores,
-    });
-    return nodesWithScores;
+    return this.resultNodesToNodeWithScore(results.retrieval_nodes);
  }
 }
@@ -1,17 +1,26 @@
 import { MultiModalEmbedding } from "@llamaindex/core/embeddings";
 import type { ImageType } from "@llamaindex/core/schema";
 import _ from "lodash";
-import { lazyLoadTransformers } from "../internal/deps/transformers.js";
 // only import type, to avoid bundling error
+import { loadTransformers } from "@llamaindex/env";
 import type {
  CLIPTextModelWithProjection,
  CLIPVisionModelWithProjection,
  PreTrainedTokenizer,
  Processor,
 } from "@xenova/transformers";
+import { Settings } from "../Settings.js";

 async function readImage(input: ImageType) {
-  const { RawImage } = await lazyLoadTransformers();
+  const { RawImage } = await loadTransformers((transformer) => {
+    Settings.callbackManager.dispatchEvent(
+      "load-transformers",
+      {
+        transformer,
+      },
+      true,
+    );
+  });
  if (input instanceof Blob) {
    return await RawImage.fromBlob(input);
  } else if (_.isString(input) || input instanceof URL) {
@@ -40,7 +49,15 @@ export class ClipEmbedding extends MultiModalEmbedding {
  }

  async getTokenizer() {
-    const { AutoTokenizer } = await lazyLoadTransformers();
+    const { AutoTokenizer } = await loadTransformers((transformer) => {
+      Settings.callbackManager.dispatchEvent(
+        "load-transformers",
+        {
+          transformer,
+        },
+        true,
+      );
+    });
    if (!this.tokenizer) {
      this.tokenizer = await AutoTokenizer.from_pretrained(this.modelType);
    }
@@ -48,7 +65,15 @@ export class ClipEmbedding extends MultiModalEmbedding {
  }

  async getProcessor() {
-    const { AutoProcessor } = await lazyLoadTransformers();
+    const { AutoProcessor } = await loadTransformers((transformer) => {
+      Settings.callbackManager.dispatchEvent(
+        "load-transformers",
+        {
+          transformer,
+        },
+        true,
+      );
+    });
    if (!this.processor) {
      this.processor = await AutoProcessor.from_pretrained(this.modelType);
    }
@@ -56,7 +81,17 @@ export class ClipEmbedding extends MultiModalEmbedding {
  }

  async getVisionModel() {
-    const { CLIPVisionModelWithProjection } = await lazyLoadTransformers();
+    const { CLIPVisionModelWithProjection } = await loadTransformers(
+      (transformer) => {
+        Settings.callbackManager.dispatchEvent(
+          "load-transformers",
+          {
+            transformer,
+          },
+          true,
+        );
+      },
+    );
    if (!this.visionModel) {
      this.visionModel = await CLIPVisionModelWithProjection.from_pretrained(
        this.modelType,
@@ -67,7 +102,17 @@ export class ClipEmbedding extends MultiModalEmbedding {
  }

  async getTextModel() {
-    const { CLIPTextModelWithProjection } = await lazyLoadTransformers();
+    const { CLIPTextModelWithProjection } = await loadTransformers(
+      (transformer) => {
+        Settings.callbackManager.dispatchEvent(
+          "load-transformers",
+          {
+            transformer,
+          },
+          true,
+        );
+      },
+    );
    if (!this.textModel) {
      this.textModel = await CLIPTextModelWithProjection.from_pretrained(
        this.modelType,
@@ -1,6 +1,7 @@
 import { HfInference } from "@huggingface/inference";
 import { BaseEmbedding } from "@llamaindex/core/embeddings";
-import { lazyLoadTransformers } from "../internal/deps/transformers.js";
+import { loadTransformers } from "@llamaindex/env";
+import { Settings } from "../Settings.js";

 export enum HuggingFaceEmbeddingModelType {
  XENOVA_ALL_MINILM_L6_V2 = "Xenova/all-MiniLM-L6-v2",
@@ -33,7 +34,15 @@ export class HuggingFaceEmbedding extends BaseEmbedding {

  async getExtractor() {
    if (!this.extractor) {
-      const { pipeline } = await lazyLoadTransformers();
+      const { pipeline } = await loadTransformers((transformer) => {
+        Settings.callbackManager.dispatchEvent(
+          "load-transformers",
+          {
+            transformer,
+          },
+          true,
+        );
+      });
      this.extractor = await pipeline("feature-extraction", this.modelType, {
        quantized: this.quantized,
      });
@@ -9,3 +9,5 @@ export * from "./MixedbreadAIEmbeddings.js";
 export { OllamaEmbedding } from "./OllamaEmbedding.js";
 export * from "./OpenAIEmbedding.js";
 export { TogetherEmbedding } from "./together.js";
+// ClipEmbedding might not work in non-node.js runtime, but it doesn't have side effects
+export { ClipEmbedding, ClipEmbeddingModelType } from "./ClipEmbedding.js";
@@ -1,10 +1,14 @@
+import {
+  BaseChatEngine,
+  type NonStreamingChatEngineParams,
+  type StreamingChatEngineParams,
+} from "@llamaindex/core/chat-engine";
 import type { ChatMessage, LLM } from "@llamaindex/core/llms";
 import { BaseMemory, ChatMemoryBuffer } from "@llamaindex/core/memory";
 import {
  type CondenseQuestionPrompt,
  defaultCondenseQuestionPrompt,
  type ModuleRecord,
-  PromptMixin,
 } from "@llamaindex/core/prompts";
 import type { BaseQueryEngine } from "@llamaindex/core/query-engine";
 import type { EngineResponse } from "@llamaindex/core/schema";
@@ -16,11 +20,6 @@ import {
 } from "@llamaindex/core/utils";
 import type { ServiceContext } from "../../ServiceContext.js";
 import { llmFromSettingsOrContext } from "../../Settings.js";
-import type {
-  ChatEngine,
-  ChatEngineParamsNonStreaming,
-  ChatEngineParamsStreaming,
-} from "./types.js";

 /**
 * CondenseQuestionChatEngine is used in conjunction with an Index (for example VectorStoreIndex).
@@ -32,16 +31,16 @@ import type {
 * underlying data. It performs less well when the chat messages are not questions about the
 * data, or are very referential to previous context.
 */
-
-export class CondenseQuestionChatEngine
-  extends PromptMixin
-  implements ChatEngine
-{
+export class CondenseQuestionChatEngine extends BaseChatEngine {
  queryEngine: BaseQueryEngine;
-  chatHistory: BaseMemory;
+  memory: BaseMemory;
  llm: LLM;
  condenseMessagePrompt: CondenseQuestionPrompt;

+  get chatHistory() {
+    return this.memory.getMessages();
+  }
+
  constructor(init: {
    queryEngine: BaseQueryEngine;
    chatHistory: ChatMessage[];
@@ -51,7 +50,7 @@ export class CondenseQuestionChatEngine
    super();

    this.queryEngine = init.queryEngine;
-    this.chatHistory = new ChatMemoryBuffer({
+    this.memory = new ChatMemoryBuffer({
      chatHistory: init?.chatHistory,
    });
    this.llm = llmFromSettingsOrContext(init?.serviceContext);
@@ -78,9 +77,7 @@ export class CondenseQuestionChatEngine
  }

  private async condenseQuestion(chatHistory: BaseMemory, question: string) {
-    const chatHistoryStr = messagesToHistory(
-      await chatHistory.getMessages(question),
-    );
+    const chatHistoryStr = messagesToHistory(await chatHistory.getMessages());

    return this.llm.complete({
      prompt: this.condenseMessagePrompt.format({
@@ -90,23 +87,23 @@ export class CondenseQuestionChatEngine
    });
  }

+  chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
  chat(
-    params: ChatEngineParamsStreaming,
+    params: StreamingChatEngineParams,
  ): Promise<AsyncIterable<EngineResponse>>;
-  chat(params: ChatEngineParamsNonStreaming): Promise<EngineResponse>;
  @wrapEventCaller
  async chat(
-    params: ChatEngineParamsStreaming | ChatEngineParamsNonStreaming,
+    params: NonStreamingChatEngineParams | StreamingChatEngineParams,
  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
    const { message, stream } = params;
    const chatHistory = params.chatHistory
      ? new ChatMemoryBuffer({
          chatHistory:
            params.chatHistory instanceof BaseMemory
-              ? await params.chatHistory.getMessages(message)
+              ? await params.chatHistory.getMessages()
              : params.chatHistory,
        })
-      : this.chatHistory;
+      : this.memory;

    const condensedQuestion = (
      await this.condenseQuestion(chatHistory, extractText(message))
@@ -114,12 +111,10 @@ export class CondenseQuestionChatEngine
    chatHistory.put({ content: message, role: "user" });

    if (stream) {
-      const stream = await this.queryEngine.query(
-        {
-          query: condensedQuestion,
-        },
-        true,
-      );
+      const stream = await this.queryEngine.query({
+        query: condensedQuestion,
+        stream: true,
+      });
      return streamReducer({
        stream,
        initialValue: "",
@@ -142,6 +137,6 @@ export class CondenseQuestionChatEngine
  }

  reset() {
-    this.chatHistory.reset();
+    this.memory.reset();
  }
 }
@@ -1,3 +1,8 @@
+import type {
+  BaseChatEngine,
+  NonStreamingChatEngineParams,
+  StreamingChatEngineParams,
+} from "@llamaindex/core/chat-engine";
 import type {
  ChatMessage,
  LLM,
@@ -11,6 +16,7 @@ import {
  PromptMixin,
  type PromptsRecord,
 } from "@llamaindex/core/prompts";
+import type { BaseRetriever } from "@llamaindex/core/retriever";
 import { EngineResponse, MetadataMode } from "@llamaindex/core/schema";
 import {
  extractText,
@@ -18,27 +24,25 @@ import {
  streamReducer,
  wrapEventCaller,
 } from "@llamaindex/core/utils";
-import type { BaseRetriever } from "../../Retriever.js";
 import { Settings } from "../../Settings.js";
 import type { BaseNodePostprocessor } from "../../postprocessors/index.js";
 import { DefaultContextGenerator } from "./DefaultContextGenerator.js";
-import type {
-  ChatEngine,
-  ChatEngineParamsNonStreaming,
-  ChatEngineParamsStreaming,
-  ContextGenerator,
-} from "./types.js";
+import type { ContextGenerator } from "./types.js";

 /**
 * ContextChatEngine uses the Index to get the appropriate context for each query.
 * The context is stored in the system prompt, and the chat history is preserved, allowing the appropriate context to be surfaced for each query.
 */
-export class ContextChatEngine extends PromptMixin implements ChatEngine {
+export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
  chatModel: LLM;
-  chatHistory: BaseMemory;
+  memory: BaseMemory;
  contextGenerator: ContextGenerator & PromptMixin;
  systemPrompt?: string | undefined;

+  get chatHistory() {
+    return this.memory.getMessages();
+  }
+
  constructor(init: {
    retriever: BaseRetriever;
    chatModel?: LLM | undefined;
@@ -50,7 +54,7 @@ export class ContextChatEngine extends PromptMixin implements ChatEngine {
  }) {
    super();
    this.chatModel = init.chatModel ?? Settings.llm;
-    this.chatHistory = new ChatMemoryBuffer({ chatHistory: init?.chatHistory });
+    this.memory = new ChatMemoryBuffer({ chatHistory: init?.chatHistory });
    this.contextGenerator = new DefaultContextGenerator({
      retriever: init.retriever,
      contextSystemPrompt: init?.contextSystemPrompt,
@@ -79,23 +83,23 @@ export class ContextChatEngine extends PromptMixin implements ChatEngine {
    };
  }

+  chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
  chat(
-    params: ChatEngineParamsStreaming,
+    params: StreamingChatEngineParams,
  ): Promise<AsyncIterable<EngineResponse>>;
-  chat(params: ChatEngineParamsNonStreaming): Promise<EngineResponse>;
  @wrapEventCaller
  async chat(
-    params: ChatEngineParamsStreaming | ChatEngineParamsNonStreaming,
+    params: StreamingChatEngineParams | NonStreamingChatEngineParams,
  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
    const { message, stream } = params;
    const chatHistory = params.chatHistory
      ? new ChatMemoryBuffer({
          chatHistory:
            params.chatHistory instanceof BaseMemory
-              ? await params.chatHistory.getMessages(message)
+              ? await params.chatHistory.getMessages()
              : params.chatHistory,
        })
-      : this.chatHistory;
+      : this.memory;
    const requestMessages = await this.prepareRequestMessages(
      message,
      chatHistory,
@@ -125,7 +129,7 @@ export class ContextChatEngine extends PromptMixin implements ChatEngine {
  }

  reset() {
-    this.chatHistory.reset();
+    this.memory.reset();
  }

  private async prepareRequestMessages(
@@ -139,7 +143,7 @@ export class ContextChatEngine extends PromptMixin implements ChatEngine {
    const textOnly = extractText(message);
    const context = await this.contextGenerator.generate(textOnly);
    const systemMessage = this.prependSystemPrompt(context.message);
-    const messages = await chatHistory.getMessages(systemMessage.content);
+    const messages = await chatHistory.getMessages([systemMessage]);
    return { nodes: context.nodes, messages };
  }

@@ -5,10 +5,10 @@ import {
  type ModuleRecord,
  PromptMixin,
 } from "@llamaindex/core/prompts";
+import type { BaseRetriever } from "@llamaindex/core/retriever";
 import { MetadataMode, type NodeWithScore } from "@llamaindex/core/schema";
 import { createMessageContent } from "@llamaindex/core/utils";
 import type { BaseNodePostprocessor } from "../../postprocessors/index.js";
-import type { BaseRetriever } from "../../Retriever.js";
 import type { Context, ContextGenerator } from "./types.js";

 export class DefaultContextGenerator
@@ -1,3 +1,8 @@
+import type {
+  BaseChatEngine,
+  NonStreamingChatEngineParams,
+  StreamingChatEngineParams,
+} from "@llamaindex/core/chat-engine";
 import type { LLM } from "@llamaindex/core/llms";
 import { BaseMemory, ChatMemoryBuffer } from "@llamaindex/core/memory";
 import { EngineResponse } from "@llamaindex/core/schema";
@@ -7,32 +12,31 @@ import {
  wrapEventCaller,
 } from "@llamaindex/core/utils";
 import { Settings } from "../../Settings.js";
-import type {
-  ChatEngine,
-  ChatEngineParamsNonStreaming,
-  ChatEngineParamsStreaming,
-} from "./types.js";

 /**
 * SimpleChatEngine is the simplest possible chat engine. Useful for using your own custom prompts.
 */

-export class SimpleChatEngine implements ChatEngine {
-  chatHistory: BaseMemory;
+export class SimpleChatEngine implements BaseChatEngine {
+  memory: BaseMemory;
  llm: LLM;

+  get chatHistory() {
+    return this.memory.getMessages();
+  }
+
  constructor(init?: Partial<SimpleChatEngine>) {
-    this.chatHistory = init?.chatHistory ?? new ChatMemoryBuffer();
+    this.memory = init?.memory ?? new ChatMemoryBuffer();
    this.llm = init?.llm ?? Settings.llm;
  }

+  chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
  chat(
-    params: ChatEngineParamsStreaming,
+    params: StreamingChatEngineParams,
  ): Promise<AsyncIterable<EngineResponse>>;
-  chat(params: ChatEngineParamsNonStreaming): Promise<EngineResponse>;
  @wrapEventCaller
  async chat(
-    params: ChatEngineParamsStreaming | ChatEngineParamsNonStreaming,
+    params: NonStreamingChatEngineParams | StreamingChatEngineParams,
  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
    const { message, stream } = params;

@@ -40,15 +44,15 @@ export class SimpleChatEngine implements ChatEngine {
      ? new ChatMemoryBuffer({
          chatHistory:
            params.chatHistory instanceof BaseMemory
-              ? await params.chatHistory.getMessages(message)
+              ? await params.chatHistory.getMessages()
              : params.chatHistory,
        })
-      : this.chatHistory;
+      : this.memory;
    chatHistory.put({ content: message, role: "user" });

    if (stream) {
      const stream = await this.llm.chat({
-        messages: await chatHistory.getMessages(params.message),
+        messages: await chatHistory.getMessages(),
        stream: true,
      });
      return streamConverter(
@@ -66,13 +70,13 @@ export class SimpleChatEngine implements ChatEngine {

    const response = await this.llm.chat({
      stream: false,
-      messages: await chatHistory.getMessages(params.message),
+      messages: await chatHistory.getMessages(),
    });
    chatHistory.put(response.message);
    return EngineResponse.fromChatResponse(response);
  }

  reset() {
-    this.chatHistory.reset();
+    this.memory.reset();
  }
 }
@@ -1,58 +1,10 @@
-import type { ChatMessage, MessageContent } from "@llamaindex/core/llms";
-import type { BaseMemory } from "@llamaindex/core/memory";
-import { EngineResponse, type NodeWithScore } from "@llamaindex/core/schema";
-
-/**
- * Represents the base parameters for ChatEngine.
- */
-export interface ChatEngineParamsBase {
-  message: MessageContent;
-  /**
-   * Optional chat history if you want to customize the chat history.
-   */
-  chatHistory?: ChatMessage[] | BaseMemory;
-  /**
-   * Optional flag to enable verbose mode.
-   * @default false
-   */
-  verbose?: boolean;
-}
-
-export interface ChatEngineParamsStreaming extends ChatEngineParamsBase {
-  stream: true;
-}
-
-export interface ChatEngineParamsNonStreaming extends ChatEngineParamsBase {
-  stream?: false | null;
-}
-
-/**
- * A ChatEngine is used to handle back and forth chats between the application and the LLM.
- */
-export interface ChatEngine<
-  // synchronous response
-  R = EngineResponse,
-  // asynchronous response
-  AR extends AsyncIterable<unknown> = AsyncIterable<R>,
-> {
-  /**
-   * Send message along with the class's current chat history to the LLM.
-   * @param params
-   */
-  chat(params: ChatEngineParamsStreaming): Promise<AR>;
-  chat(params: ChatEngineParamsNonStreaming): Promise<R>;
-
-  /**
-   * Resets the chat history so that it's empty.
-   */
-  reset(): void;
-}
+import type { ChatMessage } from "@llamaindex/core/llms";
+import type { NodeWithScore } from "@llamaindex/core/schema";

 export interface Context {
  message: ChatMessage;
  nodes: NodeWithScore[];
 }
-
 /**
 * A ContextGenerator is used to generate a context based on a message's text content
 */
@@ -1,10 +1,11 @@
-import { BaseQueryEngine } from "@llamaindex/core/query-engine";
+import type { MessageContent } from "@llamaindex/core/llms";
+import { BaseQueryEngine, type QueryType } from "@llamaindex/core/query-engine";
 import type { BaseSynthesizer } from "@llamaindex/core/response-synthesizers";
 import { getResponseSynthesizer } from "@llamaindex/core/response-synthesizers";
+import { BaseRetriever } from "@llamaindex/core/retriever";
 import { type NodeWithScore } from "@llamaindex/core/schema";
 import { extractText } from "@llamaindex/core/utils";
 import type { BaseNodePostprocessor } from "../../postprocessors/index.js";
-import type { BaseRetriever } from "../../Retriever.js";

 /**
 * A query engine that uses a retriever to query an index and then synthesizes the response.
@@ -67,7 +68,10 @@ export class RetrieverQueryEngine extends BaseQueryEngine {
    };
  }

-  private async applyNodePostprocessors(nodes: NodeWithScore[], query: string) {
+  private async applyNodePostprocessors(
+    nodes: NodeWithScore[],
+    query: MessageContent,
+  ) {
    let nodesWithScore = nodes;

    for (const postprocessor of this.nodePostprocessors) {
@@ -80,12 +84,10 @@ export class RetrieverQueryEngine extends BaseQueryEngine {
    return nodesWithScore;
  }

-  private async retrieve(query: string) {
-    const nodes = await this.retriever.retrieve({
-      query,
-      preFilters: this.preFilters,
-    });
+  override async retrieve(query: QueryType) {
+    const nodes = await this.retriever.retrieve(query);

-    return await this.applyNodePostprocessors(nodes, query);
+    const messageContent = typeof query === "string" ? query : query.query;
+    return await this.applyNodePostprocessors(nodes, messageContent);
  }
 }
@@ -136,7 +136,9 @@ export class RouterQueryEngine extends BaseQueryEngine {
        }

        const selectedQueryEngine = this.queryEngines[engineInd.index]!;
-        responses.push(await selectedQueryEngine.query(query));
+        responses.push(
+          await selectedQueryEngine.query({ query, stream: false }),
+        );
      }

      if (responses.length > 1) {
@@ -103,7 +103,8 @@ export class FaithfulnessEvaluator
    });

    const responseObj = await queryEngine.query({
-      query: response,
+      query: { query: response },
+      stream: false,
    });

    const rawResponseTxt = responseObj.toString();
@@ -1,6 +1,6 @@
 import type { AgentEndEvent, AgentStartEvent } from "./agent/types.js";
-import type { RetrievalEndEvent, RetrievalStartEvent } from "./llm/types.js";

+export * from "@llamaindex/core/chat-engine";
 export {
  CallbackManager,
  DEFAULT_BASE_URL,
@@ -35,12 +35,11 @@ export * from "@llamaindex/core/llms";
 export * from "@llamaindex/core/prompts";
 export * from "@llamaindex/core/query-engine";
 export * from "@llamaindex/core/response-synthesizers";
+export * from "@llamaindex/core/retriever";
 export * from "@llamaindex/core/schema";

 declare module "@llamaindex/core/global" {
  export interface LlamaIndexEventMaps {
-    "retrieve-start": RetrievalStartEvent;
-    "retrieve-end": RetrievalEndEvent;
    // agent events
    "agent-start": AgentStartEvent;
    "agent-end": AgentEndEvent;
@@ -66,7 +65,6 @@ export * from "./objects/index.js";
 export * from "./OutputParser.js";
 export * from "./postprocessors/index.js";
 export * from "./QuestionGenerator.js";
-export * from "./Retriever.js";
 export * from "./selectors/index.js";
 export * from "./ServiceContext.js";
 export { Settings } from "./Settings.js";
@@ -2,10 +2,6 @@ export * from "./index.edge.js";
 export * from "./readers/index.js";
 export * from "./storage/index.js";
 // Exports modules that don't support non-Node.js runtimes
-export {
-  ClipEmbedding,
-  ClipEmbeddingModelType,
-} from "./embeddings/ClipEmbedding.js";
 export {
  HuggingFaceEmbedding,
  HuggingFaceEmbeddingModelType,
@@ -1,7 +1,7 @@
 import type { BaseQueryEngine } from "@llamaindex/core/query-engine";
 import type { BaseSynthesizer } from "@llamaindex/core/response-synthesizers";
+import type { BaseRetriever } from "@llamaindex/core/retriever";
 import type { BaseNode, Document } from "@llamaindex/core/schema";
-import type { BaseRetriever } from "../Retriever.js";
 import type { ServiceContext } from "../ServiceContext.js";
 import { nodeParserFromSettingsOrContext } from "../Settings.js";
 import { runTransformations } from "../ingestion/IngestionPipeline.js";
@@ -5,7 +5,6 @@ import type {
  NodeWithScore,
 } from "@llamaindex/core/schema";
 import { MetadataMode } from "@llamaindex/core/schema";
-import type { BaseRetriever, RetrieveParams } from "../../Retriever.js";
 import type { ServiceContext } from "../../ServiceContext.js";
 import { serviceContextFromDefaults } from "../../ServiceContext.js";
 import { RetrieverQueryEngine } from "../../engines/query/index.js";
@@ -29,7 +28,11 @@ import {
  type KeywordExtractPrompt,
  type QueryKeywordExtractPrompt,
 } from "@llamaindex/core/prompts";
-import type { BaseQueryEngine } from "@llamaindex/core/query-engine";
+import type {
+  BaseQueryEngine,
+  QueryBundle,
+} from "@llamaindex/core/query-engine";
+import { BaseRetriever } from "@llamaindex/core/retriever";
 import { extractText } from "@llamaindex/core/utils";
 import { llmFromSettingsOrContext } from "../../Settings.js";

@@ -48,7 +51,7 @@ export enum KeywordTableRetrieverMode {
 }

 // Base Keyword Table Retriever
-abstract class BaseKeywordTableRetriever implements BaseRetriever {
+abstract class BaseKeywordTableRetriever extends BaseRetriever {
  protected index: KeywordTableIndex;
  protected indexStruct: KeywordTable;
  protected docstore: BaseDocumentStore;
@@ -72,6 +75,7 @@ abstract class BaseKeywordTableRetriever implements BaseRetriever {
    maxKeywordsPerQuery: number;
    numChunksPerQuery: number;
  }) {
+    super();
    this.index = index;
    this.indexStruct = index.indexStruct;
    this.docstore = index.docStore;
@@ -87,7 +91,7 @@ abstract class BaseKeywordTableRetriever implements BaseRetriever {

  abstract getKeywords(query: string): Promise<string[]>;

-  async retrieve({ query }: RetrieveParams): Promise<NodeWithScore[]> {
+  async _retrieve(query: QueryBundle): Promise<NodeWithScore[]> {
    const keywords = await this.getKeywords(extractText(query));
    const chunkIndicesCount: { [key: string]: number } = {};
    const filteredKeywords = keywords.filter((keyword) =>
@@ -2,16 +2,17 @@ import {
  type ChoiceSelectPrompt,
  defaultChoiceSelectPrompt,
 } from "@llamaindex/core/prompts";
+import type { QueryBundle } from "@llamaindex/core/query-engine";
 import type { BaseSynthesizer } from "@llamaindex/core/response-synthesizers";
 import { getResponseSynthesizer } from "@llamaindex/core/response-synthesizers";
+import { BaseRetriever } from "@llamaindex/core/retriever";
 import type {
  BaseNode,
  Document,
  NodeWithScore,
 } from "@llamaindex/core/schema";
-import { extractText, wrapEventCaller } from "@llamaindex/core/utils";
+import { extractText } from "@llamaindex/core/utils";
 import _ from "lodash";
-import type { BaseRetriever, RetrieveParams } from "../../Retriever.js";
 import type { ServiceContext } from "../../ServiceContext.js";
 import {
  llmFromSettingsOrContext,
@@ -279,15 +280,15 @@ export type ListRetrieverMode = SummaryRetrieverMode;
 /**
 * Simple retriever for SummaryIndex that returns all nodes
 */
-export class SummaryIndexRetriever implements BaseRetriever {
+export class SummaryIndexRetriever extends BaseRetriever {
  index: SummaryIndex;

  constructor(index: SummaryIndex) {
+    super();
    this.index = index;
  }

-  @wrapEventCaller
-  async retrieve({ query }: RetrieveParams): Promise<NodeWithScore[]> {
+  async _retrieve(queryBundle: QueryBundle): Promise<NodeWithScore[]> {
    const nodeIds = this.index.indexStruct.nodes;
    const nodes = await this.index.docStore.getNodes(nodeIds);
    return nodes.map((node) => ({
@@ -300,7 +301,7 @@ export class SummaryIndexRetriever implements BaseRetriever {
 /**
 * LLM retriever for SummaryIndex which lets you select the most relevant chunks.
 */
-export class SummaryIndexLLMRetriever implements BaseRetriever {
+export class SummaryIndexLLMRetriever extends BaseRetriever {
  index: SummaryIndex;
  choiceSelectPrompt: ChoiceSelectPrompt;
  choiceBatchSize: number;
@@ -317,6 +318,7 @@ export class SummaryIndexLLMRetriever implements BaseRetriever {
    parseChoiceSelectAnswerFn?: ChoiceSelectParserFunction,
    serviceContext?: ServiceContext,
  ) {
+    super();
    this.index = index;
    this.choiceSelectPrompt = choiceSelectPrompt || defaultChoiceSelectPrompt;
    this.choiceBatchSize = choiceBatchSize;
@@ -326,7 +328,7 @@ export class SummaryIndexLLMRetriever implements BaseRetriever {
    this.serviceContext = serviceContext || index.serviceContext;
  }

-  async retrieve({ query }: RetrieveParams): Promise<NodeWithScore[]> {
+  async _retrieve(query: QueryBundle): Promise<NodeWithScore[]> {
    const nodeIds = this.index.indexStruct.nodes;
    const results: NodeWithScore[] = [];

@@ -2,9 +2,10 @@ import {
  DEFAULT_SIMILARITY_TOP_K,
  type BaseEmbedding,
 } from "@llamaindex/core/embeddings";
-import { Settings } from "@llamaindex/core/global";
 import type { MessageContent } from "@llamaindex/core/llms";
+import type { QueryBundle } from "@llamaindex/core/query-engine";
 import type { BaseSynthesizer } from "@llamaindex/core/response-synthesizers";
+import { BaseRetriever } from "@llamaindex/core/retriever";
 import {
  ImageNode,
  ModalityType,
@@ -14,8 +15,6 @@ import {
  type Document,
  type NodeWithScore,
 } from "@llamaindex/core/schema";
-import { wrapEventCaller } from "@llamaindex/core/utils";
-import type { BaseRetriever, RetrieveParams } from "../../Retriever.js";
 import type { ServiceContext } from "../../ServiceContext.js";
 import { nodeParserFromSettingsOrContext } from "../../Settings.js";
 import { RetrieverQueryEngine } from "../../engines/query/RetrieverQueryEngine.js";
@@ -388,7 +387,7 @@ export type VectorIndexRetrieverOptions = {
  filters?: MetadataFilters;
 };

-export class VectorIndexRetriever implements BaseRetriever {
+export class VectorIndexRetriever extends BaseRetriever {
  index: VectorStoreIndex;
  topK: TopKMap;

@@ -401,6 +400,7 @@ export class VectorIndexRetriever implements BaseRetriever {
    topK,
    filters,
  }: VectorIndexRetrieverOptions) {
+    super();
    this.index = index;
    this.serviceContext = this.index.serviceContext;
    this.topK = topK ?? {
@@ -417,32 +417,17 @@ export class VectorIndexRetriever implements BaseRetriever {
    this.topK[ModalityType.TEXT] = similarityTopK;
  }

-  @wrapEventCaller
-  async retrieve({
-    query,
-    preFilters,
-  }: RetrieveParams): Promise<NodeWithScore[]> {
-    Settings.callbackManager.dispatchEvent("retrieve-start", {
-      query,
-    });
+  async _retrieve(params: QueryBundle): Promise<NodeWithScore[]> {
+    const { query } = params;
    const vectorStores = this.index.vectorStores;
    let nodesWithScores: NodeWithScore[] = [];

    for (const type in vectorStores) {
      const vectorStore: VectorStore = vectorStores[type as ModalityType]!;
      nodesWithScores = nodesWithScores.concat(
-        await this.retrieveQuery(
-          query,
-          type as ModalityType,
-          vectorStore,
-          preFilters as MetadataFilters,
-        ),
+        await this.retrieveQuery(query, type as ModalityType, vectorStore),
      );
    }
-    Settings.callbackManager.dispatchEvent("retrieve-end", {
-      query,
-      nodes: nodesWithScores,
-    });
    return nodesWithScores;
  }

@@ -1,7 +1,11 @@
 import type { BaseNode, TransformComponent } from "@llamaindex/core/schema";
 import { MetadataMode } from "@llamaindex/core/schema";
 import { createSHA256 } from "@llamaindex/env";
-import { docToJson, jsonToDoc } from "../storage/docStore/utils.js";
+import {
+  docToJson,
+  jsonSerializer,
+  jsonToDoc,
+} from "../storage/docStore/utils.js";
 import { SimpleKVStore } from "../storage/kvStore/SimpleKVStore.js";
 import type { BaseKVStore } from "../storage/kvStore/types.js";

@@ -53,7 +57,7 @@ export class IngestionCache {

  async put(hash: string, nodes: BaseNode[]) {
    const val = {
-      [this.nodesKey]: nodes.map((node) => docToJson(node)),
+      [this.nodesKey]: nodes.map((node) => docToJson(node, jsonSerializer)),
    };
    await this.cache.put(hash, val, this.collection);
  }
@@ -63,6 +67,8 @@ export class IngestionCache {
    if (!json || !json[this.nodesKey] || !Array.isArray(json[this.nodesKey])) {
      return undefined;
    }
-    return json[this.nodesKey].map((doc: any) => jsonToDoc(doc));
+    return json[this.nodesKey].map((doc: any) =>
+      jsonToDoc(doc, jsonSerializer),
+    );
  }
 }
@@ -1,15 +0,0 @@
-let transformer: typeof import("@xenova/transformers") | null = null;
-
-export async function lazyLoadTransformers() {
-  if (!transformer) {
-    transformer = await import("@xenova/transformers");
-  }
-
-  // @ts-expect-error
-  if (typeof EdgeRuntime === "string") {
-    // there is no local file system in the edge runtime
-    transformer.env.allowLocalModels = false;
-  }
-  // fixme: handle cloudflare workers case here?
-  return transformer;
-}
@@ -11,12 +11,13 @@ import {
  type ToolCallLLMMessageOptions,
 } from "@llamaindex/core/llms";
 import { streamConverter, wrapLLMEvent } from "@llamaindex/core/utils";
+import { loadTransformers } from "@llamaindex/env";
 import type {
  PreTrainedModel,
  PreTrainedTokenizer,
  Tensor,
 } from "@xenova/transformers";
-import { lazyLoadTransformers } from "../internal/deps/transformers.js";
+import { Settings } from "../Settings.js";

 // TODO workaround issue with @huggingface/inference@2.7.0
 interface HfInferenceOptions {
@@ -225,7 +226,15 @@ export class HuggingFaceLLM extends BaseLLM {
  }

  async getTokenizer() {
-    const { AutoTokenizer } = await lazyLoadTransformers();
+    const { AutoTokenizer } = await loadTransformers((transformer) => {
+      Settings.callbackManager.dispatchEvent(
+        "load-transformers",
+        {
+          transformer,
+        },
+        true,
+      );
+    });
    if (!this.tokenizer) {
      this.tokenizer = await AutoTokenizer.from_pretrained(this.tokenizerName);
    }
@@ -233,7 +242,15 @@ export class HuggingFaceLLM extends BaseLLM {
  }

  async getModel() {
-    const { AutoModelForCausalLM } = await lazyLoadTransformers();
+    const { AutoModelForCausalLM } = await loadTransformers((transformer) => {
+      Settings.callbackManager.dispatchEvent(
+        "load-transformers",
+        {
+          transformer,
+        },
+        true,
+      );
+    });
    if (!this.model) {
      this.model = await AutoModelForCausalLM.from_pretrained(this.modelName);
    }
@@ -1,10 +0,0 @@
-import type { MessageContent } from "@llamaindex/core/llms";
-import type { NodeWithScore } from "@llamaindex/core/schema";
-
-export type RetrievalStartEvent = {
-  query: MessageContent;
-};
-export type RetrievalEndEvent = {
-  query: MessageContent;
-  nodes: NodeWithScore[];
-};
@@ -1,8 +1,8 @@
 import type { BaseTool, MessageContent } from "@llamaindex/core/llms";
+import { BaseRetriever } from "@llamaindex/core/retriever";
 import type { BaseNode, Metadata } from "@llamaindex/core/schema";
 import { TextNode } from "@llamaindex/core/schema";
 import { extractText } from "@llamaindex/core/utils";
-import type { BaseRetriever } from "../Retriever.js";
 import type { VectorStoreIndex } from "../indices/vectorStore/index.js";

 // Assuming that necessary interfaces and classes (like OT, TextNode, BaseNode, etc.) are defined elsewhere
@@ -49,9 +49,6 @@ export abstract class BaseObjectNodeMapping {

 // You will need to implement specific subclasses of BaseObjectNodeMapping as per your project requirements.

-// todo: multimodal support
-type QueryType = MessageContent;
-
 export class ObjectRetriever<T = unknown> {
  _retriever: BaseRetriever;
  _objectNodeMapping: BaseObjectNodeMapping;
@@ -70,7 +67,7 @@ export class ObjectRetriever<T = unknown> {
  }

  // Translating the retrieve method
-  async retrieve(strOrQueryBundle: QueryType): Promise<T[]> {
+  async retrieve(strOrQueryBundle: MessageContent): Promise<T[]> {
    const nodes = await this.retriever.retrieve({
      query: extractText(strOrQueryBundle),
    });
@@ -29,7 +29,7 @@ export class KVDocumentStore extends BaseDocumentStore {
    for (const key in jsonDict) {
      const value = jsonDict[key];
      if (isValidDocJson(value)) {
-        docs[key] = jsonToDoc(value);
+        docs[key] = jsonToDoc(value, this.serializer);
      } else {
        console.warn(`Invalid JSON for docId ${key}`);
      }
@@ -52,7 +52,7 @@ export class KVDocumentStore extends BaseDocumentStore {
        );
      }
      const nodeKey = doc.id_;
-      const data = docToJson(doc);
+      const data = docToJson(doc, this.serializer);
      await this.kvstore.put(nodeKey, data, this.nodeCollection);
      const metadata: DocMetaData = { docHash: doc.hash };

@@ -94,7 +94,7 @@ export class KVDocumentStore extends BaseDocumentStore {
    if (!isValidDocJson(json)) {
      throw new Error(`Invalid JSON for docId ${docId}`);
    }
-    return jsonToDoc(json);
+    return jsonToDoc(json, this.serializer);
  }

  async getRefDocInfo(refDocId: string): Promise<RefDocInfo | undefined> {
@@ -1,19 +1,32 @@
 import { DEFAULT_NAMESPACE } from "@llamaindex/core/global";
-import { PostgresKVStore } from "../kvStore/PostgresKVStore.js";
+import {
+  PostgresKVStore,
+  type PostgresKVStoreConfig,
+} from "../kvStore/PostgresKVStore.js";
 import { KVDocumentStore } from "./KVDocumentStore.js";
+import { noneSerializer } from "./utils.js";

 const DEFAULT_TABLE_NAME = "llamaindex_doc_store";

+export type PostgresDocumentStoreConfig = PostgresKVStoreConfig & {
+  namespace?: string;
+};
+
 export class PostgresDocumentStore extends KVDocumentStore {
-  constructor(config?: {
-    schemaName?: string;
-    tableName?: string;
-    connectionString?: string;
-    namespace?: string;
-  }) {
+  serializer = noneSerializer;
+
+  constructor(config?: PostgresDocumentStoreConfig) {
    const kvStore = new PostgresKVStore({
      schemaName: config?.schemaName,
      tableName: config?.tableName || DEFAULT_TABLE_NAME,
+      ...(config && "clientConfig" in config
+        ? { clientConfig: config.clientConfig }
+        : config && "client" in config
+          ? {
+              client: config.client,
+              shouldConnect: config.shouldConnect ?? false,
+            }
+          : {}),
    });
    const namespace = config?.namespace || DEFAULT_NAMESPACE;
    super(kvStore, namespace);
@@ -3,6 +3,7 @@ import {
  DEFAULT_PERSIST_DIR,
 } from "@llamaindex/core/global";
 import { BaseNode } from "@llamaindex/core/schema";
+import { jsonSerializer, type Serializer } from "./utils.js";

 const defaultPersistPath = `${DEFAULT_PERSIST_DIR}/${DEFAULT_DOC_STORE_PERSIST_FILENAME}`;

@@ -12,6 +13,8 @@ export interface RefDocInfo {
 }

 export abstract class BaseDocumentStore {
+  serializer: Serializer<any> = jsonSerializer;
+
  // Save/load
  persist(persistPath: string = defaultPersistPath): void {
    // Persist the docstore to a file.
@@ -4,12 +4,35 @@ import { Document, ObjectType, TextNode } from "@llamaindex/core/schema";
 const TYPE_KEY = "__type__";
 const DATA_KEY = "__data__";

-type DocJson = {
-  [TYPE_KEY]: ObjectType;
-  [DATA_KEY]: string;
+export interface Serializer<T> {
+  toPersistence(data: Record<string, unknown>): T;
+  fromPersistence(data: T): Record<string, unknown>;
+}
+
+export const jsonSerializer: Serializer<string> = {
+  toPersistence(data) {
+    return JSON.stringify(data);
+  },
+  fromPersistence(data) {
+    return JSON.parse(data);
+  },
 };

-export function isValidDocJson(docJson: any): docJson is DocJson {
+export const noneSerializer: Serializer<Record<string, unknown>> = {
+  toPersistence(data) {
+    return data;
+  },
+  fromPersistence(data) {
+    return data;
+  },
+};
+
+type DocJson<Data> = {
+  [TYPE_KEY]: ObjectType;
+  [DATA_KEY]: Data;
+};
+
+export function isValidDocJson(docJson: any): docJson is DocJson<unknown> {
  return (
    typeof docJson === "object" &&
    docJson !== null &&
@@ -18,16 +41,22 @@ export function isValidDocJson(docJson: any): docJson is DocJson {
  );
 }

-export function docToJson(doc: BaseNode): DocJson {
+export function docToJson(
+  doc: BaseNode,
+  serializer: Serializer<unknown>,
+): DocJson<unknown> {
  return {
-    [DATA_KEY]: JSON.stringify(doc.toJSON()),
+    [DATA_KEY]: serializer.toPersistence(doc.toJSON()),
    [TYPE_KEY]: doc.type,
  };
 }

-export function jsonToDoc(docDict: DocJson): BaseNode {
+export function jsonToDoc<Data>(
+  docDict: DocJson<Data>,
+  serializer: Serializer<Data>,
+): BaseNode {
  const docType = docDict[TYPE_KEY];
-  const dataDict = JSON.parse(docDict[DATA_KEY]);
+  const dataDict = serializer.fromPersistence(docDict[DATA_KEY]) as any;
  let doc: BaseNode;

  if (docType === ObjectType.DOCUMENT) {
@@ -1,19 +1,29 @@
 import { DEFAULT_NAMESPACE } from "@llamaindex/core/global";
-import { PostgresKVStore } from "../kvStore/PostgresKVStore.js";
+import {
+  PostgresKVStore,
+  type PostgresKVStoreConfig,
+} from "../kvStore/PostgresKVStore.js";
 import { KVIndexStore } from "./KVIndexStore.js";

 const DEFAULT_TABLE_NAME = "llamaindex_index_store";

+export type PostgresIndexStoreConfig = PostgresKVStoreConfig & {
+  namespace?: string;
+};
+
 export class PostgresIndexStore extends KVIndexStore {
-  constructor(config?: {
-    schemaName?: string;
-    tableName?: string;
-    connectionString?: string;
-    namespace?: string;
-  }) {
+  constructor(config?: PostgresIndexStoreConfig) {
    const kvStore = new PostgresKVStore({
      schemaName: config?.schemaName,
      tableName: config?.tableName || DEFAULT_TABLE_NAME,
+      ...(config && "clientConfig" in config
+        ? { clientConfig: config.clientConfig }
+        : config && "client" in config
+          ? {
+              client: config.client,
+              shouldConnect: config.shouldConnect ?? false,
+            }
+          : {}),
    });
    const namespace = config?.namespace || DEFAULT_NAMESPACE;
    super(kvStore, namespace);
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
github-actions[bot]	d12edee802	Release 0.6.9 (#1252 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-09-24 10:31:54 -07:00
Alex Yang	ac41ed3aae	chore: bump cloud sdk version (#1251 )	2024-09-24 09:43:45 -07:00
github-actions[bot]	d8c1159032	Release 0.6.8 (#1245 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-09-23 18:41:44 -07:00
Alex Yang	c856c5becb	revert: stream back to first parameter (#1247 )	2024-09-23 18:35:36 -07:00
John Wick	50e6b57be0	feat: add Amazon Bedrock Retriever (#1219 ) Co-authored-by: Arnaud JEAN <arnajean@amazon.com> Co-authored-by: ajohn-wick <ajohnwick@mrwick.org> Co-authored-by: Alex Yang <himself65@outlook.com>	2024-09-23 15:11:53 -07:00
Alex Yang	8b7fdba544	refactor: move chat engine & retriever into core (#1242 )	2024-09-23 13:26:26 -07:00
github-actions[bot]	22ae8d0166	Release 0.6.7 (#1244 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-09-23 13:25:02 -07:00
Goran	23bcc379a8	fix: add `serializer` in doc store (#1243 ) Co-authored-by: Alex Yang <himself65@outlook.com>	2024-09-23 13:11:51 -07:00
github-actions[bot]	bdc4bfe7b0	Release 0.6.6 (#1241 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-09-23 11:54:33 -07:00
Goran	025ffe6b50	fix: update `PostgresKVStore` constructor params (#1240 ) Co-authored-by: Alex Yang <himself65@outlook.com>	2024-09-23 10:46:11 -07:00
Cahid Arda Öz	a6595747fa	feat: add Upstash Vector Store (#1218 ) Co-authored-by: ogzhanolguncu <ogzhan11@gmail.com> Co-authored-by: Alex Yang <himself65@outlook.com>	2024-09-23 10:00:10 -07:00
Marcus Schiesser	d902cc3e7e	fix: context not working in contextchatengine (#1237 )	2024-09-22 15:19:13 -07:00
github-actions[bot]	726eb41359	Release 0.6.5 (#1239 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-09-20 14:24:23 -07:00
André Mazayev	e9714dbfcd	feat: update `PGVectorStore` constructor parameters (#1225 ) Co-authored-by: Alex Yang <himself65@outlook.com>	2024-09-20 01:34:51 -07:00
Alex Yang	a3618e761e	chore: fix cache for cloud package (#1236 )	2024-09-19 17:48:39 -07:00
github-actions[bot]	24eabe7f35	Release 0.6.4 (#1234 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-09-19 16:42:39 -07:00
Alex Yang	ecfa939ea6	ci: enable remote cache (#1233 )	2024-09-19 15:40:34 -07:00
Alex Yang	b48bcc3add	feat: support custom `@xenova/transformers` (#1232 )	2024-09-19 14:55:23 -07:00