Release 0.6.7 (#1244 )

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
fix: add serializer in doc store (#1243 )
2026-07-01 22:14:03 -04:00 · 2024-09-23 13:25:02 -07:00 · 2024-09-23 13:11:51 -07:00 · 2024-09-23 11:54:33 -07:00 · 2024-09-23 10:46:11 -07:00 · 2024-09-23 10:00:10 -07:00
144 changed files with 3366 additions and 1846 deletions
@@ -13,8 +13,10 @@ concurrency:
  cancel-in-progress: true

 env:
-  POSTGRES_USER: runneradmin
  POSTGRES_HOST_AUTH_METHOD: trust
+  TURBO_TOKEN: ${{ secrets.TURBO_TOKEN }}
+  TURBO_TEAM: ${{ vars.TURBO_TEAM }}
+  TURBO_REMOTE_ONLY: true

 jobs:
  e2e:
@@ -1,5 +1,50 @@
 # docs

+## 0.0.76
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.0.75
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.0.74
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.0.73
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
+## 0.0.72
+
+### Patch Changes
+
+- Updated dependencies [2cd1383]
+- Updated dependencies [5c4badb]
+  - llamaindex@0.6.3
+
+## 0.0.71
+
+### Patch Changes
+
+- Updated dependencies [749b43a]
+  - llamaindex@0.6.2
+
 ## 0.0.70

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "docs",
-  "version": "0.0.70",
+  "version": "0.0.76",
  "private": true,
  "scripts": {
    "docusaurus": "docusaurus",
@@ -1,4 +1,4 @@
-import { Anthropic, SimpleChatEngine, SimpleChatHistory } from "llamaindex";
+import { Anthropic, ChatMemoryBuffer, SimpleChatEngine } from "llamaindex";
 import { stdin as input, stdout as output } from "node:process";
 import readline from "node:readline/promises";

@@ -8,8 +8,8 @@ import readline from "node:readline/promises";
    model: "claude-3-opus",
  });
  // chatHistory will store all the messages in the conversation
-  const chatHistory = new SimpleChatHistory({
-    messages: [
+  const chatHistory = new ChatMemoryBuffer({
+    chatHistory: [
      {
        content: "You want to talk in rhymes.",
        role: "system",
@@ -2,10 +2,10 @@ import { stdin as input, stdout as output } from "node:process";
 import readline from "node:readline/promises";

 import {
+  ChatSummaryMemoryBuffer,
  OpenAI,
  Settings,
  SimpleChatEngine,
-  SummaryChatHistory,
 } from "llamaindex";

 if (process.env.NODE_ENV === "development") {
@@ -18,7 +18,7 @@ async function main() {
  // Set maxTokens to 75% of the context window size of 4096
  // This will trigger the summarizer once the chat history reaches 25% of the context window size (1024 tokens)
  const llm = new OpenAI({ model: "gpt-3.5-turbo", maxTokens: 4096 * 0.75 });
-  const chatHistory = new SummaryChatHistory({ llm });
+  const chatHistory = new ChatSummaryMemoryBuffer({ llm });
  const chatEngine = new SimpleChatEngine({ llm });
  const rl = readline.createInterface({ input, output });

@@ -27,10 +27,12 @@ async function main() {

  // Query the index
  const queryEngine = index.asQueryEngine();
-  const stream = await queryEngine.query({
-    query: "What did the author do in college?",
-    stream: true,
-  });
+  const stream = await queryEngine.query(
+    {
+      query: "What did the author do in college?",
+    },
+    true,
+  );

  // Output response
  for await (const chunk of stream) {
@@ -37,10 +37,12 @@ async function main() {

  // Query the index
  const queryEngine = index.asQueryEngine();
-  const stream = await queryEngine.query({
-    query: "What did the author do in college?",
-    stream: true,
-  });
+  const stream = await queryEngine.query(
+    {
+      query: "What did the author do in college?",
+    },
+    true,
+  );

  // Output response
  for await (const chunk of stream) {
@@ -1,7 +1,7 @@
 import {
  Document,
+  getResponseSynthesizer,
  NodeWithScore,
-  ResponseSynthesizer,
  SentenceSplitter,
  TextNode,
 } from "llamaindex";
@@ -14,7 +14,7 @@ import {

  console.log(nodes);

-  const responseSynthesizer = new ResponseSynthesizer();
+  const responseSynthesizer = getResponseSynthesizer("compact");

  const nodesWithScore: NodeWithScore[] = [
    {
@@ -30,7 +30,7 @@ import {
  const stream = await responseSynthesizer.synthesize(
    {
      query: "What age am I?",
-      nodesWithScore,
+      nodes: nodesWithScore,
    },
    true,
  );
@@ -1,5 +1,5 @@
 import {
-  MultiModalResponseSynthesizer,
+  getResponseSynthesizer,
  OpenAI,
  Settings,
  VectorStoreIndex,
@@ -27,13 +27,15 @@ async function main() {
  });

  const queryEngine = index.asQueryEngine({
-    responseSynthesizer: new MultiModalResponseSynthesizer(),
+    responseSynthesizer: getResponseSynthesizer("multi_modal"),
    retriever: index.asRetriever({ topK: { TEXT: 3, IMAGE: 1 } }),
  });
-  const stream = await queryEngine.query({
-    query: "Tell me more about Vincent van Gogh's famous paintings",
-    stream: true,
-  });
+  const stream = await queryEngine.query(
+    {
+      query: "Tell me more about Vincent van Gogh's famous paintings",
+    },
+    true,
+  );
  for await (const chunk of stream) {
    process.stdout.write(chunk.response);
  }
@@ -40,7 +40,11 @@ async function main(args: any) {
    const rdr = new SimpleDirectoryReader(callback);
    const docs = await rdr.loadData({ directoryPath: sourceDir });

-    const pgvs = new PGVectorStore();
+    const pgvs = new PGVectorStore({
+      clientConfig: {
+        connectionString: process.env.PG_CONNECTION_STRING,
+      },
+    });
    pgvs.setCollection(sourceDir);
    await pgvs.clearCollection();

@@ -7,7 +7,11 @@ async function main() {
  });

  try {
-    const pgvs = new PGVectorStore();
+    const pgvs = new PGVectorStore({
+      clientConfig: {
+        connectionString: process.env.PG_CONNECTION_STRING,
+      },
+    });
    // Optional - set your collection name, default is no filter on this field.
    // pgvs.setCollection();

@@ -1,8 +1,7 @@
 import {
  Document,
+  getResponseSynthesizer,
  PromptTemplate,
-  ResponseSynthesizer,
-  TreeSummarize,
  TreeSummarizePrompt,
  VectorStoreIndex,
 } from "llamaindex";
@@ -27,9 +26,7 @@ async function main() {

  const query = "The quick brown fox jumps over the lazy dog";

-  const responseSynthesizer = new ResponseSynthesizer({
-    responseBuilder: new TreeSummarize(),
-  });
+  const responseSynthesizer = getResponseSynthesizer("tree_summarize");

  const queryEngine = index.asQueryEngine({
    responseSynthesizer,
@@ -1,8 +1,7 @@
 import {
-  CompactAndRefine,
+  getResponseSynthesizer,
  OpenAI,
  PromptTemplate,
-  ResponseSynthesizer,
  Settings,
  VectorStoreIndex,
 } from "llamaindex";
@@ -29,8 +28,8 @@ Given the CSV file, generate me Typescript code to answer the question: {query}.
 `,
  });

-  const responseSynthesizer = new ResponseSynthesizer({
-    responseBuilder: new CompactAndRefine(undefined, csvPrompt),
+  const responseSynthesizer = getResponseSynthesizer("compact", {
+    textQATemplate: csvPrompt,
  });

  const queryEngine = index.asQueryEngine({ responseSynthesizer });
@@ -1,3 +1,4 @@
+import { createMessageContent } from "@llamaindex/core/utils";
 import {
  Document,
  ImageNode,
@@ -6,7 +7,6 @@ import {
  PromptTemplate,
  VectorStoreIndex,
 } from "llamaindex";
-import { createMessageContent } from "llamaindex/synthesizers/utils";

 const reader = new LlamaParseReader();
 async function main() {
@@ -2,12 +2,10 @@ import fs from "node:fs/promises";

 import {
  Anthropic,
-  CompactAndRefine,
  Document,
-  ResponseSynthesizer,
  Settings,
  VectorStoreIndex,
-  anthropicTextQaPrompt,
+  getResponseSynthesizer,
 } from "llamaindex";

 // Update llm to use Anthropic
@@ -23,9 +21,7 @@ async function main() {
  const document = new Document({ text: essay, id_: path });

  // Split text and create embeddings. Store them in a VectorStoreIndex
-  const responseSynthesizer = new ResponseSynthesizer({
-    responseBuilder: new CompactAndRefine(undefined, anthropicTextQaPrompt),
-  });
+  const responseSynthesizer = getResponseSynthesizer("compact");

  const index = await VectorStoreIndex.fromDocuments([document]);

@@ -1,11 +1,10 @@
 import {
+  getResponseSynthesizer,
  OpenAI,
  OpenAIEmbedding,
-  ResponseSynthesizer,
  RetrieverQueryEngine,
  Settings,
  TextNode,
-  TreeSummarize,
  VectorIndexRetriever,
  VectorStore,
  VectorStoreIndex,
@@ -165,10 +164,7 @@ async function main() {
      similarityTopK: 500,
    });

-    const responseSynthesizer = new ResponseSynthesizer({
-      responseBuilder: new TreeSummarize(),
-    });
-
+    const responseSynthesizer = getResponseSynthesizer("tree_summarize");
    return new RetrieverQueryEngine(retriever, responseSynthesizer, {
      filter,
    });
@@ -1,5 +1,50 @@
 # @llamaindex/autotool

+## 3.0.7
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 3.0.6
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 3.0.5
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 3.0.4
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
+## 3.0.3
+
+### Patch Changes
+
+- Updated dependencies [2cd1383]
+- Updated dependencies [5c4badb]
+  - llamaindex@0.6.3
+
+## 3.0.2
+
+### Patch Changes
+
+- Updated dependencies [749b43a]
+  - llamaindex@0.6.2
+
 ## 3.0.1

 ### Patch Changes
@@ -1,5 +1,56 @@
 # @llamaindex/autotool-01-node-example

+## 0.0.16
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+  - @llamaindex/autotool@3.0.7
+
+## 0.0.15
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+  - @llamaindex/autotool@3.0.6
+
+## 0.0.14
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+  - @llamaindex/autotool@3.0.5
+
+## 0.0.13
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+  - @llamaindex/autotool@3.0.4
+
+## 0.0.12
+
+### Patch Changes
+
+- Updated dependencies [2cd1383]
+- Updated dependencies [5c4badb]
+  - llamaindex@0.6.3
+  - @llamaindex/autotool@3.0.3
+
+## 0.0.11
+
+### Patch Changes
+
+- Updated dependencies [749b43a]
+  - llamaindex@0.6.2
+  - @llamaindex/autotool@3.0.2
+
 ## 0.0.10

 ### Patch Changes
@@ -13,5 +13,5 @@
  "scripts": {
    "start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
  },
-  "version": "0.0.10"
+  "version": "0.0.16"
 }
@@ -1,5 +1,56 @@
 # @llamaindex/autotool-02-next-example

+## 0.1.60
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+  - @llamaindex/autotool@3.0.7
+
+## 0.1.59
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+  - @llamaindex/autotool@3.0.6
+
+## 0.1.58
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+  - @llamaindex/autotool@3.0.5
+
+## 0.1.57
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+  - @llamaindex/autotool@3.0.4
+
+## 0.1.56
+
+### Patch Changes
+
+- Updated dependencies [2cd1383]
+- Updated dependencies [5c4badb]
+  - llamaindex@0.6.3
+  - @llamaindex/autotool@3.0.3
+
+## 0.1.55
+
+### Patch Changes
+
+- Updated dependencies [749b43a]
+  - llamaindex@0.6.2
+  - @llamaindex/autotool@3.0.2
+
 ## 0.1.54

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/autotool-02-next-example",
  "private": true,
-  "version": "0.1.54",
+  "version": "0.1.60",
  "scripts": {
    "dev": "next dev",
    "build": "next build",
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/autotool",
  "type": "module",
-  "version": "3.0.1",
+  "version": "3.0.7",
  "description": "auto transpile your JS function to LLM Agent compatible",
  "files": [
    "dist",
@@ -1,5 +1,25 @@
 # @llamaindex/cloud

+## 0.2.7
+
+### Patch Changes
+
+- fb36eff: fix: backport for node.js 18
+
+  An API used by this package may be missing in Node.js 18, so we backport it to make the package work there.
+
+- d24d3d1: fix: print warning when llama parse reader has error
+- Updated dependencies [2cd1383]
+  - @llamaindex/core@0.2.3
+
+## 0.2.6
+
+### Patch Changes
+
+- b42adeb: fix: get job result in llama parse reader
+- Updated dependencies [749b43a]
+  - @llamaindex/core@0.2.2
+
 ## 0.2.5

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloud",
-  "version": "0.2.5",
+  "version": "0.2.7",
  "type": "module",
  "license": "MIT",
  "scripts": {
@@ -50,12 +50,12 @@
  "devDependencies": {
    "@hey-api/client-fetch": "^0.2.4",
    "@hey-api/openapi-ts": "^0.53.0",
-    "@llamaindex/core": "workspace:^0.2.1",
+    "@llamaindex/core": "workspace:^0.2.3",
    "@llamaindex/env": "workspace:^0.1.11",
    "bunchee": "5.3.2"
  },
  "peerDependencies": {
-    "@llamaindex/core": "workspace:^0.2.1",
+    "@llamaindex/core": "workspace:^0.2.3",
    "@llamaindex/env": "workspace:^0.1.11"
  },
  "dependencies": {
@@ -229,20 +229,18 @@ export class LlamaParseReader extends FileReader {
  }

  // Create a job for the LlamaParse API
-  private async createJob(
-    data: Uint8Array,
-    fileName: string = "unknown",
-  ): Promise<string> {
+  private async createJob(data: Uint8Array): Promise<string> {
    // Load data, set the mime type
-    const { mime, extension } = await LlamaParseReader.getMimeType(data);
+    const { mime } = await LlamaParseReader.getMimeType(data);

    if (this.verbose) {
-      const name = fileName ? fileName : extension;
-      console.log(`Starting load for ${name} file`);
+      console.log("Started uploading the file");
    }

    const body = {
-      file: new File([data], fileName, { type: mime }),
+      file: new Blob([data], {
+        type: mime,
+      }),
      language: this.language,
      parsing_instruction: this.parsingInstruction,
      skip_diagonal_text: this.skipDiagonalText,
@@ -294,17 +292,14 @@ export class LlamaParseReader extends FileReader {
      await sleep(this.checkInterval * 1000);

      // Check the job status. If unsuccessful response, checks if maximum timeout has been reached. If reached, throws an error
-      const result =
-        await ParsingService.getParsingJobDetailsApiV1ParsingJobJobIdDetailsGet(
-          {
-            client: this.#client,
-            throwOnError: true,
-            path: {
-              job_id: jobId,
-            },
-            signal,
-          },
-        );
+      const result = await ParsingService.getJobApiV1ParsingJobJobIdGet({
+        client: this.#client,
+        throwOnError: true,
+        path: {
+          job_id: jobId,
+        },
+        signal,
+      });
      const { data } = result;

      const status = (data as Record<string, unknown>)["status"];
@@ -376,14 +371,10 @@ export class LlamaParseReader extends FileReader {
   * To be used with resultType = "text" and "markdown"
   *
   * @param {Uint8Array} fileContent - The content of the file to be loaded.
-   * @param {string} [fileName] - The optional name of the file to be loaded.
   * @return {Promise<Document[]>} A Promise object that resolves to an array of Document objects.
   */
-  async loadDataAsContent(
-    fileContent: Uint8Array,
-    fileName?: string,
-  ): Promise<Document[]> {
-    return this.createJob(fileContent, fileName)
+  async loadDataAsContent(fileContent: Uint8Array): Promise<Document[]> {
+    return this.createJob(fileContent)
      .then(async (jobId) => {
        if (this.verbose) {
          console.log(`Started parsing the file under job id ${jobId}`);
@@ -406,6 +397,7 @@ export class LlamaParseReader extends FileReader {
      })
      .catch((error) => {
        if (this.ignoreErrors) {
+          console.warn(`Error while parsing the file: ${error.message}`);
          return [];
        } else {
          throw error;
@@ -440,8 +432,8 @@ export class LlamaParseReader extends FileReader {
      resultJson.file_path = isFilePath ? filePathOrContent : undefined;
      return [resultJson];
    } catch (e) {
-      console.error(`Error while parsing the file under job id ${jobId}`, e);
      if (this.ignoreErrors) {
+        console.error(`Error while parsing the file under job id ${jobId}`, e);
        return [];
      } else {
        throw e;
@@ -0,0 +1,8 @@
+{
+  "extends": ["//"],
+  "tasks": {
+    "build": {
+      "outputs": ["dist/**", "src/client/**"]
+    }
+  }
+}
@@ -1,5 +1,34 @@
 # @llamaindex/community

+## 0.0.39
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+  - @llamaindex/core@0.2.5
+
+## 0.0.38
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - @llamaindex/core@0.2.4
+  - @llamaindex/env@0.1.12
+
+## 0.0.37
+
+### Patch Changes
+
+- Updated dependencies [2cd1383]
+  - @llamaindex/core@0.2.3
+
+## 0.0.36
+
+### Patch Changes
+
+- Updated dependencies [749b43a]
+  - @llamaindex/core@0.2.2
+
 ## 0.0.35

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/community",
  "description": "Community package for LlamaIndexTS",
-  "version": "0.0.35",
+  "version": "0.0.39",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -1,5 +1,40 @@
 # @llamaindex/core

+## 0.2.5
+
+### Patch Changes
+
+- d902cc3: Fix context not being sent using ContextChatEngine
+
+## 0.2.4
+
+### Patch Changes
+
+- b48bcc3: feat: add `load-transformers` event type when loading `@xenova/transformers` module
+
+  This benefits users who want to customize the transformers environment.
+
+- Updated dependencies [b48bcc3]
+  - @llamaindex/env@0.1.12
+
+## 0.2.3
+
+### Patch Changes
+
+- 2cd1383: refactor: align `response-synthesizers` & `chat-engine` module
+
+  - builtin event system
+  - correct class extends
+  - align APIs and naming with llama-index python
+  - move `stream` out of the first parameter into a second parameter for better type checking
+  - remove JSONQueryEngine in `@llamaindex/experimental`, as the code quality is not satisfactory; we will bring it back later
+
+## 0.2.2
+
+### Patch Changes
+
+- 749b43a: fix: clip embedding transform function
+
 ## 0.2.1

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/core",
  "type": "module",
-  "version": "0.2.1",
+  "version": "0.2.5",
  "description": "LlamaIndex Core Module",
  "exports": {
    "./node-parser": {
@@ -157,6 +157,48 @@
        "types": "./dist/workflow/index.d.ts",
        "default": "./dist/workflow/index.js"
      }
+    },
+    "./memory": {
+      "require": {
+        "types": "./dist/memory/index.d.cts",
+        "default": "./dist/memory/index.cjs"
+      },
+      "import": {
+        "types": "./dist/memory/index.d.ts",
+        "default": "./dist/memory/index.js"
+      },
+      "default": {
+        "types": "./dist/memory/index.d.ts",
+        "default": "./dist/memory/index.js"
+      }
+    },
+    "./storage/chat-store": {
+      "require": {
+        "types": "./dist/storage/chat-store/index.d.cts",
+        "default": "./dist/storage/chat-store/index.cjs"
+      },
+      "import": {
+        "types": "./dist/storage/chat-store/index.d.ts",
+        "default": "./dist/storage/chat-store/index.js"
+      },
+      "default": {
+        "types": "./dist/storage/chat-store/index.d.ts",
+        "default": "./dist/storage/chat-store/index.js"
+      }
+    },
+    "./response-synthesizers": {
+      "require": {
+        "types": "./dist/response-synthesizers/index.d.cts",
+        "default": "./dist/response-synthesizers/index.cjs"
+      },
+      "import": {
+        "types": "./dist/response-synthesizers/index.d.ts",
+        "default": "./dist/response-synthesizers/index.js"
+      },
+      "default": {
+        "types": "./dist/response-synthesizers/index.d.ts",
+        "default": "./dist/response-synthesizers/index.js"
+      }
    }
  },
  "files": [
@@ -182,6 +224,7 @@
  "dependencies": {
    "@llamaindex/env": "workspace:*",
    "@types/node": "^22.5.1",
+    "magic-bytes.js": "^1.10.0",
    "zod": "^3.23.8"
  }
 }
@@ -23,23 +23,34 @@ export abstract class BaseEmbedding extends TransformComponent {
  embedBatchSize = DEFAULT_EMBED_BATCH_SIZE;
  embedInfo?: EmbeddingInfo;

-  constructor() {
-    super(
-      async (
-        nodes: BaseNode[],
-        options?: BaseEmbeddingOptions,
-      ): Promise<BaseNode[]> => {
-        const texts = nodes.map((node) => node.getContent(MetadataMode.EMBED));
+  protected constructor(
+    transformFn?: (
+      nodes: BaseNode[],
+      options?: BaseEmbeddingOptions,
+    ) => Promise<BaseNode[]>,
+  ) {
+    if (transformFn) {
+      super(transformFn);
+    } else {
+      super(
+        async (
+          nodes: BaseNode[],
+          options?: BaseEmbeddingOptions,
+        ): Promise<BaseNode[]> => {
+          const texts = nodes.map((node) =>
+            node.getContent(MetadataMode.EMBED),
+          );

-        const embeddings = await this.getTextEmbeddingsBatch(texts, options);
+          const embeddings = await this.getTextEmbeddingsBatch(texts, options);

-        for (let i = 0; i < nodes.length; i++) {
-          nodes[i]!.embedding = embeddings[i];
-        }
+          for (let i = 0; i < nodes.length; i++) {
+            nodes[i]!.embedding = embeddings[i];
+          }

-        return nodes;
-      },
-    );
+          return nodes;
+        },
+      );
+    }
  }

  similarity(
@@ -1,4 +1,5 @@
 export { BaseEmbedding, batchEmbeddings } from "./base";
 export type { BaseEmbeddingOptions, EmbeddingInfo } from "./base";
+export { MultiModalEmbedding } from "./muti-model";
 export { truncateMaxTokens } from "./tokenizer";
 export { DEFAULT_SIMILARITY_TOP_K, SimilarityType, similarity } from "./utils";
@@ -0,0 +1,81 @@
+import type { MessageContentDetail } from "../llms";
+import {
+  ImageNode,
+  MetadataMode,
+  ModalityType,
+  splitNodesByType,
+  type BaseNode,
+  type ImageType,
+} from "../schema";
+import { extractImage, extractSingleText } from "../utils";
+import {
+  BaseEmbedding,
+  batchEmbeddings,
+  type BaseEmbeddingOptions,
+} from "./base";
+
+/*
+ * Base class for Multi Modal embeddings.
+ */
+export abstract class MultiModalEmbedding extends BaseEmbedding {
+  abstract getImageEmbedding(images: ImageType): Promise<number[]>;
+
+  protected constructor() {
+    super(
+      async (
+        nodes: BaseNode[],
+        options?: BaseEmbeddingOptions,
+      ): Promise<BaseNode[]> => {
+        const nodeMap = splitNodesByType(nodes);
+        const imageNodes = nodeMap[ModalityType.IMAGE] ?? [];
+        const textNodes = nodeMap[ModalityType.TEXT] ?? [];
+
+        const embeddings = await batchEmbeddings(
+          textNodes.map((node) => node.getContent(MetadataMode.EMBED)),
+          this.getTextEmbeddings.bind(this),
+          this.embedBatchSize,
+          options,
+        );
+        for (let i = 0; i < textNodes.length; i++) {
+          textNodes[i]!.embedding = embeddings[i];
+        }
+
+        const imageEmbeddings = await batchEmbeddings(
+          imageNodes.map((n) => (n as ImageNode).image),
+          this.getImageEmbeddings.bind(this),
+          this.embedBatchSize,
+          options,
+        );
+        for (let i = 0; i < imageNodes.length; i++) {
+          imageNodes[i]!.embedding = imageEmbeddings[i];
+        }
+
+        return nodes;
+      },
+    );
+  }
+
+  /**
+   * Optionally override this method to retrieve multiple image embeddings in a single request
+   * @param images
+   */
+  async getImageEmbeddings(images: ImageType[]): Promise<number[][]> {
+    return Promise.all(
+      images.map((imgFilePath) => this.getImageEmbedding(imgFilePath)),
+    );
+  }
+
+  async getQueryEmbedding(
+    query: MessageContentDetail,
+  ): Promise<number[] | null> {
+    const image = extractImage(query);
+    if (image) {
+      return await this.getImageEmbedding(image);
+    }
+    const text = extractSingleText(query);
+    if (text) {
+      return await this.getTextEmbedding(text);
+    }
+    return null;
+  }
+}
@@ -6,8 +6,13 @@ import type {
  ToolCall,
  ToolOutput,
 } from "../../llms";
+import type { QueryEndEvent, QueryStartEvent } from "../../query-engine";
+import type {
+  SynthesizeEndEvent,
+  SynthesizeStartEvent,
+} from "../../response-synthesizers";
 import { TextNode } from "../../schema";
-import { EventCaller, getEventCaller } from "../../utils/event-caller";
+import { EventCaller, getEventCaller } from "../../utils";
 import type { UUID } from "../type";

 export type LLMStartEvent = {
@@ -60,6 +65,10 @@ export interface LlamaIndexEventMaps {
  "chunking-end": ChunkingEndEvent;
  "node-parsing-start": NodeParsingStartEvent;
  "node-parsing-end": NodeParsingEndEvent;
+  "query-start": QueryStartEvent;
+  "query-end": QueryEndEvent;
+  "synthesize-start": SynthesizeStartEvent;
+  "synthesize-end": SynthesizeEndEvent;
 }

 export class LlamaIndexCustomEvent<T = any> extends CustomEvent<T> {
@@ -119,16 +128,29 @@ export class CallbackManager {
  dispatchEvent<K extends keyof LlamaIndexEventMaps>(
    event: K,
    detail: LlamaIndexEventMaps[K],
+    sync = false,
  ) {
    const cbs = this.#handlers.get(event);
    if (!cbs) {
      return;
    }
-    queueMicrotask(() => {
+    if (typeof queueMicrotask === "undefined") {
+      console.warn(
+        "queueMicrotask is not available, dispatching synchronously",
+      );
+      sync = true;
+    }
+    if (sync) {
      cbs.forEach((handler) =>
        handler(LlamaIndexCustomEvent.fromEvent(event, { ...detail })),
      );
-    });
+    } else {
+      queueMicrotask(() => {
+        cbs.forEach((handler) =>
+          handler(LlamaIndexCustomEvent.fromEvent(event, { ...detail })),
+        );
+      });
+    }
  }
 }

@@ -1,10 +1,13 @@
 import { type Tokenizer, tokenizers } from "@llamaindex/env";
 import {
  DEFAULT_CHUNK_OVERLAP_RATIO,
+  DEFAULT_CHUNK_SIZE,
  DEFAULT_CONTEXT_WINDOW,
  DEFAULT_NUM_OUTPUTS,
  DEFAULT_PADDING,
+  Settings,
 } from "../global";
+import type { LLMMetadata } from "../llms";
 import { SentenceSplitter } from "../node-parser";
 import type { PromptTemplate } from "../prompts";

@@ -133,4 +136,29 @@ export class PromptHelper {
    const combinedStr = textChunks.join("\n\n");
    return textSplitter.splitText(combinedStr);
  }
+
+  static fromLLMMetadata(
+    metadata: LLMMetadata,
+    options?: {
+      chunkOverlapRatio?: number;
+      chunkSizeLimit?: number;
+      tokenizer?: Tokenizer;
+      separator?: string;
+    },
+  ) {
+    const {
+      chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO,
+      chunkSizeLimit = DEFAULT_CHUNK_SIZE,
+      tokenizer = Settings.tokenizer,
+      separator = " ",
+    } = options ?? {};
+    return new PromptHelper({
+      contextWindow: metadata.contextWindow,
+      numOutput: metadata.maxTokens ?? DEFAULT_NUM_OUTPUTS,
+      chunkOverlapRatio,
+      chunkSizeLimit,
+      tokenizer,
+      separator,
+    });
+  }
 }
@@ -0,0 +1,83 @@
+import { Settings } from "../global";
+import type { ChatMessage } from "../llms";
+import { type BaseChatStore, SimpleChatStore } from "../storage/chat-store";
+import { extractText } from "../utils";
+
+export const DEFAULT_TOKEN_LIMIT_RATIO = 0.75;
+export const DEFAULT_CHAT_STORE_KEY = "chat_history";
+
+/**
+ * A ChatMemory is used to keep the state of back and forth chat messages
+ */
+export abstract class BaseMemory<
+  AdditionalMessageOptions extends object = object,
+> {
+  /**
+   * Retrieves messages from the memory, optionally including transient messages.
+   * Compared to getAllMessages, this method a) allows for transient messages to be included in the retrieval and b) may return a subset of the total messages by applying a token limit.
+   * @param transientMessages Optional array of temporary messages to be included in the retrieval.
+   * These messages are not stored in the memory but are considered for the current interaction.
+   * @returns An array of chat messages, either synchronously or as a Promise.
+   */
+  abstract getMessages(
+    transientMessages?: ChatMessage<AdditionalMessageOptions>[] | undefined,
+  ):
+    | ChatMessage<AdditionalMessageOptions>[]
+    | Promise<ChatMessage<AdditionalMessageOptions>[]>;
+
+  /**
+   * Retrieves all messages stored in the memory.
+   * @returns An array of all chat messages, either synchronously or as a Promise.
+   */
+  abstract getAllMessages():
+    | ChatMessage<AdditionalMessageOptions>[]
+    | Promise<ChatMessage<AdditionalMessageOptions>[]>;
+
+  /**
+   * Adds a new message to the memory.
+   * @param messages The chat message to be added to the memory.
+   */
+  abstract put(messages: ChatMessage<AdditionalMessageOptions>): void;
+
+  /**
+   * Clears all messages from the memory.
+   */
+  abstract reset(): void;
+
+  protected _tokenCountForMessages(messages: ChatMessage[]): number {
+    if (messages.length === 0) {
+      return 0;
+    }
+
+    const tokenizer = Settings.tokenizer;
+    const str = messages.map((m) => extractText(m.content)).join(" ");
+    return tokenizer.encode(str).length;
+  }
+}
+
+export abstract class BaseChatStoreMemory<
+  AdditionalMessageOptions extends object = object,
+> extends BaseMemory<AdditionalMessageOptions> {
+  protected constructor(
+    public chatStore: BaseChatStore<AdditionalMessageOptions> = new SimpleChatStore<AdditionalMessageOptions>(),
+    public chatStoreKey: string = DEFAULT_CHAT_STORE_KEY,
+  ) {
+    super();
+  }
+
+  getAllMessages(): ChatMessage<AdditionalMessageOptions>[] {
+    return this.chatStore.getMessages(this.chatStoreKey);
+  }
+
+  put(messages: ChatMessage<AdditionalMessageOptions>) {
+    this.chatStore.addMessage(this.chatStoreKey, messages);
+  }
+
+  set(messages: ChatMessage<AdditionalMessageOptions>[]) {
+    this.chatStore.setMessages(this.chatStoreKey, messages);
+  }
+
+  reset() {
+    this.chatStore.deleteMessages(this.chatStoreKey);
+  }
+}
@@ -0,0 +1,71 @@
+import { Settings } from "../global";
+import type { ChatMessage, LLM } from "../llms";
+import { type BaseChatStore } from "../storage/chat-store";
+import { BaseChatStoreMemory, DEFAULT_TOKEN_LIMIT_RATIO } from "./base";
+
+type ChatMemoryBufferOptions<AdditionalMessageOptions extends object = object> =
+  {
+    tokenLimit?: number | undefined;
+    chatStore?: BaseChatStore<AdditionalMessageOptions> | undefined;
+    chatStoreKey?: string | undefined;
+    chatHistory?: ChatMessage<AdditionalMessageOptions>[] | undefined;
+    llm?: LLM<object, AdditionalMessageOptions> | undefined;
+  };
+
+export class ChatMemoryBuffer<
+  AdditionalMessageOptions extends object = object,
+> extends BaseChatStoreMemory<AdditionalMessageOptions> {
+  tokenLimit: number;
+
+  constructor(
+    options?: Partial<ChatMemoryBufferOptions<AdditionalMessageOptions>>,
+  ) {
+    super(options?.chatStore, options?.chatStoreKey);
+
+    const llm = options?.llm ?? Settings.llm;
+    const contextWindow = llm.metadata.contextWindow;
+    this.tokenLimit =
+      options?.tokenLimit ??
+      Math.ceil(contextWindow * DEFAULT_TOKEN_LIMIT_RATIO);
+
+    if (options?.chatHistory) {
+      this.chatStore.setMessages(this.chatStoreKey, options.chatHistory);
+    }
+  }
+
+  getMessages(
+    transientMessages?: ChatMessage<AdditionalMessageOptions>[] | undefined,
+    initialTokenCount: number = 0,
+  ) {
+    const messages = this.getAllMessages();
+
+    if (initialTokenCount > this.tokenLimit) {
+      throw new Error("Initial token count exceeds token limit");
+    }
+
+    // Add input messages as transient messages
+    const messagesWithInput = transientMessages
+      ? [...transientMessages, ...messages]
+      : messages;
+
+    let messageCount = messagesWithInput.length;
+    let currentMessages = messagesWithInput.slice(-messageCount);
+    let tokenCount =
+      this._tokenCountForMessages(messagesWithInput) + initialTokenCount;
+
+    while (tokenCount > this.tokenLimit && messageCount > 1) {
+      messageCount -= 1;
+      if (messagesWithInput.at(-messageCount)!.role === "assistant") {
+        messageCount -= 1;
+      }
+      currentMessages = messagesWithInput.slice(-messageCount);
+      tokenCount =
+        this._tokenCountForMessages(currentMessages) + initialTokenCount;
+    }
+
+    if (tokenCount > this.tokenLimit && messageCount <= 0) {
+      return [];
+    }
+    return messagesWithInput.slice(-messageCount);
+  }
+}
@@ -0,0 +1,3 @@
+export { BaseMemory } from "./base";
+export { ChatMemoryBuffer } from "./chat-memory-buffer";
+export { ChatSummaryMemoryBuffer } from "./summary-memory";
@@ -1,73 +1,11 @@
-import type { ChatMessage, LLM, MessageType } from "@llamaindex/core/llms";
-import {
-  defaultSummaryPrompt,
-  type SummaryPrompt,
-} from "@llamaindex/core/prompts";
-import { extractText, messagesToHistory } from "@llamaindex/core/utils";
-import { tokenizers, type Tokenizer } from "@llamaindex/env";
-import { OpenAI } from "@llamaindex/openai";
+import { type Tokenizer, tokenizers } from "@llamaindex/env";
+import { Settings } from "../global";
+import type { ChatMessage, LLM, MessageType } from "../llms";
+import { defaultSummaryPrompt, type SummaryPrompt } from "../prompts";
+import { extractText, messagesToHistory } from "../utils";
+import { BaseMemory } from "./base";

-/**
- * A ChatHistory is used to keep the state of back and forth chat messages
- */
-export abstract class ChatHistory<
-  AdditionalMessageOptions extends object = object,
-> {
-  abstract get messages(): ChatMessage<AdditionalMessageOptions>[];
-  /**
-   * Adds a message to the chat history.
-   * @param message
-   */
-  abstract addMessage(message: ChatMessage<AdditionalMessageOptions>): void;
-
-  /**
-   * Returns the messages that should be used as input to the LLM.
-   */
-  abstract requestMessages(
-    transientMessages?: ChatMessage<AdditionalMessageOptions>[],
-  ): Promise<ChatMessage<AdditionalMessageOptions>[]>;
-
-  /**
-   * Resets the chat history so that it's empty.
-   */
-  abstract reset(): void;
-
-  /**
-   * Returns the new messages since the last call to this function (or since calling the constructor)
-   */
-  abstract newMessages(): ChatMessage<AdditionalMessageOptions>[];
-}
-
-export class SimpleChatHistory extends ChatHistory {
-  messages: ChatMessage[];
-  private messagesBefore: number;
-
-  constructor(init?: { messages?: ChatMessage[] | undefined }) {
-    super();
-    this.messages = init?.messages ?? [];
-    this.messagesBefore = this.messages.length;
-  }
-
-  addMessage(message: ChatMessage) {
-    this.messages.push(message);
-  }
-
-  async requestMessages(transientMessages?: ChatMessage[]) {
-    return [...(transientMessages ?? []), ...this.messages];
-  }
-
-  reset() {
-    this.messages = [];
-  }
-
-  newMessages() {
-    const newMessages = this.messages.slice(this.messagesBefore);
-    this.messagesBefore = this.messages.length;
-    return newMessages;
-  }
-}
-
-export class SummaryChatHistory extends ChatHistory {
+export class ChatSummaryMemoryBuffer extends BaseMemory {
  /**
   * Tokenizer function that converts text to tokens,
   *  this is used to calculate the number of tokens in a message.
@@ -77,20 +15,18 @@ export class SummaryChatHistory extends ChatHistory {
  messages: ChatMessage[];
  summaryPrompt: SummaryPrompt;
  llm: LLM;
-  private messagesBefore: number;

-  constructor(init?: Partial<SummaryChatHistory>) {
+  constructor(options?: Partial<ChatSummaryMemoryBuffer>) {
    super();
-    this.messages = init?.messages ?? [];
-    this.messagesBefore = this.messages.length;
-    this.summaryPrompt = init?.summaryPrompt ?? defaultSummaryPrompt;
-    this.llm = init?.llm ?? new OpenAI();
+    this.messages = options?.messages ?? [];
+    this.summaryPrompt = options?.summaryPrompt ?? defaultSummaryPrompt;
+    this.llm = options?.llm ?? Settings.llm;
    if (!this.llm.metadata.maxTokens) {
      throw new Error(
        "LLM maxTokens is not set. Needed so the summarizer ensures the context window size of the LLM.",
      );
    }
-    this.tokenizer = init?.tokenizer ?? tokenizers.tokenizer();
+    this.tokenizer = options?.tokenizer ?? tokenizers.tokenizer();
    this.tokensToSummarize =
      this.llm.metadata.contextWindow - this.llm.metadata.maxTokens;
    if (this.tokensToSummarize < this.llm.metadata.contextWindow * 0.25) {
@@ -128,12 +64,8 @@ export class SummaryChatHistory extends ChatHistory {
    return { content: response.message.content, role: "memory" };
  }

-  addMessage(message: ChatMessage) {
-    this.messages.push(message);
-  }
-
  // Find last summary message
-  private getLastSummaryIndex(): number | null {
+  private get lastSummaryIndex(): number | null {
    const reversedMessages = this.messages.slice().reverse();
    const index = reversedMessages.findIndex(
      (message) => message.role === "memory",
@@ -145,7 +77,7 @@ export class SummaryChatHistory extends ChatHistory {
  }

  public getLastSummary(): ChatMessage | null {
-    const lastSummaryIndex = this.getLastSummaryIndex();
+    const lastSummaryIndex = this.lastSummaryIndex;
    return lastSummaryIndex ? this.messages[lastSummaryIndex]! : null;
  }

@@ -165,7 +97,7 @@ export class SummaryChatHistory extends ChatHistory {
   * If there's a memory, uses all messages after the last summary message.
   */
  private calcConversationMessages(transformSummary?: boolean): ChatMessage[] {
-    const lastSummaryIndex = this.getLastSummaryIndex();
+    const lastSummaryIndex = this.lastSummaryIndex;
    if (!lastSummaryIndex) {
      // there's no memory, so just use all non-system messages
      return this.nonSystemMessages;
@@ -183,7 +115,7 @@ export class SummaryChatHistory extends ChatHistory {
  }

  private calcCurrentRequestMessages(transientMessages?: ChatMessage[]) {
-    // TODO: check order: currently, we're sending:
+    // currently, we're sending:
    // system messages first, then transient messages and then the messages that describe the conversation so far
    return [
      ...this.systemMessages,
@@ -192,7 +124,11 @@ export class SummaryChatHistory extends ChatHistory {
    ];
  }

-  async requestMessages(transientMessages?: ChatMessage[]) {
+  reset() {
+    this.messages = [];
+  }
+
+  async getMessages(transientMessages?: ChatMessage[]): Promise<ChatMessage[]> {
    const requestMessages = this.calcCurrentRequestMessages(transientMessages);

    // get tokens of current request messages and the transient messages
@@ -222,22 +158,11 @@ export class SummaryChatHistory extends ChatHistory {
    return requestMessages;
  }

-  reset() {
-    this.messages = [];
+  async getAllMessages(): Promise<ChatMessage[]> {
+    return this.getMessages();
  }

-  newMessages() {
-    const newMessages = this.messages.slice(this.messagesBefore);
-    this.messagesBefore = this.messages.length;
-    return newMessages;
+  put(message: ChatMessage) {
+    this.messages.push(message);
  }
 }
-
-export function getHistory(
-  chatHistory?: ChatMessage[] | ChatHistory,
-): ChatHistory {
-  if (chatHistory instanceof ChatHistory) {
-    return chatHistory;
-  }
-  return new SimpleChatHistory({ messages: chatHistory });
-}
@@ -1,5 +1,9 @@
+import { randomUUID } from "@llamaindex/env";
+import { Settings } from "../global";
 import type { MessageContent } from "../llms";
-import { EngineResponse, type NodeWithScore } from "../schema";
+import { PromptMixin } from "../prompts";
+import { EngineResponse } from "../schema";
+import { wrapEventCaller } from "../utils";

 /**
 * @link https://docs.llamaindex.ai/en/stable/api_reference/schema/?h=querybundle#llama_index.core.schema.QueryBundle
@@ -14,16 +18,37 @@ export type QueryBundle = {

 export type QueryType = string | QueryBundle;

-export interface BaseQueryEngine {
+/**
+ * Signature of the function a BaseQueryEngine delegates to: takes the query
+ * and an optional stream flag, and resolves to either a single response or
+ * an async iterable of responses.
+ */
+export type QueryFn = (
+  strOrQueryBundle: QueryType,
+  stream?: boolean,
+) => Promise<AsyncIterable<EngineResponse> | EngineResponse>;
+
+/**
+ * Abstract base for query engines. Subclasses pass their query function to
+ * the constructor; `query()` wraps it with "query-start" and "query-end"
+ * events dispatched on `Settings.callbackManager`.
+ */
+export abstract class BaseQueryEngine extends PromptMixin {
+  // _query is the function that actually produces the response.
+  protected constructor(protected readonly _query: QueryFn) {
+    super();
+  }
+
  query(
    strOrQueryBundle: QueryType,
    stream: true,
  ): Promise<AsyncIterable<EngineResponse>>;
  query(strOrQueryBundle: QueryType, stream?: false): Promise<EngineResponse>;
-
-  synthesize?(
+  @wrapEventCaller
+  async query(
    strOrQueryBundle: QueryType,
-    nodes: NodeWithScore[],
-    additionalSources?: Iterator<NodeWithScore>,
-  ): Promise<EngineResponse>;
+    stream = false,
+  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
+    // One random id ties the start and end events of this call together.
+    const id = randomUUID();
+    const callbackManager = Settings.callbackManager;
+    callbackManager.dispatchEvent("query-start", {
+      id,
+      query: strOrQueryBundle,
+    });
+    // NOTE(review): if _query rejects, the await throws and no "query-end"
+    // event is dispatched for this id.
+    const response = await this._query(strOrQueryBundle, stream);
+    // "query-end" fires as soon as _query resolves; for streams that is
+    // presumably before the iterable has been consumed (confirm).
+    callbackManager.dispatchEvent("query-end", {
+      id,
+      response,
+    });
+    return response;
+  }
 }
@@ -1 +1,2 @@
-export type { BaseQueryEngine, QueryBundle, QueryType } from "./base";
+export { BaseQueryEngine, type QueryBundle, type QueryType } from "./base";
+export type { QueryEndEvent, QueryStartEvent } from "./type";
@@ -0,0 +1,12 @@
+import { EngineResponse } from "../schema";
+import type { QueryType } from "./base";
+
+/** Payload of a query-start event: a call id plus the query being run. */
+export type QueryStartEvent = {
+  id: string;
+  query: QueryType;
+};
+
+/**
+ * Payload of a query-end event: a call id plus the response, which is either
+ * a single EngineResponse or an async iterable of them.
+ */
+export type QueryEndEvent = {
+  id: string;
+  response: EngineResponse | AsyncIterable<EngineResponse>;
+};
@@ -0,0 +1,58 @@
+import { randomUUID } from "@llamaindex/env";
+import { Settings } from "../global";
+import { PromptHelper } from "../indices";
+import type { LLM, MessageContent } from "../llms";
+import { PromptMixin } from "../prompts";
+import { EngineResponse, type NodeWithScore } from "../schema";
+import type { SynthesizeQuery } from "./type";
+
+/**
+ * Options accepted by BaseSynthesizer. Both fields are optional;
+ * BaseSynthesizer falls back to `Settings.llm` and to a PromptHelper built
+ * from the LLM metadata when they are omitted.
+ */
+export type BaseSynthesizerOptions = {
+  llm?: LLM;
+  promptHelper?: PromptHelper;
+};
+
+/**
+ * Base class for response synthesizers. Subclasses implement `getResponse`;
+ * `synthesize()` handles the empty-node case and wraps the call with
+ * "synthesize-start" and "synthesize-end" events dispatched on
+ * `Settings.callbackManager`.
+ */
+export abstract class BaseSynthesizer extends PromptMixin {
+  llm: LLM;
+  promptHelper: PromptHelper;
+
+  /**
+   * @param options `llm` defaults to `Settings.llm`; `promptHelper` defaults
+   *   to one derived from the chosen LLM's metadata.
+   */
+  protected constructor(options: Partial<BaseSynthesizerOptions>) {
+    super();
+    this.llm = options.llm ?? Settings.llm;
+    this.promptHelper =
+      options.promptHelper ?? PromptHelper.fromLLMMetadata(this.llm.metadata);
+  }
+
+  /**
+   * Produces the response for a query over the given nodes.
+   *
+   * @param query The query content.
+   * @param textChunks The nodes to synthesize the answer from.
+   * @param stream Whether a streamed response is requested.
+   */
+  protected abstract getResponse(
+    query: MessageContent,
+    textChunks: NodeWithScore[],
+    stream: boolean,
+  ): Promise<EngineResponse | AsyncIterable<EngineResponse>>;
+
+  /**
+   * Synthesizes a response for `query.query` from `query.nodes`.
+   *
+   * @param query The query together with the nodes to answer from.
+   * @param stream When true, resolves to an async iterable of response
+   *   chunks; otherwise resolves to a single response.
+   * @returns "Empty Response" (as a single response, or as a one-chunk
+   *   stream) when there are no nodes; otherwise the result of `getResponse`.
+   */
+  synthesize(
+    query: SynthesizeQuery,
+    stream: true,
+  ): Promise<AsyncIterable<EngineResponse>>;
+  synthesize(query: SynthesizeQuery, stream?: false): Promise<EngineResponse>;
+  async synthesize(
+    query: SynthesizeQuery,
+    stream = false,
+  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
+    const callbackManager = Settings.callbackManager;
+    // One random id ties the start and end events of this call together.
+    const id = randomUUID();
+    callbackManager.dispatchEvent("synthesize-start", { id, query });
+    let response: EngineResponse | AsyncIterable<EngineResponse>;
+    if (query.nodes.length === 0) {
+      if (stream) {
+        // The `stream: true` overload promises an AsyncIterable, so the
+        // placeholder is yielded as a single chunk instead of being returned
+        // as a bare EngineResponse (which callers cannot `for await` over).
+        response = (async function* () {
+          yield EngineResponse.fromResponse("Empty Response", true);
+        })();
+      } else {
+        response = EngineResponse.fromResponse("Empty Response", false);
+      }
+    } else {
+      // A query bundle carries the actual query content in its `query` field.
+      const queryMessage: MessageContent =
+        typeof query.query === "string" ? query.query : query.query.query;
+      response = await this.getResponse(queryMessage, query.nodes, stream);
+    }
+    callbackManager.dispatchEvent("synthesize-end", { id, query, response });
+    return response;
+  }
+}
@@ -1,108 +1,52 @@
-import { getBiggestPrompt, type PromptHelper } from "@llamaindex/core/indices";
-import type { LLM } from "@llamaindex/core/llms";
+import { z } from "zod";
+import { getBiggestPrompt } from "../indices";
+import type { MessageContent } from "../llms";
 import {
-  PromptMixin,
  defaultRefinePrompt,
  defaultTextQAPrompt,
  defaultTreeSummarizePrompt,
  type ModuleRecord,
-  type PromptsRecord,
  type RefinePrompt,
  type TextQAPrompt,
  type TreeSummarizePrompt,
-} from "@llamaindex/core/prompts";
-import type { QueryType } from "@llamaindex/core/query-engine";
-import { extractText, streamConverter } from "@llamaindex/core/utils";
-import type { ServiceContext } from "../ServiceContext.js";
+} from "../prompts";
 import {
-  llmFromSettingsOrContext,
-  promptHelperFromSettingsOrContext,
-} from "../Settings.js";
-import type { ResponseBuilder, ResponseBuilderQuery } from "./types.js";
+  EngineResponse,
+  MetadataMode,
+  type NodeWithScore,
+  TextNode,
+} from "../schema";
+import { createMessageContent, extractText, streamConverter } from "../utils";
+import {
+  BaseSynthesizer,
+  type BaseSynthesizerOptions,
+} from "./base-synthesizer";

-/**
- * Response modes of the response synthesizer
- */
-enum ResponseMode {
-  REFINE = "refine",
-  COMPACT = "compact",
-  TREE_SUMMARIZE = "tree_summarize",
-  SIMPLE = "simple",
-}
+// Names of the supported response modes; the ResponseMode type is inferred
+// from this schema.
+const responseModeSchema = z.enum([
+  "refine",
+  "compact",
+  "tree_summarize",
+  "multi_modal",
+]);

-/**
- * A response builder that just concatenates responses.
- */
-export class SimpleResponseBuilder
-  extends PromptMixin
-  implements ResponseBuilder
-{
-  llm: LLM;
-  textQATemplate: TextQAPrompt;
-
-  constructor(serviceContext?: ServiceContext, textQATemplate?: TextQAPrompt) {
-    super();
-    this.llm = llmFromSettingsOrContext(serviceContext);
-    this.textQATemplate = textQATemplate ?? defaultTextQAPrompt;
-  }
-
-  protected _getPrompts(): PromptsRecord {
-    return {
-      textQATemplate: this.textQATemplate,
-    };
-  }
-  protected _updatePrompts(prompts: { textQATemplate: TextQAPrompt }): void {
-    if (prompts.textQATemplate) {
-      this.textQATemplate = prompts.textQATemplate;
-    }
-  }
-  protected _getPromptModules(): ModuleRecord {
-    return {};
-  }
-
-  getResponse(
-    query: ResponseBuilderQuery,
-    stream: true,
-  ): Promise<AsyncIterable<string>>;
-  getResponse(query: ResponseBuilderQuery, stream?: false): Promise<string>;
-  async getResponse(
-    { query, textChunks }: ResponseBuilderQuery,
-    stream?: boolean,
-  ): Promise<AsyncIterable<string> | string> {
-    const prompt = this.textQATemplate.format({
-      query: extractText(query),
-      context: textChunks.join("\n\n"),
-    });
-    if (stream) {
-      const response = await this.llm.complete({ prompt, stream: true });
-      return streamConverter(response, (chunk) => chunk.text);
-    } else {
-      const response = await this.llm.complete({ prompt, stream: false });
-      return response.text;
-    }
-  }
-}
+/** Union of the response mode names listed in `responseModeSchema`. */
+export type ResponseMode = z.infer<typeof responseModeSchema>;

 /**
 * A response builder that uses the query to ask the LLM generate a better response using multiple text chunks.
 */
-export class Refine extends PromptMixin implements ResponseBuilder {
-  llm: LLM;
-  promptHelper: PromptHelper;
+class Refine extends BaseSynthesizer {
  textQATemplate: TextQAPrompt;
  refineTemplate: RefinePrompt;

  constructor(
-    serviceContext?: ServiceContext,
-    textQATemplate?: TextQAPrompt,
-    refineTemplate?: RefinePrompt,
+    options: BaseSynthesizerOptions & {
+      textQATemplate?: TextQAPrompt | undefined;
+      refineTemplate?: RefinePrompt | undefined;
+    },
  ) {
-    super();
-
-    this.llm = llmFromSettingsOrContext(serviceContext);
-    this.promptHelper = promptHelperFromSettingsOrContext(serviceContext);
-    this.textQATemplate = textQATemplate ?? defaultTextQAPrompt;
-    this.refineTemplate = refineTemplate ?? defaultRefinePrompt;
+    super(options);
+    this.textQATemplate = options.textQATemplate ?? defaultTextQAPrompt;
+    this.refineTemplate = options.refineTemplate ?? defaultRefinePrompt;
  }

  protected _getPromptModules(): ModuleRecord {
@@ -132,41 +76,47 @@ export class Refine extends PromptMixin implements ResponseBuilder {
    }
  }

-  getResponse(
-    query: ResponseBuilderQuery,
-    stream: true,
-  ): Promise<AsyncIterable<string>>;
-  getResponse(query: ResponseBuilderQuery, stream?: false): Promise<string>;
  async getResponse(
-    { query, textChunks, prevResponse }: ResponseBuilderQuery,
-    stream?: boolean,
-  ): Promise<AsyncIterable<string> | string> {
-    let response: AsyncIterable<string> | string | undefined = prevResponse;
+    query: MessageContent,
+    nodes: NodeWithScore[],
+    stream: boolean,
+  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
+    let response: AsyncIterable<string> | string | undefined = undefined;
+    const textChunks = nodes.map(({ node }) =>
+      node.getContent(MetadataMode.LLM),
+    );

    for (let i = 0; i < textChunks.length; i++) {
-      const chunk = textChunks[i]!;
+      const text = textChunks[i]!;
      const lastChunk = i === textChunks.length - 1;
      if (!response) {
        response = await this.giveResponseSingle(
          query,
-          chunk,
+          text,
          !!stream && lastChunk,
        );
      } else {
        response = await this.refineResponseSingle(
          response as string,
          query,
-          chunk,
+          text,
          !!stream && lastChunk,
        );
      }
    }

-    return response ?? "Empty Response";
+    // fixme: no source nodes provided, cannot fix right now due to lack of context
+    if (typeof response === "string") {
+      return EngineResponse.fromResponse(response, false);
+    } else {
+      return streamConverter(response!, (text) =>
+        EngineResponse.fromResponse(text, true),
+      );
+    }
  }

  private async giveResponseSingle(
-    query: QueryType,
+    query: MessageContent,
    textChunk: string,
    stream: boolean,
  ): Promise<AsyncIterable<string> | string> {
@@ -203,10 +153,10 @@ export class Refine extends PromptMixin implements ResponseBuilder {
  // eslint-disable-next-line max-params
  private async refineResponseSingle(
    initialReponse: string,
-    query: QueryType,
+    query: MessageContent,
    textChunk: string,
    stream: boolean,
-  ) {
+  ): Promise<AsyncIterable<string> | string> {
    const refineTemplate: RefinePrompt = this.refineTemplate.partialFormat({
      query: extractText(query),
    });
@@ -246,59 +196,54 @@ export class Refine extends PromptMixin implements ResponseBuilder {
 /**
 * CompactAndRefine is a slight variation of Refine that first compacts the text chunks into the smallest possible number of chunks.
 */
-export class CompactAndRefine extends Refine {
-  getResponse(
-    query: ResponseBuilderQuery,
-    stream: true,
-  ): Promise<AsyncIterable<string>>;
-  getResponse(query: ResponseBuilderQuery, stream?: false): Promise<string>;
+class CompactAndRefine extends Refine {
+  // Repacks the node texts via the prompt helper, wraps each repacked text
+  // in a fresh TextNode, and delegates to Refine.getResponse.
  async getResponse(
-    { query, textChunks, prevResponse }: ResponseBuilderQuery,
-    stream?: boolean,
-  ): Promise<AsyncIterable<string> | string> {
+    query: MessageContent,
+    nodes: NodeWithScore[],
+    stream: boolean,
+  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
    const textQATemplate: TextQAPrompt = this.textQATemplate.partialFormat({
      query: extractText(query),
    });
    const refineTemplate: RefinePrompt = this.refineTemplate.partialFormat({
      query: extractText(query),
    });
+    const textChunks = nodes.map(({ node }) =>
+      node.getContent(MetadataMode.LLM),
+    );

    const maxPrompt = getBiggestPrompt([textQATemplate, refineTemplate]);
    const newTexts = this.promptHelper.repack(maxPrompt, textChunks);
-    const params = {
-      query,
-      textChunks: newTexts,
-      prevResponse,
-    };
+    // The repacked texts become new TextNodes built from text only, so the
+    // original nodes (and their scores) are not passed on to Refine.
+    const newNodes = newTexts.map((text) => new TextNode({ text }));
    if (stream) {
      return super.getResponse(
-        {
-          ...params,
-        },
+        query,
+        newNodes.map((node) => ({ node })),
        true,
      );
    }
-    return super.getResponse(params);
+    return super.getResponse(
+      query,
+      newNodes.map((node) => ({ node })),
+      false,
+    );
  }
 }

 /**
 * TreeSummarize repacks the text chunks into the smallest possible number of chunks and then summarizes them, then recursively does so until there's one chunk left.
 */
-export class TreeSummarize extends PromptMixin implements ResponseBuilder {
-  llm: LLM;
-  promptHelper: PromptHelper;
+class TreeSummarize extends BaseSynthesizer {
  summaryTemplate: TreeSummarizePrompt;

  constructor(
-    serviceContext?: ServiceContext,
-    summaryTemplate?: TreeSummarizePrompt,
+    options: BaseSynthesizerOptions & {
+      summaryTemplate?: TreeSummarizePrompt;
+    },
  ) {
-    super();
-
-    this.llm = llmFromSettingsOrContext(serviceContext);
-    this.promptHelper = promptHelperFromSettingsOrContext(serviceContext);
-    this.summaryTemplate = summaryTemplate ?? defaultTreeSummarizePrompt;
+    super(options);
+    this.summaryTemplate =
+      options.summaryTemplate ?? defaultTreeSummarizePrompt;
  }

  protected _getPromptModules(): ModuleRecord {
@@ -319,15 +264,14 @@ export class TreeSummarize extends PromptMixin implements ResponseBuilder {
    }
  }

-  getResponse(
-    query: ResponseBuilderQuery,
-    stream: true,
-  ): Promise<AsyncIterable<string>>;
-  getResponse(query: ResponseBuilderQuery, stream?: false): Promise<string>;
  async getResponse(
-    { query, textChunks }: ResponseBuilderQuery,
-    stream?: boolean,
-  ): Promise<AsyncIterable<string> | string> {
+    query: MessageContent,
+    nodes: NodeWithScore[],
+    stream: boolean,
+  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
+    const textChunks = nodes.map(({ node }) =>
+      node.getContent(MetadataMode.LLM),
+    );
    if (!textChunks || textChunks.length === 0) {
      throw new Error("Must have at least one text chunk");
    }
@@ -347,9 +291,14 @@ export class TreeSummarize extends PromptMixin implements ResponseBuilder {
      };
      if (stream) {
        const response = await this.llm.complete({ ...params, stream });
-        return streamConverter(response, (chunk) => chunk.text);
+        return streamConverter(response, (chunk) =>
+          EngineResponse.fromResponse(chunk.text, true),
+        );
      }
-      return (await this.llm.complete(params)).text;
+      return EngineResponse.fromResponse(
+        (await this.llm.complete(params)).text,
+        false,
+      );
    } else {
      const summaries = await Promise.all(
        packedTextChunks.map((chunk) =>
@@ -362,40 +311,118 @@ export class TreeSummarize extends PromptMixin implements ResponseBuilder {
        ),
      );

-      const params = {
-        query,
-        textChunks: summaries.map((s) => s.text),
-      };
      if (stream) {
        return this.getResponse(
-          {
-            ...params,
-          },
+          query,
+          summaries.map((s) => ({
+            node: new TextNode({
+              text: s.text,
+            }),
+          })),
          true,
        );
      }
-      return this.getResponse(params);
+      return this.getResponse(
+        query,
+        summaries.map((s) => ({
+          node: new TextNode({
+            text: s.text,
+          }),
+        })),
+        false,
+      );
    }
  }
 }

-export function getResponseBuilder(
-  serviceContext?: ServiceContext,
-  responseMode?: ResponseMode,
-): ResponseBuilder {
-  switch (responseMode) {
-    case ResponseMode.SIMPLE:
-      return new SimpleResponseBuilder(serviceContext);
-    case ResponseMode.REFINE:
-      return new Refine(serviceContext);
-    case ResponseMode.TREE_SUMMARIZE:
-      return new TreeSummarize(serviceContext);
-    default:
-      return new CompactAndRefine(serviceContext);
+/**
+ * Synthesizer that builds one prompt from all nodes via
+ * `createMessageContent` and sends it to the LLM in a single completion.
+ */
+class MultiModal extends BaseSynthesizer {
+  metadataMode: MetadataMode;
+  textQATemplate: TextQAPrompt;
+
+  /**
+   * @param options `textQATemplate` defaults to `defaultTextQAPrompt` and
+   *   `metadataMode` to `MetadataMode.NONE`; the remaining options are
+   *   forwarded to BaseSynthesizer.
+   */
+  constructor({
+    textQATemplate,
+    metadataMode,
+    ...options
+  }: BaseSynthesizerOptions & {
+    textQATemplate?: TextQAPrompt;
+    metadataMode?: MetadataMode;
+  } = {}) {
+    super(options);
+
+    this.metadataMode = metadataMode ?? MetadataMode.NONE;
+    this.textQATemplate = textQATemplate ?? defaultTextQAPrompt;
+  }
+
+  // This synthesizer exposes no nested prompt modules.
+  protected _getPromptModules(): ModuleRecord {
+    return {};
+  }
+
+  protected _getPrompts(): { textQATemplate: TextQAPrompt } {
+    return {
+      textQATemplate: this.textQATemplate,
+    };
+  }
+
+  // Replaces the QA template only when a truthy one is supplied.
+  protected _updatePrompts(promptsDict: {
+    textQATemplate: TextQAPrompt;
+  }): void {
+    if (promptsDict.textQATemplate) {
+      this.textQATemplate = promptsDict.textQATemplate;
+    }
+  }
+
+  /**
+   * Builds the prompt content from the nodes and completes it with the LLM.
+   *
+   * @param query The query; only its text is put into the prompt.
+   * @param nodes The nodes whose content forms the prompt context.
+   * @param stream When true, returns an async iterable of response chunks.
+   */
+  protected async getResponse(
+    query: MessageContent,
+    nodes: NodeWithScore[],
+    stream: boolean,
+  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
+    const prompt = await createMessageContent(
+      this.textQATemplate,
+      nodes.map(({ node }) => node),
+      // this might not be good as this removes the image information
+      { query: extractText(query) },
+      this.metadataMode,
+    );
+
+    const llm = this.llm;
+
+    if (stream) {
+      const response = await llm.complete({
+        prompt,
+        stream,
+      });
+      // Each streamed chunk is wrapped in its own EngineResponse.
+      return streamConverter(response, ({ text }) =>
+        EngineResponse.fromResponse(text, true),
+      );
+    }
+    const response = await llm.complete({
+      prompt,
+    });
+    return EngineResponse.fromResponse(response.text, false);
  }
 }

-export type ResponseBuilderPrompts =
-  | TextQAPrompt
-  | TreeSummarizePrompt
-  | RefinePrompt;
+/**
+ * Creates the synthesizer that implements the given response mode.
+ *
+ * @param mode One of the ResponseMode names.
+ * @param options Options passed unchanged to the chosen synthesizer's
+ *   constructor; each synthesizer reads only the fields it knows.
+ * @returns A new CompactAndRefine, Refine, TreeSummarize or MultiModal.
+ * @throws Error when `mode` is not a known response mode (only reachable
+ *   from untyped callers).
+ */
+export function getResponseSynthesizer(
+  mode: ResponseMode,
+  options: BaseSynthesizerOptions & {
+    textQATemplate?: TextQAPrompt;
+    refineTemplate?: RefinePrompt;
+    summaryTemplate?: TreeSummarizePrompt;
+    metadataMode?: MetadataMode;
+  } = {},
+) {
+  switch (mode) {
+    case "compact": {
+      return new CompactAndRefine(options);
+    }
+    case "refine": {
+      return new Refine(options);
+    }
+    case "tree_summarize": {
+      return new TreeSummarize(options);
+    }
+    case "multi_modal": {
+      return new MultiModal(options);
+    }
+    default: {
+      // Exhaustiveness check: fails to compile if a mode is added to
+      // ResponseMode without a case above, and fails loudly at runtime
+      // instead of silently returning undefined.
+      const unsupportedMode: never = mode;
+      throw new Error(`Unsupported response mode: ${String(unsupportedMode)}`);
+    }
+  }
+}
@@ -0,0 +1,10 @@
+export {
+  BaseSynthesizer,
+  type BaseSynthesizerOptions,
+} from "./base-synthesizer";
+export { getResponseSynthesizer, type ResponseMode } from "./factory";
+export type {
+  SynthesizeEndEvent,
+  SynthesizeQuery,
+  SynthesizeStartEvent,
+} from "./type";
@@ -0,0 +1,19 @@
+import type { QueryType } from "../query-engine";
+import { EngineResponse, type NodeWithScore } from "../schema";
+
+/**
+ * Input to a synthesizer: the query and the nodes to answer it from.
+ * `additionalSourceNodes` is not read by any code visible in this change
+ * (TODO confirm where it is consumed).
+ */
+export type SynthesizeQuery = {
+  query: QueryType;
+  nodes: NodeWithScore[];
+  additionalSourceNodes?: NodeWithScore[];
+};
+
+/** Payload of a synthesize-start event: a call id plus the synthesize query. */
+export type SynthesizeStartEvent = {
+  id: string;
+  query: SynthesizeQuery;
+};
+
+/**
+ * Payload of a synthesize-end event: the call id, the synthesize query, and
+ * the response (a single EngineResponse or an async iterable of them).
+ */
+export type SynthesizeEndEvent = {
+  id: string;
+  query: SynthesizeQuery;
+  response: EngineResponse | AsyncIterable<EngineResponse>;
+};
@@ -0,0 +1,19 @@
+import type { ChatMessage } from "../../llms";
+
+/**
+ * Abstract store of chat messages grouped under string keys.
+ *
+ * The per-method notes below describe what SimpleChatStore does in this
+ * change; other implementations presumably follow the same contract
+ * (confirm).
+ */
+export abstract class BaseChatStore<
+  AdditionalMessageOptions extends object = object,
+> {
+  // Replaces the message list stored under `key`.
+  abstract setMessages(
+    key: string,
+    messages: ChatMessage<AdditionalMessageOptions>[],
+  ): void;
+  // Returns the messages stored under `key` (an empty list when none).
+  abstract getMessages(key: string): ChatMessage<AdditionalMessageOptions>[];
+  // Appends `message`, or inserts it at position `idx` when `idx` is given.
+  abstract addMessage(
+    key: string,
+    message: ChatMessage<AdditionalMessageOptions>,
+    idx?: number,
+  ): void;
+  // Removes every message stored under `key`.
+  abstract deleteMessages(key: string): void;
+  // Removes the single message at position `idx` under `key`.
+  abstract deleteMessage(key: string, idx: number): void;
+  // Iterates over the keys currently present in the store.
+  abstract getKeys(): IterableIterator<string>;
+}
@@ -0,0 +1,2 @@
+export { BaseChatStore } from "./base-chat-store";
+export { SimpleChatStore } from "./simple-chat-store";
@@ -0,0 +1,43 @@
+import type { ChatMessage } from "../../llms";
+import { BaseChatStore } from "./base-chat-store";
+
+/**
+ * In-memory chat store that keeps one message array per key in a private Map.
+ *
+ * Arrays are stored and returned by reference: the array passed to
+ * `setMessages` and the array returned by `getMessages` are the store's own
+ * list, so mutating them changes the stored conversation.
+ */
+export class SimpleChatStore<
+  AdditionalMessageOptions extends object = object,
+> extends BaseChatStore<AdditionalMessageOptions> {
+  #store = new Map<string, ChatMessage<AdditionalMessageOptions>[]>();
+
+  /** Replaces the message list stored under `key`. */
+  setMessages(key: string, messages: ChatMessage<AdditionalMessageOptions>[]) {
+    this.#store.set(key, messages);
+  }
+
+  /** Returns the list stored under `key`, or a new empty array if absent. */
+  getMessages(key: string) {
+    return this.#store.get(key) ?? [];
+  }
+
+  /**
+   * Appends `message` to the list under `key`, or inserts it at `idx` when
+   * `idx` is given. Creates the list when the key is not present yet.
+   */
+  addMessage(
+    key: string,
+    message: ChatMessage<AdditionalMessageOptions>,
+    idx?: number,
+  ) {
+    const messages = this.#store.get(key) ?? [];
+    if (idx === undefined) {
+      messages.push(message);
+    } else {
+      messages.splice(idx, 0, message);
+    }
+    this.#store.set(key, messages);
+  }
+
+  /** Removes `key` and all of its messages from the store. */
+  deleteMessages(key: string) {
+    this.#store.delete(key);
+  }
+
+  /**
+   * Removes the message at position `idx` from the list under `key`.
+   * Does nothing when the key is not present, so deleting from an unknown
+   * key no longer registers an empty conversation that `getKeys()` reports.
+   */
+  deleteMessage(key: string, idx: number) {
+    const messages = this.#store.get(key);
+    if (messages === undefined) {
+      return;
+    }
+    // The stored array is edited in place; no re-insert into the Map needed.
+    messages.splice(idx, 1);
+  }
+
+  /** Returns an iterator over the keys currently held by the store. */
+  getKeys() {
+    return this.#store.keys();
+  }
+}
@@ -1,4 +1,4 @@
-export { wrapEventCaller } from "./event-caller";
+export { EventCaller, getEventCaller, wrapEventCaller } from "./event-caller";

 export async function* streamConverter<S, D>(
  stream: AsyncIterable<S>,
@@ -47,10 +47,12 @@ export async function* streamReducer<S, D>(params: {
 export { wrapLLMEvent } from "./wrap-llm-event";

 export {
+  createMessageContent,
  extractDataUrlComponents,
  extractImage,
  extractSingleText,
  extractText,
+  imageToDataUrl,
  messagesToHistory,
  toToolDescriptions,
 } from "./llms";
@@ -1,3 +1,5 @@
+import { fs } from "@llamaindex/env";
+import { filetypemime } from "magic-bytes.js";
 import type {
  ChatMessage,
  MessageContent,
@@ -5,8 +7,16 @@ import type {
  MessageContentTextDetail,
  ToolMetadata,
 } from "../llms";
+import type { BasePromptTemplate } from "../prompts";
 import type { QueryType } from "../query-engine";
 import type { ImageType } from "../schema";
+import {
+  type BaseNode,
+  ImageNode,
+  MetadataMode,
+  ModalityType,
+  splitNodesByType,
+} from "../schema";

 /**
 * Extracts just the text whether from
@@ -107,3 +117,99 @@ export function toToolDescriptions(tools: ToolMetadata[]): string {

  return JSON.stringify(toolsObj, null, 4);
 }
+
+/**
+ * Encodes a Blob as a base64 data URL, using the first MIME type that
+ * `filetypemime` reports for its bytes.
+ *
+ * @throws Error when no MIME type is reported for the content.
+ */
+async function blobToDataUrl(input: Blob) {
+  const bytes = Buffer.from(await input.arrayBuffer());
+  const candidates = filetypemime(bytes);
+  if (candidates.length === 0) {
+    throw new Error("Unsupported image type");
+  }
+  const encoded = bytes.toString("base64");
+  return `data:${candidates[0]};base64,${encoded}`;
+}
+
+/**
+ * Converts an image given as a path string, a `file:` URL, a Blob or a
+ * Uint8Array into a base64 data URL.
+ *
+ * @param input Path string or `file:` URL (both read via `fs.readFile`),
+ *   a Blob, or raw bytes.
+ * @returns The data URL produced by `blobToDataUrl`.
+ * @throws Error for URLs whose protocol is not `file:` and for any other
+ *   unsupported input type.
+ */
+export async function imageToDataUrl(
+  input: ImageType | Uint8Array,
+): Promise<string> {
+  let blob: Blob;
+  if (typeof input === "string") {
+    // Every string is treated as a file-system path.
+    blob = new Blob([await fs.readFile(input)]);
+  } else if (input instanceof URL) {
+    if (input.protocol !== "file:") {
+      throw new Error(`Unsupported URL with protocol: ${input.protocol}`);
+    }
+    // NOTE(review): the URL's pathname is used as the path unchanged;
+    // confirm this is right for percent-encoded or Windows-style file URLs.
+    blob = new Blob([await fs.readFile(input.pathname)]);
+  } else if (input instanceof Blob) {
+    blob = input;
+  } else if (input instanceof Uint8Array) {
+    blob = new Blob([input]); // convert Uint8Array to Blob
+  } else {
+    throw new Error(`Unsupported input type: ${typeof input}`);
+  }
+  return await blobToDataUrl(blob);
+}
+
+/**
+ * Builds the message content parts for the nodes of one modality.
+ *
+ * Text nodes become a single text part: the prompt formatted with
+ * `extraParams` plus a `context` made of the node contents joined by blank
+ * lines. Image nodes become one `image_url` part each, holding a data URL.
+ * Any other modality yields no parts.
+ */
+// eslint-disable-next-line max-params
+async function createContentPerModality(
+  prompt: BasePromptTemplate,
+  type: ModalityType,
+  nodes: BaseNode[],
+  extraParams: Record<string, string>,
+  metadataMode: MetadataMode,
+): Promise<MessageContentDetail[]> {
+  if (type === ModalityType.TEXT) {
+    const text = prompt.format({
+      ...extraParams,
+      context: nodes.map((r) => r.getContent(metadataMode)).join("\n\n"),
+    });
+    return [{ type: "text", text }];
+  }
+
+  if (type === ModalityType.IMAGE) {
+    // All images are converted concurrently; the result keeps node order.
+    const imageParts: MessageContentDetail[] = await Promise.all(
+      (nodes as ImageNode[]).map(async (imageNode) => {
+        const url = await imageToDataUrl(imageNode.image);
+        return {
+          type: "image_url",
+          image_url: { url },
+        } satisfies MessageContentDetail;
+      }),
+    );
+    return imageParts;
+  }
+
+  return [];
+}
+
+/**
+ * Builds LLM message content from nodes of mixed modalities.
+ *
+ * The nodes are grouped with `splitNodesByType`, and the parts created for
+ * each group are appended in the iteration order of that grouping.
+ *
+ * @param prompt Template used for the text part.
+ * @param nodes Nodes to turn into message content.
+ * @param extraParams Additional values passed to the prompt template.
+ * @param metadataMode Metadata mode used when reading node content.
+ * @returns The collected message content parts.
+ */
+export async function createMessageContent(
+  prompt: BasePromptTemplate,
+  nodes: BaseNode[],
+  extraParams: Record<string, string> = {},
+  metadataMode: MetadataMode = MetadataMode.NONE,
+): Promise<MessageContentDetail[]> {
+  const parts: MessageContentDetail[] = [];
+  const nodesByModality = splitNodesByType(nodes);
+  for (const key in nodesByModality) {
+    const modality = key as ModalityType;
+    const group = nodesByModality[modality];
+    if (!group) {
+      // nothing was collected for this modality
+      continue;
+    }
+    // Groups are processed one after another, not concurrently.
+    const groupParts = await createContentPerModality(
+      prompt,
+      modality,
+      group,
+      extraParams,
+      metadataMode,
+    );
+    parts.push(...groupParts);
+  }
+  return parts;
+}
@@ -0,0 +1,74 @@
+import { Settings } from "@llamaindex/core/global";
+import type { ChatMessage } from "@llamaindex/core/llms";
+import { ChatMemoryBuffer } from "@llamaindex/core/memory";
+import { beforeEach, describe, expect, test } from "vitest";
+
+// Tests for ChatMemoryBuffer: construction with a token limit and the
+// behaviour of getMessages under, at and over that limit.
+describe("ChatMemoryBuffer", () => {
+  beforeEach(() => {
+    // Replace the global Settings.llm with a stub exposing only
+    // metadata.contextWindow. The stub is not restored after the tests.
+    // NOTE(review): presumably ChatMemoryBuffer reads this for its default
+    // token limit — confirm against the implementation.
+    (Settings.llm as any) = {
+      metadata: {
+        contextWindow: 1000,
+      },
+    };
+  });
+
+  // An explicit tokenLimit is exposed unchanged on the instance.
+  test("constructor initializes with custom token limit", () => {
+    const buffer = new ChatMemoryBuffer({ tokenLimit: 500 });
+    expect(buffer.tokenLimit).toBe(500);
+  });
+
+  // With a generous limit the stored history comes back unchanged.
+  test("getMessages returns all messages when under token limit", () => {
+    const messages: ChatMessage[] = [
+      { role: "user", content: "Hello" },
+      { role: "assistant", content: "Hi there!" },
+      { role: "user", content: "How are you?" },
+    ];
+    const buffer = new ChatMemoryBuffer({
+      tokenLimit: 1000,
+      chatHistory: messages,
+    });
+
+    const result = buffer.getMessages();
+    expect(result).toEqual(messages);
+  });
+
+  // With a tight limit only the most recent message is expected to remain.
+  test("getMessages truncates messages when over token limit", () => {
+    const messages: ChatMessage[] = [
+      { role: "user", content: "This is a long message" },
+      { role: "assistant", content: "This is also a long reply" },
+      { role: "user", content: "Short" },
+    ];
+    const buffer = new ChatMemoryBuffer({
+      tokenLimit: 5, // limit to only allow the last message
+      chatHistory: messages,
+    });
+
+    const result = buffer.getMessages();
+    expect(result).toEqual([{ role: "user", content: "Short" }]);
+  });
+
+  // Messages passed to getMessages are expected ahead of the stored history.
+  test("getMessages handles input messages", () => {
+    const storedMessages: ChatMessage[] = [
+      { role: "user", content: "Hello" },
+      { role: "assistant", content: "Hi there!" },
+    ];
+    const buffer = new ChatMemoryBuffer({
+      tokenLimit: 50,
+      chatHistory: storedMessages,
+    });
+
+    const inputMessages: ChatMessage[] = [
+      { role: "user", content: "New message" },
+    ];
+    const result = buffer.getMessages(inputMessages);
+    expect(result).toEqual([...inputMessages, ...storedMessages]);
+  });
+
+  // The second argument (20) is larger than tokenLimit (10), which must throw.
+  // Per the test title, that argument is the initial token count.
+  test("getMessages throws error when initial token count exceeds limit", () => {
+    const buffer = new ChatMemoryBuffer({ tokenLimit: 10 });
+    expect(() => buffer.getMessages(undefined, 20)).toThrow(
+      "Initial token count exceeds token limit",
+    );
+  });
+});
@@ -1,5 +1,13 @@
 # @llamaindex/env

+## 0.1.12
+
+### Patch Changes
+
+- b48bcc3: feat: add `load-transformers` event type when loading `@xenova/transformers` module
+
+  This would benefit users who want to customize the transformer env.
+
 ## 0.1.11

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/env",
  "description": "environment wrapper, supports all JS environment including node, deno, bun, edge runtime, and cloudflare worker",
-  "version": "0.1.11",
+  "version": "0.1.12",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -74,16 +74,18 @@
    "@aws-crypto/sha256-js": "^5.2.0",
    "@swc/cli": "^0.4.0",
    "@swc/core": "^1.7.22",
+    "@xenova/transformers": "^2.17.2",
    "concurrently": "^8.2.2",
    "pathe": "^1.1.2",
+    "tiktoken": "^1.0.16",
    "vitest": "^2.0.5"
  },
  "dependencies": {
-    "@types/lodash": "^4.17.7",
    "@types/node": "^22.5.1"
  },
  "peerDependencies": {
    "@aws-crypto/sha256-js": "^5.2.0",
+    "@xenova/transformers": "^2.17.2",
    "js-tiktoken": "^1.0.12",
    "pathe": "^1.1.2",
    "tiktoken": "^1.0.15"
@@ -92,8 +94,17 @@
    "@aws-crypto/sha256-js": {
      "optional": true
    },
+    "@xenova/transformers": {
+      "optional": true
+    },
    "pathe": {
      "optional": true
+    },
+    "tiktoken": {
+      "optional": true
+    },
+    "js-tiktoken": {
+      "optional": true
    }
  }
 }
@@ -6,6 +6,12 @@
 import "./global-check.js";
 export * from "./web-polyfill.js";

+export {
+  loadTransformers,
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./multi-model/index.browser.js";
 export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/js.js";

 // @ts-expect-error
@@ -6,4 +6,10 @@
 import "./global-check.js";
 export * from "./node-polyfill.js";

+export {
+  loadTransformers,
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./multi-model/index.non-nodejs.js";
 export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/js.js";
@@ -33,6 +33,12 @@ export function createSHA256(): SHA256 {
  };
 }

+export {
+  loadTransformers,
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./multi-model/index.js";
 export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/node.js";
 export {
  AsyncLocalStorage,
@@ -13,4 +13,10 @@ export function getEnv(name: string): string | undefined {
  return INTERNAL_ENV[name];
 }

+export {
+  loadTransformers,
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./multi-model/index.non-nodejs.js";
 export { Tokenizers, tokenizers, type Tokenizer } from "./tokenizers/js.js";
@@ -0,0 +1,20 @@
+import { getTransformers, setTransformers, type OnLoad } from "./shared.js";
+
+export {
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./shared.js";
+/**
+ * Return the registered transformers module, importing it from the jsDelivr
+ * CDN and registering it when none has been set yet.
+ *
+ * `onLoad` is invoked only by a call that performs the import; when a module
+ * is already registered it is returned directly and `onLoad` is not called.
+ */
+export async function loadTransformers(onLoad: OnLoad) {
+  const cached = getTransformers();
+  if (cached !== null) {
+    return cached;
+  }
+  setTransformers(
+    // @ts-expect-error
+    await import("https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2"),
+  );
+  const transformer = getTransformers()!;
+  onLoad(transformer);
+  return transformer;
+}
@@ -0,0 +1,35 @@
+import { getTransformers, setTransformers, type OnLoad } from "./shared.js";
+export {
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./shared.js";
+
+/**
+ * Return the registered transformers module, importing the bundled
+ * `@xenova/transformers/dist/transformers` build (after logging a warning)
+ * when none has been set yet.
+ *
+ * `onLoad` is invoked only by a call that performs the import; when a module
+ * is already registered it is returned directly and `onLoad` is not called.
+ */
+export async function loadTransformers(onLoad: OnLoad) {
+  const cached = getTransformers();
+  if (cached !== null) {
+    return cached;
+  }
+  /**
+   * If you see this warning, the current environment does not support the
+   *  transformers library, because "@xenova/transformers" depends heavily on
+   *  Node.js APIs.
+   *
+   * One possible solution is to fix their implementation so that it works in
+   *  non-Node.js environments, but it is not worth the effort because Edge
+   *  Runtime and Cloudflare Workers are not meant for heavy machine learning
+   *  tasks.
+   *
+   * Alternatively, provide an RPC server that runs the transformer in a
+   *  Node.js environment, or simply run the code in a Node.js environment.
+   *
+   * Refs: https://github.com/xenova/transformers.js/issues/309
+   */
+  console.warn(
+    '"@xenova/transformers" is not officially supported in this environment, some features may not work as expected.',
+  );
+  setTransformers(
+    // @ts-expect-error
+    await import("@xenova/transformers/dist/transformers"),
+  );
+  const transformer = getTransformers()!;
+  onLoad(transformer);
+  return transformer;
+}
@@ -0,0 +1,20 @@
+import { getTransformers, setTransformers, type OnLoad } from "./shared.js";
+
+export {
+  setTransformers,
+  type LoadTransformerEvent,
+  type OnLoad,
+} from "./shared.js";
+
+/**
+ * Return the registered transformers module, importing
+ * `@xenova/transformers` and registering it when none has been set yet.
+ *
+ * `onLoad` is invoked only by a call that performs the import; when a module
+ * is already registered it is returned directly and `onLoad` is not called.
+ */
+export async function loadTransformers(onLoad: OnLoad) {
+  const cached = getTransformers();
+  if (cached !== null) {
+    return cached;
+  }
+  setTransformers(await import("@xenova/transformers"));
+  const transformer = getTransformers()!;
+  onLoad(transformer);
+  return transformer;
+}
@@ -0,0 +1,17 @@
+/** Type of the `@xenova/transformers` module namespace. */
+type TransformersModule = typeof import("@xenova/transformers");
+
+// Module-level slot for the registered transformers module; null until set.
+let current: TransformersModule | null = null;
+
+/** Returns the registered transformers module, or null if none was set. */
+export function getTransformers() {
+  return current;
+}
+
+/** Registers `t` as the module returned by `getTransformers`. */
+export function setTransformers(t: TransformersModule) {
+  current = t;
+}
+
+/** Callback that receives the transformers module. */
+export type OnLoad = (transformer: TransformersModule) => void;
+
+/** Event payload carrying the transformers module. */
+export type LoadTransformerEvent = {
+  transformer: TransformersModule;
+};
@@ -1,5 +1,58 @@
 # @llamaindex/experimental

+## 0.0.85
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.0.84
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.0.83
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.0.82
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
+## 0.0.81
+
+### Patch Changes
+
+- 2cd1383: refactor: align `response-synthesizers` & `chat-engine` module
+
+  - builtin event system
+  - correct class extends
+  - align APIs and naming with llama-index python
+  - move stream out of first parameter to second parameter for better type checking
+  - remove JSONQueryEngine in `@llamaindex/experimental`, as the code quality is not satisfactory and we will bring it back later
+
+- Updated dependencies [2cd1383]
+- Updated dependencies [5c4badb]
+  - llamaindex@0.6.3
+
+## 0.0.80
+
+### Patch Changes
+
+- Updated dependencies [749b43a]
+  - llamaindex@0.6.2
+
 ## 0.0.79

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/experimental",
  "description": "Experimental package for LlamaIndexTS",
-  "version": "0.0.79",
+  "version": "0.0.85",
  "type": "module",
  "types": "dist/type/index.d.ts",
  "main": "dist/cjs/index.js",
@@ -1,211 +0,0 @@
-import jsonpath from "jsonpath";
-
-import { EngineResponse } from "llamaindex";
-
-import { serviceContextFromDefaults, type ServiceContext } from "llamaindex";
-
-import type {
-  QueryEngine,
-  QueryEngineParamsNonStreaming,
-  QueryEngineParamsStreaming,
-} from "llamaindex";
-
-import {
-  defaultJsonPathPrompt,
-  defaultResponseSynthesizePrompt,
-  type JSONPathPrompt,
-  type ResponseSynthesisPrompt,
-} from "./prompt.js";
-
-export type JSONSchemaType = Record<string, unknown>;
-
-function removeExtraQuotes(expr: string) {
-  let startIndex = 0;
-  let endIndex = expr.length;
-
-  // Trim the leading backticks and single quotes
-  while (
-    startIndex < endIndex &&
-    (expr[startIndex] === "`" || expr[startIndex] === "'")
-  ) {
-    startIndex++;
-  }
-
-  // Trim the trailing backticks and single quotes
-  while (
-    endIndex > startIndex &&
-    (expr[endIndex - 1] === "`" || expr[endIndex - 1] === "'")
-  ) {
-    endIndex--;
-  }
-
-  // Return the trimmed substring
-  return expr.substring(startIndex, endIndex);
-}
-
-export const defaultOutputProcessor = async ({
-  llmOutput,
-  jsonValue,
-}: {
-  llmOutput: string;
-  jsonValue: JSONSchemaType;
-}): Promise<Record<string, unknown>[]> => {
-  const expressions = llmOutput
-    .split(",")
-    .map((expr) => removeExtraQuotes(expr.trim()));
-
-  const results: Record<string, unknown>[] = [];
-
-  for (const expression of expressions) {
-    // get the key for example content from $.content
-    const key = expression.split(".").pop();
-
-    try {
-      const datums = jsonpath.query(jsonValue, expression);
-
-      if (!key) throw new Error(`Invalid JSON Path: ${expression}`);
-
-      for (const datum of datums) {
-        // in case there is a filter like [?(@.username=='simon')] without a key ie: $..comments[?(@.username=='simon').content]
-        if (key.includes("==")) {
-          results.push(datum);
-          continue;
-        }
-
-        results.push({
-          [key]: datum,
-        });
-      }
-    } catch (err) {
-      throw new Error(`Invalid JSON Path: ${expression}`);
-    }
-  }
-
-  return results;
-};
-
-type OutputProcessor = typeof defaultOutputProcessor;
-
-/**
- * A JSON query engine that uses JSONPath to query a JSON object.
- */
-export class JSONQueryEngine implements QueryEngine {
-  jsonValue: JSONSchemaType;
-  jsonSchema: JSONSchemaType;
-  serviceContext: ServiceContext;
-  outputProcessor: OutputProcessor;
-  verbose: boolean;
-  jsonPathPrompt: JSONPathPrompt;
-  synthesizeResponse: boolean;
-  responseSynthesisPrompt: ResponseSynthesisPrompt;
-
-  constructor(init: {
-    jsonValue: JSONSchemaType;
-    jsonSchema: JSONSchemaType;
-    serviceContext?: ServiceContext;
-    jsonPathPrompt?: JSONPathPrompt;
-    outputProcessor?: OutputProcessor;
-    synthesizeResponse?: boolean;
-    responseSynthesisPrompt?: ResponseSynthesisPrompt;
-    verbose?: boolean;
-  }) {
-    this.jsonValue = init.jsonValue;
-    this.jsonSchema = init.jsonSchema;
-    this.serviceContext = init.serviceContext ?? serviceContextFromDefaults({});
-    this.jsonPathPrompt = init.jsonPathPrompt ?? defaultJsonPathPrompt;
-    this.outputProcessor = init.outputProcessor ?? defaultOutputProcessor;
-    this.verbose = init.verbose ?? false;
-    this.synthesizeResponse = init.synthesizeResponse ?? true;
-    this.responseSynthesisPrompt =
-      init.responseSynthesisPrompt ?? defaultResponseSynthesizePrompt;
-  }
-
-  getPrompts(): Record<string, unknown> {
-    return {
-      jsonPathPrompt: this.jsonPathPrompt,
-      responseSynthesisPrompt: this.responseSynthesisPrompt,
-    };
-  }
-
-  updatePrompts(prompts: {
-    jsonPathPrompt?: JSONPathPrompt;
-    responseSynthesisPrompt?: ResponseSynthesisPrompt;
-  }): void {
-    if (prompts.jsonPathPrompt) {
-      this.jsonPathPrompt = prompts.jsonPathPrompt;
-    }
-    if (prompts.responseSynthesisPrompt) {
-      this.responseSynthesisPrompt = prompts.responseSynthesisPrompt;
-    }
-  }
-
-  getPromptModules(): Record<string, unknown> {
-    return {};
-  }
-
-  getSchemaContext(): string {
-    return JSON.stringify(this.jsonSchema);
-  }
-
-  query(
-    params: QueryEngineParamsStreaming,
-  ): Promise<AsyncIterable<EngineResponse>>;
-  query(params: QueryEngineParamsNonStreaming): Promise<EngineResponse>;
-  async query(
-    params: QueryEngineParamsStreaming | QueryEngineParamsNonStreaming,
-  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
-    const { query, stream } = params;
-
-    if (stream) {
-      throw new Error("Streaming is not supported");
-    }
-
-    const schema = this.getSchemaContext();
-
-    const { text: jsonPathResponse } = await this.serviceContext.llm.complete({
-      prompt: this.jsonPathPrompt({ query, schema }),
-    });
-
-    if (this.verbose) {
-      console.log(
-        `> JSONPath Instructions:\n\`\`\`\n${jsonPathResponse}\n\`\`\`\n`,
-      );
-    }
-
-    const jsonPathOutput = await this.outputProcessor({
-      llmOutput: jsonPathResponse,
-      jsonValue: this.jsonValue,
-    });
-
-    if (this.verbose) {
-      console.log(`> JSONPath Output: ${jsonPathOutput}\n`);
-    }
-
-    let responseStr;
-
-    if (this.synthesizeResponse) {
-      responseStr = await this.serviceContext.llm.complete({
-        prompt: this.responseSynthesisPrompt({
-          query,
-          jsonSchema: schema,
-          jsonPath: jsonPathResponse,
-          jsonPathValue: JSON.stringify(jsonPathOutput),
-        }),
-      });
-
-      responseStr = responseStr.text;
-    } else {
-      responseStr = JSON.stringify(jsonPathOutput);
-    }
-
-    const responseMetadata = {
-      jsonPathResponse,
-    };
-
-    const response = EngineResponse.fromResponse(responseStr, false);
-
-    response.metadata = responseMetadata;
-
-    return response;
-  }
-}
@@ -1 +0,0 @@
-export * from "./JSONQueryEngine.js";
@@ -1,36 +0,0 @@
-export const defaultJsonPathPrompt = ({
-  query,
-  schema,
-}: {
-  query: string;
-  schema: string;
-}) => `
-We have provided a JSON schema below:
-${schema}
-Given a task, respond with a JSON Path query that can retrieve data from a JSON value that matches the schema.
-Task: ${query}
-JSONPath: 
-`;
-
-export type JSONPathPrompt = typeof defaultJsonPathPrompt;
-
-export const defaultResponseSynthesizePrompt = ({
-  query,
-  jsonSchema,
-  jsonPath,
-  jsonPathValue,
-}: {
-  query: string;
-  jsonSchema: string;
-  jsonPath: string;
-  jsonPathValue: string;
-}) => `
-Given a query, synthesize a response to satisfy the query using the JSON results. Only include details that are relevant to the query. If you don't know the answer, then say that.
-JSON Schema: ${jsonSchema}
-JSON Path: ${jsonPath}
-Value at path: ${jsonPathValue}
-Query: ${query}
-Response: 
-`;
-
-export type ResponseSynthesisPrompt = typeof defaultResponseSynthesizePrompt;
@@ -1 +0,0 @@
-export * from "./engines/query/index.js";
@@ -1,5 +1,82 @@
 # llamaindex

+## 0.6.7
+
+### Patch Changes
+
+- 23bcc37: fix: add `serializer` in doc store
+
+  `PostgresDocumentStore` no longer uses JSON.stringify, for better performance
+
+## 0.6.6
+
+### Patch Changes
+
+- d902cc3: Fix context not being sent using ContextChatEngine
+- 025ffe6: fix: update `PostgresKVStore` constructor params
+- a659574: Adds upstash vector store as a storage
+- Updated dependencies [d902cc3]
+  - @llamaindex/core@0.2.5
+  - @llamaindex/openai@0.1.7
+  - @llamaindex/groq@0.0.6
+
+## 0.6.5
+
+### Patch Changes
+
+- e9714db: feat: update `PGVectorStore`
+
+  - move constructor parameter `config.user` | `config.database` | `config.password` | `config.connectionString` into `config.clientConfig`
+  - if you pass `pg.Client` or `pg.Pool` instance to `PGVectorStore`, move it to `config.client`, setting `config.shouldConnect` to false if it's already connected
+  - default value of `PGVectorStore.collection` is now `"data"` instead of `""` (empty string)
+
+## 0.6.4
+
+### Patch Changes
+
+- b48bcc3: feat: add `load-transformers` event type when loading `@xenova/transformers` module
+
+  This would benefit users who want to customize the transformer env.
+
+- Updated dependencies [b48bcc3]
+  - @llamaindex/core@0.2.4
+  - @llamaindex/env@0.1.12
+  - @llamaindex/openai@0.1.6
+  - @llamaindex/groq@0.0.5
+
+## 0.6.3
+
+### Patch Changes
+
+- 2cd1383: refactor: align `response-synthesizers` & `chat-engine` module
+
+  - builtin event system
+  - correct class extends
+  - align APIs and naming with llama-index python
+  - move stream out of first parameter to second parameter for better type checking
+  - remove JSONQueryEngine in `@llamaindex/experimental`, as the code quality is not satisfactory and we will bring it back later
+
+- 5c4badb: Extend JinaAPIEmbedding parameters
+- Updated dependencies [fb36eff]
+- Updated dependencies [d24d3d1]
+- Updated dependencies [2cd1383]
+  - @llamaindex/cloud@0.2.7
+  - @llamaindex/core@0.2.3
+  - @llamaindex/openai@0.1.5
+  - @llamaindex/groq@0.0.4
+
+## 0.6.2
+
+### Patch Changes
+
+- 749b43a: fix: clip embedding transform function
+- Updated dependencies [b42adeb]
+- Updated dependencies [749b43a]
+  - @llamaindex/cloud@0.2.6
+  - @llamaindex/core@0.2.2
+  - @llamaindex/openai@0.1.4
+  - @llamaindex/groq@0.0.3
+
 ## 0.6.1

 ### Patch Changes
@@ -0,0 +1 @@
+POSTGRES_USER=runner
@@ -1,5 +1,50 @@
 # @llamaindex/cloudflare-worker-agent-test

+## 0.0.69
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.0.68
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.0.67
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.0.66
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
+## 0.0.65
+
+### Patch Changes
+
+- Updated dependencies [2cd1383]
+- Updated dependencies [5c4badb]
+  - llamaindex@0.6.3
+
+## 0.0.64
+
+### Patch Changes
+
+- Updated dependencies [749b43a]
+  - llamaindex@0.6.2
+
 ## 0.0.63

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/cloudflare-worker-agent-test",
-  "version": "0.0.63",
+  "version": "0.0.69",
  "type": "module",
  "private": true,
  "scripts": {
@@ -100,7 +100,8 @@

    /* Completeness */
    // "skipDefaultLibCheck": true,                      /* Skip type checking .d.ts files that are included with TypeScript. */
-    "skipLibCheck": true /* Skip type checking all .d.ts files. */
+    "skipLibCheck": true /* Skip type checking all .d.ts files. */,
+    "tsBuildInfoFile": "./dist/.tsbuildinfo"
  },
  "exclude": ["test"]
 }
@@ -1,5 +1,20 @@
 # @llamaindex/llama-parse-browser-test

+## 0.0.3
+
+### Patch Changes
+
+- Updated dependencies [fb36eff]
+- Updated dependencies [d24d3d1]
+  - @llamaindex/cloud@0.2.7
+
+## 0.0.2
+
+### Patch Changes
+
+- Updated dependencies [b42adeb]
+  - @llamaindex/cloud@0.2.6
+
 ## 0.0.1

 ### Patch Changes
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/llama-parse-browser-test",
  "private": true,
-  "version": "0.0.1",
+  "version": "0.0.3",
  "type": "module",
  "scripts": {
    "dev": "vite",
@@ -1,5 +1,50 @@
 # @llamaindex/next-agent-test

+## 0.1.69
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.1.68
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.1.67
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.1.66
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
+## 0.1.65
+
+### Patch Changes
+
+- Updated dependencies [2cd1383]
+- Updated dependencies [5c4badb]
+  - llamaindex@0.6.3
+
+## 0.1.64
+
+### Patch Changes
+
+- Updated dependencies [749b43a]
+  - llamaindex@0.6.2
+
 ## 0.1.63

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-agent-test",
-  "version": "0.1.63",
+  "version": "0.1.69",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,50 @@
 # test-edge-runtime

+## 0.1.68
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.1.67
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.1.66
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.1.65
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
+## 0.1.64
+
+### Patch Changes
+
+- Updated dependencies [2cd1383]
+- Updated dependencies [5c4badb]
+  - llamaindex@0.6.3
+
+## 0.1.63
+
+### Patch Changes
+
+- Updated dependencies [749b43a]
+  - llamaindex@0.6.2
+
 ## 0.1.62

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/nextjs-edge-runtime-test",
-  "version": "0.1.62",
+  "version": "0.1.68",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,107 +0,0 @@
-:root {
-  --max-width: 1100px;
-  --border-radius: 12px;
-  --font-mono: ui-monospace, Menlo, Monaco, "Cascadia Mono", "Segoe UI Mono",
-    "Roboto Mono", "Oxygen Mono", "Ubuntu Monospace", "Source Code Pro",
-    "Fira Mono", "Droid Sans Mono", "Courier New", monospace;
-
-  --foreground-rgb: 0, 0, 0;
-  --background-start-rgb: 214, 219, 220;
-  --background-end-rgb: 255, 255, 255;
-
-  --primary-glow: conic-gradient(
-    from 180deg at 50% 50%,
-    #16abff33 0deg,
-    #0885ff33 55deg,
-    #54d6ff33 120deg,
-    #0071ff33 160deg,
-    transparent 360deg
-  );
-  --secondary-glow: radial-gradient(
-    rgba(255, 255, 255, 1),
-    rgba(255, 255, 255, 0)
-  );
-
-  --tile-start-rgb: 239, 245, 249;
-  --tile-end-rgb: 228, 232, 233;
-  --tile-border: conic-gradient(
-    #00000080,
-    #00000040,
-    #00000030,
-    #00000020,
-    #00000010,
-    #00000010,
-    #00000080
-  );
-
-  --callout-rgb: 238, 240, 241;
-  --callout-border-rgb: 172, 175, 176;
-  --card-rgb: 180, 185, 188;
-  --card-border-rgb: 131, 134, 135;
-}
-
-@media (prefers-color-scheme: dark) {
-  :root {
-    --foreground-rgb: 255, 255, 255;
-    --background-start-rgb: 0, 0, 0;
-    --background-end-rgb: 0, 0, 0;
-
-    --primary-glow: radial-gradient(rgba(1, 65, 255, 0.4), rgba(1, 65, 255, 0));
-    --secondary-glow: linear-gradient(
-      to bottom right,
-      rgba(1, 65, 255, 0),
-      rgba(1, 65, 255, 0),
-      rgba(1, 65, 255, 0.3)
-    );
-
-    --tile-start-rgb: 2, 13, 46;
-    --tile-end-rgb: 2, 5, 19;
-    --tile-border: conic-gradient(
-      #ffffff80,
-      #ffffff40,
-      #ffffff30,
-      #ffffff20,
-      #ffffff10,
-      #ffffff10,
-      #ffffff80
-    );
-
-    --callout-rgb: 20, 20, 20;
-    --callout-border-rgb: 108, 108, 108;
-    --card-rgb: 100, 100, 100;
-    --card-border-rgb: 200, 200, 200;
-  }
-}
-
-* {
-  box-sizing: border-box;
-  padding: 0;
-  margin: 0;
-}
-
-html,
-body {
-  max-width: 100vw;
-  overflow-x: hidden;
-}
-
-body {
-  color: rgb(var(--foreground-rgb));
-  background: linear-gradient(
-      to bottom,
-      transparent,
-      rgb(var(--background-end-rgb))
-    )
-    rgb(var(--background-start-rgb));
-}
-
-a {
-  color: inherit;
-  text-decoration: none;
-}
-
-@media (prefers-color-scheme: dark) {
-  html {
-    color-scheme: dark;
-  }
-}
@@ -1,6 +1,6 @@
 // test runtime
 import "llamaindex";
-import { ClipEmbedding } from "llamaindex/embeddings/ClipEmbedding";
+import { ClipEmbedding } from "llamaindex";
 import "llamaindex/readers/SimpleDirectoryReader";

 // @ts-expect-error
@@ -1,5 +1,50 @@
 # @llamaindex/next-node-runtime

+## 0.0.50
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.0.49
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.0.48
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.0.47
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
+## 0.0.46
+
+### Patch Changes
+
+- Updated dependencies [2cd1383]
+- Updated dependencies [5c4badb]
+  - llamaindex@0.6.3
+
+## 0.0.45
+
+### Patch Changes
+
+- Updated dependencies [749b43a]
+  - llamaindex@0.6.2
+
 ## 0.0.44

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/next-node-runtime-test",
-  "version": "0.0.44",
+  "version": "0.0.50",
  "private": true,
  "scripts": {
    "dev": "next dev",
@@ -1,5 +1,50 @@
 # @llamaindex/waku-query-engine-test

+## 0.0.69
+
+### Patch Changes
+
+- Updated dependencies [23bcc37]
+  - llamaindex@0.6.7
+
+## 0.0.68
+
+### Patch Changes
+
+- Updated dependencies [d902cc3]
+- Updated dependencies [025ffe6]
+- Updated dependencies [a659574]
+  - llamaindex@0.6.6
+
+## 0.0.67
+
+### Patch Changes
+
+- Updated dependencies [e9714db]
+  - llamaindex@0.6.5
+
+## 0.0.66
+
+### Patch Changes
+
+- Updated dependencies [b48bcc3]
+  - llamaindex@0.6.4
+
+## 0.0.65
+
+### Patch Changes
+
+- Updated dependencies [2cd1383]
+- Updated dependencies [5c4badb]
+  - llamaindex@0.6.3
+
+## 0.0.64
+
+### Patch Changes
+
+- Updated dependencies [749b43a]
+  - llamaindex@0.6.2
+
 ## 0.0.63

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/waku-query-engine-test",
-  "version": "0.0.63",
+  "version": "0.0.69",
  "type": "module",
  "private": true,
  "scripts": {
@@ -1,7 +1,7 @@
 "use server";
-import { Document, VectorStoreIndex, type QueryEngine } from "llamaindex";
+import { BaseQueryEngine, Document, VectorStoreIndex } from "llamaindex";
 import { readFile } from "node:fs/promises";
-let _queryEngine: QueryEngine;
+let _queryEngine: BaseQueryEngine;

 async function lazyLoadQueryEngine() {
  if (!_queryEngine) {
@@ -0,0 +1,84 @@
+import type { LoadTransformerEvent } from "@llamaindex/env";
+import { setTransformers } from "@llamaindex/env";
+import { ClipEmbedding, ImageNode, Settings } from "llamaindex";
+import assert from "node:assert";
+import { type Mock, test } from "node:test";
+
+// Mock listener for the "load-transformers" event; created in test.before and
+// shared by every subtest below.
+let callback: Mock<(event: any) => void>;
+test.before(() => {
+  // The listener itself asserts that the event carries a transformers module
+  // with an `env` property.
+  callback = test.mock.fn((event: any) => {
+    const { transformer } = event.detail as LoadTransformerEvent;
+    assert.ok(transformer);
+    assert.ok(transformer.env);
+  });
+  Settings.callbackManager.on("load-transformers", callback);
+});
+
+test.beforeEach(() => {
+  // Start each test with a zero call count on the listener.
+  callback.mock.resetCalls();
+});
+
+await test("clip embedding", async (t) => {
+  // Constructing ClipEmbedding must not fire the event; embedding the nodes
+  // must fire it exactly once.
+  await t.test("should trigger load transformer event", async () => {
+    const nodes = [
+      new ImageNode({
+        image: new URL(
+          "../../fixtures/img/llamaindex-white.png",
+          import.meta.url,
+        ),
+      }),
+    ];
+    assert.equal(callback.mock.callCount(), 0);
+    const clipEmbedding = new ClipEmbedding();
+    assert.equal(callback.mock.callCount(), 0);
+    const result = await clipEmbedding(nodes);
+    assert.strictEqual(result.length, 1);
+    assert.equal(callback.mock.callCount(), 1);
+  });
+
+  // getImageEmbedding on a fixture image URL yields a truthy result.
+  await t.test("init & get image embedding", async () => {
+    const clipEmbedding = new ClipEmbedding();
+    const imgUrl = new URL(
+      "../../fixtures/img/llamaindex-white.png",
+      import.meta.url,
+    );
+    const vec = await clipEmbedding.getImageEmbedding(imgUrl);
+    assert.ok(vec);
+  });
+
+  // Calling the embedding on image nodes returns one node with an embedding.
+  await t.test("load image document", async () => {
+    const nodes = [
+      new ImageNode({
+        image: new URL(
+          "../../fixtures/img/llamaindex-white.png",
+          import.meta.url,
+        ),
+      }),
+    ];
+    const clipEmbedding = new ClipEmbedding();
+    const result = await clipEmbedding(nodes);
+    assert.strictEqual(result.length, 1);
+    assert.ok(result[0]!.embedding);
+  });
+
+  // Registers a Proxy around the real module via setTransformers and checks
+  // that property reads go through the proxy only once an embedding is
+  // requested, i.e. that ClipEmbedding uses the registered module.
+  await t.test("custom transformer", async () => {
+    const transformers = await import("@xenova/transformers");
+    // Pass-through `get` trap that only records that it was called.
+    const getter = test.mock.fn((t, k, r) => {
+      return Reflect.get(t, k, r);
+    });
+    setTransformers(
+      new Proxy(transformers, {
+        get: getter,
+      }),
+    );
+    const clipEmbedding = new ClipEmbedding();
+    const imgUrl = new URL(
+      "../../fixtures/img/llamaindex-white.png",
+      import.meta.url,
+    );
+    assert.equal(getter.mock.callCount(), 0);
+    const vec = await clipEmbedding.getImageEmbedding(imgUrl);
+    assert.ok(vec);
+    assert.ok(getter.mock.callCount() > 0);
+  });
+});
@@ -1,3 +1,5 @@
+/* eslint-disable turbo/no-undeclared-env-vars */
+import { config } from "dotenv";
 import { Document, VectorStoreQueryMode } from "llamaindex";
 import { PGVectorStore } from "llamaindex/vector-store/PGVectorStore";
 import assert from "node:assert";
@@ -5,43 +7,56 @@ import { test } from "node:test";
 import pg from "pg";
 import { registerTypes } from "pgvector/pg";

-let pgClient: pg.Client | pg.Pool;
-test.afterEach(async () => {
-  await pgClient.end();
-});
+config({ path: [".env.local", ".env", ".env.ci"] });

-await test("init with client", async () => {
-  pgClient = new pg.Client({
-    database: "llamaindex_node_test",
-  });
+const pgConfig = {
+  user: process.env.POSTGRES_USER ?? "user",
+  password: process.env.POSTGRES_PASSWORD ?? "password",
+  database: "llamaindex_node_test",
+};
+
+await test("init with client", async (t) => {
+  const pgClient = new pg.Client(pgConfig);
  await pgClient.connect();
  await pgClient.query("CREATE EXTENSION IF NOT EXISTS vector");
  await registerTypes(pgClient);
-  const vectorStore = new PGVectorStore(pgClient);
+  t.after(async () => {
+    await pgClient.end();
+  });
+  const vectorStore = new PGVectorStore({
+    client: pgClient,
+    shouldConnect: false,
+  });
  assert.deepStrictEqual(await vectorStore.client(), pgClient);
 });

-await test("init with pool", async () => {
-  pgClient = new pg.Pool({
-    database: "llamaindex_node_test",
-  });
+await test("init with pool", async (t) => {
+  const pgClient = new pg.Pool(pgConfig);
  await pgClient.query("CREATE EXTENSION IF NOT EXISTS vector");
  const client = await pgClient.connect();
+  await client.query("CREATE EXTENSION IF NOT EXISTS vector");
  await registerTypes(client);
-  const vectorStore = new PGVectorStore(client);
+  t.after(async () => {
+    client.release();
+    await pgClient.end();
+  });
+  const vectorStore = new PGVectorStore({
+    shouldConnect: false,
+    client,
+  });
  assert.deepStrictEqual(await vectorStore.client(), client);
-  client.release();
 });

-await test("init without client", async () => {
-  const vectorStore = new PGVectorStore({
-    database: "llamaindex_node_test",
+await test("init without client", async (t) => {
+  const vectorStore = new PGVectorStore({ clientConfig: pgConfig });
+  const pgClient = (await vectorStore.client()) as pg.Client;
+  t.after(async () => {
+    await pgClient.end();
  });
-  pgClient = (await vectorStore.client()) as pg.Client;
  assert.notDeepStrictEqual(pgClient, undefined);
 });

-await test("simple node", async () => {
+await test("simple node", async (t) => {
  const dimensions = 3;
  const schemaName =
    "llamaindex_vector_store_test_" + Math.random().toString(36).substring(7);
@@ -52,10 +67,14 @@ await test("simple node", async () => {
    embedding: [0.1, 0.2, 0.3],
  });
  const vectorStore = new PGVectorStore({
-    database: "llamaindex_node_test",
+    clientConfig: pgConfig,
    dimensions,
    schemaName,
  });
+  const pgClient = (await vectorStore.client()) as pg.Client;
+  t.after(async () => {
+    await pgClient.end();
+  });

  await vectorStore.add([node]);

@@ -85,6 +104,4 @@ await test("simple node", async () => {
    });
    assert.deepStrictEqual(result.nodes, []);
  }
-
-  pgClient = (await vectorStore.client()) as pg.Client;
 });
@@ -4,14 +4,15 @@
  "version": "0.0.7",
  "type": "module",
  "scripts": {
-    "e2e": "node --import tsx --import ./mock-register.js --test ./node/*.e2e.ts",
-    "e2e:nomock": "node --import tsx --test ./node/*.e2e.ts",
-    "e2e:updatesnap": "UPDATE_SNAPSHOT=1 node --import tsx --test ./node/*.e2e.ts"
+    "e2e": "node --import tsx --import ./mock-register.js --test ./node/**/*.e2e.ts",
+    "e2e:nomock": "node --import tsx --test ./node/**/*.e2e.ts",
+    "e2e:updatesnap": "UPDATE_SNAPSHOT=1 node --import tsx --test ./node/**/*.e2e.ts"
  },
  "devDependencies": {
-    "@faker-js/faker": "^8.4.1",
+    "@faker-js/faker": "^9.0.1",
    "@types/node": "^22.5.1",
    "consola": "^3.2.3",
+    "dotenv": "^16.4.5",
    "llamaindex": "workspace:*",
    "tsx": "^4.19.0"
  }
@@ -1,6 +1,6 @@
 {
  "name": "llamaindex",
-  "version": "0.6.1",
+  "version": "0.6.7",
  "license": "MIT",
  "type": "module",
  "keywords": [
@@ -33,8 +33,8 @@
    "@llamaindex/cloud": "workspace:*",
    "@llamaindex/core": "workspace:*",
    "@llamaindex/env": "workspace:*",
-    "@llamaindex/openai": "workspace:*",
    "@llamaindex/groq": "workspace:*",
+    "@llamaindex/openai": "workspace:*",
    "@mistralai/mistralai": "^1.0.4",
    "@mixedbread-ai/sdk": "^2.2.11",
    "@pinecone-database/pinecone": "^3.0.2",
@@ -43,7 +43,7 @@
    "@types/node": "^22.5.1",
    "@types/papaparse": "^5.3.14",
    "@types/pg": "^8.11.8",
-    "@xenova/transformers": "^2.17.2",
+    "@upstash/vector": "^1.1.5",
    "@zilliz/milvus2-sdk-node": "^2.4.6",
    "ajv": "^8.17.1",
    "assemblyai": "^4.7.0",
@@ -91,6 +91,7 @@
    "@notionhq/client": "^2.2.15",
    "@swc/cli": "^0.4.0",
    "@swc/core": "^1.7.22",
+    "@xenova/transformers": "^2.17.2",
    "concurrently": "^8.2.2",
    "glob": "^11.0.0",
    "pg": "^8.12.0",
@@ -9,6 +9,8 @@ import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";

 /**
 * The ServiceContext is a collection of components that are used in different parts of the application.
+ *
+ * @deprecated This will no longer be supported; please use `Settings` instead.
 */
 export interface ServiceContext {
  llm: LLM;
@@ -12,6 +12,7 @@ import {
  type NodeParser,
  SentenceSplitter,
 } from "@llamaindex/core/node-parser";
+import type { LoadTransformerEvent } from "@llamaindex/env";
 import { AsyncLocalStorage, getEnv } from "@llamaindex/env";
 import type { ServiceContext } from "./ServiceContext.js";
 import {
@@ -20,6 +21,12 @@ import {
  withEmbeddedModel,
 } from "./internal/settings/EmbedModel.js";

+declare module "@llamaindex/core/global" {
+  interface LlamaIndexEventMaps {
+    "load-transformers": LoadTransformerEvent;
+  }
+}
+
 export type PromptConfig = {
  llm?: string;
  lang?: string;
@@ -5,10 +5,10 @@ import type {
  MessageContent,
  ToolOutput,
 } from "@llamaindex/core/llms";
+import { BaseMemory } from "@llamaindex/core/memory";
 import { EngineResponse } from "@llamaindex/core/schema";
 import { wrapEventCaller } from "@llamaindex/core/utils";
 import { randomUUID } from "@llamaindex/env";
-import { ChatHistory } from "../ChatHistory.js";
 import { Settings } from "../Settings.js";
 import {
  type ChatEngine,
@@ -353,11 +353,11 @@ export abstract class AgentRunner<
  async chat(
    params: ChatEngineParamsNonStreaming | ChatEngineParamsStreaming,
  ): Promise<EngineResponse | ReadableStream<EngineResponse>> {
-    let chatHistory: ChatMessage<AdditionalMessageOptions>[] | undefined = [];
+    let chatHistory: ChatMessage<AdditionalMessageOptions>[] = [];

-    if (params.chatHistory instanceof ChatHistory) {
-      chatHistory = params.chatHistory
-        .messages as ChatMessage<AdditionalMessageOptions>[];
+    if (params.chatHistory instanceof BaseMemory) {
+      chatHistory =
+        (await params.chatHistory.getMessages()) as ChatMessage<AdditionalMessageOptions>[];
    } else {
      chatHistory =
        params.chatHistory as ChatMessage<AdditionalMessageOptions>[];
@@ -1,9 +1,9 @@
+import type { BaseQueryEngine } from "@llamaindex/core/query-engine";
+import type { BaseSynthesizer } from "@llamaindex/core/response-synthesizers";
 import type { Document, TransformComponent } from "@llamaindex/core/schema";
 import type { BaseRetriever } from "../Retriever.js";
 import { RetrieverQueryEngine } from "../engines/query/RetrieverQueryEngine.js";
 import type { BaseNodePostprocessor } from "../postprocessors/types.js";
-import type { BaseSynthesizer } from "../synthesizers/types.js";
-import type { QueryEngine } from "../types.js";
 import type { CloudRetrieveParams } from "./LlamaCloudRetriever.js";
 import { LlamaCloudRetriever } from "./LlamaCloudRetriever.js";
 import { getPipelineCreate } from "./config.js";
@@ -300,7 +300,7 @@ export class LlamaCloudIndex {
      preFilters?: unknown;
      nodePostprocessors?: BaseNodePostprocessor[];
    } & CloudRetrieveParams,
-  ): QueryEngine {
+  ): BaseQueryEngine {
    const retriever = new LlamaCloudRetriever({
      ...this.params,
      ...params,
@@ -1,17 +1,26 @@
+import { MultiModalEmbedding } from "@llamaindex/core/embeddings";
 import type { ImageType } from "@llamaindex/core/schema";
 import _ from "lodash";
-import { lazyLoadTransformers } from "../internal/deps/transformers.js";
-import { MultiModalEmbedding } from "./MultiModalEmbedding.js";
 // only import type, to avoid bundling error
+import { loadTransformers } from "@llamaindex/env";
 import type {
  CLIPTextModelWithProjection,
  CLIPVisionModelWithProjection,
  PreTrainedTokenizer,
  Processor,
 } from "@xenova/transformers";
+import { Settings } from "../Settings.js";

 async function readImage(input: ImageType) {
-  const { RawImage } = await lazyLoadTransformers();
+  const { RawImage } = await loadTransformers((transformer) => {
+    Settings.callbackManager.dispatchEvent(
+      "load-transformers",
+      {
+        transformer,
+      },
+      true,
+    );
+  });
  if (input instanceof Blob) {
    return await RawImage.fromBlob(input);
  } else if (_.isString(input) || input instanceof URL) {
@@ -35,8 +44,20 @@ export class ClipEmbedding extends MultiModalEmbedding {
  private visionModel: CLIPVisionModelWithProjection | null = null;
  private textModel: CLIPTextModelWithProjection | null = null;

+  constructor() {
+    super();
+  }
+
  async getTokenizer() {
-    const { AutoTokenizer } = await lazyLoadTransformers();
+    const { AutoTokenizer } = await loadTransformers((transformer) => {
+      Settings.callbackManager.dispatchEvent(
+        "load-transformers",
+        {
+          transformer,
+        },
+        true,
+      );
+    });
    if (!this.tokenizer) {
      this.tokenizer = await AutoTokenizer.from_pretrained(this.modelType);
    }
@@ -44,7 +65,15 @@ export class ClipEmbedding extends MultiModalEmbedding {
  }

  async getProcessor() {
-    const { AutoProcessor } = await lazyLoadTransformers();
+    const { AutoProcessor } = await loadTransformers((transformer) => {
+      Settings.callbackManager.dispatchEvent(
+        "load-transformers",
+        {
+          transformer,
+        },
+        true,
+      );
+    });
    if (!this.processor) {
      this.processor = await AutoProcessor.from_pretrained(this.modelType);
    }
@@ -52,7 +81,17 @@ export class ClipEmbedding extends MultiModalEmbedding {
  }

  async getVisionModel() {
-    const { CLIPVisionModelWithProjection } = await lazyLoadTransformers();
+    const { CLIPVisionModelWithProjection } = await loadTransformers(
+      (transformer) => {
+        Settings.callbackManager.dispatchEvent(
+          "load-transformers",
+          {
+            transformer,
+          },
+          true,
+        );
+      },
+    );
    if (!this.visionModel) {
      this.visionModel = await CLIPVisionModelWithProjection.from_pretrained(
        this.modelType,
@@ -63,7 +102,17 @@ export class ClipEmbedding extends MultiModalEmbedding {
  }

  async getTextModel() {
-    const { CLIPTextModelWithProjection } = await lazyLoadTransformers();
+    const { CLIPTextModelWithProjection } = await loadTransformers(
+      (transformer) => {
+        Settings.callbackManager.dispatchEvent(
+          "load-transformers",
+          {
+            transformer,
+          },
+          true,
+        );
+      },
+    );
    if (!this.textModel) {
      this.textModel = await CLIPTextModelWithProjection.from_pretrained(
        this.modelType,
@@ -1,10 +1,13 @@
+import { MultiModalEmbedding } from "@llamaindex/core/embeddings";
 import type { ImageType } from "@llamaindex/core/schema";
-import { MultiModalEmbedding } from "./MultiModalEmbedding.js";

 /**
 * Cloudflare worker doesn't support image embeddings for now
 */
 export class CloudflareWorkerMultiModalEmbedding extends MultiModalEmbedding {
+  constructor() {
+    super();
+  }
  getImageEmbedding(images: ImageType): Promise<number[]> {
    throw new Error("Method not implemented.");
  }
@@ -1,6 +1,7 @@
 import { HfInference } from "@huggingface/inference";
 import { BaseEmbedding } from "@llamaindex/core/embeddings";
-import { lazyLoadTransformers } from "../internal/deps/transformers.js";
+import { loadTransformers } from "@llamaindex/env";
+import { Settings } from "../Settings.js";

 export enum HuggingFaceEmbeddingModelType {
  XENOVA_ALL_MINILM_L6_V2 = "Xenova/all-MiniLM-L6-v2",
@@ -33,7 +34,15 @@ export class HuggingFaceEmbedding extends BaseEmbedding {

  async getExtractor() {
    if (!this.extractor) {
-      const { pipeline } = await lazyLoadTransformers();
+      const { pipeline } = await loadTransformers((transformer) => {
+        Settings.callbackManager.dispatchEvent(
+          "load-transformers",
+          {
+            transformer,
+          },
+          true,
+        );
+      });
      this.extractor = await pipeline("feature-extraction", this.modelType, {
        quantized: this.quantized,
      });
@@ -1,7 +1,7 @@
+import { MultiModalEmbedding } from "@llamaindex/core/embeddings";
 import { getEnv } from "@llamaindex/env";
 import { imageToDataUrl } from "../internal/utils.js";
 import type { ImageType } from "../Node.js";
-import { MultiModalEmbedding } from "./MultiModalEmbedding.js";

 function isLocal(url: ImageType): boolean {
  if (url instanceof Blob) return true;
@@ -20,8 +20,9 @@ export type JinaEmbeddingRequest = {
  input: Array<{ text: string } | { url: string } | { bytes: string }>;
  model?: string;
  encoding_type?: EncodingType;
-  task_type?: TaskType;
+  task?: TaskType;
  dimensions?: number;
+  late_chunking?: boolean;
 };

 export type JinaEmbeddingResponse = {
@@ -44,9 +45,10 @@ export class JinaAIEmbedding extends MultiModalEmbedding {
  apiKey: string;
  model: string;
  baseURL: string;
-  taskType: TaskType | undefined;
+  task?: TaskType | undefined;
  encodingType?: EncodingType | undefined;
  dimensions?: number | undefined;
+  late_chunking?: boolean | undefined;

  async getTextEmbedding(text: string): Promise<number[]> {
    const result = await this.getJinaEmbedding({ input: [{ text }] });
@@ -87,8 +89,10 @@ export class JinaAIEmbedding extends MultiModalEmbedding {
    this.model = init?.model ?? "jina-embeddings-v3";
    this.baseURL = init?.baseURL ?? "https://api.jina.ai/v1/embeddings";
    init?.embedBatchSize && (this.embedBatchSize = init?.embedBatchSize);
-    this.taskType = init?.taskType;
+    this.task = init?.task;
    this.encodingType = init?.encodingType;
+    this.dimensions = init?.dimensions;
+    this.late_chunking = init?.late_chunking;
  }

  private async getImageInput(
@@ -125,8 +129,11 @@ export class JinaAIEmbedding extends MultiModalEmbedding {
      body: JSON.stringify({
        model: this.model,
        encoding_type: this.encodingType ?? "float",
-        ...(this.taskType && { task_type: this.taskType }),
+        ...(this.task && { task: this.task }),
        ...(this.dimensions !== undefined && { dimensions: this.dimensions }),
+        ...(this.late_chunking !== undefined && {
+          late_chunking: this.late_chunking,
+        }),
        ...params,
      }),
    });
@@ -1,71 +0,0 @@
-import { BaseEmbedding, batchEmbeddings } from "@llamaindex/core/embeddings";
-import type { MessageContentDetail } from "@llamaindex/core/llms";
-import {
-  ImageNode,
-  MetadataMode,
-  ModalityType,
-  splitNodesByType,
-  type BaseNode,
-  type ImageType,
-} from "@llamaindex/core/schema";
-import { extractImage, extractSingleText } from "@llamaindex/core/utils";
-
-/*
- * Base class for Multi Modal embeddings.
- */
-
-export abstract class MultiModalEmbedding extends BaseEmbedding {
-  abstract getImageEmbedding(images: ImageType): Promise<number[]>;
-
-  /**
-   * Optionally override this method to retrieve multiple image embeddings in a single request
-   * @param images
-   */
-  async getImageEmbeddings(images: ImageType[]): Promise<number[][]> {
-    return Promise.all(
-      images.map((imgFilePath) => this.getImageEmbedding(imgFilePath)),
-    );
-  }
-
-  async transform(nodes: BaseNode[], _options?: any): Promise<BaseNode[]> {
-    const nodeMap = splitNodesByType(nodes);
-    const imageNodes = nodeMap[ModalityType.IMAGE] ?? [];
-    const textNodes = nodeMap[ModalityType.TEXT] ?? [];
-
-    const embeddings = await batchEmbeddings(
-      textNodes.map((node) => node.getContent(MetadataMode.EMBED)),
-      this.getTextEmbeddings.bind(this),
-      this.embedBatchSize,
-      _options,
-    );
-    for (let i = 0; i < textNodes.length; i++) {
-      textNodes[i]!.embedding = embeddings[i];
-    }
-
-    const imageEmbeddings = await batchEmbeddings(
-      imageNodes.map((n) => (n as ImageNode).image),
-      this.getImageEmbeddings.bind(this),
-      this.embedBatchSize,
-      _options,
-    );
-    for (let i = 0; i < imageNodes.length; i++) {
-      imageNodes[i]!.embedding = imageEmbeddings[i];
-    }
-
-    return nodes;
-  }
-
-  async getQueryEmbedding(
-    query: MessageContentDetail,
-  ): Promise<number[] | null> {
-    const image = extractImage(query);
-    if (image) {
-      return await this.getImageEmbedding(image);
-    }
-    const text = extractSingleText(query);
-    if (text) {
-      return await this.getTextEmbedding(text);
-    }
-    return null;
-  }
-}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
github-actions[bot]	22ae8d0166	Release 0.6.7 (#1244 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-09-23 13:25:02 -07:00
Goran	23bcc379a8	fix: add `serializer` in doc store (#1243 ) Co-authored-by: Alex Yang <himself65@outlook.com>	2024-09-23 13:11:51 -07:00
github-actions[bot]	bdc4bfe7b0	Release 0.6.6 (#1241 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-09-23 11:54:33 -07:00
Goran	025ffe6b50	fix: update `PostgresKVStore` constructor params (#1240 ) Co-authored-by: Alex Yang <himself65@outlook.com>	2024-09-23 10:46:11 -07:00
Cahid Arda Öz	a6595747fa	feat: add Upstash Vector Store (#1218 ) Co-authored-by: ogzhanolguncu <ogzhan11@gmail.com> Co-authored-by: Alex Yang <himself65@outlook.com>	2024-09-23 10:00:10 -07:00
Marcus Schiesser	d902cc3e7e	fix: context not working in contextchatengine (#1237 )	2024-09-22 15:19:13 -07:00
github-actions[bot]	726eb41359	Release 0.6.5 (#1239 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-09-20 14:24:23 -07:00
André Mazayev	e9714dbfcd	feat: update `PGVectorStore` constructor parameters (#1225 ) Co-authored-by: Alex Yang <himself65@outlook.com>	2024-09-20 01:34:51 -07:00
Alex Yang	a3618e761e	chore: fix cache for cloud package (#1236 )	2024-09-19 17:48:39 -07:00
github-actions[bot]	24eabe7f35	Release 0.6.4 (#1234 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2024-09-19 16:42:39 -07:00
Alex Yang	ecfa939ea6	ci: enable remote cache (#1233 )	2024-09-19 15:40:34 -07:00
Alex Yang	b48bcc3add	feat: support custom `@xenova/transformers` (#1232 )	2024-09-19 14:55:23 -07:00
github-actions[bot]	fa01fa2051	Release 0.6.3 (#1220 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: himself65 <himself65@users.noreply.github.com>	2024-09-19 12:38:23 -07:00
Alex Yang	fb36eff5e1	fix: use Blob instead of File (#1231 )	2024-09-19 12:32:10 -07:00
Alex Yang	d24d3d1e8c	fix: print warning when llama parse reader has error (#1230 )	2024-09-19 09:41:37 -07:00
Aaron Ji	5c4badbcca	chore: add 'late_chunking' for Jina embedding (#1223 )	2024-09-18 17:38:46 +07:00
Alex Yang	2cd1383dc8	feat: align `response-synthesizers` & `chat-engine` module (#1169 )	2024-09-17 15:44:44 -07:00
github-actions[bot]	72440c101f	Release 0.6.2 (#1217 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: himself65 <himself65@users.noreply.github.com>	2024-09-16 16:40:33 -07:00
Alex Yang	423d66b07a	refactor: chat memory & chat history into core module (#1201 )	2024-09-16 16:09:17 -07:00
Alex Yang	b42adebd51	fix: get job result in llama parse reader (#1216 )	2024-09-16 16:05:47 -07:00
Alex Yang	749b43a3b1	fix: multi model embedding (#1215 )	2024-09-16 15:51:24 -07:00