llamaindex 0.0.37

changeset
Merge pull request #228 from run-llama/ms/create-llama-fixes
2026-07-04 03:40:26 -04:00 · 2023-11-23 10:54:44 -08:00 · 2023-11-23 10:53:30 -08:00 · 2023-11-23 10:50:13 -08:00 · 2023-11-23 20:58:43 +07:00 · 2023-11-23 18:23:24 +07:00
301 changed files with 73064 additions and 3215 deletions
@@ -1,11 +1,10 @@
 name: Bugfix
-title: 'Sweep: '
+title: ""
 description: Write something like "We notice ... behavior when ... happens instead of ...""
-labels: sweep
 body:
  - type: textarea
    id: description
    attributes:
      label: Details
      description: More details about the bug
-      placeholder: The bug might be in ... file
+      placeholder: The bug might be in ... file
@@ -1,11 +1,10 @@
 name: Feature Request
-title: 'Sweep: '
-description: Write something like "Write an api endpoint that does "..." in the "..." file"
-labels: sweep
+title: ""
+description: Write something like "Write an api endpoint that does "..." in the "..." file". If you would like to use sweep.dev, prefix the title with "Sweep:"
 body:
  - type: textarea
    id: description
    attributes:
      label: Details
-      description: More details for Sweep
-      placeholder: The new endpoint should use the ... class from ... file because it contains ... logic
+      description: More details
+      placeholder: The new endpoint should use the ... class from ... file because it contains ... logic
@@ -1,11 +1,10 @@
 name: Refactor
-title: 'Sweep: '
-description: Write something like "Modify the ... api endpoint to use ... version and ... framework"
-labels: sweep
+title: ""
+description: Write something like "Modify the ... api endpoint to use ... version and ... framework". If you would like to use sweep.dev, prefix the title with "Sweep:"
 body:
  - type: textarea
    id: description
    attributes:
      label: Details
-      description: More details for Sweep
-      placeholder: We are migrating this function to ... version because ...
+      description: More details
+      placeholder: We are migrating this function to ... version because ...
@@ -22,4 +22,4 @@ jobs:
        run: pnpm install

      - name: Run lint
-        run: pnpm run lint
+        run: pnpm run lint
@@ -7,18 +7,18 @@ jobs:
    runs-on: ubuntu-latest

    steps:
-    - name: Checkout code
-      uses: actions/checkout@v2
+      - name: Checkout code
+        uses: actions/checkout@v2

-    - name: Setup Node.js
-      uses: actions/setup-node@v2
-      with:
-        node-version: '18'
+      - name: Setup Node.js
+        uses: actions/setup-node@v2
+        with:
+          node-version: "18"

-    - name: Install dependencies
-      run: |
-        npm i -g pnpm
-        pnpm install
+      - name: Install dependencies
+        run: |
+          npm i -g pnpm
+          pnpm install

-    - name: Run tests
-      run: pnpm run test
+      - name: Run tests
+        run: pnpm run test
@@ -3,6 +3,7 @@
 # dependencies
 node_modules
 .pnp
+.pnpm-store
 .pnp.js

 # testing
@@ -36,3 +37,6 @@ yarn-error.log*
 .vercel

 dist/
+
+# vs code
+.vscode/launch.json
@@ -2,3 +2,4 @@
 . "$(dirname -- "$0")/_/husky.sh"

 pnpm lint
+npx lint-staged
@@ -4,5 +4,6 @@
  "editor.defaultFormatter": "esbenp.prettier-vscode",
  "[xml]": {
    "editor.defaultFormatter": "redhat.vscode-xml"
-  }
-}
+  },
+  "jest.rootPath": "./packages/core"
+}
@@ -20,7 +20,7 @@ In a new folder:
 export OPENAI_API_KEY="sk-......" # Replace with your key from https://platform.openai.com/account/api-keys
 pnpm init
 pnpm install typescript
-pnpm exec tsc –-init # if needed
+pnpm exec tsc --init # if needed
 pnpm install llamaindex
 pnpm install @types/node
 ```
@@ -36,7 +36,7 @@ async function main() {
  // Load essay from abramov.txt in Node
  const essay = await fs.readFile(
    "node_modules/llamaindex/examples/abramov.txt",
-    "utf-8"
+    "utf-8",
  );

  // Create Document object with essay
@@ -48,7 +48,7 @@ async function main() {
  // Query the index
  const queryEngine = index.asQueryEngine();
  const response = await queryEngine.query(
-    "What did the author do in college?"
+    "What did the author do in college?",
  );

  // Output response
@@ -61,7 +61,7 @@ main();
 Then you can run it using

 ```bash
-pnpm dlx ts-node example.ts
+pnpx ts-node example.ts
 ```

 ## Playground
@@ -84,6 +84,26 @@ Check out our NextJS playground at https://llama-playground.vercel.app/. The sou

 - [SimplePrompt](/packages/core/src/Prompt.ts): A simple standardized function call definition that takes in inputs and formats them in a template literal. SimplePrompts can be specialized using currying and combined using other SimplePrompt functions.

+## Note: NextJS:
+
+If you're using NextJS App Router, you'll need to use the NodeJS runtime (default) and add the following config to your next.config.js to have it use imports/exports in the same way Node does.
+
+```js
+export const runtime = "nodejs"; // default
+```
+
+```js
+// next.config.js
+/** @type {import('next').NextConfig} */
+const nextConfig = {
+  experimental: {
+    serverComponentsExternalPackages: ["pdf-parse"], // Puts pdf-parse in actual NodeJS mode with NextJS App Router
+  },
+};
+
+module.exports = nextConfig;
+```
+
 ## Supported LLMs:

 - OpenAI GPT-3.5-turbo and GPT-4
@@ -1,3 +1,3 @@
 module.exports = {
-  presets: [require.resolve('@docusaurus/core/lib/babel/preset')],
+  presets: [require.resolve("@docusaurus/core/lib/babel/preset")],
 };
@@ -8,40 +8,42 @@ LlamaIndex.TS helps you build LLM-powered applications (e.g. Q&A, chatbot) over

 In this high-level concepts guide, you will learn:

-* how an LLM can answer questions using your own data.
-* key concepts and modules in LlamaIndex.TS for composing your own query pipeline.
+- how an LLM can answer questions using your own data.
+- key concepts and modules in LlamaIndex.TS for composing your own query pipeline.

 ## Answering Questions Across Your Data

 LlamaIndex uses a two stage method when using an LLM with your data:

-1) **indexing stage**: preparing a knowledge base, and
-2) **querying stage**: retrieving relevant context from the knowledge to assist the LLM in responding to a question
+1. **indexing stage**: preparing a knowledge base, and
+2. **querying stage**: retrieving relevant context from the knowledge to assist the LLM in responding to a question

 ![](./_static/concepts/rag.jpg)

 This process is also known as Retrieval Augmented Generation (RAG).

-LlamaIndex.TS provides the essential toolkit for making both steps super easy. 
+LlamaIndex.TS provides the essential toolkit for making both steps super easy.

 Let's explore each stage in detail.

 ### Indexing Stage
+
 LlamaIndex.TS help you prepare the knowledge base with a suite of data connectors and indexes.

-![](./_static/concepts/indexing.jpg) 
+![](./_static/concepts/indexing.jpg)

 [**Data Loaders**](./modules/high_level/data_loader.md):
 A data connector (i.e. `Reader`) ingest data from different data sources and data formats into a simple `Document` representation (text and simple metadata).

 [**Documents / Nodes**](./modules/high_level/documents_and_nodes.md): A `Document` is a generic container around any data source - for instance, a PDF, an API output, or retrieved data from a database. A `Node` is the atomic unit of data in LlamaIndex and represents a "chunk" of a source `Document`. It's a rich representation that includes metadata and relationships (to other nodes) to enable accurate and expressive retrieval operations.

-[**Data Indexes**](./modules/high_level/data_index.md): 
+[**Data Indexes**](./modules/high_level/data_index.md):
 Once you've ingested your data, LlamaIndex helps you index data into a format that's easy to retrieve.

 Under the hood, LlamaIndex parses the raw documents into intermediate representations, calculates vector embeddings, and stores your data in-memory or to disk.

 ### Querying Stage
+
 In the querying stage, the query pipeline retrieves the most relevant context given a user query,
 and pass that to the LLM (along with the query) to synthesize a response.

@@ -57,12 +59,13 @@ These building blocks can be customized to reflect ranking preferences, as well
 ![](./_static/concepts/querying.jpg)

 #### Building Blocks
-[**Retrievers**](./modules/low_level/retriever.md): 
+
+[**Retrievers**](./modules/low_level/retriever.md):
 A retriever defines how to efficiently retrieve relevant context from a knowledge base (i.e. index) when given a query.
 The specific retrieval logic differs for difference indices, the most popular being dense retrieval against a vector index.

 [**Response Synthesizers**](./modules/low_level/response_synthesizer.md):
-A response synthesizer generates a response from an LLM, using a user query and a given set of retrieved text chunks.  
+A response synthesizer generates a response from an LLM, using a user query and a given set of retrieved text chunks.

 #### Pipelines

@@ -70,7 +73,6 @@ A response synthesizer generates a response from an LLM, using a user query and
 A query engine is an end-to-end pipeline that allow you to ask question over your data.
 It takes in a natural language query, and returns a response, along with reference context retrieved and passed to the LLM.

-
-[**Chat Engines**](./modules/high_level/chat_engine.md): 
+[**Chat Engines**](./modules/high_level/chat_engine.md):
 A chat engine is an end-to-end pipeline for having a conversation with your data
 (multiple back-and-forth instead of a single question & answer).
@@ -6,18 +6,20 @@ sidebar_position: 4

 We include several end-to-end examples using LlamaIndex.TS in the repository

+Check out the examples below, or try them out and complete them in minutes with the interactive GitHub Codespace tutorials provided by Dev-Docs [here](https://codespaces.new/team-dev-docs/lits-dev-docs-playground?devcontainer_path=.devcontainer%2Fjavascript_ltsquickstart%2Fdevcontainer.json).
+
 ## [Chat Engine](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/chatEngine.ts)

 Read a file and chat about it with the LLM.

-## [List Index](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/listIndex.ts)
-
-Create a list index and query it. This example also use the `LLMRetriever`, which will use the LLM to select the best nodes to use when generating answer.
-
 ## [Vector Index](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/vectorIndex.ts)

 Create a vector index and query it. The vector index will use embeddings to fetch the top k most relevant nodes. By default, the top k is 2.

+## [Summary Index](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/summaryIndex.ts)
+
+Create a summary index and query it. This example also uses the `LLMRetriever`, which will use the LLM to select the best nodes to use when generating an answer.
+
 ## [Save / Load an Index](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/storageContext.ts)

 Create and load a vector index. Persistance to disk in LlamaIndex.TS happens automatically once a storage context object is created.
@@ -28,7 +30,7 @@ Create a vector index and query it, while also configuring the the `LLM`, the `S

 ## [OpenAI LLM](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/openai.ts)

-Create an OpenAI LLM and directly use it for chat. 
+Create an OpenAI LLM and directly use it for chat.

 ## [Llama2 DeuceLLM](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/llamadeuce.ts)

@@ -40,4 +42,4 @@ Uses the `SubQuestionQueryEngine`, which breaks complex queries into multiple qu

 ## [Low Level Modules](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/lowlevel.ts)

-This example uses several low-level components, which removes the need for an actual query engine. These components can be used anywhere, in any application, or customized and sub-classed to meet your own needs.
+This example uses several low-level components, which removes the need for an actual query engine. These components can be used anywhere, in any application, or customized and sub-classed to meet your own needs.
@@ -0,0 +1,29 @@
+---
+sidebar_position: 5
+---
+
+# Environments
+
+LlamaIndex currently officially supports NodeJS 18 and NodeJS 20.
+
+## NextJS App Router
+
+If you're using NextJS App Router route handlers/serverless functions, you'll need to use the NodeJS mode:
+
+```js
+export const runtime = "nodejs"; // default
+```
+
+and you'll need to add an exception for pdf-parse in your next.config.js
+
+```js
+// next.config.js
+/** @type {import('next').NextConfig} */
+const nextConfig = {
+  experimental: {
+    serverComponentsExternalPackages: ["pdf-parse"], // Puts pdf-parse in actual NodeJS mode with NextJS App Router
+  },
+};
+
+module.exports = nextConfig;
+```
@@ -19,7 +19,7 @@ That's where **LlamaIndex.TS** comes in.

 LlamaIndex.TS provides the following tools:

- **Data loading** ingest your existing `txt` and `pdf` data directly
+- **Data loading** ingest your existing `.txt`, `.pdf`, `.csv`, `.md` and `.docx` data directly
 - **Data indexes** structure your data in intermediate representations that are easy and performant for LLMs to consume.
 - **Engines** provide natural language access to your data. For example:
  - Query engines are powerful retrieval interfaces for knowledge-augmented output.
@@ -1,3 +1,3 @@
 label: "Modules"
 collapsed: false
-position: 5
+position: 5
@@ -1 +1 @@
-label: High-Level Modules
+label: High-Level Modules
@@ -6,23 +6,18 @@ sidebar_position: 2

 An index is the basic container and organization for your data. LlamaIndex.TS supports two indexes:

- `ListIndex` - will send every `Node` in the index to the LLM in order to generate a response
 - `VectorStoreIndex` - will send the top-k `Node`s to the LLM when generating a response. The default top-k is 2.
+- `SummaryIndex` - will send every `Node` in the index to the LLM in order to generate a response

 ```typescript
-import {
-  Document,
-  VectorStoreIndex,
-} from "llamaindex";
+import { Document, VectorStoreIndex } from "llamaindex";

 const document = new Document({ text: "test" });

-const index = await VectorStoreIndex.fromDocuments(
-  [document]
-);
+const index = await VectorStoreIndex.fromDocuments([document]);
 ```

 ## API Reference

- [ListIndex](../../api/classes/ListIndex.md)
- [VectorStoreIndex](../../api/classes/VectorStoreIndex.md)
+- [SummaryIndex](../../api/classes/SummaryIndex.md)
+- [VectorStoreIndex](../../api/classes/VectorStoreIndex.md)
@@ -4,7 +4,7 @@ sidebar_position: 1

 # Reader / Loader

-LlamaIndex.TS supports easy loading of files from folders using the `SimpleDirectoryReader` class. Currently, `.txt` and `.pdf` files are supported, with more planned in the future!
+LlamaIndex.TS supports easy loading of files from folders using the `SimpleDirectoryReader` class. Currently, `.txt`, `.pdf`, `.csv`, `.md` and `.docx` files are supported, with more planned in the future!

 ```typescript
 import { SimpleDirectoryReader } from "llamaindex";
@@ -9,7 +9,7 @@ sidebar_position: 0
 ```typescript
 import { Document } from "llamaindex";

-document = new Document({ text: "text", metadata: { "key": "val" }});
+document = new Document({ text: "text", metadata: { key: "val" } });
 ```

 ## API Reference
@@ -1 +1 @@
-label: Low-Level Modules
+label: Low-Level Modules
@@ -4,7 +4,7 @@ sidebar_position: 1

 # Embedding

-The embedding model in LlamaIndex is responsible for creating numerical representations of text. By default, LlamaIndex will use the `text-embedding-ada-002` model from OpenAI. 
+The embedding model in LlamaIndex is responsible for creating numerical representations of text. By default, LlamaIndex will use the `text-embedding-ada-002` model from OpenAI.

 This can be explicitly set in the `ServiceContext` object.

@@ -4,7 +4,7 @@ sidebar_position: 0

 # LLM

-The LLM is responsible for reading text and generating natural language responses to queries. By default, LlamaIndex.TS uses `gpt-3.5-turbo`. 
+The LLM is responsible for reading text and generating natural language responses to queries. By default, LlamaIndex.TS uses `gpt-3.5-turbo`.

 The LLM can be explicitly set in the `ServiceContext` object.

@@ -19,4 +19,4 @@ const serviceContext = serviceContextFromDefaults({ llm: openaiLLM });
 ## API Reference

 - [OpenAI](../../api/classes/OpenAI.md)
- [ServiceContext](../../api/interfaces/ServiceContext.md)
+- [ServiceContext](../../api/interfaces/ServiceContext.md)
@@ -7,10 +7,7 @@ sidebar_position: 3
 The `NodeParser` in LlamaIndex is responbile for splitting `Document` objects into more manageable `Node` objects. When you call `.fromDocuments()`, the `NodeParser` from the `ServiceContext` is used to do this automatically for you. Alternatively, you can use it to split documents ahead of time.

 ```typescript
-import {
-  Document,
-  SimpleNodeParser,
-} from "llamaindex";
+import { Document, SimpleNodeParser } from "llamaindex";

 const nodeParser = new SimpleNodeParser();
 const nodes = nodeParser.getNodesFromDocuments([
@@ -25,7 +22,7 @@ The underlying text splitter will split text by sentences. It can also be used a
 ```typescript
 import { SentenceSplitter } from "llamaindex";

-const splitter = new SentenceSplitter({ chunkSize: 1, });
+const splitter = new SentenceSplitter({ chunkSize: 1 });

 const textSplits = splitter.splitText("Hello World");
 ```
@@ -6,26 +6,21 @@ sidebar_position: 6

 The ResponseSynthesizer is responsible for sending the query, nodes, and prompt templates to the LLM to generate a response. There are a few key modes for generating a response:

- `Refine`: "create and refine" an answer by sequentially going through each retrieved text chunk. 
-    This makes a separate LLM call per Node. Good for more detailed answers.
- `CompactAndRefine` (default): "compact" the prompt during each LLM call by stuffing as 
-    many text chunks that can fit within the maximum prompt size. If there are 
-    too many chunks to stuff in one prompt, "create and refine" an answer by going through
-    multiple compact prompts. The same as `refine`, but should result in less LLM calls.
- `TreeSummarize`: Given a set of text chunks and the query, recursively construct a tree 
-    and return the root node as the response. Good for summarization purposes.
+- `Refine`: "create and refine" an answer by sequentially going through each retrieved text chunk.
+  This makes a separate LLM call per Node. Good for more detailed answers.
+- `CompactAndRefine` (default): "compact" the prompt during each LLM call by stuffing as
+  many text chunks that can fit within the maximum prompt size. If there are
+  too many chunks to stuff in one prompt, "create and refine" an answer by going through
+  multiple compact prompts. The same as `refine`, but should result in less LLM calls.
+- `TreeSummarize`: Given a set of text chunks and the query, recursively construct a tree
+  and return the root node as the response. Good for summarization purposes.
 - `SimpleResponseBuilder`: Given a set of text chunks and the query, apply the query to each text
-    chunk while accumulating the responses into an array. Returns a concatenated string of all
-    responses. Good for when you need to run the same query separately against each text
-    chunk.
+  chunk while accumulating the responses into an array. Returns a concatenated string of all
+  responses. Good for when you need to run the same query separately against each text
+  chunk.

 ```typescript
-import {
-  TextNode,
-  NodeWithScore,
-  ResponseSynthesizer,
-  CompactAndRefine
-} from "llamaindex";
+import { NodeWithScore, ResponseSynthesizer, TextNode } from "llamaindex";

 const responseSynthesizer = new ResponseSynthesizer();

@@ -42,7 +37,7 @@ const nodesWithScore: NodeWithScore[] = [

 const response = await responseSynthesizer.synthesize(
  "What age am I?",
-  nodesWithScore
+  nodesWithScore,
 );
 console.log(response.response);
 ```
@@ -4,10 +4,10 @@ sidebar_position: 5

 # Retriever

-A retriever in LlamaIndex is what is used to fetch `Node`s from an index using a query string. For example, a `ListIndexRetriever` will fetch all nodes no matter the query. Meanwhile, a `VectorIndexRetriever` will only fetch the top-k most similar nodes.
+A retriever in LlamaIndex is what is used to fetch `Node`s from an index using a query string. A `VectorIndexRetriever` will fetch the top-k most similar nodes. Meanwhile, a `SummaryIndexRetriever` will fetch all nodes no matter the query.

 ```typescript
-const retriever = vector_index.asRetriever()
+const retriever = vector_index.asRetriever();
 retriever.similarityTopK = 3;

 // Fetch nodes!
@@ -16,6 +16,6 @@ const nodesWithScore = await retriever.retrieve("query string");

 ## API Reference

- [ListIndexRetriever](../../api/classes/ListIndexRetriever.md)
- [ListIndexLLMRetriever](../../api/classes/ListIndexLLMRetriever.md)
+- [SummaryIndexRetriever](../../api/classes/SummaryIndexRetriever.md)
+- [SummaryIndexLLMRetriever](../../api/classes/SummaryIndexLLMRetriever.md)
 - [VectorIndexRetriever](../../api/classes/VectorIndexRetriever.md)
@@ -11,10 +11,14 @@ Right now, only saving and loading from disk is supported, with future integrati
 ```typescript
 import { Document, VectorStoreIndex, storageContextFromDefaults } from "./src";

-const storageContext = await storageContextFromDefaults({ persistDir: "./storage" });
+const storageContext = await storageContextFromDefaults({
+  persistDir: "./storage",
+});

 const document = new Document({ text: "Test Text" });
-const index = await VectorStoreIndex.fromDocuments([document], { storageContext });
+const index = await VectorStoreIndex.fromDocuments([document], {
+  storageContext,
+});
 ```

 ## API Reference
@@ -25,7 +25,7 @@ async function main() {
  // Load essay from abramov.txt in Node
  const essay = await fs.readFile(
    "node_modules/llamaindex/examples/abramov.txt",
-    "utf-8"
+    "utf-8",
  );

  // Create Document object with essay
@@ -37,7 +37,7 @@ async function main() {
  // Query the index
  const queryEngine = index.asQueryEngine();
  const response = await queryEngine.query(
-    "What did the author do in college?"
+    "What did the author do in college?",
  );

  // Output response
@@ -139,6 +139,8 @@ const config = {
        entryPoints: ["../../packages/core/src/index.ts"],
        tsconfig: "../../packages/core/tsconfig.json",
        readme: "none",
+        sourceLinkTemplate:
+          "https://github.com/run-llama/LlamaIndexTS/blob/{gitRevision}/{path}#L{line}",
        sidebar: {
          position: 6,
        },
@@ -15,24 +15,24 @@
    "typecheck": "tsc"
  },
  "dependencies": {
-    "@docusaurus/core": "2.4.1",
-    "@docusaurus/preset-classic": "2.4.1",
-    "@docusaurus/remark-plugin-npm2yarn": "^2.4.1",
+    "@docusaurus/core": "2.4.3",
+    "@docusaurus/preset-classic": "2.4.3",
+    "@docusaurus/remark-plugin-npm2yarn": "^2.4.3",
    "@mdx-js/react": "^1.6.22",
    "clsx": "^1.2.1",
-    "postcss": "^8.4.28",
+    "postcss": "^8.4.31",
    "prism-react-renderer": "^1.3.5",
    "raw-loader": "^4.0.2",
    "react": "^17.0.2",
    "react-dom": "^17.0.2"
  },
  "devDependencies": {
-    "@docusaurus/module-type-aliases": "2.4.1",
-    "@docusaurus/types": "^2.4.1",
-    "@tsconfig/docusaurus": "^1.0.7",
+    "@docusaurus/module-type-aliases": "2.4.3",
+    "@docusaurus/types": "^2.4.3",
+    "@tsconfig/docusaurus": "^2.0.1",
    "docusaurus-plugin-typedoc": "^0.19.2",
    "typedoc": "^0.24.8",
-    "typedoc-plugin-markdown": "^3.15.4",
+    "typedoc-plugin-markdown": "^3.16.0",
    "typescript": "^4.9.5"
  },
  "browserslist": {
@@ -1,5 +1,5 @@
-import React from "react";
 import clsx from "clsx";
+import React from "react";
 import styles from "./styles.module.css";

 type FeatureItem = {
@@ -18,7 +18,7 @@
 }

 /* For readability concerns, you should choose a lighter palette in dark mode. */
-[data-theme='dark'] {
+[data-theme="dark"] {
  --ifm-color-primary: #25c2a0;
  --ifm-color-primary-dark: #21af90;
  --ifm-color-primary-darker: #1fa588;
@@ -0,0 +1,47 @@
+/* eslint-disable turbo/no-undeclared-env-vars */
+import * as dotenv from "dotenv";
+import * as fs from "fs";
+import { MongoClient } from "mongodb";
+
+// Load environment variables from local .env file
+dotenv.config();
+
+const jsonFile = "tinytweets.json";
+const mongoUri = process.env.MONGODB_URI!;
+const databaseName = process.env.MONGODB_DATABASE!;
+const collectionName = process.env.MONGODB_COLLECTION!;
+
+/**
+ * Inserts every entry of the JSON array stored in `jsonFile` into the
+ * MongoDB collection selected by MONGODB_DATABASE and MONGODB_COLLECTION.
+ *
+ * The client is closed in a `finally` block so a failed insert does not
+ * leave the connection open.
+ */
+async function importJsonToMongo() {
+  // Load the tweets from a local file
+  const tweets = JSON.parse(fs.readFileSync(jsonFile, "utf-8"));
+
+  // Create a new client and connect to the server
+  const client = new MongoClient(mongoUri);
+
+  try {
+    const db = client.db(databaseName);
+    const collection = db.collection(collectionName);
+
+    // Insert the tweets into mongo
+    await collection.insertMany(tweets);
+
+    console.log(
+      `Data imported successfully to the MongoDB collection ${collectionName}.`,
+    );
+  } finally {
+    await client.close();
+  }
+}
+
+// Run the import function; report failures instead of leaving the rejection unhandled
+importJsonToMongo().catch((error) => {
+  console.error(error);
+  process.exitCode = 1;
+});
@@ -0,0 +1,65 @@
+/* eslint-disable turbo/no-undeclared-env-vars */
+import * as dotenv from "dotenv";
+import {
+  MongoDBAtlasVectorSearch,
+  SimpleMongoReader,
+  storageContextFromDefaults,
+  VectorStoreIndex,
+} from "llamaindex";
+import { MongoClient } from "mongodb";
+
+// Load environment variables from local .env file
+dotenv.config();
+
+const mongoUri = process.env.MONGODB_URI!;
+const databaseName = process.env.MONGODB_DATABASE!;
+const collectionName = process.env.MONGODB_COLLECTION!;
+const vectorCollectionName = process.env.MONGODB_VECTORS!;
+const indexName = process.env.MONGODB_VECTOR_INDEX!;
+
+/**
+ * Loads every document of the source collection, builds a vector index from
+ * them and stores it in the Atlas collection named by MONGODB_VECTORS.
+ *
+ * The client is closed in a `finally` block so a failure while loading or
+ * indexing does not leave the connection open.
+ */
+async function loadAndIndex() {
+  // Create a new client and connect to the server
+  const client = new MongoClient(mongoUri);
+
+  try {
+    // load objects from mongo and convert them into LlamaIndex Document objects
+    // llamaindex has a special class that does this for you
+    // it pulls every object in a given collection
+    const reader = new SimpleMongoReader(client);
+    const documents = await reader.loadData(databaseName, collectionName, [
+      "full_text",
+    ]);
+
+    // create Atlas as a vector store
+    const vectorStore = new MongoDBAtlasVectorSearch({
+      mongodbClient: client,
+      dbName: databaseName,
+      collectionName: vectorCollectionName, // this is where your embeddings will be stored
+      indexName: indexName, // this is the name of the index you will need to create
+    });
+
+    // now create an index from all the Documents and store them in Atlas
+    const storageContext = await storageContextFromDefaults({ vectorStore });
+    await VectorStoreIndex.fromDocuments(documents, { storageContext });
+    console.log(
+      `Successfully created embeddings in the MongoDB collection ${vectorCollectionName}.`,
+    );
+  } finally {
+    await client.close();
+  }
+}
+
+// Run the script; report failures instead of leaving the rejection unhandled
+loadAndIndex().catch((error) => {
+  console.error(error);
+  process.exitCode = 1;
+});
+
+// you can't query your index yet because you need to create a vector search index in mongodb's UI now
@@ -0,0 +1,50 @@
+/* eslint-disable turbo/no-undeclared-env-vars */
+import * as dotenv from "dotenv";
+import {
+  MongoDBAtlasVectorSearch,
+  serviceContextFromDefaults,
+  VectorStoreIndex,
+} from "llamaindex";
+import { MongoClient } from "mongodb";
+
+// Load environment variables from local .env file
+dotenv.config();
+
+/**
+ * Runs a sample question against the vector index stored in MongoDB Atlas
+ * and prints the response.
+ *
+ * The client is closed in a `finally` block so a failed query does not leave
+ * the connection open.
+ */
+async function query() {
+  const client = new MongoClient(process.env.MONGODB_URI!);
+
+  try {
+    const serviceContext = serviceContextFromDefaults();
+    const store = new MongoDBAtlasVectorSearch({
+      mongodbClient: client,
+      dbName: process.env.MONGODB_DATABASE!,
+      collectionName: process.env.MONGODB_VECTORS!,
+      indexName: process.env.MONGODB_VECTOR_INDEX!,
+    });
+
+    const index = await VectorStoreIndex.fromVectorStore(store, serviceContext);
+
+    // Retrieve the 20 most similar nodes for the query engine to answer from
+    const retriever = index.asRetriever({ similarityTopK: 20 });
+    const queryEngine = index.asQueryEngine({ retriever });
+    const result = await queryEngine.query(
+      "What does the author think of web frameworks?",
+    );
+    console.log(result.response);
+  } finally {
+    await client.close();
+  }
+}
+
+// Run the query; report failures instead of leaving the rejection unhandled
+query().catch((error) => {
+  console.error(error);
+  process.exitCode = 1;
+});
@@ -0,0 +1,20 @@
+# mongodb-llamaindexts
+
+## 0.0.3
+
+### Patch Changes
+
+- Updated dependencies [3bab231]
+  - llamaindex@0.0.37
+
+## 0.0.2
+
+### Patch Changes
+
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+  - llamaindex@0.0.36
@@ -0,0 +1,127 @@
+# LlamaIndexTS retrieval augmented generation with MongoDB
+
+### Prepare Environment
+
+Make sure to run `pnpm install` and set your OpenAI environment variable before running these examples.
+
+```
+pnpm install
+export OPENAI_API_KEY="sk-..."
+```
+
+### Sign up for MongoDB Atlas
+
+We'll be using MongoDB's hosted database service, [MongoDB Atlas](https://www.mongodb.com/cloud/atlas/register). You can sign up for free and get a small hosted cluster for free:
+
+![MongoDB Atlas signup](./docs/1_signup.png)
+
+The signup process will walk you through the process of creating your cluster and ensuring it's configured for you to access. Once the cluster is created, choose "Connect" and then "Connect to your application". Choose Python, and you'll be presented with a connection string that looks like this:
+
+![MongoDB Atlas connection string](./docs/2_connection_string.png)
+
+### Set up environment variables
+
+Copy the connection string (make sure you include your password) and put it into a file called `.env` in the root of this repo. It should look like this:
+
+```
+MONGODB_URI=mongodb+srv://seldo:xxxxxxxxxxx@llamaindexdemocluster.xfrdhpz.mongodb.net/?retryWrites=true&w=majority
+```
+
+You will also need to choose a name for your database, and the collection where we will store the tweets, and also include them in .env. They can be any string, but this is what we used:
+
+```
+MONGODB_DATABASE=tiny_tweets_db
+MONGODB_COLLECTION=tiny_tweets_collection
+```
+
+### Import tweets into MongoDB
+
+You are now ready to import our ready-made data set into Mongo. This is the file `tinytweets.json`, a selection of approximately 1000 tweets from @seldo on Twitter in mid-2019. With your environment set up you can do this by running
+
+```
+pnpm ts-node 1_import.ts
+```
+
+If you don't want to use tweets, you can point `jsonFile` in `1_import.ts` at any other file containing an array of JSON objects, but you will need to modify some code later to make sure the correct field gets indexed. There is no LlamaIndex-specific code here; you can load your data into Mongo any way you want to.
+
+### Load and index your data
+
+Now we're ready to index our data. To do this, LlamaIndex will pull your text out of Mongo, split it into chunks, and then send those chunks to OpenAI to be turned into [vector embeddings](https://docs.llamaindex.ai/en/stable/understanding/indexing/indexing.html#what-is-an-embedding). The embeddings will then be stored in a new collection in Mongo. This will take a while depending how much text you have, but the good news is that once it's done you will be able to query quickly without needing to re-index.
+
+We'll be using OpenAI to do the embedding, so now is when you need to [generate an OpenAI API key](https://platform.openai.com/account/api-keys) if you haven't already and add it to your `.env` file like this:
+
+```
+OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+```
+
+You'll also need to pick a name for the new collection where the embeddings will be stored, and add it to `.env`, along with the name of a vector search index (we'll be creating this in the next step, after you've indexed your data):
+
+```
+MONGODB_VECTORS=tiny_tweets_vectors
+MONGODB_VECTOR_INDEX=tiny_tweets_vector_index
+```
+
+If the data you're indexing is the tweets we gave you, you're ready to go:
+
+```bash
+pnpm ts-node 2_load_and_index.ts
+```
+
+> Note: this script runs for a couple of minutes and currently doesn't show any progress.
+
+What you're doing here is creating a Reader which loads the data out of Mongo in the collection and database specified. It looks for text in a set of specific keys in each object. In this case we've given it just one key, "full_text".
+
+Now you're creating a vector search client for Mongo. In addition to a MongoDB client object, you again tell it what database everything is in. This time you give it the name of the collection where you'll store the vector embeddings, and the name of the vector search index you'll create in the next step.
+
+### Create a vector search index
+
+Now if all has gone well you should be able to log in to the Mongo Atlas UI and see two collections in your database: the original data in `tiny_tweets_collection`, and the vector embeddings in `tiny_tweets_vectors`.
+
+![MongoDB Atlas collections](./docs/3_vectors_in_db.png)
+
+Now it's time to create the vector search index so that you can query the data.
+It's not yet possible to programmatically create a vector search index using the [`createIndex`](https://www.mongodb.com/docs/manual/reference/method/db.collection.createIndex/) function, therefore we have to create one manually in the UI.
+To do so, first, click the Search tab, and then click "Create Search Index":
+
+![MongoDB Atlas create search index](./docs/4_search_tab.png)
+
+We have to use the JSON editor, as the Visual Editor does not yet support creating a vector search index:
+
+![MongoDB Atlas JSON editor](./docs/5_json_editor.png)
+
+Now under "database and collection" select `tiny_tweets_db` and within that select `tiny_tweets_vectors`. Then under "Index name" enter `tiny_tweets_vector_index` (or whatever value you put for MONGODB_VECTOR_INDEX in `.env`). Under that, you'll want to enter this JSON object:
+
+```json
+{
+  "mappings": {
+    "dynamic": true,
+    "fields": {
+      "embedding": {
+        "dimensions": 1536,
+        "similarity": "cosine",
+        "type": "knnVector"
+      }
+    }
+  }
+}
+```
+
+This tells Mongo that the `embedding` field in each document (in the `tiny_tweets_vectors` collection) is a vector of 1536 dimensions (this is the size of embeddings used by OpenAI), and that we want to use cosine similarity to compare vectors. You don't need to worry too much about these values unless you want to use a different LLM to OpenAI entirely.
+
+The UI will ask you to review and confirm your choices, then you need to wait a minute or two while it generates the index. If all goes well, you should see something like this screen:
+
+![MongoDB Atlas index created](./docs/7_index_created.png)
+
+Now you're ready to query your data!
+
+### Run a test query
+
+You can do this by running
+
+```bash
+pnpm ts-node 3_query.ts
+```
+
+This sets up a connection to Atlas just like `2_load_and_index.ts` did, then it creates a [query engine](https://docs.llamaindex.ai/en/stable/understanding/querying/querying.html#getting-started) and runs a query against it.
+
+If all is well, you should get a nuanced opinion about web frameworks.
@@ -0,0 +1,17 @@
+{
+  "version": "0.0.3",
+  "private": true,
+  "name": "mongodb-llamaindexts",
+  "dependencies": {
+    "llamaindex": "workspace:*",
+    "dotenv": "^16.3.1",
+    "mongodb": "^6.2.0"
+  },
+  "devDependencies": {
+    "@types/node": "^18.18.6",
+    "ts-node": "^10.9.1"
+  },
+  "scripts": {
+    "lint": "eslint ."
+  }
+}
@@ -1,5 +1,152 @@
 # simple

+## 0.0.35
+
+### Patch Changes
+
+- Updated dependencies [3bab231]
+  - llamaindex@0.0.37
+
+## 0.0.34
+
+### Patch Changes
+
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+  - llamaindex@0.0.36
+
+## 0.0.33
+
+### Patch Changes
+
+- Updated dependencies [63f2108]
+  - llamaindex@0.0.35
+
+## 0.0.32
+
+### Patch Changes
+
+- Updated dependencies [2a27e21]
+  - llamaindex@0.0.34
+
+## 0.0.31
+
+### Patch Changes
+
+- Updated dependencies [5e2e92c]
+  - llamaindex@0.0.33
+
+## 0.0.30
+
+### Patch Changes
+
+- Updated dependencies [90c0b83]
+- Updated dependencies [dfd22aa]
+  - llamaindex@0.0.32
+
+## 0.0.29
+
+### Patch Changes
+
+- Updated dependencies [6c55b2d]
+- Updated dependencies [8aa8c65]
+- Updated dependencies [6c55b2d]
+  - llamaindex@0.0.31
+
+## 0.0.28
+
+### Patch Changes
+
+- Updated dependencies [139abad]
+- Updated dependencies [139abad]
+- Updated dependencies [eb0e994]
+- Updated dependencies [eb0e994]
+- Updated dependencies [139abad]
+  - llamaindex@0.0.30
+
+## 0.0.27
+
+### Patch Changes
+
+- Updated dependencies [a52143b]
+- Updated dependencies [1b7fd95]
+- Updated dependencies [0db3f41]
+  - llamaindex@0.0.29
+
+## 0.0.26
+
+### Patch Changes
+
+- Updated dependencies [96bb657]
+- Updated dependencies [96bb657]
+- Updated dependencies [837854d]
+  - llamaindex@0.0.28
+
+## 0.0.25
+
+### Patch Changes
+
+- Updated dependencies [4a5591b]
+- Updated dependencies [4a5591b]
+- Updated dependencies [4a5591b]
+  - llamaindex@0.0.27
+
+## 0.0.24
+
+### Patch Changes
+
+- Updated dependencies [5bb55bc]
+  - llamaindex@0.0.26
+
+## 0.0.23
+
+### Patch Changes
+
+- Updated dependencies [e21eca2]
+- Updated dependencies [40a8f07]
+- Updated dependencies [40a8f07]
+  - llamaindex@0.0.25
+
+## 0.0.22
+
+### Patch Changes
+
+- Updated dependencies [e4af7b3]
+- Updated dependencies [259fe63]
+  - llamaindex@0.0.24
+
+## 0.0.21
+
+### Patch Changes
+
+- Updated dependencies
+- Updated dependencies [9d6b2ed]
+  - llamaindex@0.0.23
+
+## 0.0.20
+
+### Patch Changes
+
+- Updated dependencies [454f3f8]
+- Updated dependencies [454f3f8]
+- Updated dependencies [454f3f8]
+- Updated dependencies [454f3f8]
+- Updated dependencies [99df58f]
+  - llamaindex@0.0.22
+
+## 0.0.19
+
+### Patch Changes
+
+- Updated dependencies [f7a57ca]
+- Updated dependencies [0a09de2]
+- Updated dependencies [f7a57ca]
+  - llamaindex@0.0.21
+
 ## 0.0.18

 ### Patch Changes
@@ -1,9 +1,10 @@
 # Simple Examples

 Due to packaging, you will need to run these commands to get started.
+
 ```bash
-pnpm --filter llamaindex build
 pnpm install
+pnpm --filter llamaindex build
 ```

 Then run the examples with `ts-node`, for example `npx ts-node vectorIndex.ts`
@@ -4,8 +4,6 @@ import { Anthropic } from "llamaindex";
  const anthropic = new Anthropic();
  const result = await anthropic.chat([
    { content: "You want to talk in rhymes.", role: "system" },
-    { content: "Hello, world!", role: "user" },
-    { content: "Hello!", role: "assistant" },
    {
      content:
        "How much wood would a woodchuck chuck if a woodchuck could chuck wood?",
@@ -4,7 +4,6 @@ import {
  PapaCSVReader,
  ResponseSynthesizer,
  serviceContextFromDefaults,
-  SimplePrompt,
  VectorStoreIndex,
 } from "llamaindex";

@@ -23,9 +22,7 @@ async function main() {
    serviceContext,
  });

-  const csvPrompt: SimplePrompt = (input) => {
-    const { context = "", query = "" } = input;
-
+  const csvPrompt = ({ context = "", query = "" }) => {
    return `The following CSV file is loaded from ${path}
 \`\`\`csv
 ${context}
@@ -0,0 +1,24 @@
+import { SimpleDirectoryReader } from "llamaindex";
+
+// Hook passed to SimpleDirectoryReader in main(): logs every event it receives
+// and returns false for names ending in ".pdf", true otherwise.
+// NOTE(review): presumably the reader skips a file when false is returned — confirm.
+function callback(
+  category: string,
+  name: string,
+  status: any,
+  message?: string,
+): boolean {
+  console.log(category, name, status, message);
+  if (name.endsWith(".pdf")) {
+    console.log("I DON'T WANT PDF FILES!");
+    return false;
+  }
+  return true;
+}
+
+async function main() {
+  // Load the ./data directory; `callback` is handed to the reader's constructor
+  const reader = new SimpleDirectoryReader(callback);
+  const params = { directoryPath: "./data" };
+  await reader.loadData(params);
+}
+
+main().catch(console.error);
@@ -0,0 +1,21 @@
+import { HTMLReader, VectorStoreIndex } from "llamaindex";
+
+async function main() {
+  // Load the HTML changelog file into documents
+  const reader = new HTMLReader();
+  const documents = await reader.loadData("data/18-1_Changelog.html");
+
+  // Split text and create embeddings. Store them in a VectorStoreIndex
+  const index = await VectorStoreIndex.fromDocuments(documents);
+
+  // Query the index
+  const queryEngine = index.asQueryEngine();
+  const response = await queryEngine.query(
+    "What were the notable changes in 18.1?",
+  );
+
+  // Output response
+  console.log(response.toString());
+}
+
+main().catch(console.error);
@@ -0,0 +1,32 @@
+import {
+  Document,
+  KeywordTableIndex,
+  KeywordTableRetrieverMode,
+} from "llamaindex";
+import essay from "./essay";
+
+// Builds a KeywordTableIndex over the essay and asks the same question once per
+// retriever mode, printing each answer.
+async function main() {
+  const document = new Document({ text: essay, id_: "essay" });
+  const index = await KeywordTableIndex.fromDocuments([document]);
+
+  const allModes: KeywordTableRetrieverMode[] = [
+    KeywordTableRetrieverMode.DEFAULT,
+    KeywordTableRetrieverMode.SIMPLE,
+    KeywordTableRetrieverMode.RAKE,
+  ];
+  // Use a plain loop rather than `forEach`: `forEach` discards the promise an
+  // async callback returns, so main() would resolve before any query finished
+  // and a failing query would never reach the `.catch` on the `main()` call.
+  for (const mode of allModes) {
+    const queryEngine = index.asQueryEngine({
+      retriever: index.asRetriever({
+        mode,
+      }),
+    });
+    const response = await queryEngine.query(
+      "What did the author do growing up?",
+    );
+    console.log(response.toString());
+  }
+}
+
+main().catch((e: Error) => {
+  console.error(e, e.stack);
+});
@@ -1,9 +1,9 @@
 import {
  Document,
-  TextNode,
  NodeWithScore,
  ResponseSynthesizer,
  SimpleNodeParser,
+  TextNode,
 } from "llamaindex";

 (async () => {
@@ -29,7 +29,7 @@ import {

  const response = await responseSynthesizer.synthesize(
    "What age am I?",
-    nodesWithScore
+    nodesWithScore,
  );
  console.log(response.response);
 })();
@@ -0,0 +1,20 @@
+import { MarkdownReader, VectorStoreIndex } from "llamaindex";
+
+async function main() {
+  // Load the llamaindex README found under node_modules
+  const reader = new MarkdownReader();
+  const documents = await reader.loadData("node_modules/llamaindex/README.md");
+
+  // Split text and create embeddings. Store them in a VectorStoreIndex
+  const index = await VectorStoreIndex.fromDocuments(documents);
+
+  // Query the index
+  const queryEngine = index.asQueryEngine();
+
+  const response = await queryEngine.query("What does the example code do?");
+
+  // Output response
+  console.log(response.toString());
+}
+
+main().catch(console.error);
@@ -0,0 +1,68 @@
+import { MongoClient } from "mongodb";
+import { VectorStoreIndex } from "../../packages/core/src/indices";
+import { Document } from "../../packages/core/src/Node";
+import { SimpleMongoReader } from "../../packages/core/src/readers/SimpleMongoReader";
+
+import { stdin as input, stdout as output } from "node:process";
+import readline from "node:readline/promises";
+
+// Loads documents through SimpleMongoReader, builds a VectorStoreIndex from
+// them, then answers questions typed on stdin until an empty line is entered.
+async function main() {
+  //Dummy test code
+  const options: object = {};
+  const projections: object = { embedding: 0 };
+  const uri: string = process.env.MONGODB_URI ?? "fake_uri";
+  const client: MongoClient = new MongoClient(uri);
+
+  //Where the real code starts
+  const MR = new SimpleMongoReader(client);
+  const documents: Document[] = await MR.loadData(
+    "data",
+    "posts",
+    1,
+    {},
+    options,
+    projections,
+  );
+
+  //
+  //If you need to look at low-level details of
+  // a queryEngine (for example, needing to check each individual node)
+  //
+
+  // Split text and create embeddings. Store them in a VectorStoreIndex
+  // var storageContext = await storageContextFromDefaults({});
+  // var serviceContext = serviceContextFromDefaults({});
+  // const docStore = storageContext.docStore;
+
+  // for (const doc of documents) {
+  //   docStore.setDocumentHash(doc.id_, doc.hash);
+  // }
+  // const nodes = serviceContext.nodeParser.getNodesFromDocuments(documents);
+  // console.log(nodes);
+
+  //
+  //Making Vector Store from documents
+  //
+
+  const index = await VectorStoreIndex.fromDocuments(documents);
+  // Create query engine
+  const queryEngine = index.asQueryEngine();
+
+  const rl = readline.createInterface({ input, output });
+  try {
+    while (true) {
+      const query = await rl.question("Query: ");
+
+      // An empty answer ends the session
+      if (!query) {
+        break;
+      }
+
+      const response = await queryEngine.query(query);
+
+      // Output response
+      console.log(response.toString());
+    }
+  } finally {
+    // Release stdin so the open readline interface does not keep the process alive.
+    // NOTE(review): the MongoClient is not closed here; confirm whether
+    // SimpleMongoReader closes it after loadData.
+    rl.close();
+  }
+}
+
+// Report failures instead of leaving an unhandled promise rejection.
+main().catch(console.error);
@@ -0,0 +1,89 @@
+import { Client } from "@notionhq/client";
+import { program } from "commander";
+import { NotionReader, VectorStoreIndex } from "llamaindex";
+import { stdin as input, stdout as output } from "node:process";
+// readline/promises is still experimental so not in @types/node yet
+// @ts-ignore
+import readline from "node:readline/promises";
+
+// CLI entry point. Without a page id it lists the pages the integration can see;
+// with one it loads that page, indexes it and answers questions typed on stdin
+// until an empty line is entered.
+program
+  .argument("[page]", "Notion page id (must be provided)")
+  .action(async (page, _options, command) => {
+    // Initializing a client
+
+    if (!process.env.NOTION_TOKEN) {
+      console.log(
+        "No NOTION_TOKEN found in environment variables. You will need to register an integration https://www.notion.com/my-integrations and put it in your NOTION_TOKEN environment variable.",
+      );
+      return;
+    }
+
+    const notion = new Client({
+      auth: process.env.NOTION_TOKEN,
+    });
+
+    if (!page) {
+      // No page id given: search for pages, most recently edited first
+      const response = await notion.search({
+        filter: {
+          value: "page",
+          property: "object",
+        },
+        sort: {
+          direction: "descending",
+          timestamp: "last_edited_time",
+        },
+      });
+
+      const { results } = response;
+
+      if (results.length === 0) {
+        console.log(
+          "No pages found. You will need to share it with your integration. (tap the three dots on the top right, find Add connections, and add your integration)",
+        );
+        return;
+      } else {
+        // Keep only results that carry a url, reduced to id + url for display
+        const pages = results
+          .map((result) => {
+            if (!("url" in result)) {
+              return null;
+            }
+
+            return {
+              id: result.id,
+              url: result.url,
+            };
+          })
+          .filter((page) => page !== null);
+        console.log("Found pages:");
+        console.table(pages);
+        console.log(`To run, run ts-node ${command.name()} [page id]`);
+        return;
+      }
+    }
+
+    const reader = new NotionReader({ client: notion });
+    const documents = await reader.loadData(page);
+    console.log(documents);
+
+    // Split text and create embeddings. Store them in a VectorStoreIndex
+    const index = await VectorStoreIndex.fromDocuments(documents);
+
+    // Create query engine
+    const queryEngine = index.asQueryEngine();
+
+    const rl = readline.createInterface({ input, output });
+    try {
+      while (true) {
+        const query = await rl.question("Query: ");
+
+        // An empty answer ends the session
+        if (!query) {
+          break;
+        }
+
+        const response = await queryEngine.query(query);
+
+        // Output response
+        console.log(response.toString());
+      }
+    } finally {
+      // Release stdin so the open readline interface does not keep the process alive.
+      rl.close();
+    }
+  });
+
+// The action handler is async, so parse with parseAsync and report any failure
+// instead of leaving an unhandled promise rejection.
+program.parseAsync().catch(console.error);
@@ -1,14 +1,7 @@
 import { OpenAI } from "llamaindex";

 (async () => {
-  const llm = new OpenAI({
-    model: "gpt-3.5-turbo",
-    temperature: 0.1,
-    additionalChatOptions: { frequency_penalty: 0.1 },
-    additionalSessionOptions: {
-      defaultHeaders: { "X-Test-Header-Please-Ignore": "true" },
-    },
-  });
+  const llm = new OpenAI({ model: "gpt-4-1106-preview", temperature: 0.1 });

  // complete api
  const response1 = await llm.complete("How are you?");
@@ -16,7 +9,7 @@ import { OpenAI } from "llamaindex";

  // chat api
  const response2 = await llm.chat([
-    { content: "Tell me a joke!", role: "user" },
+    { content: "Tell me a joke.", role: "user" },
  ]);
  console.log(response2.message.content);
 })();
@@ -1,12 +1,16 @@
 {
-  "version": "0.0.18",
+  "version": "0.0.35",
  "private": true,
  "name": "simple",
  "dependencies": {
+    "@notionhq/client": "^2.2.13",
+    "@pinecone-database/pinecone": "^1.1.2",
+    "commander": "^11.1.0",
    "llamaindex": "workspace:*"
  },
  "devDependencies": {
-    "@types/node": "^18.17.6"
+    "@types/node": "^18.18.6",
+    "ts-node": "^10.9.1"
  },
  "scripts": {
    "lint": "eslint ."
@@ -0,0 +1,33 @@
+# Postgres Vector Store
+
+There are two scripts available here: load-docs.ts and query.ts
+
+## Prerequisites
+
+You'll need a postgres database instance against which to run these scripts. A simple docker command would look like this:
+
+> `docker run -d --rm --name vector-db -p 5432:5432 -e "POSTGRES_HOST_AUTH_METHOD=trust" ankane/pgvector`
+
+Set the PGHOST and PGUSER (and PGPASSWORD) environment variables to match your database setup.
+
+You'll also need a value for OPENAI_API_KEY in your environment.
+
+**NOTE:** Using `--rm` in the example docker command above means that the vector store will be deleted every time the container is stopped. For production purposes, use a volume to ensure persistence across restarts.
+
+## Setup and Loading Docs
+
+Read and follow the instructions in the README.md file located one directory up to make sure your JS/TS dependencies are set up. The commands listed below are also run from that parent directory.
+
+To import documents and save the embedding vectors to your database:
+
+> `npx ts-node pg-vector-store/load-docs.ts data`
+
+where data is the directory containing your input files. Using the _data_ directory in the example above will read all of the files in that directory using the llamaindexTS default readers for each file type.
+
+## RAG Querying
+
+To query using the resulting vector store:
+
+> `npx ts-node pg-vector-store/query.ts`
+
+The script will prompt for a question, then process and present the answer using the PGVectorStore data and your OpenAI API key. It will continue to prompt until you enter `q`, `quit` or `exit` as the next query.
@@ -0,0 +1,68 @@
+// load-docs.ts
+import fs from "fs/promises";
+import {
+  SimpleDirectoryReader,
+  storageContextFromDefaults,
+  VectorStoreIndex,
+} from "llamaindex";
+import { PGVectorStore } from "../../../packages/core/src/storage/vectorStore/PGVectorStore";
+
+// Lists the entries of `sourceDir`, each prefixed with `sourceDir + "/"`.
+async function getSourceFilenames(sourceDir: string) {
+  const fileNames = await fs.readdir(sourceDir);
+  return fileNames.map((file) => sourceDir + "/" + file);
+}
+
+// Hook passed to SimpleDirectoryReader in main(): logs every event it receives
+// and always returns true.
+function callback(
+  category: string,
+  name: string,
+  status: any,
+  message: string = "",
+): boolean {
+  console.log(category, name, status, message);
+  return true;
+}
+
+// Reads every document in a source directory and stores its embeddings in a
+// PGVectorStore collection named after that directory.
+// `args` is an argv-style array; args[2], when present, is the source
+// directory (default "../data"). Exits the process with 0 on success, 1 on failure.
+async function main(args: any) {
+  const sourceDir: string = args.length > 2 ? args[2] : "../data";
+
+  console.log(`Finding documents in ${sourceDir}`);
+  const fileList = await getSourceFilenames(sourceDir);
+  const count = fileList.length;
+  console.log(`Found ${count} files`);
+
+  console.log(`Importing contents from ${count} files in ${sourceDir}`);
+  try {
+    // Passing callback fn to the ctor here
+    // will enable logging to console.
+    // See callback fn, defined above.
+    const rdr = new SimpleDirectoryReader(callback);
+    const docs = await rdr.loadData({ directoryPath: sourceDir });
+
+    const pgvs = new PGVectorStore();
+    pgvs.setCollection(sourceDir);
+    // Wait for the old rows to be cleared before anything new is written, so
+    // the clear cannot race the inserts and its failure lands in the catch
+    // below. (Harmless if clearCollection turns out to be synchronous.)
+    await pgvs.clearCollection();
+
+    const ctx = await storageContextFromDefaults({ vectorStore: pgvs });
+
+    console.debug("  - creating vector store");
+    await VectorStoreIndex.fromDocuments(docs, {
+      storageContext: ctx,
+    });
+    console.debug("  - done.");
+  } catch (err) {
+    console.error(err);
+    console.log(
+      "If your PGVectorStore init failed, make sure to set env vars for PGUSER or USER, PGHOST, PGPORT and PGPASSWORD as needed.",
+    );
+    process.exit(1);
+  }
+
+  console.log(
+    "Done. Try running query.ts to ask questions against the imported embeddings.",
+  );
+  process.exit(0);
+}
+
+main(process.argv).catch((err) => console.error(err));
@@ -0,0 +1,67 @@
+import { VectorStoreIndex } from "../../../packages/core/src/indices/vectorStore/VectorStoreIndex";
+import { serviceContextFromDefaults } from "../../../packages/core/src/ServiceContext";
+import { PGVectorStore } from "../../../packages/core/src/storage/vectorStore/PGVectorStore";
+
+// Interactive loop: answers questions against the PGVectorStore until the user
+// enters q, quit or exit. Any error from setup or the loop exits with status 1.
+async function main() {
+  const readline = require("readline").createInterface({
+    input: process.stdin,
+    output: process.stdout,
+  });
+
+  try {
+    const pgvs = new PGVectorStore();
+    // Optional - set your collection name, default is no filter on this field.
+    // pgvs.setCollection();
+
+    const ctx = serviceContextFromDefaults();
+    const index = await VectorStoreIndex.fromVectorStore(pgvs, ctx);
+
+    // Build the query engine used by the prompt loop below
+    const queryEngine = await index.asQueryEngine();
+
+    let question = "";
+    while (!isQuit(question)) {
+      question = await getUserInput(readline);
+
+      // A quit word ends the process here, before it is ever sent as a query
+      if (isQuit(question)) {
+        readline.close();
+        process.exit(0);
+      }
+
+      // A failed query is reported and the loop keeps prompting
+      try {
+        const answer = await queryEngine.query(question);
+        console.log(answer.response);
+      } catch (error) {
+        console.error("Error:", error);
+      }
+    }
+  } catch (err) {
+    console.error(err);
+    console.log(
+      "If your PGVectorStore init failed, make sure to set env vars for PGUSER or USER, PGHOST, PGPORT and PGPASSWORD as needed.",
+    );
+    process.exit(1);
+  }
+}
+
+// True when the trimmed, lower-cased input is exactly "q", "quit" or "exit".
+function isQuit(question: string) {
+  return ["q", "quit", "exit"].includes(question.trim().toLowerCase());
+}
+
+// Shows the prompt on the given readline interface and resolves with whatever
+// the user typed; the returned promise is never rejected by this function.
+function getUserInput(readline: any): Promise<string> {
+  return new Promise((resolve) => {
+    readline.question(
+      "What would you like to know?\n>",
+      (userInput: string) => {
+        resolve(userInput);
+      },
+    );
+  });
+}
+
+main()
+  .catch(console.error)
+  .finally(() => {
+    // NOTE(review): `finally` runs after success and failure alike, so whenever
+    // this line is reached the process exits with status 1.
+    process.exit(1);
+  });
@@ -0,0 +1,23 @@
+import { Portkey } from "llamaindex";
+
+(async () => {
+  // Configure Portkey in "single" mode with one LLM entry.
+  // NOTE(review): `virtual_key` is hard-coded; presumably it must be replaced
+  // with a key from your own Portkey account — confirm.
+  const portkey = new Portkey({
+    mode: "single",
+    llms: [
+      {
+        provider: "anyscale",
+        virtual_key: "anyscale-3b3c04",
+        model: "meta-llama/Llama-2-13b-chat-hf",
+        max_tokens: 2000,
+      },
+    ],
+  });
+  const result = portkey.stream_chat([
+    { role: "system", content: "You are a helpful assistant." },
+    { role: "user", content: "Tell me a joke." },
+  ]);
+  // Write each streamed piece to stdout as it arrives
+  for await (const res of result) {
+    process.stdout.write(res);
+  }
+})();
@@ -1,9 +1,9 @@
 import {
  Document,
-  ListIndex,
-  ListRetrieverMode,
-  serviceContextFromDefaults,
  SimpleNodeParser,
+  SummaryIndex,
+  SummaryRetrieverMode,
+  serviceContextFromDefaults,
 } from "llamaindex";
 import essay from "./essay";

@@ -14,9 +14,11 @@ async function main() {
    }),
  });
  const document = new Document({ text: essay, id_: "essay" });
-  const index = await ListIndex.fromDocuments([document], { serviceContext });
+  const index = await SummaryIndex.fromDocuments([document], {
+    serviceContext,
+  });
  const queryEngine = index.asQueryEngine({
-    retriever: index.asRetriever({ mode: ListRetrieverMode.LLM }),
+    retriever: index.asRetriever({ mode: SummaryRetrieverMode.LLM }),
  });
  const response = await queryEngine.query(
    "What did the author do growing up?",
@@ -2,7 +2,10 @@ import fs from "node:fs/promises";

 import {
  Anthropic,
+  anthropicTextQaPrompt,
+  CompactAndRefine,
  Document,
+  ResponseSynthesizer,
  serviceContextFromDefaults,
  VectorStoreIndex,
 } from "llamaindex";
@@ -18,12 +21,20 @@ async function main() {

  // Split text and create embeddings. Store them in a VectorStoreIndex
  const serviceContext = serviceContextFromDefaults({ llm: new Anthropic() });
+
+  const responseSynthesizer = new ResponseSynthesizer({
+    responseBuilder: new CompactAndRefine(
+      serviceContext,
+      anthropicTextQaPrompt,
+    ),
+  });
+
  const index = await VectorStoreIndex.fromDocuments([document], {
    serviceContext,
  });

  // Query the index
-  const queryEngine = index.asQueryEngine();
+  const queryEngine = index.asQueryEngine({ responseSynthesizer });
  const response = await queryEngine.query(
    "What did the author do in college?",
  );
@@ -3,6 +3,7 @@ import {
  OpenAI,
  RetrieverQueryEngine,
  serviceContextFromDefaults,
+  SimilarityPostprocessor,
  VectorStoreIndex,
 } from "llamaindex";
 import essay from "./essay";
@@ -12,7 +13,7 @@ async function main() {
  const document = new Document({ text: essay, id_: "essay" });

  const serviceContext = serviceContextFromDefaults({
-    llm: new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.0 }),
+    llm: new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 }),
  });

  const index = await VectorStoreIndex.fromDocuments([document], {
@@ -21,8 +22,16 @@ async function main() {

  const retriever = index.asRetriever();
  retriever.similarityTopK = 5;
+  const nodePostprocessor = new SimilarityPostprocessor({
+    similarityCutoff: 0.7,
+  });
  // TODO: cannot pass responseSynthesizer into retriever query engine
-  const queryEngine = new RetrieverQueryEngine(retriever);
+  const queryEngine = new RetrieverQueryEngine(
+    retriever,
+    undefined,
+    undefined,
+    [nodePostprocessor],
+  );

  const response = await queryEngine.query(
    "What did the author do growing up?",
@@ -0,0 +1,197 @@
+import {
+  OpenAI,
+  ResponseSynthesizer,
+  RetrieverQueryEngine,
+  serviceContextFromDefaults,
+  TextNode,
+  TreeSummarize,
+  VectorIndexRetriever,
+  VectorStore,
+  VectorStoreIndex,
+  VectorStoreQuery,
+  VectorStoreQueryResult,
+} from "llamaindex";
+
+import { Index, Pinecone, RecordMetadata } from "@pinecone-database/pinecone";
+
+/**
+ * Please do not use this class in production; it's only for demonstration purposes.
+ */
+class PineconeVectorStore<T extends RecordMetadata = RecordMetadata>
+  implements VectorStore
+{
+  storesText = true;
+  isEmbeddingQuery = false;
+
+  indexName!: string;
+  pineconeClient!: Pinecone;
+  index!: Index<T>;
+
+  // Keeps the client and index name, and opens a handle on the named index.
+  constructor({ indexName, client }: { indexName: string; client: Pinecone }) {
+    this.indexName = indexName;
+    this.pineconeClient = client;
+    this.index = client.index<T>(indexName);
+  }
+
+  // Returns the Pinecone client given to the constructor.
+  client() {
+    return this.pineconeClient;
+  }
+
+  // Queries the Pinecone index with the query embedding and converts every
+  // match into a TextNode. `kwargs.filter`, when given, is forwarded to
+  // Pinecone as the query filter.
+  async query(
+    query: VectorStoreQuery,
+    kwargs?: any,
+  ): Promise<VectorStoreQueryResult> {
+    // Stays an empty vector when the query carries no embedding
+    let queryEmbedding: number[] = [];
+    if (query.queryEmbedding) {
+      if (typeof query.alpha === "number") {
+        // Scale every component of the embedding by alpha
+        const alpha = query.alpha;
+        queryEmbedding = query.queryEmbedding.map((v) => v * alpha);
+      } else {
+        queryEmbedding = query.queryEmbedding;
+      }
+    }
+
+    // The current LlamaIndexTS implementation only supports exact-match filters, so the filter is taken from kwargs instead (empty object when absent).
+    const filter = kwargs?.filter || {};
+
+    const response = await this.index.query({
+      filter,
+      vector: queryEmbedding,
+      topK: query.similarityTopK,
+      includeValues: true,
+      includeMetadata: true,
+    });
+
+    console.log(
+      `Numbers of vectors returned by Pinecone after preFilters are applied: ${
+        response?.matches?.length || 0
+      }.`,
+    );
+
+    const topKIds: string[] = [];
+    const topKNodes: TextNode[] = [];
+    const topKScores: number[] = [];
+
+    // Rebuilds node fields from the JSON string stored under the
+    // "_node_content" metadata key; throws when the metadata or that key is
+    // missing, or when its value is not a string.
+    const metadataToNode = (metadata?: T): Partial<TextNode> => {
+      if (!metadata) {
+        throw new Error("metadata is undefined.");
+      }
+
+      const nodeContent = metadata["_node_content"];
+      if (!nodeContent) {
+        throw new Error("nodeContent is undefined.");
+      }
+
+      if (typeof nodeContent !== "string") {
+        throw new Error("nodeContent is not a string.");
+      }
+
+      return JSON.parse(nodeContent);
+    };
+
+    if (response.matches) {
+      for (const match of response.matches) {
+        // The embedding comes from the match values, not from the stored JSON
+        const node = new TextNode({
+          ...metadataToNode(match.metadata),
+          embedding: match.values,
+        });
+
+        topKIds.push(match.id);
+        topKNodes.push(node);
+        // A match without a score is recorded with similarity 0
+        topKScores.push(match.score ?? 0);
+      }
+    }
+
+    const result = {
+      ids: topKIds,
+      nodes: topKNodes,
+      similarities: topKScores,
+    };
+
+    return result;
+  }
+
+  // No-op: resolves to an empty id list without writing anything.
+  add(): Promise<string[]> {
+    return Promise.resolve([]);
+  }
+
+  // Not implemented: throws synchronously rather than returning a rejected promise.
+  delete(): Promise<void> {
+    throw new Error("Method `delete` not implemented.");
+  }
+
+  // Not implemented: throws synchronously rather than returning a rejected promise.
+  persist(): Promise<void> {
+    throw new Error("Method `persist` not implemented.");
+  }
+}
+
+/**
+ * The goal of this example is to show how to use Pinecone as a vector store
+ * for LlamaIndexTS with(out) preFilters.
+ *
+ * It should not be used in production like that,
+ * as you might want to find a proper PineconeVectorStore implementation.
+ */
+async function main() {
+  // NOTE: these assignments unconditionally overwrite whatever is already in
+  // the environment; replace the placeholder strings with real values first.
+  process.env.PINECONE_API_KEY = "Your Pinecone API Key.";
+  process.env.PINECONE_ENVIRONMENT = "Your Pinecone Environment.";
+  process.env.PINECONE_PROJECT_ID = "Your Pinecone Project ID.";
+  process.env.PINECONE_INDEX_NAME = "Your Pinecone Index Name.";
+  process.env.OPENAI_API_KEY = "Your OpenAI API Key.";
+  process.env.OPENAI_API_ORGANIZATION = "Your OpenAI API Organization.";
+
+  // Wraps a Pinecone client in the demo vector store defined above
+  const getPineconeVectorStore = async () => {
+    return new PineconeVectorStore({
+      indexName: process.env.PINECONE_INDEX_NAME || "index-name",
+      client: new Pinecone(),
+    });
+  };
+
+  // Service context whose LLM is OpenAI gpt-4
+  const getServiceContext = () => {
+    const openAI = new OpenAI({
+      model: "gpt-4",
+      apiKey: process.env.OPENAI_API_KEY,
+    });
+
+    return serviceContextFromDefaults({
+      llm: openAI,
+    });
+  };
+
+  // Builds a query engine over the existing Pinecone index; `filter` is passed
+  // to RetrieverQueryEngine as its third argument.
+  // NOTE(review): presumably that argument reaches PineconeVectorStore.query as kwargs — confirm.
+  const getQueryEngine = async (filter: unknown) => {
+    const vectorStore = await getPineconeVectorStore();
+    const serviceContext = getServiceContext();
+
+    const vectorStoreIndex = await VectorStoreIndex.fromVectorStore(
+      vectorStore,
+      serviceContext,
+    );
+
+    const retriever = new VectorIndexRetriever({
+      index: vectorStoreIndex,
+      similarityTopK: 500,
+    });
+
+    const responseSynthesizer = new ResponseSynthesizer({
+      serviceContext,
+      responseBuilder: new TreeSummarize(serviceContext),
+    });
+
+    return new RetrieverQueryEngine(retriever, responseSynthesizer, {
+      filter,
+    });
+  };
+
+  // `whatever` stands for a key in your own record metadata; this filter keeps values from 1 to 100
+  const queryEngine = await getQueryEngine({
+    whatever: {
+      $gte: 1,
+      $lte: 100,
+    },
+  });
+
+  const response = await queryEngine.query("How many results do you have?");
+
+  console.log(response.toString());
+}
+
+main().catch(console.error);
@@ -0,0 +1,15 @@
+import { OpenAI } from "llamaindex";
+
+(async () => {
+  const llm = new OpenAI({ model: "gpt-4-vision-preview", temperature: 0.1 });
+
+  // complete api: one prompt string in, reply read from response.message.content
+  const response1 = await llm.complete("How are you?");
+  console.log(response1.message.content);
+
+  // chat api: a list of role-tagged messages in, reply read the same way
+  const response2 = await llm.chat([
+    { content: "Tell me a joke!", role: "user" },
+  ]);
+  console.log(response2.message.content);
+})();
@@ -4,8 +4,6 @@ import { Anthropic } from "llamaindex";
  const anthropic = new Anthropic();
  const result = await anthropic.chat([
    { content: "You want to talk in rhymes.", role: "system" },
-    { content: "Hello, world!", role: "user" },
-    { content: "Hello!", role: "assistant" },
    {
      content:
        "How much wood would a woodchuck chuck if a woodchuck could chuck wood?",
@@ -0,0 +1,33 @@
+import { ClipEmbedding, similarity, SimilarityType } from "llamaindex";
+
+// Embeds two captions and one image with CLIP and prints how similar each
+// caption is to the image.
+async function main() {
+  const clip = new ClipEmbedding();
+
+  // Get text embeddings
+  const text1 = "a car";
+  const textEmbedding1 = await clip.getTextEmbedding(text1);
+  const text2 = "a football match";
+  const textEmbedding2 = await clip.getTextEmbedding(text2);
+
+  // Get image embedding
+  const image =
+    "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg";
+  const imageEmbedding = await clip.getImageEmbedding(image);
+
+  // Calc similarity
+  const sim1 = similarity(
+    textEmbedding1,
+    imageEmbedding,
+    SimilarityType.DEFAULT,
+  );
+  const sim2 = similarity(
+    textEmbedding2,
+    imageEmbedding,
+    SimilarityType.DEFAULT,
+  );
+
+  console.log(`Similarity between "${text1}" and the image is ${sim1}`);
+  console.log(`Similarity between "${text2}" and the image is ${sim2}`);
+}
+
+// Report failures instead of leaving an unhandled promise rejection, matching
+// the other examples.
+main().catch(console.error);
@@ -4,7 +4,6 @@ import {
  PapaCSVReader,
  ResponseSynthesizer,
  serviceContextFromDefaults,
-  SimplePrompt,
  VectorStoreIndex,
 } from "llamaindex";

@@ -23,9 +22,7 @@ async function main() {
    serviceContext,
  });

-  const csvPrompt: SimplePrompt = (input) => {
-    const { context = "", query = "" } = input;
-
+  const csvPrompt = ({ context = "", query = "" }) => {
    return `The following CSV file is loaded from ${path}
 \`\`\`csv
 ${context}
@@ -0,0 +1,24 @@
+import { SimpleDirectoryReader } from "llamaindex";
+
+// Hook passed to SimpleDirectoryReader in main(): logs every event it receives
+// and returns false for names ending in ".pdf", true otherwise.
+// NOTE(review): presumably the reader skips a file when false is returned — confirm.
+function callback(
+  category: string,
+  name: string,
+  status: any,
+  message?: string,
+): boolean {
+  console.log(category, name, status, message);
+  if (name.endsWith(".pdf")) {
+    console.log("I DON'T WANT PDF FILES!");
+    return false;
+  }
+  return true;
+}
+
+async function main() {
+  // Load the ./data directory; `callback` is handed to the reader's constructor
+  const reader = new SimpleDirectoryReader(callback);
+  const params = { directoryPath: "./data" };
+  await reader.loadData(params);
+}
+
+main().catch(console.error);
@@ -0,0 +1,21 @@
+import { HTMLReader, VectorStoreIndex } from "llamaindex";
+
+async function main() {
+  // Load the HTML changelog file into documents
+  const reader = new HTMLReader();
+  const documents = await reader.loadData("data/18-1_Changelog.html");
+
+  // Split text and create embeddings. Store them in a VectorStoreIndex
+  const index = await VectorStoreIndex.fromDocuments(documents);
+
+  // Query the index
+  const queryEngine = index.asQueryEngine();
+  const response = await queryEngine.query(
+    "What were the notable changes in 18.1?",
+  );
+
+  // Output response
+  console.log(response.toString());
+}
+
+main().catch(console.error);
@@ -0,0 +1,32 @@
+import {
+  Document,
+  KeywordTableIndex,
+  KeywordTableRetrieverMode,
+} from "llamaindex";
+import essay from "./essay";
+
+// Builds a KeywordTableIndex over the essay and asks the same question once per
+// retriever mode, printing each answer.
+async function main() {
+  const document = new Document({ text: essay, id_: "essay" });
+  const index = await KeywordTableIndex.fromDocuments([document]);
+
+  const allModes: KeywordTableRetrieverMode[] = [
+    KeywordTableRetrieverMode.DEFAULT,
+    KeywordTableRetrieverMode.SIMPLE,
+    KeywordTableRetrieverMode.RAKE,
+  ];
+  // Use a plain loop rather than `forEach`: `forEach` discards the promise an
+  // async callback returns, so main() would resolve before any query finished
+  // and a failing query would never reach the `.catch` on the `main()` call.
+  for (const mode of allModes) {
+    const queryEngine = index.asQueryEngine({
+      retriever: index.asRetriever({
+        mode,
+      }),
+    });
+    const response = await queryEngine.query(
+      "What did the author do growing up?",
+    );
+    console.log(response.toString());
+  }
+}
+
+main().catch((e: Error) => {
+  console.error(e, e.stack);
+});
@@ -1,9 +1,9 @@
 import {
  Document,
-  TextNode,
  NodeWithScore,
  ResponseSynthesizer,
  SimpleNodeParser,
+  TextNode,
 } from "llamaindex";

 (async () => {
@@ -29,7 +29,7 @@ import {

  const response = await responseSynthesizer.synthesize(
    "What age am I?",
-    nodesWithScore
+    nodesWithScore,
  );
  console.log(response.response);
 })();
@@ -0,0 +1,20 @@
+import { MarkdownReader, VectorStoreIndex } from "llamaindex";
+
+/**
+ * Loads the llamaindex README as Markdown, indexes it and prints the answer
+ * to one question about it.
+ */
+async function main() {
+  // Load Markdown file
+  const readmeDocs = await new MarkdownReader().loadData(
+    "node_modules/llamaindex/README.md",
+  );
+
+  // Split text and create embeddings. Store them in a VectorStoreIndex
+  const vectorIndex = await VectorStoreIndex.fromDocuments(readmeDocs);
+
+  // Query the index
+  const engine = vectorIndex.asQueryEngine();
+  const answer = await engine.query("What does the example code do?");
+
+  // Output response
+  console.log(answer.toString());
+}
+
+main().catch(console.error);
@@ -0,0 +1,68 @@
+import { MongoClient } from "mongodb";
+import { Document } from "../../packages/core/src/Node";
+import { VectorStoreIndex } from "../../packages/core/src/indices";
+import { SimpleMongoReader } from "../../packages/core/src/readers/SimpleMongoReader";
+
+import { stdin as input, stdout as output } from "node:process";
+import readline from "node:readline/promises";
+
+/**
+ * Loads documents from MongoDB through SimpleMongoReader, stores them in a
+ * VectorStoreIndex and answers queries typed on stdin until an empty line is
+ * entered.
+ */
+async function main() {
+  //Dummy test code
+  const options: object = {};
+  const projections: object = { embedding: 0 };
+  const uri: string = process.env.MONGODB_URI ?? "fake_uri";
+  const client: MongoClient = new MongoClient(uri);
+
+  try {
+    //Where the real code starts
+    const MR = new SimpleMongoReader(client);
+    const documents: Document[] = await MR.loadData(
+      "data",
+      "posts",
+      1,
+      {},
+      options,
+      projections,
+    );
+
+    //
+    //If you need to look at low-level details of
+    // a queryEngine (for example, needing to check each individual node)
+    //
+
+    // Split text and create embeddings. Store them in a VectorStoreIndex
+    // var storageContext = await storageContextFromDefaults({});
+    // var serviceContext = serviceContextFromDefaults({});
+    // const docStore = storageContext.docStore;
+
+    // for (const doc of documents) {
+    //   docStore.setDocumentHash(doc.id_, doc.hash);
+    // }
+    // const nodes = serviceContext.nodeParser.getNodesFromDocuments(documents);
+    // console.log(nodes);
+
+    //
+    //Making Vector Store from documents
+    //
+
+    const index = await VectorStoreIndex.fromDocuments(documents);
+    // Create query engine
+    const queryEngine = index.asQueryEngine();
+
+    const rl = readline.createInterface({ input, output });
+    try {
+      while (true) {
+        const query = await rl.question("Query: ");
+
+        if (!query) {
+          break;
+        }
+
+        const response = await queryEngine.query(query);
+
+        // Output response
+        console.log(response.toString());
+      }
+    } finally {
+      // An open readline interface keeps waiting on stdin, which stops the
+      // process from exiting after the loop ends.
+      rl.close();
+    }
+  } finally {
+    // Close the Mongo client whether or not loading/querying succeeded.
+    await client.close();
+  }
+}
+
+// Report failures instead of leaving an unhandled promise rejection.
+main().catch(console.error);
@@ -0,0 +1,89 @@
+import { Client } from "@notionhq/client";
+import { program } from "commander";
+import { NotionReader, VectorStoreIndex } from "llamaindex";
+import { stdin as input, stdout as output } from "node:process";
+// readline/promises is still experimental so not in @types/node yet
+// @ts-ignore
+import readline from "node:readline/promises";
+
+// CLI entry point: with no page id it lists the pages visible to the Notion
+// integration; with a page id it loads that page, indexes it and answers
+// queries typed on stdin until an empty line is entered.
+program
+  .argument("[page]", "Notion page id (must be provided)")
+  .action(async (page, _options, command) => {
+    // Initializing a client
+
+    if (!process.env.NOTION_TOKEN) {
+      console.log(
+        "No NOTION_TOKEN found in environment variables. You will need to register an integration https://www.notion.com/my-integrations and put it in your NOTION_TOKEN environment variable.",
+      );
+      return;
+    }
+
+    const notion = new Client({
+      auth: process.env.NOTION_TOKEN,
+    });
+
+    if (!page) {
+      const response = await notion.search({
+        filter: {
+          value: "page",
+          property: "object",
+        },
+        sort: {
+          direction: "descending",
+          timestamp: "last_edited_time",
+        },
+      });
+
+      const { results } = response;
+
+      if (results.length === 0) {
+        console.log(
+          "No pages found. You will need to share it with your integration. (tap the three dots on the top right, find Add connections, and add your integration)",
+        );
+        return;
+      } else {
+        const pages = results
+          .map((result) => {
+            // Search results without a url cannot be listed; drop them below.
+            if (!("url" in result)) {
+              return null;
+            }
+
+            return {
+              id: result.id,
+              url: result.url,
+            };
+          })
+          .filter((page) => page !== null);
+        console.log("Found pages:");
+        console.table(pages);
+        console.log(`To run, run ts-node ${command.name()} [page id]`);
+        return;
+      }
+    }
+
+    const reader = new NotionReader({ client: notion });
+    const documents = await reader.loadData(page);
+    console.log(documents);
+
+    // Split text and create embeddings. Store them in a VectorStoreIndex
+    const index = await VectorStoreIndex.fromDocuments(documents);
+
+    // Create query engine
+    const queryEngine = index.asQueryEngine();
+
+    const rl = readline.createInterface({ input, output });
+    try {
+      while (true) {
+        const query = await rl.question("Query: ");
+
+        if (!query) {
+          break;
+        }
+
+        const response = await queryEngine.query(query);
+
+        // Output response
+        console.log(response.toString());
+      }
+    } finally {
+      // An open readline interface keeps waiting on stdin, which stops the
+      // process from exiting after the loop ends.
+      rl.close();
+    }
+  });
+
+// The action handler is async, so use parseAsync and report a rejection
+// instead of leaving it unhandled.
+program.parseAsync().catch(console.error);
@@ -1,13 +1,15 @@
 import { OpenAI } from "llamaindex";

 (async () => {
-  const llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.0 });
-  
+  const llm = new OpenAI({ model: "gpt-4-1106-preview", temperature: 0.1 });
+
  // complete api
  const response1 = await llm.complete("How are you?");
  console.log(response1.message.content);

  // chat api
-  const response2 = await llm.chat([{ content: "Tell me a joke!", role: "user" }]);
+  const response2 = await llm.chat([
+    { content: "Tell me a joke.", role: "user" },
+  ]);
  console.log(response2.message.content);
 })();
@@ -0,0 +1,23 @@
+import { Portkey } from "llamaindex";
+
+// Streams one chat completion through Portkey and writes each chunk to stdout.
+(async () => {
+  const portkey = new Portkey({
+    mode: "single",
+    llms: [
+      {
+        provider: "anyscale",
+        virtual_key: "anyscale-3b3c04",
+        model: "meta-llama/Llama-2-13b-chat-hf",
+        max_tokens: 2000,
+      },
+    ],
+  });
+  const result = portkey.stream_chat([
+    { role: "system", content: "You are a helpful assistant." },
+    { role: "user", content: "Tell me a joke." },
+  ]);
+  for await (const res of result) {
+    process.stdout.write(res);
+  }
+})();
@@ -0,0 +1,37 @@
+import { execSync } from "child_process";
+import {
+  PDFReader,
+  serviceContextFromDefaults,
+  storageContextFromDefaults,
+  VectorStoreIndex,
+} from "llamaindex";
+
+const STORAGE_DIR = "./cache";
+
+/**
+ * Persists a vector index for the PDF under STORAGE_DIR, makes that directory
+ * read-only and then checks that the index can still be opened from it.
+ */
+async function main() {
+  const serviceContext = serviceContextFromDefaults({});
+
+  // write the index to disk
+  const writableStorage = await storageContextFromDefaults({
+    persistDir: STORAGE_DIR,
+  });
+  const pdfDocuments = await new PDFReader().loadData("data/brk-2022.pdf");
+  await VectorStoreIndex.fromDocuments(pdfDocuments, {
+    storageContext: writableStorage,
+    serviceContext,
+  });
+  console.log("wrote index to disk - now trying to read it");
+
+  // make index dir read only
+  execSync(`chmod -R 555 ${STORAGE_DIR}`);
+
+  // reopen index
+  const storageContext = await storageContextFromDefaults({
+    persistDir: STORAGE_DIR,
+  });
+  await VectorStoreIndex.init({ storageContext, serviceContext });
+  console.log("read only index successfully opened");
+}
+
+main().catch(console.error);
@@ -1,9 +1,9 @@
 import {
  Document,
-  ListIndex,
-  ListRetrieverMode,
-  serviceContextFromDefaults,
  SimpleNodeParser,
+  SummaryIndex,
+  SummaryRetrieverMode,
+  serviceContextFromDefaults,
 } from "llamaindex";
 import essay from "./essay";

@@ -14,9 +14,11 @@ async function main() {
    }),
  });
  const document = new Document({ text: essay, id_: "essay" });
-  const index = await ListIndex.fromDocuments([document], { serviceContext });
+  const index = await SummaryIndex.fromDocuments([document], {
+    serviceContext,
+  });
  const queryEngine = index.asQueryEngine({
-    retriever: index.asRetriever({ mode: ListRetrieverMode.LLM }),
+    retriever: index.asRetriever({ mode: SummaryRetrieverMode.LLM }),
  });
  const response = await queryEngine.query(
    "What did the author do growing up?",
@@ -2,7 +2,10 @@ import fs from "node:fs/promises";

 import {
  Anthropic,
+  anthropicTextQaPrompt,
+  CompactAndRefine,
  Document,
+  ResponseSynthesizer,
  serviceContextFromDefaults,
  VectorStoreIndex,
 } from "llamaindex";
@@ -18,12 +21,20 @@ async function main() {

  // Split text and create embeddings. Store them in a VectorStoreIndex
  const serviceContext = serviceContextFromDefaults({ llm: new Anthropic() });
+
+  const responseSynthesizer = new ResponseSynthesizer({
+    responseBuilder: new CompactAndRefine(
+      serviceContext,
+      anthropicTextQaPrompt,
+    ),
+  });
+
  const index = await VectorStoreIndex.fromDocuments([document], {
    serviceContext,
  });

  // Query the index
-  const queryEngine = index.asQueryEngine();
+  const queryEngine = index.asQueryEngine({ responseSynthesizer });
  const response = await queryEngine.query(
    "What did the author do in college?",
  );
@@ -3,6 +3,7 @@ import {
  OpenAI,
  RetrieverQueryEngine,
  serviceContextFromDefaults,
+  SimilarityPostprocessor,
  VectorStoreIndex,
 } from "llamaindex";
 import essay from "./essay";
@@ -12,7 +13,7 @@ async function main() {
  const document = new Document({ text: essay, id_: "essay" });

  const serviceContext = serviceContextFromDefaults({
-    llm: new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.0 }),
+    llm: new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 }),
  });

  const index = await VectorStoreIndex.fromDocuments([document], {
@@ -21,8 +22,16 @@ async function main() {

  const retriever = index.asRetriever();
  retriever.similarityTopK = 5;
+  const nodePostprocessor = new SimilarityPostprocessor({
+    similarityCutoff: 0.7,
+  });
  // TODO: cannot pass responseSynthesizer into retriever query engine
-  const queryEngine = new RetrieverQueryEngine(retriever);
+  const queryEngine = new RetrieverQueryEngine(
+    retriever,
+    undefined,
+    undefined,
+    [nodePostprocessor],
+  );

  const response = await queryEngine.query(
    "What did the author do growing up?",
@@ -0,0 +1,197 @@
+import {
+  OpenAI,
+  ResponseSynthesizer,
+  RetrieverQueryEngine,
+  serviceContextFromDefaults,
+  TextNode,
+  TreeSummarize,
+  VectorIndexRetriever,
+  VectorStore,
+  VectorStoreIndex,
+  VectorStoreQuery,
+  VectorStoreQueryResult,
+} from "llamaindex";
+
+import { Index, Pinecone, RecordMetadata } from "@pinecone-database/pinecone";
+
+/**
+ * Demonstration-only VectorStore that reads from a Pinecone index.
+ * Please do not use this class in production; it's only for demonstration purposes.
+ */
+class PineconeVectorStore<T extends RecordMetadata = RecordMetadata>
+  implements VectorStore
+{
+  storesText = true;
+  isEmbeddingQuery = false;
+
+  indexName!: string;
+  pineconeClient!: Pinecone;
+  index!: Index<T>;
+
+  constructor({ indexName, client }: { indexName: string; client: Pinecone }) {
+    this.indexName = indexName;
+    this.pineconeClient = client;
+    this.index = client.index<T>(indexName);
+  }
+
+  /** Returns the Pinecone client this store was constructed with. */
+  client() {
+    return this.pineconeClient;
+  }
+
+  /**
+   * Queries the Pinecone index and converts every match into a TextNode.
+   *
+   * @param query supplies the embedding (scaled by `alpha` when it is a number) and `similarityTopK`.
+   * @param kwargs optional bag whose `filter` entry is passed to Pinecone as the query filter.
+   * @returns ids, nodes and similarity scores of the matches, in the order Pinecone returned them.
+   * @throws Error when a match has no metadata or no string `_node_content` entry.
+   */
+  async query(
+    query: VectorStoreQuery,
+    kwargs?: any,
+  ): Promise<VectorStoreQueryResult> {
+    // No embedding means an empty vector; a numeric alpha scales every component.
+    const { queryEmbedding: rawEmbedding, alpha } = query;
+    let vector: number[] = [];
+    if (rawEmbedding) {
+      vector =
+        typeof alpha === "number"
+          ? rawEmbedding.map((component) => component * alpha)
+          : rawEmbedding;
+    }
+
+    // Current LlamaIndexTS implementation only supports exact match filters, so we use kwargs instead.
+    const response = await this.index.query({
+      filter: kwargs?.filter || {},
+      vector,
+      topK: query.similarityTopK,
+      includeValues: true,
+      includeMetadata: true,
+    });
+
+    const matchCount = response?.matches?.length || 0;
+    console.log(
+      `Numbers of vectors returned by Pinecone after preFilters are applied: ${matchCount}.`,
+    );
+
+    // The serialized node is stored as JSON under the "_node_content" metadata key.
+    const parseNode = (metadata?: T): Partial<TextNode> => {
+      if (!metadata) {
+        throw new Error("metadata is undefined.");
+      }
+
+      const serialized = metadata["_node_content"];
+      if (!serialized) {
+        throw new Error("nodeContent is undefined.");
+      }
+      if (typeof serialized !== "string") {
+        throw new Error("nodeContent is not a string.");
+      }
+
+      return JSON.parse(serialized);
+    };
+
+    const matches = response.matches ?? [];
+    const nodes = matches.map(
+      (match) =>
+        new TextNode({
+          ...parseNode(match.metadata),
+          embedding: match.values,
+        }),
+    );
+
+    return {
+      ids: matches.map((match) => match.id),
+      nodes,
+      similarities: matches.map((match) => match.score ?? 0),
+    };
+  }
+
+  /** No-op in this demo: nothing is written and an empty id list is returned. */
+  add(): Promise<string[]> {
+    return Promise.resolve([]);
+  }
+
+  /** Not supported by this demo store; always throws. */
+  delete(): Promise<void> {
+    throw new Error("Method `delete` not implemented.");
+  }
+
+  /** Not supported by this demo store; always throws. */
+  persist(): Promise<void> {
+    throw new Error("Method `persist` not implemented.");
+  }
+}
+
+/**
+ * The goal of this example is to show how to use Pinecone as a vector store
+ * for LlamaIndexTS with(out) preFilters.
+ *
+ * It should not be used in production like that,
+ * as you might want to find a proper PineconeVectorStore implementation.
+ */
+async function main() {
+  // Placeholders are applied only to variables that are not already set, so
+  // values exported in the environment are no longer overwritten.
+  process.env.PINECONE_API_KEY ??= "Your Pinecone API Key.";
+  process.env.PINECONE_ENVIRONMENT ??= "Your Pinecone Environment.";
+  process.env.PINECONE_PROJECT_ID ??= "Your Pinecone Project ID.";
+  process.env.PINECONE_INDEX_NAME ??= "Your Pinecone Index Name.";
+  process.env.OPENAI_API_KEY ??= "Your OpenAI API Key.";
+  process.env.OPENAI_API_ORGANIZATION ??= "Your OpenAI API Organization.";
+
+  const getPineconeVectorStore = async () => {
+    return new PineconeVectorStore({
+      indexName: process.env.PINECONE_INDEX_NAME || "index-name",
+      client: new Pinecone(),
+    });
+  };
+
+  const getServiceContext = () => {
+    const openAI = new OpenAI({
+      model: "gpt-4",
+      apiKey: process.env.OPENAI_API_KEY,
+    });
+
+    return serviceContextFromDefaults({
+      llm: openAI,
+    });
+  };
+
+  // Builds a query engine whose third constructor argument carries the
+  // filter object handed to the retriever query engine.
+  const getQueryEngine = async (filter: unknown) => {
+    const vectorStore = await getPineconeVectorStore();
+    const serviceContext = getServiceContext();
+
+    const vectorStoreIndex = await VectorStoreIndex.fromVectorStore(
+      vectorStore,
+      serviceContext,
+    );
+
+    const retriever = new VectorIndexRetriever({
+      index: vectorStoreIndex,
+      similarityTopK: 500,
+    });
+
+    const responseSynthesizer = new ResponseSynthesizer({
+      serviceContext,
+      responseBuilder: new TreeSummarize(serviceContext),
+    });
+
+    return new RetrieverQueryEngine(retriever, responseSynthesizer, {
+      filter,
+    });
+  };
+
+  // whatever is a key from your metadata
+  const queryEngine = await getQueryEngine({
+    whatever: {
+      $gte: 1,
+      $lte: 100,
+    },
+  });
+
+  const response = await queryEngine.query("How many results do you have?");
+
+  console.log(response.toString());
+}
+
+main().catch(console.error);
@@ -0,0 +1,15 @@
+import { OpenAI } from "llamaindex";
+
+// Calls both the completion and the chat API of the vision-preview model and
+// prints each reply.
+(async () => {
+  const llm = new OpenAI({
+    model: "gpt-4-vision-preview",
+    temperature: 0.1,
+  });
+
+  // complete api
+  const completion = await llm.complete("How are you?");
+  console.log(completion.message.content);
+
+  // chat api
+  const joke = await llm.chat([{ content: "Tell me a joke!", role: "user" }]);
+  console.log(joke.message.content);
+})();
@@ -3,7 +3,7 @@
  "scripts": {
    "build": "turbo run build",
    "dev": "turbo run dev",
-    "format": "prettier --write \"**/*.{ts,tsx,md}\"",
+    "format": "prettier --write \"**/*.{js,jsx,ts,tsx,md}\"",
    "lint": "turbo run lint",
    "prepare": "husky install",
    "test": "turbo run test",
@@ -11,24 +11,27 @@
    "publish-snapshot": "turbo run build lint test && changeset version --snapshot && changeset publish"
  },
  "devDependencies": {
-    "@turbo/gen": "^1.10.12",
-    "@types/jest": "^29.5.3",
-    "eslint": "^7.32.0",
+    "@changesets/cli": "^2.26.2",
+    "@turbo/gen": "^1.10.16",
+    "@types/jest": "^29.5.10",
+    "eslint": "^8.54.0",
    "eslint-config-custom": "workspace:*",
    "husky": "^8.0.3",
-    "jest": "^29.6.2",
-    "prettier": "^3.0.2",
-    "prettier-plugin-organize-imports": "^3.2.3",
+    "jest": "^29.7.0",
+    "lint-staged": "^15.1.0",
+    "prettier": "^3.1.0",
+    "prettier-plugin-organize-imports": "^3.2.4",
    "ts-jest": "^29.1.1",
-    "turbo": "^1.10.12"
-  },
-  "packageManager": "pnpm@7.15.0",
-  "dependencies": {
-    "@changesets/cli": "^2.26.2"
+    "turbo": "^1.10.16"
  },
+  "packageManager": "pnpm@8.10.5+sha256.a4bd9bb7b48214bbfcd95f264bd75bb70d100e5d4b58808f5cd6ab40c6ac21c5",
  "pnpm": {
    "overrides": {
-      "trim": "1.0.1"
+      "trim": "1.0.1",
+      "@babel/traverse": "7.23.2"
    }
+  },
+  "lint-staged": {
+    "*.{js,jsx,ts,tsx,md}": "prettier --write"
  }
 }
@@ -1,5 +1,135 @@
 # llamaindex

+## 0.0.37
+
+### Patch Changes
+
+- 3bab231: Fixed errors (#225 and #226) Thanks @marcusschiesser
+
+## 0.0.36
+
+### Patch Changes
+
+- Support for Claude 2.1
+- Add AssemblyAI integration (thanks @Swimburger)
+- Use cryptoJS (thanks @marcusschiesser)
+- Add PGVectorStore (thanks @mtutty)
+- Add CLIP embeddings (thanks @marcusschiesser)
+- Add MongoDB support (thanks @marcusschiesser)
+
+## 0.0.35
+
+### Patch Changes
+
+- 63f2108: Add multimodal support (thanks @marcusschiesser)
+
+## 0.0.34
+
+### Patch Changes
+
+- 2a27e21: Add support for gpt-3.5-turbo-1106
+
+## 0.0.33
+
+### Patch Changes
+
+- 5e2e92c: gpt-4-1106-preview and gpt-4-vision-preview from OpenAI dev day
+
+## 0.0.32
+
+### Patch Changes
+
+- 90c0b83: Add HTMLReader (thanks @mtutty)
+- dfd22aa: Add observer/filter to the SimpleDirectoryReader (thanks @mtutty)
+
+## 0.0.31
+
+### Patch Changes
+
+- 6c55b2d: Give HistoryChatEngine pluggable options (thanks @marcusschiesser)
+- 8aa8c65: Add SimilarityPostProcessor (thanks @TomPenguin)
+- 6c55b2d: Added LLMMetadata (thanks @marcusschiesser)
+
+## 0.0.30
+
+### Patch Changes
+
+- 139abad: Streaming improvements including Anthropic (thanks @kkang2097)
+- 139abad: Portkey integration (Thank you @noble-varghese)
+- eb0e994: Add export for PromptHelper (thanks @zigamall)
+- eb0e994: Publish ESM module again
+- 139abad: Pinecone demo (thanks @Einsenhorn)
+
+## 0.0.29
+
+### Patch Changes
+
+- a52143b: Added DocxReader for Word documents (thanks @jayantasamaddar)
+- 1b7fd95: Updated OpenAI streaming (thanks @kkang2097)
+- 0db3f41: Migrated to Tiktoken lite, which hopefully fixes the Windows issue
+
+## 0.0.28
+
+### Patch Changes
+
+- 96bb657: Typesafe metadata (thanks @TomPenguin)
+- 96bb657: MongoReader (thanks @kkang2097)
+- 837854d: Make OutputParser less strict and add tests (Thanks @kkang2097)
+
+## 0.0.27
+
+### Patch Changes
+
+- 4a5591b: Chat History summarization (thanks @marcusschiesser)
+- 4a5591b: Notion database support (thanks @TomPenguin)
+- 4a5591b: KeywordIndex (thanks @swk777)
+
+## 0.0.26
+
+### Patch Changes
+
+- 5bb55bc: Add notion loader (thank you @TomPenguin!)
+
+## 0.0.25
+
+### Patch Changes
+
+- e21eca2: OpenAI 4.3.1 and Anthropic 0.6.2
+- 40a8f07: Update READMEs (thanks @andfk)
+- 40a8f07: Bug: missing exports from storage (thanks @aashutoshrathi)
+
+## 0.0.24
+
+### Patch Changes
+
+- e4af7b3: Renamed ListIndex to SummaryIndex to better indicate its use.
+- 259fe63: Strong types for prompts.
+
+## 0.0.23
+
+### Patch Changes
+
+- Added MetadataMode to ResponseSynthesizer (thanks @TomPenguin)
+- 9d6b2ed: Added Markdown Reader (huge shoutout to @swk777)
+
+## 0.0.22
+
+### Patch Changes
+
+- 454f3f8: CJK sentence splitting (thanks @TomPenguin)
+- 454f3f8: Export options for Windows formatted text files
+- 454f3f8: Disable long sentence splitting by default
+- 454f3f8: Make sentence splitter not split on decimals.
+- 99df58f: Anthropic 0.6.1 and OpenAI 4.2.0. Changed Anthropic timeout back to 60s
+
+## 0.0.21
+
+### Patch Changes
+
+- f7a57ca: Fixed metadata deserialization (thanks @marcagve)
+- 0a09de2: Update to OpenAI 4.1.0
+- f7a57ca: ChatGPT optimized prompts (thanks @LoganMarkewich)
+
 ## 0.0.20

 ### Patch Changes
@@ -20,7 +20,7 @@ In a new folder:
 export OPENAI_API_KEY="sk-......" # Replace with your key from https://platform.openai.com/account/api-keys
 pnpm init
 pnpm install typescript
-pnpm exec tsc –-init # if needed
+pnpm exec tsc --init # if needed
 pnpm install llamaindex
 pnpm install @types/node
 ```
@@ -36,7 +36,7 @@ async function main() {
  // Load essay from abramov.txt in Node
  const essay = await fs.readFile(
    "node_modules/llamaindex/examples/abramov.txt",
-    "utf-8"
+    "utf-8",
  );

  // Create Document object with essay
@@ -48,7 +48,7 @@ async function main() {
  // Query the index
  const queryEngine = index.asQueryEngine();
  const response = await queryEngine.query(
-    "What did the author do in college?"
+    "What did the author do in college?",
  );

  // Output response
@@ -61,7 +61,7 @@ main();
 Then you can run it using

 ```bash
-pnpm dlx ts-node example.ts
+pnpx ts-node example.ts
 ```

 ## Playground
@@ -1,34 +1,53 @@
 {
  "name": "llamaindex",
-  "version": "0.0.20",
+  "version": "0.0.37",
+  "license": "MIT",
  "dependencies": {
-    "@anthropic-ai/sdk": "^0.6.0",
+    "@anthropic-ai/sdk": "^0.9.1",
+    "@notionhq/client": "^2.2.13",
+    "@xenova/transformers": "^2.8.0",
+    "crypto-js": "^4.2.0",
+    "js-tiktoken": "^1.0.8",
    "lodash": "^4.17.21",
-    "openai": "^4.0.1",
+    "mammoth": "^1.6.0",
+    "md-utils-ts": "^2.0.0",
+    "mongodb": "^6.3.0",
+    "notion-md-crawler": "^0.0.2",
+    "openai": "^4.19.1",
    "papaparse": "^5.4.1",
    "pdf-parse": "^1.1.1",
-    "replicate": "^0.16.1",
-    "tiktoken-node": "^0.0.6",
-    "uuid": "^9.0.0",
+    "pg": "^8.11.3",
+    "pgvector": "^0.1.5",
+    "portkey-ai": "^0.1.16",
+    "rake-modified": "^1.0.8",
+    "replicate": "^0.21.1",
+    "string-strip-html": "^13.4.3",
+    "uuid": "^9.0.1",
    "wink-nlp": "^1.14.3"
  },
  "devDependencies": {
-    "@types/lodash": "^4.14.197",
-    "@types/node": "^18.17.6",
-    "@types/papaparse": "^5.3.7",
-    "@types/pdf-parse": "^1.1.1",
-    "@types/uuid": "^9.0.2",
+    "@types/crypto-js": "^4.2.1",
+    "@types/lodash": "^4.14.202",
+    "@types/node": "^18.18.12",
+    "@types/papaparse": "^5.3.13",
+    "@types/pdf-parse": "^1.1.4",
+    "@types/pg": "^8.10.7",
+    "@types/uuid": "^9.0.7",
    "node-stdlib-browser": "^1.2.0",
-    "tsup": "^7.2.0"
+    "tsup": "^7.2.0",
+    "typescript": "^5.3.2"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "types": "./dist/index.d.ts",
  "main": "./dist/index.js",
+  "module": "./dist/index.mjs",
+  "repository": "run-llama/LlamaIndexTS",
  "scripts": {
    "lint": "eslint .",
    "test": "jest",
-    "build": "tsup src/index.ts --format esm,cjs --dts"
+    "build": "tsup src/index.ts --format esm,cjs --dts",
+    "dev": "tsup src/index.ts --format esm,cjs --dts --watch"
  }
-}
+}
@@ -1,17 +1,20 @@
-import { ChatMessage, OpenAI, ChatResponse, LLM } from "./llm/LLM";
-import { TextNode } from "./Node";
+import { v4 as uuidv4 } from "uuid";
+import { ChatHistory } from "./ChatHistory";
+import { NodeWithScore, TextNode } from "./Node";
 import {
-  SimplePrompt,
-  contextSystemPrompt,
+  CondenseQuestionPrompt,
+  ContextSystemPrompt,
  defaultCondenseQuestionPrompt,
+  defaultContextSystemPrompt,
  messagesToHistoryStr,
 } from "./Prompt";
 import { BaseQueryEngine } from "./QueryEngine";
 import { Response } from "./Response";
 import { BaseRetriever } from "./Retriever";
 import { ServiceContext, serviceContextFromDefaults } from "./ServiceContext";
-import { v4 as uuidv4 } from "uuid";
 import { Event } from "./callbacks/CallbackManager";
+import { BaseNodePostprocessor } from "./indices/BaseNodePostprocessor";
+import { ChatMessage, LLM, OpenAI } from "./llm/LLM";

 /**
 * A ChatEngine is used to handle back and forth chats between the application and the LLM.
@@ -21,8 +24,16 @@ export interface ChatEngine {
   * Send message along with the class's current chat history to the LLM.
   * @param message
   * @param chatHistory optional chat history if you want to customize the chat history
+   * @param streaming optional streaming flag; when true, the promise resolves to an AsyncGenerator of string chunks instead of a Response.
   */
-  chat(message: string, chatHistory?: ChatMessage[]): Promise<Response>;
+  chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(
+    message: string,
+    chatHistory?: ChatMessage[],
+    streaming?: T,
+  ): Promise<R>;

  /**
   * Resets the chat history so that it's empty.
@@ -42,13 +53,45 @@ export class SimpleChatEngine implements ChatEngine {
    this.llm = init?.llm ?? new OpenAI();
  }

-  async chat(message: string, chatHistory?: ChatMessage[]): Promise<Response> {
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(message: string, chatHistory?: ChatMessage[], streaming?: T): Promise<R> {
+    //Streaming option
+    if (streaming) {
+      return this.streamChat(message, chatHistory) as R;
+    }
+
+    //Non-streaming option
    chatHistory = chatHistory ?? this.chatHistory;
    chatHistory.push({ content: message, role: "user" });
-    const response = await this.llm.chat(chatHistory);
+    const response = await this.llm.chat(chatHistory, undefined);
    chatHistory.push(response.message);
    this.chatHistory = chatHistory;
-    return new Response(response.message.content);
+    return new Response(response.message.content) as R;
+  }
+
+  /**
+   * Streaming counterpart of chat(): yields each chunk produced by the LLM
+   * and, once the stream is exhausted, appends the full reply to the history.
+   *
+   * @param message user message to send
+   * @param chatHistory optional history to use instead of the engine's own
+   */
+  protected async *streamChat(
+    message: string,
+    chatHistory?: ChatMessage[],
+  ): AsyncGenerator<string, void, unknown> {
+    const history = chatHistory ?? this.chatHistory;
+    history.push({ content: message, role: "user" });
+
+    const stream = await this.llm.chat(history, undefined, true);
+
+    // Collect the chunks so the complete reply can be stored afterwards.
+    let reply = "";
+    for await (const chunk of stream) {
+      reply += chunk;
+      yield chunk;
+    }
+
+    history.push({ content: reply, role: "assistant" });
+    this.chatHistory = history;
+  }

  reset() {
@@ -70,13 +113,13 @@ export class CondenseQuestionChatEngine implements ChatEngine {
  queryEngine: BaseQueryEngine;
  chatHistory: ChatMessage[];
  serviceContext: ServiceContext;
-  condenseMessagePrompt: SimplePrompt;
+  condenseMessagePrompt: CondenseQuestionPrompt;

  constructor(init: {
    queryEngine: BaseQueryEngine;
    chatHistory: ChatMessage[];
    serviceContext?: ServiceContext;
-    condenseMessagePrompt?: SimplePrompt;
+    condenseMessagePrompt?: CondenseQuestionPrompt;
  }) {
    this.queryEngine = init.queryEngine;
    this.chatHistory = init?.chatHistory ?? [];
@@ -92,15 +135,19 @@ export class CondenseQuestionChatEngine implements ChatEngine {
    return this.serviceContext.llm.complete(
      defaultCondenseQuestionPrompt({
        question: question,
-        chat_history: chatHistoryStr,
-      })
+        chatHistory: chatHistoryStr,
+      }),
    );
  }

-  async chat(
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(
    message: string,
-    chatHistory?: ChatMessage[] | undefined
-  ): Promise<Response> {
+    chatHistory?: ChatMessage[] | undefined,
+    streaming?: T,
+  ): Promise<R> {
    chatHistory = chatHistory ?? this.chatHistory;

    const condensedQuestion = (
@@ -112,7 +159,7 @@ export class CondenseQuestionChatEngine implements ChatEngine {
    chatHistory.push({ content: message, role: "user" });
    chatHistory.push({ content: response.response, role: "assistant" });

-    return response;
+    return response as R;
  }

  reset() {
@@ -120,54 +167,118 @@ export class CondenseQuestionChatEngine implements ChatEngine {
  }
 }

+/**
+ * Result of a ContextGenerator: a chat message carrying the generated
+ * context plus a list of scored nodes.
+ * DefaultContextGenerator fills `message` with a "system" message built from
+ * the node texts and `nodes` with the post-processed retrieval results.
+ */
+export interface Context {
+  message: ChatMessage;
+  nodes: NodeWithScore[];
+}
+
+/**
+ * Produces a Context for a user message.
+ * `parentEvent` is optional; DefaultContextGenerator creates its own event
+ * when none is passed.
+ */
+export interface ContextGenerator {
+  generate(message: string, parentEvent?: Event): Promise<Context>;
+}
+
+/**
+ * ContextGenerator that retrieves nodes for the message, runs them through
+ * the configured node postprocessors and renders their text into a system
+ * message using the context system prompt.
+ */
+export class DefaultContextGenerator implements ContextGenerator {
+  retriever: BaseRetriever;
+  contextSystemPrompt: ContextSystemPrompt;
+  nodePostprocessors: BaseNodePostprocessor[];
+
+  constructor(init: {
+    retriever: BaseRetriever;
+    contextSystemPrompt?: ContextSystemPrompt;
+    nodePostprocessors?: BaseNodePostprocessor[];
+  }) {
+    const { retriever, nodePostprocessors } = init;
+    this.retriever = retriever;
+    this.contextSystemPrompt =
+      init?.contextSystemPrompt ?? defaultContextSystemPrompt;
+    this.nodePostprocessors = nodePostprocessors || [];
+  }
+
+  /** Feeds the nodes through every postprocessor, in order. */
+  private applyNodePostprocessors(nodes: NodeWithScore[]) {
+    let processed = nodes;
+    for (const postprocessor of this.nodePostprocessors) {
+      processed = postprocessor.postprocessNodes(processed);
+    }
+    return processed;
+  }
+
+  /**
+   * @param message text used as the retrieval query
+   * @param parentEvent event passed to the retriever; a new "wrapper" event is created when omitted
+   * @returns the system message holding the joined node texts, plus the post-processed nodes
+   */
+  async generate(message: string, parentEvent?: Event): Promise<Context> {
+    const event: Event = parentEvent ?? {
+      id: uuidv4(),
+      type: "wrapper",
+      tags: ["final"],
+    };
+
+    const retrieved = await this.retriever.retrieve(message, event);
+    const nodes = this.applyNodePostprocessors(retrieved);
+
+    // Node texts are separated by a blank line inside the prompt.
+    const contextText = nodes
+      .map(({ node }) => (node as TextNode).text)
+      .join("\n\n");
+
+    const systemMessage: ChatMessage = {
+      content: this.contextSystemPrompt({ context: contextText }),
+      role: "system",
+    };
+
+    return { message: systemMessage, nodes };
+  }
+}
+
 /**
 * ContextChatEngine uses the Index to get the appropriate context for each query.
 * The context is stored in the system prompt, and the chat history is preserved,
 * ideally allowing the appropriate context to be surfaced for each query.
 */
 export class ContextChatEngine implements ChatEngine {
-  retriever: BaseRetriever;
-  chatModel: OpenAI;
+  chatModel: LLM;
  chatHistory: ChatMessage[];
+  contextGenerator: ContextGenerator;

  constructor(init: {
    retriever: BaseRetriever;
-    chatModel?: OpenAI;
+    chatModel?: LLM;
    chatHistory?: ChatMessage[];
+    contextSystemPrompt?: ContextSystemPrompt;
+    nodePostprocessors?: BaseNodePostprocessor[];
  }) {
-    this.retriever = init.retriever;
    this.chatModel =
      init.chatModel ?? new OpenAI({ model: "gpt-3.5-turbo-16k" });
    this.chatHistory = init?.chatHistory ?? [];
+    // Forward nodePostprocessors too; the option is part of the init type and
+    // would otherwise be accepted but silently ignored.
+    this.contextGenerator = new DefaultContextGenerator({
+      retriever: init.retriever,
+      contextSystemPrompt: init?.contextSystemPrompt,
+      nodePostprocessors: init?.nodePostprocessors,
+    });
  }

-  async chat(message: string, chatHistory?: ChatMessage[] | undefined) {
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(
+    message: string,
+    chatHistory?: ChatMessage[] | undefined,
+    streaming?: T,
+  ): Promise<R> {
    chatHistory = chatHistory ?? this.chatHistory;

+    //Streaming option
+    if (streaming) {
+      return this.streamChat(message, chatHistory) as R;
+    }
+
    const parentEvent: Event = {
      id: uuidv4(),
      type: "wrapper",
      tags: ["final"],
    };
-    const sourceNodesWithScore = await this.retriever.retrieve(
-      message,
-      parentEvent
-    );
-
-    const systemMessage: ChatMessage = {
-      content: contextSystemPrompt({
-        context: sourceNodesWithScore
-          .map((r) => (r.node as TextNode).text)
-          .join("\n\n"),
-      }),
-      role: "system",
-    };
+    const context = await this.contextGenerator.generate(message, parentEvent);

    chatHistory.push({ content: message, role: "user" });

    const response = await this.chatModel.chat(
-      [systemMessage, ...chatHistory],
-      parentEvent
+      [context.message, ...chatHistory],
+      parentEvent,
    );
    chatHistory.push(response.message);

@@ -175,11 +286,146 @@ export class ContextChatEngine implements ChatEngine {

    return new Response(
      response.message.content,
-      sourceNodesWithScore.map((r) => r.node)
+      context.nodes.map((r) => r.node),
+    ) as R;
+  }
+
+  /**
+   * Streaming variant of `chat`: yields the response parts as they arrive.
+   * The user message is appended to the history before the LLM call; the
+   * concatenated response is appended as an assistant message only after the
+   * stream has been fully consumed.
+   * @param message the user message
+   * @param chatHistory history to use; defaults to the engine's own history
+   */
+  protected async *streamChat(
+    message: string,
+    chatHistory?: ChatMessage[] | undefined,
+  ): AsyncGenerator<string, void, unknown> {
+    chatHistory = chatHistory ?? this.chatHistory;
+
+    const parentEvent: Event = {
+      id: uuidv4(),
+      type: "wrapper",
+      tags: ["final"],
+    };
+    const context = await this.contextGenerator.generate(message, parentEvent);
+
+    chatHistory.push({ content: message, role: "user" });
+
+    // The generated context message is sent first, followed by the history;
+    // the third argument requests a streamed response.
+    const response_stream = await this.chatModel.chat(
+      [context.message, ...chatHistory],
+      parentEvent,
+      true,
    );
+    // Collect the streamed parts so the full reply can be stored afterwards.
+    var accumulator: string = "";
+    for await (const part of response_stream) {
+      accumulator += part;
+      yield part;
+    }
+
+    chatHistory.push({ content: accumulator, role: "assistant" });
+
+    // Keep the (possibly caller-supplied) history as the engine's history.
+    this.chatHistory = chatHistory;
+
+    return;
  }

  reset() {
    this.chatHistory = [];
  }
 }
+
+/**
+ * One part of a multi-modal message.
+ */
+export interface MessageContentDetail {
+  // Kind of the part; HistoryChatEngine only forwards "text" parts to the context generator.
+  type: "text" | "image_url";
+  // Text of the part.
+  text: string;
+  // NOTE(review): declared as required alongside `text` for every part —
+  // confirm whether `text`/`image_url` should be optional depending on `type`.
+  image_url: { url: string };
+}
+
+/**
+ * Extended type for the content of a message that allows for multi-modal messages.
+ */
+export type MessageContent = string | MessageContentDetail[];
+
+/**
+ * HistoryChatEngine is a ChatEngine that uses a `ChatHistory` object
+ * to keep track of the chat's message history.
+ * A `ChatHistory` object is passed as a parameter for each call to the `chat` method,
+ * so the state of the chat engine is preserved between calls.
+ * Optionally, a `ContextGenerator` can be used to generate an additional context for each call to `chat`.
+ */
+export class HistoryChatEngine {
+  llm: LLM;
+  contextGenerator?: ContextGenerator;
+
+  /**
+   * @param init optional overrides; `llm` defaults to a new `OpenAI` instance
+   */
+  constructor(init?: Partial<HistoryChatEngine>) {
+    this.contextGenerator = init?.contextGenerator;
+    this.llm = init?.llm ?? new OpenAI();
+  }
+
+  /**
+   * Sends `message` to the LLM, recording both the user message and the reply
+   * in `chatHistory`.
+   * @param message the user message (plain text or multi-modal parts)
+   * @param chatHistory history object that stores the conversation
+   * @param streaming when truthy, an async generator of response parts is returned
+   * @returns a `Response`, or an async generator when streaming
+   */
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(
+    message: MessageContent,
+    chatHistory: ChatHistory,
+    streaming?: T,
+  ): Promise<R> {
+    if (!streaming) {
+      const llmInput = await this.prepareRequestMessages(message, chatHistory);
+      const { message: reply } = await this.llm.chat(llmInput);
+      chatHistory.addMessage(reply);
+      return new Response(reply.content) as R;
+    }
+    return this.streamChat(message, chatHistory) as R;
+  }
+
+  /**
+   * Streaming variant of `chat`: yields each response part and stores the
+   * concatenated reply as an assistant message once the stream is exhausted.
+   */
+  protected async *streamChat(
+    message: MessageContent,
+    chatHistory: ChatHistory,
+  ): AsyncGenerator<string, void, unknown> {
+    const llmInput = await this.prepareRequestMessages(message, chatHistory);
+    const chunks = await this.llm.chat(llmInput, undefined, true);
+
+    let reply = "";
+    for await (const chunk of chunks) {
+      reply += chunk;
+      yield chunk;
+    }
+    chatHistory.addMessage({ content: reply, role: "assistant" });
+    return;
+  }
+
+  /**
+   * Records the user message in the history and returns the messages to send
+   * to the LLM. When a context generator is configured, its context message is
+   * passed to the history as a transient message.
+   */
+  private async prepareRequestMessages(
+    message: MessageContent,
+    chatHistory: ChatHistory,
+  ) {
+    chatHistory.addMessage({ content: message, role: "user" });
+
+    if (!this.contextGenerator) {
+      return await chatHistory.requestMessages(undefined);
+    }
+
+    // The context generator takes plain text: for multi-modal input only the
+    // text parts are used, joined by blank lines.
+    const query = Array.isArray(message)
+      ? (message as MessageContentDetail[])
+          .filter((part) => part.type === "text")
+          .map((part) => part.text)
+          .join("\n\n")
+      : message;
+    const context = await this.contextGenerator.generate(query);
+
+    return await chatHistory.requestMessages(
+      context ? [context.message] : undefined,
+    );
+  }
+}
@@ -0,0 +1,200 @@
+import { ChatMessage, LLM, MessageType, OpenAI } from "./llm/LLM";
+import {
+  defaultSummaryPrompt,
+  messagesToHistoryStr,
+  SummaryPrompt,
+} from "./Prompt";
+
+/**
+ * A ChatHistory is used to keep the state of back and forth chat messages
+ */
+export interface ChatHistory {
+  /** The stored messages of the conversation. */
+  messages: ChatMessage[];
+  /**
+   * Adds a message to the chat history.
+   * @param message the message to store
+   */
+  addMessage(message: ChatMessage): void;
+
+  /**
+   * Returns the messages that should be used as input to the LLM.
+   * @param transientMessages extra messages to include in the returned list;
+   * the implementations in this file do not store them in `messages`
+   */
+  requestMessages(transientMessages?: ChatMessage[]): Promise<ChatMessage[]>;
+
+  /**
+   * Resets the chat history so that it's empty.
+   */
+  reset(): void;
+
+  /**
+   * Returns the new messages since the last call to this function (or since calling the constructor)
+   */
+  newMessages(): ChatMessage[];
+}
+
+/**
+ * ChatHistory that keeps all messages in memory and sends them unmodified.
+ */
+export class SimpleChatHistory implements ChatHistory {
+  messages: ChatMessage[];
+  // Number of messages already reported by newMessages() (or present at construction).
+  private messagesBefore: number;
+
+  /**
+   * @param init optional initial state; `messages` defaults to an empty list
+   */
+  constructor(init?: Partial<SimpleChatHistory>) {
+    this.messages = init?.messages ?? [];
+    this.messagesBefore = this.messages.length;
+  }
+
+  /** Appends a message to the history. */
+  addMessage(message: ChatMessage) {
+    this.messages.push(message);
+  }
+
+  /**
+   * Returns the transient messages (if any) followed by all stored messages.
+   */
+  async requestMessages(transientMessages?: ChatMessage[]) {
+    return [...(transientMessages ?? []), ...this.messages];
+  }
+
+  /** Empties the history. */
+  reset() {
+    this.messages = [];
+    // The offset must be reset as well; otherwise newMessages() keeps slicing
+    // from the pre-reset length and silently drops messages added afterwards.
+    this.messagesBefore = 0;
+  }
+
+  /**
+   * Returns the messages added since the previous call (or since construction).
+   */
+  newMessages() {
+    const newMessages = this.messages.slice(this.messagesBefore);
+    this.messagesBefore = this.messages.length;
+    return newMessages;
+  }
+}
+
+/**
+ * ChatHistory that asks the LLM to summarize the conversation once the
+ * request would exceed the token budget. The summary is stored in `messages`
+ * as a message with role "memory".
+ */
+export class SummaryChatHistory implements ChatHistory {
+  // Token budget for a request: the LLM's contextWindow minus its maxTokens.
+  tokensToSummarize: number;
+  messages: ChatMessage[];
+  summaryPrompt: SummaryPrompt;
+  llm: LLM;
+  // Number of messages already reported by newMessages() (or present at construction).
+  private messagesBefore: number;
+
+  /**
+   * @param init optional overrides for `messages`, `summaryPrompt` and `llm`
+   * @throws Error when the LLM's metadata has no `maxTokens`
+   */
+  constructor(init?: Partial<SummaryChatHistory>) {
+    this.messages = init?.messages ?? [];
+    this.messagesBefore = this.messages.length;
+    this.summaryPrompt = init?.summaryPrompt ?? defaultSummaryPrompt;
+    this.llm = init?.llm ?? new OpenAI();
+    if (!this.llm.metadata.maxTokens) {
+      throw new Error(
+        "LLM maxTokens is not set. Needed so the summarizer ensures the context window size of the LLM.",
+      );
+    }
+    this.tokensToSummarize =
+      this.llm.metadata.contextWindow - this.llm.metadata.maxTokens;
+  }
+
+  /**
+   * Asks the LLM for a summary of the conversation so far.
+   * @returns the summary as a message with role "memory"
+   * @throws Error when the summary prompt exceeds the token budget even with
+   * no conversation messages left in it
+   */
+  private async summarize(): Promise<ChatMessage> {
+    // get the conversation messages to create summary
+    const messagesToSummarize = this.calcConversationMessages();
+
+    const buildPrompt = () => [
+      {
+        content: this.summaryPrompt({
+          context: messagesToHistoryStr(messagesToSummarize),
+        }),
+        role: "user" as MessageType,
+      },
+    ];
+
+    let promptMessages = buildPrompt();
+    // remove oldest message until the chat history is short enough for the context window
+    while (this.llm.tokens(promptMessages) > this.tokensToSummarize) {
+      if (messagesToSummarize.length === 0) {
+        // Nothing left to drop: without this guard the loop would never end.
+        throw new Error(
+          "Summary prompt exceeds the available token budget even without any chat messages.",
+        );
+      }
+      messagesToSummarize.shift();
+      promptMessages = buildPrompt();
+    }
+
+    const response = await this.llm.chat(promptMessages);
+    return { content: response.message.content, role: "memory" };
+  }
+
+  /** Appends a message to the history. */
+  addMessage(message: ChatMessage) {
+    this.messages.push(message);
+  }
+
+  /**
+   * Finds the index of the last summary ("memory") message.
+   * @returns the index, or null when there is no summary message
+   */
+  private getLastSummaryIndex(): number | null {
+    for (let i = this.messages.length - 1; i >= 0; i--) {
+      if (this.messages[i].role === "memory") {
+        return i;
+      }
+    }
+    return null;
+  }
+
+  private get systemMessages() {
+    // get array of all system messages
+    return this.messages.filter((message) => message.role === "system");
+  }
+
+  private get nonSystemMessages() {
+    // get array of all non-system messages
+    return this.messages.filter((message) => message.role !== "system");
+  }
+
+  /**
+   * Calculates the messages that describe the conversation so far.
+   * If there's no memory, all non-system messages are used.
+   * If there's a memory, uses all messages after the last summary message.
+   * @param transformSummary when true, the summary is converted into a
+   * "system" message so it can be sent to the LLM
+   */
+  private calcConversationMessages(transformSummary?: boolean): ChatMessage[] {
+    const lastSummaryIndex = this.getLastSummaryIndex();
+    // Compare against null explicitly: index 0 is a valid summary position.
+    if (lastSummaryIndex === null) {
+      // there's no memory, so just use all non-system messages
+      return this.nonSystemMessages;
+    }
+    // there's a memory, so use all messages after the last summary message
+    // and convert summary message so it can be send to the LLM
+    const summaryMessage: ChatMessage = transformSummary
+      ? {
+          content: `Summary of the conversation so far: ${this.messages[lastSummaryIndex].content}`,
+          role: "system",
+        }
+      : this.messages[lastSummaryIndex];
+    return [summaryMessage, ...this.messages.slice(lastSummaryIndex + 1)];
+  }
+
+  private calcCurrentRequestMessages(transientMessages?: ChatMessage[]) {
+    // TODO: check order: currently, we're sending:
+    // system messages first, then transient messages and then the messages that describe the conversation so far
+    return [
+      ...this.systemMessages,
+      ...(transientMessages ? transientMessages : []),
+      ...this.calcConversationMessages(true),
+    ];
+  }
+
+  /**
+   * Returns the messages to send to the LLM. If they exceed the token budget,
+   * a summary is created first and stored in `messages` as a "memory" message.
+   * @param transientMessages extra messages to include without storing them
+   */
+  async requestMessages(transientMessages?: ChatMessage[]) {
+    const requestMessages = this.calcCurrentRequestMessages(transientMessages);
+
+    // get tokens of current request messages and the transient messages
+    const tokens = this.llm.tokens(requestMessages);
+    if (tokens > this.tokensToSummarize) {
+      // if there are too many tokens for the next request, call summarize
+      const memoryMessage = await this.summarize();
+      const lastMessage = this.messages.at(-1);
+      if (lastMessage && lastMessage.role === "user") {
+        // if last message is a user message, ensure that it's sent after the new memory message
+        this.messages.pop();
+        this.messages.push(memoryMessage);
+        this.messages.push(lastMessage);
+      } else {
+        // otherwise just add the memory message
+        this.messages.push(memoryMessage);
+      }
+      // TODO: we still might have too many tokens
+      // e.g. too large system messages or transient messages
+      // how should we deal with that?
+      return this.calcCurrentRequestMessages(transientMessages);
+    }
+    return requestMessages;
+  }
+
+  /** Empties the history. */
+  reset() {
+    this.messages = [];
+    // Reset the offset too, so newMessages() reports messages added after the reset.
+    this.messagesBefore = 0;
+  }
+
+  /**
+   * Returns the messages added since the previous call (or since construction).
+   */
+  newMessages() {
+    const newMessages = this.messages.slice(this.messagesBefore);
+    this.messagesBefore = this.messages.length;
+    return newMessages;
+  }
+}
@@ -1,28 +1,54 @@
-import { Event, EventTag, EventType } from "./callbacks/CallbackManager";
+import { encodingForModel } from "js-tiktoken";
+
 import { v4 as uuidv4 } from "uuid";
+import { Event, EventTag, EventType } from "./callbacks/CallbackManager";
+
+/**
+ * Names of tokenizer encodings. `GlobalsHelper.tokenizer` rejects any
+ * encoding other than `CL100K_BASE`.
+ */
+export enum Tokenizers {
+  CL100K_BASE = "cl100k_base",
+}

 /**
 * Helper class singleton
 */
 class GlobalsHelper {
  defaultTokenizer: {
-    encode: (text: string) => number[];
-    decode: (tokens: number[]) => string;
+    encode: (text: string) => Uint32Array;
+    decode: (tokens: Uint32Array) => string;
  } | null = null;

-  tokenizer() {
+  /**
+   * Creates the default tokenizer from the js-tiktoken encoding for
+   * "text-embedding-ada-002" and stores it in `defaultTokenizer`.
+   */
+  private initDefaultTokenizer() {
+    const encoding = encodingForModel("text-embedding-ada-002"); // cl100k_base
+
+    this.defaultTokenizer = {
+      encode: (text: string) => {
+        // Wrap the encoder's output in a Uint32Array.
+        return new Uint32Array(encoding.encode(text));
+      },
+      decode: (tokens: Uint32Array) => {
+        // The encoding is given a plain number array rather than the typed array.
+        const numberArray = Array.from(tokens);
+        const text = encoding.decode(numberArray);
+        // NOTE(review): the decoded text is re-encoded to UTF-8 bytes and decoded
+        // again; this round trip looks redundant — confirm before removing.
+        const uint8Array = new TextEncoder().encode(text);
+        return new TextDecoder().decode(uint8Array);
+      },
+    };
+  }
+
+  /**
+   * Returns the encode function of the default tokenizer, creating the
+   * tokenizer on first use.
+   * @param encoding optional encoding name; only `Tokenizers.CL100K_BASE` is accepted
+   * @throws Error for any other non-empty encoding name
+   */
+  tokenizer(encoding?: string) {
+    if (encoding && encoding !== Tokenizers.CL100K_BASE) {
+      throw new Error(`Tokenizer encoding ${encoding} not yet supported`);
+    }
    if (!this.defaultTokenizer) {
-      const tiktoken = require("tiktoken-node");
-      this.defaultTokenizer = tiktoken.getEncoding("gpt2");
+      this.initDefaultTokenizer();
    }

    return this.defaultTokenizer!.encode.bind(this.defaultTokenizer);
  }

-  tokenizerDecoder() {
+  tokenizerDecoder(encoding?: string) {
+    if (encoding && encoding !== Tokenizers.CL100K_BASE) {
+      throw new Error(`Tokenizer encoding ${encoding} not yet supported`);
+    }
    if (!this.defaultTokenizer) {
-      const tiktoken = require("tiktoken-node");
-      this.defaultTokenizer = tiktoken.getEncoding("gpt2");
+      this.initDefaultTokenizer();
    }

    return this.defaultTokenizer!.decode.bind(this.defaultTokenizer);
@@ -1,4 +1,4 @@
-import crypto from "crypto"; // TODO Node dependency
+import CryptoJS from "crypto-js";
 import { v4 as uuidv4 } from "uuid";

 export enum NodeRelationship {
@@ -23,19 +23,23 @@ export enum MetadataMode {
  NONE = "NONE",
 }

-export interface RelatedNodeInfo {
+export type Metadata = Record<string, any>;
+
+export interface RelatedNodeInfo<T extends Metadata = Metadata> {
  nodeId: string;
  nodeType?: ObjectType;
-  metadata: Record<string, any>;
+  metadata: T;
  hash?: string;
 }

-export type RelatedNodeType = RelatedNodeInfo | RelatedNodeInfo[];
+export type RelatedNodeType<T extends Metadata = Metadata> =
+  | RelatedNodeInfo<T>
+  | RelatedNodeInfo<T>[];

 /**
 * Generic abstract class for retrievable nodes
 */
-export abstract class BaseNode {
+export abstract class BaseNode<T extends Metadata = Metadata> {
  /**
   * The unique ID of the Node/Document. The trailing underscore is here
   * to avoid collisions with the id keyword in Python.
@@ -46,13 +50,13 @@ export abstract class BaseNode {
  embedding?: number[];

  // Metadata fields
-  metadata: Record<string, any> = {};
+  metadata: T = {} as T;
  excludedEmbedMetadataKeys: string[] = [];
  excludedLlmMetadataKeys: string[] = [];
-  relationships: Partial<Record<NodeRelationship, RelatedNodeType>> = {};
+  relationships: Partial<Record<NodeRelationship, RelatedNodeType<T>>> = {};
  hash: string = "";

-  constructor(init?: Partial<BaseNode>) {
+  constructor(init?: Partial<BaseNode<T>>) {
    Object.assign(this, init);
  }

@@ -62,7 +66,7 @@ export abstract class BaseNode {
  abstract getMetadataStr(metadataMode: MetadataMode): string;
  abstract setContent(value: any): void;

-  get sourceNode(): RelatedNodeInfo | undefined {
+  get sourceNode(): RelatedNodeInfo<T> | undefined {
    const relationship = this.relationships[NodeRelationship.SOURCE];

    if (Array.isArray(relationship)) {
@@ -72,7 +76,7 @@ export abstract class BaseNode {
    return relationship;
  }

-  get prevNode(): RelatedNodeInfo | undefined {
+  get prevNode(): RelatedNodeInfo<T> | undefined {
    const relationship = this.relationships[NodeRelationship.PREVIOUS];

    if (Array.isArray(relationship)) {
@@ -84,7 +88,7 @@ export abstract class BaseNode {
    return relationship;
  }

-  get nextNode(): RelatedNodeInfo | undefined {
+  get nextNode(): RelatedNodeInfo<T> | undefined {
    const relationship = this.relationships[NodeRelationship.NEXT];

    if (Array.isArray(relationship)) {
@@ -94,7 +98,7 @@ export abstract class BaseNode {
    return relationship;
  }

-  get parentNode(): RelatedNodeInfo | undefined {
+  get parentNode(): RelatedNodeInfo<T> | undefined {
    const relationship = this.relationships[NodeRelationship.PARENT];

    if (Array.isArray(relationship)) {
@@ -104,7 +108,7 @@ export abstract class BaseNode {
    return relationship;
  }

-  get childNodes(): RelatedNodeInfo[] | undefined {
+  get childNodes(): RelatedNodeInfo<T>[] | undefined {
    const relationship = this.relationships[NodeRelationship.CHILD];

    if (!Array.isArray(relationship)) {
@@ -126,7 +130,7 @@ export abstract class BaseNode {
    return this.embedding;
  }

-  asRelatedNodeInfo(): RelatedNodeInfo {
+  asRelatedNodeInfo(): RelatedNodeInfo<T> {
    return {
      nodeId: this.id_,
      metadata: this.metadata,
@@ -146,7 +150,7 @@ export abstract class BaseNode {
 /**
 * TextNode is the default node type for text. Most common node type in LlamaIndex.TS
 */
-export class TextNode extends BaseNode {
+export class TextNode<T extends Metadata = Metadata> extends BaseNode<T> {
  text: string = "";
  startCharIdx?: number;
  endCharIdx?: number;
@@ -154,7 +158,7 @@ export class TextNode extends BaseNode {
  // metadataTemplate: NOTE write your own formatter if needed
  metadataSeparator: string = "\n";

-  constructor(init?: Partial<TextNode>) {
+  constructor(init?: Partial<TextNode<T>>) {
    super(init);
    Object.assign(this, init);

@@ -171,13 +175,13 @@ export class TextNode extends BaseNode {
   * @returns
   */
  generateHash() {
-    const hashFunction = crypto.createHash("sha256");
+    const hashFunction = CryptoJS.algo.SHA256.create();
    hashFunction.update(`type=${this.getType()}`);
    hashFunction.update(
      `startCharIdx=${this.startCharIdx} endCharIdx=${this.endCharIdx}`,
    );
    hashFunction.update(this.getContent(MetadataMode.ALL));
-    return hashFunction.digest("base64");
+    return hashFunction.finalize().toString(CryptoJS.enc.Base64);
  }

  getType(): ObjectType {
@@ -233,10 +237,10 @@ export class TextNode extends BaseNode {
 //   }
 // }

-export class IndexNode extends TextNode {
+export class IndexNode<T extends Metadata = Metadata> extends TextNode<T> {
  indexId: string = "";

-  constructor(init?: Partial<IndexNode>) {
+  constructor(init?: Partial<IndexNode<T>>) {
    super(init);
    Object.assign(this, init);

@@ -253,8 +257,8 @@ export class IndexNode extends TextNode {
 /**
 * A document is just a special text node with a docId.
 */
-export class Document extends TextNode {
-  constructor(init?: Partial<Document>) {
+export class Document<T extends Metadata = Metadata> extends TextNode<T> {
+  constructor(init?: Partial<Document<T>>) {
    super(init);
    Object.assign(this, init);

@@ -268,12 +272,13 @@ export class Document extends TextNode {
  }
 }

-export function jsonToNode(json: any) {
-  if (!json.type) {
+export function jsonToNode(json: any, type?: ObjectType) {
+  if (!json.type && !type) {
    throw new Error("Node type not found");
  }
+  const nodeType = type || json.type;

-  switch (json.type) {
+  switch (nodeType) {
    case ObjectType.TEXT:
      return new TextNode(json);
    case ObjectType.INDEX:
@@ -281,7 +286,7 @@ export function jsonToNode(json: any) {
    case ObjectType.DOCUMENT:
      return new Document(json);
    default:
-      throw new Error(`Invalid node type: ${json.type}`);
+      throw new Error(`Invalid node type: ${nodeType}`);
  }
 }

@@ -292,7 +297,7 @@ export function jsonToNode(json: any) {
 /**
 * A node with a similarity score
 */
-export interface NodeWithScore {
-  node: BaseNode;
-  score: number;
+export interface NodeWithScore<T extends Metadata = Metadata> {
+  node: BaseNode<T>;
+  score?: number;
 }
@@ -10,7 +10,7 @@ import { DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE } from "./constants";
 */
 export function getTextSplitsFromDocument(
  document: Document,
-  textSplitter: SentenceSplitter
+  textSplitter: SentenceSplitter,
 ) {
  const text = document.getText();
  const splits = textSplitter.splitText(text);
@@ -30,7 +30,7 @@ export function getNodesFromDocument(
  document: Document,
  textSplitter: SentenceSplitter,
  includeMetadata: boolean = true,
-  includePrevNextRel: boolean = true
+  includePrevNextRel: boolean = true,
 ) {
  let nodes: TextNode[] = [];

@@ -100,10 +100,10 @@ export class SimpleNodeParser implements NodeParser {
  }) {
    this.textSplitter =
      init?.textSplitter ??
-      new SentenceSplitter(
-        init?.chunkSize ?? DEFAULT_CHUNK_SIZE,
-        init?.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP
-      );
+      new SentenceSplitter({
+        chunkSize: init?.chunkSize ?? DEFAULT_CHUNK_SIZE,
+        chunkOverlap: init?.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP,
+      });
    this.includeMetadata = init?.includeMetadata ?? true;
    this.includePrevNextRel = init?.includePrevNextRel ?? true;
  }
@@ -28,7 +28,7 @@ class OutputParserError extends Error {

  constructor(
    message: string,
-    options: { cause?: Error; output?: string } = {}
+    options: { cause?: Error; output?: string } = {},
  ) {
    // @ts-ignore
    super(message, options); // https://github.com/tc39/proposal-error-cause
@@ -53,30 +53,31 @@ class OutputParserError extends Error {
 * @param text A markdown block with JSON
 * @returns parsed JSON object
 */
-function parseJsonMarkdown(text: string) {
+export function parseJsonMarkdown(text: string) {
  text = text.trim();

-  const beginDelimiter = "```json";
-  const endDelimiter = "```";
+  const leftSquare = text.indexOf("[");
+  const leftBrace = text.indexOf("{");

-  const beginIndex = text.indexOf(beginDelimiter);
-  const endIndex = text.indexOf(
-    endDelimiter,
-    beginIndex + beginDelimiter.length
-  );
-  if (beginIndex === -1 || endIndex === -1) {
-    throw new OutputParserError("Not a json markdown", { output: text });
-  }
-
-  const jsonText = text.substring(beginIndex + beginDelimiter.length, endIndex);
-
-  try {
-    return JSON.parse(jsonText);
-  } catch (e) {
-    throw new OutputParserError("Not a valid json", {
-      cause: e as Error,
-      output: text,
-    });
-  }
+  // The payload is an array when "[" exists and no "{" comes before it.
+  // This also covers arrays that contain no object at all (e.g. `[1, 2]`),
+  // where indexOf("{") is -1.
+  const isArray =
+    leftSquare !== -1 && (leftBrace === -1 || leftSquare < leftBrace);
+  const left = isArray ? leftSquare : leftBrace;
+  const right = text.lastIndexOf(isArray ? "]" : "}");
+  // With no bracket at all this yields "", which JSON.parse rejects below.
+  const jsonText = text.substring(left, right + 1);
+  try {
+    const parsed = JSON.parse(jsonText);
+    // A single JSON object is wrapped so callers always receive a list,
+    // including objects that contain arrays (e.g. `{"a": [1]}`).
+    return isArray ? parsed : [parsed];
+  } catch (e) {
+    // Keep the underlying parse error available as the cause.
+    throw new OutputParserError("Not a json markdown", {
+      cause: e as Error,
+      output: text,
+    });
+  }
 }

@@ -7,28 +7,42 @@ import { ToolMetadata } from "./Tool";
 * NOTE this is a different interface compared to LlamaIndex Python
 * NOTE 2: we default to empty string to make it easy to calculate prompt sizes
 */
-export type SimplePrompt = (input: Record<string, string>) => string;
+export type SimplePrompt = (
+  input: Record<string, string | undefined>,
+) => string;

 /*
 DEFAULT_TEXT_QA_PROMPT_TMPL = (
-    "Context information is below. \n"
+    "Context information is below.\n"
+    "---------------------\n"
+    "{context_str}\n"
    "---------------------\n"
-    "{context_str}"
-    "\n---------------------\n"
    "Given the context information and not prior knowledge, "
-    "answer the question: {query_str}\n"
+    "answer the query.\n"
+    "Query: {query_str}\n"
+    "Answer: "
 )
 */

+/**
+ * Builds the default question-answering prompt.
+ * @param context text placed between the separator lines; defaults to ""
+ * @param query text placed after "Query: "; defaults to ""
+ * @returns the prompt text, ending in "Answer:"
+ */
-export const defaultTextQaPrompt: SimplePrompt = (input) => {
-  const { context = "", query = "" } = input;
-
+export const defaultTextQaPrompt = ({ context = "", query = "" }) => {
  return `Context information is below.
 ---------------------
 ${context}
 ---------------------
-Given the context information and not prior knowledge, answer the question: ${query}
-`;
+Given the context information and not prior knowledge, answer the query.
+Query: ${query}
+Answer:`;
+};
+
+export type TextQaPrompt = typeof defaultTextQaPrompt;
+
+/**
+ * Question-answering prompt that wraps the context in <context> tags.
+ * @param context text placed inside the <context> tags; defaults to ""
+ * @param query text placed after "Query: "; defaults to ""
+ */
+export const anthropicTextQaPrompt = ({ context = "", query = "" }) => {
+  const promptLines = [
+    "Context information:",
+    "<context>",
+    `${context}`,
+    "</context>",
+    "Given the context information and not prior knowledge, answer the query.",
+    `Query: ${query}`,
+  ];
+  return promptLines.join("\n");
 };

 /*
@@ -45,9 +59,7 @@ DEFAULT_SUMMARY_PROMPT_TMPL = (
 )
 */

-export const defaultSummaryPrompt: SimplePrompt = (input) => {
-  const { context = "" } = input;
-
+export const defaultSummaryPrompt = ({ context = "" }) => {
  return `Write a summary of the following. Try to use only the information provided. Try to include as many key details as possible.


@@ -58,9 +70,11 @@ SUMMARY:"""
 `;
 };

+export type SummaryPrompt = typeof defaultSummaryPrompt;
+
 /*
 DEFAULT_REFINE_PROMPT_TMPL = (
-    "The original question is as follows: {query_str}\n"
+    "The original query is as follows: {query_str}\n"
    "We have provided an existing answer: {existing_answer}\n"
    "We have the opportunity to refine the existing answer "
    "(only if needed) with some more context below.\n"
@@ -68,26 +82,55 @@ DEFAULT_REFINE_PROMPT_TMPL = (
    "{context_msg}\n"
    "------------\n"
    "Given the new context, refine the original answer to better "
-    "answer the question. "
-    "If the context isn't useful, return the original answer."
+    "answer the query. "
+    "If the context isn't useful, return the original answer.\n"
+    "Refined Answer: "
 )
 */

+/**
+ * Builds the default prompt for refining an existing answer with more context.
+ * @param query the original query; defaults to ""
+ * @param existingAnswer the answer to refine; defaults to ""
+ * @param context the additional context; defaults to ""
+ * @returns the prompt text, ending in "Refined Answer:"
+ */
-export const defaultRefinePrompt: SimplePrompt = (input) => {
-  const { query = "", existingAnswer = "", context = "" } = input;
-
-  return `The original question is as follows: ${query}
+export const defaultRefinePrompt = ({
+  query = "",
+  existingAnswer = "",
+  context = "",
+}) => {
+  return `The original query is as follows: ${query}
 We have provided an existing answer: ${existingAnswer}
 We have the opportunity to refine the existing answer (only if needed) with some more context below.
 ------------
 ${context}
 ------------
-Given the new context, refine the original answer to better answer the question. If the context isn't useful, return the original answer.`;
+Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
+Refined Answer:`;
 };

-export const defaultChoiceSelectPrompt: SimplePrompt = (input) => {
-  const { context = "", query = "" } = input;
+export type RefinePrompt = typeof defaultRefinePrompt;

+/*
+DEFAULT_TREE_SUMMARIZE_TMPL = (
+  "Context information from multiple sources is below.\n"
+  "---------------------\n"
+  "{context_str}\n"
+  "---------------------\n"
+  "Given the information from multiple sources and not prior knowledge, "
+  "answer the query.\n"
+  "Query: {query_str}\n"
+  "Answer: "
+)
+*/
+
+/**
+ * Builds the default prompt for answering a query from multiple sources.
+ * @param context text placed between the separator lines; defaults to ""
+ * @param query text placed after "Query: "; defaults to ""
+ * @returns the prompt text, ending in "Answer:"
+ */
+export const defaultTreeSummarizePrompt = ({ context = "", query = "" }) => {
+  const separator = "---------------------";
+  const promptLines = [
+    "Context information from multiple sources is below.",
+    separator,
+    `${context}`,
+    separator,
+    "Given the information from multiple sources and not prior knowledge, answer the query.",
+    `Query: ${query}`,
+    "Answer:",
+  ];
+  return promptLines.join("\n");
+};
+
+export type TreeSummarizePrompt = typeof defaultTreeSummarizePrompt;
+
+export const defaultChoiceSelectPrompt = ({ context = "", query = "" }) => {
  return `A list of documents is shown below. Each document has a number next to it along 
 with a summary of the document. A question is also provided.
 Respond with the numbers of the documents
@@ -119,6 +162,8 @@ Question: ${query}
 Answer:`;
 };

+export type ChoiceSelectPrompt = typeof defaultChoiceSelectPrompt;
+
 /*
 PREFIX = """\
 Given a user question, and a list of tools, output a list of relevant sub-questions \
@@ -236,9 +281,7 @@ const exampleOutput: SubQuestion[] = [
  },
 ];

-export const defaultSubQuestionPrompt: SimplePrompt = (input) => {
-  const { toolsStr, queryStr } = input;
-
+export const defaultSubQuestionPrompt = ({ toolsStr = "", queryStr = "" }) => {
  return `Given a user question, and a list of tools, output a list of relevant sub-questions that when composed can help answer the full user question:

 # Example 1
@@ -268,6 +311,8 @@ ${queryStr}
 `;
 };

+export type SubQuestionPrompt = typeof defaultSubQuestionPrompt;
+
 // DEFAULT_TEMPLATE = """\
 // Given a conversation (between Human and Assistant) and a follow up message from Human, \
 // rewrite the message to be a standalone question that captures all relevant context \
@@ -282,9 +327,10 @@ ${queryStr}
 // <Standalone question>
 // """

-export const defaultCondenseQuestionPrompt: SimplePrompt = (input) => {
-  const { chatHistory, question } = input;
-
+export const defaultCondenseQuestionPrompt = ({
+  chatHistory = "",
+  question = "",
+}) => {
  return `Given a conversation (between Human and Assistant) and a follow up message from Human, rewrite the message to be a standalone question that captures all relevant context from the conversation.

 <Chat History>
@@ -297,6 +343,8 @@ ${question}
 `;
 };

+export type CondenseQuestionPrompt = typeof defaultCondenseQuestionPrompt;
+
 export function messagesToHistoryStr(messages: ChatMessage[]) {
  return messages.reduce((acc, message) => {
    acc += acc ? "\n" : "";
@@ -309,11 +357,42 @@ export function messagesToHistoryStr(messages: ChatMessage[]) {
  }, "");
 }

+/**
+ * Builds the text that presents retrieved context between separator lines.
+ * @param context the context text; defaults to ""
+ */
-export const contextSystemPrompt: SimplePrompt = (input) => {
-  const { context } = input;
-
+export const defaultContextSystemPrompt = ({ context = "" }) => {
  return `Context information is below.
 ---------------------
 ${context}
 ---------------------`;
 };
+
+export type ContextSystemPrompt = typeof defaultContextSystemPrompt;
+
+/**
+ * Builds the prompt that asks for keywords extracted from a text.
+ * @param context the text to extract keywords from; defaults to ""
+ * @param maxKeywords upper bound named in the prompt; defaults to 10
+ * @returns the prompt text (starts and ends with a newline)
+ */
+export const defaultKeywordExtractPrompt = ({
+  context = "",
+  maxKeywords = 10,
+}) => {
+  const separator = "---------------------";
+  const promptLines = [
+    "",
+    `Some text is provided below. Given the text, extract up to ${maxKeywords} keywords from the text. Avoid stopwords.`,
+    separator,
+    `${context}`,
+    separator,
+    "Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'",
+    "",
+  ];
+  return promptLines.join("\n");
+};
+
+export type KeywordExtractPrompt = typeof defaultKeywordExtractPrompt;
+
+/**
+ * Builds the prompt that asks for lookup keywords extracted from a question.
+ * The template is plain text: the parentheses and per-line double quotes of
+ * the Python tuple-of-strings form are not part of the prompt.
+ * @param question the question to extract keywords from; defaults to ""
+ * @param maxKeywords upper bound named in the prompt; defaults to 10
+ */
+export const defaultQueryKeywordExtractPrompt = ({
+  question = "",
+  maxKeywords = 10,
+}) => {
+  return `A question is provided below. Given the question, extract up to ${maxKeywords} keywords from the text. Focus on extracting the keywords that we can use to best lookup answers to the question. Avoid stopwords.
+---------------------
+${question}
+---------------------
+Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'
+`;
+};
+export type QueryKeywordExtractPrompt = typeof defaultQueryKeywordExtractPrompt;
@@ -2,9 +2,9 @@ import { globalsHelper } from "./GlobalsHelper";
 import { SimplePrompt } from "./Prompt";
 import { SentenceSplitter } from "./TextSplitter";
 import {
+  DEFAULT_CHUNK_OVERLAP_RATIO,
  DEFAULT_CONTEXT_WINDOW,
  DEFAULT_NUM_OUTPUTS,
-  DEFAULT_CHUNK_OVERLAP_RATIO,
  DEFAULT_PADDING,
 } from "./constants";

@@ -34,7 +34,7 @@ export class PromptHelper {
  numOutput = DEFAULT_NUM_OUTPUTS;
  chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO;
  chunkSizeLimit?: number;
-  tokenizer: (text: string) => number[];
+  tokenizer: (text: string) => Uint32Array;
  separator = " ";

  constructor(
@@ -42,8 +42,8 @@ export class PromptHelper {
    numOutput = DEFAULT_NUM_OUTPUTS,
    chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO,
    chunkSizeLimit?: number,
-    tokenizer?: (text: string) => number[],
-    separator = " "
+    tokenizer?: (text: string) => Uint32Array,
+    separator = " ",
  ) {
    this.contextWindow = contextWindow;
    this.numOutput = numOutput;
@@ -76,7 +76,7 @@ export class PromptHelper {
  private getAvailableChunkSize(
    prompt: SimplePrompt,
    numChunks = 1,
-    padding = 5
+    padding = 5,
  ) {
    const availableContextSize = this.getAvailableContextSize(prompt);

@@ -99,14 +99,14 @@ export class PromptHelper {
  getTextSplitterGivenPrompt(
    prompt: SimplePrompt,
    numChunks = 1,
-    padding = DEFAULT_PADDING
+    padding = DEFAULT_PADDING,
  ) {
    const chunkSize = this.getAvailableChunkSize(prompt, numChunks, padding);
    if (chunkSize === 0) {
      throw new Error("Got 0 as available chunk size");
    }
    const chunkOverlap = this.chunkOverlapRatio * chunkSize;
-    const textSplitter = new SentenceSplitter(chunkSize, chunkOverlap);
+    const textSplitter = new SentenceSplitter({ chunkSize, chunkOverlap });
    return textSplitter;
  }

@@ -120,7 +120,7 @@ export class PromptHelper {
  repack(
    prompt: SimplePrompt,
    textChunks: string[],
-    padding = DEFAULT_PADDING
+    padding = DEFAULT_PADDING,
  ) {
    const textSplitter = this.getTextSplitterGivenPrompt(prompt, 1, padding);
    const combinedStr = textChunks.join("\n\n");
@@ -1,3 +1,6 @@
+import { v4 as uuidv4 } from "uuid";
+import { Event } from "./callbacks/CallbackManager";
+import { BaseNodePostprocessor } from "./indices/BaseNodePostprocessor";
 import { NodeWithScore, TextNode } from "./Node";
 import {
  BaseQuestionGenerator,
@@ -7,8 +10,6 @@ import {
 import { Response } from "./Response";
 import { CompactAndRefine, ResponseSynthesizer } from "./ResponseSynthesizer";
 import { BaseRetriever } from "./Retriever";
-import { v4 as uuidv4 } from "uuid";
-import { Event } from "./callbacks/CallbackManager";
 import { ServiceContext, serviceContextFromDefaults } from "./ServiceContext";
 import { QueryEngineTool, ToolMetadata } from "./Tool";

@@ -30,16 +31,39 @@ export interface BaseQueryEngine {
 export class RetrieverQueryEngine implements BaseQueryEngine {
  retriever: BaseRetriever;
  responseSynthesizer: ResponseSynthesizer;
+  nodePostprocessors: BaseNodePostprocessor[];
+  preFilters?: unknown;

  constructor(
    retriever: BaseRetriever,
-    responseSynthesizer?: ResponseSynthesizer
+    responseSynthesizer?: ResponseSynthesizer,
+    preFilters?: unknown, // Opaque to this class: stored unchanged and handed to retriever.retrieve().
+    nodePostprocessors?: BaseNodePostprocessor[], // Optional; an omitted value becomes an empty list below.
  ) {
    this.retriever = retriever;
    const serviceContext: ServiceContext | undefined =
      this.retriever.getServiceContext();
    this.responseSynthesizer =
      responseSynthesizer || new ResponseSynthesizer({ serviceContext });
+    this.preFilters = preFilters; // May stay undefined when the caller supplies no filters.
+    this.nodePostprocessors = nodePostprocessors || []; // Default to "no postprocessing" so later code can iterate unconditionally.
+  }
+
+  private applyNodePostprocessors(nodes: NodeWithScore[]) { // Runs `nodes` through every configured postprocessor, in array order.
+    return this.nodePostprocessors.reduce(
+      (nodes, nodePostprocessor) => nodePostprocessor.postprocessNodes(nodes), // Each postprocessor receives the previous one's output.
+      nodes, // Seed value: returned unchanged when the postprocessor list is empty.
+    );
+  }
+
+  private async retrieve(query: string, parentEvent: Event) { // Fetches nodes from the retriever, then applies the node postprocessors.
+    const nodes = await this.retriever.retrieve(
+      query,
+      parentEvent,
+      this.preFilters, // Forwarded exactly as given to the constructor (possibly undefined).
+    );
+
+    return this.applyNodePostprocessors(nodes); // Postprocessed nodes are what query() passes on to the response synthesizer.
  }

  async query(query: string, parentEvent?: Event) {
@@ -48,7 +72,7 @@ export class RetrieverQueryEngine implements BaseQueryEngine {
      type: "wrapper",
      tags: ["final"],
    };
-    const nodes = await this.retriever.retrieve(query, _parentEvent);
+    const nodes = await this.retrieve(query, _parentEvent);
    return this.responseSynthesizer.synthesize(query, nodes, _parentEvent);
  }
 }
@@ -122,7 +146,7 @@ export class SubQuestionQueryEngine implements BaseQueryEngine {
    };

    const subQNodes = await Promise.all(
-      subQuestions.map((subQ) => this.querySubQ(subQ, subQueryParentEvent))
+      subQuestions.map((subQ) => this.querySubQ(subQ, subQueryParentEvent)),
    );

    const nodes = subQNodes
@@ -133,7 +157,7 @@ export class SubQuestionQueryEngine implements BaseQueryEngine {

  private async querySubQ(
    subQ: SubQuestion,
-    parentEvent?: Event
+    parentEvent?: Event,
  ): Promise<NodeWithScore | null> {
    try {
      const question = subQ.subQuestion;
@@ -4,7 +4,7 @@ import {
  SubQuestionOutputParser,
 } from "./OutputParser";
 import {
-  SimplePrompt,
+  SubQuestionPrompt,
  buildToolsText,
  defaultSubQuestionPrompt,
 } from "./Prompt";
@@ -28,7 +28,7 @@ export interface BaseQuestionGenerator {
 */
 export class LLMQuestionGenerator implements BaseQuestionGenerator {
  llm: LLM;
-  prompt: SimplePrompt;
+  prompt: SubQuestionPrompt;
  outputParser: BaseOutputParser<StructuredOutput<SubQuestion[]>>;

  constructor(init?: Partial<LLMQuestionGenerator>) {
@@ -45,7 +45,7 @@ export class LLMQuestionGenerator implements BaseQuestionGenerator {
        this.prompt({
          toolsStr,
          queryStr,
-        })
+        }),
      )
    ).message.content;

@@ -1,14 +1,18 @@
+import { Event } from "./callbacks/CallbackManager";
+import { LLM } from "./llm/LLM";
 import { MetadataMode, NodeWithScore } from "./Node";
 import {
-  SimplePrompt,
  defaultRefinePrompt,
  defaultTextQaPrompt,
+  defaultTreeSummarizePrompt,
+  RefinePrompt,
+  SimplePrompt,
+  TextQaPrompt,
+  TreeSummarizePrompt,
 } from "./Prompt";
 import { getBiggestPrompt } from "./PromptHelper";
 import { Response } from "./Response";
 import { ServiceContext, serviceContextFromDefaults } from "./ServiceContext";
-import { Event } from "./callbacks/CallbackManager";
-import { LLM } from "./llm/LLM";

 /**
 * Response modes of the response synthesizer
@@ -35,7 +39,7 @@ interface BaseResponseBuilder {
    query: string,
    textChunks: string[],
    parentEvent?: Event,
-    prevResponse?: string
+    prevResponse?: string,
  ): Promise<string>;
 }

@@ -54,7 +58,7 @@ export class SimpleResponseBuilder implements BaseResponseBuilder {
  async getResponse(
    query: string,
    textChunks: string[],
-    parentEvent?: Event
+    parentEvent?: Event,
  ): Promise<string> {
    const input = {
      query,
@@ -72,13 +76,13 @@ export class SimpleResponseBuilder implements BaseResponseBuilder {
 */
 export class Refine implements BaseResponseBuilder {
  serviceContext: ServiceContext;
-  textQATemplate: SimplePrompt;
-  refineTemplate: SimplePrompt;
+  textQATemplate: TextQaPrompt;
+  refineTemplate: RefinePrompt;

  constructor(
    serviceContext: ServiceContext,
-    textQATemplate?: SimplePrompt,
-    refineTemplate?: SimplePrompt
+    textQATemplate?: TextQaPrompt,
+    refineTemplate?: RefinePrompt,
  ) {
    this.serviceContext = serviceContext;
    this.textQATemplate = textQATemplate ?? defaultTextQaPrompt;
@@ -89,7 +93,7 @@ export class Refine implements BaseResponseBuilder {
    query: string,
    textChunks: string[],
    parentEvent?: Event,
-    prevResponse?: string
+    prevResponse?: string,
  ): Promise<string> {
    let response: string | undefined = undefined;

@@ -101,7 +105,7 @@ export class Refine implements BaseResponseBuilder {
          prevResponse,
          query,
          chunk,
-          parentEvent
+          parentEvent,
        );
      }
      prevResponse = response;
@@ -113,7 +117,7 @@ export class Refine implements BaseResponseBuilder {
  private async giveResponseSingle(
    queryStr: string,
    textChunk: string,
-    parentEvent?: Event
+    parentEvent?: Event,
  ): Promise<string> {
    const textQATemplate: SimplePrompt = (input) =>
      this.textQATemplate({ ...input, query: queryStr });
@@ -130,7 +134,7 @@ export class Refine implements BaseResponseBuilder {
            textQATemplate({
              context: chunk,
            }),
-            parentEvent
+            parentEvent,
          )
        ).message.content;
      } else {
@@ -138,7 +142,7 @@ export class Refine implements BaseResponseBuilder {
          response,
          queryStr,
          chunk,
-          parentEvent
+          parentEvent,
        );
      }
    }
@@ -150,7 +154,7 @@ export class Refine implements BaseResponseBuilder {
    response: string,
    queryStr: string,
    textChunk: string,
-    parentEvent?: Event
+    parentEvent?: Event,
  ) {
    const refineTemplate: SimplePrompt = (input) =>
      this.refineTemplate({ ...input, query: queryStr });
@@ -166,7 +170,7 @@ export class Refine implements BaseResponseBuilder {
            context: chunk,
            existingAnswer: response,
          }),
-          parentEvent
+          parentEvent,
        )
      ).message.content;
    }
@@ -182,7 +186,7 @@ export class CompactAndRefine extends Refine {
    query: string,
    textChunks: string[],
    parentEvent?: Event,
-    prevResponse?: string
+    prevResponse?: string,
  ): Promise<string> {
    const textQATemplate: SimplePrompt = (input) =>
      this.textQATemplate({ ...input, query: query });
@@ -192,13 +196,13 @@ export class CompactAndRefine extends Refine {
    const maxPrompt = getBiggestPrompt([textQATemplate, refineTemplate]);
    const newTexts = this.serviceContext.promptHelper.repack(
      maxPrompt,
-      textChunks
+      textChunks,
    );
    const response = super.getResponse(
      query,
      newTexts,
      parentEvent,
-      prevResponse
+      prevResponse,
    );
    return response;
  }
@@ -208,52 +212,57 @@ export class CompactAndRefine extends Refine {
 */
 export class TreeSummarize implements BaseResponseBuilder {
  serviceContext: ServiceContext;
+  summaryTemplate: TreeSummarizePrompt;

-  constructor(serviceContext: ServiceContext) {
+  constructor(
+    serviceContext: ServiceContext,
+    summaryTemplate?: TreeSummarizePrompt,
+  ) {
    this.serviceContext = serviceContext;
+    this.summaryTemplate = summaryTemplate ?? defaultTreeSummarizePrompt;
  }

  async getResponse(
    query: string,
    textChunks: string[],
-    parentEvent?: Event
+    parentEvent?: Event,
  ): Promise<string> {
-    const summaryTemplate: SimplePrompt = (input) =>
-      defaultTextQaPrompt({ ...input, query: query });
-
    if (!textChunks || textChunks.length === 0) {
      throw new Error("Must have at least one text chunk");
    }

+    // Should we send the query here too?
    const packedTextChunks = this.serviceContext.promptHelper.repack(
-      summaryTemplate,
-      textChunks
+      this.summaryTemplate,
+      textChunks,
    );

    if (packedTextChunks.length === 1) {
      return (
        await this.serviceContext.llm.complete(
-          summaryTemplate({
+          this.summaryTemplate({
            context: packedTextChunks[0],
+            query,
          }),
-          parentEvent
+          parentEvent,
        )
      ).message.content;
    } else {
      const summaries = await Promise.all(
        packedTextChunks.map((chunk) =>
          this.serviceContext.llm.complete(
-            summaryTemplate({
+            this.summaryTemplate({
              context: chunk,
+              query,
            }),
-            parentEvent
-          )
-        )
+            parentEvent,
+          ),
+        ),
      );

      return this.getResponse(
        query,
-        summaries.map((s) => s.message.content)
+        summaries.map((s) => s.message.content),
      );
    }
  }
@@ -261,7 +270,7 @@ export class TreeSummarize implements BaseResponseBuilder {

 export function getResponseBuilder(
  serviceContext: ServiceContext,
-  responseMode?: ResponseMode
+  responseMode?: ResponseMode,
 ): BaseResponseBuilder {
  switch (responseMode) {
    case ResponseMode.SIMPLE:
@@ -281,31 +290,39 @@ export function getResponseBuilder(
 export class ResponseSynthesizer {
  responseBuilder: BaseResponseBuilder;
  serviceContext: ServiceContext;
+  metadataMode: MetadataMode;

  constructor({
    responseBuilder,
    serviceContext,
+    metadataMode = MetadataMode.NONE,
  }: {
    responseBuilder?: BaseResponseBuilder;
    serviceContext?: ServiceContext;
+    metadataMode?: MetadataMode;
  } = {}) {
    this.serviceContext = serviceContext ?? serviceContextFromDefaults();
    this.responseBuilder =
      responseBuilder ?? getResponseBuilder(this.serviceContext);
+    this.metadataMode = metadataMode;
  }

-  async synthesize(query: string, nodes: NodeWithScore[], parentEvent?: Event) {
-    let textChunks: string[] = nodes.map((node) =>
-      node.node.getContent(MetadataMode.NONE)
+  async synthesize(
+    query: string,
+    nodesWithScore: NodeWithScore[],
+    parentEvent?: Event,
+  ) {
+    let textChunks: string[] = nodesWithScore.map(({ node }) =>
+      node.getContent(this.metadataMode),
    );
    const response = await this.responseBuilder.getResponse(
      query,
      textChunks,
-      parentEvent
+      parentEvent,
    );
    return new Response(
      response,
-      nodes.map((node) => node.node)
+      nodesWithScore.map(({ node }) => node),
    );
  }
 }
--- a/Show More
+++ b/Show More