Merge branch 'main' of github.com:run-llama/LlamaIndexTS into seldo/deploy-fixes

Merge pull request #215 from run-llama/seldo/deploy-fixes
dotenv must load before chat_router or .env isn't picked up in time
2026-07-02 20:13:52 -04:00 · 2023-11-19 17:11:45 -08:00 · 2023-11-19 16:21:19 -08:00 · 2023-11-19 16:15:41 -08:00 · 2023-11-19 15:55:27 -08:00 · 2023-11-19 15:54:53 -08:00
272 changed files with 13624 additions and 3043 deletions
@@ -1,5 +1,5 @@
 name: Bugfix
-title: 'Sweep: '
+title: "Sweep: "
 description: Write something like "We notice ... behavior when ... happens instead of ...""
 labels: sweep
 body:
@@ -8,4 +8,4 @@ body:
    attributes:
      label: Details
      description: More details about the bug
-      placeholder: The bug might be in ... file
+      placeholder: The bug might be in ... file
@@ -1,5 +1,5 @@
 name: Feature Request
-title: 'Sweep: '
+title: "Sweep: "
 description: Write something like "Write an api endpoint that does "..." in the "..." file"
 labels: sweep
 body:
@@ -8,4 +8,4 @@ body:
    attributes:
      label: Details
      description: More details for Sweep
-      placeholder: The new endpoint should use the ... class from ... file because it contains ... logic
+      placeholder: The new endpoint should use the ... class from ... file because it contains ... logic
@@ -1,5 +1,5 @@
 name: Refactor
-title: 'Sweep: '
+title: "Sweep: "
 description: Write something like "Modify the ... api endpoint to use ... version and ... framework"
 labels: sweep
 body:
@@ -8,4 +8,4 @@ body:
    attributes:
      label: Details
      description: More details for Sweep
-      placeholder: We are migrating this function to ... version because ...
+      placeholder: We are migrating this function to ... version because ...
@@ -22,4 +22,4 @@ jobs:
        run: pnpm install

      - name: Run lint
-        run: pnpm run lint
+        run: pnpm run lint
@@ -7,18 +7,18 @@ jobs:
    runs-on: ubuntu-latest

    steps:
-    - name: Checkout code
-      uses: actions/checkout@v2
+      - name: Checkout code
+        uses: actions/checkout@v2

-    - name: Setup Node.js
-      uses: actions/setup-node@v2
-      with:
-        node-version: '18'
+      - name: Setup Node.js
+        uses: actions/setup-node@v2
+        with:
+          node-version: "18"

-    - name: Install dependencies
-      run: |
-        npm i -g pnpm
-        pnpm install
+      - name: Install dependencies
+        run: |
+          npm i -g pnpm
+          pnpm install

-    - name: Run tests
-      run: pnpm run test
+      - name: Run tests
+        run: pnpm run test
@@ -3,6 +3,7 @@
 # dependencies
 node_modules
 .pnp
+.pnpm-store
 .pnp.js

 # testing
@@ -36,3 +37,6 @@ yarn-error.log*
 .vercel

 dist/
+
+# vs code
+.vscode/launch.json
@@ -2,3 +2,4 @@
 . "$(dirname -- "$0")/_/husky.sh"

 pnpm lint
+npx lint-staged
@@ -20,7 +20,7 @@ In a new folder:
 export OPENAI_API_KEY="sk-......" # Replace with your key from https://platform.openai.com/account/api-keys
 pnpm init
 pnpm install typescript
-pnpm exec tsc –-init # if needed
+pnpm exec tsc --init # if needed
 pnpm install llamaindex
 pnpm install @types/node
 ```
@@ -36,7 +36,7 @@ async function main() {
  // Load essay from abramov.txt in Node
  const essay = await fs.readFile(
    "node_modules/llamaindex/examples/abramov.txt",
-    "utf-8"
+    "utf-8",
  );

  // Create Document object with essay
@@ -48,7 +48,7 @@ async function main() {
  // Query the index
  const queryEngine = index.asQueryEngine();
  const response = await queryEngine.query(
-    "What did the author do in college?"
+    "What did the author do in college?",
  );

  // Output response
@@ -61,7 +61,7 @@ main();
 Then you can run it using

 ```bash
-pnpm dlx ts-node example.ts
+pnpx ts-node example.ts
 ```

 ## Playground
@@ -84,6 +84,26 @@ Check out our NextJS playground at https://llama-playground.vercel.app/. The sou

 - [SimplePrompt](/packages/core/src/Prompt.ts): A simple standardized function call definition that takes in inputs and formats them in a template literal. SimplePrompts can be specialized using currying and combined using other SimplePrompt functions.

+## Note: NextJS:
+
+If you're using NextJS App Router, you'll need to use the NodeJS runtime (default) and add the following config to your next.config.js to have it use imports/exports in the same way Node does.
+
+```js
+export const runtime = "nodejs"; // default
+```
+
+```js
+// next.config.js
+/** @type {import('next').NextConfig} */
+const nextConfig = {
+  experimental: {
+    serverComponentsExternalPackages: ["pdf-parse"], // Puts pdf-parse in actual NodeJS mode with NextJS App Router
+  },
+};
+
+module.exports = nextConfig;
+```
+
 ## Supported LLMs:

 - OpenAI GPT-3.5-turbo and GPT-4
@@ -1,3 +1,3 @@
 module.exports = {
-  presets: [require.resolve('@docusaurus/core/lib/babel/preset')],
+  presets: [require.resolve("@docusaurus/core/lib/babel/preset")],
 };
@@ -8,40 +8,42 @@ LlamaIndex.TS helps you build LLM-powered applications (e.g. Q&A, chatbot) over

 In this high-level concepts guide, you will learn:

-* how an LLM can answer questions using your own data.
-* key concepts and modules in LlamaIndex.TS for composing your own query pipeline.
+- how an LLM can answer questions using your own data.
+- key concepts and modules in LlamaIndex.TS for composing your own query pipeline.

 ## Answering Questions Across Your Data

 LlamaIndex uses a two stage method when using an LLM with your data:

-1) **indexing stage**: preparing a knowledge base, and
-2) **querying stage**: retrieving relevant context from the knowledge to assist the LLM in responding to a question
+1. **indexing stage**: preparing a knowledge base, and
+2. **querying stage**: retrieving relevant context from the knowledge to assist the LLM in responding to a question

 ![](./_static/concepts/rag.jpg)

 This process is also known as Retrieval Augmented Generation (RAG).

-LlamaIndex.TS provides the essential toolkit for making both steps super easy. 
+LlamaIndex.TS provides the essential toolkit for making both steps super easy.

 Let's explore each stage in detail.

 ### Indexing Stage
+
 LlamaIndex.TS helps you prepare the knowledge base with a suite of data connectors and indexes.

-![](./_static/concepts/indexing.jpg) 
+![](./_static/concepts/indexing.jpg)

 [**Data Loaders**](./modules/high_level/data_loader.md):
 A data connector (i.e. `Reader`) ingest data from different data sources and data formats into a simple `Document` representation (text and simple metadata).

 [**Documents / Nodes**](./modules/high_level/documents_and_nodes.md): A `Document` is a generic container around any data source - for instance, a PDF, an API output, or retrieved data from a database. A `Node` is the atomic unit of data in LlamaIndex and represents a "chunk" of a source `Document`. It's a rich representation that includes metadata and relationships (to other nodes) to enable accurate and expressive retrieval operations.

-[**Data Indexes**](./modules/high_level/data_index.md): 
+[**Data Indexes**](./modules/high_level/data_index.md):
 Once you've ingested your data, LlamaIndex helps you index data into a format that's easy to retrieve.

 Under the hood, LlamaIndex parses the raw documents into intermediate representations, calculates vector embeddings, and stores your data in-memory or to disk.

 ### Querying Stage
+
 In the querying stage, the query pipeline retrieves the most relevant context given a user query,
 and pass that to the LLM (along with the query) to synthesize a response.

@@ -57,12 +59,13 @@ These building blocks can be customized to reflect ranking preferences, as well
 ![](./_static/concepts/querying.jpg)

 #### Building Blocks
-[**Retrievers**](./modules/low_level/retriever.md): 
+
+[**Retrievers**](./modules/low_level/retriever.md):
 A retriever defines how to efficiently retrieve relevant context from a knowledge base (i.e. index) when given a query.
 The specific retrieval logic differs for different indices, the most popular being dense retrieval against a vector index.

 [**Response Synthesizers**](./modules/low_level/response_synthesizer.md):
-A response synthesizer generates a response from an LLM, using a user query and a given set of retrieved text chunks.  
+A response synthesizer generates a response from an LLM, using a user query and a given set of retrieved text chunks.

 #### Pipelines

@@ -70,7 +73,6 @@ A response synthesizer generates a response from an LLM, using a user query and
 A query engine is an end-to-end pipeline that allows you to ask questions over your data.
 It takes in a natural language query, and returns a response, along with reference context retrieved and passed to the LLM.

-
-[**Chat Engines**](./modules/high_level/chat_engine.md): 
+[**Chat Engines**](./modules/high_level/chat_engine.md):
 A chat engine is an end-to-end pipeline for having a conversation with your data
 (multiple back-and-forth instead of a single question & answer).
@@ -6,18 +6,20 @@ sidebar_position: 4

 We include several end-to-end examples using LlamaIndex.TS in the repository

+Check out the examples below, or try them out and complete them in minutes with interactive GitHub Codespaces tutorials provided by Dev-Docs [here](https://codespaces.new/team-dev-docs/lits-dev-docs-playground?devcontainer_path=.devcontainer%2Fjavascript_ltsquickstart%2Fdevcontainer.json):
+
 ## [Chat Engine](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/chatEngine.ts)

 Read a file and chat about it with the LLM.

-## [List Index](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/listIndex.ts)
-
-Create a list index and query it. This example also use the `LLMRetriever`, which will use the LLM to select the best nodes to use when generating answer.
-
 ## [Vector Index](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/vectorIndex.ts)

 Create a vector index and query it. The vector index will use embeddings to fetch the top k most relevant nodes. By default, the top k is 2.

+## [Summary Index](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/summaryIndex.ts)
+
+Create a summary index and query it. This example also uses the `LLMRetriever`, which will use the LLM to select the best nodes to use when generating an answer.
+
 ## [Save / Load an Index](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/storageContext.ts)

 Create and load a vector index. Persistence to disk in LlamaIndex.TS happens automatically once a storage context object is created.
@@ -28,7 +30,7 @@ Create a vector index and query it, while also configuring the the `LLM`, the `S

 ## [OpenAI LLM](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/openai.ts)

-Create an OpenAI LLM and directly use it for chat. 
+Create an OpenAI LLM and directly use it for chat.

 ## [Llama2 DeuceLLM](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/llamadeuce.ts)

@@ -40,4 +42,4 @@ Uses the `SubQuestionQueryEngine`, which breaks complex queries into multiple qu

 ## [Low Level Modules](https://github.com/run-llama/LlamaIndexTS/blob/main/apps/simple/lowlevel.ts)

-This example uses several low-level components, which removes the need for an actual query engine. These components can be used anywhere, in any application, or customized and sub-classed to meet your own needs.
+This example uses several low-level components, which removes the need for an actual query engine. These components can be used anywhere, in any application, or customized and sub-classed to meet your own needs.
@@ -0,0 +1,29 @@
+---
+sidebar_position: 5
+---
+
+# Environments
+
+LlamaIndex currently officially supports NodeJS 18 and NodeJS 20.
+
+## NextJS App Router
+
+If you're using NextJS App Router route handlers/serverless functions, you'll need to use the NodeJS mode:
+
+```js
+export const runtime = "nodejs"; // default
+```
+
+and you'll need to add an exception for pdf-parse in your next.config.js:
+
+```js
+// next.config.js
+/** @type {import('next').NextConfig} */
+const nextConfig = {
+  experimental: {
+    serverComponentsExternalPackages: ["pdf-parse"], // Puts pdf-parse in actual NodeJS mode with NextJS App Router
+  },
+};
+
+module.exports = nextConfig;
+```
@@ -19,7 +19,7 @@ That's where **LlamaIndex.TS** comes in.

 LlamaIndex.TS provides the following tools:

- **Data loading** ingest your existing `txt` and `pdf` data directly
+- **Data loading** ingest your existing `.txt`, `.pdf`, `.csv`, `.md` and `.docx` data directly
 - **Data indexes** structure your data in intermediate representations that are easy and performant for LLMs to consume.
 - **Engines** provide natural language access to your data. For example:
  - Query engines are powerful retrieval interfaces for knowledge-augmented output.
@@ -1,3 +1,3 @@
 label: "Modules"
 collapsed: false
-position: 5
+position: 5
@@ -1 +1 @@
-label: High-Level Modules
+label: High-Level Modules
@@ -6,23 +6,18 @@ sidebar_position: 2

 An index is the basic container and organization for your data. LlamaIndex.TS supports two indexes:

- `ListIndex` - will send every `Node` in the index to the LLM in order to generate a response
 - `VectorStoreIndex` - will send the top-k `Node`s to the LLM when generating a response. The default top-k is 2.
+- `SummaryIndex` - will send every `Node` in the index to the LLM in order to generate a response

 ```typescript
-import {
-  Document,
-  VectorStoreIndex,
-} from "llamaindex";
+import { Document, VectorStoreIndex } from "llamaindex";

 const document = new Document({ text: "test" });

-const index = await VectorStoreIndex.fromDocuments(
-  [document]
-);
+const index = await VectorStoreIndex.fromDocuments([document]);
 ```

 ## API Reference

- [ListIndex](../../api/classes/ListIndex.md)
- [VectorStoreIndex](../../api/classes/VectorStoreIndex.md)
+- [SummaryIndex](../../api/classes/SummaryIndex.md)
+- [VectorStoreIndex](../../api/classes/VectorStoreIndex.md)
@@ -4,7 +4,7 @@ sidebar_position: 1

 # Reader / Loader

-LlamaIndex.TS supports easy loading of files from folders using the `SimpleDirectoryReader` class. Currently, `.txt` and `.pdf` files are supported, with more planned in the future!
+LlamaIndex.TS supports easy loading of files from folders using the `SimpleDirectoryReader` class. Currently, `.txt`, `.pdf`, `.csv`, `.md` and `.docx` files are supported, with more planned in the future!

 ```typescript
 import { SimpleDirectoryReader } from "llamaindex";
@@ -9,7 +9,7 @@ sidebar_position: 0
 ```typescript
 import { Document } from "llamaindex";

-document = new Document({ text: "text", metadata: { "key": "val" }});
+document = new Document({ text: "text", metadata: { key: "val" } });
 ```

 ## API Reference
@@ -1 +1 @@
-label: Low-Level Modules
+label: Low-Level Modules
@@ -4,7 +4,7 @@ sidebar_position: 1

 # Embedding

-The embedding model in LlamaIndex is responsible for creating numerical representations of text. By default, LlamaIndex will use the `text-embedding-ada-002` model from OpenAI. 
+The embedding model in LlamaIndex is responsible for creating numerical representations of text. By default, LlamaIndex will use the `text-embedding-ada-002` model from OpenAI.

 This can be explicitly set in the `ServiceContext` object.

@@ -4,7 +4,7 @@ sidebar_position: 0

 # LLM

-The LLM is responsible for reading text and generating natural language responses to queries. By default, LlamaIndex.TS uses `gpt-3.5-turbo`. 
+The LLM is responsible for reading text and generating natural language responses to queries. By default, LlamaIndex.TS uses `gpt-3.5-turbo`.

 The LLM can be explicitly set in the `ServiceContext` object.

@@ -19,4 +19,4 @@ const serviceContext = serviceContextFromDefaults({ llm: openaiLLM });
 ## API Reference

 - [OpenAI](../../api/classes/OpenAI.md)
- [ServiceContext](../../api/interfaces/ServiceContext.md)
+- [ServiceContext](../../api/interfaces/ServiceContext.md)
@@ -7,10 +7,7 @@ sidebar_position: 3
 The `NodeParser` in LlamaIndex is responsible for splitting `Document` objects into more manageable `Node` objects. When you call `.fromDocuments()`, the `NodeParser` from the `ServiceContext` is used to do this automatically for you. Alternatively, you can use it to split documents ahead of time.

 ```typescript
-import {
-  Document,
-  SimpleNodeParser,
-} from "llamaindex";
+import { Document, SimpleNodeParser } from "llamaindex";

 const nodeParser = new SimpleNodeParser();
 const nodes = nodeParser.getNodesFromDocuments([
@@ -25,7 +22,7 @@ The underlying text splitter will split text by sentences. It can also be used a
 ```typescript
 import { SentenceSplitter } from "llamaindex";

-const splitter = new SentenceSplitter({ chunkSize: 1, });
+const splitter = new SentenceSplitter({ chunkSize: 1 });

 const textSplits = splitter.splitText("Hello World");
 ```
@@ -6,26 +6,21 @@ sidebar_position: 6

 The ResponseSynthesizer is responsible for sending the query, nodes, and prompt templates to the LLM to generate a response. There are a few key modes for generating a response:

- `Refine`: "create and refine" an answer by sequentially going through each retrieved text chunk. 
-    This makes a separate LLM call per Node. Good for more detailed answers.
- `CompactAndRefine` (default): "compact" the prompt during each LLM call by stuffing as 
-    many text chunks that can fit within the maximum prompt size. If there are 
-    too many chunks to stuff in one prompt, "create and refine" an answer by going through
-    multiple compact prompts. The same as `refine`, but should result in less LLM calls.
- `TreeSummarize`: Given a set of text chunks and the query, recursively construct a tree 
-    and return the root node as the response. Good for summarization purposes.
+- `Refine`: "create and refine" an answer by sequentially going through each retrieved text chunk.
+  This makes a separate LLM call per Node. Good for more detailed answers.
+- `CompactAndRefine` (default): "compact" the prompt during each LLM call by stuffing as
+  many text chunks that can fit within the maximum prompt size. If there are
+  too many chunks to stuff in one prompt, "create and refine" an answer by going through
+  multiple compact prompts. The same as `refine`, but should result in less LLM calls.
+- `TreeSummarize`: Given a set of text chunks and the query, recursively construct a tree
+  and return the root node as the response. Good for summarization purposes.
 - `SimpleResponseBuilder`: Given a set of text chunks and the query, apply the query to each text
-    chunk while accumulating the responses into an array. Returns a concatenated string of all
-    responses. Good for when you need to run the same query separately against each text
-    chunk.
+  chunk while accumulating the responses into an array. Returns a concatenated string of all
+  responses. Good for when you need to run the same query separately against each text
+  chunk.

 ```typescript
-import {
-  TextNode,
-  NodeWithScore,
-  ResponseSynthesizer,
-  CompactAndRefine
-} from "llamaindex";
+import { NodeWithScore, ResponseSynthesizer, TextNode } from "llamaindex";

 const responseSynthesizer = new ResponseSynthesizer();

@@ -42,7 +37,7 @@ const nodesWithScore: NodeWithScore[] = [

 const response = await responseSynthesizer.synthesize(
  "What age am I?",
-  nodesWithScore
+  nodesWithScore,
 );
 console.log(response.response);
 ```
@@ -4,10 +4,10 @@ sidebar_position: 5

 # Retriever

-A retriever in LlamaIndex is what is used to fetch `Node`s from an index using a query string. For example, a `ListIndexRetriever` will fetch all nodes no matter the query. Meanwhile, a `VectorIndexRetriever` will only fetch the top-k most similar nodes.
+A retriever in LlamaIndex is what is used to fetch `Node`s from an index using a query string. A `VectorIndexRetriever` will fetch the top-k most similar nodes. Meanwhile, a `SummaryIndexRetriever` will fetch all nodes no matter the query.

 ```typescript
-const retriever = vector_index.asRetriever()
+const retriever = vector_index.asRetriever();
 retriever.similarityTopK = 3;

 // Fetch nodes!
@@ -16,6 +16,6 @@ const nodesWithScore = await retriever.retrieve("query string");

 ## API Reference

- [ListIndexRetriever](../../api/classes/ListIndexRetriever.md)
- [ListIndexLLMRetriever](../../api/classes/ListIndexLLMRetriever.md)
+- [SummaryIndexRetriever](../../api/classes/SummaryIndexRetriever.md)
+- [SummaryIndexLLMRetriever](../../api/classes/SummaryIndexLLMRetriever.md)
 - [VectorIndexRetriever](../../api/classes/VectorIndexRetriever.md)
@@ -11,10 +11,14 @@ Right now, only saving and loading from disk is supported, with future integrati
 ```typescript
 import { Document, VectorStoreIndex, storageContextFromDefaults } from "./src";

-const storageContext = await storageContextFromDefaults({ persistDir: "./storage" });
+const storageContext = await storageContextFromDefaults({
+  persistDir: "./storage",
+});

 const document = new Document({ text: "Test Text" });
-const index = await VectorStoreIndex.fromDocuments([document], { storageContext });
+const index = await VectorStoreIndex.fromDocuments([document], {
+  storageContext,
+});
 ```

 ## API Reference
@@ -25,7 +25,7 @@ async function main() {
  // Load essay from abramov.txt in Node
  const essay = await fs.readFile(
    "node_modules/llamaindex/examples/abramov.txt",
-    "utf-8"
+    "utf-8",
  );

  // Create Document object with essay
@@ -37,7 +37,7 @@ async function main() {
  // Query the index
  const queryEngine = index.asQueryEngine();
  const response = await queryEngine.query(
-    "What did the author do in college?"
+    "What did the author do in college?",
  );

  // Output response
@@ -139,6 +139,8 @@ const config = {
        entryPoints: ["../../packages/core/src/index.ts"],
        tsconfig: "../../packages/core/tsconfig.json",
        readme: "none",
+        sourceLinkTemplate:
+          "https://github.com/run-llama/LlamaIndexTS/blob/{gitRevision}/{path}#L{line}",
        sidebar: {
          position: 6,
        },
@@ -15,24 +15,24 @@
    "typecheck": "tsc"
  },
  "dependencies": {
-    "@docusaurus/core": "2.4.1",
-    "@docusaurus/preset-classic": "2.4.1",
-    "@docusaurus/remark-plugin-npm2yarn": "^2.4.1",
+    "@docusaurus/core": "2.4.3",
+    "@docusaurus/preset-classic": "2.4.3",
+    "@docusaurus/remark-plugin-npm2yarn": "^2.4.3",
    "@mdx-js/react": "^1.6.22",
    "clsx": "^1.2.1",
-    "postcss": "^8.4.28",
+    "postcss": "^8.4.31",
    "prism-react-renderer": "^1.3.5",
    "raw-loader": "^4.0.2",
    "react": "^17.0.2",
    "react-dom": "^17.0.2"
  },
  "devDependencies": {
-    "@docusaurus/module-type-aliases": "2.4.1",
-    "@docusaurus/types": "^2.4.1",
-    "@tsconfig/docusaurus": "^1.0.7",
+    "@docusaurus/module-type-aliases": "2.4.3",
+    "@docusaurus/types": "^2.4.3",
+    "@tsconfig/docusaurus": "^2.0.1",
    "docusaurus-plugin-typedoc": "^0.19.2",
    "typedoc": "^0.24.8",
-    "typedoc-plugin-markdown": "^3.15.4",
+    "typedoc-plugin-markdown": "^3.16.0",
    "typescript": "^4.9.5"
  },
  "browserslist": {
@@ -1,5 +1,5 @@
-import React from "react";
 import clsx from "clsx";
+import React from "react";
 import styles from "./styles.module.css";

 type FeatureItem = {
@@ -18,7 +18,7 @@
 }

 /* For readability concerns, you should choose a lighter palette in dark mode. */
-[data-theme='dark'] {
+[data-theme="dark"] {
  --ifm-color-primary: #25c2a0;
  --ifm-color-primary-dark: #21af90;
  --ifm-color-primary-darker: #1fa588;
@@ -1,5 +1,133 @@
 # simple

+## 0.0.33
+
+### Patch Changes
+
+- Updated dependencies [63f2108]
+  - llamaindex@0.0.35
+
+## 0.0.32
+
+### Patch Changes
+
+- Updated dependencies [2a27e21]
+  - llamaindex@0.0.34
+
+## 0.0.31
+
+### Patch Changes
+
+- Updated dependencies [5e2e92c]
+  - llamaindex@0.0.33
+
+## 0.0.30
+
+### Patch Changes
+
+- Updated dependencies [90c0b83]
+- Updated dependencies [dfd22aa]
+  - llamaindex@0.0.32
+
+## 0.0.29
+
+### Patch Changes
+
+- Updated dependencies [6c55b2d]
+- Updated dependencies [8aa8c65]
+- Updated dependencies [6c55b2d]
+  - llamaindex@0.0.31
+
+## 0.0.28
+
+### Patch Changes
+
+- Updated dependencies [139abad]
+- Updated dependencies [139abad]
+- Updated dependencies [eb0e994]
+- Updated dependencies [eb0e994]
+- Updated dependencies [139abad]
+  - llamaindex@0.0.30
+
+## 0.0.27
+
+### Patch Changes
+
+- Updated dependencies [a52143b]
+- Updated dependencies [1b7fd95]
+- Updated dependencies [0db3f41]
+  - llamaindex@0.0.29
+
+## 0.0.26
+
+### Patch Changes
+
+- Updated dependencies [96bb657]
+- Updated dependencies [96bb657]
+- Updated dependencies [837854d]
+  - llamaindex@0.0.28
+
+## 0.0.25
+
+### Patch Changes
+
+- Updated dependencies [4a5591b]
+- Updated dependencies [4a5591b]
+- Updated dependencies [4a5591b]
+  - llamaindex@0.0.27
+
+## 0.0.24
+
+### Patch Changes
+
+- Updated dependencies [5bb55bc]
+  - llamaindex@0.0.26
+
+## 0.0.23
+
+### Patch Changes
+
+- Updated dependencies [e21eca2]
+- Updated dependencies [40a8f07]
+- Updated dependencies [40a8f07]
+  - llamaindex@0.0.25
+
+## 0.0.22
+
+### Patch Changes
+
+- Updated dependencies [e4af7b3]
+- Updated dependencies [259fe63]
+  - llamaindex@0.0.24
+
+## 0.0.21
+
+### Patch Changes
+
+- Updated dependencies
+- Updated dependencies [9d6b2ed]
+  - llamaindex@0.0.23
+
+## 0.0.20
+
+### Patch Changes
+
+- Updated dependencies [454f3f8]
+- Updated dependencies [454f3f8]
+- Updated dependencies [454f3f8]
+- Updated dependencies [454f3f8]
+- Updated dependencies [99df58f]
+  - llamaindex@0.0.22
+
+## 0.0.19
+
+### Patch Changes
+
+- Updated dependencies [f7a57ca]
+- Updated dependencies [0a09de2]
+- Updated dependencies [f7a57ca]
+  - llamaindex@0.0.21
+
 ## 0.0.18

 ### Patch Changes
@@ -1,9 +1,10 @@
 # Simple Examples

 Due to packaging, you will need to run these commands to get started.
+
 ```bash
-pnpm --filter llamaindex build
 pnpm install
+pnpm --filter llamaindex build
 ```

 Then run the examples with `ts-node`, for example `npx ts-node vectorIndex.ts`
@@ -4,7 +4,6 @@ import {
  PapaCSVReader,
  ResponseSynthesizer,
  serviceContextFromDefaults,
-  SimplePrompt,
  VectorStoreIndex,
 } from "llamaindex";

@@ -23,9 +22,7 @@ async function main() {
    serviceContext,
  });

-  const csvPrompt: SimplePrompt = (input) => {
-    const { context = "", query = "" } = input;
-
+  const csvPrompt = ({ context = "", query = "" }) => {
    return `The following CSV file is loaded from ${path}
 \`\`\`csv
 ${context}
@@ -0,0 +1,24 @@
+import { SimpleDirectoryReader } from "llamaindex";
+
+function callback(
+  category: string,
+  name: string,
+  status: any,
+  message?: string,
+): boolean {
+  console.log(category, name, status, message);
+  if (name.endsWith(".pdf")) {
+    console.log("I DON'T WANT PDF FILES!");
+    return false;
+  }
+  return true;
+}
+
+async function main() {
+  // Load page
+  const reader = new SimpleDirectoryReader(callback);
+  const params = { directoryPath: "./data" };
+  await reader.loadData(params);
+}
+
+main().catch(console.error);
@@ -0,0 +1,21 @@
+import { HTMLReader, VectorStoreIndex } from "llamaindex";
+
+async function main() {
+  // Load page
+  const reader = new HTMLReader();
+  const documents = await reader.loadData("data/18-1_Changelog.html");
+
+  // Split text and create embeddings. Store them in a VectorStoreIndex
+  const index = await VectorStoreIndex.fromDocuments(documents);
+
+  // Query the index
+  const queryEngine = index.asQueryEngine();
+  const response = await queryEngine.query(
+    "What were the notable changes in 18.1?",
+  );
+
+  // Output response
+  console.log(response.toString());
+}
+
+main().catch(console.error);
@@ -0,0 +1,32 @@
+import {
+  Document,
+  KeywordTableIndex,
+  KeywordTableRetrieverMode,
+} from "llamaindex";
+import essay from "./essay";
+
+// Queries the essay once per keyword retriever mode and prints each answer.
+async function main() {
+  const document = new Document({ text: essay, id_: "essay" });
+  const index = await KeywordTableIndex.fromDocuments([document]);
+
+  const allModes: KeywordTableRetrieverMode[] = [
+    KeywordTableRetrieverMode.DEFAULT,
+    KeywordTableRetrieverMode.SIMPLE,
+    KeywordTableRetrieverMode.RAKE,
+  ];
+  // Awaited in a plain loop (not forEach + async) so failures reject main().
+  for (const mode of allModes) {
+    const queryEngine = index.asQueryEngine({
+      retriever: index.asRetriever({ mode }),
+    });
+    const response = await queryEngine.query(
+      "What did the author do growing up?",
+    );
+    console.log(response.toString());
+  }
+}
+
+main().catch((e: Error) => {
+  console.error(e, e.stack);
+});
@@ -0,0 +1,47 @@
+import { ChatMessage, SimpleChatEngine } from "llamaindex";
+import { stdin as input, stdout as output } from "node:process";
+import readline from "node:readline/promises";
+import { Anthropic } from "../../packages/core/src/llm/LLM";
+
+// Interactive streaming chat loop against Anthropic; an empty query exits.
+async function main() {
+  const query: string = `
+Where is Istanbul?
+  `;
+
+  // const llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
+  const llm = new Anthropic();
+  const message: ChatMessage = { content: query, role: "user" };
+
+  //TODO: Add callbacks later
+
+  //Stream Complete
+  //Note: Setting streaming flag to true or false will auto-set your return type to
+  //either an AsyncGenerator or a Response.
+  // Omitting the streaming flag automatically sets streaming to false
+
+  const chatEngine: SimpleChatEngine = new SimpleChatEngine({
+    chatHistory: undefined,
+    llm: llm,
+  });
+
+  const rl = readline.createInterface({ input, output });
+  while (true) {
+    const query = await rl.question("Query: ");
+    if (!query) {
+      break;
+    }
+
+    //Case 1: .chat(query, undefined, true) => Stream
+    //Case 2: .chat(query, undefined, false) => Response object
+    //Case 3: .chat(query, undefined) => Response object
+    const chatStream = await chatEngine.chat(query, undefined, true);
+    for await (const part of chatStream) {
+      process.stdout.write(part);
+    }
+  }
+  // Close readline so the open stdin handle does not keep the process alive.
+  rl.close();
+}
+
+main();
@@ -1,9 +1,9 @@
 import {
  Document,
-  TextNode,
  NodeWithScore,
  ResponseSynthesizer,
  SimpleNodeParser,
+  TextNode,
 } from "llamaindex";

 (async () => {
@@ -29,7 +29,7 @@ import {

  const response = await responseSynthesizer.synthesize(
    "What age am I?",
-    nodesWithScore
+    nodesWithScore,
  );
  console.log(response.response);
 })();
@@ -0,0 +1,20 @@
+import { MarkdownReader, VectorStoreIndex } from "llamaindex";
+
+async function main() {
+  // Load Markdown file
+  const reader = new MarkdownReader();
+  const documents = await reader.loadData("node_modules/llamaindex/README.md");
+
+  // Split text and create embeddings. Store them in a VectorStoreIndex
+  const index = await VectorStoreIndex.fromDocuments(documents);
+
+  // Query the index
+  const queryEngine = index.asQueryEngine();
+
+  const response = await queryEngine.query("What does the example code do?");
+
+  // Output response
+  console.log(response.toString());
+}
+
+main().catch(console.error);
@@ -0,0 +1,79 @@
+import { MongoClient } from "mongodb";
+import { Document } from "../../packages/core/src/Node";
+import { VectorStoreIndex } from "../../packages/core/src/indices";
+import { SimpleMongoReader } from "../../packages/core/src/readers/SimpleMongoReader";
+
+import { stdin as input, stdout as output } from "node:process";
+import readline from "node:readline/promises";
+
+/**
+ * Loads documents through SimpleMongoReader, indexes them in a
+ * VectorStoreIndex and answers questions typed on stdin until an empty line
+ * is entered. The Mongo client and the readline interface are always closed
+ * on the way out so the process can exit.
+ */
+async function main() {
+  //Dummy test code
+  const options: object = {};
+  const projections: object = { embedding: 0 };
+  const uri: string = process.env.MONGODB_URI ?? "fake_uri";
+  const client: MongoClient = new MongoClient(uri);
+  const rl = readline.createInterface({ input, output });
+
+  try {
+    //Where the real code starts
+    const MR = new SimpleMongoReader(client);
+    // NOTE(review): "data" / "posts" are presumably the database and
+    // collection names and 1 a document limit — confirm against SimpleMongoReader
+    const documents: Document[] = await MR.loadData(
+      "data",
+      "posts",
+      1,
+      {},
+      options,
+      projections,
+    );
+
+    //
+    //If you need to look at low-level details of
+    // a queryEngine (for example, needing to check each individual node)
+    //
+
+    // Split text and create embeddings. Store them in a VectorStoreIndex
+    // var storageContext = await storageContextFromDefaults({});
+    // var serviceContext = serviceContextFromDefaults({});
+    // const docStore = storageContext.docStore;
+
+    // for (const doc of documents) {
+    //   docStore.setDocumentHash(doc.id_, doc.hash);
+    // }
+    // const nodes = serviceContext.nodeParser.getNodesFromDocuments(documents);
+    // console.log(nodes);
+
+    //
+    //Making Vector Store from documents
+    //
+
+    const index = await VectorStoreIndex.fromDocuments(documents);
+    // Create query engine
+    const queryEngine = index.asQueryEngine();
+
+    while (true) {
+      const query = await rl.question("Query: ");
+
+      if (!query) {
+        break;
+      }
+
+      const response = await queryEngine.query(query);
+
+      // Output response
+      console.log(response.toString());
+    }
+  } finally {
+    rl.close();
+    await client.close();
+  }
+}
+
+main().catch(console.error);
@@ -0,0 +1,97 @@
+import { Client } from "@notionhq/client";
+import { program } from "commander";
+import { NotionReader, VectorStoreIndex } from "llamaindex";
+import { stdin as input, stdout as output } from "node:process";
+// readline/promises is still experimental so not in @types/node yet
+// @ts-ignore
+import readline from "node:readline/promises";
+
+// CLI example: given a page id, loads that Notion page, indexes it and answers
+// questions typed on stdin; without one, lists the pages the integration can see.
+program
+  .argument("[page]", "Notion page id (must be provided)")
+  .action(async (page, _options, command) => {
+    // Initializing a client
+
+    if (!process.env.NOTION_TOKEN) {
+      console.log(
+        "No NOTION_TOKEN found in environment variables. You will need to register an integration https://www.notion.com/my-integrations and put it in your NOTION_TOKEN environment variable.",
+      );
+      return;
+    }
+
+    const notion = new Client({
+      auth: process.env.NOTION_TOKEN,
+    });
+
+    if (!page) {
+      const response = await notion.search({
+        filter: {
+          value: "page",
+          property: "object",
+        },
+        sort: {
+          direction: "descending",
+          timestamp: "last_edited_time",
+        },
+      });
+
+      const { results } = response;
+
+      if (results.length === 0) {
+        console.log(
+          "No pages found. You will need to share it with your integration. (tap the three dots on the top right, find Add connections, and add your integration)",
+        );
+        return;
+      } else {
+        const pages = results
+          .map((result) => {
+            if (!("url" in result)) {
+              return null;
+            }
+
+            return {
+              id: result.id,
+              url: result.url,
+            };
+          })
+          .filter((page) => page !== null);
+        console.log("Found pages:");
+        console.table(pages);
+        console.log(`To run, run ts-node ${command.name()} [page id]`);
+        return;
+      }
+    }
+
+    const reader = new NotionReader({ client: notion });
+    const documents = await reader.loadData(page);
+    console.log(documents);
+
+    // Split text and create embeddings. Store them in a VectorStoreIndex
+    const index = await VectorStoreIndex.fromDocuments(documents);
+
+    // Create query engine
+    const queryEngine = index.asQueryEngine();
+
+    const rl = readline.createInterface({ input, output });
+    try {
+      while (true) {
+        const query = await rl.question("Query: ");
+
+        if (!query) {
+          break;
+        }
+
+        const response = await queryEngine.query(query);
+
+        // Output response
+        console.log(response.toString());
+      }
+    } finally {
+      // Release stdin so the process can exit once the loop ends or throws
+      rl.close();
+    }
+  });
+
+// The action handler is async, so use parseAsync() and report its failures
+program.parseAsync().catch(console.error);
@@ -1,14 +1,7 @@
 import { OpenAI } from "llamaindex";

 (async () => {
-  const llm = new OpenAI({
-    model: "gpt-3.5-turbo",
-    temperature: 0.1,
-    additionalChatOptions: { frequency_penalty: 0.1 },
-    additionalSessionOptions: {
-      defaultHeaders: { "X-Test-Header-Please-Ignore": "true" },
-    },
-  });
+  const llm = new OpenAI({ model: "gpt-4-1106-preview", temperature: 0.1 });

  // complete api
  const response1 = await llm.complete("How are you?");
@@ -16,7 +9,7 @@ import { OpenAI } from "llamaindex";

  // chat api
  const response2 = await llm.chat([
-    { content: "Tell me a joke!", role: "user" },
+    { content: "Tell me a joke.", role: "user" },
  ]);
  console.log(response2.message.content);
 })();
@@ -1,12 +1,16 @@
 {
-  "version": "0.0.18",
+  "version": "0.0.33",
  "private": true,
  "name": "simple",
  "dependencies": {
+    "@notionhq/client": "^2.2.13",
+    "@pinecone-database/pinecone": "^1.1.2",
+    "commander": "^11.1.0",
    "llamaindex": "workspace:*"
  },
  "devDependencies": {
-    "@types/node": "^18.17.6"
+    "@types/node": "^18.18.6",
+    "ts-node": "^10.9.1"
  },
  "scripts": {
    "lint": "eslint ."
@@ -0,0 +1,26 @@
+import { Portkey } from "llamaindex";
+
+// Streams a chat completion through Portkey (single mode, one Anyscale-hosted
+// Llama 2 model) and writes each chunk to stdout as it arrives.
+(async () => {
+  const portkey = new Portkey({
+    mode: "single",
+    llms: [
+      {
+        provider: "anyscale",
+        // NOTE(review): looks like an account-specific Portkey virtual key —
+        // confirm it is a placeholder and substitute your own.
+        virtual_key: "anyscale-3b3c04",
+        model: "meta-llama/Llama-2-13b-chat-hf",
+        max_tokens: 2000,
+      },
+    ],
+  });
+  const result = portkey.stream_chat([
+    { role: "system", content: "You are a helpful assistant." },
+    { role: "user", content: "Tell me a joke." },
+  ]);
+  for await (const res of result) {
+    process.stdout.write(res);
+  }
+})();
@@ -1,9 +1,9 @@
 import {
  Document,
-  ListIndex,
-  ListRetrieverMode,
-  serviceContextFromDefaults,
  SimpleNodeParser,
+  SummaryIndex,
+  SummaryRetrieverMode,
+  serviceContextFromDefaults,
 } from "llamaindex";
 import essay from "./essay";

@@ -14,9 +14,11 @@ async function main() {
    }),
  });
  const document = new Document({ text: essay, id_: "essay" });
-  const index = await ListIndex.fromDocuments([document], { serviceContext });
+  const index = await SummaryIndex.fromDocuments([document], {
+    serviceContext,
+  });
  const queryEngine = index.asQueryEngine({
-    retriever: index.asRetriever({ mode: ListRetrieverMode.LLM }),
+    retriever: index.asRetriever({ mode: SummaryRetrieverMode.LLM }),
  });
  const response = await queryEngine.query(
    "What did the author do growing up?",
@@ -3,6 +3,7 @@ import {
  OpenAI,
  RetrieverQueryEngine,
  serviceContextFromDefaults,
+  SimilarityPostprocessor,
  VectorStoreIndex,
 } from "llamaindex";
 import essay from "./essay";
@@ -12,7 +13,7 @@ async function main() {
  const document = new Document({ text: essay, id_: "essay" });

  const serviceContext = serviceContextFromDefaults({
-    llm: new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.0 }),
+    llm: new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 }),
  });

  const index = await VectorStoreIndex.fromDocuments([document], {
@@ -21,8 +22,16 @@ async function main() {

  const retriever = index.asRetriever();
  retriever.similarityTopK = 5;
+  const nodePostprocessor = new SimilarityPostprocessor({
+    similarityCutoff: 0.7,
+  });
  // TODO: cannot pass responseSynthesizer into retriever query engine
-  const queryEngine = new RetrieverQueryEngine(retriever);
+  const queryEngine = new RetrieverQueryEngine(
+    retriever,
+    undefined,
+    undefined,
+    [nodePostprocessor],
+  );

  const response = await queryEngine.query(
    "What did the author do growing up?",
@@ -0,0 +1,197 @@
+import {
+  OpenAI,
+  ResponseSynthesizer,
+  RetrieverQueryEngine,
+  serviceContextFromDefaults,
+  TextNode,
+  TreeSummarize,
+  VectorIndexRetriever,
+  VectorStore,
+  VectorStoreIndex,
+  VectorStoreQuery,
+  VectorStoreQueryResult,
+} from "llamaindex";
+
+import { Index, Pinecone, RecordMetadata } from "@pinecone-database/pinecone";
+
+/**
+ * Demo-only, query-only VectorStore over a Pinecone index; please do not use it in production.
+ */
+class PineconeVectorStore<T extends RecordMetadata = RecordMetadata>
+  implements VectorStore
+{
+  storesText = true;
+  isEmbeddingQuery = false;
+
+  indexName!: string;
+  pineconeClient!: Pinecone;
+  index!: Index<T>;
+
+  constructor({ indexName, client }: { indexName: string; client: Pinecone }) {
+    this.indexName = indexName;
+    this.pineconeClient = client;
+    this.index = client.index<T>(indexName); // handle used by query()
+  }
+
+  client() {
+    return this.pineconeClient;
+  }
+
+  async query(
+    query: VectorStoreQuery,
+    kwargs?: any, // only `kwargs.filter` is read; it is forwarded to Pinecone as-is
+  ): Promise<VectorStoreQueryResult> {
+    let queryEmbedding: number[] = [];
+    if (query.queryEmbedding) {
+      if (typeof query.alpha === "number") {
+        const alpha = query.alpha;
+        queryEmbedding = query.queryEmbedding.map((v) => v * alpha); // scale every component by alpha
+      } else {
+        queryEmbedding = query.queryEmbedding;
+      }
+    }
+
+    // Current LlamaIndexTS implementation only support exact match filter, so we use kwargs instead.
+    const filter = kwargs?.filter || {};
+
+    const response = await this.index.query({
+      filter,
+      vector: queryEmbedding,
+      topK: query.similarityTopK,
+      includeValues: true,
+      includeMetadata: true,
+    });
+
+    console.log(
+      `Numbers of vectors returned by Pinecone after preFilters are applied: ${
+        response?.matches?.length || 0
+      }.`,
+    );
+
+    const topKIds: string[] = [];
+    const topKNodes: TextNode[] = [];
+    const topKScores: number[] = [];
+
+    const metadataToNode = (metadata?: T): Partial<TextNode> => {
+      if (!metadata) {
+        throw new Error("metadata is undefined.");
+      }
+
+      const nodeContent = metadata["_node_content"]; // serialized node stored in the record metadata
+      if (!nodeContent) {
+        throw new Error("nodeContent is undefined.");
+      }
+
+      if (typeof nodeContent !== "string") {
+        throw new Error("nodeContent is not a string.");
+      }
+
+      return JSON.parse(nodeContent); // NOTE(review): parsed JSON is trusted without validation
+    };
+
+    if (response.matches) {
+      for (const match of response.matches) {
+        const node = new TextNode({
+          ...metadataToNode(match.metadata),
+          embedding: match.values,
+        });
+
+        topKIds.push(match.id);
+        topKNodes.push(node);
+        topKScores.push(match.score ?? 0); // a missing score is reported as 0
+      }
+    }
+
+    const result = {
+      ids: topKIds,
+      nodes: topKNodes,
+      similarities: topKScores,
+    };
+
+    return result;
+  }
+
+  async add(): Promise<string[]> {
+    return []; // nothing is written to Pinecone in this demo
+  }
+
+  async delete(): Promise<void> {
+    throw new Error("Method `delete` not implemented."); // async: surfaces as a rejection, matching the Promise return type
+  }
+
+  async persist(): Promise<void> {
+    throw new Error("Method `persist` not implemented."); // async: surfaces as a rejection, matching the Promise return type
+  }
+}
+
+/**
+ * Example: query a Pinecone-backed VectorStoreIndex with(out) preFilters.
+ * Placeholder credentials are applied only to environment variables that are
+ * not already set, so real values exported in the shell are honoured.
+ *
+ * Not for production: look for a proper PineconeVectorStore implementation.
+ */
+async function main() {
+  process.env.PINECONE_API_KEY ??= "Your Pinecone API Key.";
+  process.env.PINECONE_ENVIRONMENT ??= "Your Pinecone Environment.";
+  process.env.PINECONE_PROJECT_ID ??= "Your Pinecone Project ID.";
+  process.env.PINECONE_INDEX_NAME ??= "Your Pinecone Index Name.";
+  process.env.OPENAI_API_KEY ??= "Your OpenAI API Key.";
+  process.env.OPENAI_API_ORGANIZATION ??= "Your OpenAI API Organization.";
+
+  const getPineconeVectorStore = async () => {
+    return new PineconeVectorStore({
+      indexName: process.env.PINECONE_INDEX_NAME || "index-name",
+      client: new Pinecone(),
+    });
+  };
+
+  const getServiceContext = () => {
+    const openAI = new OpenAI({
+      model: "gpt-4",
+      apiKey: process.env.OPENAI_API_KEY,
+    });
+
+    return serviceContextFromDefaults({
+      llm: openAI,
+    });
+  };
+
+  const getQueryEngine = async (filter: unknown) => {
+    const vectorStore = await getPineconeVectorStore();
+    const serviceContext = getServiceContext();
+
+    const vectorStoreIndex = await VectorStoreIndex.fromVectorStore(
+      vectorStore,
+      serviceContext,
+    );
+
+    const retriever = new VectorIndexRetriever({
+      index: vectorStoreIndex,
+      similarityTopK: 500,
+    });
+
+    const responseSynthesizer = new ResponseSynthesizer({
+      serviceContext,
+      responseBuilder: new TreeSummarize(serviceContext),
+    });
+
+    return new RetrieverQueryEngine(retriever, responseSynthesizer, {
+      filter,
+    });
+  };
+
+  // whatever is a key from your metadata
+  const queryEngine = await getQueryEngine({
+    whatever: {
+      $gte: 1,
+      $lte: 100,
+    },
+  });
+
+  const response = await queryEngine.query("How many results do you have?");
+
+  console.log(response.toString());
+}
+
+main().catch(console.error);
@@ -0,0 +1,20 @@
+import { OpenAI } from "llamaindex";
+
+// Exercises both OpenAI entry points — complete() and chat() — against the
+// gpt-4-vision-preview model and prints the content of each reply.
+(async () => {
+  const visionModel = new OpenAI({
+    model: "gpt-4-vision-preview",
+    temperature: 0.1,
+  });
+
+  // complete api
+  const completion = await visionModel.complete("How are you?");
+  console.log(completion.message.content);
+
+  // chat api
+  const joke = await visionModel.chat([
+    { content: "Tell me a joke!", role: "user" },
+  ]);
+  console.log(joke.message.content);
+})();
@@ -4,7 +4,6 @@ import {
  PapaCSVReader,
  ResponseSynthesizer,
  serviceContextFromDefaults,
-  SimplePrompt,
  VectorStoreIndex,
 } from "llamaindex";

@@ -23,9 +22,7 @@ async function main() {
    serviceContext,
  });

-  const csvPrompt: SimplePrompt = (input) => {
-    const { context = "", query = "" } = input;
-
+  const csvPrompt = ({ context = "", query = "" }) => {
    return `The following CSV file is loaded from ${path}
 \`\`\`csv
 ${context}
@@ -0,0 +1,29 @@
+import { SimpleDirectoryReader } from "llamaindex";
+
+/**
+ * Observer handed to SimpleDirectoryReader: logs every notification it
+ * receives and answers `false` for any name ending in ".pdf", `true` otherwise.
+ * NOTE(review): a `false` answer presumably makes the reader skip that entry —
+ * confirm against SimpleDirectoryReader.
+ */
+function callback(
+  category: string,
+  name: string,
+  status: any,
+  message?: string,
+): boolean {
+  console.log(category, name, status, message);
+  const isPdf = name.endsWith(".pdf");
+  if (isPdf) {
+    console.log("I DON'T WANT PDF FILES!");
+  }
+  return !isPdf;
+}
+
+/** Runs the directory reader over ./data, routing its notifications through `callback`. */
+async function main() {
+  const directoryReader = new SimpleDirectoryReader(callback);
+  await directoryReader.loadData({ directoryPath: "./data" });
+}
+
+main().catch(console.error);
@@ -0,0 +1,22 @@
+import { HTMLReader, VectorStoreIndex } from "llamaindex";
+
+/**
+ * Loads data/18-1_Changelog.html, indexes it in a VectorStoreIndex and prints
+ * the answer to a single question about it.
+ */
+async function main() {
+  // Read the HTML page into documents
+  const htmlDocs = await new HTMLReader().loadData("data/18-1_Changelog.html");
+
+  // Build the vector index from those documents
+  const vectorIndex = await VectorStoreIndex.fromDocuments(htmlDocs);
+
+  // Ask the question through the index's default query engine
+  const question = "What were the notable changes in 18.1?";
+  const answer = await vectorIndex.asQueryEngine().query(question);
+
+  // Print the answer text
+  console.log(answer.toString());
+}
+
+main().catch(console.error);
@@ -0,0 +1,39 @@
+import {
+  Document,
+  KeywordTableIndex,
+  KeywordTableRetrieverMode,
+} from "llamaindex";
+import essay from "./essay";
+
+/**
+ * Builds a KeywordTableIndex over the essay and asks the same question once
+ * per retriever mode, printing each answer.
+ */
+async function main() {
+  const document = new Document({ text: essay, id_: "essay" });
+  const index = await KeywordTableIndex.fromDocuments([document]);
+
+  const allModes: KeywordTableRetrieverMode[] = [
+    KeywordTableRetrieverMode.DEFAULT,
+    KeywordTableRetrieverMode.SIMPLE,
+    KeywordTableRetrieverMode.RAKE,
+  ];
+  // Use for...of rather than forEach(async ...): forEach discards the promises,
+  // so main() would resolve before any query finished and a failed query would
+  // surface as an unhandled rejection instead of reaching main().catch below.
+  for (const mode of allModes) {
+    const queryEngine = index.asQueryEngine({
+      retriever: index.asRetriever({
+        mode,
+      }),
+    });
+    const response = await queryEngine.query(
+      "What did the author do growing up?",
+    );
+    console.log(response.toString());
+  }
+}
+
+main().catch((e: Error) => {
+  console.error(e, e.stack);
+});
@@ -0,0 +1,51 @@
+import { ChatMessage, SimpleChatEngine } from "llamaindex";
+import { stdin as input, stdout as output } from "node:process";
+import readline from "node:readline/promises";
+import { Anthropic } from "../../packages/core/src/llm/LLM";
+
+/**
+ * Interactive chat loop against Anthropic: each line read from stdin is sent to
+ * a SimpleChatEngine and the streamed reply is echoed to stdout as it arrives.
+ * An empty line ends the session.
+ */
+async function main() {
+  // const llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
+  const llm = new Anthropic();
+
+  //TODO: Add callbacks later
+
+  //Stream Complete
+  //Note: Setting streaming flag to true or false will auto-set your return type to
+  //either an AsyncGenerator or a Response.
+  // Omitting the streaming flag automatically sets streaming to false
+
+  const chatEngine: SimpleChatEngine = new SimpleChatEngine({
+    chatHistory: undefined,
+    llm: llm,
+  });
+
+  const rl = readline.createInterface({ input, output });
+  try {
+    while (true) {
+      const query = await rl.question("Query: ");
+
+      // An empty line ends the session
+      if (!query) {
+        break;
+      }
+
+      //Case 1: .chat(query, undefined, true) => Stream
+      //Case 2: .chat(query, undefined, false) => Response object
+      //Case 3: .chat(query, undefined) => Response object
+      const chatStream = await chatEngine.chat(query, undefined, true);
+      for await (const part of chatStream) {
+        process.stdout.write(part);
+      }
+    }
+  } finally {
+    // Release stdin so the process can exit once the loop ends or throws
+    rl.close();
+  }
+}
+
+main().catch(console.error);
@@ -1,9 +1,9 @@
 import {
  Document,
-  TextNode,
  NodeWithScore,
  ResponseSynthesizer,
  SimpleNodeParser,
+  TextNode,
 } from "llamaindex";

 (async () => {
@@ -29,7 +29,7 @@ import {

  const response = await responseSynthesizer.synthesize(
    "What age am I?",
-    nodesWithScore
+    nodesWithScore,
  );
  console.log(response.response);
 })();
@@ -0,0 +1,23 @@
+import { MarkdownReader, VectorStoreIndex } from "llamaindex";
+
+/**
+ * Reads the llamaindex README as Markdown, indexes it in a VectorStoreIndex
+ * and prints the answer to a single question about it.
+ */
+async function main() {
+  // Read the Markdown file into documents
+  const readmePath = "node_modules/llamaindex/README.md";
+  const markdownDocs = await new MarkdownReader().loadData(readmePath);
+
+  // Build the vector index from those documents
+  const vectorIndex = await VectorStoreIndex.fromDocuments(markdownDocs);
+
+  // Ask the question through the index's default query engine
+  const question = "What does the example code do?";
+  const answer = await vectorIndex.asQueryEngine().query(question);
+
+  // Print the answer text
+  console.log(answer.toString());
+}
+
+main().catch(console.error);
@@ -0,0 +1,79 @@
+import { MongoClient } from "mongodb";
+import { Document } from "../../packages/core/src/Node";
+import { VectorStoreIndex } from "../../packages/core/src/indices";
+import { SimpleMongoReader } from "../../packages/core/src/readers/SimpleMongoReader";
+
+import { stdin as input, stdout as output } from "node:process";
+import readline from "node:readline/promises";
+
+/**
+ * Loads documents through SimpleMongoReader, indexes them in a
+ * VectorStoreIndex and answers questions typed on stdin until an empty line
+ * is entered. The Mongo client and the readline interface are always closed
+ * on the way out so the process can exit.
+ */
+async function main() {
+  //Dummy test code
+  const options: object = {};
+  const projections: object = { embedding: 0 };
+  const uri: string = process.env.MONGODB_URI ?? "fake_uri";
+  const client: MongoClient = new MongoClient(uri);
+  const rl = readline.createInterface({ input, output });
+
+  try {
+    //Where the real code starts
+    const MR = new SimpleMongoReader(client);
+    // NOTE(review): "data" / "posts" are presumably the database and
+    // collection names and 1 a document limit — confirm against SimpleMongoReader
+    const documents: Document[] = await MR.loadData(
+      "data",
+      "posts",
+      1,
+      {},
+      options,
+      projections,
+    );
+
+    //
+    //If you need to look at low-level details of
+    // a queryEngine (for example, needing to check each individual node)
+    //
+
+    // Split text and create embeddings. Store them in a VectorStoreIndex
+    // var storageContext = await storageContextFromDefaults({});
+    // var serviceContext = serviceContextFromDefaults({});
+    // const docStore = storageContext.docStore;
+
+    // for (const doc of documents) {
+    //   docStore.setDocumentHash(doc.id_, doc.hash);
+    // }
+    // const nodes = serviceContext.nodeParser.getNodesFromDocuments(documents);
+    // console.log(nodes);
+
+    //
+    //Making Vector Store from documents
+    //
+
+    const index = await VectorStoreIndex.fromDocuments(documents);
+    // Create query engine
+    const queryEngine = index.asQueryEngine();
+
+    while (true) {
+      const query = await rl.question("Query: ");
+
+      if (!query) {
+        break;
+      }
+
+      const response = await queryEngine.query(query);
+
+      // Output response
+      console.log(response.toString());
+    }
+  } finally {
+    rl.close();
+    await client.close();
+  }
+}
+
+main().catch(console.error);
@@ -0,0 +1,97 @@
+import { Client } from "@notionhq/client";
+import { program } from "commander";
+import { NotionReader, VectorStoreIndex } from "llamaindex";
+import { stdin as input, stdout as output } from "node:process";
+// readline/promises is still experimental so not in @types/node yet
+// @ts-ignore
+import readline from "node:readline/promises";
+
+// CLI example: given a page id, loads that Notion page, indexes it and answers
+// questions typed on stdin; without one, lists the pages the integration can see.
+program
+  .argument("[page]", "Notion page id (must be provided)")
+  .action(async (page, _options, command) => {
+    // Initializing a client
+
+    if (!process.env.NOTION_TOKEN) {
+      console.log(
+        "No NOTION_TOKEN found in environment variables. You will need to register an integration https://www.notion.com/my-integrations and put it in your NOTION_TOKEN environment variable.",
+      );
+      return;
+    }
+
+    const notion = new Client({
+      auth: process.env.NOTION_TOKEN,
+    });
+
+    if (!page) {
+      const response = await notion.search({
+        filter: {
+          value: "page",
+          property: "object",
+        },
+        sort: {
+          direction: "descending",
+          timestamp: "last_edited_time",
+        },
+      });
+
+      const { results } = response;
+
+      if (results.length === 0) {
+        console.log(
+          "No pages found. You will need to share it with your integration. (tap the three dots on the top right, find Add connections, and add your integration)",
+        );
+        return;
+      } else {
+        const pages = results
+          .map((result) => {
+            if (!("url" in result)) {
+              return null;
+            }
+
+            return {
+              id: result.id,
+              url: result.url,
+            };
+          })
+          .filter((page) => page !== null);
+        console.log("Found pages:");
+        console.table(pages);
+        console.log(`To run, run ts-node ${command.name()} [page id]`);
+        return;
+      }
+    }
+
+    const reader = new NotionReader({ client: notion });
+    const documents = await reader.loadData(page);
+    console.log(documents);
+
+    // Split text and create embeddings. Store them in a VectorStoreIndex
+    const index = await VectorStoreIndex.fromDocuments(documents);
+
+    // Create query engine
+    const queryEngine = index.asQueryEngine();
+
+    const rl = readline.createInterface({ input, output });
+    try {
+      while (true) {
+        const query = await rl.question("Query: ");
+
+        if (!query) {
+          break;
+        }
+
+        const response = await queryEngine.query(query);
+
+        // Output response
+        console.log(response.toString());
+      }
+    } finally {
+      // Release stdin so the process can exit once the loop ends or throws
+      rl.close();
+    }
+  });
+
+// The action handler is async, so use parseAsync() and report its failures
+program.parseAsync().catch(console.error);
@@ -1,13 +1,15 @@
 import { OpenAI } from "llamaindex";

 (async () => {
-  const llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.0 });
-  
+  const llm = new OpenAI({ model: "gpt-4-1106-preview", temperature: 0.1 });
+
  // complete api
  const response1 = await llm.complete("How are you?");
  console.log(response1.message.content);

  // chat api
-  const response2 = await llm.chat([{ content: "Tell me a joke!", role: "user" }]);
+  const response2 = await llm.chat([
+    { content: "Tell me a joke.", role: "user" },
+  ]);
  console.log(response2.message.content);
 })();
@@ -0,0 +1,26 @@
+import { Portkey } from "llamaindex";
+
+// Streams a chat completion through Portkey (single mode, one Anyscale-hosted
+// Llama 2 model) and writes each chunk to stdout as it arrives.
+(async () => {
+  const portkey = new Portkey({
+    mode: "single",
+    llms: [
+      {
+        provider: "anyscale",
+        // NOTE(review): looks like an account-specific Portkey virtual key —
+        // confirm it is a placeholder and substitute your own.
+        virtual_key: "anyscale-3b3c04",
+        model: "meta-llama/Llama-2-13b-chat-hf",
+        max_tokens: 2000,
+      },
+    ],
+  });
+  const result = portkey.stream_chat([
+    { role: "system", content: "You are a helpful assistant." },
+    { role: "user", content: "Tell me a joke." },
+  ]);
+  for await (const res of result) {
+    process.stdout.write(res);
+  }
+})();
@@ -0,0 +1,43 @@
+import { execSync } from "child_process";
+import {
+  PDFReader,
+  serviceContextFromDefaults,
+  storageContextFromDefaults,
+  VectorStoreIndex,
+} from "llamaindex";
+
+const STORAGE_DIR = "./cache";
+
+/**
+ * Persists a vector index for data/brk-2022.pdf under STORAGE_DIR, makes that
+ * directory read-only, then checks the index can still be opened from it.
+ */
+async function main() {
+  const serviceContext = serviceContextFromDefaults({});
+
+  // Step 1: build the index with a storage context that persists to disk
+  const writableStorage = await storageContextFromDefaults({
+    persistDir: STORAGE_DIR,
+  });
+  const pdfDocs = await new PDFReader().loadData("data/brk-2022.pdf");
+  await VectorStoreIndex.fromDocuments(pdfDocs, {
+    storageContext: writableStorage,
+    serviceContext,
+  });
+  console.log("wrote index to disk - now trying to read it");
+
+  // Step 2: strip write permission from the persisted files
+  execSync(`chmod -R 555 ${STORAGE_DIR}`);
+
+  // Step 3: open the index again from the now read-only directory
+  const readOnlyStorage = await storageContextFromDefaults({
+    persistDir: STORAGE_DIR,
+  });
+  await VectorStoreIndex.init({
+    storageContext: readOnlyStorage,
+    serviceContext,
+  });
+  console.log("read only index successfully opened");
+}
+
+main().catch(console.error);
@@ -1,9 +1,9 @@
 import {
  Document,
-  ListIndex,
-  ListRetrieverMode,
-  serviceContextFromDefaults,
  SimpleNodeParser,
+  SummaryIndex,
+  SummaryRetrieverMode,
+  serviceContextFromDefaults,
 } from "llamaindex";
 import essay from "./essay";

@@ -14,9 +14,11 @@ async function main() {
    }),
  });
  const document = new Document({ text: essay, id_: "essay" });
-  const index = await ListIndex.fromDocuments([document], { serviceContext });
+  const index = await SummaryIndex.fromDocuments([document], {
+    serviceContext,
+  });
  const queryEngine = index.asQueryEngine({
-    retriever: index.asRetriever({ mode: ListRetrieverMode.LLM }),
+    retriever: index.asRetriever({ mode: SummaryRetrieverMode.LLM }),
  });
  const response = await queryEngine.query(
    "What did the author do growing up?",
@@ -3,6 +3,7 @@ import {
  OpenAI,
  RetrieverQueryEngine,
  serviceContextFromDefaults,
+  SimilarityPostprocessor,
  VectorStoreIndex,
 } from "llamaindex";
 import essay from "./essay";
@@ -12,7 +13,7 @@ async function main() {
  const document = new Document({ text: essay, id_: "essay" });

  const serviceContext = serviceContextFromDefaults({
-    llm: new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.0 }),
+    llm: new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 }),
  });

  const index = await VectorStoreIndex.fromDocuments([document], {
@@ -21,8 +22,16 @@ async function main() {

  const retriever = index.asRetriever();
  retriever.similarityTopK = 5;
+  const nodePostprocessor = new SimilarityPostprocessor({
+    similarityCutoff: 0.7,
+  });
  // TODO: cannot pass responseSynthesizer into retriever query engine
-  const queryEngine = new RetrieverQueryEngine(retriever);
+  const queryEngine = new RetrieverQueryEngine(
+    retriever,
+    undefined,
+    undefined,
+    [nodePostprocessor],
+  );

  const response = await queryEngine.query(
    "What did the author do growing up?",
@@ -0,0 +1,197 @@
+import {
+  OpenAI,
+  ResponseSynthesizer,
+  RetrieverQueryEngine,
+  serviceContextFromDefaults,
+  TextNode,
+  TreeSummarize,
+  VectorIndexRetriever,
+  VectorStore,
+  VectorStoreIndex,
+  VectorStoreQuery,
+  VectorStoreQueryResult,
+} from "llamaindex";
+
+import { Index, Pinecone, RecordMetadata } from "@pinecone-database/pinecone";
+
+/**
+ * Demo-only, query-only VectorStore over a Pinecone index; please do not use it in production.
+ */
+class PineconeVectorStore<T extends RecordMetadata = RecordMetadata>
+  implements VectorStore
+{
+  storesText = true;
+  isEmbeddingQuery = false;
+
+  indexName!: string;
+  pineconeClient!: Pinecone;
+  index!: Index<T>;
+
+  constructor({ indexName, client }: { indexName: string; client: Pinecone }) {
+    this.indexName = indexName;
+    this.pineconeClient = client;
+    this.index = client.index<T>(indexName); // handle used by query()
+  }
+
+  client() {
+    return this.pineconeClient;
+  }
+
+  async query(
+    query: VectorStoreQuery,
+    kwargs?: any, // only `kwargs.filter` is read; it is forwarded to Pinecone as-is
+  ): Promise<VectorStoreQueryResult> {
+    let queryEmbedding: number[] = [];
+    if (query.queryEmbedding) {
+      if (typeof query.alpha === "number") {
+        const alpha = query.alpha;
+        queryEmbedding = query.queryEmbedding.map((v) => v * alpha); // scale every component by alpha
+      } else {
+        queryEmbedding = query.queryEmbedding;
+      }
+    }
+
+    // Current LlamaIndexTS implementation only support exact match filter, so we use kwargs instead.
+    const filter = kwargs?.filter || {};
+
+    const response = await this.index.query({
+      filter,
+      vector: queryEmbedding,
+      topK: query.similarityTopK,
+      includeValues: true,
+      includeMetadata: true,
+    });
+
+    console.log(
+      `Numbers of vectors returned by Pinecone after preFilters are applied: ${
+        response?.matches?.length || 0
+      }.`,
+    );
+
+    const topKIds: string[] = [];
+    const topKNodes: TextNode[] = [];
+    const topKScores: number[] = [];
+
+    const metadataToNode = (metadata?: T): Partial<TextNode> => {
+      if (!metadata) {
+        throw new Error("metadata is undefined.");
+      }
+
+      const nodeContent = metadata["_node_content"]; // serialized node stored in the record metadata
+      if (!nodeContent) {
+        throw new Error("nodeContent is undefined.");
+      }
+
+      if (typeof nodeContent !== "string") {
+        throw new Error("nodeContent is not a string.");
+      }
+
+      return JSON.parse(nodeContent); // NOTE(review): parsed JSON is trusted without validation
+    };
+
+    if (response.matches) {
+      for (const match of response.matches) {
+        const node = new TextNode({
+          ...metadataToNode(match.metadata),
+          embedding: match.values,
+        });
+
+        topKIds.push(match.id);
+        topKNodes.push(node);
+        topKScores.push(match.score ?? 0); // a missing score is reported as 0
+      }
+    }
+
+    const result = {
+      ids: topKIds,
+      nodes: topKNodes,
+      similarities: topKScores,
+    };
+
+    return result;
+  }
+
+  async add(): Promise<string[]> {
+    return []; // nothing is written to Pinecone in this demo
+  }
+
+  async delete(): Promise<void> {
+    throw new Error("Method `delete` not implemented."); // async: surfaces as a rejection, matching the Promise return type
+  }
+
+  async persist(): Promise<void> {
+    throw new Error("Method `persist` not implemented."); // async: surfaces as a rejection, matching the Promise return type
+  }
+}
+
+/**
+ * Example: query a Pinecone-backed VectorStoreIndex with(out) preFilters.
+ * Placeholder credentials are applied only to environment variables that are
+ * not already set, so real values exported in the shell are honoured.
+ *
+ * Not for production: look for a proper PineconeVectorStore implementation.
+ */
+async function main() {
+  process.env.PINECONE_API_KEY ??= "Your Pinecone API Key.";
+  process.env.PINECONE_ENVIRONMENT ??= "Your Pinecone Environment.";
+  process.env.PINECONE_PROJECT_ID ??= "Your Pinecone Project ID.";
+  process.env.PINECONE_INDEX_NAME ??= "Your Pinecone Index Name.";
+  process.env.OPENAI_API_KEY ??= "Your OpenAI API Key.";
+  process.env.OPENAI_API_ORGANIZATION ??= "Your OpenAI API Organization.";
+
+  const getPineconeVectorStore = async () => {
+    return new PineconeVectorStore({
+      indexName: process.env.PINECONE_INDEX_NAME || "index-name",
+      client: new Pinecone(),
+    });
+  };
+
+  const getServiceContext = () => {
+    const openAI = new OpenAI({
+      model: "gpt-4",
+      apiKey: process.env.OPENAI_API_KEY,
+    });
+
+    return serviceContextFromDefaults({
+      llm: openAI,
+    });
+  };
+
+  const getQueryEngine = async (filter: unknown) => {
+    const vectorStore = await getPineconeVectorStore();
+    const serviceContext = getServiceContext();
+
+    const vectorStoreIndex = await VectorStoreIndex.fromVectorStore(
+      vectorStore,
+      serviceContext,
+    );
+
+    const retriever = new VectorIndexRetriever({
+      index: vectorStoreIndex,
+      similarityTopK: 500,
+    });
+
+    const responseSynthesizer = new ResponseSynthesizer({
+      serviceContext,
+      responseBuilder: new TreeSummarize(serviceContext),
+    });
+
+    return new RetrieverQueryEngine(retriever, responseSynthesizer, {
+      filter,
+    });
+  };
+
+  // whatever is a key from your metadata
+  const queryEngine = await getQueryEngine({
+    whatever: {
+      $gte: 1,
+      $lte: 100,
+    },
+  });
+
+  const response = await queryEngine.query("How many results do you have?");
+
+  console.log(response.toString());
+}
+
+main().catch(console.error);
@@ -0,0 +1,15 @@
+import { OpenAI } from "llamaindex";
+
+(async () => {
+  // One client instance is shared by both calls below.
+  const visionModel = new OpenAI({
+    model: "gpt-4-vision-preview",
+    temperature: 0.1,
+  });
+
+  // complete api: a single prompt string
+  const completion = await visionModel.complete("How are you?");
+  console.log(completion.message.content);
+
+  // chat api: a list of role-tagged messages
+  const joke = await visionModel.chat([
+    { role: "user", content: "Tell me a joke!" },
+  ]);
+  console.log(joke.message.content);
+})();
@@ -3,7 +3,7 @@
  "scripts": {
    "build": "turbo run build",
    "dev": "turbo run dev",
-    "format": "prettier --write \"**/*.{ts,tsx,md}\"",
+    "format": "prettier --write \"**/*.{js,jsx,ts,tsx,md}\"",
    "lint": "turbo run lint",
    "prepare": "husky install",
    "test": "turbo run test",
@@ -11,24 +11,27 @@
    "publish-snapshot": "turbo run build lint test && changeset version --snapshot && changeset publish"
  },
  "devDependencies": {
-    "@turbo/gen": "^1.10.12",
-    "@types/jest": "^29.5.3",
-    "eslint": "^7.32.0",
+    "@changesets/cli": "^2.26.2",
+    "@turbo/gen": "^1.10.16",
+    "@types/jest": "^29.5.8",
+    "eslint": "^8.53.0",
    "eslint-config-custom": "workspace:*",
    "husky": "^8.0.3",
-    "jest": "^29.6.2",
-    "prettier": "^3.0.2",
-    "prettier-plugin-organize-imports": "^3.2.3",
+    "jest": "^29.7.0",
+    "lint-staged": "^15.1.0",
+    "prettier": "^3.1.0",
+    "prettier-plugin-organize-imports": "^3.2.4",
    "ts-jest": "^29.1.1",
-    "turbo": "^1.10.12"
-  },
-  "packageManager": "pnpm@7.15.0",
-  "dependencies": {
-    "@changesets/cli": "^2.26.2"
+    "turbo": "^1.10.16"
  },
+  "packageManager": "pnpm@8.10.5+sha256.a4bd9bb7b48214bbfcd95f264bd75bb70d100e5d4b58808f5cd6ab40c6ac21c5",
  "pnpm": {
    "overrides": {
-      "trim": "1.0.1"
+      "trim": "1.0.1",
+      "@babel/traverse": "7.23.2"
    }
+  },
+  "lint-staged": {
+    "*.{js,jsx,ts,tsx,md}": "prettier --write"
  }
 }
@@ -1,5 +1,118 @@
 # llamaindex

+## 0.0.35
+
+### Patch Changes
+
+- 63f2108: Add multimodal support (thanks @marcusschiesser)
+
+## 0.0.34
+
+### Patch Changes
+
+- 2a27e21: Add support for gpt-3.5-turbo-1106
+
+## 0.0.33
+
+### Patch Changes
+
+- 5e2e92c: gpt-4-1106-preview and gpt-4-vision-preview from OpenAI dev day
+
+## 0.0.32
+
+### Patch Changes
+
+- 90c0b83: Add HTMLReader (thanks @mtutty)
+- dfd22aa: Add observer/filter to the SimpleDirectoryReader (thanks @mtutty)
+
+## 0.0.31
+
+### Patch Changes
+
+- 6c55b2d: Give HistoryChatEngine pluggable options (thanks @marcusschiesser)
+- 8aa8c65: Add SimilarityPostProcessor (thanks @TomPenguin)
+- 6c55b2d: Added LLMMetadata (thanks @marcusschiesser)
+
+## 0.0.30
+
+### Patch Changes
+
+- 139abad: Streaming improvements including Anthropic (thanks @kkang2097)
+- 139abad: Portkey integration (Thank you @noble-varghese)
+- eb0e994: Add export for PromptHelper (thanks @zigamall)
+- eb0e994: Publish ESM module again
+- 139abad: Pinecone demo (thanks @Einsenhorn)
+
+## 0.0.29
+
+### Patch Changes
+
+- a52143b: Added DocxReader for Word documents (thanks @jayantasamaddar)
+- 1b7fd95: Updated OpenAI streaming (thanks @kkang2097)
+- 0db3f41: Migrated to Tiktoken lite, which hopefully fixes the Windows issue
+
+## 0.0.28
+
+### Patch Changes
+
+- 96bb657: Typesafe metadata (thanks @TomPenguin)
+- 96bb657: MongoReader (thanks @kkang2097)
+- 837854d: Make OutputParser less strict and add tests (Thanks @kkang2097)
+
+## 0.0.27
+
+### Patch Changes
+
+- 4a5591b: Chat History summarization (thanks @marcusschiesser)
+- 4a5591b: Notion database support (thanks @TomPenguin)
+- 4a5591b: KeywordIndex (thanks @swk777)
+
+## 0.0.26
+
+### Patch Changes
+
+- 5bb55bc: Add notion loader (thank you @TomPenguin!)
+
+## 0.0.25
+
+### Patch Changes
+
+- e21eca2: OpenAI 4.3.1 and Anthropic 0.6.2
+- 40a8f07: Update READMEs (thanks @andfk)
+- 40a8f07: Bug: missing exports from storage (thanks @aashutoshrathi)
+
+## 0.0.24
+
+### Patch Changes
+
+- e4af7b3: Renamed ListIndex to SummaryIndex to better indicate its use.
+- 259fe63: Strong types for prompts.
+
+## 0.0.23
+
+### Patch Changes
+
+- Added MetadataMode to ResponseSynthesizer (thanks @TomPenguin)
+- 9d6b2ed: Added Markdown Reader (huge shoutout to @swk777)
+
+## 0.0.22
+
+### Patch Changes
+
+- 454f3f8: CJK sentence splitting (thanks @TomPenguin)
+- 454f3f8: Export options for Windows formatted text files
+- 454f3f8: Disable long sentence splitting by default
+- 454f3f8: Make sentence splitter not split on decimals.
+- 99df58f: Anthropic 0.6.1 and OpenAI 4.2.0. Changed Anthropic timeout back to 60s
+
+## 0.0.21
+
+### Patch Changes
+
+- f7a57ca: Fixed metadata deserialization (thanks @marcagve)
+- 0a09de2: Update to OpenAI 4.1.0
+- f7a57ca: ChatGPT optimized prompts (thanks @LoganMarkewich)
+
 ## 0.0.20

 ### Patch Changes
@@ -20,7 +20,7 @@ In a new folder:
 export OPENAI_API_KEY="sk-......" # Replace with your key from https://platform.openai.com/account/api-keys
 pnpm init
 pnpm install typescript
-pnpm exec tsc –-init # if needed
+pnpm exec tsc --init # if needed
 pnpm install llamaindex
 pnpm install @types/node
 ```
@@ -36,7 +36,7 @@ async function main() {
  // Load essay from abramov.txt in Node
  const essay = await fs.readFile(
    "node_modules/llamaindex/examples/abramov.txt",
-    "utf-8"
+    "utf-8",
  );

  // Create Document object with essay
@@ -48,7 +48,7 @@ async function main() {
  // Query the index
  const queryEngine = index.asQueryEngine();
  const response = await queryEngine.query(
-    "What did the author do in college?"
+    "What did the author do in college?",
  );

  // Output response
@@ -61,7 +61,7 @@ main();
 Then you can run it using

 ```bash
-pnpm dlx ts-node example.ts
+pnpx ts-node example.ts
 ```

 ## Playground
@@ -1,34 +1,47 @@
 {
  "name": "llamaindex",
-  "version": "0.0.20",
+  "version": "0.0.35",
+  "license": "MIT",
  "dependencies": {
-    "@anthropic-ai/sdk": "^0.6.0",
+    "@anthropic-ai/sdk": "^0.9.0",
+    "@notionhq/client": "^2.2.13",
+    "js-tiktoken": "^1.0.7",
    "lodash": "^4.17.21",
-    "openai": "^4.0.1",
+    "mammoth": "^1.6.0",
+    "md-utils-ts": "^2.0.0",
+    "mongodb": "^6.2.0",
+    "notion-md-crawler": "^0.0.2",
+    "openai": "^4.16.1",
    "papaparse": "^5.4.1",
    "pdf-parse": "^1.1.1",
-    "replicate": "^0.16.1",
-    "tiktoken-node": "^0.0.6",
-    "uuid": "^9.0.0",
+    "portkey-ai": "^0.1.16",
+    "rake-modified": "^1.0.8",
+    "replicate": "^0.21.1",
+    "string-strip-html": "^13.4.3",
+    "uuid": "^9.0.1",
    "wink-nlp": "^1.14.3"
  },
  "devDependencies": {
-    "@types/lodash": "^4.14.197",
-    "@types/node": "^18.17.6",
-    "@types/papaparse": "^5.3.7",
-    "@types/pdf-parse": "^1.1.1",
-    "@types/uuid": "^9.0.2",
+    "@types/lodash": "^4.14.200",
+    "@types/node": "^18.18.8",
+    "@types/papaparse": "^5.3.10",
+    "@types/pdf-parse": "^1.1.3",
+    "@types/uuid": "^9.0.6",
    "node-stdlib-browser": "^1.2.0",
-    "tsup": "^7.2.0"
+    "tsup": "^7.2.0",
+    "typescript": "^5.2.2"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "types": "./dist/index.d.ts",
  "main": "./dist/index.js",
+  "module": "./dist/index.mjs",
+  "repository": "run-llama/LlamaIndexTS",
  "scripts": {
    "lint": "eslint .",
    "test": "jest",
-    "build": "tsup src/index.ts --format esm,cjs --dts"
+    "build": "tsup src/index.ts --format esm,cjs --dts",
+    "dev": "tsup src/index.ts --format esm,cjs --dts --watch"
  }
 }
@@ -1,17 +1,20 @@
-import { ChatMessage, OpenAI, ChatResponse, LLM } from "./llm/LLM";
-import { TextNode } from "./Node";
+import { v4 as uuidv4 } from "uuid";
+import { ChatHistory } from "./ChatHistory";
+import { NodeWithScore, TextNode } from "./Node";
 import {
-  SimplePrompt,
-  contextSystemPrompt,
+  CondenseQuestionPrompt,
+  ContextSystemPrompt,
  defaultCondenseQuestionPrompt,
+  defaultContextSystemPrompt,
  messagesToHistoryStr,
 } from "./Prompt";
 import { BaseQueryEngine } from "./QueryEngine";
 import { Response } from "./Response";
 import { BaseRetriever } from "./Retriever";
 import { ServiceContext, serviceContextFromDefaults } from "./ServiceContext";
-import { v4 as uuidv4 } from "uuid";
 import { Event } from "./callbacks/CallbackManager";
+import { BaseNodePostprocessor } from "./indices/BaseNodePostprocessor";
+import { ChatMessage, LLM, OpenAI } from "./llm/LLM";

 /**
 * A ChatEngine is used to handle back and forth chats between the application and the LLM.
@@ -21,8 +24,16 @@ export interface ChatEngine {
   * Send message along with the class's current chat history to the LLM.
   * @param message
   * @param chatHistory optional chat history if you want to customize the chat history
+   * @param streaming optional streaming flag, which auto-sets the return value if True.
   */
-  chat(message: string, chatHistory?: ChatMessage[]): Promise<Response>;
+  chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(
+    message: string,
+    chatHistory?: ChatMessage[],
+    streaming?: T,
+  ): Promise<R>;

  /**
   * Resets the chat history so that it's empty.
@@ -42,13 +53,45 @@ export class SimpleChatEngine implements ChatEngine {
    this.llm = init?.llm ?? new OpenAI();
  }

-  async chat(message: string, chatHistory?: ChatMessage[]): Promise<Response> {
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(message: string, chatHistory?: ChatMessage[], streaming?: T): Promise<R> {
+    //Streaming option
+    if (streaming) {
+      return this.streamChat(message, chatHistory) as R;
+    }
+
+    //Non-streaming option
    chatHistory = chatHistory ?? this.chatHistory;
    chatHistory.push({ content: message, role: "user" });
-    const response = await this.llm.chat(chatHistory);
+    const response = await this.llm.chat(chatHistory, undefined);
    chatHistory.push(response.message);
    this.chatHistory = chatHistory;
-    return new Response(response.message.content);
+    return new Response(response.message.content) as R;
+  }
+
+  /**
+   * Streaming variant of `chat`: yields the reply piece by piece.
+   *
+   * Pushes the user message onto the history (the caller's array is mutated in
+   * place when one is passed), requests a chat from the LLM with the third
+   * argument set to `true`, and yields every part of the returned stream.
+   * Once the stream is exhausted, the concatenated parts are pushed as a single
+   * assistant message and the history is stored on the engine.
+   * @param message the user message to send
+   * @param chatHistory optional history to use instead of the engine's own
+   */
+  protected async *streamChat(
+    message: string,
+    chatHistory?: ChatMessage[],
+  ): AsyncGenerator<string, void, unknown> {
+    chatHistory = chatHistory ?? this.chatHistory;
+    chatHistory.push({ content: message, role: "user" });
+    const response_generator = await this.llm.chat(
+      chatHistory,
+      undefined,
+      true,
+    );
+
+    var accumulator: string = "";
+    for await (const part of response_generator) {
+      accumulator += part;
+      yield part;
+    }
+
+    chatHistory.push({ content: accumulator, role: "assistant" });
+    this.chatHistory = chatHistory;
+    return;
  }

  reset() {
@@ -70,13 +113,13 @@ export class CondenseQuestionChatEngine implements ChatEngine {
  queryEngine: BaseQueryEngine;
  chatHistory: ChatMessage[];
  serviceContext: ServiceContext;
-  condenseMessagePrompt: SimplePrompt;
+  condenseMessagePrompt: CondenseQuestionPrompt;

  constructor(init: {
    queryEngine: BaseQueryEngine;
    chatHistory: ChatMessage[];
    serviceContext?: ServiceContext;
-    condenseMessagePrompt?: SimplePrompt;
+    condenseMessagePrompt?: CondenseQuestionPrompt;
  }) {
    this.queryEngine = init.queryEngine;
    this.chatHistory = init?.chatHistory ?? [];
@@ -92,15 +135,19 @@ export class CondenseQuestionChatEngine implements ChatEngine {
    return this.serviceContext.llm.complete(
      defaultCondenseQuestionPrompt({
        question: question,
-        chat_history: chatHistoryStr,
-      })
+        chatHistory: chatHistoryStr,
+      }),
    );
  }

-  async chat(
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(
    message: string,
-    chatHistory?: ChatMessage[] | undefined
-  ): Promise<Response> {
+    chatHistory?: ChatMessage[] | undefined,
+    streaming?: T,
+  ): Promise<R> {
    chatHistory = chatHistory ?? this.chatHistory;

    const condensedQuestion = (
@@ -112,7 +159,7 @@ export class CondenseQuestionChatEngine implements ChatEngine {
    chatHistory.push({ content: message, role: "user" });
    chatHistory.push({ content: response.response, role: "assistant" });

-    return response;
+    return response as R;
  }

  reset() {
@@ -120,54 +167,118 @@ export class CondenseQuestionChatEngine implements ChatEngine {
  }
 }

+/**
+ * The result of generating context for a chat message.
+ */
+export interface Context {
+  /** The chat message that carries the generated context. */
+  message: ChatMessage;
+  /** The scored nodes the context was generated from. */
+  nodes: NodeWithScore[];
+}
+
+/**
+ * A ContextGenerator produces a `Context` for an incoming message.
+ */
+export interface ContextGenerator {
+  /**
+   * Generates the context for the given message.
+   * @param message the message to generate context for
+   * @param parentEvent optional event to associate the generation with
+   */
+  generate(message: string, parentEvent?: Event): Promise<Context>;
+}
+
+/**
+ * Default ContextGenerator: retrieves nodes for a message, passes them through
+ * the configured node postprocessors and renders their text into a system message.
+ */
+export class DefaultContextGenerator implements ContextGenerator {
+  retriever: BaseRetriever;
+  contextSystemPrompt: ContextSystemPrompt;
+  nodePostprocessors: BaseNodePostprocessor[];
+
+  constructor(init: {
+    retriever: BaseRetriever;
+    contextSystemPrompt?: ContextSystemPrompt;
+    nodePostprocessors?: BaseNodePostprocessor[];
+  }) {
+    const { retriever, contextSystemPrompt, nodePostprocessors } = init;
+    this.retriever = retriever;
+    this.contextSystemPrompt =
+      contextSystemPrompt ?? defaultContextSystemPrompt;
+    this.nodePostprocessors = nodePostprocessors || [];
+  }
+
+  // Runs the nodes through every postprocessor, in the order they were configured.
+  private applyNodePostprocessors(nodes: NodeWithScore[]) {
+    let processed = nodes;
+    for (const postprocessor of this.nodePostprocessors) {
+      processed = postprocessor.postprocessNodes(processed);
+    }
+    return processed;
+  }
+
+  /**
+   * Retrieves nodes for the message and builds the system message from their text.
+   * @param message the message to retrieve context for
+   * @param parentEvent event passed to the retriever; a new "wrapper" event is created when omitted
+   */
+  async generate(message: string, parentEvent?: Event): Promise<Context> {
+    const event: Event = parentEvent ?? {
+      id: uuidv4(),
+      type: "wrapper",
+      tags: ["final"],
+    };
+
+    const retrieved = await this.retriever.retrieve(message, event);
+    const nodes = this.applyNodePostprocessors(retrieved);
+
+    // The text of all remaining nodes, separated by blank lines, becomes the prompt context.
+    const contextText = nodes
+      .map(({ node }) => (node as TextNode).text)
+      .join("\n\n");
+    const systemMessage: ChatMessage = {
+      content: this.contextSystemPrompt({ context: contextText }),
+      role: "system",
+    };
+
+    return { message: systemMessage, nodes };
+  }
+}
+
 /**
 * ContextChatEngine uses the Index to get the appropriate context for each query.
 * The context is stored in the system prompt, and the chat history is preserved,
 * ideally allowing the appropriate context to be surfaced for each query.
 */
 export class ContextChatEngine implements ChatEngine {
-  retriever: BaseRetriever;
-  chatModel: OpenAI;
+  chatModel: LLM;
  chatHistory: ChatMessage[];
+  contextGenerator: ContextGenerator;

  constructor(init: {
    retriever: BaseRetriever;
-    chatModel?: OpenAI;
+    chatModel?: LLM;
    chatHistory?: ChatMessage[];
+    contextSystemPrompt?: ContextSystemPrompt;
+    nodePostprocessors?: BaseNodePostprocessor[];
  }) {
-    this.retriever = init.retriever;
    this.chatModel =
      init.chatModel ?? new OpenAI({ model: "gpt-3.5-turbo-16k" });
    this.chatHistory = init?.chatHistory ?? [];
+    // Retrieval and prompt rendering are delegated to the context generator,
+    // so every context-related option must be forwarded to it - including
+    // the node postprocessors, which would otherwise be silently ignored.
+    this.contextGenerator = new DefaultContextGenerator({
+      retriever: init.retriever,
+      contextSystemPrompt: init?.contextSystemPrompt,
+      nodePostprocessors: init?.nodePostprocessors,
+    });
  }

-  async chat(message: string, chatHistory?: ChatMessage[] | undefined) {
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(
+    message: string,
+    chatHistory?: ChatMessage[] | undefined,
+    streaming?: T,
+  ): Promise<R> {
    chatHistory = chatHistory ?? this.chatHistory;

+    //Streaming option
+    if (streaming) {
+      return this.streamChat(message, chatHistory) as R;
+    }
+
    const parentEvent: Event = {
      id: uuidv4(),
      type: "wrapper",
      tags: ["final"],
    };
-    const sourceNodesWithScore = await this.retriever.retrieve(
-      message,
-      parentEvent
-    );
-
-    const systemMessage: ChatMessage = {
-      content: contextSystemPrompt({
-        context: sourceNodesWithScore
-          .map((r) => (r.node as TextNode).text)
-          .join("\n\n"),
-      }),
-      role: "system",
-    };
+    const context = await this.contextGenerator.generate(message, parentEvent);

    chatHistory.push({ content: message, role: "user" });

    const response = await this.chatModel.chat(
-      [systemMessage, ...chatHistory],
-      parentEvent
+      [context.message, ...chatHistory],
+      parentEvent,
    );
    chatHistory.push(response.message);

@@ -175,11 +286,146 @@ export class ContextChatEngine implements ChatEngine {

    return new Response(
      response.message.content,
-      sourceNodesWithScore.map((r) => r.node)
+      context.nodes.map((r) => r.node),
+    ) as R;
+  }
+
+  /**
+   * Streaming variant of `chat`: yields the reply piece by piece.
+   *
+   * Generates the context for the message, pushes the user message onto the
+   * history (the caller's array is mutated in place when one is passed) and
+   * requests a streamed chat with the context message placed before the history.
+   * Once the stream is exhausted, the concatenated parts are pushed as a single
+   * assistant message and the history is stored on the engine.
+   * @param message the user message to send
+   * @param chatHistory optional history to use instead of the engine's own
+   */
+  protected async *streamChat(
+    message: string,
+    chatHistory?: ChatMessage[] | undefined,
+  ): AsyncGenerator<string, void, unknown> {
+    chatHistory = chatHistory ?? this.chatHistory;
+
+    const parentEvent: Event = {
+      id: uuidv4(),
+      type: "wrapper",
+      tags: ["final"],
+    };
+    const context = await this.contextGenerator.generate(message, parentEvent);
+
+    chatHistory.push({ content: message, role: "user" });
+
+    const response_stream = await this.chatModel.chat(
+      [context.message, ...chatHistory],
+      parentEvent,
+      true,
    );
+    var accumulator: string = "";
+    for await (const part of response_stream) {
+      accumulator += part;
+      yield part;
+    }
+
+    chatHistory.push({ content: accumulator, role: "assistant" });
+
+    this.chatHistory = chatHistory;
+
+    return;
  }

  reset() {
    this.chatHistory = [];
  }
 }
+
+/**
+ * One part of a multi-modal message: either text or an image URL.
+ * NOTE(review): both `text` and `image_url` are declared as required here;
+ * presumably only the field matching `type` is meaningful - confirm.
+ */
+export interface MessageContentDetail {
+  // Discriminates the kind of part.
+  type: "text" | "image_url";
+  text: string;
+  image_url: { url: string };
+}
+
+/**
+ * Extended type for the content of a message that allows for multi-modal messages.
+ */
+export type MessageContent = string | MessageContentDetail[];
+
+/**
+ * HistoryChatEngine is a ChatEngine that uses a `ChatHistory` object
+ * to keep track of the chat's message history.
+ * The `ChatHistory` object is passed as a parameter on every call to the `chat` method,
+ * so the conversation state lives in that object between calls.
+ * Optionally, a `ContextGenerator` can be used to generate an additional context for each call to `chat`.
+ */
+export class HistoryChatEngine {
+  llm: LLM;
+  contextGenerator?: ContextGenerator;
+
+  constructor(init?: Partial<HistoryChatEngine>) {
+    const { llm, contextGenerator } = init ?? {};
+    this.llm = llm ?? new OpenAI();
+    this.contextGenerator = contextGenerator;
+  }
+
+  /**
+   * Sends the message to the LLM together with the messages of the given history.
+   * @param message the user message, either plain text or multi-modal parts
+   * @param chatHistory the history to read from and append to
+   * @param streaming when true, an async generator yielding the reply parts is returned instead of a Response
+   */
+  async chat<
+    T extends boolean | undefined = undefined,
+    R = T extends true ? AsyncGenerator<string, void, unknown> : Response,
+  >(
+    message: MessageContent,
+    chatHistory: ChatHistory,
+    streaming?: T,
+  ): Promise<R> {
+    if (!streaming) {
+      // Non-streaming: wait for the full reply and record it in the history.
+      const requestMessages = await this.prepareRequestMessages(
+        message,
+        chatHistory,
+      );
+      const { message: reply } = await this.llm.chat(requestMessages);
+      chatHistory.addMessage(reply);
+      return new Response(reply.content) as R;
+    }
+
+    return this.streamChat(message, chatHistory) as R;
+  }
+
+  /**
+   * Streaming variant of `chat`: yields each part of the reply and, once the
+   * stream is exhausted, adds the concatenated reply to the history.
+   */
+  protected async *streamChat(
+    message: MessageContent,
+    chatHistory: ChatHistory,
+  ): AsyncGenerator<string, void, unknown> {
+    const requestMessages = await this.prepareRequestMessages(
+      message,
+      chatHistory,
+    );
+    const stream = await this.llm.chat(requestMessages, undefined, true);
+
+    let fullReply = "";
+    for await (const part of stream) {
+      fullReply += part;
+      yield part;
+    }
+
+    chatHistory.addMessage({ content: fullReply, role: "assistant" });
+  }
+
+  /**
+   * Adds the user message to the history and returns the messages to send to
+   * the LLM, preceded by a generated context message if a generator is set.
+   */
+  private async prepareRequestMessages(
+    message: MessageContent,
+    chatHistory: ChatHistory,
+  ) {
+    chatHistory.addMessage({ content: message, role: "user" });
+
+    if (!this.contextGenerator) {
+      return await chatHistory.requestMessages(undefined);
+    }
+
+    // The context generator only accepts text: for a multi-modal message,
+    // keep just the text parts and concatenate them.
+    const query = Array.isArray(message)
+      ? (message as MessageContentDetail[])
+          .filter((part) => part.type === "text")
+          .map((part) => part.text)
+          .join("\n\n")
+      : message;
+    const context = await this.contextGenerator.generate(query);
+
+    return await chatHistory.requestMessages(
+      context ? [context.message] : undefined,
+    );
+  }
+}
@@ -0,0 +1,200 @@
+import { ChatMessage, LLM, MessageType, OpenAI } from "./llm/LLM";
+import {
+  defaultSummaryPrompt,
+  messagesToHistoryStr,
+  SummaryPrompt,
+} from "./Prompt";
+
+/**
+ * A ChatHistory is used to keep the state of back and forth chat messages
+ */
+export interface ChatHistory {
+  /** The messages stored in this history. */
+  messages: ChatMessage[];
+  /**
+   * Adds a message to the chat history.
+   * @param message the message to add
+   */
+  addMessage(message: ChatMessage): void;
+
+  /**
+   * Returns the messages that should be used as input to the LLM.
+   * @param transientMessages optional extra messages for this request; the
+   * implementations in this file include them in the result without adding
+   * them to `messages`
+   */
+  requestMessages(transientMessages?: ChatMessage[]): Promise<ChatMessage[]>;
+
+  /**
+   * Resets the chat history so that it's empty.
+   */
+  reset(): void;
+
+  /**
+   * Returns the new messages since the last call to this function (or since calling the constructor)
+   */
+  newMessages(): ChatMessage[];
+}
+
+/**
+ * A ChatHistory that keeps every message in memory and returns them unmodified.
+ */
+export class SimpleChatHistory implements ChatHistory {
+  messages: ChatMessage[];
+  // Number of messages that existed at construction or at the last newMessages() call.
+  private messagesBefore: number;
+
+  constructor(init?: Partial<SimpleChatHistory>) {
+    this.messages = init?.messages ?? [];
+    this.messagesBefore = this.messages.length;
+  }
+
+  /** Appends the message to the history. */
+  addMessage(message: ChatMessage) {
+    this.messages.push(message);
+  }
+
+  /**
+   * Returns the transient messages (if any) followed by all stored messages.
+   * The transient messages are not stored.
+   */
+  async requestMessages(transientMessages?: ChatMessage[]) {
+    return [...(transientMessages ?? []), ...this.messages];
+  }
+
+  /** Empties the history. */
+  reset() {
+    this.messages = [];
+    // Rewind the marker as well: with a stale marker, newMessages() would
+    // slice past the end of the fresh array and drop messages added after the reset.
+    this.messagesBefore = 0;
+  }
+
+  /**
+   * Returns the messages added since the previous call (or since construction)
+   * and moves the marker to the current end of the history.
+   */
+  newMessages() {
+    const newMessages = this.messages.slice(this.messagesBefore);
+    this.messagesBefore = this.messages.length;
+    return newMessages;
+  }
+}
+
+/**
+ * A ChatHistory that asks the LLM to summarize the conversation once a request
+ * would exceed the token budget, and stores that summary as a "memory" message.
+ */
+export class SummaryChatHistory implements ChatHistory {
+  // Token budget of a request: the LLM's contextWindow minus its maxTokens.
+  tokensToSummarize: number;
+  messages: ChatMessage[];
+  summaryPrompt: SummaryPrompt;
+  llm: LLM;
+  // Number of messages that existed at construction or at the last newMessages() call.
+  private messagesBefore: number;
+
+  /**
+   * @throws Error if the LLM's metadata has no `maxTokens` set
+   */
+  constructor(init?: Partial<SummaryChatHistory>) {
+    this.messages = init?.messages ?? [];
+    this.messagesBefore = this.messages.length;
+    this.summaryPrompt = init?.summaryPrompt ?? defaultSummaryPrompt;
+    this.llm = init?.llm ?? new OpenAI();
+    if (!this.llm.metadata.maxTokens) {
+      throw new Error(
+        "LLM maxTokens is not set. Needed so the summarizer ensures the context window size of the LLM.",
+      );
+    }
+    this.tokensToSummarize =
+      this.llm.metadata.contextWindow - this.llm.metadata.maxTokens;
+  }
+
+  /**
+   * Asks the LLM for a summary of the conversation so far and returns it as a
+   * message with role "memory". The message is not added to the history here.
+   */
+  private async summarize(): Promise<ChatMessage> {
+    // get the conversation messages to create summary
+    const messagesToSummarize = this.calcConversationMessages();
+
+    let promptMessages;
+    do {
+      promptMessages = [
+        {
+          content: this.summaryPrompt({
+            context: messagesToHistoryStr(messagesToSummarize),
+          }),
+          role: "user" as MessageType,
+        },
+      ];
+      if (messagesToSummarize.length === 0) {
+        // Nothing left to drop: stop here instead of looping forever when
+        // even the prompt without any messages exceeds the token budget.
+        break;
+      }
+      // remove oldest message until the chat history is short enough for the context window
+      messagesToSummarize.shift();
+    } while (this.llm.tokens(promptMessages) > this.tokensToSummarize);
+
+    const response = await this.llm.chat(promptMessages);
+    return { content: response.message.content, role: "memory" };
+  }
+
+  /** Appends the message to the history. */
+  addMessage(message: ChatMessage) {
+    this.messages.push(message);
+  }
+
+  // Find the index of the last summary ("memory") message, or null if there is none
+  private getLastSummaryIndex(): number | null {
+    for (let i = this.messages.length - 1; i >= 0; i--) {
+      if (this.messages[i].role === "memory") {
+        return i;
+      }
+    }
+    return null;
+  }
+
+  private get systemMessages() {
+    // get array of all system messages
+    return this.messages.filter((message) => message.role === "system");
+  }
+
+  private get nonSystemMessages() {
+    // get array of all non-system messages
+    return this.messages.filter((message) => message.role !== "system");
+  }
+
+  /**
+   * Calculates the messages that describe the conversation so far.
+   * If there's no memory, all non-system messages are used.
+   * If there's a memory, uses all messages after the last summary message.
+   * @param transformSummary if true, the summary message is converted into a
+   * system message so it can be sent to the LLM
+   */
+  private calcConversationMessages(transformSummary?: boolean): ChatMessage[] {
+    const lastSummaryIndex = this.getLastSummaryIndex();
+    // Compare against null explicitly: index 0 is a valid position for the
+    // summary message and must not be mistaken for "no summary".
+    if (lastSummaryIndex === null) {
+      // there's no memory, so just use all non-system messages
+      return this.nonSystemMessages;
+    }
+    // there's a memory, so use all messages after the last summary message
+    // and convert summary message so it can be sent to the LLM
+    const summaryMessage: ChatMessage = transformSummary
+      ? {
+          content: `Summary of the conversation so far: ${this.messages[lastSummaryIndex].content}`,
+          role: "system",
+        }
+      : this.messages[lastSummaryIndex];
+    return [summaryMessage, ...this.messages.slice(lastSummaryIndex + 1)];
+  }
+
+  private calcCurrentRequestMessages(transientMessages?: ChatMessage[]) {
+    // TODO: check order: currently, we're sending:
+    // system messages first, then transient messages and then the messages that describe the conversation so far
+    return [
+      ...this.systemMessages,
+      ...(transientMessages ?? []),
+      ...this.calcConversationMessages(true),
+    ];
+  }
+
+  /**
+   * Returns the messages to send to the LLM. If they exceed the token budget,
+   * the conversation is summarized first and the summary is stored in the history.
+   * @param transientMessages optional extra messages for this request; they are not stored
+   */
+  async requestMessages(transientMessages?: ChatMessage[]) {
+    const requestMessages = this.calcCurrentRequestMessages(transientMessages);
+
+    // get tokens of current request messages and the transient messages
+    const tokens = this.llm.tokens(requestMessages);
+    if (tokens > this.tokensToSummarize) {
+      // if there are too many tokens for the next request, call summarize
+      const memoryMessage = await this.summarize();
+      const lastMessage = this.messages.at(-1);
+      if (lastMessage && lastMessage.role === "user") {
+        // if last message is a user message, ensure that it's sent after the new memory message
+        this.messages.pop();
+        this.messages.push(memoryMessage);
+        this.messages.push(lastMessage);
+      } else {
+        // otherwise just add the memory message
+        this.messages.push(memoryMessage);
+      }
+      // TODO: we still might have too many tokens
+      // e.g. too large system messages or transient messages
+      // how should we deal with that?
+      return this.calcCurrentRequestMessages(transientMessages);
+    }
+    return requestMessages;
+  }
+
+  /** Empties the history. */
+  reset() {
+    this.messages = [];
+    // Rewind the marker as well: with a stale marker, newMessages() would
+    // slice past the end of the fresh array and drop messages added after the reset.
+    this.messagesBefore = 0;
+  }
+
+  /**
+   * Returns the messages added since the previous call (or since construction)
+   * and moves the marker to the current end of the history.
+   */
+  newMessages() {
+    const newMessages = this.messages.slice(this.messagesBefore);
+    this.messagesBefore = this.messages.length;
+    return newMessages;
+  }
+}
@@ -8,7 +8,7 @@ import {
  getAzureModel,
  shouldUseAzure,
 } from "./llm/azure";
-import { getOpenAISession, OpenAISession } from "./llm/openai";
+import { OpenAISession, getOpenAISession } from "./llm/openai";
 import { VectorStoreQueryMode } from "./storage/vectorStore/types";

 /**
@@ -280,9 +280,6 @@ export class OpenAIEmbedding extends BaseEmbedding {
  }

  private async getOpenAIEmbedding(input: string) {
-    input = input.replace(/\n/g, " ");
-    //^ NOTE this performance helper is in the OpenAI python library but may not be in the JS library
-
    const { data } = await this.session.openai.embeddings.create({
      model: this.model,
      input,
@@ -1,28 +1,54 @@
-import { Event, EventTag, EventType } from "./callbacks/CallbackManager";
+import { encodingForModel } from "js-tiktoken";
+
 import { v4 as uuidv4 } from "uuid";
+import { Event, EventTag, EventType } from "./callbacks/CallbackManager";
+
+/**
+ * Names of the tokenizer encodings that can be requested from GlobalsHelper.
+ */
+export enum Tokenizers {
+  CL100K_BASE = "cl100k_base",
+}

 /**
 * Helper class singleton
 */
 class GlobalsHelper {
  defaultTokenizer: {
-    encode: (text: string) => number[];
-    decode: (tokens: number[]) => string;
+    encode: (text: string) => Uint32Array;
+    decode: (tokens: Uint32Array) => string;
  } | null = null;

-  tokenizer() {
+  // Builds the default tokenizer and stores it on this helper.
+  private initDefaultTokenizer() {
+    const cl100k = encodingForModel("text-embedding-ada-002"); // cl100k_base
+
+    const encode = (text: string) => new Uint32Array(cl100k.encode(text));
+    const decode = (tokens: Uint32Array) => {
+      const decoded = cl100k.decode(Array.from(tokens));
+      // Pass the decoded string through UTF-8 bytes and back before returning it.
+      return new TextDecoder().decode(new TextEncoder().encode(decoded));
+    };
+
+    this.defaultTokenizer = { encode, decode };
+  }
+
+  tokenizer(encoding?: string) {
+    if (encoding && encoding !== Tokenizers.CL100K_BASE) {
+      throw new Error(`Tokenizer encoding ${encoding} not yet supported`);
+    }
    if (!this.defaultTokenizer) {
-      const tiktoken = require("tiktoken-node");
-      this.defaultTokenizer = tiktoken.getEncoding("gpt2");
+      this.initDefaultTokenizer();
    }

    return this.defaultTokenizer!.encode.bind(this.defaultTokenizer);
  }

-  tokenizerDecoder() {
+  tokenizerDecoder(encoding?: string) {
+    if (encoding && encoding !== Tokenizers.CL100K_BASE) {
+      throw new Error(`Tokenizer encoding ${encoding} not yet supported`);
+    }
    if (!this.defaultTokenizer) {
-      const tiktoken = require("tiktoken-node");
-      this.defaultTokenizer = tiktoken.getEncoding("gpt2");
+      this.initDefaultTokenizer();
    }

    return this.defaultTokenizer!.decode.bind(this.defaultTokenizer);
@@ -23,19 +23,23 @@ export enum MetadataMode {
  NONE = "NONE",
 }

-export interface RelatedNodeInfo {
+export type Metadata = Record<string, any>;
+
+export interface RelatedNodeInfo<T extends Metadata = Metadata> {
  nodeId: string;
  nodeType?: ObjectType;
-  metadata: Record<string, any>;
+  metadata: T;
  hash?: string;
 }

-export type RelatedNodeType = RelatedNodeInfo | RelatedNodeInfo[];
+export type RelatedNodeType<T extends Metadata = Metadata> =
+  | RelatedNodeInfo<T>
+  | RelatedNodeInfo<T>[];

 /**
 * Generic abstract class for retrievable nodes
 */
-export abstract class BaseNode {
+export abstract class BaseNode<T extends Metadata = Metadata> {
  /**
   * The unique ID of the Node/Document. The trailing underscore is here
   * to avoid collisions with the id keyword in Python.
@@ -46,13 +50,13 @@ export abstract class BaseNode {
  embedding?: number[];

  // Metadata fields
-  metadata: Record<string, any> = {};
+  metadata: T = {} as T;
  excludedEmbedMetadataKeys: string[] = [];
  excludedLlmMetadataKeys: string[] = [];
-  relationships: Partial<Record<NodeRelationship, RelatedNodeType>> = {};
+  relationships: Partial<Record<NodeRelationship, RelatedNodeType<T>>> = {};
  hash: string = "";

-  constructor(init?: Partial<BaseNode>) {
+  constructor(init?: Partial<BaseNode<T>>) {
    Object.assign(this, init);
  }

@@ -62,7 +66,7 @@ export abstract class BaseNode {
  abstract getMetadataStr(metadataMode: MetadataMode): string;
  abstract setContent(value: any): void;

-  get sourceNode(): RelatedNodeInfo | undefined {
+  get sourceNode(): RelatedNodeInfo<T> | undefined {
    const relationship = this.relationships[NodeRelationship.SOURCE];

    if (Array.isArray(relationship)) {
@@ -72,7 +76,7 @@ export abstract class BaseNode {
    return relationship;
  }

-  get prevNode(): RelatedNodeInfo | undefined {
+  get prevNode(): RelatedNodeInfo<T> | undefined {
    const relationship = this.relationships[NodeRelationship.PREVIOUS];

    if (Array.isArray(relationship)) {
@@ -84,7 +88,7 @@ export abstract class BaseNode {
    return relationship;
  }

-  get nextNode(): RelatedNodeInfo | undefined {
+  get nextNode(): RelatedNodeInfo<T> | undefined {
    const relationship = this.relationships[NodeRelationship.NEXT];

    if (Array.isArray(relationship)) {
@@ -94,7 +98,7 @@ export abstract class BaseNode {
    return relationship;
  }

-  get parentNode(): RelatedNodeInfo | undefined {
+  get parentNode(): RelatedNodeInfo<T> | undefined {
    const relationship = this.relationships[NodeRelationship.PARENT];

    if (Array.isArray(relationship)) {
@@ -104,7 +108,7 @@ export abstract class BaseNode {
    return relationship;
  }

-  get childNodes(): RelatedNodeInfo[] | undefined {
+  get childNodes(): RelatedNodeInfo<T>[] | undefined {
    const relationship = this.relationships[NodeRelationship.CHILD];

    if (!Array.isArray(relationship)) {
@@ -126,7 +130,7 @@ export abstract class BaseNode {
    return this.embedding;
  }

-  asRelatedNodeInfo(): RelatedNodeInfo {
+  asRelatedNodeInfo(): RelatedNodeInfo<T> {
    return {
      nodeId: this.id_,
      metadata: this.metadata,
@@ -146,7 +150,7 @@ export abstract class BaseNode {
 /**
 * TextNode is the default node type for text. Most common node type in LlamaIndex.TS
 */
-export class TextNode extends BaseNode {
+export class TextNode<T extends Metadata = Metadata> extends BaseNode<T> {
  text: string = "";
  startCharIdx?: number;
  endCharIdx?: number;
@@ -154,7 +158,7 @@ export class TextNode extends BaseNode {
  // metadataTemplate: NOTE write your own formatter if needed
  metadataSeparator: string = "\n";

-  constructor(init?: Partial<TextNode>) {
+  constructor(init?: Partial<TextNode<T>>) {
    super(init);
    Object.assign(this, init);

@@ -233,10 +237,10 @@ export class TextNode extends BaseNode {
 //   }
 // }

-export class IndexNode extends TextNode {
+export class IndexNode<T extends Metadata = Metadata> extends TextNode<T> {
  indexId: string = "";

-  constructor(init?: Partial<IndexNode>) {
+  constructor(init?: Partial<IndexNode<T>>) {
    super(init);
    Object.assign(this, init);

@@ -253,8 +257,8 @@ export class IndexNode extends TextNode {
 /**
 * A document is just a special text node with a docId.
 */
-export class Document extends TextNode {
-  constructor(init?: Partial<Document>) {
+export class Document<T extends Metadata = Metadata> extends TextNode<T> {
+  constructor(init?: Partial<Document<T>>) {
    super(init);
    Object.assign(this, init);

@@ -292,7 +296,7 @@ export function jsonToNode(json: any) {
 /**
 * A node with a similarity score
 */
-export interface NodeWithScore {
-  node: BaseNode;
-  score: number;
+export interface NodeWithScore<T extends Metadata = Metadata> {
+  node: BaseNode<T>;
+  score?: number;
 }
@@ -10,7 +10,7 @@ import { DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE } from "./constants";
 */
 export function getTextSplitsFromDocument(
  document: Document,
-  textSplitter: SentenceSplitter
+  textSplitter: SentenceSplitter,
 ) {
  const text = document.getText();
  const splits = textSplitter.splitText(text);
@@ -30,7 +30,7 @@ export function getNodesFromDocument(
  document: Document,
  textSplitter: SentenceSplitter,
  includeMetadata: boolean = true,
-  includePrevNextRel: boolean = true
+  includePrevNextRel: boolean = true,
 ) {
  let nodes: TextNode[] = [];

@@ -100,10 +100,10 @@ export class SimpleNodeParser implements NodeParser {
  }) {
    this.textSplitter =
      init?.textSplitter ??
-      new SentenceSplitter(
-        init?.chunkSize ?? DEFAULT_CHUNK_SIZE,
-        init?.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP
-      );
+      new SentenceSplitter({
+        chunkSize: init?.chunkSize ?? DEFAULT_CHUNK_SIZE,
+        chunkOverlap: init?.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP,
+      });
    this.includeMetadata = init?.includeMetadata ?? true;
    this.includePrevNextRel = init?.includePrevNextRel ?? true;
  }
@@ -28,7 +28,7 @@ class OutputParserError extends Error {

  constructor(
    message: string,
-    options: { cause?: Error; output?: string } = {}
+    options: { cause?: Error; output?: string } = {},
  ) {
    // @ts-ignore
    super(message, options); // https://github.com/tc39/proposal-error-cause
@@ -53,30 +53,31 @@ class OutputParserError extends Error {
 * @param text A markdown block with JSON
 * @returns parsed JSON object
 */
-function parseJsonMarkdown(text: string) {
+export function parseJsonMarkdown(text: string) {
   text = text.trim();

-  const beginDelimiter = "```json";
-  const endDelimiter = "```";
+  const left_square = text.indexOf("[");
+  const left_brace = text.indexOf("{");

-  const beginIndex = text.indexOf(beginDelimiter);
-  const endIndex = text.indexOf(
-    endDelimiter,
-    beginIndex + beginDelimiter.length
-  );
-  if (beginIndex === -1 || endIndex === -1) {
-    throw new OutputParserError("Not a json markdown", { output: text });
+  // The payload is an array only when "[" opens before any "{" (-1 = absent).
+  let left: number, right: number;
+  if (left_square !== -1 && (left_brace === -1 || left_square < left_brace)) {
+    left = left_square;
+    right = text.lastIndexOf("]");
+  } else {
+    left = left_brace;
+    right = text.lastIndexOf("}");
   }
-
-  const jsonText = text.substring(beginIndex + beginDelimiter.length, endIndex);
-
+  const jsonText = text.substring(left, right + 1);
   try {
+    // A bare object is wrapped so callers always receive an array.
+    if (left === left_brace) return [JSON.parse(jsonText)];
     return JSON.parse(jsonText);
   } catch (e) {
-    throw new OutputParserError("Not a valid json", {
-      cause: e as Error,
-      output: text,
-    });
+    throw new OutputParserError("Not a json markdown", {
+      cause: e as Error,
+      output: text,
+    });
   }
  }

@@ -7,30 +7,35 @@ import { ToolMetadata } from "./Tool";
 * NOTE this is a different interface compared to LlamaIndex Python
 * NOTE 2: we default to empty string to make it easy to calculate prompt sizes
 */
-export type SimplePrompt = (input: Record<string, string>) => string;
+export type SimplePrompt = (
+  input: Record<string, string | undefined>,
+) => string;

 /*
 DEFAULT_TEXT_QA_PROMPT_TMPL = (
-    "Context information is below. \n"
+    "Context information is below.\n"
+    "---------------------\n"
+    "{context_str}\n"
    "---------------------\n"
-    "{context_str}"
-    "\n---------------------\n"
    "Given the context information and not prior knowledge, "
-    "answer the question: {query_str}\n"
+    "answer the query.\n"
+    "Query: {query_str}\n"
+    "Answer: "
 )
 */

-export const defaultTextQaPrompt: SimplePrompt = (input) => {
-  const { context = "", query = "" } = input;
-
+export const defaultTextQaPrompt = ({ context = "", query = "" }) => {
  return `Context information is below.
 ---------------------
 ${context}
 ---------------------
-Given the context information and not prior knowledge, answer the question: ${query}
-`;
+Given the context information and not prior knowledge, answer the query.
+Query: ${query}
+Answer:`;
 };

+export type TextQaPrompt = typeof defaultTextQaPrompt;
+
 /*
 DEFAULT_SUMMARY_PROMPT_TMPL = (
    "Write a summary of the following. Try to use only the "
@@ -45,9 +50,7 @@ DEFAULT_SUMMARY_PROMPT_TMPL = (
 )
 */

-export const defaultSummaryPrompt: SimplePrompt = (input) => {
-  const { context = "" } = input;
-
+export const defaultSummaryPrompt = ({ context = "" }) => {
  return `Write a summary of the following. Try to use only the information provided. Try to include as many key details as possible.


@@ -58,9 +61,11 @@ SUMMARY:"""
 `;
 };

+export type SummaryPrompt = typeof defaultSummaryPrompt;
+
 /*
 DEFAULT_REFINE_PROMPT_TMPL = (
-    "The original question is as follows: {query_str}\n"
+    "The original query is as follows: {query_str}\n"
    "We have provided an existing answer: {existing_answer}\n"
    "We have the opportunity to refine the existing answer "
    "(only if needed) with some more context below.\n"
@@ -68,26 +73,55 @@ DEFAULT_REFINE_PROMPT_TMPL = (
    "{context_msg}\n"
    "------------\n"
    "Given the new context, refine the original answer to better "
-    "answer the question. "
-    "If the context isn't useful, return the original answer."
+    "answer the query. "
+    "If the context isn't useful, return the original answer.\n"
+    "Refined Answer: "
 )
 */

-export const defaultRefinePrompt: SimplePrompt = (input) => {
-  const { query = "", existingAnswer = "", context = "" } = input;
-
-  return `The original question is as follows: ${query}
+export const defaultRefinePrompt = ({
+  query = "",
+  existingAnswer = "",
+  context = "",
+}) => {
+  return `The original query is as follows: ${query}
 We have provided an existing answer: ${existingAnswer}
 We have the opportunity to refine the existing answer (only if needed) with some more context below.
 ------------
 ${context}
 ------------
-Given the new context, refine the original answer to better answer the question. If the context isn't useful, return the original answer.`;
+Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
+Refined Answer:`;
 };

-export const defaultChoiceSelectPrompt: SimplePrompt = (input) => {
-  const { context = "", query = "" } = input;
+export type RefinePrompt = typeof defaultRefinePrompt;

+/*
+DEFAULT_TREE_SUMMARIZE_TMPL = (
+  "Context information from multiple sources is below.\n"
+  "---------------------\n"
+  "{context_str}\n"
+  "---------------------\n"
+  "Given the information from multiple sources and not prior knowledge, "
+  "answer the query.\n"
+  "Query: {query_str}\n"
+  "Answer: "
+)
+*/
+
+export const defaultTreeSummarizePrompt = ({ context = "", query = "" }) => {
+  return `Context information from multiple sources is below.
+---------------------
+${context}
+---------------------
+Given the information from multiple sources and not prior knowledge, answer the query.
+Query: ${query}
+Answer:`;
+};
+
+export type TreeSummarizePrompt = typeof defaultTreeSummarizePrompt;
+
+export const defaultChoiceSelectPrompt = ({ context = "", query = "" }) => {
  return `A list of documents is shown below. Each document has a number next to it along 
 with a summary of the document. A question is also provided.
 Respond with the numbers of the documents
@@ -119,6 +153,8 @@ Question: ${query}
 Answer:`;
 };

+export type ChoiceSelectPrompt = typeof defaultChoiceSelectPrompt;
+
 /*
 PREFIX = """\
 Given a user question, and a list of tools, output a list of relevant sub-questions \
@@ -236,9 +272,7 @@ const exampleOutput: SubQuestion[] = [
  },
 ];

-export const defaultSubQuestionPrompt: SimplePrompt = (input) => {
-  const { toolsStr, queryStr } = input;
-
+export const defaultSubQuestionPrompt = ({ toolsStr = "", queryStr = "" }) => {
  return `Given a user question, and a list of tools, output a list of relevant sub-questions that when composed can help answer the full user question:

 # Example 1
@@ -268,6 +302,8 @@ ${queryStr}
 `;
 };

+export type SubQuestionPrompt = typeof defaultSubQuestionPrompt;
+
 // DEFAULT_TEMPLATE = """\
 // Given a conversation (between Human and Assistant) and a follow up message from Human, \
 // rewrite the message to be a standalone question that captures all relevant context \
@@ -282,9 +318,10 @@ ${queryStr}
 // <Standalone question>
 // """

-export const defaultCondenseQuestionPrompt: SimplePrompt = (input) => {
-  const { chatHistory, question } = input;
-
+export const defaultCondenseQuestionPrompt = ({
+  chatHistory = "",
+  question = "",
+}) => {
  return `Given a conversation (between Human and Assistant) and a follow up message from Human, rewrite the message to be a standalone question that captures all relevant context from the conversation.

 <Chat History>
@@ -297,6 +334,8 @@ ${question}
 `;
 };

+export type CondenseQuestionPrompt = typeof defaultCondenseQuestionPrompt;
+
 export function messagesToHistoryStr(messages: ChatMessage[]) {
  return messages.reduce((acc, message) => {
    acc += acc ? "\n" : "";
@@ -309,11 +348,42 @@ export function messagesToHistoryStr(messages: ChatMessage[]) {
  }, "");
 }

-export const contextSystemPrompt: SimplePrompt = (input) => {
-  const { context } = input;
-
+export const defaultContextSystemPrompt = ({ context = "" }) => {
  return `Context information is below.
 ---------------------
 ${context}
 ---------------------`;
 };
+
+export type ContextSystemPrompt = typeof defaultContextSystemPrompt;
+
+export const defaultKeywordExtractPrompt = ({
+  context = "",
+  maxKeywords = 10,
+}) => {
+  return `
+Some text is provided below. Given the text, extract up to ${maxKeywords} keywords from the text. Avoid stopwords.
+---------------------
+${context}
+---------------------
+Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'
+`;
+};
+
+export type KeywordExtractPrompt = typeof defaultKeywordExtractPrompt;
+
+export const defaultQueryKeywordExtractPrompt = ({
+  question = "",
+  maxKeywords = 10,
+}) => {
+  // Plain template text: Python-style parentheses and per-line quotes would
+  // otherwise be sent to the LLM verbatim as part of the prompt.
+  return `
+A question is provided below. Given the question, extract up to ${maxKeywords} keywords from the text. Focus on extracting the keywords that we can use to best lookup answers to the question. Avoid stopwords.
+---------------------
+${question}
+---------------------
+Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'
+`;
+};
+export type QueryKeywordExtractPrompt = typeof defaultQueryKeywordExtractPrompt;
@@ -2,9 +2,9 @@ import { globalsHelper } from "./GlobalsHelper";
 import { SimplePrompt } from "./Prompt";
 import { SentenceSplitter } from "./TextSplitter";
 import {
+  DEFAULT_CHUNK_OVERLAP_RATIO,
  DEFAULT_CONTEXT_WINDOW,
  DEFAULT_NUM_OUTPUTS,
-  DEFAULT_CHUNK_OVERLAP_RATIO,
  DEFAULT_PADDING,
 } from "./constants";

@@ -34,7 +34,7 @@ export class PromptHelper {
  numOutput = DEFAULT_NUM_OUTPUTS;
  chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO;
  chunkSizeLimit?: number;
-  tokenizer: (text: string) => number[];
+  tokenizer: (text: string) => Uint32Array;
  separator = " ";

  constructor(
@@ -42,8 +42,8 @@ export class PromptHelper {
    numOutput = DEFAULT_NUM_OUTPUTS,
    chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO,
    chunkSizeLimit?: number,
-    tokenizer?: (text: string) => number[],
-    separator = " "
+    tokenizer?: (text: string) => Uint32Array,
+    separator = " ",
  ) {
    this.contextWindow = contextWindow;
    this.numOutput = numOutput;
@@ -76,7 +76,7 @@ export class PromptHelper {
  private getAvailableChunkSize(
    prompt: SimplePrompt,
    numChunks = 1,
-    padding = 5
+    padding = 5,
  ) {
    const availableContextSize = this.getAvailableContextSize(prompt);

@@ -99,14 +99,14 @@ export class PromptHelper {
  getTextSplitterGivenPrompt(
    prompt: SimplePrompt,
    numChunks = 1,
-    padding = DEFAULT_PADDING
+    padding = DEFAULT_PADDING,
  ) {
    const chunkSize = this.getAvailableChunkSize(prompt, numChunks, padding);
    if (chunkSize === 0) {
      throw new Error("Got 0 as available chunk size");
    }
    const chunkOverlap = this.chunkOverlapRatio * chunkSize;
-    const textSplitter = new SentenceSplitter(chunkSize, chunkOverlap);
+    const textSplitter = new SentenceSplitter({ chunkSize, chunkOverlap });
    return textSplitter;
  }

@@ -120,7 +120,7 @@ export class PromptHelper {
  repack(
    prompt: SimplePrompt,
    textChunks: string[],
-    padding = DEFAULT_PADDING
+    padding = DEFAULT_PADDING,
  ) {
    const textSplitter = this.getTextSplitterGivenPrompt(prompt, 1, padding);
    const combinedStr = textChunks.join("\n\n");
@@ -1,3 +1,4 @@
+import { v4 as uuidv4 } from "uuid";
 import { NodeWithScore, TextNode } from "./Node";
 import {
  BaseQuestionGenerator,
@@ -7,10 +8,10 @@ import {
 import { Response } from "./Response";
 import { CompactAndRefine, ResponseSynthesizer } from "./ResponseSynthesizer";
 import { BaseRetriever } from "./Retriever";
-import { v4 as uuidv4 } from "uuid";
-import { Event } from "./callbacks/CallbackManager";
 import { ServiceContext, serviceContextFromDefaults } from "./ServiceContext";
 import { QueryEngineTool, ToolMetadata } from "./Tool";
+import { Event } from "./callbacks/CallbackManager";
+import { BaseNodePostprocessor } from "./indices/BaseNodePostprocessor";

 /**
 * A query engine is a question answerer that can use one or more steps.
@@ -30,16 +31,39 @@ export interface BaseQueryEngine {
 export class RetrieverQueryEngine implements BaseQueryEngine {
  retriever: BaseRetriever;
  responseSynthesizer: ResponseSynthesizer;
+  nodePostprocessors: BaseNodePostprocessor[];
+  preFilters?: unknown;

  constructor(
    retriever: BaseRetriever,
-    responseSynthesizer?: ResponseSynthesizer
+    responseSynthesizer?: ResponseSynthesizer,
+    preFilters?: unknown,
+    nodePostprocessors?: BaseNodePostprocessor[],
  ) {
    this.retriever = retriever;
    const serviceContext: ServiceContext | undefined =
      this.retriever.getServiceContext();
    this.responseSynthesizer =
      responseSynthesizer || new ResponseSynthesizer({ serviceContext });
+    this.preFilters = preFilters;
+    this.nodePostprocessors = nodePostprocessors || [];
+  }
+
+  private applyNodePostprocessors(nodes: NodeWithScore[]) {
+    return this.nodePostprocessors.reduce(
+      (nodes, nodePostprocessor) => nodePostprocessor.postprocessNodes(nodes),
+      nodes,
+    );
+  }
+
+  private async retrieve(query: string, parentEvent: Event) {
+    const nodes = await this.retriever.retrieve(
+      query,
+      parentEvent,
+      this.preFilters,
+    );
+
+    return this.applyNodePostprocessors(nodes);
  }

  async query(query: string, parentEvent?: Event) {
@@ -48,7 +72,7 @@ export class RetrieverQueryEngine implements BaseQueryEngine {
      type: "wrapper",
      tags: ["final"],
    };
-    const nodes = await this.retriever.retrieve(query, _parentEvent);
+    const nodes = await this.retrieve(query, _parentEvent);
    return this.responseSynthesizer.synthesize(query, nodes, _parentEvent);
  }
 }
@@ -122,7 +146,7 @@ export class SubQuestionQueryEngine implements BaseQueryEngine {
    };

    const subQNodes = await Promise.all(
-      subQuestions.map((subQ) => this.querySubQ(subQ, subQueryParentEvent))
+      subQuestions.map((subQ) => this.querySubQ(subQ, subQueryParentEvent)),
    );

    const nodes = subQNodes
@@ -133,7 +157,7 @@ export class SubQuestionQueryEngine implements BaseQueryEngine {

  private async querySubQ(
    subQ: SubQuestion,
-    parentEvent?: Event
+    parentEvent?: Event,
  ): Promise<NodeWithScore | null> {
    try {
      const question = subQ.subQuestion;
@@ -4,7 +4,7 @@ import {
  SubQuestionOutputParser,
 } from "./OutputParser";
 import {
-  SimplePrompt,
+  SubQuestionPrompt,
  buildToolsText,
  defaultSubQuestionPrompt,
 } from "./Prompt";
@@ -28,7 +28,7 @@ export interface BaseQuestionGenerator {
 */
 export class LLMQuestionGenerator implements BaseQuestionGenerator {
  llm: LLM;
-  prompt: SimplePrompt;
+  prompt: SubQuestionPrompt;
  outputParser: BaseOutputParser<StructuredOutput<SubQuestion[]>>;

  constructor(init?: Partial<LLMQuestionGenerator>) {
@@ -45,7 +45,7 @@ export class LLMQuestionGenerator implements BaseQuestionGenerator {
        this.prompt({
          toolsStr,
          queryStr,
-        })
+        }),
      )
    ).message.content;

@@ -1,14 +1,18 @@
+import { Event } from "./callbacks/CallbackManager";
+import { LLM } from "./llm/LLM";
 import { MetadataMode, NodeWithScore } from "./Node";
 import {
-  SimplePrompt,
  defaultRefinePrompt,
  defaultTextQaPrompt,
+  defaultTreeSummarizePrompt,
+  RefinePrompt,
+  SimplePrompt,
+  TextQaPrompt,
+  TreeSummarizePrompt,
 } from "./Prompt";
 import { getBiggestPrompt } from "./PromptHelper";
 import { Response } from "./Response";
 import { ServiceContext, serviceContextFromDefaults } from "./ServiceContext";
-import { Event } from "./callbacks/CallbackManager";
-import { LLM } from "./llm/LLM";

 /**
 * Response modes of the response synthesizer
@@ -35,7 +39,7 @@ interface BaseResponseBuilder {
    query: string,
    textChunks: string[],
    parentEvent?: Event,
-    prevResponse?: string
+    prevResponse?: string,
  ): Promise<string>;
 }

@@ -54,7 +58,7 @@ export class SimpleResponseBuilder implements BaseResponseBuilder {
  async getResponse(
    query: string,
    textChunks: string[],
-    parentEvent?: Event
+    parentEvent?: Event,
  ): Promise<string> {
    const input = {
      query,
@@ -72,13 +76,13 @@ export class SimpleResponseBuilder implements BaseResponseBuilder {
 */
 export class Refine implements BaseResponseBuilder {
  serviceContext: ServiceContext;
-  textQATemplate: SimplePrompt;
-  refineTemplate: SimplePrompt;
+  textQATemplate: TextQaPrompt;
+  refineTemplate: RefinePrompt;

  constructor(
    serviceContext: ServiceContext,
-    textQATemplate?: SimplePrompt,
-    refineTemplate?: SimplePrompt
+    textQATemplate?: TextQaPrompt,
+    refineTemplate?: RefinePrompt,
  ) {
    this.serviceContext = serviceContext;
    this.textQATemplate = textQATemplate ?? defaultTextQaPrompt;
@@ -89,7 +93,7 @@ export class Refine implements BaseResponseBuilder {
    query: string,
    textChunks: string[],
    parentEvent?: Event,
-    prevResponse?: string
+    prevResponse?: string,
  ): Promise<string> {
    let response: string | undefined = undefined;

@@ -101,7 +105,7 @@ export class Refine implements BaseResponseBuilder {
          prevResponse,
          query,
          chunk,
-          parentEvent
+          parentEvent,
        );
      }
      prevResponse = response;
@@ -113,7 +117,7 @@ export class Refine implements BaseResponseBuilder {
  private async giveResponseSingle(
    queryStr: string,
    textChunk: string,
-    parentEvent?: Event
+    parentEvent?: Event,
  ): Promise<string> {
    const textQATemplate: SimplePrompt = (input) =>
      this.textQATemplate({ ...input, query: queryStr });
@@ -130,7 +134,7 @@ export class Refine implements BaseResponseBuilder {
            textQATemplate({
              context: chunk,
            }),
-            parentEvent
+            parentEvent,
          )
        ).message.content;
      } else {
@@ -138,7 +142,7 @@ export class Refine implements BaseResponseBuilder {
          response,
          queryStr,
          chunk,
-          parentEvent
+          parentEvent,
        );
      }
    }
@@ -150,7 +154,7 @@ export class Refine implements BaseResponseBuilder {
    response: string,
    queryStr: string,
    textChunk: string,
-    parentEvent?: Event
+    parentEvent?: Event,
  ) {
    const refineTemplate: SimplePrompt = (input) =>
      this.refineTemplate({ ...input, query: queryStr });
@@ -166,7 +170,7 @@ export class Refine implements BaseResponseBuilder {
            context: chunk,
            existingAnswer: response,
          }),
-          parentEvent
+          parentEvent,
        )
      ).message.content;
    }
@@ -182,7 +186,7 @@ export class CompactAndRefine extends Refine {
    query: string,
    textChunks: string[],
    parentEvent?: Event,
-    prevResponse?: string
+    prevResponse?: string,
  ): Promise<string> {
    const textQATemplate: SimplePrompt = (input) =>
      this.textQATemplate({ ...input, query: query });
@@ -192,13 +196,13 @@ export class CompactAndRefine extends Refine {
    const maxPrompt = getBiggestPrompt([textQATemplate, refineTemplate]);
    const newTexts = this.serviceContext.promptHelper.repack(
      maxPrompt,
-      textChunks
+      textChunks,
    );
    const response = super.getResponse(
      query,
      newTexts,
      parentEvent,
-      prevResponse
+      prevResponse,
    );
    return response;
  }
@@ -208,52 +212,57 @@ export class CompactAndRefine extends Refine {
 */
 export class TreeSummarize implements BaseResponseBuilder {
  serviceContext: ServiceContext;
+  summaryTemplate: TreeSummarizePrompt;

-  constructor(serviceContext: ServiceContext) {
+  constructor(
+    serviceContext: ServiceContext,
+    summaryTemplate?: TreeSummarizePrompt,
+  ) {
    this.serviceContext = serviceContext;
+    this.summaryTemplate = summaryTemplate ?? defaultTreeSummarizePrompt;
  }

  async getResponse(
    query: string,
    textChunks: string[],
-    parentEvent?: Event
+    parentEvent?: Event,
  ): Promise<string> {
-    const summaryTemplate: SimplePrompt = (input) =>
-      defaultTextQaPrompt({ ...input, query: query });
-
    if (!textChunks || textChunks.length === 0) {
      throw new Error("Must have at least one text chunk");
    }

+    // Should we send the query here too?
    const packedTextChunks = this.serviceContext.promptHelper.repack(
-      summaryTemplate,
-      textChunks
+      this.summaryTemplate,
+      textChunks,
    );

    if (packedTextChunks.length === 1) {
      return (
        await this.serviceContext.llm.complete(
-          summaryTemplate({
+          this.summaryTemplate({
            context: packedTextChunks[0],
+            query,
          }),
-          parentEvent
+          parentEvent,
        )
      ).message.content;
    } else {
      const summaries = await Promise.all(
        packedTextChunks.map((chunk) =>
          this.serviceContext.llm.complete(
-            summaryTemplate({
+            this.summaryTemplate({
              context: chunk,
+              query,
            }),
-            parentEvent
-          )
-        )
+            parentEvent,
+          ),
+        ),
      );

      return this.getResponse(
        query,
-        summaries.map((s) => s.message.content)
+        summaries.map((s) => s.message.content),
      );
    }
  }
@@ -261,7 +270,7 @@ export class TreeSummarize implements BaseResponseBuilder {

 export function getResponseBuilder(
  serviceContext: ServiceContext,
-  responseMode?: ResponseMode
+  responseMode?: ResponseMode,
 ): BaseResponseBuilder {
  switch (responseMode) {
    case ResponseMode.SIMPLE:
@@ -281,31 +290,39 @@ export function getResponseBuilder(
 export class ResponseSynthesizer {
  responseBuilder: BaseResponseBuilder;
  serviceContext: ServiceContext;
+  metadataMode: MetadataMode;

  constructor({
    responseBuilder,
    serviceContext,
+    metadataMode = MetadataMode.NONE,
  }: {
    responseBuilder?: BaseResponseBuilder;
    serviceContext?: ServiceContext;
+    metadataMode?: MetadataMode;
  } = {}) {
    this.serviceContext = serviceContext ?? serviceContextFromDefaults();
    this.responseBuilder =
      responseBuilder ?? getResponseBuilder(this.serviceContext);
+    this.metadataMode = metadataMode;
  }

-  async synthesize(query: string, nodes: NodeWithScore[], parentEvent?: Event) {
-    let textChunks: string[] = nodes.map((node) =>
-      node.node.getContent(MetadataMode.NONE)
+  async synthesize(
+    query: string,
+    nodesWithScore: NodeWithScore[],
+    parentEvent?: Event,
+  ) {
+    let textChunks: string[] = nodesWithScore.map(({ node }) =>
+      node.getContent(this.metadataMode),
    );
    const response = await this.responseBuilder.getResponse(
      query,
      textChunks,
-      parentEvent
+      parentEvent,
    );
    return new Response(
      response,
-      nodes.map((node) => node.node)
+      nodesWithScore.map(({ node }) => node),
    );
  }
 }
@@ -1,11 +1,15 @@
+import { Event } from "./callbacks/CallbackManager";
 import { NodeWithScore } from "./Node";
 import { ServiceContext } from "./ServiceContext";
-import { Event } from "./callbacks/CallbackManager";

 /**
 * Retrievers retrieve the nodes that most closely match our query in similarity.
 */
 export interface BaseRetriever {
-  retrieve(query: string, parentEvent?: Event): Promise<NodeWithScore[]>;
+  retrieve(
+    query: string,
+    parentEvent?: Event,
+    preFilters?: unknown,
+  ): Promise<NodeWithScore[]>;
  getServiceContext(): ServiceContext;
 }
@@ -1,8 +1,8 @@
 import { BaseEmbedding, OpenAIEmbedding } from "./Embedding";
-import { LLM, OpenAI } from "./llm/LLM";
 import { NodeParser, SimpleNodeParser } from "./NodeParser";
 import { PromptHelper } from "./PromptHelper";
 import { CallbackManager } from "./callbacks/CallbackManager";
+import { LLM, OpenAI } from "./llm/LLM";

 /**
 * The ServiceContext is a collection of components that are used in different parts of the application.
@@ -47,7 +47,7 @@ export function serviceContextFromDefaults(options?: ServiceContextOptions) {

 export function serviceContextFromServiceContext(
  serviceContext: ServiceContext,
-  options: ServiceContextOptions
+  options: ServiceContextOptions,
 ) {
  const newServiceContext = { ...serviceContext };
  if (options.llm) {
@@ -1,7 +1,7 @@
 // GitHub translated

 import { globalsHelper } from "./GlobalsHelper";
-import { DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP } from "./constants";
+import { DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE } from "./constants";

 class TextSplit {
  textChunk: string;
@@ -9,7 +9,7 @@ class TextSplit {

  constructor(
    textChunk: string,
-    numCharOverlap: number | undefined = undefined
+    numCharOverlap: number | undefined = undefined,
  ) {
    this.textChunk = textChunk;
    this.numCharOverlap = numCharOverlap;
@@ -18,6 +18,38 @@ class TextSplit {

 type SplitRep = { text: string; numTokens: number };

+/**
+ * Tokenizes sentences. Suitable for English and most European languages.
+ * @param text The text to split into sentences.
+ * @returns The matched sentence strings, or null when nothing in text matches.
+ */
+export const englishSentenceTokenizer = (text: string) => {
+  // Lazy match up to sentence-ending punctuation; the |.+ fallback takes the rest.
+  return text.match(/.+?[.?!]+[\])'"`’”]*(?:\s|$)|.+/g);
+};
+
+/**
+ * Tokenizes sentences. Suitable for Chinese, Japanese, and Korean.
+ * @param text The text to split into sentences.
+ * @returns The matched sentence strings, or null when nothing in text matches.
+ */
+export const cjkSentenceTokenizer = (text: string) => {
+  // Accepts English style sentence endings with space and
+  // CJK style sentence endings with no space.
+  return text.match(
+    /.+?[.?!]+[\])'"`’”]*(?:\s|$)|.+?[。？！]+[\])'"`’”]*(?:\s|$)?|.+/g,
+  );
+};
+
+export const unixLineSeparator = "\n";
+export const windowsLineSeparator = "\r\n";
+export const unixParagraphSeparator = unixLineSeparator + unixLineSeparator;
+export const windowsParagraphSeparator =
+  windowsLineSeparator + windowsLineSeparator;
+
+// In theory there's also Mac style \r only, but it's pre-OSX and I don't think
+// many documents will use it.
+
 /**
 * SentenceSplitter is our default text splitter that supports splitting into sentences, paragraphs, or fixed length chunks with overlap.
 *
@@ -29,46 +61,44 @@ export class SentenceSplitter {
  private tokenizer: any;
  private tokenizerDecoder: any;
  private paragraphSeparator: string;
-  private chunkingTokenizerFn: any;
-  // private _callback_manager: any;
+  private chunkingTokenizerFn: (text: string) => RegExpMatchArray | null;
+  private splitLongSentences: boolean;
+
+  constructor(options?: {
+    chunkSize?: number;
+    chunkOverlap?: number;
+    tokenizer?: any;
+    tokenizerDecoder?: any;
+    paragraphSeparator?: string;
+    chunkingTokenizerFn?: (text: string) => RegExpMatchArray | null;
+    splitLongSentences?: boolean;
+  }) {
+    const {
+      chunkSize = DEFAULT_CHUNK_SIZE,
+      chunkOverlap = DEFAULT_CHUNK_OVERLAP,
+      tokenizer = null,
+      tokenizerDecoder = null,
+      paragraphSeparator = unixParagraphSeparator,
+      chunkingTokenizerFn = undefined,
+      splitLongSentences = false,
+    } = options ?? {};

-  constructor(
-    chunkSize: number = DEFAULT_CHUNK_SIZE,
-    chunkOverlap: number = DEFAULT_CHUNK_OVERLAP,
-    tokenizer: any = null,
-    tokenizerDecoder: any = null,
-    paragraphSeparator: string = "\n\n\n",
-    chunkingTokenizerFn: any = undefined
-    // callback_manager: any = undefined
-  ) {
    if (chunkOverlap > chunkSize) {
      throw new Error(
-        `Got a larger chunk overlap (${chunkOverlap}) than chunk size (${chunkSize}), should be smaller.`
+        `Got a larger chunk overlap (${chunkOverlap}) than chunk size (${chunkSize}), should be smaller.`,
      );
    }
    this.chunkSize = chunkSize;
    this.chunkOverlap = chunkOverlap;
    // this._callback_manager = callback_manager || new CallbackManager([]);

-    if (chunkingTokenizerFn == undefined) {
-      // define a callable mapping a string to a list of strings
-      const defaultChunkingTokenizerFn = (text: string) => {
-        var result = text.match(/[^.?!]+[.!?]+[\])'"`’”]*|.+/g);
-        return result;
-      };
-
-      chunkingTokenizerFn = defaultChunkingTokenizerFn;
-    }
-
-    if (tokenizer == undefined || tokenizerDecoder == undefined) {
-      tokenizer = globalsHelper.tokenizer();
-      tokenizerDecoder = globalsHelper.tokenizerDecoder();
-    }
-    this.tokenizer = tokenizer;
-    this.tokenizerDecoder = tokenizerDecoder;
+    this.tokenizer = tokenizer ?? globalsHelper.tokenizer();
+    this.tokenizerDecoder =
+      tokenizerDecoder ?? globalsHelper.tokenizerDecoder();

    this.paragraphSeparator = paragraphSeparator;
-    this.chunkingTokenizerFn = chunkingTokenizerFn;
+    this.chunkingTokenizerFn = chunkingTokenizerFn ?? englishSentenceTokenizer;
+    this.splitLongSentences = splitLongSentences;
  }

  private getEffectiveChunkSize(extraInfoStr?: string): number {
@@ -79,7 +109,7 @@ export class SentenceSplitter {
      effectiveChunkSize = this.chunkSize - numExtraTokens;
      if (effectiveChunkSize <= 0) {
        throw new Error(
-          "Effective chunk size is non positive after considering extra_info"
+          "Effective chunk size is non positive after considering extra_info",
        );
      }
    } else {
@@ -119,7 +149,12 @@ export class SentenceSplitter {
    // Next we split the text using the chunk tokenizer fn/
    let splits = [];
    for (const parText of paragraphSplits) {
-      let sentenceSplits = this.chunkingTokenizerFn(parText);
+      const sentenceSplits = this.chunkingTokenizerFn(parText);
+
+      if (!sentenceSplits) {
+        continue;
+      }
+
      for (const sentence_split of sentenceSplits) {
        splits.push(sentence_split.trim());
      }
@@ -127,13 +162,28 @@ export class SentenceSplitter {
    return splits;
  }

+  /**
+   * Splits sentences into chunks if necessary.
+   *
+   * This isn't great behavior because it can split down the middle of a
+   * word or in non-English split down the middle of a Unicode codepoint
+   * so the splitting is turned off by default. If you need it, please
+   * set the splitLongSentences option to true.
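+   * @param sentenceSplits sentence strings to process
+   * @param effectiveChunkSize maximum tokens per piece when splitting is enabled
+   * @returns SplitRep entries, each holding a piece of text and its token count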
+   */
  private processSentenceSplits(
    sentenceSplits: string[],
-    effectiveChunkSize: number
+    effectiveChunkSize: number,
  ): SplitRep[] {
-    // Process sentence splits
-    // Primarily check if any sentences exceed the chunk size. If they don't,
-    // force split by tokenizer
+    if (!this.splitLongSentences) {
+      return sentenceSplits.map((split) => ({
+        text: split,
+        numTokens: this.tokenizer(split).length,
+      }));
+    }
+
    let newSplits: SplitRep[] = [];
    for (const split of sentenceSplits) {
      let splitTokens = this.tokenizer(split);
@@ -143,7 +193,7 @@ export class SentenceSplitter {
      } else {
        for (let i = 0; i < splitLen; i += effectiveChunkSize) {
          const cur_split = this.tokenizerDecoder(
-            splitTokens.slice(i, i + effectiveChunkSize)
+            splitTokens.slice(i, i + effectiveChunkSize),
          );
          newSplits.push({ text: cur_split, numTokens: effectiveChunkSize });
        }
@@ -154,7 +204,7 @@ export class SentenceSplitter {

  combineTextSplits(
    newSentenceSplits: SplitRep[],
-    effectiveChunkSize: number
+    effectiveChunkSize: number,
  ): TextSplit[] {
    // go through sentence splits, combine to chunks that are within the chunk size

@@ -178,8 +228,8 @@ export class SentenceSplitter {
            curChunkSentences
              .map((sentence) => sentence.text)
              .join(" ")
-              .trim()
-          )
+              .trim(),
+          ),
        );

        const lastChunkSentences = curChunkSentences;
@@ -210,8 +260,8 @@ export class SentenceSplitter {
        curChunkSentences
          .map((sentence) => sentence.text)
          .join(" ")
-          .trim()
-      )
+          .trim(),
+      ),
    );
    return docs;
  }
@@ -232,13 +282,13 @@ export class SentenceSplitter {
    // force split by tokenizer
    let newSentenceSplits = this.processSentenceSplits(
      sentenceSplits,
-      effectiveChunkSize
+      effectiveChunkSize,
    );

    // combine sentence splits into chunks of text that can then be returned
    let combinedTextSplits = this.combineTextSplits(
      newSentenceSplits,
-      effectiveChunkSize
+      effectiveChunkSize,
    );

    return combinedTextSplits;
@@ -20,7 +20,8 @@ interface BaseCallbackResponse {
  event: Event;
 }

-export interface StreamToken {
+//Specify StreamToken per mainstream LLM
+export interface DefaultStreamToken {
  id: string;
  object: string;
  created: number;
@@ -29,16 +30,34 @@ export interface StreamToken {
    index: number;
    delta: {
      content?: string | null;
-      role?: "user" | "assistant" | "system" | "function";
+      role?: "user" | "assistant" | "system" | "function" | "tool";
    };
    finish_reason: string | null;
  }[];
 }

+//OpenAI stream token schema is the default.
+//Note: Anthropic and Replicate also use similar token schemas.
+export type OpenAIStreamToken = DefaultStreamToken;
+export type AnthropicStreamToken = {
+  completion: string;
+  model: string;
+  stop_reason: string | undefined;
+  stop?: boolean | undefined;
+  log_id?: string;
+};
+
+//
+//Callback Responses
+//
+//TODO: Write Embedding Callbacks
+
+//StreamCallbackResponse should let practitioners implement callbacks out of the box...
+//When custom streaming LLMs are involved, people are expected to write their own StreamCallbackResponses
 export interface StreamCallbackResponse extends BaseCallbackResponse {
  index: number;
  isDone?: boolean;
-  token?: StreamToken;
+  token?: DefaultStreamToken;
 }

 export interface RetrievalCallbackResponse extends BaseCallbackResponse {
@@ -1,45 +0,0 @@
-import { ChatCompletionChunk } from "openai/resources/chat";
-import { Stream } from "openai/streaming";
-import { globalsHelper } from "../../GlobalsHelper";
-import { MessageType } from "../../llm/LLM";
-import { Event, StreamCallbackResponse } from "../CallbackManager";
-
-/**
- * Handles the OpenAI streaming interface and pipes it to the callback function
- * @param response - The response from the OpenAI API.
- * @param onLLMStream - A callback function to handle the LLM stream.
- * @param parentEvent - An optional parent event.
- * @returns A promise that resolves to an object with a message and a role.
- */
-export async function handleOpenAIStream({
-  response,
-  onLLMStream,
-  parentEvent,
-}: {
-  response: Stream<ChatCompletionChunk>;
-  onLLMStream: (data: StreamCallbackResponse) => void;
-  parentEvent?: Event;
-}): Promise<{ message: string; role: MessageType }> {
-  const event = globalsHelper.createEvent({
-    parentEvent,
-    type: "llmPredict",
-  });
-  let index = 0;
-  let cumulativeText = "";
-  let messageRole: MessageType = "assistant";
-  for await (const part of response) {
-    const { content = "", role = "assistant" } = part.choices[0].delta;
-
-    // ignore the first token
-    if (!content && role === "assistant" && index === 0) {
-      continue;
-    }
-
-    cumulativeText += content;
-    messageRole = role;
-    onLLMStream?.({ event, index, token: part });
-    index++;
-  }
-  onLLMStream?.({ event, index, isDone: true });
-  return { message: cumulativeText, role: messageRole };
-}
@@ -1,28 +1,29 @@
 export * from "./ChatEngine";
-export * from "./constants";
+export * from "./ChatHistory";
 export * from "./Embedding";
 export * from "./GlobalsHelper";
-export * from "./llm/LLM";
 export * from "./Node";
 export * from "./NodeParser";
 export * from "./OutputParser";
 export * from "./Prompt";
-export * from "./QuestionGenerator";
+export * from "./PromptHelper";
 export * from "./QueryEngine";
+export * from "./QuestionGenerator";
 export * from "./Response";
 export * from "./ResponseSynthesizer";
 export * from "./Retriever";
 export * from "./ServiceContext";
 export * from "./TextSplitter";
 export * from "./Tool";
-
-export * from "./indices";
-
 export * from "./callbacks/CallbackManager";
-
-export * from "./readers/base";
-export * from "./readers/PDFReader";
+export * from "./constants";
+export * from "./indices";
+export * from "./llm/LLM";
 export * from "./readers/CSVReader";
+export * from "./readers/HTMLReader";
+export * from "./readers/MarkdownReader";
+export * from "./readers/NotionReader";
+export * from "./readers/PDFReader";
 export * from "./readers/SimpleDirectoryReader";
-
+export * from "./readers/base";
 export * from "./storage";
@@ -4,9 +4,9 @@ import { BaseQueryEngine } from "../QueryEngine";
 import { ResponseSynthesizer } from "../ResponseSynthesizer";
 import { BaseRetriever } from "../Retriever";
 import { ServiceContext } from "../ServiceContext";
+import { StorageContext } from "../storage/StorageContext";
 import { BaseDocumentStore } from "../storage/docStore/types";
 import { BaseIndexStore } from "../storage/indexStore/types";
-import { StorageContext } from "../storage/StorageContext";
 import { VectorStore } from "../storage/vectorStore/types";

 /**
@@ -39,6 +39,7 @@ export abstract class IndexStruct {
 export enum IndexStructType {
  SIMPLE_DICT = "simple_dict",
  LIST = "list",
+  KEYWORD_TABLE = "keyword_table",
 }

 export class IndexDict extends IndexStruct {
@@ -106,6 +107,36 @@ export class IndexList extends IndexStruct {
  }
 }

+/** Index struct mapping each keyword to the set of node ids filed under it. */
+export class KeywordTable extends IndexStruct {
+  table: Map<string, Set<string>> = new Map();
+  type: IndexStructType = IndexStructType.KEYWORD_TABLE;
+  /** Files nodeId under every keyword, creating entries that are missing. */
+  addNode(keywords: string[], nodeId: string): void {
+    for (const keyword of keywords) {
+      const nodeIds = this.table.get(keyword) ?? new Set<string>();
+      nodeIds.add(nodeId);
+      this.table.set(keyword, nodeIds);
+    }
+  }
+
+  /** Removes nodeId from each listed keyword; unknown keywords are skipped. */
+  deleteNode(keywords: string[], nodeId: string) {
+    for (const keyword of keywords) {
+      this.table.get(keyword)?.delete(nodeId);
+    }
+  }
+
+  /** Emits plain arrays: JSON.stringify would turn a Map or Set into "{}". */
+  toJson(): Record<string, unknown> {
+    const table: Record<string, string[]> = {};
+    this.table.forEach((nodeIds, keyword) => {
+      table[keyword] = [...nodeIds];
+    });
+    return { ...super.toJson(), table, type: this.type };
+  }
+}
+
 export interface BaseIndexInit<T> {
  serviceContext: ServiceContext;
  storageContext: StorageContext;
@@ -0,0 +1,20 @@
+import { NodeWithScore } from "../Node";
+
+/** Maps a list of scored nodes to another list of scored nodes. */
+export interface BaseNodePostprocessor {
+  postprocessNodes: (nodes: NodeWithScore[]) => NodeWithScore[];
+}
+
+/** Keeps nodes scoring at least similarityCutoff; a no-op when it is unset. */
+export class SimilarityPostprocessor implements BaseNodePostprocessor {
+  similarityCutoff?: number;
+  constructor(options?: { similarityCutoff?: number }) {
+    this.similarityCutoff = options?.similarityCutoff;
+  }
+  postprocessNodes(nodes: NodeWithScore[]) {
+    const cutoff = this.similarityCutoff;
+    if (cutoff === undefined) return nodes;
+    // Test for a missing score explicitly so a legitimate score of 0 can pass.
+    return nodes.filter((node) => node.score != null && node.score >= cutoff);
+  }
+}
@@ -1,3 +1,5 @@
 export * from "./BaseIndex";
-export * from "./list";
+export * from "./BaseNodePostprocessor";
+export * from "./keyword";
+export * from "./summary";
 export * from "./vectorStore";
@@ -0,0 +1,274 @@
+import { BaseNode, Document, MetadataMode } from "../../Node";
+import { defaultKeywordExtractPrompt } from "../../Prompt";
+import { BaseQueryEngine, RetrieverQueryEngine } from "../../QueryEngine";
+import { ResponseSynthesizer } from "../../ResponseSynthesizer";
+import { BaseRetriever } from "../../Retriever";
+import {
+  ServiceContext,
+  serviceContextFromDefaults,
+} from "../../ServiceContext";
+import { StorageContext, storageContextFromDefaults } from "../../storage";
+import { BaseDocumentStore } from "../../storage/docStore/types";
+import {
+  BaseIndex,
+  BaseIndexInit,
+  IndexStructType,
+  KeywordTable,
+} from "../BaseIndex";
+import { BaseNodePostprocessor } from "../BaseNodePostprocessor";
+import {
+  KeywordTableLLMRetriever,
+  KeywordTableRAKERetriever,
+  KeywordTableSimpleRetriever,
+} from "./KeywordTableIndexRetriever";
+import { extractKeywordsGivenResponse } from "./utils";
+
+export interface KeywordIndexOptions {
+  nodes?: BaseNode[];
+  indexStruct?: KeywordTable;
+  indexId?: string;
+  serviceContext?: ServiceContext;
+  storageContext?: StorageContext;
+}
+export enum KeywordTableRetrieverMode {
+  DEFAULT = "DEFAULT",
+  SIMPLE = "SIMPLE",
+  RAKE = "RAKE",
+}
+
+const KeywordTableRetrieverMap = {
+  [KeywordTableRetrieverMode.DEFAULT]: KeywordTableLLMRetriever,
+  [KeywordTableRetrieverMode.SIMPLE]: KeywordTableSimpleRetriever,
+  [KeywordTableRetrieverMode.RAKE]: KeywordTableRAKERetriever,
+};
+
+/**
+ * The KeywordTableIndex, an index that extracts keywords from each Node and builds a mapping from each keyword to the corresponding Nodes of that keyword.
+ */
+export class KeywordTableIndex extends BaseIndex<KeywordTable> {
+  constructor(init: BaseIndexInit<KeywordTable>) {
+    super(init);
+  }
+
+  static async init(options: KeywordIndexOptions): Promise<KeywordTableIndex> {
+    const storageContext =
+      options.storageContext ?? (await storageContextFromDefaults({}));
+    const serviceContext =
+      options.serviceContext ?? serviceContextFromDefaults({});
+    const { docStore, indexStore } = storageContext;
+
+    // Setup IndexStruct from storage
+    let indexStructs = (await indexStore.getIndexStructs()) as KeywordTable[];
+    let indexStruct: KeywordTable | null;
+
+    if (options.indexStruct && indexStructs.length > 0) {
+      throw new Error(
+        "Cannot initialize index with both indexStruct and indexStore",
+      );
+    }
+
+    if (options.indexStruct) {
+      indexStruct = options.indexStruct;
+    } else if (indexStructs.length == 1) {
+      indexStruct = indexStructs[0];
+    } else if (indexStructs.length > 1 && options.indexId) {
+      indexStruct = (await indexStore.getIndexStruct(
+        options.indexId,
+      )) as KeywordTable;
+    } else {
+      indexStruct = null;
+    }
+
+    // check indexStruct type
+    if (indexStruct && indexStruct.type !== IndexStructType.KEYWORD_TABLE) {
+      throw new Error(
+        "Attempting to initialize KeywordTableIndex with non-keyword table indexStruct",
+      );
+    }
+
+    if (indexStruct) {
+      if (options.nodes) {
+        throw new Error(
+          "Cannot initialize KeywordTableIndex with both nodes and indexStruct",
+        );
+      }
+    } else {
+      if (!options.nodes) {
+        throw new Error(
+          "Cannot initialize KeywordTableIndex without nodes or indexStruct",
+        );
+      }
+      indexStruct = await KeywordTableIndex.buildIndexFromNodes(
+        options.nodes,
+        storageContext.docStore,
+        serviceContext,
+      );
+
+      await indexStore.addIndexStruct(indexStruct);
+    }
+
+    return new KeywordTableIndex({
+      storageContext,
+      serviceContext,
+      docStore,
+      indexStore,
+      indexStruct,
+    });
+  }
+
+  asRetriever(options?: any): BaseRetriever {
+    const { mode = KeywordTableRetrieverMode.DEFAULT, ...otherOptions } =
+      options ?? {};
+    const KeywordTableRetriever =
+      KeywordTableRetrieverMap[mode as KeywordTableRetrieverMode];
+    if (KeywordTableRetriever) {
+      return new KeywordTableRetriever({ index: this, ...otherOptions });
+    }
+    throw new Error(`Unknown retriever mode: ${mode}`);
+  }
+
+  asQueryEngine(options?: {
+    retriever?: BaseRetriever;
+    responseSynthesizer?: ResponseSynthesizer;
+    preFilters?: unknown;
+    nodePostprocessors?: BaseNodePostprocessor[];
+  }): BaseQueryEngine {
+    const { retriever, responseSynthesizer } = options ?? {};
+    return new RetrieverQueryEngine(
+      retriever ?? this.asRetriever(),
+      responseSynthesizer,
+      options?.preFilters,
+      options?.nodePostprocessors,
+    );
+  }
+
+  static async extractKeywords(
+    text: string,
+    serviceContext: ServiceContext,
+  ): Promise<Set<string>> {
+    const response = await serviceContext.llm.complete(
+      defaultKeywordExtractPrompt({
+        context: text,
+      }),
+    );
+    return extractKeywordsGivenResponse(response.message.content, "KEYWORDS:");
+  }
+
+  /**
+   * High level API: split documents, get keywords, and build index.
+   * @param documents documents added to the doc store and parsed into nodes
+   * @param args optional storageContext and serviceContext; defaults are
+   *   created for whichever is omitted
+   * @returns the KeywordTableIndex built from the parsed nodes
+   */
+  static async fromDocuments(
+    documents: Document[],
+    args: {
+      storageContext?: StorageContext;
+      serviceContext?: ServiceContext;
+    } = {},
+  ): Promise<KeywordTableIndex> {
+    let { storageContext, serviceContext } = args;
+    storageContext = storageContext ?? (await storageContextFromDefaults({}));
+    serviceContext = serviceContext ?? serviceContextFromDefaults({});
+    const docStore = storageContext.docStore;
+    // Awaited (as in buildIndexFromNodes) so a failed write rejects this call.
+    await docStore.addDocuments(documents, true);
+    for (const doc of documents) {
+      docStore.setDocumentHash(doc.id_, doc.hash);
+    }
+
+    const nodes = serviceContext.nodeParser.getNodesFromDocuments(documents);
+    const index = await KeywordTableIndex.init({
+      nodes,
+      storageContext,
+      serviceContext,
+    });
+    return index;
+  }
+
+  /**
+   * Get keywords for nodes and place them into the index.
+   * @param nodes nodes written to docStore and then indexed by keyword
+   * @param docStore document store the nodes are added to
+   * @param serviceContext passed to extractKeywords for every node
+   * @returns a new KeywordTable with each node id filed under its keywords
+   */
+  static async buildIndexFromNodes(
+    nodes: BaseNode[],
+    docStore: BaseDocumentStore,
+    serviceContext: ServiceContext,
+  ): Promise<KeywordTable> {
+    const indexStruct = new KeywordTable();
+    await docStore.addDocuments(nodes, true);
+    for (const node of nodes) {
+      const keywords = await KeywordTableIndex.extractKeywords(
+        node.getContent(MetadataMode.LLM),
+        serviceContext,
+      );
+      indexStruct.addNode([...keywords], node.id_);
+    }
+    return indexStruct;
+  }
+
+  async insertNodes(nodes: BaseNode[]) {
+    // Awaited per node, so keyword extraction runs one node at a time.
+    for (const node of nodes) {
+      const text = node.getContent(MetadataMode.LLM);
+      const ctx = this.serviceContext;
+      const found = await KeywordTableIndex.extractKeywords(text, ctx);
+      this.indexStruct.addNode(Array.from(found), node.id_);
+    }
+  }
+
+  /** Removes nodeId from every keyword and drops keywords left with no nodes. */
+  deleteNode(nodeId: string): void {
+    // table is a Map of Sets; Object.entries() on a Map yields no entries.
+    const { table } = this.indexStruct;
+    const touched: string[] = [];
+    for (const [keyword, nodeIds] of table) {
+      if (nodeIds.has(nodeId)) {
+        touched.push(keyword);
+      }
+    }
+    this.indexStruct.deleteNode(touched, nodeId);
+
+    // Delete keywords that have zero nodes
+    for (const keyword of touched) {
+      if (table.get(keyword)?.size === 0) table.delete(keyword);
+    }
+  }
+
+  async deleteNodes(nodeIds: string[], deleteFromDocStore: boolean) {
+    nodeIds.forEach((nodeId) => {
+      this.deleteNode(nodeId);
+    });
+
+    if (deleteFromDocStore) {
+      for (const nodeId of nodeIds) {
+        await this.docStore.deleteDocument(nodeId, false);
+      }
+    }
+
+    await this.storageContext.indexStore.addIndexStruct(this.indexStruct);
+  }
+
+  async deleteRefDoc(
+    refDocId: string,
+    deleteFromDocStore?: boolean,
+  ): Promise<void> {
+    const refDocInfo = await this.docStore.getRefDocInfo(refDocId);
+
+    if (!refDocInfo) {
+      return;
+    }
+
+    await this.deleteNodes(refDocInfo.nodeIds, false);
+
+    if (deleteFromDocStore) {
+      await this.docStore.deleteRefDoc(refDocId, false);
+    }
+
+    return;
+  }
+}
@@ -0,0 +1,119 @@
+import { NodeWithScore } from "../../Node";
+import {
+  defaultKeywordExtractPrompt,
+  defaultQueryKeywordExtractPrompt,
+  KeywordExtractPrompt,
+  QueryKeywordExtractPrompt,
+} from "../../Prompt";
+import { BaseRetriever } from "../../Retriever";
+import { ServiceContext } from "../../ServiceContext";
+import { BaseDocumentStore } from "../../storage/docStore/types";
+import { KeywordTable } from "../BaseIndex";
+import { KeywordTableIndex } from "./KeywordTableIndex";
+import {
+  extractKeywordsGivenResponse,
+  rakeExtractKeywords,
+  simpleExtractKeywords,
+} from "./utils";
+
+// Base Keyword Table Retriever
+abstract class BaseKeywordTableRetriever implements BaseRetriever {
+  protected index: KeywordTableIndex;
+  protected indexStruct: KeywordTable;
+  protected docstore: BaseDocumentStore;
+  protected serviceContext: ServiceContext;
+
+  protected maxKeywordsPerQuery: number; // Maximum number of keywords to extract from query.
+  protected numChunksPerQuery: number; // Maximum number of text chunks to query.
+  protected keywordExtractTemplate: KeywordExtractPrompt; // A Keyword Extraction Prompt
+  protected queryKeywordExtractTemplate: QueryKeywordExtractPrompt; // A Query Keyword Extraction Prompt
+
+  constructor({
+    index,
+    keywordExtractTemplate,
+    queryKeywordExtractTemplate,
+    maxKeywordsPerQuery = 10,
+    numChunksPerQuery = 10,
+  }: {
+    index: KeywordTableIndex;
+    keywordExtractTemplate?: KeywordExtractPrompt;
+    queryKeywordExtractTemplate?: QueryKeywordExtractPrompt;
+    maxKeywordsPerQuery?: number;
+    numChunksPerQuery?: number;
+  }) {
+    this.index = index;
+    this.indexStruct = index.indexStruct;
+    this.docstore = index.docStore;
+    this.serviceContext = index.serviceContext;
+    // Both limits are optional in the type because they default to 10 above.
+    this.maxKeywordsPerQuery = maxKeywordsPerQuery;
+    this.numChunksPerQuery = numChunksPerQuery;
+    this.keywordExtractTemplate =
+      keywordExtractTemplate || defaultKeywordExtractPrompt;
+    this.queryKeywordExtractTemplate =
+      queryKeywordExtractTemplate || defaultQueryKeywordExtractPrompt;
+  }
+
+  abstract getKeywords(query: string): Promise<string[]>;
+
+  async retrieve(query: string): Promise<NodeWithScore[]> {
+    const keywords = await this.getKeywords(query);
+    const chunkIndicesCount: { [key: string]: number } = {};
+    const filteredKeywords = keywords.filter((keyword) =>
+      this.indexStruct.table.has(keyword),
+    );
+
+    for (let keyword of filteredKeywords) {
+      for (let nodeId of this.indexStruct.table.get(keyword) || []) {
+        chunkIndicesCount[nodeId] = (chunkIndicesCount[nodeId] ?? 0) + 1;
+      }
+    }
+
+    const sortedChunkIndices = Object.keys(chunkIndicesCount)
+      .sort((a, b) => chunkIndicesCount[b] - chunkIndicesCount[a])
+      .slice(0, this.numChunksPerQuery);
+
+    const sortedNodes = await this.docstore.getNodes(sortedChunkIndices);
+
+    return sortedNodes.map((node) => ({ node }));
+  }
+
+  getServiceContext(): ServiceContext {
+    return this.index.serviceContext;
+  }
+}
+
+// Extracts keywords using LLMs.
+export class KeywordTableLLMRetriever extends BaseKeywordTableRetriever {
+  async getKeywords(query: string): Promise<string[]> {
+    const response = await this.serviceContext.llm.complete(
+      this.queryKeywordExtractTemplate({
+        question: query,
+        maxKeywords: this.maxKeywordsPerQuery,
+      }),
+    );
+    const keywords = extractKeywordsGivenResponse(
+      response.message.content,
+      "KEYWORDS:",
+    );
+    return [...keywords];
+  }
+}
+
+// Extracts keywords using simple regex-based keyword extractor.
+export class KeywordTableSimpleRetriever extends BaseKeywordTableRetriever {
+  getKeywords(query: string): Promise<string[]> {
+    return Promise.resolve([
+      ...simpleExtractKeywords(query, this.maxKeywordsPerQuery),
+    ]);
+  }
+}
+
+// Extracts keywords using RAKE keyword extractor
+export class KeywordTableRAKERetriever extends BaseKeywordTableRetriever {
+  getKeywords(query: string): Promise<string[]> {
+    return Promise.resolve([
+      ...rakeExtractKeywords(query, this.maxKeywordsPerQuery),
+    ]);
+  }
+}
@@ -0,0 +1,9 @@
+export {
+  KeywordTableIndex,
+  KeywordTableRetrieverMode,
+} from "./KeywordTableIndex";
+export {
+  KeywordTableLLMRetriever,
+  KeywordTableRAKERetriever,
+  KeywordTableSimpleRetriever,
+} from "./KeywordTableIndexRetriever";
@@ -0,0 +1,81 @@
+// @ts-ignore
+import rake from "rake-modified";
+
+// Returns the given tokens plus, for any token made of more than one \w+ word, each of those words. No stopword filtering is done.
+export function expandTokensWithSubtokens(tokens: Set<string>): Set<string> {
+  const results: Set<string> = new Set();
+  const regex: RegExp = /\w+/g;
+
+  for (let token of tokens) {
+    results.add(token);
+    const subTokens: RegExpMatchArray | null = token.match(regex);
+    if (subTokens && subTokens.length > 1) {
+      for (let w of subTokens) {
+        results.add(w);
+      }
+    }
+  }
+  return results;
+}
+
+/** Parses a comma-separated keyword response into a set (plus sub-words). */
+export function extractKeywordsGivenResponse(
+  response: string,
+  startToken: string = "",
+  lowercase: boolean = true,
+): Set<string> {
+  let body = response.trim();
+  // Strip the leading marker (e.g. "KEYWORDS:") when the response has one.
+  if (body.startsWith(startToken)) {
+    body = body.substring(startToken.length);
+  }
+
+  const results: string[] = [];
+  for (const raw of body.split(",")) {
+    const keyword = (lowercase ? raw.toLowerCase() : raw).trim();
+    // Skip blanks from empty responses or stray commas ("a,,b", "a,").
+    if (keyword.length > 0) {
+      results.push(keyword);
+    }
+  }
+
+  return expandTokensWithSubtokens(new Set(results));
+}
+
+/**
+ * Returns the distinct lowercased \w+ tokens of textChunk, most frequent first.
+ */
+export function simpleExtractKeywords(
+  textChunk: string,
+  maxKeywords?: number,
+): Set<string> {
+  // A Map rather than a plain object: tokens such as "constructor" would
+  // otherwise read inherited Object.prototype members as their count.
+  const counts = new Map<string, number>();
+  for (const match of textChunk.matchAll(/\w+/g)) {
+    const token = match[0].toLowerCase();
+    counts.set(token, (counts.get(token) ?? 0) + 1);
+  }
+
+  // Sorting tokens by frequency
+  const sortedTokens = [...counts]
+    .sort((a, b) => b[1] - a[1])
+    .map(([token]) => token);
+  // A missing (or 0) maxKeywords applies no limit.
+  const keywords = maxKeywords
+    ? sortedTokens.slice(0, maxKeywords)
+    : sortedTokens;
+
+  return new Set(keywords);
+}
+
+export function rakeExtractKeywords(
+  textChunk: string,
+  maxKeywords?: number,
+): Set<string> {
+  const keywords = Object.keys(rake(textChunk));
+  const limitedKeywords = maxKeywords
+    ? keywords.slice(0, maxKeywords)
+    : keywords;
+  return new Set(limitedKeywords);
+}
@@ -1,5 +0,0 @@
-export { ListIndex, ListRetrieverMode } from "./ListIndex";
-export {
-  ListIndexRetriever,
-  ListIndexLLMRetriever,
-} from "./ListIndexRetriever";
@@ -10,29 +10,30 @@ import {
  ServiceContext,
  serviceContextFromDefaults,
 } from "../../ServiceContext";
-import { BaseDocumentStore, RefDocInfo } from "../../storage/docStore/types";
 import {
  StorageContext,
  storageContextFromDefaults,
 } from "../../storage/StorageContext";
+import { BaseDocumentStore, RefDocInfo } from "../../storage/docStore/types";
 import {
  BaseIndex,
  BaseIndexInit,
  IndexList,
  IndexStructType,
 } from "../BaseIndex";
+import { BaseNodePostprocessor } from "../BaseNodePostprocessor";
 import {
-  ListIndexLLMRetriever,
-  ListIndexRetriever,
-} from "./ListIndexRetriever";
+  SummaryIndexLLMRetriever,
+  SummaryIndexRetriever,
+} from "./SummaryIndexRetriever";

-export enum ListRetrieverMode {
+export enum SummaryRetrieverMode {
  DEFAULT = "default",
  // EMBEDDING = "embedding",
  LLM = "llm",
 }

-export interface ListIndexOptions {
+export interface SummaryIndexOptions {
  nodes?: BaseNode[];
  indexStruct?: IndexList;
  indexId?: string;
@@ -41,14 +42,14 @@ export interface ListIndexOptions {
 }

 /**
- * A ListIndex keeps nodes in a sequential list structure
+ * A SummaryIndex keeps nodes in a sequential order for use with summarization.
 */
-export class ListIndex extends BaseIndex<IndexList> {
+export class SummaryIndex extends BaseIndex<IndexList> {
  constructor(init: BaseIndexInit<IndexList>) {
    super(init);
  }

-  static async init(options: ListIndexOptions): Promise<ListIndex> {
+  static async init(options: SummaryIndexOptions): Promise<SummaryIndex> {
    const storageContext =
      options.storageContext ?? (await storageContextFromDefaults({}));
    const serviceContext =
@@ -80,23 +81,23 @@ export class ListIndex extends BaseIndex<IndexList> {
    // check indexStruct type
    if (indexStruct && indexStruct.type !== IndexStructType.LIST) {
      throw new Error(
-        "Attempting to initialize ListIndex with non-list indexStruct",
+        "Attempting to initialize SummaryIndex with non-list indexStruct",
      );
    }

    if (indexStruct) {
      if (options.nodes) {
        throw new Error(
-          "Cannot initialize VectorStoreIndex with both nodes and indexStruct",
+          "Cannot initialize SummaryIndex with both nodes and indexStruct",
        );
      }
    } else {
      if (!options.nodes) {
        throw new Error(
-          "Cannot initialize VectorStoreIndex without nodes or indexStruct",
+          "Cannot initialize SummaryIndex without nodes or indexStruct",
        );
      }
-      indexStruct = await ListIndex.buildIndexFromNodes(
+      indexStruct = await SummaryIndex.buildIndexFromNodes(
        options.nodes,
        storageContext.docStore,
      );
@@ -104,7 +105,7 @@ export class ListIndex extends BaseIndex<IndexList> {
      await indexStore.addIndexStruct(indexStruct);
    }

-    return new ListIndex({
+    return new SummaryIndex({
      storageContext,
      serviceContext,
      docStore,
@@ -119,7 +120,7 @@ export class ListIndex extends BaseIndex<IndexList> {
      storageContext?: StorageContext;
      serviceContext?: ServiceContext;
    } = {},
-  ): Promise<ListIndex> {
+  ): Promise<SummaryIndex> {
    let { storageContext, serviceContext } = args;
    storageContext = storageContext ?? (await storageContextFromDefaults({}));
    serviceContext = serviceContext ?? serviceContextFromDefaults({});
@@ -131,7 +132,7 @@ export class ListIndex extends BaseIndex<IndexList> {
    }

    const nodes = serviceContext.nodeParser.getNodesFromDocuments(documents);
-    const index = await ListIndex.init({
+    const index = await SummaryIndex.init({
      nodes,
      storageContext,
      serviceContext,
@@ -139,14 +140,14 @@ export class ListIndex extends BaseIndex<IndexList> {
    return index;
  }

-  asRetriever(options?: { mode: ListRetrieverMode }): BaseRetriever {
-    const { mode = ListRetrieverMode.DEFAULT } = options ?? {};
+  asRetriever(options?: { mode: SummaryRetrieverMode }): BaseRetriever {
+    const { mode = SummaryRetrieverMode.DEFAULT } = options ?? {};

    switch (mode) {
-      case ListRetrieverMode.DEFAULT:
-        return new ListIndexRetriever(this);
-      case ListRetrieverMode.LLM:
-        return new ListIndexLLMRetriever(this);
+      case SummaryRetrieverMode.DEFAULT:
+        return new SummaryIndexRetriever(this);
+      case SummaryRetrieverMode.LLM:
+        return new SummaryIndexLLMRetriever(this);
      default:
        throw new Error(`Unknown retriever mode: ${mode}`);
    }
@@ -155,6 +156,8 @@ export class ListIndex extends BaseIndex<IndexList> {
  asQueryEngine(options?: {
    retriever?: BaseRetriever;
    responseSynthesizer?: ResponseSynthesizer;
+    preFilters?: unknown;
+    nodePostprocessors?: BaseNodePostprocessor[];
  }): BaseQueryEngine {
    let { retriever, responseSynthesizer } = options ?? {};

@@ -170,7 +173,12 @@ export class ListIndex extends BaseIndex<IndexList> {
      });
    }

-    return new RetrieverQueryEngine(retriever, responseSynthesizer);
+    return new RetrieverQueryEngine(
+      retriever,
+      responseSynthesizer,
+      options?.preFilters,
+      options?.nodePostprocessors,
+    );
  }

  static async buildIndexFromNodes(
@@ -253,4 +261,5 @@ export class ListIndex extends BaseIndex<IndexList> {
 }

 // Legacy
-export type GPTListIndex = ListIndex;
+export type ListIndex = SummaryIndex;
+export type ListRetrieverMode = SummaryRetrieverMode;
@@ -1,25 +1,25 @@
-import { BaseRetriever } from "../../Retriever";
+import _ from "lodash";
+import { globalsHelper } from "../../GlobalsHelper";
 import { NodeWithScore } from "../../Node";
-import { ListIndex } from "./ListIndex";
+import { ChoiceSelectPrompt, defaultChoiceSelectPrompt } from "../../Prompt";
+import { BaseRetriever } from "../../Retriever";
 import { ServiceContext } from "../../ServiceContext";
+import { Event } from "../../callbacks/CallbackManager";
+import { SummaryIndex } from "./SummaryIndex";
 import {
-  NodeFormatterFunction,
  ChoiceSelectParserFunction,
+  NodeFormatterFunction,
  defaultFormatNodeBatchFn,
  defaultParseChoiceSelectAnswerFn,
 } from "./utils";
-import { SimplePrompt, defaultChoiceSelectPrompt } from "../../Prompt";
-import _ from "lodash";
-import { globalsHelper } from "../../GlobalsHelper";
-import { Event } from "../../callbacks/CallbackManager";

 /**
- * Simple retriever for ListIndex that returns all nodes
+ * Simple retriever for SummaryIndex that returns all nodes
 */
-export class ListIndexRetriever implements BaseRetriever {
-  index: ListIndex;
+export class SummaryIndexRetriever implements BaseRetriever {
+  index: SummaryIndex;

-  constructor(index: ListIndex) {
+  constructor(index: SummaryIndex) {
    this.index = index;
  }

@@ -51,23 +51,23 @@ export class ListIndexRetriever implements BaseRetriever {
 }

 /**
- * LLM retriever for ListIndex.
+ * LLM retriever for SummaryIndex which lets you select the most relevant chunks.
 */
-export class ListIndexLLMRetriever implements BaseRetriever {
-  index: ListIndex;
-  choiceSelectPrompt: SimplePrompt;
+export class SummaryIndexLLMRetriever implements BaseRetriever {
+  index: SummaryIndex;
+  choiceSelectPrompt: ChoiceSelectPrompt;
  choiceBatchSize: number;
  formatNodeBatchFn: NodeFormatterFunction;
  parseChoiceSelectAnswerFn: ChoiceSelectParserFunction;
  serviceContext: ServiceContext;

  constructor(
-    index: ListIndex,
-    choiceSelectPrompt?: SimplePrompt,
+    index: SummaryIndex,
+    choiceSelectPrompt?: ChoiceSelectPrompt,
    choiceBatchSize: number = 10,
    formatNodeBatchFn?: NodeFormatterFunction,
    parseChoiceSelectAnswerFn?: ChoiceSelectParserFunction,
-    serviceContext?: ServiceContext
+    serviceContext?: ServiceContext,
  ) {
    this.index = index;
    this.choiceSelectPrompt = choiceSelectPrompt || defaultChoiceSelectPrompt;
@@ -95,7 +95,7 @@ export class ListIndexLLMRetriever implements BaseRetriever {
      // parseResult is a map from doc number to relevance score
      const parseResult = this.parseChoiceSelectAnswerFn(
        rawResponse,
-        nodesBatch.length
+        nodesBatch.length,
      );
      const choiceNodeIds = nodeIdsBatch.filter((nodeId, idx) => {
        return `${idx}` in parseResult;
@@ -128,3 +128,7 @@ export class ListIndexLLMRetriever implements BaseRetriever {
    return this.serviceContext;
  }
 }
+
+// Legacy
+export type ListIndexRetriever = SummaryIndexRetriever;
+export type ListIndexLLMRetriever = SummaryIndexLLMRetriever;
@@ -0,0 +1,10 @@
+export { SummaryIndex, SummaryRetrieverMode } from "./SummaryIndex";
+export type { ListIndex, ListRetrieverMode } from "./SummaryIndex";
+export {
+  SummaryIndexLLMRetriever,
+  SummaryIndexRetriever,
+} from "./SummaryIndexRetriever";
+export type {
+  ListIndexLLMRetriever,
+  ListIndexRetriever,
+} from "./SummaryIndexRetriever";
@@ -1,9 +1,9 @@
-import { BaseNode, MetadataMode } from "../../Node";
 import _ from "lodash";
+import { BaseNode, MetadataMode } from "../../Node";

 export type NodeFormatterFunction = (summaryNodes: BaseNode[]) => string;
 export const defaultFormatNodeBatchFn: NodeFormatterFunction = (
-  summaryNodes: BaseNode[]
+  summaryNodes: BaseNode[],
 ): string => {
  return summaryNodes
    .map((node, idx) => {
@@ -20,13 +20,13 @@ export type ChoiceSelectParseResult = { [docNumber: number]: number };
 export type ChoiceSelectParserFunction = (
  answer: string,
  numChoices: number,
-  raiseErr?: boolean
+  raiseErr?: boolean,
 ) => ChoiceSelectParseResult;

 export const defaultParseChoiceSelectAnswerFn: ChoiceSelectParserFunction = (
  answer: string,
  numChoices: number,
-  raiseErr: boolean = false
+  raiseErr: boolean = false,
 ): ChoiceSelectParseResult => {
  // split the line into the answer number and relevance score portions
  const lineTokens: string[][] = answer
@@ -36,7 +36,7 @@ export const defaultParseChoiceSelectAnswerFn: ChoiceSelectParserFunction = (
      if (lineTokens.length !== 2) {
        if (raiseErr) {
          throw new Error(
-            `Invalid answer line: ${line}. Answer line must be of the form: answer_num: <int>, answer_relevance: <float>`
+            `Invalid answer line: ${line}. Answer line must be of the form: answer_num: <int>, answer_relevance: <float>`,
          );
        } else {
          return null;
@@ -55,7 +55,7 @@ export const defaultParseChoiceSelectAnswerFn: ChoiceSelectParserFunction = (
        if (docNum < 1 || docNum > numChoices) {
          if (raiseErr) {
            throw new Error(
-              `Invalid answer number: ${docNum}. Answer number must be between 1 and ${numChoices}`
+              `Invalid answer number: ${docNum}. Answer number must be between 1 and ${numChoices}`,
            );
          }
        } else {
@@ -68,6 +68,6 @@ export const defaultParseChoiceSelectAnswerFn: ChoiceSelectParserFunction = (
      }
      return parseResult;
    },
-    {}
+    {},
  );
 };
@@ -1,14 +1,14 @@
-import { VectorStoreIndex } from "./VectorStoreIndex";
-import { globalsHelper } from "../../GlobalsHelper";
-import { NodeWithScore } from "../../Node";
-import { ServiceContext } from "../../ServiceContext";
 import { Event } from "../../callbacks/CallbackManager";
 import { DEFAULT_SIMILARITY_TOP_K } from "../../constants";
+import { globalsHelper } from "../../GlobalsHelper";
+import { NodeWithScore } from "../../Node";
+import { BaseRetriever } from "../../Retriever";
+import { ServiceContext } from "../../ServiceContext";
 import {
  VectorStoreQuery,
  VectorStoreQueryMode,
 } from "../../storage/vectorStore/types";
-import { BaseRetriever } from "../../Retriever";
+import { VectorStoreIndex } from "./VectorStoreIndex";

 /**
 * VectorIndexRetriever retrieves nodes from a VectorIndex.
@@ -32,7 +32,11 @@ export class VectorIndexRetriever implements BaseRetriever {
    this.similarityTopK = similarityTopK ?? DEFAULT_SIMILARITY_TOP_K;
  }

-  async retrieve(query: string, parentEvent?: Event): Promise<NodeWithScore[]> {
+  async retrieve(
+    query: string,
+    parentEvent?: Event,
+    preFilters?: unknown,
+  ): Promise<NodeWithScore[]> {
    const queryEmbedding =
      await this.serviceContext.embedModel.getQueryEmbedding(query);

@@ -41,10 +45,15 @@ export class VectorIndexRetriever implements BaseRetriever {
      mode: VectorStoreQueryMode.DEFAULT,
      similarityTopK: this.similarityTopK,
    };
-    const result = await this.index.vectorStore.query(q);
+    const result = await this.index.vectorStore.query(q, preFilters);

    let nodesWithScores: NodeWithScore[] = [];
    for (let i = 0; i < result.ids.length; i++) {
+      const nodeFromResult = result.nodes?.[i];
+      if (!this.index.indexStruct.nodesDict[result.ids[i]] && nodeFromResult) {
+        this.index.indexStruct.nodesDict[result.ids[i]] = nodeFromResult;
+      }
+
      const node = this.index.indexStruct.nodesDict[result.ids[i]];
      nodesWithScores.push({
        node: node,
--- a/Show More
+++ b/Show More