llamaindex 0.0.37

changeset
Merge pull request #228 from run-llama/ms/create-llama-fixes
2026-07-02 20:13:52 -04:00 · 2023-11-23 10:54:44 -08:00 · 2023-11-23 10:53:30 -08:00 · 2023-11-23 10:50:13 -08:00 · 2023-11-23 20:58:43 +07:00 · 2023-11-23 18:23:24 +07:00
77 changed files with 59886 additions and 618 deletions
@@ -1,7 +1,6 @@
 name: Bugfix
-title: "Sweep: "
+title: ""
 description: Write something like "We notice ... behavior when ... happens instead of ...""
-labels: sweep
 body:
  - type: textarea
    id: description
@@ -1,11 +1,10 @@
 name: Feature Request
-title: "Sweep: "
-description: Write something like "Write an api endpoint that does "..." in the "..." file"
-labels: sweep
+title: ""
+description: Write something like "Write an api endpoint that does "..." in the "..." file". If you would like to use sweep.dev, prefix the title with "Sweep:"
 body:
  - type: textarea
    id: description
    attributes:
      label: Details
-      description: More details for Sweep
+      description: More details
      placeholder: The new endpoint should use the ... class from ... file because it contains ... logic
@@ -1,11 +1,10 @@
 name: Refactor
-title: "Sweep: "
-description: Write something like "Modify the ... api endpoint to use ... version and ... framework"
-labels: sweep
+title: ""
+description: Write something like "Modify the ... api endpoint to use ... version and ... framework". If you would like to use sweep.dev, prefix the title with "Sweep:"
 body:
  - type: textarea
    id: description
    attributes:
      label: Details
-      description: More details for Sweep
+      description: More details
      placeholder: We are migrating this function to ... version because ...
@@ -4,5 +4,6 @@
  "editor.defaultFormatter": "esbenp.prettier-vscode",
  "[xml]": {
    "editor.defaultFormatter": "redhat.vscode-xml"
-  }
-}
+  },
+  "jest.rootPath": "./packages/core"
+}
@@ -0,0 +1,34 @@
+/* eslint-disable turbo/no-undeclared-env-vars */
+import * as dotenv from "dotenv";
+import * as fs from "fs";
+import { MongoClient } from "mongodb";
+
+// Load environment variables from local .env file
+dotenv.config();
+
+const jsonFile = "tinytweets.json";
+const mongoUri = process.env.MONGODB_URI!;
+const databaseName = process.env.MONGODB_DATABASE!;
+const collectionName = process.env.MONGODB_COLLECTION!;
+
+/**
+ * Reads the tweets from `jsonFile` and inserts them into the MongoDB
+ * collection `collectionName` of database `databaseName`.
+ *
+ * The client is closed in a `finally` block so the connection is released
+ * even when the insert fails.
+ */
+async function importJsonToMongo() {
+  // Load the tweets from a local file
+  const tweets = JSON.parse(fs.readFileSync(jsonFile, "utf-8"));
+
+  // Create a new client and connect to the server
+  const client = new MongoClient(mongoUri);
+
+  try {
+    const db = client.db(databaseName);
+    const collection = db.collection(collectionName);
+
+    // Insert the tweets into mongo
+    await collection.insertMany(tweets);
+
+    console.log(
+      `Data imported successfully to the MongoDB collection ${collectionName}.`,
+    );
+  } finally {
+    // Always release the connection, including on a failed insert
+    await client.close();
+  }
+}
+
+// Run the import function
+importJsonToMongo();
@@ -0,0 +1,50 @@
+/* eslint-disable turbo/no-undeclared-env-vars */
+import * as dotenv from "dotenv";
+import {
+  MongoDBAtlasVectorSearch,
+  SimpleMongoReader,
+  storageContextFromDefaults,
+  VectorStoreIndex,
+} from "llamaindex";
+import { MongoClient } from "mongodb";
+
+// Load environment variables from local .env file
+dotenv.config();
+
+const mongoUri = process.env.MONGODB_URI!;
+const databaseName = process.env.MONGODB_DATABASE!;
+const collectionName = process.env.MONGODB_COLLECTION!;
+const vectorCollectionName = process.env.MONGODB_VECTORS!;
+const indexName = process.env.MONGODB_VECTOR_INDEX!;
+
+/**
+ * Loads the documents from the source collection, embeds them and stores the
+ * embeddings in `vectorCollectionName` through MongoDBAtlasVectorSearch.
+ *
+ * The client is closed in a `finally` block so the connection is released
+ * even if loading or indexing throws.
+ */
+async function loadAndIndex() {
+  // Create a new client and connect to the server
+  const client = new MongoClient(mongoUri);
+
+  try {
+    // load objects from mongo and convert them into LlamaIndex Document objects
+    // llamaindex has a special class that does this for you
+    // it pulls every object in a given collection
+    const reader = new SimpleMongoReader(client);
+    const documents = await reader.loadData(databaseName, collectionName, [
+      "full_text",
+    ]);
+
+    // create Atlas as a vector store
+    const vectorStore = new MongoDBAtlasVectorSearch({
+      mongodbClient: client,
+      dbName: databaseName,
+      collectionName: vectorCollectionName, // this is where your embeddings will be stored
+      indexName: indexName, // this is the name of the index you will need to create
+    });
+
+    // now create an index from all the Documents and store them in Atlas
+    const storageContext = await storageContextFromDefaults({ vectorStore });
+    await VectorStoreIndex.fromDocuments(documents, { storageContext });
+    console.log(
+      `Successfully created embeddings in the MongoDB collection ${vectorCollectionName}.`,
+    );
+  } finally {
+    // Always release the connection, including when indexing fails
+    await client.close();
+  }
+}
+
+loadAndIndex();
+
+// you can't query your index yet because you need to create a vector search index in mongodb's UI now
@@ -0,0 +1,34 @@
+/* eslint-disable turbo/no-undeclared-env-vars */
+import * as dotenv from "dotenv";
+import {
+  MongoDBAtlasVectorSearch,
+  serviceContextFromDefaults,
+  VectorStoreIndex,
+} from "llamaindex";
+import { MongoClient } from "mongodb";
+
+// Load environment variables from local .env file
+dotenv.config();
+
+/**
+ * Builds a query engine on top of the existing Atlas vector store and prints
+ * the answer to one sample question.
+ *
+ * The client is closed in a `finally` block so the connection is released
+ * even if building the index or running the query throws.
+ */
+async function query() {
+  const client = new MongoClient(process.env.MONGODB_URI!);
+
+  try {
+    const serviceContext = serviceContextFromDefaults();
+    const store = new MongoDBAtlasVectorSearch({
+      mongodbClient: client,
+      dbName: process.env.MONGODB_DATABASE!,
+      collectionName: process.env.MONGODB_VECTORS!,
+      indexName: process.env.MONGODB_VECTOR_INDEX!,
+    });
+
+    const index = await VectorStoreIndex.fromVectorStore(store, serviceContext);
+
+    const retriever = index.asRetriever({ similarityTopK: 20 });
+    const queryEngine = index.asQueryEngine({ retriever });
+    const result = await queryEngine.query(
+      "What does the author think of web frameworks?",
+    );
+    console.log(result.response);
+  } finally {
+    // Always release the connection, including when the query fails
+    await client.close();
+  }
+}
+
+query();
@@ -0,0 +1,20 @@
+# mongodb-llamaindexts
+
+## 0.0.3
+
+### Patch Changes
+
+- Updated dependencies [3bab231]
+  - llamaindex@0.0.37
+
+## 0.0.2
+
+### Patch Changes
+
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+  - llamaindex@0.0.36
@@ -0,0 +1,127 @@
+# LlamaIndexTS retrieval augmented generation with MongoDB
+
+### Prepare Environment
+
+Make sure to run `pnpm install` and set your OpenAI environment variable before running these examples.
+
+```
+pnpm install
+export OPENAI_API_KEY="sk-..."
+```
+
+### Sign up for MongoDB Atlas
+
+We'll be using MongoDB's hosted database service, [MongoDB Atlas](https://www.mongodb.com/cloud/atlas/register). You can sign up for free and get a small hosted cluster for free:
+
+![MongoDB Atlas signup](./docs/1_signup.png)
+
+The signup process will walk you through the process of creating your cluster and ensuring it's configured for you to access. Once the cluster is created, choose "Connect" and then "Connect to your application". Choose Python, and you'll be presented with a connection string that looks like this:
+
+![MongoDB Atlas connection string](./docs/2_connection_string.png)
+
+### Set up environment variables
+
+Copy the connection string (make sure you include your password) and put it into a file called `.env` in the root of this repo. It should look like this:
+
+```
+MONGODB_URI=mongodb+srv://seldo:xxxxxxxxxxx@llamaindexdemocluster.xfrdhpz.mongodb.net/?retryWrites=true&w=majority
+```
+
+You will also need to choose a name for your database, and the collection where we will store the tweets, and also include them in .env. They can be any string, but this is what we used:
+
+```
+MONGODB_DATABASE=tiny_tweets_db
+MONGODB_COLLECTION=tiny_tweets_collection
+```
+
+### Import tweets into MongoDB
+
+You are now ready to import our ready-made data set into Mongo. This is the file `tinytweets.json`, a selection of approximately 1000 tweets from @seldo on Twitter in mid-2019. With your environment set up you can do this by running
+
+```
+pnpm ts-node 1_import.ts
+```
+
+If you don't want to use tweets, you can point `jsonFile` in `1_import.ts` at any other file containing an array of JSON objects, but you will need to modify some code later to make sure the correct field gets indexed. There is no LlamaIndex-specific code here; you can load your data into Mongo any way you want to.
+
+### Load and index your data
+
+Now we're ready to index our data. To do this, LlamaIndex will pull your text out of Mongo, split it into chunks, and then send those chunks to OpenAI to be turned into [vector embeddings](https://docs.llamaindex.ai/en/stable/understanding/indexing/indexing.html#what-is-an-embedding). The embeddings will then be stored in a new collection in Mongo. This will take a while depending on how much text you have, but the good news is that once it's done you will be able to query quickly without needing to re-index.
+
+We'll be using OpenAI to do the embedding, so now is when you need to [generate an OpenAI API key](https://platform.openai.com/account/api-keys) if you haven't already and add it to your `.env` file like this:
+
+```
+OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+```
+
+You'll also need to pick a name for the new collection where the embeddings will be stored, and add it to `.env`, along with the name of a vector search index (we'll be creating this in the next step, after you've indexed your data):
+
+```
+MONGODB_VECTORS=tiny_tweets_vectors
+MONGODB_VECTOR_INDEX=tiny_tweets_vector_index
+```
+
+If the data you're indexing is the tweets we gave you, you're ready to go:
+
+```bash
+pnpm ts-node 2_load_and_index.ts
+```
+
+> Note: this script runs for a couple of minutes and currently doesn't show any progress.
+
+What you're doing here is creating a Reader which loads the data out of Mongo in the collection and database specified. It looks for text in a set of specific keys in each object. In this case we've given it just one key, "full_text".
+
+Now you're creating a vector search client for Mongo. In addition to a MongoDB client object, you again tell it what database everything is in. This time you give it the name of the collection where you'll store the vector embeddings, and the name of the vector search index you'll create in the next step.
+
+### Create a vector search index
+
+Now if all has gone well you should be able to log in to the Mongo Atlas UI and see two collections in your database: the original data in `tiny_tweets_collection`, and the vector embeddings in `tiny_tweets_vectors`.
+
+![MongoDB Atlas collections](./docs/3_vectors_in_db.png)
+
+Now it's time to create the vector search index so that you can query the data.
+It's not yet possible to programmatically create a vector search index using the [`createIndex`](https://www.mongodb.com/docs/manual/reference/method/db.collection.createIndex/) function, therefore we have to create one manually in the UI.
+To do so, first, click the Search tab, and then click "Create Search Index":
+
+![MongoDB Atlas create search index](./docs/4_search_tab.png)
+
+We have to use the JSON editor, as the Visual Editor does not yet support creating a vector search index:
+
+![MongoDB Atlas JSON editor](./docs/5_json_editor.png)
+
+Now under "database and collection" select `tiny_tweets_db` and within that select `tiny_tweets_vectors`. Then under "Index name" enter `tiny_tweets_vector_index` (or whatever value you put for MONGODB_VECTOR_INDEX in `.env`). Under that, you'll want to enter this JSON object:
+
+```json
+{
+  "mappings": {
+    "dynamic": true,
+    "fields": {
+      "embedding": {
+        "dimensions": 1536,
+        "similarity": "cosine",
+        "type": "knnVector"
+      }
+    }
+  }
+}
+```
+
+This tells Mongo that the `embedding` field in each document (in the `tiny_tweets_vectors` collection) is a vector of 1536 dimensions (this is the size of embeddings used by OpenAI), and that we want to use cosine similarity to compare vectors. You don't need to worry too much about these values unless you want to use an embedding model other than OpenAI's.
+
+The UI will ask you to review and confirm your choices, then you need to wait a minute or two while it generates the index. If all goes well, you should see something like this screen:
+
+![MongoDB Atlas index created](./docs/7_index_created.png)
+
+Now you're ready to query your data!
+
+### Run a test query
+
+You can do this by running
+
+```bash
+pnpm ts-node 3_query.ts
+```
+
+This sets up a connection to Atlas just like `2_load_and_index.ts` did, then it creates a [query engine](https://docs.llamaindex.ai/en/stable/understanding/querying/querying.html#getting-started) and runs a query against it.
+
+If all is well, you should get a nuanced opinion about web frameworks.
@@ -0,0 +1,17 @@
+{
+  "version": "0.0.3",
+  "private": true,
+  "name": "mongodb-llamaindexts",
+  "dependencies": {
+    "llamaindex": "workspace:*",
+    "dotenv": "^16.3.1",
+    "mongodb": "^6.2.0"
+  },
+  "devDependencies": {
+    "@types/node": "^18.18.6",
+    "ts-node": "^10.9.1"
+  },
+  "scripts": {
+    "lint": "eslint ."
+  }
+}
@@ -1,5 +1,24 @@
 # simple

+## 0.0.35
+
+### Patch Changes
+
+- Updated dependencies [3bab231]
+  - llamaindex@0.0.37
+
+## 0.0.34
+
+### Patch Changes
+
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+- Updated dependencies
+  - llamaindex@0.0.36
+
 ## 0.0.33

 ### Patch Changes
@@ -4,8 +4,6 @@ import { Anthropic } from "llamaindex";
  const anthropic = new Anthropic();
  const result = await anthropic.chat([
    { content: "You want to talk in rhymes.", role: "system" },
-    { content: "Hello, world!", role: "user" },
-    { content: "Hello!", role: "assistant" },
    {
      content:
        "How much wood would a woodchuck chuck if a woodchuck could chuck wood?",
@@ -1,47 +0,0 @@
-import { ChatMessage, SimpleChatEngine } from "llamaindex";
-import { stdin as input, stdout as output } from "node:process";
-import readline from "node:readline/promises";
-import { Anthropic } from "../../packages/core/src/llm/LLM";
-
-async function main() {
-  const query: string = `
-Where is Istanbul?
-  `;
-
-  // const llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
-  const llm = new Anthropic();
-  const message: ChatMessage = { content: query, role: "user" };
-
-  //TODO: Add callbacks later
-
-  //Stream Complete
-  //Note: Setting streaming flag to true or false will auto-set your return type to
-  //either an AsyncGenerator or a Response.
-  // Omitting the streaming flag automatically sets streaming to false
-
-  const chatEngine: SimpleChatEngine = new SimpleChatEngine({
-    chatHistory: undefined,
-    llm: llm,
-  });
-
-  const rl = readline.createInterface({ input, output });
-  while (true) {
-    const query = await rl.question("Query: ");
-
-    if (!query) {
-      break;
-    }
-
-    //Case 1: .chat(query, undefined, true) => Stream
-    //Case 2: .chat(query, undefined, false) => Response object
-    //Case 3: .chat(query, undefined) => Response object
-    const chatStream = await chatEngine.chat(query, undefined, true);
-    var accumulated_result = "";
-    for await (const part of chatStream) {
-      accumulated_result += part;
-      process.stdout.write(part);
-    }
-  }
-}
-
-main();
@@ -1,6 +1,6 @@
 import { MongoClient } from "mongodb";
-import { Document } from "../../packages/core/src/Node";
 import { VectorStoreIndex } from "../../packages/core/src/indices";
+import { Document } from "../../packages/core/src/Node";
 import { SimpleMongoReader } from "../../packages/core/src/readers/SimpleMongoReader";

 import { stdin as input, stdout as output } from "node:process";
@@ -1,5 +1,5 @@
 {
-  "version": "0.0.33",
+  "version": "0.0.35",
  "private": true,
  "name": "simple",
  "dependencies": {
@@ -0,0 +1,33 @@
+# Postgres Vector Store
+
+There are two scripts available here: load-docs.ts and query.ts
+
+## Prerequisites
+
+You'll need a postgres database instance against which to run these scripts. A simple docker command would look like this:
+
+> `docker run -d --rm --name vector-db -p 5432:5432 -e "POSTGRES_HOST_AUTH_METHOD=trust" ankane/pgvector`
+
+Set the PGHOST and PGUSER (and PGPASSWORD) environment variables to match your database setup.
+
+You'll also need a value for OPENAI_API_KEY in your environment.
+
+**NOTE:** Using `--rm` in the example docker command above means that the vector store will be deleted every time the container is stopped. For production purposes, use a volume to ensure persistence across restarts.
+
+## Setup and Loading Docs
+
+Read and follow the instructions in the README.md file located one directory up to make sure your JS/TS dependencies are set up. The commands listed below are also run from that parent directory.
+
+To import documents and save the embedding vectors to your database:
+
+> `npx ts-node pg-vector-store/load-docs.ts data`
+
+where data is the directory containing your input files. Using the _data_ directory in the example above will read all of the files in that directory using the llamaindexTS default readers for each file type.
+
+## RAG Querying
+
+To query using the resulting vector store:
+
+> `npx ts-node pg-vector-store/query.ts`
+
+The script will prompt for a question, then process and present the answer using the PGVectorStore data and your OpenAI API key. It will continue to prompt until you enter `q`, `quit` or `exit` as the next query.
@@ -0,0 +1,68 @@
+// load-docs.ts
+import fs from "fs/promises";
+import {
+  SimpleDirectoryReader,
+  storageContextFromDefaults,
+  VectorStoreIndex,
+} from "llamaindex";
+import { PGVectorStore } from "../../../packages/core/src/storage/vectorStore/PGVectorStore";
+
+/**
+ * Lists the entries of `sourceDir`, each prefixed with `sourceDir` and "/".
+ */
+async function getSourceFilenames(sourceDir: string) {
+  const entries = await fs.readdir(sourceDir);
+  return entries.map((entry) => `${sourceDir}/${entry}`);
+}
+
+/**
+ * Callback passed to the SimpleDirectoryReader constructor in main():
+ * prints its four arguments to the console and always returns true.
+ */
+function callback(
+  category: string,
+  name: string,
+  status: any,
+  message: string = "",
+): boolean {
+  const fields = [category, name, status, message];
+  console.log(...fields);
+  return true;
+}
+
+/**
+ * Loads every file in the source directory, embeds the documents and stores
+ * the vectors through PGVectorStore, then exits the process.
+ *
+ * @param args the process argument vector; `args[2]`, when present, is the
+ *   source directory (defaults to "../data")
+ */
+async function main(args: any) {
+  const sourceDir: string = args.length > 2 ? args[2] : "../data";
+
+  console.log(`Finding documents in ${sourceDir}`);
+  const fileList = await getSourceFilenames(sourceDir);
+  const count = fileList.length;
+  console.log(`Found ${count} files`);
+
+  console.log(`Importing contents from ${count} files in ${sourceDir}`);
+  try {
+    // Passing callback fn to the ctor here
+    // will enable logging to console.
+    // See callback fn, defined above.
+    const rdr = new SimpleDirectoryReader(callback);
+    const docs = await rdr.loadData({ directoryPath: sourceDir });
+
+    const pgvs = new PGVectorStore();
+    pgvs.setCollection(sourceDir);
+    // NOTE(review): clearCollection() appears to be async (confirm against
+    // PGVectorStore). Awaiting it makes sure the old rows are gone before the
+    // new embeddings are written, and surfaces its errors in the catch below.
+    await pgvs.clearCollection();
+
+    const ctx = await storageContextFromDefaults({ vectorStore: pgvs });
+
+    console.debug("  - creating vector store");
+    await VectorStoreIndex.fromDocuments(docs, {
+      storageContext: ctx,
+    });
+    console.debug("  - done.");
+  } catch (err) {
+    console.error(err);
+    console.log(
+      "If your PGVectorStore init failed, make sure to set env vars for PGUSER or USER, PGHOST, PGPORT and PGPASSWORD as needed.",
+    );
+    process.exit(1);
+  }
+
+  console.log(
+    "Done. Try running query.ts to ask questions against the imported embeddings.",
+  );
+  process.exit(0);
+}
+
+main(process.argv).catch((err) => console.error(err));
@@ -0,0 +1,67 @@
+import { VectorStoreIndex } from "../../../packages/core/src/indices/vectorStore/VectorStoreIndex";
+import { serviceContextFromDefaults } from "../../../packages/core/src/ServiceContext";
+import { PGVectorStore } from "../../../packages/core/src/storage/vectorStore/PGVectorStore";
+
+/**
+ * Interactive question loop over the embeddings stored via PGVectorStore.
+ *
+ * Prompts until the user enters a quit word, then closes readline and exits
+ * with status 0. A failed query is logged and the loop continues; a failure
+ * during setup prints a hint and exits with status 1.
+ */
+async function main() {
+  const readline = require("readline").createInterface({
+    input: process.stdin,
+    output: process.stdout,
+  });
+
+  try {
+    const pgvs = new PGVectorStore();
+    // Optional - set your collection name, default is no filter on this field.
+    // pgvs.setCollection();
+
+    const index = await VectorStoreIndex.fromVectorStore(
+      pgvs,
+      serviceContextFromDefaults(),
+    );
+
+    // Query the index
+    const queryEngine = await index.asQueryEngine();
+
+    let question = "";
+    do {
+      question = await getUserInput(readline);
+
+      if (isQuit(question)) {
+        readline.close();
+        process.exit(0);
+      }
+
+      try {
+        const { response } = await queryEngine.query(question);
+        console.log(response);
+      } catch (error) {
+        console.error("Error:", error);
+      }
+    } while (!isQuit(question));
+  } catch (err) {
+    console.error(err);
+    console.log(
+      "If your PGVectorStore init failed, make sure to set env vars for PGUSER or USER, PGHOST, PGPORT and PGPASSWORD as needed.",
+    );
+    process.exit(1);
+  }
+}
+
+/** Returns true when the trimmed, lower-cased input is "q", "quit" or "exit". */
+function isQuit(question: string) {
+  const normalized = question.trim().toLowerCase();
+  return normalized === "q" || normalized === "quit" || normalized === "exit";
+}
+
+// Wraps the callback-style readline.question() in a promise that resolves
+// with whatever the user typed.
+function getUserInput(readline: any): Promise<string> {
+  const prompt = "What would you like to know?\n>";
+  return new Promise((resolve) => {
+    readline.question(prompt, (userInput: string) => resolve(userInput));
+  });
+}
+
+// Report a failure exit status only when main() actually rejects; the
+// previous `.finally(() => process.exit(1))` exited with status 1
+// unconditionally, even after a clean run.
+main().catch((err) => {
+  console.error(err);
+  process.exit(1);
+});
@@ -2,7 +2,10 @@ import fs from "node:fs/promises";

 import {
  Anthropic,
+  anthropicTextQaPrompt,
+  CompactAndRefine,
  Document,
+  ResponseSynthesizer,
  serviceContextFromDefaults,
  VectorStoreIndex,
 } from "llamaindex";
@@ -18,12 +21,20 @@ async function main() {

  // Split text and create embeddings. Store them in a VectorStoreIndex
  const serviceContext = serviceContextFromDefaults({ llm: new Anthropic() });
+
+  const responseSynthesizer = new ResponseSynthesizer({
+    responseBuilder: new CompactAndRefine(
+      serviceContext,
+      anthropicTextQaPrompt,
+    ),
+  });
+
  const index = await VectorStoreIndex.fromDocuments([document], {
    serviceContext,
  });

  // Query the index
-  const queryEngine = index.asQueryEngine();
+  const queryEngine = index.asQueryEngine({ responseSynthesizer });
  const response = await queryEngine.query(
    "What did the author do in college?",
  );
@@ -4,8 +4,6 @@ import { Anthropic } from "llamaindex";
  const anthropic = new Anthropic();
  const result = await anthropic.chat([
    { content: "You want to talk in rhymes.", role: "system" },
-    { content: "Hello, world!", role: "user" },
-    { content: "Hello!", role: "assistant" },
    {
      content:
        "How much wood would a woodchuck chuck if a woodchuck could chuck wood?",
@@ -0,0 +1,33 @@
+import { ClipEmbedding, similarity, SimilarityType } from "llamaindex";
+
+/**
+ * Embeds two captions and one image with ClipEmbedding and prints the
+ * similarity of each caption to the image.
+ */
+async function main() {
+  const clip = new ClipEmbedding();
+
+  // Get text embeddings
+  const [firstCaption, secondCaption] = ["a car", "a football match"];
+  const firstEmbedding = await clip.getTextEmbedding(firstCaption);
+  const secondEmbedding = await clip.getTextEmbedding(secondCaption);
+
+  // Get image embedding
+  const imageUrl =
+    "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg";
+  const imageEmbedding = await clip.getImageEmbedding(imageUrl);
+
+  // Calc similarity
+  const scoreAgainstImage = (textEmbedding: typeof firstEmbedding) =>
+    similarity(textEmbedding, imageEmbedding, SimilarityType.DEFAULT);
+  const firstScore = scoreAgainstImage(firstEmbedding);
+  const secondScore = scoreAgainstImage(secondEmbedding);
+
+  console.log(
+    `Similarity between "${firstCaption}" and the image is ${firstScore}`,
+  );
+  console.log(
+    `Similarity between "${secondCaption}" and the image is ${secondScore}`,
+  );
+}
+
+main();
@@ -1,47 +0,0 @@
-import { ChatMessage, SimpleChatEngine } from "llamaindex";
-import { stdin as input, stdout as output } from "node:process";
-import readline from "node:readline/promises";
-import { Anthropic } from "../../packages/core/src/llm/LLM";
-
-async function main() {
-  const query: string = `
-Where is Istanbul?
-  `;
-
-  // const llm = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0.1 });
-  const llm = new Anthropic();
-  const message: ChatMessage = { content: query, role: "user" };
-
-  //TODO: Add callbacks later
-
-  //Stream Complete
-  //Note: Setting streaming flag to true or false will auto-set your return type to
-  //either an AsyncGenerator or a Response.
-  // Omitting the streaming flag automatically sets streaming to false
-
-  const chatEngine: SimpleChatEngine = new SimpleChatEngine({
-    chatHistory: undefined,
-    llm: llm,
-  });
-
-  const rl = readline.createInterface({ input, output });
-  while (true) {
-    const query = await rl.question("Query: ");
-
-    if (!query) {
-      break;
-    }
-
-    //Case 1: .chat(query, undefined, true) => Stream
-    //Case 2: .chat(query, undefined, false) => Response object
-    //Case 3: .chat(query, undefined) => Response object
-    const chatStream = await chatEngine.chat(query, undefined, true);
-    var accumulated_result = "";
-    for await (const part of chatStream) {
-      accumulated_result += part;
-      process.stdout.write(part);
-    }
-  }
-}
-
-main();
@@ -2,7 +2,10 @@ import fs from "node:fs/promises";

 import {
  Anthropic,
+  anthropicTextQaPrompt,
+  CompactAndRefine,
  Document,
+  ResponseSynthesizer,
  serviceContextFromDefaults,
  VectorStoreIndex,
 } from "llamaindex";
@@ -18,12 +21,20 @@ async function main() {

  // Split text and create embeddings. Store them in a VectorStoreIndex
  const serviceContext = serviceContextFromDefaults({ llm: new Anthropic() });
+
+  const responseSynthesizer = new ResponseSynthesizer({
+    responseBuilder: new CompactAndRefine(
+      serviceContext,
+      anthropicTextQaPrompt,
+    ),
+  });
+
  const index = await VectorStoreIndex.fromDocuments([document], {
    serviceContext,
  });

  // Query the index
-  const queryEngine = index.asQueryEngine();
+  const queryEngine = index.asQueryEngine({ responseSynthesizer });
  const response = await queryEngine.query(
    "What did the author do in college?",
  );
@@ -13,8 +13,8 @@
  "devDependencies": {
    "@changesets/cli": "^2.26.2",
    "@turbo/gen": "^1.10.16",
-    "@types/jest": "^29.5.8",
-    "eslint": "^8.53.0",
+    "@types/jest": "^29.5.10",
+    "eslint": "^8.54.0",
    "eslint-config-custom": "workspace:*",
    "husky": "^8.0.3",
    "jest": "^29.7.0",
@@ -1,5 +1,22 @@
 # llamaindex

+## 0.0.37
+
+### Patch Changes
+
+- 3bab231: Fixed errors (#225 and #226) Thanks @marcusschiesser
+
+## 0.0.36
+
+### Patch Changes
+
+- Support for Claude 2.1
+- Add AssemblyAI integration (thanks @Swimburger)
+- Use cryptoJS (thanks @marcusschiesser)
+- Add PGVectorStore (thanks @mtutty)
+- Add CLIP embeddings (thanks @marcusschiesser)
+- Add MongoDB support (thanks @marcusschiesser)
+
 ## 0.0.35

 ### Patch Changes
@@ -1,19 +1,23 @@
 {
  "name": "llamaindex",
-  "version": "0.0.35",
+  "version": "0.0.37",
  "license": "MIT",
  "dependencies": {
-    "@anthropic-ai/sdk": "^0.9.0",
+    "@anthropic-ai/sdk": "^0.9.1",
    "@notionhq/client": "^2.2.13",
-    "js-tiktoken": "^1.0.7",
+    "@xenova/transformers": "^2.8.0",
+    "crypto-js": "^4.2.0",
+    "js-tiktoken": "^1.0.8",
    "lodash": "^4.17.21",
    "mammoth": "^1.6.0",
    "md-utils-ts": "^2.0.0",
-    "mongodb": "^6.2.0",
+    "mongodb": "^6.3.0",
    "notion-md-crawler": "^0.0.2",
-    "openai": "^4.16.1",
+    "openai": "^4.19.1",
    "papaparse": "^5.4.1",
    "pdf-parse": "^1.1.1",
+    "pg": "^8.11.3",
+    "pgvector": "^0.1.5",
    "portkey-ai": "^0.1.16",
    "rake-modified": "^1.0.8",
    "replicate": "^0.21.1",
@@ -22,14 +26,16 @@
    "wink-nlp": "^1.14.3"
  },
  "devDependencies": {
-    "@types/lodash": "^4.14.200",
-    "@types/node": "^18.18.8",
-    "@types/papaparse": "^5.3.10",
-    "@types/pdf-parse": "^1.1.3",
-    "@types/uuid": "^9.0.6",
+    "@types/crypto-js": "^4.2.1",
+    "@types/lodash": "^4.14.202",
+    "@types/node": "^18.18.12",
+    "@types/papaparse": "^5.3.13",
+    "@types/pdf-parse": "^1.1.4",
+    "@types/pg": "^8.10.7",
+    "@types/uuid": "^9.0.7",
    "node-stdlib-browser": "^1.2.0",
    "tsup": "^7.2.0",
-    "typescript": "^5.2.2"
+    "typescript": "^5.3.2"
  },
  "engines": {
    "node": ">=18.0.0"
@@ -44,4 +50,4 @@
    "build": "tsup src/index.ts --format esm,cjs --dts",
    "dev": "tsup src/index.ts --format esm,cjs --dts --watch"
  }
-}
+}
@@ -1,4 +1,4 @@
-import crypto from "crypto"; // TODO Node dependency
+import CryptoJS from "crypto-js";
 import { v4 as uuidv4 } from "uuid";

 export enum NodeRelationship {
@@ -175,13 +175,13 @@ export class TextNode<T extends Metadata = Metadata> extends BaseNode<T> {
   * @returns
   */
  generateHash() {
-    const hashFunction = crypto.createHash("sha256");
+    const hashFunction = CryptoJS.algo.SHA256.create();
    hashFunction.update(`type=${this.getType()}`);
    hashFunction.update(
      `startCharIdx=${this.startCharIdx} endCharIdx=${this.endCharIdx}`,
    );
    hashFunction.update(this.getContent(MetadataMode.ALL));
-    return hashFunction.digest("base64");
+    return hashFunction.finalize().toString(CryptoJS.enc.Base64);
  }

  getType(): ObjectType {
@@ -272,12 +272,13 @@ export class Document<T extends Metadata = Metadata> extends TextNode<T> {
  }
 }

-export function jsonToNode(json: any) {
-  if (!json.type) {
+export function jsonToNode(json: any, type?: ObjectType) {
+  if (!json.type && !type) {
    throw new Error("Node type not found");
  }
+  const nodeType = type || json.type;

-  switch (json.type) {
+  switch (nodeType) {
    case ObjectType.TEXT:
      return new TextNode(json);
    case ObjectType.INDEX:
@@ -285,7 +286,7 @@ export function jsonToNode(json: any) {
    case ObjectType.DOCUMENT:
      return new Document(json);
    default:
-      throw new Error(`Invalid node type: ${json.type}`);
+      throw new Error(`Invalid node type: ${nodeType}`);
  }
 }

@@ -36,6 +36,15 @@ Answer:`;

 export type TextQaPrompt = typeof defaultTextQaPrompt;

+/**
+ * Builds a text QA prompt that wraps the context in <context> tags,
+ * followed by the instruction line and the query.
+ */
+export const anthropicTextQaPrompt = ({ context = "", query = "" }) => {
+  const lines = [
+    "Context information:",
+    "<context>",
+    `${context}`,
+    "</context>",
+    "Given the context information and not prior knowledge, answer the query.",
+    `Query: ${query}`,
+  ];
+  return lines.join("\n");
+};
+
 /*
 DEFAULT_SUMMARY_PROMPT_TMPL = (
    "Write a summary of the following. Try to use only the "
@@ -1,4 +1,6 @@
 import { v4 as uuidv4 } from "uuid";
+import { Event } from "./callbacks/CallbackManager";
+import { BaseNodePostprocessor } from "./indices/BaseNodePostprocessor";
 import { NodeWithScore, TextNode } from "./Node";
 import {
  BaseQuestionGenerator,
@@ -10,8 +12,6 @@ import { CompactAndRefine, ResponseSynthesizer } from "./ResponseSynthesizer";
 import { BaseRetriever } from "./Retriever";
 import { ServiceContext, serviceContextFromDefaults } from "./ServiceContext";
 import { QueryEngineTool, ToolMetadata } from "./Tool";
-import { Event } from "./callbacks/CallbackManager";
-import { BaseNodePostprocessor } from "./indices/BaseNodePostprocessor";

 /**
 * A query engine is a question answerer that can use one or more steps.
@@ -1,8 +1,8 @@
-import { BaseEmbedding, OpenAIEmbedding } from "./Embedding";
+import { CallbackManager } from "./callbacks/CallbackManager";
+import { BaseEmbedding, OpenAIEmbedding } from "./embeddings";
+import { LLM, OpenAI } from "./llm/LLM";
 import { NodeParser, SimpleNodeParser } from "./NodeParser";
 import { PromptHelper } from "./PromptHelper";
-import { CallbackManager } from "./callbacks/CallbackManager";
-import { LLM, OpenAI } from "./llm/LLM";

 /**
 * The ServiceContext is a collection of components that are used in different parts of the application.
@@ -0,0 +1,78 @@
+import { MultiModalEmbedding } from "./MultiModalEmbedding";
+import { ImageType, readImage } from "./utils";
+
+/**
+ * Model identifiers that ClipEmbedding passes to the `from_pretrained`
+ * loaders of `@xenova/transformers`.
+ */
+export enum ClipEmbeddingModelType {
+  XENOVA_CLIP_VIT_BASE_PATCH32 = "Xenova/clip-vit-base-patch32",
+  XENOVA_CLIP_VIT_BASE_PATCH16 = "Xenova/clip-vit-base-patch16",
+}
+
+/**
+ * Multi-modal embedding that runs a CLIP checkpoint through
+ * `@xenova/transformers`.
+ *
+ * The library itself, the tokenizer, the image processor and both model
+ * halves are imported/loaded lazily on first use and cached on the instance.
+ */
+export class ClipEmbedding extends MultiModalEmbedding {
+  modelType: ClipEmbeddingModelType =
+    ClipEmbeddingModelType.XENOVA_CLIP_VIT_BASE_PATCH16;
+
+  private tokenizer: any;
+  private processor: any;
+  private visionModel: any;
+  private textModel: any;
+
+  /** Returns the tokenizer for `modelType`, loading it on the first call. */
+  async getTokenizer() {
+    if (!this.tokenizer) {
+      const { AutoTokenizer } = await import("@xenova/transformers");
+      this.tokenizer = await AutoTokenizer.from_pretrained(this.modelType);
+    }
+    return this.tokenizer;
+  }
+
+  /** Returns the image processor for `modelType`, loading it on the first call. */
+  async getProcessor() {
+    if (!this.processor) {
+      const { AutoProcessor } = await import("@xenova/transformers");
+      this.processor = await AutoProcessor.from_pretrained(this.modelType);
+    }
+    return this.processor;
+  }
+
+  /** Returns the CLIP vision model for `modelType`, loading it on the first call. */
+  async getVisionModel() {
+    if (!this.visionModel) {
+      const { CLIPVisionModelWithProjection } = await import(
+        "@xenova/transformers"
+      );
+      this.visionModel = await CLIPVisionModelWithProjection.from_pretrained(
+        this.modelType,
+      );
+    }
+
+    return this.visionModel;
+  }
+
+  /** Returns the CLIP text model for `modelType`, loading it on the first call. */
+  async getTextModel() {
+    if (!this.textModel) {
+      const { CLIPTextModelWithProjection } = await import(
+        "@xenova/transformers"
+      );
+      this.textModel = await CLIPTextModelWithProjection.from_pretrained(
+        this.modelType,
+      );
+    }
+
+    return this.textModel;
+  }
+
+  /**
+   * Embeds a single image.
+   * @param image A path/URL string, URL object or Blob accepted by readImage.
+   * @returns The image embedding as a plain array of numbers.
+   */
+  async getImageEmbedding(image: ImageType): Promise<number[]> {
+    const loadedImage = await readImage(image);
+    const imageInputs = await (await this.getProcessor())(loadedImage);
+    const { image_embeds } = await (await this.getVisionModel())(imageInputs);
+    // `data` is presumably a typed array (e.g. Float32Array); copy it so the
+    // result really is the `number[]` this method declares.
+    return Array.from(image_embeds.data);
+  }
+
+  /**
+   * Embeds a single text with the CLIP text model.
+   * @param text The text to embed; it is tokenized with padding and truncation.
+   * @returns The text embedding as a plain array of numbers.
+   */
+  async getTextEmbedding(text: string): Promise<number[]> {
+    const textInputs = await (
+      await this.getTokenizer()
+    )([text], { padding: true, truncation: true });
+    const { text_embeds } = await (await this.getTextModel())(textInputs);
+    // Same conversion as for images: hand callers a real array.
+    return Array.from(text_embeds.data);
+  }
+
+  /** Queries are embedded exactly like any other text. */
+  async getQueryEmbedding(query: string): Promise<number[]> {
+    return this.getTextEmbedding(query);
+  }
+}
@@ -0,0 +1,17 @@
+import { BaseEmbedding } from "./types";
+import { ImageType } from "./utils";
+
+/*
+ * Base class for Multi Modal embeddings.
+ */
+
+export abstract class MultiModalEmbedding extends BaseEmbedding {
+  /** Embeds one image; implemented by concrete multi-modal models. */
+  abstract getImageEmbedding(images: ImageType): Promise<number[]>;
+
+  /**
+   * Embeds several images by starting one getImageEmbedding call per input
+   * and waiting for all of them; results keep the input order.
+   */
+  async getImageEmbeddings(images: ImageType[]): Promise<number[][]> {
+    const pending = images.map((image) => this.getImageEmbedding(image));
+    return Promise.all(pending);
+  }
+}
@@ -0,0 +1,92 @@
+import { ClientOptions as OpenAIClientOptions } from "openai";
+import {
+  AzureOpenAIConfig,
+  getAzureBaseUrl,
+  getAzureConfigFromEnv,
+  getAzureModel,
+  shouldUseAzure,
+} from "../llm/azure";
+import { OpenAISession, getOpenAISession } from "../llm/openai";
+import { BaseEmbedding } from "./types";
+
+/** Model names that OpenAIEmbedding sends as `model` to the embeddings endpoint. */
+export enum OpenAIEmbeddingModelType {
+  TEXT_EMBED_ADA_002 = "text-embedding-ada-002",
+}
+
+/**
+ * Embedding implementation backed by the OpenAI embeddings endpoint, either
+ * on OpenAI directly or on Azure OpenAI (when `init.azure` is given or
+ * shouldUseAzure() returns true).
+ */
+export class OpenAIEmbedding extends BaseEmbedding {
+  model: OpenAIEmbeddingModelType;
+
+  // OpenAI session params
+  apiKey?: string = undefined;
+  maxRetries: number;
+  timeout?: number;
+  additionalSessionOptions?: Omit<
+    Partial<OpenAIClientOptions>,
+    "apiKey" | "maxRetries" | "timeout"
+  >;
+
+  session: OpenAISession;
+
+  /**
+   * @param init Optional overrides for the fields of this class, plus an
+   *   optional `azure` configuration. A pre-built `session` takes precedence
+   *   over creating a new one.
+   * @throws If Azure is selected but no Azure API key can be resolved.
+   */
+  constructor(init?: Partial<OpenAIEmbedding> & { azure?: AzureOpenAIConfig }) {
+    super();
+
+    // Honour an explicitly requested model instead of silently ignoring it.
+    this.model = init?.model ?? OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002;
+
+    this.maxRetries = init?.maxRetries ?? 10;
+    this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds
+    this.additionalSessionOptions = init?.additionalSessionOptions;
+
+    if (init?.azure || shouldUseAzure()) {
+      const azureConfig = getAzureConfigFromEnv({
+        ...init?.azure,
+        model: getAzureModel(this.model),
+      });
+
+      if (!azureConfig.apiKey) {
+        throw new Error(
+          "Azure API key is required for OpenAI Azure models. Please set the AZURE_OPENAI_KEY environment variable.",
+        );
+      }
+
+      this.apiKey = azureConfig.apiKey;
+      this.session =
+        init?.session ??
+        getOpenAISession({
+          azure: true,
+          apiKey: this.apiKey,
+          baseURL: getAzureBaseUrl(azureConfig),
+          maxRetries: this.maxRetries,
+          timeout: this.timeout,
+          defaultQuery: { "api-version": azureConfig.apiVersion },
+          ...this.additionalSessionOptions,
+        });
+    } else {
+      this.apiKey = init?.apiKey ?? undefined;
+      this.session =
+        init?.session ??
+        getOpenAISession({
+          apiKey: this.apiKey,
+          maxRetries: this.maxRetries,
+          timeout: this.timeout,
+          ...this.additionalSessionOptions,
+        });
+    }
+  }
+
+  /** Requests an embedding for `input` and returns the first result's vector. */
+  private async getOpenAIEmbedding(input: string) {
+    const { data } = await this.session.openai.embeddings.create({
+      model: this.model,
+      input,
+    });
+
+    return data[0].embedding;
+  }
+
+  /** Embeds a piece of text. */
+  async getTextEmbedding(text: string): Promise<number[]> {
+    return this.getOpenAIEmbedding(text);
+  }
+
+  /** Embeds a query; identical to embedding any other text. */
+  async getQueryEmbedding(query: string): Promise<number[]> {
+    return this.getOpenAIEmbedding(query);
+  }
+}
@@ -0,0 +1,5 @@
+export * from "./ClipEmbedding";
+export * from "./MultiModalEmbedding";
+export * from "./OpenAIEmbedding";
+export * from "./types";
+export * from "./utils";
@@ -0,0 +1,24 @@
+import { similarity } from "./utils";
+
+/**
+ * Similarity type
+ * Default is cosine similarity. Dot product and negative Euclidean distance are also supported.
+ */
+export enum SimilarityType {
+  DEFAULT = "cosine",
+  DOT_PRODUCT = "dot_product",
+  EUCLIDEAN = "euclidean",
+}
+
+/**
+ * Common base for embedding models. Subclasses supply the text and query
+ * embedding methods; comparing two embeddings is shared here.
+ */
+export abstract class BaseEmbedding {
+  /**
+   * Scores how similar two embeddings are by delegating to the module-level
+   * `similarity` helper.
+   * @param embedding1 First embedding.
+   * @param embedding2 Second embedding.
+   * @param mode Similarity measure to use; defaults to SimilarityType.DEFAULT (cosine).
+   * @returns The similarity score computed by the helper.
+   */
+  similarity(
+    embedding1: number[],
+    embedding2: number[],
+    mode: SimilarityType = SimilarityType.DEFAULT,
+  ): number {
+    return similarity(embedding1, embedding2, mode);
+  }
+
+  /** Embeds a piece of text. */
+  abstract getTextEmbedding(text: string): Promise<number[]>;
+  /** Embeds a query string. */
+  abstract getQueryEmbedding(query: string): Promise<number[]>;
+}
@@ -1,33 +1,16 @@
-import { ClientOptions as OpenAIClientOptions } from "openai";
-
-import { DEFAULT_SIMILARITY_TOP_K } from "./constants";
-import {
-  AzureOpenAIConfig,
-  getAzureBaseUrl,
-  getAzureConfigFromEnv,
-  getAzureModel,
-  shouldUseAzure,
-} from "./llm/azure";
-import { OpenAISession, getOpenAISession } from "./llm/openai";
-import { VectorStoreQueryMode } from "./storage/vectorStore/types";
-
-/**
- * Similarity type
- * Default is cosine similarity. Dot product and negative Euclidean distance are also supported.
- */
-export enum SimilarityType {
-  DEFAULT = "cosine",
-  DOT_PRODUCT = "dot_product",
-  EUCLIDEAN = "euclidean",
-}
+import _ from "lodash";
+import { DEFAULT_SIMILARITY_TOP_K } from "../constants";
+import { VectorStoreQueryMode } from "../storage";
+import { SimilarityType } from "./types";

 /**
 * The similarity between two embeddings.
 * @param embedding1
 * @param embedding2
 * @param mode
- * @returns similartiy score with higher numbers meaning the two embeddings are more similar
+ * @returns similarity score with higher numbers meaning the two embeddings are more similar
 */
+
 export function similarity(
  embedding1: number[],
  embedding2: number[],
@@ -42,7 +25,6 @@ export function similarity(
  // will probably cause some avoidable loss of floating point precision
  // ml-distance is worth watching although they currently also use the naive
  // formulas
-
  function norm(x: number[]): number {
    let result = 0;
    for (let i = 0; i < x.length; i++) {
@@ -201,98 +183,14 @@ export function getTopKMMREmbeddings(

  return [resultSimilarities, resultIds];
 }
-
-export abstract class BaseEmbedding {
-  similarity(
-    embedding1: number[],
-    embedding2: number[],
-    mode: SimilarityType = SimilarityType.DEFAULT,
-  ): number {
-    return similarity(embedding1, embedding2, mode);
-  }
-
-  abstract getTextEmbedding(text: string): Promise<number[]>;
-  abstract getQueryEmbedding(query: string): Promise<number[]>;
-}
-
-enum OpenAIEmbeddingModelType {
-  TEXT_EMBED_ADA_002 = "text-embedding-ada-002",
-}
-
-export class OpenAIEmbedding extends BaseEmbedding {
-  model: OpenAIEmbeddingModelType;
-
-  // OpenAI session params
-  apiKey?: string = undefined;
-  maxRetries: number;
-  timeout?: number;
-  additionalSessionOptions?: Omit<
-    Partial<OpenAIClientOptions>,
-    "apiKey" | "maxRetries" | "timeout"
-  >;
-
-  session: OpenAISession;
-
-  constructor(init?: Partial<OpenAIEmbedding> & { azure?: AzureOpenAIConfig }) {
-    super();
-
-    this.model = OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002;
-
-    this.maxRetries = init?.maxRetries ?? 10;
-    this.timeout = init?.timeout ?? 60 * 1000; // Default is 60 seconds
-    this.additionalSessionOptions = init?.additionalSessionOptions;
-
-    if (init?.azure || shouldUseAzure()) {
-      const azureConfig = getAzureConfigFromEnv({
-        ...init?.azure,
-        model: getAzureModel(this.model),
-      });
-
-      if (!azureConfig.apiKey) {
-        throw new Error(
-          "Azure API key is required for OpenAI Azure models. Please set the AZURE_OPENAI_KEY environment variable.",
-        );
-      }
-
-      this.apiKey = azureConfig.apiKey;
-      this.session =
-        init?.session ??
-        getOpenAISession({
-          azure: true,
-          apiKey: this.apiKey,
-          baseURL: getAzureBaseUrl(azureConfig),
-          maxRetries: this.maxRetries,
-          timeout: this.timeout,
-          defaultQuery: { "api-version": azureConfig.apiVersion },
-          ...this.additionalSessionOptions,
-        });
-    } else {
-      this.apiKey = init?.apiKey ?? undefined;
-      this.session =
-        init?.session ??
-        getOpenAISession({
-          apiKey: this.apiKey,
-          maxRetries: this.maxRetries,
-          timeout: this.timeout,
-          ...this.additionalSessionOptions,
-        });
-    }
-  }
-
-  private async getOpenAIEmbedding(input: string) {
-    const { data } = await this.session.openai.embeddings.create({
-      model: this.model,
-      input,
-    });
-
-    return data[0].embedding;
-  }
-
-  async getTextEmbedding(text: string): Promise<number[]> {
-    return this.getOpenAIEmbedding(text);
-  }
-
-  async getQueryEmbedding(query: string): Promise<number[]> {
-    return this.getOpenAIEmbedding(query);
+/**
+ * Loads an image as a `RawImage` from `@xenova/transformers` (imported
+ * lazily inside the function).
+ * @param input A Blob (read with RawImage.fromBlob), or a string / URL
+ *   object (read with RawImage.fromURL).
+ * @returns The loaded RawImage.
+ * @throws If `input` is none of the supported types.
+ */
+export async function readImage(input: ImageType) {
+  const { RawImage } = await import("@xenova/transformers");
+  if (input instanceof Blob) {
+    return await RawImage.fromBlob(input);
+  } else if (_.isString(input) || input instanceof URL) {
+    return await RawImage.fromURL(input);
+  } else {
+    throw new Error(`Unsupported input type: ${typeof input}`);
  }
 }
+export type ImageType = string | Blob | URL;
@@ -1,6 +1,5 @@
 export * from "./ChatEngine";
 export * from "./ChatHistory";
-export * from "./Embedding";
 export * from "./GlobalsHelper";
 export * from "./Node";
 export * from "./NodeParser";
@@ -17,6 +16,7 @@ export * from "./TextSplitter";
 export * from "./Tool";
 export * from "./callbacks/CallbackManager";
 export * from "./constants";
+export * from "./embeddings";
 export * from "./indices";
 export * from "./llm/LLM";
 export * from "./readers/CSVReader";
@@ -25,5 +25,6 @@ export * from "./readers/MarkdownReader";
 export * from "./readers/NotionReader";
 export * from "./readers/PDFReader";
 export * from "./readers/SimpleDirectoryReader";
+export * from "./readers/SimpleMongoReader";
 export * from "./readers/base";
 export * from "./storage";
@@ -10,11 +10,11 @@ import {
  ServiceContext,
  serviceContextFromDefaults,
 } from "../../ServiceContext";
+import { BaseDocumentStore, RefDocInfo } from "../../storage/docStore/types";
 import {
  StorageContext,
  storageContextFromDefaults,
 } from "../../storage/StorageContext";
-import { BaseDocumentStore, RefDocInfo } from "../../storage/docStore/types";
 import {
  BaseIndex,
  BaseIndexInit,
@@ -639,7 +639,7 @@ If a question does not make any sense, or is not factually coherent, explain why

 export const ALL_AVAILABLE_ANTHROPIC_MODELS = {
  // claude-2 is configured with a 200k context window, claude-instant-1 with 100k; see https://docs.anthropic.com/claude/reference/selecting-a-model
-  "claude-2": { contextWindow: 100000 },
+  "claude-2": { contextWindow: 200000 },
  "claude-instant-1": { contextWindow: 100000 },
 };

@@ -705,10 +705,12 @@ export class Anthropic implements LLM {
        return (
          acc +
          `${
-            message.role === "assistant"
-              ? ANTHROPIC_AI_PROMPT
-              : ANTHROPIC_HUMAN_PROMPT
-          } ${message.content} `
+            message.role === "system"
+              ? ""
+              : message.role === "assistant"
+                ? ANTHROPIC_AI_PROMPT + " "
+                : ANTHROPIC_HUMAN_PROMPT + " "
+          }${message.content.trim()}`
        );
      }, "") + ANTHROPIC_AI_PROMPT
    );
@@ -729,6 +731,7 @@ export class Anthropic implements LLM {
      }
      return this.streamChat(messages, parentEvent) as R;
    }
+
    //Non-streaming
    const response = await this.session.anthropic.completions.create({
      model: this.model,
@@ -1,7 +1,7 @@
 import mammoth from "mammoth";
 import { Document } from "../Node";
-import { GenericFileSystem } from "../storage/FileSystem";
 import { DEFAULT_FS } from "../storage/constants";
+import { GenericFileSystem } from "../storage/FileSystem";
 import { BaseReader } from "./base";

 export class DocxReader implements BaseReader {
@@ -1,5 +1,5 @@
 import { MongoClient } from "mongodb";
-import { Document } from "../Node";
+import { Document, Metadata } from "../Node";
 import { BaseReader } from "./base";

 /**
@@ -13,39 +13,70 @@ export class SimpleMongoReader implements BaseReader {
  }

  /**
-   * Loads data from MongoDB collection
-   * @param {string} db_name - The name of the database to load.
-   * @param {string} collection_name - The name of the collection to load.
-   * @param {Number} [max_docs = 0] - Maximum number of documents to return. 0 means no limit.
-   * @param {Record<string, any>} [query_dict={}] - Specific query, as specified by MongoDB NodeJS documentation.
-   * @param {Record<string, any>} [query_options={}] - Specific query options, as specified by MongoDB NodeJS documentation.
-   * @param {Record<string, any>} [projection = {}] - Projection options, as specified by MongoDB NodeJS documentation.
-   * @returns {Promise<Document[]>}
+   * Flattens an array of strings or string arrays into a single-dimensional array of strings.
+   * @param texts - The array of strings or string arrays to flatten.
+   * @returns The flattened array of strings.
   */
-  async loadData(
-    db_name: string,
-    collection_name: string,
-    max_docs = 0,
-    //For later: Think about whether we want to pass generic objects in...
-    query_dict: Record<string, any> = {},
-    query_options: Record<string, any> = {},
-    projection: Record<string, any> = {},
-  ): Promise<Document[]> {
-    //Get items from collection using built-in functions
-    const cursor: Partial<Document>[] = await this.client
-      .db(db_name)
-      .collection(collection_name)
-      .find(query_dict, query_options)
-      .limit(max_docs)
-      .project(projection)
-      .toArray();
+  private flatten(texts: Array<string | string[]>): string[] {
+    // Wrap bare strings so every entry is an array, then flatten one level.
+    return texts.flatMap((entry) => (entry instanceof Array ? entry : [entry]));
+  }
+
+  /**
+   * Loads data from MongoDB collection
+   * @param {string} dbName - The name of the database to load.
+   * @param {string} collectionName - The name of the collection to load.
+   * @param {string[]} fieldNames - An array of field names to retrieve from each document. Defaults to ["text"].
+   * @param {string} separator - The separator to join multiple field values. Defaults to an empty string.
+   * @param {Record<string, any>} filterQuery - Specific query, as specified by MongoDB NodeJS documentation.
+   * @param {Number} maxDocs - The maximum number of documents to retrieve. Defaults to 0 (retrieve all documents).
+   * @param {string[]} metadataNames - An optional array of metadata field names. If specified extracts this information as metadata.
+   * @returns {Promise<Document[]>}
+   * @throws If a field specified in fieldNames or metadataNames is not found in a MongoDB document.
+   */
+  public async loadData(
+    dbName: string,
+    collectionName: string,
+    fieldNames: string[] = ["text"],
+    separator: string = "",
+    filterQuery: Record<string, any> = {},
+    maxDocs: number = 0,
+    metadataNames?: string[],
+  ): Promise<Document[]> {
+    const db = this.client.db(dbName);
+    // Get items from collection
+    const cursor = db
+      .collection(collectionName)
+      .find(filterQuery)
+      .limit(maxDocs);

-    //Aggregate results and return
    const documents: Document[] = [];
-    cursor.forEach((element: Partial<Document>) => {
-      //For later: Metadata filtering
-      documents.push(new Document({ text: JSON.stringify(element) }));
-    });
+
+    // Reading a missing property yields `undefined` rather than throwing, so
+    // presence is checked explicitly to honour the documented @throws.
+    const readField = (item: Record<string, any>, name: string) => {
+      if (!(name in item)) {
+        throw new Error(`Field not found in Mongo document: ${name}`);
+      }
+      return item[name];
+    };
+
+    for await (const item of cursor) {
+      const texts: Array<string | string[]> = fieldNames.map((name) =>
+        readField(item, name),
+      );
+      const text = this.flatten(texts).join(separator);
+
+      // extract metadata only if fields are specified
+      const metadata: Metadata = metadataNames
+        ? Object.fromEntries(
+            metadataNames.map((name) => [name, readField(item, name)]),
+          )
+        : {};
+
+      documents.push(new Document({ text, metadata }));
+    }
    return documents;
  }
 }
@@ -7,5 +7,6 @@ export { SimpleIndexStore } from "./indexStore/SimpleIndexStore";
 export * from "./indexStore/types";
 export { SimpleKVStore } from "./kvStore/SimpleKVStore";
 export * from "./kvStore/types";
+export { MongoDBAtlasVectorSearch } from "./vectorStore/MongoDBAtlasVectorStore";
 export { SimpleVectorStore } from "./vectorStore/SimpleVectorStore";
 export * from "./vectorStore/types";
@@ -0,0 +1,164 @@
+import { BulkWriteOptions, Collection, MongoClient } from "mongodb";
+import { BaseNode, MetadataMode } from "../../Node";
+import {
+  MetadataFilters,
+  VectorStore,
+  VectorStoreQuery,
+  VectorStoreQueryResult,
+} from "./types";
+import { metadataDictToNode, nodeToMetadata } from "./utils";
+
+// Utility function to convert metadata filters to MongoDB filter
+function toMongoDBFilter(
+  standardFilters: MetadataFilters,
+): Record<string, any> {
+  // Each filter becomes a `key: value` entry; on duplicate keys the last one wins.
+  return standardFilters.filters.reduce<Record<string, any>>(
+    (mongoFilter, { key, value }) => {
+      mongoFilter[key] = value;
+      return mongoFilter;
+    },
+    {},
+  );
+}
+
+// MongoDB Atlas Vector Store class implementing VectorStore
+export class MongoDBAtlasVectorSearch implements VectorStore {
+  storesText: boolean = true;
+  flatMetadata: boolean = true;
+
+  mongodbClient: MongoClient;
+  indexName: string;
+  embeddingKey: string;
+  idKey: string;
+  textKey: string;
+  metadataKey: string;
+  insertOptions?: BulkWriteOptions;
+  private collection: Collection;
+
+  /**
+   * @param init Target database/collection plus optional overrides for the
+   *   client, index name, document keys and insert options. Without a client,
+   *   one is created from the MONGODB_URI environment variable.
+   * @throws If no client is passed and MONGODB_URI is not set.
+   */
+  constructor(
+    init: Partial<MongoDBAtlasVectorSearch> & {
+      dbName: string;
+      collectionName: string;
+    },
+  ) {
+    let client = init.mongodbClient;
+    if (!client) {
+      const mongoUri = process.env.MONGODB_URI;
+      if (!mongoUri) {
+        throw new Error(
+          "Must specify MONGODB_URI via env variable if not directly passing in client.",
+        );
+      }
+      client = new MongoClient(mongoUri);
+    }
+    this.mongodbClient = client;
+
+    const database = this.mongodbClient.db(init.dbName ?? "default_db");
+    this.collection = database.collection(
+      init.collectionName ?? "default_collection",
+    );
+
+    this.indexName = init.indexName ?? "default";
+    this.embeddingKey = init.embeddingKey ?? "embedding";
+    this.idKey = init.idKey ?? "id";
+    this.textKey = init.textKey ?? "text";
+    this.metadataKey = init.metadataKey ?? "metadata";
+    this.insertOptions = init.insertOptions;
+  }
+
+  /**
+   * Inserts one MongoDB document per node (id, embedding, text, metadata)
+   * under the configured keys.
+   * @param nodes Nodes to store; an empty or missing list is a no-op.
+   * @returns The ids of the given nodes.
+   */
+  async add(nodes: BaseNode[]): Promise<string[]> {
+    if (!nodes?.length) {
+      return [];
+    }
+
+    const dataToInsert: Record<string, any>[] = [];
+    for (const node of nodes) {
+      const metadata = nodeToMetadata(
+        node,
+        true,
+        this.textKey,
+        this.flatMetadata,
+      );
+      dataToInsert.push({
+        [this.idKey]: node.id_,
+        [this.embeddingKey]: node.getEmbedding(),
+        [this.textKey]: node.getContent(MetadataMode.NONE) || "",
+        [this.metadataKey]: metadata,
+      });
+    }
+
+    console.debug("Inserting data into MongoDB: ", dataToInsert);
+    const insertResult = await this.collection.insertMany(
+      dataToInsert,
+      this.insertOptions,
+    );
+    console.debug("Result of insert: ", insertResult);
+
+    const insertedIds: string[] = [];
+    for (const node of nodes) {
+      insertedIds.push(node.id_);
+    }
+    return insertedIds;
+  }
+
+  /**
+   * Removes every stored entry whose `<metadataKey>.ref_doc_id` equals
+   * `refDocId`. deleteMany is used because several entries can carry the
+   * same ref doc id; deleteOne would leave all but one of them behind.
+   * @param refDocId Id of the source document whose entries are removed.
+   * @param deleteOptions Options forwarded to the MongoDB driver.
+   */
+  async delete(refDocId: string, deleteOptions?: any): Promise<void> {
+    await this.collection.deleteMany(
+      {
+        [`${this.metadataKey}.ref_doc_id`]: refDocId,
+      },
+      deleteOptions,
+    );
+  }
+
+  /** The MongoClient this store uses (passed in or created in the constructor). */
+  get client(): any {
+    return this.mongodbClient;
+  }
+
+  /**
+   * Runs a `$vectorSearch` aggregation against the collection and converts
+   * the hits back into nodes.
+   * @param query Query embedding, top-k and optional metadata filters.
+   * @param options Not used by this implementation.
+   * @returns Matching nodes with their ids and vector search scores.
+   */
+  async query(
+    query: VectorStoreQuery,
+    options?: any,
+  ): Promise<VectorStoreQueryResult> {
+    const vectorSearch: any = {
+      queryVector: query.queryEmbedding,
+      path: this.embeddingKey,
+      numCandidates: query.similarityTopK * 10,
+      limit: query.similarityTopK,
+      index: this.indexName,
+    };
+    if (query.filters) {
+      vectorSearch.filter = toMongoDBFilter(query.filters);
+    }
+
+    // Expose the search score and drop the embedding from the returned docs.
+    const pipeline = [
+      { $vectorSearch: vectorSearch },
+      {
+        $project: {
+          score: { $meta: "vectorSearchScore" },
+          [this.embeddingKey]: 0,
+        },
+      },
+    ];
+
+    console.debug("Running query pipeline: ", pipeline);
+    const cursor = await this.collection.aggregate(pipeline);
+
+    const result = {
+      nodes: [] as BaseNode[],
+      similarities: [] as number[],
+      ids: [] as string[],
+    };
+    for await (const hit of await cursor) {
+      const node = metadataDictToNode(hit[this.metadataKey]);
+      node.setContent(hit[this.textKey]);
+
+      result.ids.push(hit[this.idKey]);
+      result.nodes.push(node);
+      result.similarities.push(hit.score);
+    }
+
+    console.debug("Result of query (ids):", result.ids);
+    return result;
+  }
+}
@@ -0,0 +1,266 @@
+import pg from "pg";
+import pgvector from "pgvector/pg";
+
+import { VectorStore, VectorStoreQuery, VectorStoreQueryResult } from "./types";
+
+import { BaseNode, Document, Metadata, MetadataMode } from "../../Node";
+import { GenericFileSystem } from "../FileSystem";
+
+export const PGVECTOR_SCHEMA = "public";
+export const PGVECTOR_TABLE = "llamaindex_embedding";
+
+/**
+ * Provides support for writing and querying vector data in Postgres.
+ */
+export class PGVectorStore implements VectorStore {
+  storesText: boolean = true;
+
+  private collection: string = "";
+
+  /*
+    FROM pg LIBRARY:
+    type Config = {
+      user?: string, // default process.env.PGUSER || process.env.USER
+      password?: string or function, //default process.env.PGPASSWORD
+      host?: string, // default process.env.PGHOST
+      database?: string, // default process.env.PGDATABASE || user
+      port?: number, // default process.env.PGPORT
+      connectionString?: string, // e.g. postgres://user:password@host:5432/database
+      ssl?: any, // passed directly to node.TLSSocket, supports all tls.connect options
+      types?: any, // custom type parsers
+      statement_timeout?: number, // number of milliseconds before a statement in query will time out, default is no timeout
+      query_timeout?: number, // number of milliseconds before a query call will timeout, default is no timeout
+      application_name?: string, // The name of the application that created this Client instance
+      connectionTimeoutMillis?: number, // number of milliseconds to wait for connection, default is no timeout
+      idle_in_transaction_session_timeout?: number // number of milliseconds before terminating any session with an open idle transaction, default is no timeout
+    }  
+  */
+  db?: pg.Client;
+
+  constructor() {}
+
+  /**
+   * Sets the collection used by subsequent add, delete, query and
+   * clearCollection calls.
+   * Using a collection allows for simple segregation of vector data,
+   * e.g. by user, source, or access-level.
+   * Set it to an empty string to ignore the collection value when querying.
+   * @param coll Name for the collection.
+   */
+  setCollection(coll: string) {
+    this.collection = coll;
+  }
+
+  /**
+   * Returns the collection currently used to segregate vector data
+   * (see setCollection).
+   * @returns The currently-set collection value.  Default is empty string.
+   */
+  getCollection(): string {
+    return this.collection;
+  }
+
+  /**
+   * Returns the shared database client, creating and preparing it on first
+   * use: connect, ensure the vector extension, register the pgvector type
+   * and ensure schema/table/indexes exist.
+   * @returns The connected client.
+   * @throws Whatever error connecting or setting up raised (after logging it).
+   */
+  private async getDb(): Promise<pg.Client> {
+    if (this.db) {
+      return this.db;
+    }
+
+    try {
+      // Create DB connection
+      // Read connection params from env - see comment block above
+      const db = new pg.Client();
+      await db.connect();
+
+      // Check vector extension. Awaited so that a failure is caught below
+      // instead of surfacing as an unhandled promise rejection.
+      await db.query("CREATE EXTENSION IF NOT EXISTS vector");
+      await pgvector.registerType(db);
+
+      // Check schema, table(s), index(es)
+      await this.checkSchema(db);
+
+      // All good?  Keep the connection reference
+      this.db = db;
+      return db;
+    } catch (err: any) {
+      console.error(err);
+      throw err;
+    }
+  }
+
+  /**
+   * Creates the schema, the embedding table and its lookup indexes if they
+   * do not exist yet.
+   * @param db Connected client to run the statements on.
+   * @returns The same client that was passed in.
+   */
+  private async checkSchema(db: pg.Client) {
+    await db.query(`CREATE SCHEMA IF NOT EXISTS ${PGVECTOR_SCHEMA}`);
+
+    // The embeddings column is fixed at 1536 dimensions.
+    const tbl = `CREATE TABLE IF NOT EXISTS ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE}(
+      id uuid DEFAULT gen_random_uuid() PRIMARY KEY,
+      external_id VARCHAR,
+      collection VARCHAR,
+      document TEXT,
+      metadata JSONB DEFAULT '{}',
+      embeddings VECTOR(1536)
+    )`;
+    await db.query(tbl);
+
+    // Both index statements are sent together in a single query call.
+    const idxs = `CREATE INDEX IF NOT EXISTS idx_${PGVECTOR_TABLE}_external_id ON ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} (external_id);
+      CREATE INDEX IF NOT EXISTS idx_${PGVECTOR_TABLE}_collection ON ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} (collection);`;
+    await db.query(idxs);
+
+    // TODO add IVFFlat or HNSW indexing?
+    return db;
+  }
+
+  // isEmbeddingQuery?: boolean | undefined;
+
+  /**
+   * Connects to the database specified in environment vars (or reuses the
+   * existing connection).
+   * This method also checks and creates the vector extension,
+   * the destination table and indexes if not found.
+   * @returns A promise for the database connection; it rejects with the error encountered while connecting/setting up.
+   */
+  client() {
+    return this.getDb();
+  }
+
+  /**
+   * Removes every vector record that belongs to the current collection
+   * (the value managed through setCollection/getCollection).
+   * @returns The result object of the DELETE statement.
+   */
+  async clearCollection() {
+    const db = (await this.getDb()) as pg.Client;
+    const sql: string = `DELETE FROM ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} 
+      WHERE collection = $1`;
+
+    return await db.query(sql, [this.collection]);
+  }
+
+  /**
+   * Adds vector record(s) to the table, one INSERT per node.
+   * NOTE: Uses the collection property controlled by setCollection/getCollection.
+   * Inserting is best effort: a row that fails is logged and skipped, and its
+   * id is left out of the result.
+   * @param embeddingResults The Nodes to be inserted, optionally including metadata tuples.
+   * @returns A list of zero or more id values for the created records.
+   */
+  async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
+    // COALESCE lets the database generate an id when the node has none (an
+    // explicit NULL would violate the primary key), and RETURNING is what
+    // makes the stored id available in `result.rows`.
+    const sql: string = `INSERT INTO ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE}
+      (id, external_id, collection, document, metadata, embeddings)
+      VALUES (COALESCE($1::uuid, gen_random_uuid()), $2, $3, $4, $5, $6)
+      RETURNING id`;
+
+    const db = (await this.getDb()) as pg.Client;
+
+    const ret: string[] = [];
+    for (const row of embeddingResults) {
+      const id = row.id_.length ? row.id_ : null;
+      // Copy the metadata so stamping create_date does not mutate the
+      // caller's node.
+      const meta = { ...(row.metadata || {}), create_date: new Date() };
+
+      const params = [
+        id,
+        "",
+        this.collection,
+        row.getContent(MetadataMode.EMBED),
+        meta,
+        `[${row.getEmbedding().join(",")}]`,
+      ];
+
+      try {
+        const result = await db.query(sql, params);
+
+        if (result.rows.length) {
+          ret.push(result.rows[0].id as string);
+        }
+      } catch (err) {
+        // Best effort: log and continue with the remaining rows.
+        const msg = `${err}`;
+        console.log(msg, err);
+      }
+    }
+
+    return ret;
+  }
+
+  /**
+   * Removes the record whose id equals the given value. When a collection is
+   * set (see setCollection/getCollection) the delete is limited to it.
+   * @param refDocId Unique identifier for the record to delete.
+   * @param deleteKwargs Required by VectorStore interface.  Currently ignored.
+   * @returns Promise that resolves if the delete query did not throw an error.
+   */
+  async delete(refDocId: string, deleteKwargs?: any): Promise<void> {
+    const scoped = this.collection.length > 0;
+    const collectionCriteria = scoped ? "AND collection = $2" : "";
+    const sql: string = `DELETE FROM ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} 
+      WHERE id = $1 ${collectionCriteria}`;
+    const params = scoped ? [refDocId, this.collection] : [refDocId];
+
+    const db = (await this.getDb()) as pg.Client;
+    await db.query(sql, params);
+  }
+
+  /**
+   * Query the vector store for the rows closest to the query embedding,
+   * ordered by Euclidean distance and limited to the current collection
+   * when one is set.
+   * @param query The VectorStoreQuery to be used; `queryEmbedding` is required.
+   * @param options Required by VectorStore interface.  Currently ignored.
+   * @returns Zero or more Document instances with data from the vector store,
+   *   their ids, and one similarity score per row. Scores are negative
+   *   Euclidean distances, so higher means more similar.
+   * @throws If the query carries no embedding.
+   */
+  async query(
+    query: VectorStoreQuery,
+    options?: any,
+  ): Promise<VectorStoreQueryResult> {
+    // TODO QUERY TYPES:
+    //    Distance:       SELECT embedding <-> $1 AS distance FROM items;
+    //    Inner Product:  SELECT (embedding <#> $1) * -1 AS inner_product FROM items;
+    //    Cosine Sim:     SELECT 1 - (embedding <=> $1) AS cosine_similarity FROM items;
+
+    if (!query.queryEmbedding) {
+      throw new Error("PGVectorStore.query requires query.queryEmbedding");
+    }
+
+    const embedding = `[${query.queryEmbedding.join(",")}]`;
+    const max = query.similarityTopK ?? 2;
+    const hasCollection = this.collection.length > 0;
+    const where = hasCollection ? "WHERE collection = $3" : "";
+    // The distance is selected so a real score can be reported, and the
+    // limit is bound as a parameter instead of being spliced into the SQL.
+    const sql = `SELECT *, embeddings <-> $1 AS distance
+      FROM ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE}
+      ${where}
+      ORDER BY embeddings <-> $1 LIMIT $2
+    `;
+
+    const db = (await this.getDb()) as pg.Client;
+    const params = hasCollection
+      ? [embedding, max, this.collection]
+      : [embedding, max];
+    const results = await db.query(sql, params);
+
+    const nodes = results.rows.map((row) => {
+      return new Document({
+        id_: row.id,
+        text: row.document,
+        metadata: row.metadata,
+        embedding: row.embeddings,
+      });
+    });
+
+    return {
+      nodes: nodes,
+      // Negated so that, like the other similarity scores, higher is better.
+      similarities: results.rows.map((row) => -Number(row.distance)),
+      ids: results.rows.map((row) => row.id),
+    };
+  }
+
+  /**
+   * No-op: both arguments are ignored and nothing is written.
+   * @param persistPath Ignored.
+   * @param fs Ignored.
+   * @returns Resolved Promise.
+   */
+  persist(
+    persistPath: string,
+    fs?: GenericFileSystem | undefined,
+  ): Promise<void> {
+    return Promise.resolve();
+  }
+}
@@ -1,11 +1,11 @@
 import _ from "lodash";
 import * as path from "path";
+import { BaseNode } from "../../Node";
 import {
  getTopKEmbeddings,
  getTopKEmbeddingsLearner,
  getTopKMMREmbeddings,
-} from "../../Embedding";
-import { BaseNode } from "../../Node";
+} from "../../embeddings";
 import { GenericFileSystem, exists } from "../FileSystem";
 import { DEFAULT_FS, DEFAULT_PERSIST_DIR } from "../constants";
 import {
@@ -1,5 +1,4 @@
 import { BaseNode } from "../../Node";
-import { GenericFileSystem } from "../FileSystem";

 export interface VectorStoreQueryResult {
  nodes?: BaseNode[];
@@ -62,10 +61,9 @@ export interface VectorStore {
  isEmbeddingQuery?: boolean;
  client(): any;
  add(embeddingResults: BaseNode[]): Promise<string[]>;
-  delete(refDocId: string, deleteKwargs?: any): Promise<void>;
+  delete(refDocId: string, deleteOptions?: any): Promise<void>;
  query(
    query: VectorStoreQuery,
    options?: any,
  ): Promise<VectorStoreQueryResult>;
-  persist(persistPath: string, fs?: GenericFileSystem): Promise<void>;
 }
@@ -0,0 +1,59 @@
+import { BaseNode, jsonToNode, Metadata, ObjectType } from "../../Node";
+
+const DEFAULT_TEXT_KEY = "text";
+
+/**
+ * Ensures that no value in the given object is itself an object.
+ * `null` values are allowed.
+ * @param obj The key/value map to check.
+ * @throws Error naming the first key whose value is a non-null object.
+ */
+export function validateIsFlat(obj: { [key: string]: any }): void {
+  for (const field in obj) {
+    const value = obj[field];
+    const isNested = value !== null && typeof value === "object";
+    if (isNested) {
+      throw new Error(`Value for metadata ${field} must not be another object`);
+    }
+  }
+}
+
+/**
+ * Builds a metadata record for a node, embedding the serialized node itself
+ * under `_node_content` so it can be reconstructed later.
+ *
+ * The node's own `metadata` object is left untouched: the extra keys are
+ * written to a shallow copy, so calling this repeatedly on the same node does
+ * not nest earlier `_node_content` payloads inside later ones.
+ *
+ * @param node The node to describe.
+ * @param removeText When true, the `textField` entry of the serialized node is blanked.
+ * @param textField Name of the serialized node's text property.
+ * @param flatMetadata When true, throws if any metadata value is a non-null object.
+ * @returns A new metadata object containing the node's metadata plus
+ *   `_node_content`, `_node_type`, `document_id`, `doc_id` and `ref_doc_id`.
+ */
+export function nodeToMetadata(
+  node: BaseNode,
+  removeText: boolean = false,
+  textField: string = DEFAULT_TEXT_KEY,
+  flatMetadata: boolean = false,
+): Metadata {
+  const nodeObj = node.toJSON();
+  // Shallow copy so the bookkeeping keys added below never leak into the
+  // caller's node.
+  const metadata: Metadata = { ...node.metadata };
+
+  if (flatMetadata) {
+    validateIsFlat(node.metadata);
+  }
+
+  if (removeText) {
+    nodeObj[textField] = "";
+  }
+
+  // The embedding is not stored inside the serialized node content.
+  nodeObj["embedding"] = null;
+
+  metadata["_node_content"] = JSON.stringify(nodeObj);
+  metadata["_node_type"] = node.constructor.name.replace("_", ""); // remove leading underscore to be compatible with Python
+
+  // The same source-node id is stored under three keys; "None" marks a node
+  // without a source node.
+  const sourceId = node.sourceNode?.nodeId || "None";
+  metadata["document_id"] = sourceId;
+  metadata["doc_id"] = sourceId;
+  metadata["ref_doc_id"] = sourceId;
+
+  return metadata;
+}
+
+/**
+ * Rebuilds a node from a metadata record that carries the serialized node
+ * under `_node_content`.
+ * @param metadata Metadata containing `_node_content` and, optionally, `_node_type`.
+ * @returns The node produced by `jsonToNode` for the parsed content.
+ * @throws Error when `_node_content` is missing or empty.
+ */
+export function metadataDictToNode(metadata: Metadata): BaseNode {
+  const serialized = metadata["_node_content"];
+  if (!serialized) {
+    throw new Error("Node content not found in metadata.");
+  }
+  const parsed = JSON.parse(serialized);
+
+  // The class name stored in `_node_type` (not the node's own type attribute)
+  // decides which kind of node is rebuilt; this keeps the format compatible
+  // with LlamaIndex Python. Anything other than "IndexNode" becomes a text node.
+  const objectType =
+    metadata["_node_type"] === "IndexNode" ? ObjectType.INDEX : ObjectType.TEXT;
+  return jsonToNode(parsed, objectType);
+}
@@ -1,18 +1,18 @@
-import { OpenAIEmbedding } from "../Embedding";
+import {
+  CallbackManager,
+  RetrievalCallbackResponse,
+  StreamCallbackResponse,
+} from "../callbacks/CallbackManager";
+import { OpenAIEmbedding } from "../embeddings";
+import { SummaryIndex } from "../indices/summary";
+import { VectorStoreIndex } from "../indices/vectorStore/VectorStoreIndex";
+import { OpenAI } from "../llm/LLM";
 import { Document } from "../Node";
 import {
  ResponseSynthesizer,
  SimpleResponseBuilder,
 } from "../ResponseSynthesizer";
 import { ServiceContext, serviceContextFromDefaults } from "../ServiceContext";
-import {
-  CallbackManager,
-  RetrievalCallbackResponse,
-  StreamCallbackResponse,
-} from "../callbacks/CallbackManager";
-import { SummaryIndex } from "../indices/summary";
-import { VectorStoreIndex } from "../indices/vectorStore/VectorStoreIndex";
-import { OpenAI } from "../llm/LLM";
 import { mockEmbeddingModel, mockLlmGeneration } from "./utility/mockOpenAI";

 // Mock the OpenAI getOpenAISession function during testing
@@ -1,4 +1,4 @@
-import { SimilarityType, similarity } from "../Embedding";
+import { similarity, SimilarityType } from "../embeddings";

 describe("similarity", () => {
  test("throws error on mismatched lengths", () => {
@@ -0,0 +1,15 @@
+import { TextNode } from "../Node";
+
+describe("TextNode", () => {
+  // Every test builds its own node so no state is shared between cases.
+  const makeNode = (): TextNode => new TextNode({ text: "Hello World" });
+
+  describe("generateHash", () => {
+    it("should generate a hash", () => {
+      const node = makeNode();
+      expect(node.hash).toBe("nTSKdUTYqR52MPv/brvb4RTGeqedTEqG9QN8KSAj2Do=");
+    });
+  });
+});
@@ -1,6 +1,6 @@
-import { OpenAIEmbedding } from "../../Embedding";
-import { globalsHelper } from "../../GlobalsHelper";
 import { CallbackManager, Event } from "../../callbacks/CallbackManager";
+import { OpenAIEmbedding } from "../../embeddings";
+import { globalsHelper } from "../../GlobalsHelper";
 import { ChatMessage, OpenAI } from "../../llm/LLM";

 export function mockLlmGeneration({
@@ -1,5 +1,11 @@
 # create-llama

+## 0.0.9
+
+### Patch Changes
+
+- acfe232: Deployment fixes (thanks @seldo)
+
 ## 0.0.8

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "create-llama",
-  "version": "0.0.8",
+  "version": "0.0.9",
  "keywords": [
    "rag",
    "llamaindex",
@@ -5,6 +5,8 @@ import {
  VectorStoreIndex,
 } from "llamaindex";

+import * as dotenv from "dotenv";
+
 import {
  CHUNK_OVERLAP,
  CHUNK_SIZE,
@@ -12,6 +14,9 @@ import {
  STORAGE_DIR,
 } from "./constants.mjs";

+// Load environment variables from local .env file
+dotenv.config();
+
 async function getRuntime(func) {
  const start = Date.now();
  await func();
@@ -14,26 +14,14 @@ import {
  TemplateFramework,
 } from "./types";

-const envFileNameMap: Record<TemplateFramework, string> = {
-  nextjs: ".env.local",
-  express: ".env",
-  fastapi: ".env",
-};
-
-const createEnvLocalFile = async (
-  root: string,
-  framework: TemplateFramework,
-  openAIKey?: string,
-) => {
+const createEnvLocalFile = async (root: string, openAIKey?: string) => {
  if (openAIKey) {
-    const envFileName = envFileNameMap[framework];
-    if (!envFileName) return;
+    const envFileName = ".env";
    await fs.writeFile(
      path.join(root, envFileName),
      `OPENAI_API_KEY=${openAIKey}\n`,
    );
    console.log(`Created '${envFileName}' file containing OPENAI_API_KEY`);
-    process.env["OPENAI_API_KEY"] = openAIKey;
  }
 };

@@ -42,7 +30,16 @@ const copyTestData = async (
  framework: TemplateFramework,
  packageManager?: PackageManager,
  engine?: TemplateEngine,
+  openAIKey?: string,
 ) => {
+  if (framework === "nextjs") {
+    // XXX: This is a hack to make the build for nextjs work with pdf-parse
+    // pdf-parse needs './test/data/05-versions-space.pdf' to exist - can be removed when pdf-parse is removed
+    const srcFile = path.join(__dirname, "components", "data", "101.pdf");
+    const destPath = path.join(root, "test", "data");
+    await fs.mkdir(destPath, { recursive: true });
+    await fs.copyFile(srcFile, path.join(destPath, "05-versions-space.pdf"));
+  }
  if (engine === "context" || framework === "fastapi") {
    const srcPath = path.join(__dirname, "components", "data");
    const destPath = path.join(root, "data");
@@ -54,7 +51,7 @@ const copyTestData = async (
  }

  if (packageManager && engine === "context") {
-    if (process.env["OPENAI_API_KEY"]) {
+    if (openAIKey || process.env["OPENAI_API_KEY"]) {
      console.log(
        `\nRunning ${cyan(
          `${packageManager} run generate`,
@@ -226,6 +223,7 @@ const installTSTemplate = async ({
      "tailwind-merge": "^2",
      "@radix-ui/react-slot": "^1",
      "class-variance-authority": "^0.7",
+      clsx: "^1.2.1",
      "lucide-react": "^0.291",
      remark: "^14.0.3",
      "remark-code-import": "^1.2.0",
@@ -313,7 +311,7 @@ export const installTemplate = async (
    // This is a backend, so we need to copy the test data and create the env file.

    // Copy the environment file to the target directory.
-    await createEnvLocalFile(props.root, props.framework, props.openAIKey);
+    await createEnvLocalFile(props.root, props.openAIKey);

    // Copy test pdf file
    await copyTestData(
@@ -321,6 +319,7 @@ export const installTemplate = async (
      props.framework,
      props.packageManager,
      props.engine,
+      props.openAIKey,
    );
  }
 };
@@ -9,6 +9,14 @@ poetry install
 poetry shell
 ```

+By default, we use the OpenAI LLM (you can customize this; see `app/api/routers/chat.py`). As a result, you need to specify an `OPENAI_API_KEY` in a `.env` file in this directory.
+
+Example `backend/.env` file:
+
+```
+OPENAI_API_KEY=<openai_api_key>
+```
+
 Second, run the development server:

 ```
@@ -1,2 +1,3 @@
 __pycache__
 storage
+.env
@@ -1,5 +1,15 @@
 /** @type {import('next').NextConfig} */
 const nextConfig = {
+  webpack: (config) => {
+    // See https://webpack.js.org/configuration/resolve/#resolvealias
+    config.resolve.alias = {
+      ...config.resolve.alias,
+      sharp$: false,
+      "onnxruntime-node$": false,
+      mongodb$: false,
+    };
+    return config;
+  },
  experimental: {
    serverComponentsExternalPackages: ["llamaindex"],
    outputFileTracingIncludes: {
@@ -2,6 +2,16 @@
 const nextConfig = {
  output: "export",
  images: { unoptimized: true },
+  webpack: (config) => {
+    // See https://webpack.js.org/configuration/resolve/#resolvealias
+    config.resolve.alias = {
+      ...config.resolve.alias,
+      sharp$: false,
+      "onnxruntime-node$": false,
+      mongodb$: false,
+    };
+    return config;
+  },
  experimental: {
    serverComponentsExternalPackages: ["llamaindex"],
    outputFileTracingIncludes: {
@@ -9,6 +9,7 @@
  },
  "dependencies": {
    "llamaindex": "0.0.31",
+    "dotenv": "^16.3.1",
    "nanoid": "^5",
    "next": "^13",
    "react": "^18",
@@ -18,11 +19,11 @@
    "@types/node": "^20",
    "@types/react": "^18",
    "@types/react-dom": "^18",
-    "autoprefixer": "^10",
+    "autoprefixer": "^10.1",
    "eslint": "^8",
    "eslint-config-next": "^13",
    "postcss": "^8",
-    "tailwindcss": "^3",
+    "tailwindcss": "^3.3",
    "typescript": "^5"
  }
 }
@@ -1,7 +1,11 @@
 {
  "compilerOptions": {
    "target": "es5",
-    "lib": ["dom", "dom.iterable", "esnext"],
+    "lib": [
+      "dom",
+      "dom.iterable",
+      "esnext"
+    ],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
@@ -19,9 +23,19 @@
      }
    ],
    "paths": {
-      "@/*": ["./*"]
-    }
+      "@/*": [
+        "./*"
+      ]
+    },
+    "forceConsistentCasingInFileNames": true,
  },
-  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
-  "exclude": ["node_modules"]
-}
+  "include": [
+    "next-env.d.ts",
+    "**/*.ts",
+    "**/*.tsx",
+    ".next/types/**/*.ts"
+  ],
+  "exclude": [
+    "node_modules"
+  ]
+}
@@ -18,7 +18,7 @@ Then call the express API endpoint `/api/chat` to see the result:

 ```
 curl --location 'localhost:8000/api/chat' \
--header 'Content-Type: application/json' \
+--header 'Content-Type: text/plain' \
 --data '{ "messages": [{ "role": "user", "content": "Hello" }] }'
 ```

@@ -9,7 +9,7 @@
    "dev": "concurrently \"tsup index.ts --format esm --dts --watch\" \"nodemon -q dist/index.js\""
  },
  "dependencies": {
-    "ai": "^2",
+    "ai": "^2.2.5",
    "cors": "^2.8.5",
    "dotenv": "^16.3.1",
    "express": "^4",
@@ -25,4 +25,4 @@
    "tsup": "^7",
    "typescript": "^5"
  }
-}
+}
@@ -9,6 +9,14 @@ poetry install
 poetry shell
 ```

+By default, we use the OpenAI LLM (you can customize this; see `app/api/routers/chat.py`). As a result, you need to specify an `OPENAI_API_KEY` in a `.env` file in this directory.
+
+Example `backend/.env` file:
+
+```
+OPENAI_API_KEY=<openai_api_key>
+```
+
 Second, run the development server:

 ```
@@ -1,2 +1,3 @@
 __pycache__
 storage
+.env
@@ -1,5 +1,15 @@
 /** @type {import('next').NextConfig} */
 const nextConfig = {
+  webpack: (config) => {
+    // See https://webpack.js.org/configuration/resolve/#resolvealias
+    config.resolve.alias = {
+      ...config.resolve.alias,
+      sharp$: false,
+      "onnxruntime-node$": false,
+      mongodb$: false,
+    };
+    return config;
+  },
  experimental: {
    serverComponentsExternalPackages: ["llamaindex"],
    outputFileTracingIncludes: {
@@ -2,6 +2,16 @@
 const nextConfig = {
  output: "export",
  images: { unoptimized: true },
+  webpack: (config) => {
+    // See https://webpack.js.org/configuration/resolve/#resolvealias
+    config.resolve.alias = {
+      ...config.resolve.alias,
+      sharp$: false,
+      "onnxruntime-node$": false,
+      mongodb$: false,
+    };
+    return config;
+  },
  experimental: {
    serverComponentsExternalPackages: ["llamaindex"],
    outputFileTracingIncludes: {
@@ -8,8 +8,9 @@
    "lint": "next lint"
  },
  "dependencies": {
-    "ai": "^2",
+    "ai": "^2.2.5",
    "llamaindex": "0.0.31",
+    "dotenv": "^16.3.1",
    "next": "^13",
    "react": "^18",
    "react-dom": "^18"
@@ -18,11 +19,11 @@
    "@types/node": "^20",
    "@types/react": "^18",
    "@types/react-dom": "^18",
-    "autoprefixer": "^10",
+    "autoprefixer": "^10.1",
    "eslint": "^8",
    "eslint-config-next": "^13",
    "postcss": "^8",
-    "tailwindcss": "^3",
+    "tailwindcss": "^3.3",
    "typescript": "^5"
  }
 }
@@ -1,7 +1,11 @@
 {
  "compilerOptions": {
    "target": "es5",
-    "lib": ["dom", "dom.iterable", "esnext"],
+    "lib": [
+      "dom",
+      "dom.iterable",
+      "esnext"
+    ],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
@@ -19,9 +23,19 @@
      }
    ],
    "paths": {
-      "@/*": ["./*"]
-    }
+      "@/*": [
+        "./*"
+      ]
+    },
+    "forceConsistentCasingInFileNames": true,
  },
-  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
-  "exclude": ["node_modules"]
-}
+  "include": [
+    "next-env.d.ts",
+    "**/*.ts",
+    "**/*.tsx",
+    ".next/types/**/*.ts"
+  ],
+  "exclude": [
+    "node_modules"
+  ]
+}
Author	SHA1	Message	Date
yisding	ad7537dd84	llamaindex 0.0.37	2023-11-23 10:54:44 -08:00
yisding	3bab23172a	changeset	2023-11-23 10:53:30 -08:00
yisding	18c132d494	Merge pull request #228 from run-llama/ms/create-llama-fixes Several fixes for improving compatibility with Next.JS	2023-11-23 10:50:13 -08:00
Marcus Schiesser	d072353e08	fix: copy pdf-parse test doc for npm build	2023-11-23 20:58:43 +07:00
Marcus Schiesser	c8bbc101cc	feat: remove AssemblyAIReader as it's not working with Next.JS	2023-11-23 18:23:24 +07:00
Marcus Schiesser	b93f748998	fix: don't resolve mongodb for next.js	2023-11-23 18:20:15 +07:00
Marcus Schiesser	ecb100448a	fix: remove forceConsistentCasingInFileNames warning	2023-11-23 18:19:29 +07:00
Marcus Schiesser	c749c856b5	fix: add missing clsx package	2023-11-23 18:18:35 +07:00
Marcus Schiesser	0baf278972	fix: transformers.js not working with nextjs	2023-11-23 16:46:18 +07:00
Marcus Schiesser	ae7780266a	fix: curl test for express (streaming)	2023-11-23 15:56:36 +07:00
Marcus Schiesser	587960aebe	fix: use dotenv for npm run generate, use `.env` for NextJS, fix package versions for pnpm	2023-11-23 15:55:47 +07:00
Marcus Schiesser	4e1b6784f7	fix: pdfparse not working with in ESM version	2023-11-23 14:22:29 +07:00
yisding	8b381f2640	LITS 0.0.36	2023-11-21 22:33:14 -08:00
yisding	0dc7fa6c34	Merge pull request #170 from Swimburger/assemblyai Add AssemblyAI integration	2023-11-21 21:46:08 -08:00
yisding	2a2bf682bf	small fix in example	2023-11-21 21:44:58 -08:00
yisding	87526129fb	Merge branch 'main' into assemblyai	2023-11-21 21:39:35 -08:00
yisding	8ed1b7aa46	Merge pull request #179 from mtutty/add-pgvector-store Add PGVectorStore	2023-11-21 21:35:12 -08:00
yisding	4084bd0ecc	Merge branch 'main' into add-pgvector-store	2023-11-21 21:33:41 -08:00
yisding	d11eaceaf1	Merge pull request #223 from run-llama/claude-21 support for claude-2.1	2023-11-21 21:30:21 -08:00
yisding	1e6986fbc5	pnpm lockfile	2023-11-21 21:20:30 -08:00
yisding	11a19bdec7	make sweep optional in issues	2023-11-21 21:15:32 -08:00
yisding	51064f1b90	Merge pull request #221 from run-llama/ms/add-clip-embeddings feat: add clip embedding to llamaindex	2023-11-21 21:04:01 -08:00
yisding	3385cd19e8	support for claude-2.1 Added custom RAG prompt for Claude. Supporting system message format.	2023-11-21 21:01:54 -08:00
yisding	852f8517df	Merge pull request #209 from run-llama/jerry/edit_readme add .env instructions	2023-11-21 21:01:35 -08:00
Marcus Schiesser	bb917f9818	refactor: moved embeddings to embeddings folder	2023-11-21 14:20:10 +07:00
Marcus Schiesser	10248fb29f	chore: move clip example	2023-11-21 13:53:38 +07:00
Marcus Schiesser	446dc85bdd	fix: usage of transformers.js as CJS	2023-11-21 13:42:40 +07:00
Marcus Schiesser	4aa2c226a9	feat: add clip embedding to llamaindex	2023-11-21 11:01:29 +07:00
Marcus Schiesser	bf9ba8313a	test clip embeddings	2023-11-21 10:59:37 +07:00
yisding	444b59c557	Merge pull request #218 from run-llama/ms/use-cryptojs feat: use cryptojs instead of crypto	2023-11-20 18:25:31 -08:00
yisding	b2e1df94db	Merge remote-tracking branch 'origin/main' into ms/use-cryptojs	2023-11-20 18:24:30 -08:00
yisding	b4963cabc8	Merge pull request #204 from run-llama/ms/add-mongodb-vector Feat: added support for MongoDB as vector DB	2023-11-20 18:09:09 -08:00
Marcus Schiesser	2851024340	feat: use cryptojs instead of crypto (removes nodejs dep)	2023-11-20 13:56:04 +07:00
yisding	7f25a25729	create-llama 0.0.9	2023-11-19 18:30:32 -08:00
yisding	acfe23265a	changeset	2023-11-19 18:17:57 -08:00
yisding	2c6fbbd7dd	Merge pull request #217 from run-llama/seldo/python-gitignore	2023-11-19 17:30:49 -08:00
Laurie Voss	f84507f513	Merge branch 'main' of github.com:run-llama/LlamaIndexTS into seldo/python-env	2023-11-19 17:26:50 -08:00
Laurie Voss	be6a9e4a48	Default .gitignore should ignore .env	2023-11-19 17:26:25 -08:00
yisding	69e7634619	Merge pull request #216 from run-llama/seldo/python-env	2023-11-19 17:14:42 -08:00
Jerry Liu	3e8c923641	cr	2023-11-17 19:39:23 -08:00
Marcus Schiesser	df5cbe30a6	fix: missing JSON parsing and improved compatibility with Python	2023-11-17 15:06:31 +07:00
Marcus Schiesser	9e1a536778	docs: createIndex doesn't work	2023-11-17 14:58:20 +07:00
Marcus Schiesser	a1db8833ef	feat: sync'ed SimpleMongReader with Python 0.9 and tested/fixed mongodb scripts	2023-11-17 14:05:12 +07:00
Marcus Schiesser	95dd0e0158	feat: add mongo db vector support with example	2023-11-17 14:05:12 +07:00
Michael Tutty	19f3c857d5	Add comment blocks and support for collection filtering	2023-11-11 18:13:41 +00:00
Michael Tutty	7f3da73aa4	Final cleanup, README for example scripts	2023-11-11 17:48:01 +00:00
Michael Tutty	c384c2b610	Resolve upstream conflicts	2023-11-11 16:56:45 +00:00
Michael Tutty	dcf358f27d	Resolve upstream updates/conflicts	2023-11-10 02:16:42 +00:00
Michael Tutty	40afc8c0e2	Add PGVectorStore, dependencies, example scripts	2023-11-10 02:04:35 +00:00
Niels Swimberghe	b22bc8a799	Add AssemblyAI integration	2023-10-31 15:43:33 -04:00