Compare commits

...

15 Commits

Author SHA1 Message Date
thucpn 4a520e05ff Merge branch 'feat/integrate-postgresql-datasource' of github.com:run-llama/LlamaIndexTS into feat/integrate-postgresql-datasource 2024-01-04 15:37:12 +07:00
thucpn b6c109baa5 chore: resolve conflict 2024-01-04 15:36:58 +07:00
Thuc Pham a89534b704 feat: use connection string for create llama 2024-01-04 15:36:21 +07:00
Thuc Pham 9132d3c23e feat: integrate create-llama with postgresql on cloud 2024-01-04 15:36:21 +07:00
Thuc Pham e78eece82d feat: update document to use timescale postgresql 2024-01-04 15:36:20 +07:00
thucpn 29b0f078ed fix: add await to connect db 2024-01-04 15:36:20 +07:00
thucpn 4ba9a6cbe5 docs: update question and log 2024-01-04 15:36:20 +07:00
thucpn 0e4290e525 feat: integrate postgresql datasource 2024-01-04 15:35:44 +07:00
thucpn 1ea126bb5b chore: resolve conflict 2024-01-04 15:33:02 +07:00
Thuc Pham 426eb43b41 feat: use connection string for create llama 2024-01-04 07:47:03 +00:00
Thuc Pham 47f706bbd5 feat: integrate create-llama with postgresql on cloud 2024-01-03 10:29:41 +00:00
Thuc Pham 55fc9c6beb feat: update document to use timescale postgresql 2024-01-03 09:13:42 +00:00
thucpn 9a42038a71 fix: add await to connect db 2023-12-26 17:23:24 +07:00
thucpn c4b43e5afb docs: update question and log 2023-12-26 17:10:19 +07:00
thucpn 03e302f6b1 feat: integrate postgresql datasource 2023-12-21 11:10:37 +07:00
9 changed files with 148 additions and 35 deletions
+13
View File
@@ -14,6 +14,19 @@ You'll also need a value for OPENAI_API_KEY in your environment.
**NOTE:** Using `--rm` in the example docker command above means that the vector store will be deleted every time the container is stopped. For production purposes, use a volume to ensure persistence across restarts.
## Use a database on cloud
We recommend using a managed database service in the cloud. For example, you can use [Timescale](https://docs.timescale.com/use-timescale/latest/services/create-a-service/?ref=timescale.com) to create a PostgreSQL database instance. You can then run the following commands to set the environment variables for the database connection:
```bash
export PGHOST=<your database host>
export PGUSER=<your database user>
export PGPASSWORD=<your database password>
export PGDATABASE=<your database name>
export PGPORT=<your database port>
export OPENAI_API_KEY=<your openai api key>
```
## Setup and Loading Docs
Read and follow the instructions in the README.md file located one directory up to make sure your JS/TS dependencies are set up. The commands listed below are also run from that parent directory.
@@ -16,6 +16,8 @@ export class PGVectorStore implements VectorStore {
storesText: boolean = true;
private collection: string = "";
private schemaName: string = PGVECTOR_SCHEMA;
private tableName: string = PGVECTOR_TABLE;
/*
FROM pg LIBRARY:
@@ -37,7 +39,10 @@ export class PGVectorStore implements VectorStore {
*/
db?: pg.Client;
constructor() {}
constructor(config?: { schemaName?: string; tableName?: string }) {
this.schemaName = config?.schemaName ?? PGVECTOR_SCHEMA;
this.tableName = config?.tableName ?? PGVECTOR_TABLE;
}
/**
* Setter for the collection property.
@@ -66,7 +71,9 @@ export class PGVectorStore implements VectorStore {
try {
// Create DB connection
// Read connection params from env - see comment block above
const db = new pg.Client();
const db = new pg.Client({
connectionString: process.env.PG_CONNECTION_STRING,
});
await db.connect();
// Check vector extension
@@ -88,9 +95,9 @@ export class PGVectorStore implements VectorStore {
}
private async checkSchema(db: pg.Client) {
await db.query(`CREATE SCHEMA IF NOT EXISTS ${PGVECTOR_SCHEMA}`);
await db.query(`CREATE SCHEMA IF NOT EXISTS ${this.schemaName}`);
const tbl = `CREATE TABLE IF NOT EXISTS ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE}(
const tbl = `CREATE TABLE IF NOT EXISTS ${this.schemaName}.${this.tableName}(
id uuid DEFAULT gen_random_uuid() PRIMARY KEY,
external_id VARCHAR,
collection VARCHAR,
@@ -100,8 +107,8 @@ export class PGVectorStore implements VectorStore {
)`;
await db.query(tbl);
const idxs = `CREATE INDEX IF NOT EXISTS idx_${PGVECTOR_TABLE}_external_id ON ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} (external_id);
CREATE INDEX IF NOT EXISTS idx_${PGVECTOR_TABLE}_collection ON ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} (collection);`;
const idxs = `CREATE INDEX IF NOT EXISTS idx_${this.tableName}_external_id ON ${this.schemaName}.${this.tableName} (external_id);
CREATE INDEX IF NOT EXISTS idx_${this.tableName}_collection ON ${this.schemaName}.${this.tableName} (collection);`;
await db.query(idxs);
// TODO add IVFFlat or HNSW indexing?
@@ -126,7 +133,7 @@ export class PGVectorStore implements VectorStore {
* @returns The result of the delete query.
*/
async clearCollection() {
const sql: string = `DELETE FROM ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE}
const sql: string = `DELETE FROM ${this.schemaName}.${this.tableName}
WHERE collection = $1`;
const db = (await this.getDb()) as pg.Client;
@@ -147,7 +154,7 @@ export class PGVectorStore implements VectorStore {
return Promise.resolve([]);
}
const sql: string = `INSERT INTO ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE}
const sql: string = `INSERT INTO ${this.schemaName}.${this.tableName}
(id, external_id, collection, document, metadata, embeddings)
VALUES ($1, $2, $3, $4, $5, $6)`;
@@ -197,7 +204,7 @@ export class PGVectorStore implements VectorStore {
const collectionCriteria = this.collection.length
? "AND collection = $2"
: "";
const sql: string = `DELETE FROM ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE}
const sql: string = `DELETE FROM ${this.schemaName}.${this.tableName}
WHERE id = $1 ${collectionCriteria}`;
const db = (await this.getDb()) as pg.Client;
@@ -230,7 +237,7 @@ export class PGVectorStore implements VectorStore {
const sql = `SELECT
v.*,
embeddings <-> $1 s
FROM ${PGVECTOR_SCHEMA}.${PGVECTOR_TABLE} v
FROM ${this.schemaName}.${this.tableName} v
${where}
ORDER BY s
LIMIT ${max}
+23 -24
View File
@@ -233,30 +233,29 @@ export const askQuestions = async (
program.engine = engine;
preferences.engine = engine;
}
}
if (program.engine !== "simple" && !program.vectorDb) {
if (ciInfo.isCI) {
program.vectorDb = getPrefOrDefault("vectorDb");
} else {
const { vectorDb } = await prompts(
{
type: "select",
name: "vectorDb",
message: "Would you like to use a vector database?",
choices: [
{
title: "No, just store the data in the file system",
value: "none",
},
{ title: "MongoDB", value: "mongo" },
],
initial: 0,
},
handlers,
);
program.vectorDb = vectorDb;
preferences.vectorDb = vectorDb;
if (program.engine !== "simple" && !program.vectorDb) {
if (ciInfo.isCI) {
program.vectorDb = getPrefOrDefault("vectorDb");
} else {
const { vectorDb } = await prompts(
{
type: "select",
name: "vectorDb",
message: "Would you like to use a vector database?",
choices: [
{
title: "No, just store the data in the file system",
value: "none",
},
{ title: "MongoDB", value: "mongo" },
],
initial: 0,
},
handlers,
);
program.vectorDb = vectorDb;
preferences.vectorDb = vectorDb;
}
}
}
@@ -0,0 +1,36 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import * as dotenv from "dotenv";
import {
PGVectorStore,
SimpleDirectoryReader,
VectorStoreIndex,
storageContextFromDefaults,
} from "llamaindex";
import { STORAGE_DIR, checkRequiredEnvVars } from "./shared.mjs";
dotenv.config();
/**
 * Reads every document under STORAGE_DIR, embeds it and stores the result in
 * the PostgreSQL vector store.
 *
 * The collection named after STORAGE_DIR is emptied first, so re-running the
 * script replaces the previous embeddings instead of appending to them.
 *
 * @returns A promise that resolves once all embeddings have been written.
 */
async function loadAndIndex() {
  // load objects from storage and convert them into LlamaIndex Document objects
  const documents = await new SimpleDirectoryReader().loadData({
    directoryPath: STORAGE_DIR,
  });

  // create postgres vector store
  const vectorStore = new PGVectorStore();
  vectorStore.setCollection(STORAGE_DIR);
  // clearCollection() is async: wait for the DELETE to finish so it cannot
  // race with (and wipe) the rows inserted below, and so a failure surfaces
  // here instead of as an unhandled rejection.
  await vectorStore.clearCollection();

  // create index from all the Documents
  console.log("Start creating embeddings...");
  const storageContext = await storageContextFromDefaults({ vectorStore });
  await VectorStoreIndex.fromDocuments(documents, { storageContext });
  console.log(`Successfully created embeddings.`);
}
// Script entry point: validate the environment, build the index, then exit.
// Failures are caught explicitly so the process always reports them and exits
// with a non-zero status, regardless of the runtime's unhandled-rejection mode.
(async () => {
  try {
    checkRequiredEnvVars();
    await loadAndIndex();
    console.log("Finished generating storage.");
    process.exit(0);
  } catch (err) {
    console.error("Failed to generate storage:", err);
    process.exit(1);
  }
})();
@@ -0,0 +1,29 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import {
ContextChatEngine,
LLM,
PGVectorStore,
VectorStoreIndex,
serviceContextFromDefaults,
} from "llamaindex";
import { CHUNK_OVERLAP, CHUNK_SIZE, checkRequiredEnvVars } from "./shared.mjs";
/**
 * Builds a VectorStoreIndex on top of the PostgreSQL vector store.
 *
 * Throws (via checkRequiredEnvVars) before touching the store when a required
 * environment variable is missing.
 *
 * @param llm - The language model used for the service context.
 * @returns The index created from the vector store.
 */
async function getDataSource(llm: LLM) {
  checkRequiredEnvVars();

  const vectorStore = new PGVectorStore();
  const context = serviceContextFromDefaults({
    llm,
    chunkSize: CHUNK_SIZE,
    chunkOverlap: CHUNK_OVERLAP,
  });

  const index = await VectorStoreIndex.fromVectorStore(vectorStore, context);
  return index;
}
/**
 * Creates a context chat engine whose retriever reads from the index returned
 * by getDataSource.
 *
 * @param llm - The language model used both for the index and as chat model.
 * @returns A ContextChatEngine retrieving the 5 most similar entries per query.
 */
export async function createChatEngine(llm: LLM) {
  const dataSource = await getDataSource(llm);
  return new ContextChatEngine({
    chatModel: llm,
    retriever: dataSource.asRetriever({ similarityTopK: 5 }),
  });
}
@@ -0,0 +1,22 @@
// Relative path of the directory holding the source documents.
// NOTE(review): presumably resolved against the process working directory — confirm.
export const STORAGE_DIR = "./data";
// Chunk size handed to the service context (units presumably tokens — confirm).
export const CHUNK_SIZE = 512;
// Overlap between consecutive chunks, in the same units as CHUNK_SIZE.
export const CHUNK_OVERLAP = 20;
// Environment variables that must be non-empty before the store can be used.
const REQUIRED_ENV_VARS = ["PG_CONNECTION_STRING", "OPENAI_API_KEY"];

/**
 * Verifies that every variable in REQUIRED_ENV_VARS is set to a non-empty
 * value. Unset and empty-string values are both treated as missing.
 *
 * @throws {Error} Listing all missing variables, after logging them.
 */
export function checkRequiredEnvVars() {
  const absent = [];
  for (const name of REQUIRED_ENV_VARS) {
    if (!process.env[name]) {
      absent.push(name);
    }
  }

  // Nothing missing: nothing to report.
  if (absent.length === 0) {
    return;
  }

  const listing = absent.join(", ");
  console.log(
    `The following environment variables are required but missing: ${listing}`,
  );
  throw new Error(`Missing environment variables: ${listing}`);
}
+5
View File
@@ -49,6 +49,11 @@ const createEnvLocalFile = async (
content += `MONGODB_VECTOR_INDEX=\n`;
break;
}
case "pg": {
content += `# For generating a connection URI, see https://docs.timescale.com/use-timescale/latest/services/create-a-service\n`;
content += `PG_CONNECTION_STRING=\n`;
break;
}
}
if (content) {
+1 -1
View File
@@ -4,7 +4,7 @@ export type TemplateType = "simple" | "streaming" | "community";
export type TemplateFramework = "nextjs" | "express" | "fastapi";
export type TemplateEngine = "simple" | "context";
export type TemplateUI = "html" | "shadcn";
export type TemplateVectorDB = "none" | "mongo";
export type TemplateVectorDB = "none" | "mongo" | "pg";
export interface InstallTemplateArgs {
appName: string;
+2
View File
@@ -44,6 +44,8 @@ module.exports = {
"NOTION_TOKEN",
"MONGODB_URI",
"PG_CONNECTION_STRING",
"https_proxy",
"npm_config_user_agent",
"NEXT_PUBLIC_CHAT_API",