Compare commits

..

7 Commits

Author SHA1 Message Date
leehuwuj c3215ccc7b better log 2024-05-02 15:23:06 +07:00
leehuwuj 18ca18123f split code to run_ingestion_pipeline and persist_storage 2024-05-02 15:18:40 +07:00
leehuwuj 5ecb0c9fb7 update comments and remove stores_index 2024-05-02 14:15:56 +07:00
leehuwuj 7e45f604e6 Fix dimensions typo in settings.py 2024-05-02 10:45:58 +07:00
leehuwuj bbacf0f199 refactor code and comments 2024-05-02 10:43:54 +07:00
leehuwuj c0c6df80c7 fix redundant stashed code 2024-05-02 09:25:05 +07:00
leehuwuj 3b39a12ad6 Refactor code to persist the docstore and index in the SimpleVectorStore case 2024-05-02 08:50:09 +07:00
59 changed files with 333 additions and 907 deletions
+5
View File
@@ -0,0 +1,5 @@
---
"create-llama": patch
---
Use ingestion pipeline for Python
+5
View File
@@ -0,0 +1,5 @@
---
"create-llama": patch
---
Display events (e.g. retrieving nodes) per chat message
-8
View File
@@ -1,13 +1,5 @@
# create-llama
## 0.1.1
### Patch Changes
- 7bd3ed5: Support Anthropic and Gemini as model providers
- 7bd3ed5: Support new agents from LITS 0.3
- cfb5257: Display events (e.g. retrieving nodes) per chat message
## 0.1.0
### Minor Changes
-18
View File
@@ -173,24 +173,6 @@ const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
},
]
: []),
...(modelConfig.provider === "anthropic"
? [
{
name: "ANTHROPIC_API_KEY",
description: "The Anthropic API key to use.",
value: modelConfig.apiKey,
},
]
: []),
...(modelConfig.provider === "gemini"
? [
{
name: "GOOGLE_API_KEY",
description: "The Google API key to use.",
value: modelConfig.apiKey,
},
]
: []),
];
};
+2 -1
View File
@@ -9,6 +9,7 @@ import { createBackendEnvFile, createFrontendEnvFile } from "./env-variables";
import { PackageManager } from "./get-pkg-manager";
import { installLlamapackProject } from "./llama-pack";
import { isHavingPoetryLockFile, tryPoetryRun } from "./poetry";
import { isModelConfigured } from "./providers";
import { installPythonTemplate } from "./python";
import { downloadAndExtractRepo } from "./repo";
import { ConfigFileType, writeToolsConfig } from "./tools";
@@ -37,7 +38,7 @@ async function generateContextData(
? "poetry run generate"
: `${packageManager} run generate`,
)}`;
const modelConfigured = modelConfig.isConfigured();
const modelConfigured = isModelConfigured(modelConfig);
const llamaCloudKeyConfigured = useLlamaParse
? llamaCloudKey || process.env["LLAMA_CLOUD_API_KEY"]
: true;
-106
View File
@@ -1,106 +0,0 @@
import ciInfo from "ci-info";
import prompts from "prompts";
import { ModelConfigParams } from ".";
import { questionHandlers, toChoice } from "../../questions";
const MODELS = [
"claude-3-opus",
"claude-3-sonnet",
"claude-3-haiku",
"claude-2.1",
"claude-instant-1.2",
];
const DEFAULT_MODEL = MODELS[0];
// TODO: get embedding vector dimensions from the anthropic sdk (currently not supported)
// Use huggingface embedding models for now
enum HuggingFaceEmbeddingModelType {
XENOVA_ALL_MINILM_L6_V2 = "all-MiniLM-L6-v2",
XENOVA_ALL_MPNET_BASE_V2 = "all-mpnet-base-v2",
}
type ModelData = {
dimensions: number;
};
const EMBEDDING_MODELS: Record<HuggingFaceEmbeddingModelType, ModelData> = {
[HuggingFaceEmbeddingModelType.XENOVA_ALL_MINILM_L6_V2]: {
dimensions: 384,
},
[HuggingFaceEmbeddingModelType.XENOVA_ALL_MPNET_BASE_V2]: {
dimensions: 768,
},
};
const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0];
const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions;
type AnthropicQuestionsParams = {
apiKey?: string;
askModels: boolean;
};
export async function askAnthropicQuestions({
askModels,
apiKey,
}: AnthropicQuestionsParams): Promise<ModelConfigParams> {
const config: ModelConfigParams = {
apiKey,
model: DEFAULT_MODEL,
embeddingModel: DEFAULT_EMBEDDING_MODEL,
dimensions: DEFAULT_DIMENSIONS,
isConfigured(): boolean {
if (config.apiKey) {
return true;
}
if (process.env["ANTHROPIC_API_KEY"]) {
return true;
}
return false;
},
};
if (!config.apiKey) {
const { key } = await prompts(
{
type: "text",
name: "key",
message:
"Please provide your Anthropic API key (or leave blank to use ANTHROPIC_API_KEY env variable):",
},
questionHandlers,
);
config.apiKey = key || process.env.ANTHROPIC_API_KEY;
}
// use default model values in CI or if user should not be asked
const useDefaults = ciInfo.isCI || !askModels;
if (!useDefaults) {
const { model } = await prompts(
{
type: "select",
name: "model",
message: "Which LLM model would you like to use?",
choices: MODELS.map(toChoice),
initial: 0,
},
questionHandlers,
);
config.model = model;
const { embeddingModel } = await prompts(
{
type: "select",
name: "embeddingModel",
message: "Which embedding model would you like to use?",
choices: Object.keys(EMBEDDING_MODELS).map(toChoice),
initial: 0,
},
questionHandlers,
);
config.embeddingModel = embeddingModel;
config.dimensions =
EMBEDDING_MODELS[
embeddingModel as HuggingFaceEmbeddingModelType
].dimensions;
}
return config;
}
-87
View File
@@ -1,87 +0,0 @@
import ciInfo from "ci-info";
import prompts from "prompts";
import { ModelConfigParams } from ".";
import { questionHandlers, toChoice } from "../../questions";
const MODELS = ["gemini-1.5-pro-latest", "gemini-pro", "gemini-pro-vision"];
type ModelData = {
dimensions: number;
};
const EMBEDDING_MODELS: Record<string, ModelData> = {
"embedding-001": { dimensions: 768 },
"text-embedding-004": { dimensions: 768 },
};
const DEFAULT_MODEL = MODELS[0];
const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0];
const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions;
type GeminiQuestionsParams = {
apiKey?: string;
askModels: boolean;
};
export async function askGeminiQuestions({
askModels,
apiKey,
}: GeminiQuestionsParams): Promise<ModelConfigParams> {
const config: ModelConfigParams = {
apiKey,
model: DEFAULT_MODEL,
embeddingModel: DEFAULT_EMBEDDING_MODEL,
dimensions: DEFAULT_DIMENSIONS,
isConfigured(): boolean {
if (config.apiKey) {
return true;
}
if (process.env["GOOGLE_API_KEY"]) {
return true;
}
return false;
},
};
if (!config.apiKey) {
const { key } = await prompts(
{
type: "text",
name: "key",
message:
"Please provide your Google API key (or leave blank to use GOOGLE_API_KEY env variable):",
},
questionHandlers,
);
config.apiKey = key || process.env.GOOGLE_API_KEY;
}
// use default model values in CI or if user should not be asked
const useDefaults = ciInfo.isCI || !askModels;
if (!useDefaults) {
const { model } = await prompts(
{
type: "select",
name: "model",
message: "Which LLM model would you like to use?",
choices: MODELS.map(toChoice),
initial: 0,
},
questionHandlers,
);
config.model = model;
const { embeddingModel } = await prompts(
{
type: "select",
name: "embeddingModel",
message: "Which embedding model would you like to use?",
choices: Object.keys(EMBEDDING_MODELS).map(toChoice),
initial: 0,
},
questionHandlers,
);
config.embeddingModel = embeddingModel;
config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions;
}
return config;
}
+10 -11
View File
@@ -2,10 +2,8 @@ import ciInfo from "ci-info";
import prompts from "prompts";
import { questionHandlers } from "../../questions";
import { ModelConfig, ModelProvider } from "../types";
import { askAnthropicQuestions } from "./anthropic";
import { askGeminiQuestions } from "./gemini";
import { askOllamaQuestions } from "./ollama";
import { askOpenAIQuestions } from "./openai";
import { askOpenAIQuestions, isOpenAIConfigured } from "./openai";
const DEFAULT_MODEL_PROVIDER = "openai";
@@ -33,8 +31,6 @@ export async function askModelConfig({
value: "openai",
},
{ title: "Ollama", value: "ollama" },
{ title: "Anthropic", value: "anthropic" },
{ title: "Gemini", value: "gemini" },
],
initial: 0,
},
@@ -48,12 +44,6 @@ export async function askModelConfig({
case "ollama":
modelConfig = await askOllamaQuestions({ askModels });
break;
case "anthropic":
modelConfig = await askAnthropicQuestions({ askModels });
break;
case "gemini":
modelConfig = await askGeminiQuestions({ askModels });
break;
default:
modelConfig = await askOpenAIQuestions({
openAiKey,
@@ -65,3 +55,12 @@ export async function askModelConfig({
provider: modelProvider,
};
}
export function isModelConfigured(modelConfig: ModelConfig): boolean {
switch (modelConfig.provider) {
case "openai":
return isOpenAIConfigured(modelConfig);
default:
return true;
}
}
-3
View File
@@ -29,9 +29,6 @@ export async function askOllamaQuestions({
model: DEFAULT_MODEL,
embeddingModel: DEFAULT_EMBEDDING_MODEL,
dimensions: EMBEDDING_MODELS[DEFAULT_EMBEDDING_MODEL].dimensions,
isConfigured(): boolean {
return true;
},
};
// use default model values in CI or if user should not be asked
+11 -9
View File
@@ -20,15 +20,6 @@ export async function askOpenAIQuestions({
model: DEFAULT_MODEL,
embeddingModel: DEFAULT_EMBEDDING_MODEL,
dimensions: getDimensions(DEFAULT_EMBEDDING_MODEL),
isConfigured(): boolean {
if (config.apiKey) {
return true;
}
if (process.env["OPENAI_API_KEY"]) {
return true;
}
return false;
},
};
if (!config.apiKey) {
@@ -40,6 +31,7 @@ export async function askOpenAIQuestions({
? "Please provide your OpenAI API key (or leave blank to use OPENAI_API_KEY env variable):"
: "Please provide your OpenAI API key (leave blank to skip):",
validate: (value: string) => {
console.log(value);
if (askModels && !value) {
if (process.env.OPENAI_API_KEY) {
return true;
@@ -86,6 +78,16 @@ export async function askOpenAIQuestions({
return config;
}
export function isOpenAIConfigured(params: ModelConfigParams): boolean {
if (params.apiKey) {
return true;
}
if (process.env["OPENAI_API_KEY"]) {
return true;
}
return false;
}
async function getAvailableModelChoices(
selectEmbedding: boolean,
apiKey?: string,
-20
View File
@@ -127,26 +127,6 @@ const getAdditionalDependencies = (
version: "0.2.2",
});
break;
case "anthropic":
dependencies.push({
name: "llama-index-llms-anthropic",
version: "0.1.10",
});
dependencies.push({
name: "llama-index-embeddings-huggingface",
version: "0.2.0",
});
break;
case "gemini":
dependencies.push({
name: "llama-index-llms-gemini",
version: "0.1.7",
});
dependencies.push({
name: "llama-index-embeddings-gemini",
version: "0.1.6",
});
break;
}
return dependencies;
+1 -2
View File
@@ -1,14 +1,13 @@
import { PackageManager } from "../helpers/get-pkg-manager";
import { Tool } from "./tools";
export type ModelProvider = "openai" | "ollama" | "anthropic" | "gemini";
export type ModelProvider = "openai" | "ollama";
export type ModelConfig = {
provider: ModelProvider;
apiKey?: string;
model: string;
embeddingModel: string;
dimensions: number;
isConfigured(): boolean;
};
export type TemplateType = "streaming" | "community" | "llamapack";
export type TemplateFramework = "nextjs" | "express" | "fastapi";
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "create-llama",
"version": "0.1.1",
"version": "0.1.0",
"description": "Create LlamaIndex-powered apps with one command",
"keywords": [
"rag",
+5 -4
View File
@@ -14,7 +14,7 @@ import { COMMUNITY_OWNER, COMMUNITY_REPO } from "./helpers/constant";
import { EXAMPLE_FILE } from "./helpers/datasources";
import { templatesDir } from "./helpers/dir";
import { getAvailableLlamapackOptions } from "./helpers/llama-pack";
import { askModelConfig } from "./helpers/providers";
import { askModelConfig, isModelConfigured } from "./helpers/providers";
import { getProjectOptions } from "./helpers/repo";
import { supportedTools, toolsRequireConfig } from "./helpers/tools";
@@ -257,8 +257,7 @@ export const askQuestions = async (
},
];
const modelConfigured =
!program.llamapack && program.modelConfig.isConfigured();
const modelConfigured = isModelConfigured(program.modelConfig);
// If using LlamaParse, require LlamaCloud API key
const llamaCloudKeyConfigured = program.useLlamaParse
? program.llamaCloudKey || process.env["LLAMA_CLOUD_API_KEY"]
@@ -269,7 +268,8 @@ export const askQuestions = async (
!hasVectorDb &&
modelConfigured &&
llamaCloudKeyConfigured &&
!toolsRequireConfig(program.tools)
!toolsRequireConfig(program.tools) &&
!program.llamapack
) {
actionChoices.push({
title:
@@ -398,6 +398,7 @@ export const askQuestions = async (
if (program.framework === "express" || program.framework === "fastapi") {
// if a backend-only framework is selected, ask whether we should create a frontend
// (only for streaming backends)
if (program.frontend === undefined) {
if (ciInfo.isCI) {
program.frontend = getPrefOrDefault("frontend");
@@ -1,4 +1,4 @@
import { BaseToolWithCall, OpenAIAgent, QueryEngineTool } from "llamaindex";
import { BaseTool, OpenAIAgent, QueryEngineTool } from "llamaindex";
import { ToolsFactory } from "llamaindex/tools/ToolsFactory";
import fs from "node:fs/promises";
import path from "node:path";
@@ -6,7 +6,7 @@ import { getDataSource } from "./index";
import { STORAGE_CACHE_DIR } from "./shared";
export async function createChatEngine() {
let tools: BaseToolWithCall[] = [];
let tools: BaseTool[] = [];
// Add a query engine tool if we have a data source
// Delete this code if you don't have a data source
+1 -4
View File
@@ -27,10 +27,7 @@ def llama_parse_parser():
def get_file_documents(config: FileLoaderConfig):
from llama_index.core.readers import SimpleDirectoryReader
reader = SimpleDirectoryReader(
config.data_dir,
recursive=True,
)
reader = SimpleDirectoryReader(config.data_dir, recursive=True, filename_as_id=True)
if config.use_llama_parse:
parser = llama_parse_parser()
reader.file_extractor = {".pdf": parser}
@@ -1,37 +0,0 @@
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.core.storage import StorageContext
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.astra_db import AstraDBVectorStore
from app.settings import init_settings
from app.engine.loaders import get_documents
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource():
init_settings()
logger.info("Creating new index")
documents = get_documents()
store = AstraDBVectorStore(
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
api_endpoint=os.environ["ASTRA_DB_ENDPOINT"],
collection_name=os.environ["ASTRA_DB_COLLECTION"],
embedding_dimension=int(os.environ["EMBEDDING_DIM"]),
)
storage_context = StorageContext.from_defaults(vector_store=store)
VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
show_progress=True, # this will show you a progress bar as the embeddings are created
)
logger.info(f"Successfully created embeddings in the AstraDB")
if __name__ == "__main__":
generate_datasource()
@@ -1,21 +1,12 @@
import logging
import os
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.astra_db import AstraDBVectorStore
logger = logging.getLogger("uvicorn")
def get_index():
logger.info("Connecting to index from AstraDB...")
def get_vector_store():
store = AstraDBVectorStore(
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
api_endpoint=os.environ["ASTRA_DB_ENDPOINT"],
collection_name=os.environ["ASTRA_DB_COLLECTION"],
embedding_dimension=int(os.environ["EMBEDDING_DIM"]),
)
index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished connecting to index from AstraDB.")
return index
return store
@@ -1,39 +0,0 @@
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.core.storage import StorageContext
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.milvus import MilvusVectorStore
from app.settings import init_settings
from app.engine.loaders import get_documents
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource():
init_settings()
logger.info("Creating new index")
# load the documents and create the index
documents = get_documents()
store = MilvusVectorStore(
uri=os.environ["MILVUS_ADDRESS"],
user=os.getenv("MILVUS_USERNAME"),
password=os.getenv("MILVUS_PASSWORD"),
collection_name=os.getenv("MILVUS_COLLECTION"),
dim=int(os.getenv("EMBEDDING_DIM")),
)
storage_context = StorageContext.from_defaults(vector_store=store)
VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
show_progress=True, # this will show you a progress bar as the embeddings are created
)
logger.info(f"Successfully created embeddings in the Milvus")
if __name__ == "__main__":
generate_datasource()
@@ -1,22 +0,0 @@
import logging
import os
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.milvus import MilvusVectorStore
logger = logging.getLogger("uvicorn")
def get_index():
logger.info("Connecting to index from Milvus...")
store = MilvusVectorStore(
uri=os.getenv("MILVUS_ADDRESS"),
user=os.getenv("MILVUS_USERNAME"),
password=os.getenv("MILVUS_PASSWORD"),
collection_name=os.getenv("MILVUS_COLLECTION"),
dim=int(os.getenv("EMBEDDING_DIM")),
)
index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished connecting to index from Milvus.")
return index
@@ -0,0 +1,13 @@
import os
from llama_index.vector_stores.milvus import MilvusVectorStore
def get_vector_store():
store = MilvusVectorStore(
uri=os.environ["MILVUS_ADDRESS"],
user=os.getenv("MILVUS_USERNAME"),
password=os.getenv("MILVUS_PASSWORD"),
collection_name=os.getenv("MILVUS_COLLECTION"),
dim=int(os.getenv("EMBEDDING_DIM")),
)
return store
@@ -1,43 +0,0 @@
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.core.storage import StorageContext
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from app.settings import init_settings
from app.engine.loaders import get_documents
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource():
init_settings()
logger.info("Creating new index")
# load the documents and create the index
documents = get_documents()
store = MongoDBAtlasVectorSearch(
db_name=os.environ["MONGODB_DATABASE"],
collection_name=os.environ["MONGODB_VECTORS"],
index_name=os.environ["MONGODB_VECTOR_INDEX"],
)
storage_context = StorageContext.from_defaults(vector_store=store)
VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
show_progress=True, # this will show you a progress bar as the embeddings are created
)
logger.info(
f"Successfully created embeddings in the MongoDB collection {os.environ['MONGODB_VECTORS']}"
)
logger.info(
"""IMPORTANT: You can't query your index yet because you need to create a vector search index in MongoDB's UI now.
See https://github.com/run-llama/mongodb-demo/tree/main?tab=readme-ov-file#create-a-vector-search-index"""
)
if __name__ == "__main__":
generate_datasource()
@@ -1,20 +0,0 @@
import logging
import os
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
logger = logging.getLogger("uvicorn")
def get_index():
logger.info("Connecting to index from MongoDB...")
store = MongoDBAtlasVectorSearch(
db_name=os.environ["MONGODB_DATABASE"],
collection_name=os.environ["MONGODB_VECTORS"],
index_name=os.environ["MONGODB_VECTOR_INDEX"],
)
index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished connecting to index from MongoDB.")
return index
@@ -0,0 +1,11 @@
import os
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
def get_vector_store():
store = MongoDBAtlasVectorSearch(
db_name=os.environ["MONGODB_DATABASE"],
collection_name=os.environ["MONGODB_VECTORS"],
index_name=os.environ["MONGODB_VECTOR_INDEX"],
)
return store
@@ -1 +0,0 @@
STORAGE_DIR = "storage" # directory to cache the generated index
@@ -1,32 +0,0 @@
from dotenv import load_dotenv
load_dotenv()
import logging
from llama_index.core.indices import (
VectorStoreIndex,
)
from app.engine.constants import STORAGE_DIR
from app.engine.loaders import get_documents
from app.settings import init_settings
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource():
init_settings()
logger.info("Creating new index")
# load the documents and create the index
documents = get_documents()
index = VectorStoreIndex.from_documents(
documents,
)
# store it for later
index.storage_context.persist(STORAGE_DIR)
logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
if __name__ == "__main__":
generate_datasource()
@@ -1,20 +0,0 @@
import logging
import os
from app.engine.constants import STORAGE_DIR
from llama_index.core.storage import StorageContext
from llama_index.core.indices import load_index_from_storage
logger = logging.getLogger("uvicorn")
def get_index():
# check if storage already exists
if not os.path.exists(STORAGE_DIR):
return None
# load the existing index
logger.info(f"Loading index from {STORAGE_DIR}...")
storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR)
index = load_index_from_storage(storage_context)
logger.info(f"Finished loading index from {STORAGE_DIR}")
return index
@@ -0,0 +1,16 @@
import os
from llama_index.core.vector_stores import SimpleVectorStore
from app.constants import STORAGE_DIR
def get_vector_store():
if not os.path.exists(STORAGE_DIR):
# Vector store hasn't been persisted before, create a new one
vector_store = SimpleVectorStore()
else:
# Vector store has already been persisted before at STORAGE_DIR - load it
vector_store = SimpleVectorStore.from_persist_dir(
STORAGE_DIR, namespace="default"
)
return vector_store
@@ -1,2 +0,0 @@
PGVECTOR_SCHEMA = "public"
PGVECTOR_TABLE = "llamaindex_embedding"
@@ -1,35 +0,0 @@
from dotenv import load_dotenv
load_dotenv()
import logging
from llama_index.core.indices import VectorStoreIndex
from llama_index.core.storage import StorageContext
from app.engine.loaders import get_documents
from app.settings import init_settings
from app.engine.utils import init_pg_vector_store_from_env
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource():
init_settings()
logger.info("Creating new index")
# load the documents and create the index
documents = get_documents()
store = init_pg_vector_store_from_env()
storage_context = StorageContext.from_defaults(vector_store=store)
VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
show_progress=True, # this will show you a progress bar as the embeddings are created
)
logger.info(
f"Successfully created embeddings in the PG vector store, schema={store.schema_name} table={store.table_name}"
)
if __name__ == "__main__":
generate_datasource()
@@ -1,13 +0,0 @@
import logging
from llama_index.core.indices.vector_store import VectorStoreIndex
from app.engine.utils import init_pg_vector_store_from_env
logger = logging.getLogger("uvicorn")
def get_index():
logger.info("Connecting to index from PGVector...")
store = init_pg_vector_store_from_env()
index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished connecting to index from PGVector.")
return index
@@ -1,10 +1,13 @@
import os
from llama_index.vector_stores.postgres import PGVectorStore
from urllib.parse import urlparse
from app.engine.constants import PGVECTOR_SCHEMA, PGVECTOR_TABLE
STORAGE_DIR = "storage"
PGVECTOR_SCHEMA = "public"
PGVECTOR_TABLE = "llamaindex_embedding"
def init_pg_vector_store_from_env():
def get_vector_store():
original_conn_string = os.environ.get("PG_CONNECTION_STRING")
if original_conn_string is None or original_conn_string == "":
raise ValueError("PG_CONNECTION_STRING environment variable is not set.")
@@ -24,4 +27,5 @@ def init_pg_vector_store_from_env():
async_connection_string=async_conn_string,
schema_name=PGVECTOR_SCHEMA,
table_name=PGVECTOR_TABLE,
embed_dim=int(os.environ.get("EMBEDDING_DIM", 768)),
)
@@ -1,39 +0,0 @@
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.core.storage import StorageContext
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.pinecone import PineconeVectorStore
from app.settings import init_settings
from app.engine.loaders import get_documents
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource():
init_settings()
logger.info("Creating new index")
# load the documents and create the index
documents = get_documents()
store = PineconeVectorStore(
api_key=os.environ["PINECONE_API_KEY"],
index_name=os.environ["PINECONE_INDEX_NAME"],
environment=os.environ["PINECONE_ENVIRONMENT"],
)
storage_context = StorageContext.from_defaults(vector_store=store)
VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
show_progress=True, # this will show you a progress bar as the embeddings are created
)
logger.info(
f"Successfully created embeddings and save to your Pinecone index {os.environ['PINECONE_INDEX_NAME']}"
)
if __name__ == "__main__":
generate_datasource()
@@ -1,20 +0,0 @@
import logging
import os
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.pinecone import PineconeVectorStore
logger = logging.getLogger("uvicorn")
def get_index():
logger.info("Connecting to index from Pinecone...")
store = PineconeVectorStore(
api_key=os.environ["PINECONE_API_KEY"],
index_name=os.environ["PINECONE_INDEX_NAME"],
environment=os.environ["PINECONE_ENVIRONMENT"],
)
index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished connecting to index from Pinecone.")
return index
@@ -0,0 +1,11 @@
import os
from llama_index.vector_stores.pinecone import PineconeVectorStore
def get_vector_store():
store = PineconeVectorStore(
api_key=os.environ["PINECONE_API_KEY"],
index_name=os.environ["PINECONE_INDEX_NAME"],
environment=os.environ["PINECONE_ENVIRONMENT"],
)
return store
@@ -1,37 +0,0 @@
import logging
import os
from app.engine.loaders import get_documents
from app.settings import init_settings
from dotenv import load_dotenv
from llama_index.core.indices import VectorStoreIndex
from llama_index.core.storage import StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
load_dotenv()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource():
init_settings()
logger.info("Creating new index with Qdrant")
# load the documents and create the index
documents = get_documents()
store = QdrantVectorStore(
collection_name=os.getenv("QDRANT_COLLECTION"),
url=os.getenv("QDRANT_URL"),
api_key=os.getenv("QDRANT_API_KEY"),
)
storage_context = StorageContext.from_defaults(vector_store=store)
VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
show_progress=True, # this will show you a progress bar as the embeddings are created
)
logger.info(
f"Successfully uploaded documents to the {os.getenv('QDRANT_COLLECTION')} collection."
)
if __name__ == "__main__":
generate_datasource()
@@ -1,20 +0,0 @@
import logging
import os
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore
logger = logging.getLogger("uvicorn")
def get_index():
logger.info("Connecting to Qdrant collection..")
store = QdrantVectorStore(
collection_name=os.getenv("QDRANT_COLLECTION"),
url=os.getenv("QDRANT_URL"),
api_key=os.getenv("QDRANT_API_KEY"),
)
index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished connecting to Qdrant collection.")
return index
@@ -0,0 +1,11 @@
import os
from llama_index.vector_stores.qdrant import QdrantVectorStore
def get_vector_store():
store = QdrantVectorStore(
collection_name=os.getenv("QDRANT_COLLECTION"),
url=os.getenv("QDRANT_URL"),
api_key=os.getenv("QDRANT_API_KEY"),
)
return store
@@ -1,5 +1,4 @@
import { VectorStoreIndex } from "llamaindex";
import { storageContextFromDefaults } from "llamaindex/storage/StorageContext";
import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
import * as dotenv from "dotenv";
@@ -1,5 +1,8 @@
import { SimpleDocumentStore, VectorStoreIndex } from "llamaindex";
import { storageContextFromDefaults } from "llamaindex/storage/StorageContext";
import {
SimpleDocumentStore,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
import { STORAGE_CACHE_DIR } from "./shared";
export async function getDataSource() {
@@ -14,8 +14,7 @@
"cors": "^2.8.5",
"dotenv": "^16.3.1",
"express": "^4.18.2",
"llamaindex": "0.3.3",
"pdf2json": "3.0.5"
"llamaindex": "0.2.10"
},
"devDependencies": {
"@types/cors": "^2.8.16",
@@ -1,11 +1,6 @@
import { Message, StreamData, streamToResponse } from "ai";
import { Request, Response } from "express";
import {
CallbackManager,
ChatMessage,
MessageContent,
Settings,
} from "llamaindex";
import { ChatMessage, MessageContent, Settings } from "llamaindex";
import { createChatEngine } from "./engine/chat";
import { LlamaIndexStream } from "./llamaindex-stream";
import { appendEventData } from "./stream-helper";
@@ -50,15 +45,14 @@ export const chat = async (req: Request, res: Response) => {
// Init Vercel AI StreamData
const vercelStreamData = new StreamData();
appendEventData(
vercelStreamData,
`Retrieving context for query: '${userMessage.content}'`,
);
// Setup callbacks
const callbackManager = new CallbackManager();
callbackManager.on("retrieve", (data) => {
// Setup callback for streaming data before chatting
Settings.callbackManager.on("retrieve", (data) => {
const { nodes } = data.detail;
appendEventData(
vercelStreamData,
`Retrieving context for query: '${userMessage.content}'`,
);
appendEventData(
vercelStreamData,
`Retrieved ${nodes.length} sources to use as context for the query`,
@@ -66,23 +60,31 @@ export const chat = async (req: Request, res: Response) => {
});
// Calling LlamaIndex's ChatEngine to get a streamed response
const response = await Settings.withCallbackManager(callbackManager, () => {
return chatEngine.chat({
message: userMessageContent,
chatHistory: messages as ChatMessage[],
stream: true,
});
const response = await chatEngine.chat({
message: userMessageContent,
chatHistory: messages as ChatMessage[],
stream: true,
});
// Return a stream, which can be consumed by the Vercel/AI client
const stream = LlamaIndexStream(response, vercelStreamData, {
const { stream } = LlamaIndexStream(response, vercelStreamData, {
parserOptions: {
image_url: data?.imageUrl,
},
});
const processedStream = stream.pipeThrough(vercelStreamData.stream);
return streamToResponse(processedStream, res);
// Pipe LlamaIndexStream to response
const processedStream = stream.pipeThrough(vercelStreamData.stream);
return streamToResponse(processedStream, res, {
headers: {
// response MUST have the `X-Experimental-Stream-Data: 'true'` header
// so that the client uses the correct parsing logic, see
// https://sdk.vercel.ai/docs/api-reference/stream-data#on-the-server
"X-Experimental-Stream-Data": "true",
"Content-Type": "text/plain; charset=utf-8",
"Access-Control-Expose-Headers": "X-Experimental-Stream-Data",
},
});
} catch (error) {
console.error("[LlamaIndex]", error);
return res.status(500).json({
@@ -1,17 +1,10 @@
import {
Anthropic,
GEMINI_EMBEDDING_MODEL,
GEMINI_MODEL,
Gemini,
GeminiEmbedding,
Ollama,
OllamaEmbedding,
OpenAI,
OpenAIEmbedding,
Settings,
} from "llamaindex";
import { HuggingFaceEmbedding } from "llamaindex/embeddings/HuggingFaceEmbedding";
import { OllamaEmbedding } from "llamaindex/embeddings/OllamaEmbedding";
import { ALL_AVAILABLE_ANTHROPIC_MODELS } from "llamaindex/llm/anthropic";
import { Ollama } from "llamaindex/llm/ollama";
const CHUNK_SIZE = 512;
const CHUNK_OVERLAP = 20;
@@ -19,21 +12,10 @@ const CHUNK_OVERLAP = 20;
export const initSettings = async () => {
// HINT: you can delete the initialization code for unused model providers
console.log(`Using '${process.env.MODEL_PROVIDER}' model provider`);
if (!process.env.MODEL || !process.env.EMBEDDING_MODEL) {
throw new Error("'MODEL' and 'EMBEDDING_MODEL' env variables must be set.");
}
switch (process.env.MODEL_PROVIDER) {
case "ollama":
initOllama();
break;
case "anthropic":
initAnthropic();
break;
case "gemini":
initGemini();
break;
default:
initOpenAI();
break;
@@ -56,6 +38,11 @@ function initOpenAI() {
}
function initOllama() {
if (!process.env.MODEL || !process.env.EMBEDDING_MODEL) {
throw new Error(
"Using Ollama as model provider, 'MODEL' and 'EMBEDDING_MODEL' env variables must be set.",
);
}
Settings.llm = new Ollama({
model: process.env.MODEL ?? "",
});
@@ -63,25 +50,3 @@ function initOllama() {
model: process.env.EMBEDDING_MODEL ?? "",
});
}
function initAnthropic() {
const embedModelMap: Record<string, string> = {
"all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
"all-mpnet-base-v2": "Xenova/all-mpnet-base-v2",
};
Settings.llm = new Anthropic({
model: process.env.MODEL as keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS,
});
Settings.embedModel = new HuggingFaceEmbedding({
modelType: embedModelMap[process.env.EMBEDDING_MODEL!],
});
}
function initGemini() {
Settings.llm = new Gemini({
model: process.env.MODEL as GEMINI_MODEL,
});
Settings.embedModel = new GeminiEmbedding({
model: process.env.EMBEDDING_MODEL as GEMINI_EMBEDDING_MODEL,
});
}
@@ -9,22 +9,16 @@ import {
Metadata,
NodeWithScore,
Response,
ToolCallLLMMessageOptions,
StreamingAgentChatResponse,
} from "llamaindex";
import { AgentStreamChatResponse } from "llamaindex/agent/base";
import { appendImageData, appendSourceData } from "./stream-helper";
type LlamaIndexResponse =
| AgentStreamChatResponse<ToolCallLLMMessageOptions>
| Response;
type ParserOptions = {
image_url?: string;
};
function createParser(
res: AsyncIterable<LlamaIndexResponse>,
res: AsyncIterable<Response>,
data: StreamData,
opts?: ParserOptions,
) {
@@ -39,27 +33,17 @@ function createParser(
async pull(controller): Promise<void> {
const { value, done } = await it.next();
if (done) {
if (sourceNodes) {
appendSourceData(data, sourceNodes);
}
appendSourceData(data, sourceNodes);
controller.close();
data.close();
return;
}
let delta;
if (value instanceof Response) {
// handle Response type
if (value.sourceNodes) {
// get source nodes from the first response
sourceNodes = value.sourceNodes;
}
delta = value.response ?? "";
} else {
// handle other types
delta = value.response.delta;
if (!sourceNodes) {
// get source nodes from the first response
sourceNodes = value.sourceNodes;
}
const text = trimStartOfStream(delta ?? "");
const text = trimStartOfStream(value.response ?? "");
if (text) {
controller.enqueue(text);
}
@@ -68,14 +52,21 @@ function createParser(
}
export function LlamaIndexStream(
response: AsyncIterable<LlamaIndexResponse>,
response: StreamingAgentChatResponse | AsyncIterable<Response>,
data: StreamData,
opts?: {
callbacks?: AIStreamCallbacksAndOptions;
parserOptions?: ParserOptions;
},
): ReadableStream<Uint8Array> {
return createParser(response, data, opts?.parserOptions)
.pipeThrough(createCallbacksTransformer(opts?.callbacks))
.pipeThrough(createStreamDataTransformer());
): { stream: ReadableStream; data: StreamData } {
const res =
response instanceof StreamingAgentChatResponse
? response.response
: response;
return {
stream: createParser(res, data, opts?.parserOptions)
.pipeThrough(createCallbacksTransformer(opts?.callbacks))
.pipeThrough(createStreamDataTransformer()),
data,
};
}
@@ -0,0 +1 @@
STORAGE_DIR = "storage" # directory to save the stores to (document store and if used, the `SimpleVectorStore`)
@@ -0,0 +1,96 @@
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.core.settings import Settings
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.vector_stores import SimpleVectorStore
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage import StorageContext
from llama_index.core import VectorStoreIndex
from app.constants import STORAGE_DIR
from app.settings import init_settings
from app.engine.loaders import get_documents
from app.engine.vectordb import get_vector_store
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def get_doc_store():
if not os.path.exists(STORAGE_DIR):
docstore = SimpleDocumentStore()
return docstore
else:
return SimpleDocumentStore.from_persist_dir(STORAGE_DIR)
def run_ingestion_pipeline(docstore, vector_store, documents):
# Create ingestion pipeline
ingestion_pipeline = IngestionPipeline(
transformations=[
SentenceSplitter(
chunk_size=Settings.chunk_size,
chunk_overlap=Settings.chunk_overlap,
),
Settings.embed_model,
],
docstore=docstore,
docstore_strategy="upserts_and_delete",
)
# llama_index having an typing issue when passing vector_store to IngestionPipeline
# so we need to set it manually after initialization
ingestion_pipeline.vector_store = vector_store
# Run the ingestion pipeline and store the results
nodes = ingestion_pipeline.run(show_progress=True, documents=documents)
return nodes
def persist_storage(docstore, vector_store, nodes):
storage_context = StorageContext.from_defaults(
docstore=docstore,
vector_store=vector_store,
)
# SimpleVectorStore does not include index by default
# so we need to create the index manually
# can be removed if using other vector store
if isinstance(vector_store, SimpleVectorStore):
VectorStoreIndex(
nodes=nodes,
storage_context=storage_context,
store_nodes_override=True, # Need enable this to store the nodes and index's id
)
storage_context.persist(STORAGE_DIR)
def generate_datasource():
init_settings()
logger.info("Generate index for the provided data")
# Get the stores and documents or create new ones
documents = get_documents()
docstore = get_doc_store()
vector_store = get_vector_store()
# Run the ingestion pipeline
nodes = run_ingestion_pipeline(
docstore=docstore,
vector_store=vector_store,
documents=documents,
)
# Build the index and persist storage
persist_storage(docstore, vector_store, nodes)
logger.info("Finished generating the index")
if __name__ == "__main__":
generate_datasource()
@@ -0,0 +1,27 @@
import logging
from llama_index.core import load_index_from_storage
from llama_index.core.storage import StorageContext
from llama_index.core.indices.vector_store import VectorStoreIndex
from llama_index.core.vector_stores.simple import SimpleVectorStore
from app.constants import STORAGE_DIR
from app.engine.vectordb import get_vector_store
logger = logging.getLogger("uvicorn")
def get_index():
logger.info("Loading the index...")
store = get_vector_store()
# If the store is a SimpleVectorStore, we need to load the index from the storage
if isinstance(store, SimpleVectorStore):
index = load_index_from_storage(
StorageContext.from_defaults(
vector_store=store,
persist_dir=STORAGE_DIR,
)
)
else:
index = VectorStoreIndex.from_vector_store(store)
logger.info("Loaded index successfully.")
return index
@@ -9,10 +9,6 @@ def init_settings():
init_openai()
elif model_provider == "ollama":
init_ollama()
elif model_provider == "anthropic":
init_anthropic()
elif model_provider == "gemini":
init_gemini()
else:
raise ValueError(f"Invalid model provider: {model_provider}")
Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
@@ -46,47 +42,3 @@ def init_openai():
"dimensions": int(dimensions) if dimensions is not None else None,
}
Settings.embed_model = OpenAIEmbedding(**config)
def init_anthropic():
from llama_index.llms.anthropic import Anthropic
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
model_map: Dict[str, str] = {
"claude-3-opus": "claude-3-opus-20240229",
"claude-3-sonnet": "claude-3-sonnet-20240229",
"claude-3-haiku": "claude-3-haiku-20240307",
"claude-2.1": "claude-2.1",
"claude-instant-1.2": "claude-instant-1.2",
}
embed_model_map: Dict[str, str] = {
"all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
"all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2",
}
Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")])
Settings.embed_model = HuggingFaceEmbedding(
model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
)
def init_gemini():
from llama_index.llms.gemini import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding
model_map: Dict[str, str] = {
"gemini-1.5-pro-latest": "models/gemini-1.5-pro-latest",
"gemini-pro": "models/gemini-pro",
"gemini-pro-vision": "models/gemini-pro-vision",
}
embed_model_map: Dict[str, str] = {
"embedding-001": "models/embedding-001",
"text-embedding-004": "models/text-embedding-004",
}
Settings.llm = Gemini(model=model_map[os.getenv("MODEL")])
Settings.embed_model = GeminiEmbedding(
model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
)
@@ -1,17 +1,10 @@
import {
Anthropic,
GEMINI_EMBEDDING_MODEL,
GEMINI_MODEL,
Gemini,
GeminiEmbedding,
Ollama,
OllamaEmbedding,
OpenAI,
OpenAIEmbedding,
Settings,
} from "llamaindex";
import { HuggingFaceEmbedding } from "llamaindex/embeddings/HuggingFaceEmbedding";
import { OllamaEmbedding } from "llamaindex/embeddings/OllamaEmbedding";
import { ALL_AVAILABLE_ANTHROPIC_MODELS } from "llamaindex/llm/anthropic";
import { Ollama } from "llamaindex/llm/ollama";
const CHUNK_SIZE = 512;
const CHUNK_OVERLAP = 20;
@@ -19,21 +12,10 @@ const CHUNK_OVERLAP = 20;
export const initSettings = async () => {
// HINT: you can delete the initialization code for unused model providers
console.log(`Using '${process.env.MODEL_PROVIDER}' model provider`);
if (!process.env.MODEL || !process.env.EMBEDDING_MODEL) {
throw new Error("'MODEL' and 'EMBEDDING_MODEL' env variables must be set.");
}
switch (process.env.MODEL_PROVIDER) {
case "ollama":
initOllama();
break;
case "anthropic":
initAnthropic();
break;
case "gemini":
initGemini();
break;
default:
initOpenAI();
break;
@@ -56,6 +38,11 @@ function initOpenAI() {
}
function initOllama() {
if (!process.env.MODEL || !process.env.EMBEDDING_MODEL) {
throw new Error(
"Using Ollama as model provider, 'MODEL' and 'EMBEDDING_MODEL' env variables must be set.",
);
}
Settings.llm = new Ollama({
model: process.env.MODEL ?? "",
});
@@ -63,25 +50,3 @@ function initOllama() {
model: process.env.EMBEDDING_MODEL ?? "",
});
}
function initAnthropic() {
const embedModelMap: Record<string, string> = {
"all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
"all-mpnet-base-v2": "Xenova/all-mpnet-base-v2",
};
Settings.llm = new Anthropic({
model: process.env.MODEL as keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS,
});
Settings.embedModel = new HuggingFaceEmbedding({
modelType: embedModelMap[process.env.EMBEDDING_MODEL!],
});
}
function initGemini() {
Settings.llm = new Gemini({
model: process.env.MODEL as GEMINI_MODEL,
});
Settings.embedModel = new GeminiEmbedding({
model: process.env.EMBEDDING_MODEL as GEMINI_EMBEDDING_MODEL,
});
}
@@ -9,22 +9,16 @@ import {
Metadata,
NodeWithScore,
Response,
ToolCallLLMMessageOptions,
StreamingAgentChatResponse,
} from "llamaindex";
import { AgentStreamChatResponse } from "llamaindex/agent/base";
import { appendImageData, appendSourceData } from "./stream-helper";
type LlamaIndexResponse =
| AgentStreamChatResponse<ToolCallLLMMessageOptions>
| Response;
type ParserOptions = {
image_url?: string;
};
function createParser(
res: AsyncIterable<LlamaIndexResponse>,
res: AsyncIterable<Response>,
data: StreamData,
opts?: ParserOptions,
) {
@@ -39,27 +33,17 @@ function createParser(
async pull(controller): Promise<void> {
const { value, done } = await it.next();
if (done) {
if (sourceNodes) {
appendSourceData(data, sourceNodes);
}
appendSourceData(data, sourceNodes);
controller.close();
data.close();
return;
}
let delta;
if (value instanceof Response) {
// handle Response type
if (value.sourceNodes) {
// get source nodes from the first response
sourceNodes = value.sourceNodes;
}
delta = value.response ?? "";
} else {
// handle other types
delta = value.response.delta;
if (!sourceNodes) {
// get source nodes from the first response
sourceNodes = value.sourceNodes;
}
const text = trimStartOfStream(delta ?? "");
const text = trimStartOfStream(value.response ?? "");
if (text) {
controller.enqueue(text);
}
@@ -68,14 +52,21 @@ function createParser(
}
export function LlamaIndexStream(
response: AsyncIterable<LlamaIndexResponse>,
response: StreamingAgentChatResponse | AsyncIterable<Response>,
data: StreamData,
opts?: {
callbacks?: AIStreamCallbacksAndOptions;
parserOptions?: ParserOptions;
},
): ReadableStream<Uint8Array> {
return createParser(response, data, opts?.parserOptions)
.pipeThrough(createCallbacksTransformer(opts?.callbacks))
.pipeThrough(createStreamDataTransformer());
): { stream: ReadableStream; data: StreamData } {
const res =
response instanceof StreamingAgentChatResponse
? response.response
: response;
return {
stream: createParser(res, data, opts?.parserOptions)
.pipeThrough(createCallbacksTransformer(opts?.callbacks))
.pipeThrough(createStreamDataTransformer()),
data,
};
}
@@ -1,11 +1,6 @@
import { initObservability } from "@/app/observability";
import { Message, StreamData, StreamingTextResponse } from "ai";
import {
CallbackManager,
ChatMessage,
MessageContent,
Settings,
} from "llamaindex";
import { ChatMessage, MessageContent, Settings } from "llamaindex";
import { NextRequest, NextResponse } from "next/server";
import { createChatEngine } from "./engine/chat";
import { initSettings } from "./engine/settings";
@@ -62,15 +57,14 @@ export async function POST(request: NextRequest) {
// Init Vercel AI StreamData
const vercelStreamData = new StreamData();
appendEventData(
vercelStreamData,
`Retrieving context for query: '${userMessage.content}'`,
);
// Setup callbacks
const callbackManager = new CallbackManager();
callbackManager.on("retrieve", (data) => {
// Setup callback for streaming data before chatting
Settings.callbackManager.on("retrieve", (data) => {
const { nodes } = data.detail;
appendEventData(
vercelStreamData,
`Retrieving context for query: '${userMessage.content}'`,
);
appendEventData(
vercelStreamData,
`Retrieved ${nodes.length} sources to use as context for the query`,
@@ -78,16 +72,14 @@ export async function POST(request: NextRequest) {
});
// Calling LlamaIndex's ChatEngine to get a streamed response
const response = await Settings.withCallbackManager(callbackManager, () => {
return chatEngine.chat({
message: userMessageContent,
chatHistory: messages as ChatMessage[],
stream: true,
});
const response = await chatEngine.chat({
message: userMessageContent,
chatHistory: messages as ChatMessage[],
stream: true,
});
// Transform LlamaIndex stream to Vercel/AI format
const stream = LlamaIndexStream(response, vercelStreamData, {
const { stream } = LlamaIndexStream(response, vercelStreamData, {
parserOptions: {
image_url: data?.imageUrl,
},
@@ -17,10 +17,9 @@
"class-variance-authority": "^0.7.0",
"clsx": "^1.2.1",
"dotenv": "^16.3.1",
"llamaindex": "0.3.3",
"llamaindex": "0.2.10",
"lucide-react": "^0.294.0",
"next": "^14.0.3",
"pdf2json": "3.0.5",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-markdown": "^8.0.7",