Compare commits

...

11 Commits

Author SHA1 Message Date
leehuwuj 6111943229 update paths for macos 2024-03-22 07:15:42 +07:00
leehuwuj 3e5debb407 update windows and using paths 2024-03-22 07:07:38 +07:00
leehuwuj bf6028f271 add missing llamakey 2024-03-21 14:59:19 +07:00
leehuwuj a4d7737274 update multi file selection for windows 2024-03-21 14:51:43 +07:00
leehuwuj e9f32f27b2 allow to copy multiple files/folders 2024-03-21 09:13:42 +07:00
leehuwuj ce6e4b717c add none data source option 2024-03-21 08:44:21 +07:00
leehuwuj 9bbbd66da1 filter datasource and separate llamaParse question 2024-03-20 16:47:53 +07:00
leehuwuj 485452b9aa add copy loader code 2024-03-20 08:45:25 +07:00
leehuwuj 07af59a08a stg 2024-03-19 17:03:40 +07:00
leehuwuj 6c848a20ad change to array dataSources 2024-03-19 16:57:09 +07:00
leehuwuj 617dbca4f9 add dataSources 2024-03-19 15:02:02 +07:00
13 changed files with 341 additions and 244 deletions
+2 -2
View File
@@ -41,7 +41,7 @@ export async function createApp({
vectorDb,
externalPort,
postInstallAction,
dataSource,
dataSources,
tools,
observability,
}: InstallAppArgs): Promise<void> {
@@ -89,7 +89,7 @@ export async function createApp({
vectorDb,
externalPort,
postInstallAction,
dataSource,
dataSources,
tools,
observability,
};
+31 -22
View File
@@ -5,6 +5,7 @@ import {
TemplateDataSource,
TemplateFramework,
TemplateVectorDB,
WebSourceConfig,
} from "./types";
type EnvVar = {
@@ -99,26 +100,32 @@ const getVectorDBEnvs = (vectorDb: TemplateVectorDB) => {
}
};
const getDataSourceEnvs = (dataSource: TemplateDataSource) => {
switch (dataSource.type) {
case "web":
return [
{
name: "BASE_URL",
description: "The base URL to start web scraping.",
},
{
name: "URL_PREFIX",
description: "The prefix of the URL to start web scraping.",
},
{
name: "MAX_DEPTH",
description: "The maximum depth to scrape.",
},
];
default:
return [];
/**
 * Build the environment-variable entries contributed by the selected data
 * sources.
 *
 * Only sources whose `type` is "web" contribute entries; every other source
 * type is ignored. Each web source adds three entries, in this order:
 * BASE_URL, URL_PREFIX and MAX_DEPTH.
 *
 * @param dataSources - The data sources chosen for the generated app.
 * @returns The entries for all web sources, in the order the sources appear.
 */
const getDataSourceEnvs = (dataSources: TemplateDataSource[]) => {
  return dataSources.flatMap((entry) => {
    // Non-web sources need no environment configuration.
    if (entry.type !== "web") {
      return [];
    }

    const webConfig = entry.config as WebSourceConfig;
    return [
      {
        name: "BASE_URL",
        description: "The base URL to start web scraping.",
        value: webConfig.baseUrl,
      },
      {
        // The prefix is populated from the same base URL as BASE_URL.
        name: "URL_PREFIX",
        description: "The prefix of the URL to start web scraping.",
        value: webConfig.baseUrl,
      },
      {
        // Stays undefined when no depth is configured.
        name: "MAX_DEPTH",
        description: "The maximum depth to scrape.",
        value: webConfig.depth?.toString(),
      },
    ];
  });
};
export const createBackendEnvFile = async (
@@ -130,7 +137,7 @@ export const createBackendEnvFile = async (
model?: string;
embeddingModel?: string;
framework?: TemplateFramework;
dataSource?: TemplateDataSource;
dataSources?: TemplateDataSource[];
port?: number;
},
) => {
@@ -152,7 +159,7 @@ export const createBackendEnvFile = async (
// Add vector database environment variables
...(opts.vectorDb ? getVectorDBEnvs(opts.vectorDb) : []),
// Add data source environment variables
...(opts.dataSource ? getDataSourceEnvs(opts.dataSource) : []),
...(opts.dataSources ? getDataSourceEnvs(opts.dataSources) : []),
];
let envVars: EnvVar[] = [];
if (opts.framework === "fastapi") {
@@ -204,7 +211,9 @@ We have provided context information below.
Given this information, please answer the question: {query_str}
"`,
},
(opts?.dataSource?.config as FileSourceConfig).useLlamaParse
opts?.dataSources?.some(
(ds) => (ds.config as FileSourceConfig).useLlamaParse,
)
? {
name: "LLAMA_CLOUD_API_KEY",
description: `The Llama Cloud API key.`,
+35 -19
View File
@@ -27,8 +27,8 @@ async function generateContextData(
packageManager?: PackageManager,
openAiKey?: string,
vectorDb?: TemplateVectorDB,
dataSource?: TemplateDataSource,
llamaCloudKey?: string,
useLlamaParse?: boolean,
) {
if (packageManager) {
const runGenerate = `${cyan(
@@ -37,8 +37,7 @@ async function generateContextData(
: `${packageManager} run generate`,
)}`;
const openAiKeyConfigured = openAiKey || process.env["OPENAI_API_KEY"];
const llamaCloudKeyConfigured = (dataSource?.config as FileSourceConfig)
?.useLlamaParse
const llamaCloudKeyConfigured = useLlamaParse
? llamaCloudKey || process.env["LLAMA_CLOUD_API_KEY"]
: true;
const hasVectorDb = vectorDb && vectorDb !== "none";
@@ -82,18 +81,19 @@ const copyContextData = async (
dataSource?: TemplateDataSource,
) => {
const destPath = path.join(root, "data");
const dataSourceConfig = dataSource?.config as FileSourceConfig;
// Copy file
if (dataSource?.type === "file") {
if (dataSourceConfig.path) {
console.log(`\nCopying file to ${cyan(destPath)}\n`);
if (dataSourceConfig.paths) {
await fs.mkdir(destPath, { recursive: true });
await fs.copyFile(
dataSourceConfig.path,
path.join(destPath, path.basename(dataSourceConfig.path)),
console.log(
"Copying data from files:",
dataSourceConfig.paths.toString(),
);
for (const p of dataSourceConfig.paths) {
await fs.copyFile(p, path.join(destPath, path.basename(p)));
}
} else {
console.log("Missing file path in config");
process.exit(1);
@@ -103,13 +103,20 @@ const copyContextData = async (
// Copy folder
if (dataSource?.type === "folder") {
const srcPath =
dataSourceConfig.path ?? path.join(templatesDir, "components", "data");
console.log(`\nCopying data to ${cyan(destPath)}\n`);
await copy("**", destPath, {
parents: true,
cwd: srcPath,
});
// Example data does not have path config, set the default path
const srcPaths = dataSourceConfig.paths ?? [
path.join(templatesDir, "components", "data"),
];
console.log("Copying data from folders: ", srcPaths);
for (const p of srcPaths) {
const folderName = path.basename(p);
const destFolderPath = path.join(destPath, folderName);
await fs.mkdir(destFolderPath, { recursive: true });
await copy("**", destFolderPath, {
parents: true,
cwd: p,
});
}
return;
}
};
@@ -160,12 +167,17 @@ export const installTemplate = async (
model: props.model,
embeddingModel: props.embeddingModel,
framework: props.framework,
dataSource: props.dataSource,
dataSources: props.dataSources,
port: props.externalPort,
});
if (props.engine === "context") {
await copyContextData(props.root, props.dataSource);
console.log("\nGenerating context data...\n");
props.dataSources.forEach(async (ds) => {
if (ds.type === "file" || ds.type === "folder") {
await copyContextData(props.root, ds);
}
});
if (
props.postInstallAction === "runApp" ||
props.postInstallAction === "dependencies"
@@ -175,8 +187,12 @@ export const installTemplate = async (
props.packageManager,
props.openAiKey,
props.vectorDb,
props.dataSource,
props.llamaCloudKey,
props.dataSources.some(
(ds) =>
(ds.type === "file" || ds.type === "folder") &&
(ds.config as FileSourceConfig).useLlamaParse,
),
);
}
}
+45 -18
View File
@@ -175,7 +175,7 @@ export const installPythonTemplate = async ({
framework,
engine,
vectorDb,
dataSource,
dataSources,
tools,
postInstallAction,
}: Pick<
@@ -185,7 +185,7 @@ export const installPythonTemplate = async ({
| "template"
| "engine"
| "vectorDb"
| "dataSource"
| "dataSources"
| "tools"
| "postInstallAction"
>) => {
@@ -250,27 +250,54 @@ export const installPythonTemplate = async ({
});
}
const dataSourceType = dataSource?.type;
if (dataSourceType !== undefined && dataSourceType !== "none") {
let loaderFolder: string;
if (dataSourceType === "file" || dataSourceType === "folder") {
const dataSourceConfig = dataSource?.config as FileSourceConfig;
loaderFolder = dataSourceConfig.useLlamaParse ? "llama_parse" : "file";
} else {
loaderFolder = dataSourceType;
}
await copy("**", enginePath, {
if (dataSources.length > 0 || dataSources[0].type !== "none") {
// Copy loader.py file to enginePath
await copy("loader.py", enginePath, {
parents: true,
cwd: path.join(compPath, "loaders", "python", loaderFolder),
cwd: path.join(compPath, "loaders", "python"),
});
// Copy data source loaders
const loaderPath = path.join(enginePath, "loaders");
for (const source of dataSources) {
const sourceType = source.type;
if (sourceType === "file" || sourceType === "folder") {
const sourceConfig = source.config as FileSourceConfig;
const loaderFolder = sourceConfig.useLlamaParse
? "llama_parse"
: "file";
await copy("**", loaderPath, {
parents: true,
cwd: path.join(compPath, "loaders", "python", loaderFolder),
});
} else {
await copy("**", loaderPath, {
parents: true,
cwd: path.join(compPath, "loaders", "python", sourceType),
});
}
}
}
// const dataSourceType = dataSource?.type;
// if (dataSourceType !== undefined && dataSourceType !== "none") {
// let loaderFolder: string;
// if (dataSourceType === "file" || dataSourceType === "folder") {
// const dataSourceConfig = dataSource?.config as FileSourceConfig;
// loaderFolder = dataSourceConfig.useLlamaParse ? "llama_parse" : "file";
// } else {
// loaderFolder = dataSourceType;
// }
// await copy("**", enginePath, {
// parents: true,
// cwd: path.join(compPath, "loaders", "python", loaderFolder),
// });
// }
}
const addOnDependencies = getAdditionalDependencies(
vectorDb,
dataSource,
tools,
);
const addOnDependencies = dataSources
.map((ds) => getAdditionalDependencies(vectorDb, ds, tools))
.flat();
await addDependencies(root, addOnDependencies);
if (postInstallAction === "runApp" || postInstallAction === "dependencies") {
+2 -2
View File
@@ -19,7 +19,7 @@ export type TemplateDataSourceType = "none" | "file" | "folder" | "web";
export type TemplateObservability = "none" | "opentelemetry";
// Config for both file and folder
export type FileSourceConfig = {
path?: string;
paths?: string[];
useLlamaParse?: boolean;
};
export type WebSourceConfig = {
@@ -44,7 +44,7 @@ export interface InstallTemplateArgs {
framework: TemplateFramework;
engine: TemplateEngine;
ui: TemplateUI;
dataSource?: TemplateDataSource;
dataSources: TemplateDataSource[];
eslint: boolean;
customApiPath?: string;
openAiKey?: string;
+1 -1
View File
@@ -303,7 +303,7 @@ async function run(): Promise<void> {
vectorDb: program.vectorDb,
externalPort: program.externalPort,
postInstallAction: program.postInstallAction,
dataSource: program.dataSource,
dataSources: program.dataSources,
tools: program.tools,
observability: program.observability,
});
+199 -168
View File
@@ -9,6 +9,7 @@ import prompts from "prompts";
import { InstallAppArgs } from "./create-app";
import {
FileSourceConfig,
TemplateDataSource,
TemplateDataSourceType,
TemplateFramework,
} from "./helpers";
@@ -40,31 +41,34 @@ const MACOS_FILE_SELECTION_SCRIPT = `
osascript -l JavaScript -e '
a = Application.currentApplication();
a.includeStandardAdditions = true;
a.chooseFile({ withPrompt: "Please select a file to process:" }).toString()
a.chooseFile({ withPrompt: "Please select files to process:", multipleSelectionsAllowed: true }).map(file => file.toString())
'`;
const MACOS_FOLDER_SELECTION_SCRIPT = `
osascript -l JavaScript -e '
a = Application.currentApplication();
a.includeStandardAdditions = true;
a.chooseFolder({ withPrompt: "Please select a folder to process:" }).toString()
a.chooseFolder({ withPrompt: "Please select folders to process:", multipleSelectionsAllowed: true }).map(folder => folder.toString())
'`;
const WINDOWS_FILE_SELECTION_SCRIPT = `
Add-Type -AssemblyName System.Windows.Forms
$openFileDialog = New-Object System.Windows.Forms.OpenFileDialog
$openFileDialog.InitialDirectory = [Environment]::GetFolderPath('Desktop')
$openFileDialog.Multiselect = $true
$result = $openFileDialog.ShowDialog()
if ($result -eq 'OK') {
$openFileDialog.FileName
$openFileDialog.FileNames
}
`;
const WINDOWS_FOLDER_SELECTION_SCRIPT = `
Add-Type -AssemblyName System.windows.forms
$folderBrowser = New-Object System.Windows.Forms.FolderBrowserDialog
$dialogResult = $folderBrowser.ShowDialog()
if ($dialogResult -eq [System.Windows.Forms.DialogResult]::OK)
{
$folderBrowser.SelectedPath
}
$folderBrowser.SelectedPath = [Environment]::GetFolderPath('Desktop')
$folderBrowser.Description = "Please select folders to process:"
$folderBrowser.ShowNewFolderButton = $true
$folderBrowser.RootFolder = [System.Environment+SpecialFolder]::Desktop
$folderBrowser.SelectedPath = [System.IO.Path]::GetFullPath($folderBrowser.SelectedPath)
$folderBrowser.ShowDialog() | Out-Null
$folderBrowser.SelectedPath, $folderBrowser.SelectedPaths
`;
const defaults: QuestionArgs = {
@@ -81,10 +85,7 @@ const defaults: QuestionArgs = {
communityProjectConfig: undefined,
llamapack: "",
postInstallAction: "dependencies",
dataSource: {
type: "none",
config: {},
},
dataSources: [],
tools: [],
};
@@ -122,30 +123,53 @@ const getVectorDbChoices = (framework: TemplateFramework) => {
return displayedChoices;
};
const getDataSourceChoices = (framework: TemplateFramework) => {
const choices = [
{
title: "No data, just a simple chat",
value: "simple",
},
{ title: "Use an example PDF", value: "exampleFile" },
];
if (process.platform === "win32" || process.platform === "darwin") {
export const getDataSourceChoices = (
framework: TemplateFramework,
selectedDataSource: TemplateDataSource[],
) => {
const choices = [];
if (selectedDataSource.length > 0) {
choices.push({
title: `Use a local file (${supportedContextFileTypes.join(", ")})`,
value: "localFile",
});
choices.push({
title: `Use a local folder`,
value: "localFolder",
title: "No",
value: "none",
});
}
if (framework === "fastapi") {
if (selectedDataSource === undefined || selectedDataSource.length === 0) {
choices.push({
title: "No data, just a simple chat",
value: "none",
});
choices.push({
title: "Use an example PDF",
value: "exampleFile",
});
}
if (!selectedDataSource.some((ds) => ds.type === "file")) {
choices.push({
title: `Use local files (${supportedContextFileTypes.join(", ")})`,
value: "file",
});
}
if (!selectedDataSource.some((ds) => ds.type === "folder")) {
choices.push({
title: "Use local folder",
value: "folder",
});
}
if (
!selectedDataSource.some((ds) => ds.type === "web") &&
(process.platform === "win32" || process.platform === "darwin") &&
framework === "fastapi"
) {
choices.push({
title: "Use website content (requires Chrome)",
value: "web",
});
}
return choices;
};
@@ -173,9 +197,15 @@ const selectLocalContextData = async (type: TemplateDataSourceType) => {
process.exit(1);
}
selectedPath = execSync(execScript, execOpts).toString().trim();
if (type === "file") {
const fileType = path.extname(selectedPath);
if (!supportedContextFileTypes.includes(fileType)) {
const paths =
process.platform === "win32"
? selectedPath.split("\r\n")
: selectedPath.split(", ");
for (const p of paths) {
if (
type == "file" &&
!supportedContextFileTypes.includes(path.extname(p))
) {
console.log(
red(
`Please select a supported file type: ${supportedContextFileTypes}`,
@@ -184,7 +214,7 @@ const selectLocalContextData = async (type: TemplateDataSourceType) => {
process.exit(1);
}
}
return selectedPath;
return paths;
} catch (error) {
console.log(
red(
@@ -309,9 +339,11 @@ export const askQuestions = async (
const openAiKeyConfigured =
program.openAiKey || process.env["OPENAI_API_KEY"];
// If using LlamaParse, require LlamaCloud API key
const llamaCloudKeyConfigured = (
program.dataSource?.config as FileSourceConfig
)?.useLlamaParse
const useLlamaParse = program.dataSources.some(
(ds) =>
ds.type === "file" && (ds.config as FileSourceConfig).useLlamaParse,
);
const llamaCloudKeyConfigured = useLlamaParse
? program.llamaCloudKey || process.env["LLAMA_CLOUD_API_KEY"]
: true;
const hasVectorDb = program.vectorDb && program.vectorDb !== "none";
@@ -614,124 +646,149 @@ export const askQuestions = async (
console.log("File or folder not found");
process.exit(1);
} else {
program.dataSource = {
type: fs.lstatSync(program.files).isDirectory() ? "folder" : "file",
config: {
path: program.files,
program.dataSources = [
{
type: fs.lstatSync(program.files).isDirectory() ? "folder" : "file",
config: {
paths: program.files.split(","),
},
},
};
];
}
}
// Asking for data source
if (!program.engine) {
program.dataSources = getPrefOrDefault("dataSources");
if (ciInfo.isCI) {
program.engine = getPrefOrDefault("engine");
} else {
const { dataSource } = await prompts(
{
type: "select",
name: "dataSource",
message: "Which data source would you like to use?",
choices: getDataSourceChoices(program.framework),
initial: 1,
},
handlers,
);
// Initialize with default config
program.dataSource = getPrefOrDefault("dataSource");
if (program.dataSource) {
switch (dataSource) {
case "simple":
program.engine = "simple";
program.dataSource = { type: "none", config: {} };
break;
case "exampleFile":
program.engine = "context";
// Treat example as a folder data source with no config
program.dataSource = { type: "folder", config: {} };
break;
case "localFile":
program.engine = "context";
program.dataSource = {
type: "file",
config: {
path: await selectLocalContextData("file"),
},
};
break;
case "localFolder":
program.engine = "context";
program.dataSource = {
type: "folder",
config: {
path: await selectLocalContextData("folder"),
},
};
break;
case "web":
program.engine = "context";
program.dataSource.type = "web";
break;
}
}
}
} else if (!program.dataSource) {
// Handle a case when engine is specified but dataSource is not
if (program.engine === "context") {
program.dataSource = {
type: "folder",
config: {},
};
} else if (program.engine === "simple") {
program.dataSource = {
type: "none",
config: {},
};
}
}
if (
(program.dataSource?.type === "file" ||
program.dataSource?.type === "folder") &&
program.framework === "fastapi"
) {
if (ciInfo.isCI) {
program.llamaCloudKey = getPrefOrDefault("llamaCloudKey");
} else {
const dataSourceConfig = program.dataSource.config as FileSourceConfig;
dataSourceConfig.useLlamaParse = program.llamaParse;
// Is pdf file selected as data source or is it a folder data source
const askingLlamaParse =
dataSourceConfig.useLlamaParse === undefined &&
(program.dataSource.type === "folder"
? true
: dataSourceConfig.path &&
path.extname(dataSourceConfig.path) === ".pdf");
// Ask if user wants to use LlamaParse
if (askingLlamaParse) {
const { useLlamaParse } = await prompts(
for (let i = 0; i < 2; i++) {
const { selectedSource } = await prompts(
{
type: "toggle",
name: "useLlamaParse",
type: "select",
name: "selectedSource",
message:
"Would you like to use LlamaParse (improved parser for RAG - requires API key)?",
initial: true,
active: "yes",
inactive: "no",
i === 0
? "Which data source would you like to use?"
: "Would you like to add another data source?",
choices: getDataSourceChoices(
program.framework,
program.dataSources,
),
initial: 0,
},
handlers,
);
dataSourceConfig.useLlamaParse = useLlamaParse;
program.dataSource.config = dataSourceConfig;
// Asking for data source config
// Select None data source, No need to config and asking for another data source
if (selectedSource === "none") {
if (selectedSource.length === 0) {
program.dataSources = [
{
type: "none",
config: {},
},
];
}
break;
}
const dataSource = {
type: selectedSource === "exampleFile" ? "folder" : selectedSource,
config: {},
};
// Select local file or folder
if (selectedSource === "file" || selectedSource === "folder") {
const selectedPaths = await selectLocalContextData(selectedSource);
dataSource.config = {
paths: selectedPaths,
};
}
// Selected web data source
else if (selectedSource === "web") {
let { baseUrl } = await prompts(
{
type: "text",
name: "baseUrl",
message: "Please provide base URL of the website:",
initial: "https://www.llamaindex.ai",
},
handlers,
);
try {
if (!baseUrl.includes("://")) {
baseUrl = `https://${baseUrl}`;
}
const checkUrl = new URL(baseUrl);
if (
checkUrl.protocol !== "https:" &&
checkUrl.protocol !== "http:"
) {
throw new Error("Invalid protocol");
}
} catch (error) {
console.log(
red(
"Invalid URL provided! Please provide a valid URL (e.g. https://www.llamaindex.ai)",
),
);
process.exit(1);
}
dataSource.config = {
baseUrl: baseUrl,
depth: 1,
};
}
program.dataSources.push(dataSource);
// No need to ask for another data source if user selected example data
if (selectedSource === "exampleFile") {
break;
}
}
// Ask for LlamaCloud API key
if (
dataSourceConfig.useLlamaParse &&
program.llamaCloudKey === undefined
program.dataSources.length === 0 ||
program.dataSources[0].type === "none"
) {
program.engine = "simple";
} else {
program.engine = "context";
}
}
}
// Asking for LlamaParse
// Is user selected pdf file or is there a folder data source
if (!program.llamaParse && program.engine === "context") {
const askingLlamaParse = program.dataSources.some(
(ds) =>
ds.type === "folder" ||
(ds.type === "file" &&
(ds.config as FileSourceConfig).paths?.some(
(p) => path.extname(p) === ".pdf",
)),
);
if (askingLlamaParse) {
const { useLlamaParse } = await prompts(
{
type: "toggle",
name: "useLlamaParse",
message:
"Would you like to use LlamaParse (improved parser for RAG - requires API key)?",
initial: true,
active: "yes",
inactive: "no",
},
handlers,
);
// Ask for LlamaCloud API key
if (useLlamaParse && program.llamaCloudKey === undefined) {
const { llamaCloudKey } = await prompts(
{
type: "text",
@@ -743,41 +800,15 @@ export const askQuestions = async (
);
program.llamaCloudKey = llamaCloudKey;
}
// TODO: Consider separate llamaParse to another config
program.dataSources.forEach((dataSource) => {
if (dataSource.type === "file" || dataSource.type === "folder") {
(dataSource.config as FileSourceConfig).useLlamaParse = useLlamaParse;
}
});
}
}
if (program.dataSource?.type === "web" && program.framework === "fastapi") {
let { baseUrl } = await prompts(
{
type: "text",
name: "baseUrl",
message: "Please provide base URL of the website:",
initial: "https://www.llamaindex.ai",
},
handlers,
);
try {
if (!baseUrl.includes("://")) {
baseUrl = `https://${baseUrl}`;
}
const checkUrl = new URL(baseUrl);
if (checkUrl.protocol !== "https:" && checkUrl.protocol !== "http:") {
throw new Error("Invalid protocol");
}
} catch (error) {
console.log(
red(
"Invalid URL provided! Please provide a valid URL (e.g. https://www.llamaindex.ai)",
),
);
process.exit(1);
}
program.dataSource.config = {
baseUrl: baseUrl,
depth: 1,
};
}
if (program.engine !== "simple" && !program.vectorDb) {
if (ciInfo.isCI) {
program.vectorDb = getPrefOrDefault("vectorDb");
@@ -0,0 +1,12 @@
import os
import importlib
def get_documents():
    """Yield the result of every data-source loader found in ``loaders``.

    Each ``*.py`` file in the ``loaders`` directory next to this file is
    imported as ``app.engine.loaders.<name>`` and its ``get_documents()`` is
    called. This function is a generator: it yields one loader result per
    module (presumably a list of documents -- confirm against the loaders).

    Files whose name starts with an underscore (e.g. ``__init__.py``) are
    skipped, since they are package plumbing rather than loaders. Entries are
    visited in sorted order so the output order is the same on every run.
    """
    loaders_dir = os.path.join(os.path.dirname(__file__), "loaders")
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(loaders_dir)):
        module_name, extension = os.path.splitext(filename)
        # Ignore non-Python entries and private/dunder modules.
        if extension != ".py" or module_name.startswith("_"):
            continue
        module = importlib.import_module(f"app.engine.loaders.{module_name}")
        yield module.get_documents()
@@ -2,10 +2,10 @@ from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.core.indices import (
VectorStoreIndex,
)
from llama_index.core.indices import VectorStoreIndex
from llama_index.core.storage import StorageContext
from app.engine.constants import STORAGE_DIR
from app.engine.loader import get_documents
from app.settings import init_settings
@@ -16,15 +16,17 @@ logger = logging.getLogger()
def generate_datasource():
logger.info("Creating new index")
# load the documents and create the index
documents = get_documents()
index = VectorStoreIndex.from_documents(
documents,
)
# store it for later
index.storage_context.persist(STORAGE_DIR)
logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
storage_context = StorageContext.from_defaults()
docs = []
for doc in get_documents():
storage_context.docstore.add_documents(doc)
docs.extend(doc)
index = VectorStoreIndex.from_documents(docs, storage_context=storage_context)
index.storage_context.persist(persist_dir=STORAGE_DIR)
logger.info(f"Generated index at {STORAGE_DIR}")
if __name__ == "__main__":