mirror of
https://github.com/run-llama/create-llama.git
synced 2026-07-02 19:14:28 -04:00
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6111943229 | |||
| 3e5debb407 | |||
| bf6028f271 | |||
| a4d7737274 | |||
| e9f32f27b2 | |||
| ce6e4b717c | |||
| 9bbbd66da1 | |||
| 485452b9aa | |||
| 07af59a08a | |||
| 6c848a20ad | |||
| 617dbca4f9 |
+2
-2
@@ -41,7 +41,7 @@ export async function createApp({
|
||||
vectorDb,
|
||||
externalPort,
|
||||
postInstallAction,
|
||||
dataSource,
|
||||
dataSources,
|
||||
tools,
|
||||
observability,
|
||||
}: InstallAppArgs): Promise<void> {
|
||||
@@ -89,7 +89,7 @@ export async function createApp({
|
||||
vectorDb,
|
||||
externalPort,
|
||||
postInstallAction,
|
||||
dataSource,
|
||||
dataSources,
|
||||
tools,
|
||||
observability,
|
||||
};
|
||||
|
||||
+31
-22
@@ -5,6 +5,7 @@ import {
|
||||
TemplateDataSource,
|
||||
TemplateFramework,
|
||||
TemplateVectorDB,
|
||||
WebSourceConfig,
|
||||
} from "./types";
|
||||
|
||||
type EnvVar = {
|
||||
@@ -99,26 +100,32 @@ const getVectorDBEnvs = (vectorDb: TemplateVectorDB) => {
|
||||
}
|
||||
};
|
||||
|
||||
const getDataSourceEnvs = (dataSource: TemplateDataSource) => {
|
||||
switch (dataSource.type) {
|
||||
case "web":
|
||||
return [
|
||||
{
|
||||
name: "BASE_URL",
|
||||
description: "The base URL to start web scraping.",
|
||||
},
|
||||
{
|
||||
name: "URL_PREFIX",
|
||||
description: "The prefix of the URL to start web scraping.",
|
||||
},
|
||||
{
|
||||
name: "MAX_DEPTH",
|
||||
description: "The maximum depth to scrape.",
|
||||
},
|
||||
];
|
||||
default:
|
||||
return [];
|
||||
const getDataSourceEnvs = (dataSources: TemplateDataSource[]) => {
|
||||
const envs = [];
|
||||
for (const source of dataSources) {
|
||||
switch (source.type) {
|
||||
case "web":
|
||||
const config = source.config as WebSourceConfig;
|
||||
envs.push(
|
||||
{
|
||||
name: "BASE_URL",
|
||||
description: "The base URL to start web scraping.",
|
||||
value: config.baseUrl,
|
||||
},
|
||||
{
|
||||
name: "URL_PREFIX",
|
||||
description: "The prefix of the URL to start web scraping.",
|
||||
value: config.baseUrl,
|
||||
},
|
||||
{
|
||||
name: "MAX_DEPTH",
|
||||
description: "The maximum depth to scrape.",
|
||||
value: config.depth?.toString(),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
return envs;
|
||||
};
|
||||
|
||||
export const createBackendEnvFile = async (
|
||||
@@ -130,7 +137,7 @@ export const createBackendEnvFile = async (
|
||||
model?: string;
|
||||
embeddingModel?: string;
|
||||
framework?: TemplateFramework;
|
||||
dataSource?: TemplateDataSource;
|
||||
dataSources?: TemplateDataSource[];
|
||||
port?: number;
|
||||
},
|
||||
) => {
|
||||
@@ -152,7 +159,7 @@ export const createBackendEnvFile = async (
|
||||
// Add vector database environment variables
|
||||
...(opts.vectorDb ? getVectorDBEnvs(opts.vectorDb) : []),
|
||||
// Add data source environment variables
|
||||
...(opts.dataSource ? getDataSourceEnvs(opts.dataSource) : []),
|
||||
...(opts.dataSources ? getDataSourceEnvs(opts.dataSources) : []),
|
||||
];
|
||||
let envVars: EnvVar[] = [];
|
||||
if (opts.framework === "fastapi") {
|
||||
@@ -204,7 +211,9 @@ We have provided context information below.
|
||||
Given this information, please answer the question: {query_str}
|
||||
"`,
|
||||
},
|
||||
(opts?.dataSource?.config as FileSourceConfig).useLlamaParse
|
||||
opts?.dataSources?.some(
|
||||
(ds) => (ds.config as FileSourceConfig).useLlamaParse,
|
||||
)
|
||||
? {
|
||||
name: "LLAMA_CLOUD_API_KEY",
|
||||
description: `The Llama Cloud API key.`,
|
||||
|
||||
+35
-19
@@ -27,8 +27,8 @@ async function generateContextData(
|
||||
packageManager?: PackageManager,
|
||||
openAiKey?: string,
|
||||
vectorDb?: TemplateVectorDB,
|
||||
dataSource?: TemplateDataSource,
|
||||
llamaCloudKey?: string,
|
||||
useLlamaParse?: boolean,
|
||||
) {
|
||||
if (packageManager) {
|
||||
const runGenerate = `${cyan(
|
||||
@@ -37,8 +37,7 @@ async function generateContextData(
|
||||
: `${packageManager} run generate`,
|
||||
)}`;
|
||||
const openAiKeyConfigured = openAiKey || process.env["OPENAI_API_KEY"];
|
||||
const llamaCloudKeyConfigured = (dataSource?.config as FileSourceConfig)
|
||||
?.useLlamaParse
|
||||
const llamaCloudKeyConfigured = useLlamaParse
|
||||
? llamaCloudKey || process.env["LLAMA_CLOUD_API_KEY"]
|
||||
: true;
|
||||
const hasVectorDb = vectorDb && vectorDb !== "none";
|
||||
@@ -82,18 +81,19 @@ const copyContextData = async (
|
||||
dataSource?: TemplateDataSource,
|
||||
) => {
|
||||
const destPath = path.join(root, "data");
|
||||
|
||||
const dataSourceConfig = dataSource?.config as FileSourceConfig;
|
||||
|
||||
// Copy file
|
||||
if (dataSource?.type === "file") {
|
||||
if (dataSourceConfig.path) {
|
||||
console.log(`\nCopying file to ${cyan(destPath)}\n`);
|
||||
if (dataSourceConfig.paths) {
|
||||
await fs.mkdir(destPath, { recursive: true });
|
||||
await fs.copyFile(
|
||||
dataSourceConfig.path,
|
||||
path.join(destPath, path.basename(dataSourceConfig.path)),
|
||||
console.log(
|
||||
"Copying data from files:",
|
||||
dataSourceConfig.paths.toString(),
|
||||
);
|
||||
for (const p of dataSourceConfig.paths) {
|
||||
await fs.copyFile(p, path.join(destPath, path.basename(p)));
|
||||
}
|
||||
} else {
|
||||
console.log("Missing file path in config");
|
||||
process.exit(1);
|
||||
@@ -103,13 +103,20 @@ const copyContextData = async (
|
||||
|
||||
// Copy folder
|
||||
if (dataSource?.type === "folder") {
|
||||
const srcPath =
|
||||
dataSourceConfig.path ?? path.join(templatesDir, "components", "data");
|
||||
console.log(`\nCopying data to ${cyan(destPath)}\n`);
|
||||
await copy("**", destPath, {
|
||||
parents: true,
|
||||
cwd: srcPath,
|
||||
});
|
||||
// Example data does not have path config, set the default path
|
||||
const srcPaths = dataSourceConfig.paths ?? [
|
||||
path.join(templatesDir, "components", "data"),
|
||||
];
|
||||
console.log("Copying data from folders: ", srcPaths);
|
||||
for (const p of srcPaths) {
|
||||
const folderName = path.basename(p);
|
||||
const destFolderPath = path.join(destPath, folderName);
|
||||
await fs.mkdir(destFolderPath, { recursive: true });
|
||||
await copy("**", destFolderPath, {
|
||||
parents: true,
|
||||
cwd: p,
|
||||
});
|
||||
}
|
||||
return;
|
||||
}
|
||||
};
|
||||
@@ -160,12 +167,17 @@ export const installTemplate = async (
|
||||
model: props.model,
|
||||
embeddingModel: props.embeddingModel,
|
||||
framework: props.framework,
|
||||
dataSource: props.dataSource,
|
||||
dataSources: props.dataSources,
|
||||
port: props.externalPort,
|
||||
});
|
||||
|
||||
if (props.engine === "context") {
|
||||
await copyContextData(props.root, props.dataSource);
|
||||
console.log("\nGenerating context data...\n");
|
||||
props.dataSources.forEach(async (ds) => {
|
||||
if (ds.type === "file" || ds.type === "folder") {
|
||||
await copyContextData(props.root, ds);
|
||||
}
|
||||
});
|
||||
if (
|
||||
props.postInstallAction === "runApp" ||
|
||||
props.postInstallAction === "dependencies"
|
||||
@@ -175,8 +187,12 @@ export const installTemplate = async (
|
||||
props.packageManager,
|
||||
props.openAiKey,
|
||||
props.vectorDb,
|
||||
props.dataSource,
|
||||
props.llamaCloudKey,
|
||||
props.dataSources.some(
|
||||
(ds) =>
|
||||
(ds.type === "file" || ds.type === "folder") &&
|
||||
(ds.config as FileSourceConfig).useLlamaParse,
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
+45
-18
@@ -175,7 +175,7 @@ export const installPythonTemplate = async ({
|
||||
framework,
|
||||
engine,
|
||||
vectorDb,
|
||||
dataSource,
|
||||
dataSources,
|
||||
tools,
|
||||
postInstallAction,
|
||||
}: Pick<
|
||||
@@ -185,7 +185,7 @@ export const installPythonTemplate = async ({
|
||||
| "template"
|
||||
| "engine"
|
||||
| "vectorDb"
|
||||
| "dataSource"
|
||||
| "dataSources"
|
||||
| "tools"
|
||||
| "postInstallAction"
|
||||
>) => {
|
||||
@@ -250,27 +250,54 @@ export const installPythonTemplate = async ({
|
||||
});
|
||||
}
|
||||
|
||||
const dataSourceType = dataSource?.type;
|
||||
if (dataSourceType !== undefined && dataSourceType !== "none") {
|
||||
let loaderFolder: string;
|
||||
if (dataSourceType === "file" || dataSourceType === "folder") {
|
||||
const dataSourceConfig = dataSource?.config as FileSourceConfig;
|
||||
loaderFolder = dataSourceConfig.useLlamaParse ? "llama_parse" : "file";
|
||||
} else {
|
||||
loaderFolder = dataSourceType;
|
||||
}
|
||||
await copy("**", enginePath, {
|
||||
if (dataSources.length > 0 || dataSources[0].type !== "none") {
|
||||
// Copy loader.py file to enginePath
|
||||
await copy("loader.py", enginePath, {
|
||||
parents: true,
|
||||
cwd: path.join(compPath, "loaders", "python", loaderFolder),
|
||||
cwd: path.join(compPath, "loaders", "python"),
|
||||
});
|
||||
|
||||
// Copy data source loaders
|
||||
const loaderPath = path.join(enginePath, "loaders");
|
||||
for (const source of dataSources) {
|
||||
const sourceType = source.type;
|
||||
if (sourceType === "file" || sourceType === "folder") {
|
||||
const sourceConfig = source.config as FileSourceConfig;
|
||||
const loaderFolder = sourceConfig.useLlamaParse
|
||||
? "llama_parse"
|
||||
: "file";
|
||||
await copy("**", loaderPath, {
|
||||
parents: true,
|
||||
cwd: path.join(compPath, "loaders", "python", loaderFolder),
|
||||
});
|
||||
} else {
|
||||
await copy("**", loaderPath, {
|
||||
parents: true,
|
||||
cwd: path.join(compPath, "loaders", "python", sourceType),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// const dataSourceType = dataSource?.type;
|
||||
// if (dataSourceType !== undefined && dataSourceType !== "none") {
|
||||
// let loaderFolder: string;
|
||||
// if (dataSourceType === "file" || dataSourceType === "folder") {
|
||||
// const dataSourceConfig = dataSource?.config as FileSourceConfig;
|
||||
// loaderFolder = dataSourceConfig.useLlamaParse ? "llama_parse" : "file";
|
||||
// } else {
|
||||
// loaderFolder = dataSourceType;
|
||||
// }
|
||||
// await copy("**", enginePath, {
|
||||
// parents: true,
|
||||
// cwd: path.join(compPath, "loaders", "python", loaderFolder),
|
||||
// });
|
||||
// }
|
||||
}
|
||||
|
||||
const addOnDependencies = getAdditionalDependencies(
|
||||
vectorDb,
|
||||
dataSource,
|
||||
tools,
|
||||
);
|
||||
const addOnDependencies = dataSources
|
||||
.map((ds) => getAdditionalDependencies(vectorDb, ds, tools))
|
||||
.flat();
|
||||
await addDependencies(root, addOnDependencies);
|
||||
|
||||
if (postInstallAction === "runApp" || postInstallAction === "dependencies") {
|
||||
|
||||
+2
-2
@@ -19,7 +19,7 @@ export type TemplateDataSourceType = "none" | "file" | "folder" | "web";
|
||||
export type TemplateObservability = "none" | "opentelemetry";
|
||||
// Config for both file and folder
|
||||
export type FileSourceConfig = {
|
||||
path?: string;
|
||||
paths?: string[];
|
||||
useLlamaParse?: boolean;
|
||||
};
|
||||
export type WebSourceConfig = {
|
||||
@@ -44,7 +44,7 @@ export interface InstallTemplateArgs {
|
||||
framework: TemplateFramework;
|
||||
engine: TemplateEngine;
|
||||
ui: TemplateUI;
|
||||
dataSource?: TemplateDataSource;
|
||||
dataSources: TemplateDataSource[];
|
||||
eslint: boolean;
|
||||
customApiPath?: string;
|
||||
openAiKey?: string;
|
||||
|
||||
@@ -303,7 +303,7 @@ async function run(): Promise<void> {
|
||||
vectorDb: program.vectorDb,
|
||||
externalPort: program.externalPort,
|
||||
postInstallAction: program.postInstallAction,
|
||||
dataSource: program.dataSource,
|
||||
dataSources: program.dataSources,
|
||||
tools: program.tools,
|
||||
observability: program.observability,
|
||||
});
|
||||
|
||||
+199
-168
@@ -9,6 +9,7 @@ import prompts from "prompts";
|
||||
import { InstallAppArgs } from "./create-app";
|
||||
import {
|
||||
FileSourceConfig,
|
||||
TemplateDataSource,
|
||||
TemplateDataSourceType,
|
||||
TemplateFramework,
|
||||
} from "./helpers";
|
||||
@@ -40,31 +41,34 @@ const MACOS_FILE_SELECTION_SCRIPT = `
|
||||
osascript -l JavaScript -e '
|
||||
a = Application.currentApplication();
|
||||
a.includeStandardAdditions = true;
|
||||
a.chooseFile({ withPrompt: "Please select a file to process:" }).toString()
|
||||
a.chooseFile({ withPrompt: "Please select files to process:", multipleSelectionsAllowed: true }).map(file => file.toString())
|
||||
'`;
|
||||
const MACOS_FOLDER_SELECTION_SCRIPT = `
|
||||
osascript -l JavaScript -e '
|
||||
a = Application.currentApplication();
|
||||
a.includeStandardAdditions = true;
|
||||
a.chooseFolder({ withPrompt: "Please select a folder to process:" }).toString()
|
||||
a.chooseFolder({ withPrompt: "Please select folders to process:", multipleSelectionsAllowed: true }).map(folder => folder.toString())
|
||||
'`;
|
||||
const WINDOWS_FILE_SELECTION_SCRIPT = `
|
||||
Add-Type -AssemblyName System.Windows.Forms
|
||||
$openFileDialog = New-Object System.Windows.Forms.OpenFileDialog
|
||||
$openFileDialog.InitialDirectory = [Environment]::GetFolderPath('Desktop')
|
||||
$openFileDialog.Multiselect = $true
|
||||
$result = $openFileDialog.ShowDialog()
|
||||
if ($result -eq 'OK') {
|
||||
$openFileDialog.FileName
|
||||
$openFileDialog.FileNames
|
||||
}
|
||||
`;
|
||||
const WINDOWS_FOLDER_SELECTION_SCRIPT = `
|
||||
Add-Type -AssemblyName System.windows.forms
|
||||
$folderBrowser = New-Object System.Windows.Forms.FolderBrowserDialog
|
||||
$dialogResult = $folderBrowser.ShowDialog()
|
||||
if ($dialogResult -eq [System.Windows.Forms.DialogResult]::OK)
|
||||
{
|
||||
$folderBrowser.SelectedPath
|
||||
}
|
||||
$folderBrowser.SelectedPath = [Environment]::GetFolderPath('Desktop')
|
||||
$folderBrowser.Description = "Please select folders to process:"
|
||||
$folderBrowser.ShowNewFolderButton = $true
|
||||
$folderBrowser.RootFolder = [System.Environment+SpecialFolder]::Desktop
|
||||
$folderBrowser.SelectedPath = [System.IO.Path]::GetFullPath($folderBrowser.SelectedPath)
|
||||
$folderBrowser.ShowDialog() | Out-Null
|
||||
$folderBrowser.SelectedPath, $folderBrowser.SelectedPaths
|
||||
`;
|
||||
|
||||
const defaults: QuestionArgs = {
|
||||
@@ -81,10 +85,7 @@ const defaults: QuestionArgs = {
|
||||
communityProjectConfig: undefined,
|
||||
llamapack: "",
|
||||
postInstallAction: "dependencies",
|
||||
dataSource: {
|
||||
type: "none",
|
||||
config: {},
|
||||
},
|
||||
dataSources: [],
|
||||
tools: [],
|
||||
};
|
||||
|
||||
@@ -122,30 +123,53 @@ const getVectorDbChoices = (framework: TemplateFramework) => {
|
||||
return displayedChoices;
|
||||
};
|
||||
|
||||
const getDataSourceChoices = (framework: TemplateFramework) => {
|
||||
const choices = [
|
||||
{
|
||||
title: "No data, just a simple chat",
|
||||
value: "simple",
|
||||
},
|
||||
{ title: "Use an example PDF", value: "exampleFile" },
|
||||
];
|
||||
if (process.platform === "win32" || process.platform === "darwin") {
|
||||
export const getDataSourceChoices = (
|
||||
framework: TemplateFramework,
|
||||
selectedDataSource: TemplateDataSource[],
|
||||
) => {
|
||||
const choices = [];
|
||||
if (selectedDataSource.length > 0) {
|
||||
choices.push({
|
||||
title: `Use a local file (${supportedContextFileTypes.join(", ")})`,
|
||||
value: "localFile",
|
||||
});
|
||||
choices.push({
|
||||
title: `Use a local folder`,
|
||||
value: "localFolder",
|
||||
title: "No",
|
||||
value: "none",
|
||||
});
|
||||
}
|
||||
if (framework === "fastapi") {
|
||||
if (selectedDataSource === undefined || selectedDataSource.length === 0) {
|
||||
choices.push({
|
||||
title: "No data, just a simple chat",
|
||||
value: "none",
|
||||
});
|
||||
choices.push({
|
||||
title: "Use an example PDF",
|
||||
value: "exampleFile",
|
||||
});
|
||||
}
|
||||
|
||||
if (!selectedDataSource.some((ds) => ds.type === "file")) {
|
||||
choices.push({
|
||||
title: `Use local files (${supportedContextFileTypes.join(", ")})`,
|
||||
value: "file",
|
||||
});
|
||||
}
|
||||
|
||||
if (!selectedDataSource.some((ds) => ds.type === "folder")) {
|
||||
choices.push({
|
||||
title: "Use local folder",
|
||||
value: "folder",
|
||||
});
|
||||
}
|
||||
|
||||
if (
|
||||
!selectedDataSource.some((ds) => ds.type === "web") &&
|
||||
(process.platform === "win32" || process.platform === "darwin") &&
|
||||
framework === "fastapi"
|
||||
) {
|
||||
choices.push({
|
||||
title: "Use website content (requires Chrome)",
|
||||
value: "web",
|
||||
});
|
||||
}
|
||||
|
||||
return choices;
|
||||
};
|
||||
|
||||
@@ -173,9 +197,15 @@ const selectLocalContextData = async (type: TemplateDataSourceType) => {
|
||||
process.exit(1);
|
||||
}
|
||||
selectedPath = execSync(execScript, execOpts).toString().trim();
|
||||
if (type === "file") {
|
||||
const fileType = path.extname(selectedPath);
|
||||
if (!supportedContextFileTypes.includes(fileType)) {
|
||||
const paths =
|
||||
process.platform === "win32"
|
||||
? selectedPath.split("\r\n")
|
||||
: selectedPath.split(", ");
|
||||
for (const p of paths) {
|
||||
if (
|
||||
type == "file" &&
|
||||
!supportedContextFileTypes.includes(path.extname(p))
|
||||
) {
|
||||
console.log(
|
||||
red(
|
||||
`Please select a supported file type: ${supportedContextFileTypes}`,
|
||||
@@ -184,7 +214,7 @@ const selectLocalContextData = async (type: TemplateDataSourceType) => {
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
return selectedPath;
|
||||
return paths;
|
||||
} catch (error) {
|
||||
console.log(
|
||||
red(
|
||||
@@ -309,9 +339,11 @@ export const askQuestions = async (
|
||||
const openAiKeyConfigured =
|
||||
program.openAiKey || process.env["OPENAI_API_KEY"];
|
||||
// If using LlamaParse, require LlamaCloud API key
|
||||
const llamaCloudKeyConfigured = (
|
||||
program.dataSource?.config as FileSourceConfig
|
||||
)?.useLlamaParse
|
||||
const useLlamaParse = program.dataSources.some(
|
||||
(ds) =>
|
||||
ds.type === "file" && (ds.config as FileSourceConfig).useLlamaParse,
|
||||
);
|
||||
const llamaCloudKeyConfigured = useLlamaParse
|
||||
? program.llamaCloudKey || process.env["LLAMA_CLOUD_API_KEY"]
|
||||
: true;
|
||||
const hasVectorDb = program.vectorDb && program.vectorDb !== "none";
|
||||
@@ -614,124 +646,149 @@ export const askQuestions = async (
|
||||
console.log("File or folder not found");
|
||||
process.exit(1);
|
||||
} else {
|
||||
program.dataSource = {
|
||||
type: fs.lstatSync(program.files).isDirectory() ? "folder" : "file",
|
||||
config: {
|
||||
path: program.files,
|
||||
program.dataSources = [
|
||||
{
|
||||
type: fs.lstatSync(program.files).isDirectory() ? "folder" : "file",
|
||||
config: {
|
||||
paths: program.files.split(","),
|
||||
},
|
||||
},
|
||||
};
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
// Asking for data source
|
||||
if (!program.engine) {
|
||||
program.dataSources = getPrefOrDefault("dataSources");
|
||||
if (ciInfo.isCI) {
|
||||
program.engine = getPrefOrDefault("engine");
|
||||
} else {
|
||||
const { dataSource } = await prompts(
|
||||
{
|
||||
type: "select",
|
||||
name: "dataSource",
|
||||
message: "Which data source would you like to use?",
|
||||
choices: getDataSourceChoices(program.framework),
|
||||
initial: 1,
|
||||
},
|
||||
handlers,
|
||||
);
|
||||
// Initialize with default config
|
||||
program.dataSource = getPrefOrDefault("dataSource");
|
||||
if (program.dataSource) {
|
||||
switch (dataSource) {
|
||||
case "simple":
|
||||
program.engine = "simple";
|
||||
program.dataSource = { type: "none", config: {} };
|
||||
break;
|
||||
case "exampleFile":
|
||||
program.engine = "context";
|
||||
// Treat example as a folder data source with no config
|
||||
program.dataSource = { type: "folder", config: {} };
|
||||
break;
|
||||
case "localFile":
|
||||
program.engine = "context";
|
||||
program.dataSource = {
|
||||
type: "file",
|
||||
config: {
|
||||
path: await selectLocalContextData("file"),
|
||||
},
|
||||
};
|
||||
break;
|
||||
case "localFolder":
|
||||
program.engine = "context";
|
||||
program.dataSource = {
|
||||
type: "folder",
|
||||
config: {
|
||||
path: await selectLocalContextData("folder"),
|
||||
},
|
||||
};
|
||||
break;
|
||||
case "web":
|
||||
program.engine = "context";
|
||||
program.dataSource.type = "web";
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (!program.dataSource) {
|
||||
// Handle a case when engine is specified but dataSource is not
|
||||
if (program.engine === "context") {
|
||||
program.dataSource = {
|
||||
type: "folder",
|
||||
config: {},
|
||||
};
|
||||
} else if (program.engine === "simple") {
|
||||
program.dataSource = {
|
||||
type: "none",
|
||||
config: {},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
(program.dataSource?.type === "file" ||
|
||||
program.dataSource?.type === "folder") &&
|
||||
program.framework === "fastapi"
|
||||
) {
|
||||
if (ciInfo.isCI) {
|
||||
program.llamaCloudKey = getPrefOrDefault("llamaCloudKey");
|
||||
} else {
|
||||
const dataSourceConfig = program.dataSource.config as FileSourceConfig;
|
||||
dataSourceConfig.useLlamaParse = program.llamaParse;
|
||||
|
||||
// Is pdf file selected as data source or is it a folder data source
|
||||
const askingLlamaParse =
|
||||
dataSourceConfig.useLlamaParse === undefined &&
|
||||
(program.dataSource.type === "folder"
|
||||
? true
|
||||
: dataSourceConfig.path &&
|
||||
path.extname(dataSourceConfig.path) === ".pdf");
|
||||
|
||||
// Ask if user wants to use LlamaParse
|
||||
if (askingLlamaParse) {
|
||||
const { useLlamaParse } = await prompts(
|
||||
for (let i = 0; i < 2; i++) {
|
||||
const { selectedSource } = await prompts(
|
||||
{
|
||||
type: "toggle",
|
||||
name: "useLlamaParse",
|
||||
type: "select",
|
||||
name: "selectedSource",
|
||||
message:
|
||||
"Would you like to use LlamaParse (improved parser for RAG - requires API key)?",
|
||||
initial: true,
|
||||
active: "yes",
|
||||
inactive: "no",
|
||||
i === 0
|
||||
? "Which data source would you like to use?"
|
||||
: "Would you like to add another data source?",
|
||||
choices: getDataSourceChoices(
|
||||
program.framework,
|
||||
program.dataSources,
|
||||
),
|
||||
initial: 0,
|
||||
},
|
||||
handlers,
|
||||
);
|
||||
dataSourceConfig.useLlamaParse = useLlamaParse;
|
||||
program.dataSource.config = dataSourceConfig;
|
||||
|
||||
// Asking for data source config
|
||||
// Select None data source, No need to config and asking for another data source
|
||||
if (selectedSource === "none") {
|
||||
if (selectedSource.length === 0) {
|
||||
program.dataSources = [
|
||||
{
|
||||
type: "none",
|
||||
config: {},
|
||||
},
|
||||
];
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
const dataSource = {
|
||||
type: selectedSource === "exampleFile" ? "folder" : selectedSource,
|
||||
config: {},
|
||||
};
|
||||
|
||||
// Select local file or folder
|
||||
if (selectedSource === "file" || selectedSource === "folder") {
|
||||
const selectedPaths = await selectLocalContextData(selectedSource);
|
||||
dataSource.config = {
|
||||
paths: selectedPaths,
|
||||
};
|
||||
}
|
||||
|
||||
// Selected web data source
|
||||
else if (selectedSource === "web") {
|
||||
let { baseUrl } = await prompts(
|
||||
{
|
||||
type: "text",
|
||||
name: "baseUrl",
|
||||
message: "Please provide base URL of the website:",
|
||||
initial: "https://www.llamaindex.ai",
|
||||
},
|
||||
handlers,
|
||||
);
|
||||
try {
|
||||
if (!baseUrl.includes("://")) {
|
||||
baseUrl = `https://${baseUrl}`;
|
||||
}
|
||||
const checkUrl = new URL(baseUrl);
|
||||
if (
|
||||
checkUrl.protocol !== "https:" &&
|
||||
checkUrl.protocol !== "http:"
|
||||
) {
|
||||
throw new Error("Invalid protocol");
|
||||
}
|
||||
} catch (error) {
|
||||
console.log(
|
||||
red(
|
||||
"Invalid URL provided! Please provide a valid URL (e.g. https://www.llamaindex.ai)",
|
||||
),
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
dataSource.config = {
|
||||
baseUrl: baseUrl,
|
||||
depth: 1,
|
||||
};
|
||||
}
|
||||
program.dataSources.push(dataSource);
|
||||
|
||||
// No need to ask for another data source if user selected example data
|
||||
if (selectedSource === "exampleFile") {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Ask for LlamaCloud API key
|
||||
if (
|
||||
dataSourceConfig.useLlamaParse &&
|
||||
program.llamaCloudKey === undefined
|
||||
program.dataSources.length === 0 ||
|
||||
program.dataSources[0].type === "none"
|
||||
) {
|
||||
program.engine = "simple";
|
||||
} else {
|
||||
program.engine = "context";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Asking for LlamaParse
|
||||
// Is user selected pdf file or is there a folder data source
|
||||
if (!program.llamaParse && program.engine === "context") {
|
||||
const askingLlamaParse = program.dataSources.some(
|
||||
(ds) =>
|
||||
ds.type === "folder" ||
|
||||
(ds.type === "file" &&
|
||||
(ds.config as FileSourceConfig).paths?.some(
|
||||
(p) => path.extname(p) === ".pdf",
|
||||
)),
|
||||
);
|
||||
if (askingLlamaParse) {
|
||||
const { useLlamaParse } = await prompts(
|
||||
{
|
||||
type: "toggle",
|
||||
name: "useLlamaParse",
|
||||
message:
|
||||
"Would you like to use LlamaParse (improved parser for RAG - requires API key)?",
|
||||
initial: true,
|
||||
active: "yes",
|
||||
inactive: "no",
|
||||
},
|
||||
handlers,
|
||||
);
|
||||
// Ask for LlamaCloud API key
|
||||
if (useLlamaParse && program.llamaCloudKey === undefined) {
|
||||
const { llamaCloudKey } = await prompts(
|
||||
{
|
||||
type: "text",
|
||||
@@ -743,41 +800,15 @@ export const askQuestions = async (
|
||||
);
|
||||
program.llamaCloudKey = llamaCloudKey;
|
||||
}
|
||||
// TODO: Consider separate llamaParse to another config
|
||||
program.dataSources.forEach((dataSource) => {
|
||||
if (dataSource.type === "file" || dataSource.type === "folder") {
|
||||
(dataSource.config as FileSourceConfig).useLlamaParse = useLlamaParse;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (program.dataSource?.type === "web" && program.framework === "fastapi") {
|
||||
let { baseUrl } = await prompts(
|
||||
{
|
||||
type: "text",
|
||||
name: "baseUrl",
|
||||
message: "Please provide base URL of the website:",
|
||||
initial: "https://www.llamaindex.ai",
|
||||
},
|
||||
handlers,
|
||||
);
|
||||
try {
|
||||
if (!baseUrl.includes("://")) {
|
||||
baseUrl = `https://${baseUrl}`;
|
||||
}
|
||||
const checkUrl = new URL(baseUrl);
|
||||
if (checkUrl.protocol !== "https:" && checkUrl.protocol !== "http:") {
|
||||
throw new Error("Invalid protocol");
|
||||
}
|
||||
} catch (error) {
|
||||
console.log(
|
||||
red(
|
||||
"Invalid URL provided! Please provide a valid URL (e.g. https://www.llamaindex.ai)",
|
||||
),
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
program.dataSource.config = {
|
||||
baseUrl: baseUrl,
|
||||
depth: 1,
|
||||
};
|
||||
}
|
||||
|
||||
if (program.engine !== "simple" && !program.vectorDb) {
|
||||
if (ciInfo.isCI) {
|
||||
program.vectorDb = getPrefOrDefault("vectorDb");
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
import os
|
||||
import importlib
|
||||
|
||||
|
||||
def get_documents():
|
||||
# For each file in .loaders, import the module and call the get_documents function
|
||||
for loader in os.listdir(os.path.join(os.path.dirname(__file__), "loaders")):
|
||||
if loader.endswith(".py"):
|
||||
loader = loader[:-3]
|
||||
module = importlib.import_module(f"app.engine.loaders.{loader}")
|
||||
documents = module.get_documents()
|
||||
yield documents
|
||||
@@ -2,10 +2,10 @@ from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
import os
|
||||
import logging
|
||||
from llama_index.core.indices import (
|
||||
VectorStoreIndex,
|
||||
)
|
||||
from llama_index.core.indices import VectorStoreIndex
|
||||
from llama_index.core.storage import StorageContext
|
||||
from app.engine.constants import STORAGE_DIR
|
||||
from app.engine.loader import get_documents
|
||||
from app.settings import init_settings
|
||||
@@ -16,15 +16,17 @@ logger = logging.getLogger()
|
||||
|
||||
|
||||
def generate_datasource():
|
||||
logger.info("Creating new index")
|
||||
# load the documents and create the index
|
||||
documents = get_documents()
|
||||
index = VectorStoreIndex.from_documents(
|
||||
documents,
|
||||
)
|
||||
# store it for later
|
||||
index.storage_context.persist(STORAGE_DIR)
|
||||
logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
|
||||
storage_context = StorageContext.from_defaults()
|
||||
|
||||
docs = []
|
||||
|
||||
for doc in get_documents():
|
||||
storage_context.docstore.add_documents(doc)
|
||||
docs.extend(doc)
|
||||
|
||||
index = VectorStoreIndex.from_documents(docs, storage_context=storage_context)
|
||||
index.storage_context.persist(persist_dir=STORAGE_DIR)
|
||||
logger.info(f"Generated index at {STORAGE_DIR}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user