mirror of
https://github.com/run-llama/create-llama.git
synced 2026-07-01 21:04:08 -04:00
chore: replace Python examples with llama-deploy (#701)
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
---
|
||||
"create-llama": patch
|
||||
---
|
||||
|
||||
chore: replace Python examples with llama-deploy
|
||||
@@ -63,15 +63,6 @@ jobs:
|
||||
run: pnpm run pack-install
|
||||
working-directory: packages/create-llama
|
||||
|
||||
- name: Build and store server package
|
||||
run: |
|
||||
pnpm run build
|
||||
wheel_file=$(ls dist/*.whl | head -n 1)
|
||||
mkdir -p "${{ runner.temp }}"
|
||||
cp "$wheel_file" "${{ runner.temp }}/"
|
||||
echo "SERVER_PACKAGE_PATH=${{ runner.temp }}/$(basename "$wheel_file")" >> $GITHUB_ENV
|
||||
working-directory: python/llama-index-server
|
||||
|
||||
- name: Run Playwright tests for Python
|
||||
run: pnpm run e2e:python
|
||||
env:
|
||||
|
||||
@@ -3,11 +3,8 @@ import { exec } from "child_process";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import util from "util";
|
||||
import {
|
||||
ALL_USE_CASES,
|
||||
TemplateFramework,
|
||||
TemplateVectorDB,
|
||||
} from "../../helpers/types";
|
||||
import { TemplateFramework, TemplateUseCase, TemplateVectorDB } from "../../helpers";
|
||||
import { ALL_PYTHON_USE_CASES } from "../../helpers/use-case";
|
||||
import { RunCreateLlamaOptions, createTestDir, runCreateLlama } from "../utils";
|
||||
|
||||
const execAsync = util.promisify(exec);
|
||||
@@ -17,11 +14,15 @@ const vectorDb: TemplateVectorDB = process.env.VECTORDB
|
||||
? (process.env.VECTORDB as TemplateVectorDB)
|
||||
: "none";
|
||||
|
||||
const useCases: TemplateUseCase[] = vectorDb === "llamacloud" ? [
|
||||
"agentic_rag", "deep_research", "financial_report"
|
||||
] : ALL_PYTHON_USE_CASES
|
||||
|
||||
test.describe("Mypy check", () => {
|
||||
test.describe.configure({ retries: 0 });
|
||||
|
||||
test.describe("LlamaIndexServer", async () => {
|
||||
for (const useCase of ALL_USE_CASES) {
|
||||
for (const useCase of useCases) {
|
||||
test(`should pass mypy for use case: ${useCase}`, async () => {
|
||||
const cwd = await createTestDir();
|
||||
await createAndCheckLlamaProject({
|
||||
|
||||
@@ -2,11 +2,11 @@ import { expect, test } from "@playwright/test";
|
||||
import { ChildProcess } from "child_process";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import { type TemplateFramework, type TemplateVectorDB } from "../../helpers";
|
||||
import {
|
||||
ALL_USE_CASES,
|
||||
type TemplateFramework,
|
||||
type TemplateVectorDB,
|
||||
} from "../../helpers";
|
||||
ALL_PYTHON_USE_CASES,
|
||||
ALL_TYPESCRIPT_USE_CASES,
|
||||
} from "../../helpers/use-case";
|
||||
import { createTestDir, runCreateLlama } from "../utils";
|
||||
|
||||
const templateFramework: TemplateFramework = process.env.FRAMEWORK
|
||||
@@ -17,10 +17,15 @@ const vectorDb: TemplateVectorDB = process.env.VECTORDB
|
||||
: "none";
|
||||
const llamaCloudProjectName = "create-llama";
|
||||
const llamaCloudIndexName = "e2e-test";
|
||||
const allUseCases =
|
||||
templateFramework === "nextjs"
|
||||
? ALL_TYPESCRIPT_USE_CASES
|
||||
: ALL_PYTHON_USE_CASES;
|
||||
const isPythonLlamaDeploy = templateFramework === "fastapi";
|
||||
|
||||
const userMessage = "Write a blog post about physical standards for letters";
|
||||
|
||||
for (const useCase of ALL_USE_CASES) {
|
||||
for (const useCase of allUseCases) {
|
||||
test.describe(`Test use case ${useCase} ${templateFramework} ${vectorDb}`, async () => {
|
||||
let port: number;
|
||||
let cwd: string;
|
||||
@@ -35,7 +40,7 @@ for (const useCase of ALL_USE_CASES) {
|
||||
templateFramework,
|
||||
vectorDb,
|
||||
port,
|
||||
postInstallAction: "runApp",
|
||||
postInstallAction: isPythonLlamaDeploy ? "dependencies" : "runApp",
|
||||
useCase,
|
||||
llamaCloudProjectName,
|
||||
llamaCloudIndexName,
|
||||
@@ -50,6 +55,11 @@ for (const useCase of ALL_USE_CASES) {
|
||||
});
|
||||
|
||||
test("Frontend should have a title", async ({ page }) => {
|
||||
test.skip(
|
||||
isPythonLlamaDeploy,
|
||||
"Skip frontend tests for Python LllamaDeploy",
|
||||
);
|
||||
|
||||
await page.goto(`http://localhost:${port}`);
|
||||
await expect(page.getByText("Built by LlamaIndex")).toBeVisible({
|
||||
timeout: 5 * 60 * 1000,
|
||||
@@ -60,8 +70,10 @@ for (const useCase of ALL_USE_CASES) {
|
||||
page,
|
||||
}) => {
|
||||
test.skip(
|
||||
useCase === "financial_report" || useCase === "deep_research",
|
||||
"Skip chat tests for financial report and deep research.",
|
||||
useCase === "financial_report" ||
|
||||
useCase === "deep_research" ||
|
||||
isPythonLlamaDeploy,
|
||||
"Skip chat tests for financial report and deep research. Also skip for Python LlamaDeploy",
|
||||
);
|
||||
await page.goto(`http://localhost:${port}`);
|
||||
await page.fill("form textarea", userMessage);
|
||||
|
||||
@@ -4,11 +4,11 @@ import fs from "fs";
|
||||
import path from "path";
|
||||
import util from "util";
|
||||
import {
|
||||
ALL_USE_CASES,
|
||||
TemplateFramework,
|
||||
TemplateUseCase,
|
||||
TemplateVectorDB,
|
||||
} from "../../helpers/types";
|
||||
import { ALL_TYPESCRIPT_USE_CASES } from "../../helpers/use-case";
|
||||
import { createTestDir, runCreateLlama } from "../utils";
|
||||
|
||||
const execAsync = util.promisify(exec);
|
||||
@@ -21,7 +21,7 @@ const vectorDb: TemplateVectorDB = process.env.VECTORDB
|
||||
test.describe("Test resolve TS dependencies", () => {
|
||||
test.describe.configure({ retries: 0 });
|
||||
|
||||
for (const useCase of ALL_USE_CASES) {
|
||||
for (const useCase of ALL_TYPESCRIPT_USE_CASES) {
|
||||
const optionDescription = `useCase: ${useCase}, vectorDb: ${vectorDb}`;
|
||||
test.describe(`${optionDescription}`, () => {
|
||||
test(`${optionDescription}`, async () => {
|
||||
|
||||
@@ -1,20 +1,17 @@
|
||||
import fs from "fs/promises";
|
||||
import path from "path";
|
||||
import {
|
||||
EnvVar,
|
||||
InstallTemplateArgs,
|
||||
ModelConfig,
|
||||
TemplateFramework,
|
||||
TemplateType,
|
||||
TemplateUseCase,
|
||||
TemplateVectorDB,
|
||||
} from "./types";
|
||||
|
||||
import { TSYSTEMS_LLMHUB_API_URL } from "./providers/llmhub";
|
||||
|
||||
export type EnvVar = {
|
||||
name?: string;
|
||||
description?: string;
|
||||
value?: string;
|
||||
};
|
||||
import { USE_CASE_CONFIGS } from "./use-case";
|
||||
|
||||
const renderEnvVar = (envVars: EnvVar[]): string => {
|
||||
return envVars.reduce(
|
||||
@@ -228,7 +225,15 @@ Otherwise, use CHROMA_HOST and CHROMA_PORT config above`,
|
||||
}
|
||||
};
|
||||
|
||||
const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
|
||||
const getModelEnvs = (
|
||||
modelConfig: ModelConfig,
|
||||
framework: TemplateFramework,
|
||||
template: TemplateType,
|
||||
useCase: TemplateUseCase,
|
||||
): EnvVar[] => {
|
||||
const isPythonLlamaDeploy =
|
||||
framework === "fastapi" && template === "llamaindexserver";
|
||||
|
||||
return [
|
||||
{
|
||||
name: "MODEL",
|
||||
@@ -240,10 +245,25 @@ const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
|
||||
description: "Name of the embedding model to use.",
|
||||
value: modelConfig.embeddingModel,
|
||||
},
|
||||
{
|
||||
name: "CONVERSATION_STARTERS",
|
||||
description: "The questions to help users get started (multi-line).",
|
||||
},
|
||||
...(isPythonLlamaDeploy
|
||||
? [
|
||||
{
|
||||
name: "NEXT_PUBLIC_STARTER_QUESTIONS",
|
||||
description:
|
||||
"Initial questions to display in the chat (`starterQuestions`)",
|
||||
value: JSON.stringify(
|
||||
USE_CASE_CONFIGS[useCase]?.starterQuestions ?? [],
|
||||
),
|
||||
},
|
||||
]
|
||||
: [
|
||||
{
|
||||
name: "CONVERSATION_STARTERS",
|
||||
description:
|
||||
"The questions to help users get started (multi-line).",
|
||||
},
|
||||
]),
|
||||
...(USE_CASE_CONFIGS[useCase]?.additionalEnvVars ?? []),
|
||||
...(modelConfig.provider === "openai"
|
||||
? [
|
||||
{
|
||||
@@ -251,14 +271,18 @@ const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
|
||||
description: "The OpenAI API key to use.",
|
||||
value: modelConfig.apiKey,
|
||||
},
|
||||
{
|
||||
name: "LLM_TEMPERATURE",
|
||||
description: "Temperature for sampling from the model.",
|
||||
},
|
||||
{
|
||||
name: "LLM_MAX_TOKENS",
|
||||
description: "Maximum number of tokens to generate.",
|
||||
},
|
||||
...(isPythonLlamaDeploy
|
||||
? []
|
||||
: [
|
||||
{
|
||||
name: "LLM_TEMPERATURE",
|
||||
description: "Temperature for sampling from the model.",
|
||||
},
|
||||
{
|
||||
name: "LLM_MAX_TOKENS",
|
||||
description: "Maximum number of tokens to generate.",
|
||||
},
|
||||
]),
|
||||
]
|
||||
: []),
|
||||
...(modelConfig.provider === "anthropic"
|
||||
@@ -367,11 +391,12 @@ const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
|
||||
|
||||
const getFrameworkEnvs = (
|
||||
framework: TemplateFramework,
|
||||
template?: TemplateType,
|
||||
port?: number,
|
||||
): EnvVar[] => {
|
||||
const sPort = port?.toString() || "8000";
|
||||
const result: EnvVar[] = [];
|
||||
if (framework === "fastapi") {
|
||||
if (framework === "fastapi" && template !== "llamaindexserver") {
|
||||
result.push(
|
||||
...[
|
||||
{
|
||||
@@ -403,6 +428,7 @@ export const createBackendEnvFile = async (
|
||||
| "template"
|
||||
| "port"
|
||||
| "useLlamaParse"
|
||||
| "useCase"
|
||||
>,
|
||||
) => {
|
||||
// Init env values
|
||||
@@ -418,11 +444,27 @@ export const createBackendEnvFile = async (
|
||||
]
|
||||
: []),
|
||||
...getVectorDBEnvs(opts.vectorDb, opts.framework, opts.template),
|
||||
...getFrameworkEnvs(opts.framework, opts.port),
|
||||
...getModelEnvs(opts.modelConfig),
|
||||
...getFrameworkEnvs(opts.framework, opts.template, opts.port),
|
||||
...getModelEnvs(
|
||||
opts.modelConfig,
|
||||
opts.framework,
|
||||
opts.template,
|
||||
opts.useCase,
|
||||
),
|
||||
];
|
||||
// Render and write env file
|
||||
const content = renderEnvVar(envVars);
|
||||
await fs.writeFile(path.join(root, envFileName), content);
|
||||
|
||||
const isPythonLlamaDeploy =
|
||||
opts.framework === "fastapi" && opts.template === "llamaindexserver";
|
||||
|
||||
// each llama-deploy service will need a .env inside its directory
|
||||
// this .env will be copied along with workflow code when service is deployed
|
||||
// so that we need to put the .env file inside src/ instead of root
|
||||
const envPath = isPythonLlamaDeploy
|
||||
? path.join(root, "src", envFileName)
|
||||
: path.join(root, envFileName);
|
||||
|
||||
await fs.writeFile(envPath, content);
|
||||
console.log(`Created '${envFileName}' file. Please check the settings.`);
|
||||
};
|
||||
|
||||
@@ -117,8 +117,13 @@ const downloadFile = async (url: string, destPath: string) => {
|
||||
const prepareContextData = async (
|
||||
root: string,
|
||||
dataSources: TemplateDataSource[],
|
||||
isPythonLlamaDeploy: boolean,
|
||||
) => {
|
||||
await makeDir(path.join(root, "data"));
|
||||
const dataDir = isPythonLlamaDeploy
|
||||
? path.join(root, "ui", "data")
|
||||
: path.join(root, "data");
|
||||
|
||||
await makeDir(dataDir);
|
||||
for (const dataSource of dataSources) {
|
||||
const dataSourceConfig = dataSource?.config as FileSourceConfig;
|
||||
// If the path is URLs, download the data and save it to the data directory
|
||||
@@ -128,8 +133,7 @@ const prepareContextData = async (
|
||||
dataSourceConfig.url.toString(),
|
||||
);
|
||||
const destPath = path.join(
|
||||
root,
|
||||
"data",
|
||||
dataDir,
|
||||
dataSourceConfig.filename ??
|
||||
path.basename(dataSourceConfig.url.toString()),
|
||||
);
|
||||
@@ -137,11 +141,7 @@ const prepareContextData = async (
|
||||
} else {
|
||||
// Copy local data
|
||||
console.log("Copying data from path:", dataSourceConfig.path);
|
||||
const destPath = path.join(
|
||||
root,
|
||||
"data",
|
||||
path.basename(dataSourceConfig.path),
|
||||
);
|
||||
const destPath = path.join(dataDir, path.basename(dataSourceConfig.path));
|
||||
await fsExtra.copy(dataSourceConfig.path, destPath);
|
||||
}
|
||||
}
|
||||
@@ -156,6 +156,9 @@ export const installTemplate = async (props: InstallTemplateArgs) => {
|
||||
await installTSTemplate(props);
|
||||
}
|
||||
|
||||
const isPythonLlamaDeploy =
|
||||
props.framework === "fastapi" && props.template === "llamaindexserver";
|
||||
|
||||
// This is a backend, so we need to copy the test data and create the env file.
|
||||
|
||||
// Copy the environment file to the target directory.
|
||||
@@ -164,6 +167,7 @@ export const installTemplate = async (props: InstallTemplateArgs) => {
|
||||
await prepareContextData(
|
||||
props.root,
|
||||
props.dataSources.filter((ds) => ds.type === "file"),
|
||||
isPythonLlamaDeploy,
|
||||
);
|
||||
|
||||
if (
|
||||
@@ -183,10 +187,12 @@ export const installTemplate = async (props: InstallTemplateArgs) => {
|
||||
);
|
||||
}
|
||||
|
||||
// Create outputs directory
|
||||
await makeDir(path.join(props.root, "output/tools"));
|
||||
await makeDir(path.join(props.root, "output/uploaded"));
|
||||
await makeDir(path.join(props.root, "output/llamacloud"));
|
||||
if (!isPythonLlamaDeploy) {
|
||||
// Create outputs directory (llama-deploy doesn't need this)
|
||||
await makeDir(path.join(props.root, "output/tools"));
|
||||
await makeDir(path.join(props.root, "output/uploaded"));
|
||||
await makeDir(path.join(props.root, "output/llamacloud"));
|
||||
}
|
||||
};
|
||||
|
||||
export * from "./types";
|
||||
|
||||
@@ -7,27 +7,33 @@ import { isUvAvailable, tryUvSync } from "./uv";
|
||||
|
||||
import { assetRelocator, copy } from "./copy";
|
||||
import { templatesDir } from "./dir";
|
||||
import {
|
||||
InstallTemplateArgs,
|
||||
ModelConfig,
|
||||
TemplateDataSource,
|
||||
TemplateVectorDB,
|
||||
} from "./types";
|
||||
|
||||
interface Dependency {
|
||||
name: string;
|
||||
version?: string;
|
||||
extras?: string[];
|
||||
constraints?: Record<string, string>;
|
||||
}
|
||||
import { Dependency, InstallTemplateArgs } from "./types";
|
||||
import { USE_CASE_CONFIGS } from "./use-case";
|
||||
|
||||
const getAdditionalDependencies = (
|
||||
modelConfig: ModelConfig,
|
||||
vectorDb?: TemplateVectorDB,
|
||||
dataSources?: TemplateDataSource[],
|
||||
opts: Pick<
|
||||
InstallTemplateArgs,
|
||||
| "framework"
|
||||
| "template"
|
||||
| "useCase"
|
||||
| "modelConfig"
|
||||
| "vectorDb"
|
||||
| "dataSources"
|
||||
>,
|
||||
) => {
|
||||
const { framework, template, useCase, modelConfig, vectorDb, dataSources } =
|
||||
opts;
|
||||
|
||||
const dependencies: Dependency[] = [];
|
||||
|
||||
const isPythonLlamaDeploy =
|
||||
framework === "fastapi" && template === "llamaindexserver";
|
||||
const useCaseDependencies =
|
||||
USE_CASE_CONFIGS[useCase]?.additionalDependencies ?? [];
|
||||
if (isPythonLlamaDeploy && useCaseDependencies.length > 0) {
|
||||
dependencies.push(...useCaseDependencies);
|
||||
}
|
||||
|
||||
// Add vector db dependencies
|
||||
switch (vectorDb) {
|
||||
case "mongo": {
|
||||
@@ -412,13 +418,17 @@ const installLlamaIndexServerTemplate = async ({
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
await copy("*.py", path.join(root, "app"), {
|
||||
const srcDir = path.join(root, "src");
|
||||
const uiDir = path.join(root, "ui");
|
||||
|
||||
// copy workflow code to src folder
|
||||
await copy("*.py", srcDir, {
|
||||
parents: true,
|
||||
cwd: path.join(templatesDir, "components", "use-cases", "python", useCase),
|
||||
});
|
||||
|
||||
// copy model provider settings to app folder
|
||||
await copy("**", path.join(root, "app"), {
|
||||
// copy model provider settings to src folder
|
||||
await copy("**", srcDir, {
|
||||
cwd: path.join(
|
||||
templatesDir,
|
||||
"components",
|
||||
@@ -428,32 +438,26 @@ const installLlamaIndexServerTemplate = async ({
|
||||
),
|
||||
});
|
||||
|
||||
// Copy custom UI component code
|
||||
await copy(`*`, path.join(root, "components"), {
|
||||
// copy ts server to ui folder
|
||||
await copy("**", uiDir, {
|
||||
parents: true,
|
||||
cwd: path.join(templatesDir, "components", "ts-proxy"),
|
||||
});
|
||||
|
||||
// Copy custom UI components to ui/components folder
|
||||
await copy(`*`, path.join(uiDir, "components"), {
|
||||
parents: true,
|
||||
cwd: path.join(templatesDir, "components", "ui", "use-cases", useCase),
|
||||
});
|
||||
|
||||
// Copy layout components to layout folder in root
|
||||
await copy("*", path.join(root, "layout"), {
|
||||
// Copy layout components to ui/layout folder
|
||||
await copy("*", path.join(uiDir, "layout"), {
|
||||
parents: true,
|
||||
cwd: path.join(templatesDir, "components", "ui", "layout"),
|
||||
});
|
||||
|
||||
if (useLlamaParse) {
|
||||
await copy("index.py", path.join(root, "app"), {
|
||||
parents: true,
|
||||
cwd: path.join(
|
||||
templatesDir,
|
||||
"components",
|
||||
"vectordbs",
|
||||
"llamaindexserver",
|
||||
"llamacloud",
|
||||
"python",
|
||||
),
|
||||
});
|
||||
// TODO: Consider moving generate.py to app folder.
|
||||
await copy("generate.py", path.join(root), {
|
||||
await copy("**", srcDir, {
|
||||
parents: true,
|
||||
cwd: path.join(
|
||||
templatesDir,
|
||||
@@ -471,6 +475,12 @@ const installLlamaIndexServerTemplate = async ({
|
||||
cwd: path.join(templatesDir, "components", "use-cases", "python", useCase),
|
||||
rename: assetRelocator,
|
||||
});
|
||||
|
||||
// Clean up, remove generate.py and index.py for non-data use cases
|
||||
if (["code_generator", "document_generator", "hitl"].includes(useCase)) {
|
||||
await fs.unlink(path.join(srcDir, "generate.py"));
|
||||
await fs.unlink(path.join(srcDir, "index.py"));
|
||||
}
|
||||
};
|
||||
|
||||
export const installPythonTemplate = async ({
|
||||
@@ -517,11 +527,14 @@ export const installPythonTemplate = async ({
|
||||
}
|
||||
|
||||
console.log("Adding additional dependencies");
|
||||
const addOnDependencies = getAdditionalDependencies(
|
||||
const addOnDependencies = getAdditionalDependencies({
|
||||
framework,
|
||||
template,
|
||||
useCase,
|
||||
modelConfig,
|
||||
vectorDb,
|
||||
dataSources,
|
||||
);
|
||||
});
|
||||
|
||||
await addDependencies(root, addOnDependencies);
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { SpawnOptions, spawn } from "child_process";
|
||||
import { SpawnOptions, exec, spawn } from "child_process";
|
||||
import waitPort from "wait-port";
|
||||
import { TemplateFramework, TemplateType } from "./types";
|
||||
|
||||
const createProcess = (
|
||||
@@ -47,6 +48,58 @@ export function runTSApp(appPath: string, port: number) {
|
||||
});
|
||||
}
|
||||
|
||||
// TODO: support run multiple LlamaDeploy server in the same machine
|
||||
/**
 * Starts a local llama_deploy API server for the generated app, deploys
 * `llama_deploy.yml` to it with `llamactl`, and then keeps the CLI alive
 * until it is interrupted.
 *
 * @param appPath - Project directory; used as the working directory for both
 *   the API server and the `llamactl` deploy command.
 * @param port - Port passed to the server through
 *   `LLAMA_DEPLOY_APISERVER_PORT` and used in the `llamactl` server URL.
 *   Defaults to 4501.
 * @returns A promise that stays pending while the server is running and
 *   rejects if the server process exits or fails to spawn.
 * @throws Error if the port is not open within 30 seconds; errors from the
 *   deployment step are rethrown after the server process has been killed.
 */
async function runPythonLlamaDeployServer(
  appPath: string,
  port: number = 4501,
): Promise<void> {
  console.log("Starting llama_deploy server...", port);

  // Use spawn instead of exec: exec buffers the child's output and terminates
  // the child once `maxBuffer` is exceeded, which would eventually kill a
  // long-running server that keeps logging. Output is inherited directly.
  const serverProcess = spawn("uv run -m llama_deploy.apiserver", {
    cwd: appPath,
    env: {
      ...process.env,
      LLAMA_DEPLOY_APISERVER_PORT: `${port}`,
    },
    shell: true,
    stdio: ["ignore", "inherit", "inherit"],
  });

  // Rejects as soon as the server process goes away, so callers are told
  // about a crashed server instead of waiting on it forever.
  const serverExited = new Promise<never>((_resolve, reject) => {
    serverProcess.once("error", reject);
    serverProcess.once("exit", (code, signal) => {
      reject(
        new Error(
          `llama_deploy server exited unexpectedly (code: ${code}, signal: ${signal})`,
        ),
      );
    });
  });
  // Mark the rejection as handled; it is still observed by the awaits below.
  serverExited.catch(() => undefined);

  try {
    // Wait for the server to be ready
    console.log("Waiting for server to be ready...");
    const { open } = await Promise.race([
      waitPort({ port, host: "localhost", timeout: 30000 }),
      serverExited,
    ]);
    // wait-port resolves (rather than rejects) on timeout, so check the result.
    if (!open) {
      throw new Error(
        `llama_deploy server did not open port ${port} within 30 seconds`,
      );
    }

    // create the deployment with explicit host configuration
    console.log("llama_deploy server started, creating deployment...", port);
    await createProcess(
      "uv",
      [
        "run",
        "llamactl",
        "-s",
        `http://localhost:${port}`,
        "deploy",
        "llama_deploy.yml",
      ],
      {
        stdio: "inherit",
        cwd: appPath,
        shell: true,
      },
    );
    console.log(`Deployment created successfully!`);
  } catch (error: unknown) {
    // Do not leave an orphaned API server behind when startup fails.
    serverProcess.kill();
    throw error;
  }

  // Keep the main process alive and handle cleanup
  process.once("SIGINT", () => {
    console.log("\nShutting down...");
    serverProcess.kill();
    process.exit(0);
  });

  return serverExited;
}
|
||||
|
||||
export async function runApp(
|
||||
appPath: string,
|
||||
template: TemplateType,
|
||||
@@ -57,6 +110,11 @@ export async function runApp(
|
||||
// Start the app
|
||||
const defaultPort = framework === "nextjs" ? 3000 : 8000;
|
||||
|
||||
if (template === "llamaindexserver" && framework === "fastapi") {
|
||||
await runPythonLlamaDeployServer(appPath, port);
|
||||
return;
|
||||
}
|
||||
|
||||
const appRunner = framework === "fastapi" ? runFastAPIApp : runTSApp;
|
||||
await appRunner(appPath, port || defaultPort, template);
|
||||
} catch (error) {
|
||||
|
||||
@@ -49,14 +49,6 @@ export type TemplateUseCase =
|
||||
| "document_generator"
|
||||
| "hitl";
|
||||
|
||||
export const ALL_USE_CASES: TemplateUseCase[] = [
|
||||
"agentic_rag",
|
||||
"deep_research",
|
||||
"financial_report",
|
||||
"code_generator",
|
||||
"document_generator",
|
||||
"hitl",
|
||||
];
|
||||
// Config for both file and folder
|
||||
export type FileSourceConfig =
|
||||
| {
|
||||
@@ -97,3 +89,16 @@ export interface InstallTemplateArgs {
|
||||
postInstallAction: TemplatePostInstallAction;
|
||||
useCase: TemplateUseCase;
|
||||
}
|
||||
|
||||
export type EnvVar = {
|
||||
name?: string;
|
||||
description?: string;
|
||||
value?: string;
|
||||
};
|
||||
|
||||
export interface Dependency {
|
||||
name: string;
|
||||
version?: string;
|
||||
extras?: string[];
|
||||
constraints?: Record<string, string>;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
import { Dependency, EnvVar, TemplateUseCase } from "./types";
|
||||
|
||||
export const ALL_TYPESCRIPT_USE_CASES: TemplateUseCase[] = [
|
||||
"agentic_rag",
|
||||
"deep_research",
|
||||
"financial_report",
|
||||
"code_generator",
|
||||
"document_generator",
|
||||
"hitl",
|
||||
];
|
||||
|
||||
export const ALL_PYTHON_USE_CASES: TemplateUseCase[] = [
|
||||
"agentic_rag",
|
||||
"deep_research",
|
||||
"financial_report",
|
||||
"code_generator",
|
||||
"document_generator",
|
||||
];
|
||||
|
||||
export const USE_CASE_CONFIGS: Record<
|
||||
TemplateUseCase,
|
||||
{
|
||||
starterQuestions: string[];
|
||||
additionalEnvVars?: EnvVar[];
|
||||
additionalDependencies?: Dependency[];
|
||||
}
|
||||
> = {
|
||||
agentic_rag: {
|
||||
starterQuestions: [
|
||||
"Letter standard in the document",
|
||||
"Summarize the document",
|
||||
],
|
||||
},
|
||||
financial_report: {
|
||||
starterQuestions: [
|
||||
"Compare Apple and Tesla financial performance",
|
||||
"Generate a PDF report for Tesla financial",
|
||||
],
|
||||
additionalEnvVars: [
|
||||
{
|
||||
name: "E2B_API_KEY",
|
||||
description: "The E2B API key to use to use code interpreter tool",
|
||||
},
|
||||
],
|
||||
additionalDependencies: [
|
||||
{
|
||||
name: "e2b-code-interpreter",
|
||||
version: ">=1.1.1,<2.0.0",
|
||||
},
|
||||
{
|
||||
name: "markdown",
|
||||
version: ">=3.7,<4.0",
|
||||
},
|
||||
{
|
||||
name: "xhtml2pdf",
|
||||
version: ">=0.2.17,<1.0.0",
|
||||
},
|
||||
],
|
||||
},
|
||||
deep_research: {
|
||||
starterQuestions: [
|
||||
"Research about Apple and Tesla",
|
||||
"Financial performance of Tesla",
|
||||
],
|
||||
},
|
||||
code_generator: {
|
||||
starterQuestions: [
|
||||
"Generate a code for a simple calculator",
|
||||
"Generate a code for a todo list app",
|
||||
],
|
||||
},
|
||||
document_generator: {
|
||||
starterQuestions: [
|
||||
"Generate a document about LlamaIndex",
|
||||
"Generate a document about LLM",
|
||||
],
|
||||
},
|
||||
hitl: {
|
||||
starterQuestions: [
|
||||
"List all the files in the current directory",
|
||||
"Check git status",
|
||||
],
|
||||
},
|
||||
};
|
||||
@@ -21,7 +21,7 @@ export const askQuestions = async (
|
||||
askModels: askModelsFromArgs,
|
||||
} = args;
|
||||
|
||||
const { useCase, framework } = await prompts(
|
||||
const { useCase } = await prompts(
|
||||
[
|
||||
{
|
||||
type: useCaseFromArgs ? null : "select",
|
||||
@@ -65,20 +65,28 @@ export const askQuestions = async (
|
||||
],
|
||||
initial: 0,
|
||||
},
|
||||
{
|
||||
type: frameworkFromArgs ? null : "select",
|
||||
name: "framework",
|
||||
message: "What language do you want to use?",
|
||||
choices: [
|
||||
{ title: "Python (FastAPI)", value: "fastapi" },
|
||||
{ title: "Typescript (NextJS)", value: "nextjs" },
|
||||
],
|
||||
initial: 0,
|
||||
},
|
||||
],
|
||||
questionHandlers,
|
||||
);
|
||||
|
||||
const { framework } = await prompts(
|
||||
{
|
||||
type: frameworkFromArgs ? null : "select",
|
||||
name: "framework",
|
||||
message: "What language do you want to use?",
|
||||
choices: [
|
||||
// For Python Human in the Loop use case, please refer to this chat-ui example:
|
||||
// https://github.com/run-llama/chat-ui/blob/main/examples/llamadeploy/chat/src/cli_workflow.py
|
||||
...(useCase !== "hitl"
|
||||
? [{ title: "Python (FastAPI)", value: "fastapi" }]
|
||||
: []),
|
||||
{ title: "Typescript (NextJS)", value: "nextjs" },
|
||||
],
|
||||
initial: 0,
|
||||
},
|
||||
questionHandlers,
|
||||
);
|
||||
|
||||
const finalUseCase = (useCaseFromArgs ?? useCase) as TemplateUseCase;
|
||||
const finalFramework = (frameworkFromArgs ?? framework) as TemplateFramework;
|
||||
if (!finalUseCase) {
|
||||
@@ -102,7 +110,12 @@ export const askQuestions = async (
|
||||
// Ask for LlamaCloud
|
||||
let llamaCloudKey = llamaCloudKeyFromArgs ?? process.env.LLAMA_CLOUD_API_KEY;
|
||||
let vectorDb: TemplateVectorDB = vectorDbFromArgs ?? "none";
|
||||
if (!vectorDbFromArgs && useCaseConfig.dataSources) {
|
||||
|
||||
if (
|
||||
!vectorDbFromArgs &&
|
||||
useCaseConfig.dataSources &&
|
||||
!["code_generator", "document_generator", "hitl"].includes(finalUseCase) // these use cases don't use data so no need to ask for LlamaCloud
|
||||
) {
|
||||
const { useLlamaCloud } = await prompts(
|
||||
{
|
||||
type: "toggle",
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
import { LlamaIndexServer } from "@llamaindex/server";
|
||||
|
||||
new LlamaIndexServer({
|
||||
uiConfig: {
|
||||
componentsDir: "components",
|
||||
layoutDir: "layout",
|
||||
llamaDeploy: { deployment: "chat", workflow: "workflow" },
|
||||
},
|
||||
}).start();
|
||||
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "llamaindex-server-ui",
|
||||
"version": "0.0.1",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "nodemon --exec tsx index.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@llamaindex/server": "0.2.10",
|
||||
"dotenv": "^16.4.7"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.10.3",
|
||||
"nodemon": "^3.1.10",
|
||||
"tsx": "4.7.2",
|
||||
"typescript": "^5.3.2"
|
||||
}
|
||||
}
|
||||
@@ -21,7 +21,7 @@ export default function Header() {
|
||||
</a>
|
||||
<img
|
||||
className="h-[24px] w-[24px] rounded-sm"
|
||||
src="/llama.png"
|
||||
src="https://ui.llamaindex.ai/llama.png"
|
||||
alt="Llama Logo"
|
||||
/>
|
||||
</div>
|
||||
|
||||
+76
-29
@@ -1,59 +1,106 @@
|
||||
This is a [LlamaIndex](https://www.llamaindex.ai/) simple agentic RAG project using [Agent Workflows](https://docs.llamaindex.ai/en/stable/examples/agent/agent_workflow_basic/).
|
||||
# LlamaIndex Workflow Example
|
||||
|
||||
## Getting Started
|
||||
This is a [LlamaIndex](https://www.llamaindex.ai/) project that uses [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/) deployed with [LlamaDeploy](https://github.com/run-llama/llama_deploy).
|
||||
|
||||
First, setup the environment with uv:
|
||||
LlamaDeploy is a system for deploying and managing LlamaIndex workflows, while LlamaIndexServer provides a pre-built TypeScript server with an integrated chat UI that can connect directly to LlamaDeploy deployments. This example shows how you can quickly set up a complete chat application by combining these two technologies.
|
||||
|
||||
> **_Note:_** This step is not needed if you are using the dev-container.
|
||||
## Prerequisites
|
||||
|
||||
```shell
|
||||
If you haven't installed uv, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/) to install it.
|
||||
|
||||
You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [src/settings.py](src/settings.py).
|
||||
|
||||
Please set up their API keys in the `src/.env` file.
|
||||
|
||||
## Installation
|
||||
|
||||
Both the SDK and the CLI are part of the LlamaDeploy Python package. To install, just run:
|
||||
|
||||
```bash
|
||||
uv sync
|
||||
```
|
||||
|
||||
Then check the parameters that have been pre-configured in the `.env` file in this directory.
|
||||
Make sure you have set the `OPENAI_API_KEY` for the LLM.
|
||||
If you don't have uv installed, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/).
|
||||
|
||||
Second, generate the embeddings of the documents in the `./data` directory:
|
||||
## Generate Index
|
||||
|
||||
Generate the embeddings of the documents in the `./data` directory:
|
||||
|
||||
```shell
|
||||
uv run generate
|
||||
```
|
||||
|
||||
Third, run the development server:
|
||||
## Running the Deployment
|
||||
|
||||
```shell
|
||||
uv run fastapi dev
|
||||
```
|
||||
|
||||
Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
|
||||
|
||||
To start the app optimized for **production**, run:
|
||||
At this point we have all we need to run this deployment. Ideally, we would have the API server already running
|
||||
somewhere in the cloud, but to get started let's start an instance locally. Run the following python script
|
||||
from a shell:
|
||||
|
||||
```
|
||||
uv run fastapi run
|
||||
$ uv run -m llama_deploy.apiserver
|
||||
INFO: Started server process [10842]
|
||||
INFO: Waiting for application startup.
|
||||
INFO: Application startup complete.
|
||||
INFO: Uvicorn running on http://0.0.0.0:4501 (Press CTRL+C to quit)
|
||||
```
|
||||
|
||||
## Configure LLM and Embedding Model
|
||||
From another shell, use the CLI, `llamactl`, to create the deployment:
|
||||
|
||||
You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
|
||||
```
|
||||
$ uv run llamactl deploy llama_deploy.yml
|
||||
Deployment successful: chat
|
||||
```
|
||||
|
||||
## UI Interface
|
||||
|
||||
LlamaDeploy will serve the UI through the apiserver. Point the browser to [http://localhost:4501/deployments/chat/ui](http://localhost:4501/deployments/chat/ui) to interact with your deployment through a user-friendly interface.
|
||||
|
||||
## API endpoints
|
||||
|
||||
You can find all the endpoints in the [API documentation](http://localhost:4501/docs). To get started, you can try the following endpoints:
|
||||
|
||||
Create a new task:
|
||||
|
||||
```bash
|
||||
curl -X POST 'http://localhost:4501/deployments/chat/tasks/create' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"input": "{\"user_msg\":\"Hello\",\"chat_history\":[]}",
|
||||
"service_id": "workflow"
|
||||
}'
|
||||
```
|
||||
|
||||
Stream events:
|
||||
|
||||
```bash
|
||||
curl 'http://localhost:4501/deployments/chat/tasks/0b411be6-005d-43f0-9b6b-6a0017f08002/events?session_id=dd36442c-45ca-4eaa-8d75-b4e6dad1a83e&raw_event=true' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
Note that the task_id and session_id are returned when creating a new task.
|
||||
|
||||
## Use Case
|
||||
|
||||
We have prepared an [example workflow](./app/workflow.py) for the agentic RAG use case, where you can ask questions about the example documents in the [./data](./data) directory.
|
||||
We have prepared an [example workflow](./src/workflow.py) for the agentic RAG use case, where you can ask questions about the example documents in the [./data](./data) directory.
|
||||
To update the workflow, you can modify the code in [`src/workflow.py`](src/workflow.py).
|
||||
|
||||
You can start by sending an request on the [chat UI](http://localhost:8000) or you can test the `/api/chat` endpoint with the following curl request:
|
||||
## Customize the UI
|
||||
|
||||
```
|
||||
curl --location 'localhost:8000/api/chat' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{ "messages": [{ "role": "user", "content": "What standards for a letter exist?" }] }'
|
||||
```
|
||||
The UI is served by the LlamaIndexServer package. You can configure the UI by modifying the `uiConfig` in the [ui/index.ts](ui/index.ts) file.
|
||||
|
||||
The following are the available options:
|
||||
|
||||
- `starterQuestions`: Predefined questions for chat interface
|
||||
- `componentsDir`: Directory for custom event components
|
||||
- `layoutDir`: Directory for custom layout components
|
||||
- `llamaCloudIndexSelector`: Enable LlamaCloud integration
|
||||
- `llamaDeploy`: The LlamaDeploy configuration (the deployment name and workflow name defined in the [llama_deploy.yml](llama_deploy.yml) file)
|
||||
|
||||
## Learn More
|
||||
|
||||
To learn more about LlamaIndex, take a look at the following resources:
|
||||
|
||||
- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
|
||||
- [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
|
||||
- [LlamaDeploy GitHub Repository](https://github.com/run-llama/llama_deploy)
|
||||
- [Chat-UI Documentation](https://ts.llamaindex.ai/docs/chat-ui)
|
||||
|
||||
You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
|
||||
You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
|
||||
@@ -0,0 +1,106 @@
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from llama_index.core import QueryBundle
|
||||
from llama_index.core.postprocessor.types import BaseNodePostprocessor
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
from llama_index.core.query_engine.retriever_query_engine import RetrieverQueryEngine
|
||||
from llama_index.core.response_synthesizers import Accumulate
|
||||
from llama_index.core.schema import NodeWithScore
|
||||
from llama_index.core.tools.query_engine import QueryEngineTool
|
||||
|
||||
|
||||
# Used as a prompt for synthesizer
|
||||
# Override this prompt by passing a custom `text_qa_template` to `CitationSynthesizer`
|
||||
CITATION_PROMPT = """
|
||||
Context information is below.
|
||||
------------------
|
||||
{context_str}
|
||||
------------------
|
||||
The context are multiple text chunks, each text chunk has its own citation_id at the beginning.
|
||||
Use the citation_id for citation construction.
|
||||
|
||||
Answer the following query with citations:
|
||||
------------------
|
||||
{query_str}
|
||||
------------------
|
||||
|
||||
## Citation format
|
||||
|
||||
[citation:id]
|
||||
|
||||
Where:
|
||||
- [citation:] is a matching pattern which is required for all citations.
|
||||
- `id` is the `citation_id` provided in the context or previous response.
|
||||
|
||||
Example:
|
||||
```
|
||||
Here is a response that uses context information [citation:90ca859f-4f32-40ca-8cd0-edfad4fb298b]
|
||||
and other ideas that don't use context information [citation:17b2cc9a-27ae-4b6d-bede-5ca60fc00ff4] .\n
|
||||
The citation block will be displayed automatically with useful information for the user in the UI [citation:1c606612-e75f-490e-8374-44e79f818d19] .
|
||||
```
|
||||
|
||||
## Requirements:
|
||||
1. Always include citations for every fact from the context information in your response.
|
||||
2. Make sure that the citation_id is correct with the context, don't mix up the citation_id with other information.
|
||||
|
||||
Now, you answer the query with citations:
|
||||
"""
|
||||
|
||||
|
||||
class NodeCitationProcessor(BaseNodePostprocessor):
    """
    Node postprocessor that stamps every retrieved node with a `citation_id`.

    The value is a copy of the node's own `node_id`, written into the node
    metadata so it can be referenced when constructing citations.
    """

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """
        Copy each node's id into its metadata under the `citation_id` key.

        Args:
            nodes: The scored nodes to annotate; they are modified in place.
            query_bundle: Not used by this postprocessor.

        Returns:
            The same list that was passed in.
        """
        for scored in nodes:
            inner = scored.node
            inner.metadata["citation_id"] = inner.node_id
        return nodes
|
||||
|
||||
|
||||
class CitationSynthesizer(Accumulate):
    """
    Accumulate synthesizer whose question-answering prompt asks for citations.

    Unless the caller supplies a `text_qa_template`, the module-level
    `CITATION_PROMPT` is used as the template.
    """

    def __init__(self, **kwargs: Any) -> None:
        """
        Args:
            **kwargs: Forwarded to `Accumulate.__init__`. A missing or `None`
                `text_qa_template` is replaced by the citation prompt.
        """
        # Fall back to the citation prompt when no template was provided
        if kwargs.get("text_qa_template") is None:
            kwargs["text_qa_template"] = PromptTemplate(template=CITATION_PROMPT)
        super().__init__(**kwargs)
|
||||
|
||||
|
||||
# Add this prompt to your agent system prompt
|
||||
CITATION_SYSTEM_PROMPT = (
|
||||
"\nAnswer the user question using the response from the query tool. "
|
||||
"It's important to respect the citation information in the response. "
|
||||
"Don't mix up the citation_id, keep them at the correct fact."
|
||||
)
|
||||
|
||||
|
||||
def enable_citation(query_engine_tool: QueryEngineTool) -> QueryEngineTool:
    """
    Turn on citation support for a query engine tool.

    The tool's query engine gets a `CitationSynthesizer` as its response
    synthesizer and an extra `NodeCitationProcessor` appended to its node
    postprocessors. The tool description is extended to mention the citation
    format.

    Note: the existing response synthesizer of the query engine is replaced.

    Args:
        query_engine_tool: The tool to update; it is modified in place.

    Returns:
        The same tool instance that was passed in.

    Raises:
        ValueError: If the tool's query engine is not a `RetrieverQueryEngine`.
    """
    engine = query_engine_tool.query_engine
    if not isinstance(engine, RetrieverQueryEngine):
        message = (
            "Citation feature requires a RetrieverQueryEngine. Your tool's query engine is a "
            f"{type(engine)}."
        )
        raise ValueError(message)

    # Swap in the citation-aware synthesizer and register the id-stamping postprocessor
    engine._response_synthesizer = CitationSynthesizer()
    engine._node_postprocessors += [NodeCitationProcessor()]
    query_engine_tool._query_engine = engine

    # Tell the agent what the tool output will look like
    citation_note = "\nThe output will include citations with the format [citation:id] for each chunk of information in the knowledge base."
    query_engine_tool.metadata.description = (
        query_engine_tool.metadata.description + citation_note
    )
    return query_engine_tool
|
||||
@@ -0,0 +1,47 @@
|
||||
import os
|
||||
from typing import Any, Optional
|
||||
|
||||
from llama_index.core.base.base_query_engine import BaseQueryEngine
|
||||
from llama_index.core.indices.base import BaseIndex
|
||||
from llama_index.core.tools.query_engine import QueryEngineTool
|
||||
|
||||
def create_query_engine(index: BaseIndex, **kwargs: Any) -> BaseQueryEngine:
    """
    Create a query engine for the given index.

    When the `TOP_K` environment variable holds a non-zero integer and no
    `filters` keyword argument was given, that value is passed on as
    `similarity_top_k`.

    Args:
        index: The index to create a query engine for.
        **kwargs: Additional parameters forwarded to `index.as_query_engine`,
            e.g. `similarity_top_k`.

    Returns:
        The query engine built by `index.as_query_engine`.

    Raises:
        ValueError: If `TOP_K` is set to a non-blank value that is not an integer.
    """
    # An unset or blank TOP_K (e.g. `TOP_K=` in a .env file) means "not configured"
    raw_top_k = (os.getenv("TOP_K") or "").strip()
    top_k = int(raw_top_k) if raw_top_k else 0
    if top_k != 0 and kwargs.get("filters") is None:
        kwargs["similarity_top_k"] = top_k

    return index.as_query_engine(**kwargs)
|
||||
|
||||
|
||||
def get_query_engine_tool(
    index: BaseIndex,
    name: Optional[str] = None,
    description: Optional[str] = None,
    **kwargs: Any,
) -> QueryEngineTool:
    """
    Build a `QueryEngineTool` backed by a query engine for the given index.

    Args:
        index: The index to create a query engine for.
        name (optional): The name of the tool. Defaults to "query_index".
        description (optional): The description of the tool. A generic
            knowledge-base description is used when omitted.
        **kwargs: Forwarded to `create_query_engine`.

    Returns:
        The tool wrapping the newly created query engine.
    """
    tool_name = "query_index" if name is None else name
    tool_description = (
        "Use this tool to retrieve information from a knowledge base. Provide a specific query and can call the tool multiple times if necessary."
        if description is None
        else description
    )
    return QueryEngineTool.from_defaults(
        query_engine=create_query_engine(index, **kwargs),
        name=tool_name,
        description=tool_description,
    )
|
||||
+13
-10
@@ -1,18 +1,18 @@
|
||||
from typing import Optional
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from app.index import get_index
|
||||
from llama_index.core.agent.workflow import AgentWorkflow
|
||||
from llama_index.core.settings import Settings
|
||||
from llama_index.server.api.models import ChatRequest
|
||||
from llama_index.server.tools.index import get_query_engine_tool
|
||||
from llama_index.server.tools.index.citation import (
|
||||
CITATION_SYSTEM_PROMPT,
|
||||
enable_citation,
|
||||
)
|
||||
|
||||
from src.index import get_index
|
||||
from src.query import get_query_engine_tool
|
||||
from src.citation import CITATION_SYSTEM_PROMPT, enable_citation
|
||||
from src.settings import init_settings
|
||||
|
||||
|
||||
def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow:
|
||||
index = get_index(chat_request=chat_request)
|
||||
def create_workflow() -> AgentWorkflow:
|
||||
load_dotenv()
|
||||
init_settings()
|
||||
index = get_index()
|
||||
if index is None:
|
||||
raise RuntimeError(
|
||||
"Index not found! Please run `uv run generate` to index the data first."
|
||||
@@ -30,3 +30,6 @@ def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow
|
||||
llm=Settings.llm,
|
||||
system_prompt=system_prompt,
|
||||
)
|
||||
|
||||
|
||||
workflow = create_workflow()
|
||||
|
||||
+71
-37
@@ -1,65 +1,99 @@
|
||||
This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/).
|
||||
# LlamaIndex Workflow Example
|
||||
|
||||
## Getting Started
|
||||
This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/) deployed with [LlamaDeploy](https://github.com/run-llama/llama_deploy).
|
||||
|
||||
First, setup the environment with uv:
|
||||
LlamaDeploy is a system for deploying and managing LlamaIndex workflows, while LlamaIndexServer provides a pre-built TypeScript server with an integrated chat UI that can connect directly to LlamaDeploy deployments. This example shows how you can quickly set up a complete chat application by combining these two technologies.
|
||||
|
||||
> **_Note:_** This step is not needed if you are using the dev-container.
|
||||
## Prerequisites
|
||||
|
||||
```shell
|
||||
If you haven't installed uv, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/) to install it.
|
||||
|
||||
You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [src/settings.py](src/settings.py).
|
||||
|
||||
Please set up their API keys in the `src/.env` file.
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
Both the SDK and the CLI are part of the LlamaDeploy Python package. To install, just run:
|
||||
|
||||
```bash
|
||||
uv sync
|
||||
```
|
||||
|
||||
Then check the parameters that have been pre-configured in the `.env` file in this directory.
|
||||
Make sure you have set the `OPENAI_API_KEY` for the LLM.
|
||||
If you don't have uv installed, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/).
|
||||
|
||||
Then, run the development server:
|
||||
## Running the Deployment
|
||||
|
||||
```shell
|
||||
uv run fastapi dev
|
||||
```
|
||||
|
||||
Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
|
||||
|
||||
To start the app optimized for **production**, run:
|
||||
At this point we have all we need to run this deployment. Ideally, we would have the API server already running
|
||||
somewhere in the cloud, but to get started let's start an instance locally. Run the following python script
|
||||
from a shell:
|
||||
|
||||
```
|
||||
uv run fastapi run
|
||||
$ uv run -m llama_deploy.apiserver
|
||||
INFO: Started server process [10842]
|
||||
INFO: Waiting for application startup.
|
||||
INFO: Application startup complete.
|
||||
INFO: Uvicorn running on http://0.0.0.0:4501 (Press CTRL+C to quit)
|
||||
```
|
||||
|
||||
## Configure LLM and Embedding Model
|
||||
From another shell, use the CLI, `llamactl`, to create the deployment:
|
||||
|
||||
You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
|
||||
```
|
||||
$ uv run llamactl deploy llama_deploy.yml
|
||||
Deployment successful: chat
|
||||
```
|
||||
|
||||
## UI Interface
|
||||
|
||||
LlamaDeploy will serve the UI through the apiserver. Point the browser to [http://localhost:4501/deployments/chat/ui](http://localhost:4501/deployments/chat/ui) to interact with your deployment through a user-friendly interface.
|
||||
|
||||
## API endpoints
|
||||
|
||||
You can find all the endpoints in the [API documentation](http://localhost:4501/docs). To get started, you can try the following endpoints:
|
||||
|
||||
Create a new task:
|
||||
|
||||
```bash
|
||||
curl -X POST 'http://localhost:4501/deployments/chat/tasks/create' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"input": "{\"user_msg\":\"Hello\",\"chat_history\":[]}",
|
||||
"service_id": "workflow"
|
||||
}'
|
||||
```
|
||||
|
||||
Stream events:
|
||||
|
||||
```bash
|
||||
curl 'http://localhost:4501/deployments/chat/tasks/0b411be6-005d-43f0-9b6b-6a0017f08002/events?session_id=dd36442c-45ca-4eaa-8d75-b4e6dad1a83e&raw_event=true' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
Note that the task_id and session_id are returned when creating a new task.
|
||||
|
||||
## Use Case
|
||||
|
||||
AI-powered code generator that can help you generate an app with a chat interface, code editor and app preview.
|
||||
|
||||
To update the workflow, you can modify the code in [`workflow.py`](app/workflow.py).
|
||||
|
||||
You can start by sending a request on the [chat UI](http://localhost:8000) or you can test the `/api/chat` endpoint with the following curl request:
|
||||
|
||||
```
|
||||
curl --location 'localhost:8000/api/chat' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{ "messages": [{ "role": "user", "content": "Create a report comparing the finances of Apple and Tesla" }] }'
|
||||
```
|
||||
To update the workflow, you can modify the code in [`src/workflow.py`](src/workflow.py).
|
||||
|
||||
## Customize the UI
|
||||
|
||||
To customize the UI, you can start by modifying the [./components/ui_event.jsx](./components/ui_event.jsx) file.
|
||||
The UI is served by the LlamaIndexServer package. You can configure the UI by modifying the `uiConfig` in the [ui/index.ts](ui/index.ts) file.
|
||||
|
||||
You can also generate a new code for the workflow using LLM by running the following command:
|
||||
The following are the available options:
|
||||
|
||||
```
|
||||
uv run generate_ui
|
||||
```
|
||||
- `starterQuestions`: Predefined questions for chat interface
|
||||
- `componentsDir`: Directory for custom event components
|
||||
- `layoutDir`: Directory for custom layout components
|
||||
- `llamaCloudIndexSelector`: Enable LlamaCloud integration
|
||||
- `llamaDeploy`: The LlamaDeploy configuration (the deployment name and workflow name defined in the [llama_deploy.yml](llama_deploy.yml) file)
|
||||
|
||||
## Learn More
|
||||
|
||||
To learn more about LlamaIndex, take a look at the following resources:
|
||||
|
||||
- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
|
||||
- [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
|
||||
- [LlamaIndex Server](https://pypi.org/project/llama-index-server/)
|
||||
- [LlamaDeploy GitHub Repository](https://github.com/run-llama/llama_deploy)
|
||||
- [Chat-UI Documentation](https://ts.llamaindex.ai/docs/chat-ui)
|
||||
|
||||
You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
|
||||
You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
|
||||
@@ -0,0 +1,131 @@
|
||||
import json
|
||||
import re
|
||||
from typing import List, Optional, Any
|
||||
|
||||
from pydantic import ValidationError
|
||||
from llama_index.core.chat_ui.models.artifact import (
|
||||
Artifact,
|
||||
ArtifactType,
|
||||
CodeArtifactData,
|
||||
DocumentArtifactData,
|
||||
)
|
||||
from llama_index.core.llms import ChatMessage
|
||||
|
||||
INLINE_ANNOTATION_KEY = "annotation"
|
||||
|
||||
|
||||
def get_inline_annotations(message: ChatMessage) -> List[Any]:
    """
    Extract inline annotations from a chat message.

    The message content is scanned for fenced code blocks tagged with
    `INLINE_ANNOTATION_KEY`. Each block body is parsed as JSON and kept only
    when it is an object that has both a "type" and a "data" key.

    Args:
        message: The chat message whose content is scanned.

    Returns:
        The parsed annotation objects in order of appearance. The list is
        empty when the message has no content or no valid annotation block.
    """
    markdown_content = message.content

    # A message may carry no text content at all; there is nothing to scan
    # and passing None to the regex would raise a TypeError.
    if not markdown_content:
        return []

    inline_annotations: List[Any] = []

    # Regex to match annotation code blocks
    # Matches ```annotation followed by content until closing ```
    annotation_regex = re.compile(
        rf"```{re.escape(INLINE_ANNOTATION_KEY)}\s*\n([\s\S]*?)\n```", re.MULTILINE
    )

    for match in annotation_regex.finditer(markdown_content):
        json_content = match.group(1).strip() if match.group(1) else None

        if not json_content:
            continue

        try:
            # Parse the JSON content
            parsed = json.loads(json_content)

            # Check for required fields in the parsed annotation
            if (
                not isinstance(parsed, dict)
                or "type" not in parsed
                or "data" not in parsed
            ):
                continue

            # Extract the annotation data
            inline_annotations.append(parsed)
        except (json.JSONDecodeError, ValidationError) as error:
            # Skip invalid annotations - they might be malformed JSON or invalid schema
            print(f"Failed to parse annotation: {error}")

    return inline_annotations
|
||||
|
||||
|
||||
def artifact_from_message(message: ChatMessage) -> Optional[Artifact]:
    """
    Build an artifact from the first usable artifact annotation in a message.

    Annotations are read with `get_inline_annotations`. Only annotations of
    type "artifact" whose data has type "code" or "document" produce a result.
    An annotation that fails to convert is reported with `print` and skipped.

    Args:
        message: The chat message to inspect.

    Returns:
        The first artifact that could be built, or None when there is none.
    """
    for annotation in get_inline_annotations(message):
        # Only dict annotations tagged as artifacts are of interest
        if not isinstance(annotation, dict) or annotation.get("type") != "artifact":
            continue

        try:
            payload = annotation.get("data")
            if not payload:
                continue

            kind = payload.get("type")

            if kind == "code":
                # The nested data object holds the actual code information
                details = payload.get("data", {})
                code_payload = CodeArtifactData(
                    file_name=details.get("file_name", ""),
                    code=details.get("code", ""),
                    language=details.get("language", ""),
                )
                return Artifact(
                    created_at=payload.get("created_at"),
                    type=ArtifactType.CODE,
                    data=code_payload,
                )

            if kind == "document":
                # The nested data object holds the actual document information
                details = payload.get("data", {})
                document_payload = DocumentArtifactData(
                    title=details.get("title", ""),
                    content=details.get("content", ""),
                    type=details.get("type", "markdown"),
                    sources=details.get("sources"),
                )
                return Artifact(
                    created_at=payload.get("created_at"),
                    type=ArtifactType.DOCUMENT,
                    data=document_payload,
                )

            # Any other artifact type is ignored; move on to the next annotation
        except Exception as e:
            print(
                f"Failed to parse artifact from annotation: {annotation}. Error: {e}"
            )

    return None
|
||||
|
||||
|
||||
def get_artifacts(chat_history: List[ChatMessage]) -> List[Artifact]:
    """
    Collect the artifacts found in a chat history, ordered by creation time.

    Each message contributes at most one artifact (see
    `artifact_from_message`). Artifacts whose `created_at` is None sort after
    all artifacts that have a creation time.

    Args:
        chat_history: The messages to inspect.

    Returns:
        A new list of artifacts sorted by `created_at`.
    """
    found = [
        candidate
        for candidate in map(artifact_from_message, chat_history)
        if candidate is not None
    ]

    # The leading boolean pushes entries without a creation time to the end
    found.sort(key=lambda item: (item.created_at is None, item.created_at))
    return found
|
||||
|
||||
|
||||
def get_last_artifact(chat_history: List[ChatMessage]) -> Optional[Artifact]:
    """
    Return the final artifact of the chat history.

    The artifacts are taken in the order produced by `get_artifacts`.

    Args:
        chat_history: The messages to inspect.

    Returns:
        The last artifact in that order, or None when there are no artifacts.
    """
    ordered = get_artifacts(chat_history)
    if not ordered:
        return None
    return ordered[-1]
|
||||
+33
-26
@@ -2,11 +2,10 @@ import re
|
||||
import time
|
||||
from typing import Any, Literal, Optional, Union
|
||||
|
||||
from llama_index.core.chat_engine.types import ChatMessage
|
||||
from llama_index.core.llms import LLM
|
||||
from llama_index.core import Settings
|
||||
from llama_index.core.llms import LLM, ChatMessage
|
||||
from llama_index.core.memory import ChatMemoryBuffer
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
from llama_index.llms.openai import OpenAI
|
||||
from llama_index.core.workflow import (
|
||||
Context,
|
||||
Event,
|
||||
@@ -15,25 +14,25 @@ from llama_index.core.workflow import (
|
||||
Workflow,
|
||||
step,
|
||||
)
|
||||
from llama_index.server.api.models import (
|
||||
from llama_index.core.chat_ui.models.artifact import (
|
||||
Artifact,
|
||||
ArtifactEvent,
|
||||
ArtifactType,
|
||||
ChatRequest,
|
||||
CodeArtifactData,
|
||||
UIEvent,
|
||||
)
|
||||
from llama_index.server.api.utils import get_last_artifact
|
||||
from llama_index.core.chat_ui.events import (
|
||||
UIEvent,
|
||||
ArtifactEvent,
|
||||
)
|
||||
|
||||
from src.utils import get_last_artifact
|
||||
from src.settings import init_settings
|
||||
from pydantic import BaseModel, Field
|
||||
from dotenv import load_dotenv
|
||||
|
||||
|
||||
def create_workflow(chat_request: ChatRequest) -> Workflow:
|
||||
workflow = CodeArtifactWorkflow(
|
||||
llm=OpenAI(model="gpt-4.1"),
|
||||
chat_request=chat_request,
|
||||
timeout=120.0,
|
||||
)
|
||||
return workflow
|
||||
def create_workflow() -> Workflow:
|
||||
load_dotenv()
|
||||
init_settings()
|
||||
return CodeArtifactWorkflow(timeout=120.0)
|
||||
|
||||
|
||||
class Requirement(BaseModel):
|
||||
@@ -83,8 +82,6 @@ class CodeArtifactWorkflow(Workflow):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
llm: LLM,
|
||||
chat_request: ChatRequest,
|
||||
**kwargs: Any,
|
||||
):
|
||||
"""
|
||||
@@ -93,9 +90,8 @@ class CodeArtifactWorkflow(Workflow):
|
||||
chat_request: The chat request from the chat app to use.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self.llm = llm
|
||||
self.chat_request = chat_request
|
||||
self.last_artifact = get_last_artifact(chat_request)
|
||||
self.llm: LLM = Settings.llm
|
||||
self.last_artifact: Optional[Artifact] = None
|
||||
|
||||
@step
|
||||
async def prepare_chat_history(self, ctx: Context, ev: StartEvent) -> PlanEvent:
|
||||
@@ -103,13 +99,21 @@ class CodeArtifactWorkflow(Workflow):
|
||||
if user_msg is None:
|
||||
raise ValueError("user_msg is required to run the workflow")
|
||||
await ctx.set("user_msg", user_msg)
|
||||
chat_history = ev.chat_history or []
|
||||
chat_history.append(
|
||||
|
||||
# prepare chat history from StartEvent
|
||||
messages = [
|
||||
ChatMessage(
|
||||
role="user",
|
||||
content=user_msg,
|
||||
role=msg.get("role", "user"),
|
||||
content=msg.get("content", ""),
|
||||
)
|
||||
)
|
||||
for msg in ev.get("chat_history", [])
|
||||
]
|
||||
chat_history = [*messages, ChatMessage(role="user", content=user_msg)]
|
||||
|
||||
# extract inline artifact from chat history
|
||||
last_artifact = get_last_artifact(messages)
|
||||
self.last_artifact = last_artifact
|
||||
|
||||
memory = ChatMemoryBuffer.from_defaults(
|
||||
chat_history=chat_history,
|
||||
llm=self.llm,
|
||||
@@ -373,3 +377,6 @@ class CodeArtifactWorkflow(Workflow):
|
||||
)
|
||||
)
|
||||
return StopEvent(result=response_stream)
|
||||
|
||||
|
||||
workflow = create_workflow()
|
||||
|
||||
+72
-35
@@ -1,69 +1,106 @@
|
||||
This is a [LlamaIndex](https://www.llamaindex.ai/) multi-agents project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/).
|
||||
# LlamaIndex Workflow Example
|
||||
|
||||
## Getting Started
|
||||
This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/) deployed with [LlamaDeploy](https://github.com/run-llama/llama_deploy).
|
||||
|
||||
First, setup the environment with uv:
|
||||
LlamaDeploy is a system for deploying and managing LlamaIndex workflows, while LlamaIndexServer provides a pre-built TypeScript server with an integrated chat UI that can connect directly to LlamaDeploy deployments. This example shows how you can quickly set up a complete chat application by combining these two technologies.
|
||||
|
||||
> **_Note:_** This step is not needed if you are using the dev-container.
|
||||
## Prerequisites
|
||||
|
||||
```shell
|
||||
If you haven't installed uv, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/) to install it.
|
||||
|
||||
You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [src/settings.py](src/settings.py).
|
||||
|
||||
Please set up their API keys in the `src/.env` file.
|
||||
|
||||
## Installation
|
||||
|
||||
Both the SDK and the CLI are part of the LlamaDeploy Python package. To install, just run:
|
||||
|
||||
```bash
|
||||
uv sync
|
||||
```
|
||||
|
||||
Then check the parameters that have been pre-configured in the `.env` file in this directory.
|
||||
Make sure you have set the `OPENAI_API_KEY` for the LLM.
|
||||
If you don't have uv installed, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/).
|
||||
|
||||
Second, generate the embeddings of the documents in the `./data` directory:
|
||||
## Generate Index
|
||||
|
||||
Generate the embeddings of the documents in the `./data` directory:
|
||||
|
||||
```shell
|
||||
uv run generate
|
||||
```
|
||||
|
||||
Third, run the development server:
|
||||
## Running the Deployment
|
||||
|
||||
```shell
|
||||
uv run fastapi dev
|
||||
```
|
||||
|
||||
Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
|
||||
|
||||
To start the app optimized for **production**, run:
|
||||
At this point we have all we need to run this deployment. Ideally, we would have the API server already running
|
||||
somewhere in the cloud, but to get started let's start an instance locally. Run the following python script
|
||||
from a shell:
|
||||
|
||||
```
|
||||
uv run fastapi run
|
||||
$ uv run -m llama_deploy.apiserver
|
||||
INFO: Started server process [10842]
|
||||
INFO: Waiting for application startup.
|
||||
INFO: Application startup complete.
|
||||
INFO: Uvicorn running on http://0.0.0.0:4501 (Press CTRL+C to quit)
|
||||
```
|
||||
|
||||
## Configure LLM and Embedding Model
|
||||
From another shell, use the CLI, `llamactl`, to create the deployment:
|
||||
|
||||
You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
|
||||
```
|
||||
$ uv run llamactl deploy llama_deploy.yml
|
||||
Deployment successful: chat
|
||||
```
|
||||
|
||||
## UI Interface
|
||||
|
||||
LlamaDeploy will serve the UI through the apiserver. Point the browser to [http://localhost:4501/deployments/chat/ui](http://localhost:4501/deployments/chat/ui) to interact with your deployment through a user-friendly interface.
|
||||
|
||||
## API endpoints
|
||||
|
||||
You can find all the endpoints in the [API documentation](http://localhost:4501/docs). To get started, you can try the following endpoints:
|
||||
|
||||
Create a new task:
|
||||
|
||||
```bash
|
||||
curl -X POST 'http://localhost:4501/deployments/chat/tasks/create' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"input": "{\"user_msg\":\"Hello\",\"chat_history\":[]}",
|
||||
"service_id": "workflow"
|
||||
}'
|
||||
```
|
||||
|
||||
Stream events:
|
||||
|
||||
```bash
|
||||
curl 'http://localhost:4501/deployments/chat/tasks/0b411be6-005d-43f0-9b6b-6a0017f08002/events?session_id=dd36442c-45ca-4eaa-8d75-b4e6dad1a83e&raw_event=true' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
Note that the task_id and session_id are returned when creating a new task.
|
||||
|
||||
## Use Case
|
||||
|
||||
We have prepared an [example workflow](./app/workflow.py) for the deep research use case, where you can ask questions about the example documents in the [./data](./data) directory.
|
||||
|
||||
You can start by sending a request on the [chat UI](http://localhost:8000) or you can test the `/api/chat` endpoint with the following curl request:
|
||||
|
||||
```
|
||||
curl --location 'localhost:8000/api/chat' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{ "messages": [{ "role": "user", "content": "Create a report comparing the finances of Apple and Tesla" }] }'
|
||||
```
|
||||
To update the workflow, you can modify the code in [`src/workflow.py`](src/workflow.py).
|
||||
|
||||
## Customize the UI
|
||||
|
||||
To customize the UI, you can start by modifying the [./components/ui_event.jsx](./components/ui_event.jsx) file.
|
||||
The UI is served by the LlamaIndexServer package. You can configure the UI by modifying the `uiConfig` in the [ui/index.ts](ui/index.ts) file.
|
||||
|
||||
You can also generate a new code for the workflow using LLM by running the following command:
|
||||
The following are the available options:
|
||||
|
||||
```
|
||||
uv run generate_ui
|
||||
```
|
||||
- `starterQuestions`: Predefined questions for chat interface
|
||||
- `componentsDir`: Directory for custom event components
|
||||
- `layoutDir`: Directory for custom layout components
|
||||
- `llamaCloudIndexSelector`: Enable LlamaCloud integration
|
||||
- `llamaDeploy`: The LlamaDeploy configuration (the deployment name and workflow name defined in the [llama_deploy.yml](llama_deploy.yml) file)
|
||||
|
||||
## Learn More
|
||||
|
||||
To learn more about LlamaIndex, take a look at the following resources:
|
||||
|
||||
- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
|
||||
- [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
|
||||
- [LlamaDeploy GitHub Repository](https://github.com/run-llama/llama_deploy)
|
||||
- [Chat-UI Documentation](https://ts.llamaindex.ai/docs/chat-ui)
|
||||
|
||||
You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
|
||||
You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
|
||||
@@ -0,0 +1,46 @@
|
||||
from typing import AsyncGenerator, Union
|
||||
from llama_index.core.base.llms.types import (
|
||||
CompletionResponse,
|
||||
CompletionResponseAsyncGen,
|
||||
ChatResponse,
|
||||
)
|
||||
from llama_index.core.workflow import Context
|
||||
from llama_index.core.agent.workflow.workflow_events import AgentStream
|
||||
|
||||
|
||||
async def write_response_to_stream(
    res: Union[CompletionResponse, CompletionResponseAsyncGen, AsyncGenerator[ChatResponse, None]],
    ctx: Context,
    current_agent_name: str = "assistant",
) -> str:
    """
    Forward an LLM response to the workflow event stream and return its text.

    A streaming response is consumed chunk by chunk; every chunk is published
    as an `AgentStream` event whose `response` field holds the text
    accumulated before that chunk. A non-streaming response publishes no
    events and its `text` is returned as is.

    Args:
        res: The LLM response (either streaming or non-streaming)
        ctx: The workflow context for writing events to stream
        current_agent_name: The name of the current agent (default: "assistant")

    Returns:
        The final response text as a string
    """
    # Non-streaming response (CompletionResponse): nothing to publish
    if not isinstance(res, AsyncGenerator):
        return res.text

    # Streaming response (CompletionResponseAsyncGen or ChatResponse AsyncGenerator)
    collected = ""
    async for chunk in res:
        piece = chunk.delta or ""
        event = AgentStream(
            delta=piece,
            response=collected,
            current_agent_name=current_agent_name,
            tool_calls=[],
            raw=getattr(chunk, 'raw', None) or "",
        )
        ctx.write_event_to_stream(event)
        collected += piece

    return collected
|
||||
+35
-23
@@ -1,9 +1,11 @@
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
import time
|
||||
from typing import List, Literal, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from app.index import get_index
|
||||
from llama_index.core.base.llms.types import (
|
||||
CompletionResponse,
|
||||
CompletionResponseAsyncGen,
|
||||
@@ -23,26 +25,31 @@ from llama_index.core.workflow import (
|
||||
Workflow,
|
||||
step,
|
||||
)
|
||||
from llama_index.server.api.models import (
|
||||
ArtifactEvent,
|
||||
ArtifactType,
|
||||
ChatRequest,
|
||||
SourceNodesEvent,
|
||||
UIEvent,
|
||||
from llama_index.core.chat_ui.models.artifact import (
|
||||
Artifact,
|
||||
ArtifactType,
|
||||
DocumentArtifactData,
|
||||
DocumentArtifactSource,
|
||||
)
|
||||
import time
|
||||
from llama_index.server.utils.stream import write_response_to_stream
|
||||
from pydantic import BaseModel, Field
|
||||
from llama_index.core.chat_ui.events import (
|
||||
UIEvent,
|
||||
ArtifactEvent,
|
||||
SourceNodesEvent,
|
||||
)
|
||||
|
||||
from src.index import get_index
|
||||
from src.settings import init_settings
|
||||
from src.utils import write_response_to_stream
|
||||
|
||||
logger = logging.getLogger("uvicorn")
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
def create_workflow(chat_request: Optional[ChatRequest] = None) -> Workflow:
|
||||
index = get_index(chat_request=chat_request)
|
||||
def create_workflow() -> Workflow:
|
||||
load_dotenv()
|
||||
init_settings()
|
||||
# TODO: load index in StartEvent
|
||||
index = get_index()
|
||||
if index is None:
|
||||
raise ValueError(
|
||||
"Index is not found. Try run generation script to create the index first."
|
||||
@@ -140,21 +147,23 @@ class DeepResearchWorkflow(Workflow):
|
||||
"""
|
||||
self.stream = ev.get("stream", True)
|
||||
self.user_request = ev.get("user_msg")
|
||||
chat_history = ev.get("chat_history")
|
||||
if chat_history is not None:
|
||||
self.memory.put_messages(chat_history)
|
||||
|
||||
messages = [
|
||||
ChatMessage(
|
||||
role=msg.get("role", "user"),
|
||||
content=msg.get("content", ""),
|
||||
)
|
||||
for msg in ev.get("chat_history", [])
|
||||
]
|
||||
user_message = ChatMessage(role="user", content=self.user_request)
|
||||
chat_history = [*messages, user_message]
|
||||
self.memory.put_messages(chat_history)
|
||||
|
||||
await ctx.set("total_questions", 0)
|
||||
|
||||
# Add user message to memory
|
||||
self.memory.put_messages(
|
||||
messages=[
|
||||
ChatMessage(
|
||||
role=MessageRole.USER,
|
||||
content=self.user_request,
|
||||
)
|
||||
]
|
||||
)
|
||||
self.memory.put_messages(messages=[user_message])
|
||||
|
||||
ctx.write_event_to_stream(
|
||||
UIEvent(
|
||||
type="ui_event",
|
||||
@@ -574,3 +583,6 @@ def _get_text_node_content_for_citation(node: NodeWithScore) -> str:
|
||||
node_id = node.node.node_id
|
||||
content = f"<Citation id='{node_id}'>\n{node.get_content(metadata_mode=MetadataMode.LLM)}</Citation id='{node_id}'>"
|
||||
return content
|
||||
|
||||
|
||||
workflow = create_workflow()
|
||||
+71
-37
@@ -1,66 +1,100 @@
|
||||
This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/).
|
||||
# LlamaIndex Workflow Example
|
||||
|
||||
## Getting Started
|
||||
This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/) deployed with [LlamaDeploy](https://github.com/run-llama/llama_deploy).
|
||||
|
||||
First, set up the environment with uv:
|
||||
LlamaDeploy is a system for deploying and managing LlamaIndex workflows, while LlamaIndexServer provides a pre-built TypeScript server with an integrated chat UI that can connect directly to LlamaDeploy deployments. This example shows how you can quickly set up a complete chat application by combining these two technologies.
|
||||
|
||||
> **_Note:_** This step is not needed if you are using the dev-container.
|
||||
## Prerequisites
|
||||
|
||||
```shell
|
||||
If you haven't installed uv, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/) to install it.
|
||||
|
||||
You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [src/settings.py](src/settings.py).
|
||||
|
||||
Please set up their API keys in the `src/.env` file.
|
||||
|
||||
## Installation
|
||||
|
||||
Both the SDK and the CLI are part of the LlamaDeploy Python package. To install, just run:
|
||||
|
||||
```bash
|
||||
uv sync
|
||||
```
|
||||
|
||||
Then check the parameters that have been pre-configured in the `.env` file in this directory.
|
||||
Make sure you have set the `OPENAI_API_KEY` for the LLM.
|
||||
If you don't have uv installed, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/).
|
||||
|
||||
Then, run the development server:
|
||||
## Running the Deployment
|
||||
|
||||
```shell
|
||||
uv run fastapi dev
|
||||
```
|
||||
|
||||
Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
|
||||
|
||||
To start the app optimized for **production**, run:
|
||||
At this point we have all we need to run this deployment. Ideally, we would have the API server already running
|
||||
somewhere in the cloud, but to get started let's start an instance locally. Run the following python script
|
||||
from a shell:
|
||||
|
||||
```
|
||||
uv run fastapi run
|
||||
$ uv run -m llama_deploy.apiserver
|
||||
INFO: Started server process [10842]
|
||||
INFO: Waiting for application startup.
|
||||
INFO: Application startup complete.
|
||||
INFO: Uvicorn running on http://0.0.0.0:4501 (Press CTRL+C to quit)
|
||||
```
|
||||
|
||||
## Configure LLM and Embedding Model
|
||||
From another shell, use the CLI, `llamactl`, to create the deployment:
|
||||
|
||||
You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
|
||||
```
|
||||
$ uv run llamactl deploy llama_deploy.yml
|
||||
Deployment successful: chat
|
||||
```
|
||||
|
||||
## UI Interface
|
||||
|
||||
LlamaDeploy will serve the UI through the apiserver. Point the browser to [http://localhost:4501/deployments/chat/ui](http://localhost:4501/deployments/chat/ui) to interact with your deployment through a user-friendly interface.
|
||||
|
||||
## API endpoints
|
||||
|
||||
You can find all the endpoints in the [API documentation](http://localhost:4501/docs). To get started, you can try the following endpoints:
|
||||
|
||||
Create a new task:
|
||||
|
||||
```bash
|
||||
curl -X POST 'http://localhost:4501/deployments/chat/tasks/create' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"input": "{\"user_msg\":\"Hello\",\"chat_history\":[]}",
|
||||
"service_id": "workflow"
|
||||
}'
|
||||
```
|
||||
|
||||
Stream events:
|
||||
|
||||
```bash
|
||||
curl 'http://localhost:4501/deployments/chat/tasks/0b411be6-005d-43f0-9b6b-6a0017f08002/events?session_id=dd36442c-45ca-4eaa-8d75-b4e6dad1a83e&raw_event=true' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
Note that the task_id and session_id are returned when creating a new task.
|
||||
|
||||
## Use Case
|
||||
|
||||
AI-powered document generator that can help you generate documents with a chat interface and simple markdown editor.
|
||||
|
||||
To update the workflow, you can modify the code in [`workflow.py`](app/workflow.py).
|
||||
|
||||
You can start by sending a request on the [chat UI](http://localhost:8000) or you can test the `/api/chat` endpoint with the following curl request:
|
||||
|
||||
```
|
||||
curl --location 'localhost:8000/api/chat' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{ "messages": [{ "role": "user", "content": "Create a report comparing the finances of Apple and Tesla" }] }'
|
||||
```
|
||||
To update the workflow, you can modify the code in [`src/workflow.py`](src/workflow.py).
|
||||
|
||||
## Customize the UI
|
||||
|
||||
To customize the UI, you can start by modifying the [./components/ui_event.jsx](./components/ui_event.jsx) file.
|
||||
The UI is served by the LlamaIndexServer package. You can configure the UI by modifying the `uiConfig` in the [ui/index.ts](ui/index.ts) file.
|
||||
|
||||
You can also generate new code for the workflow using an LLM by running the following command:
|
||||
The following are the available options:
|
||||
|
||||
```
|
||||
uv run generate_ui
|
||||
```
|
||||
- `starterQuestions`: Predefined questions for chat interface
|
||||
- `componentsDir`: Directory for custom event components
|
||||
- `layoutDir`: Directory for custom layout components
|
||||
- `llamaCloudIndexSelector`: Enable LlamaCloud integration
|
||||
- `llamaDeploy`: The LlamaDeploy configuration (the deployment name and workflow name that are defined in the [llama_deploy.yml](llama_deploy.yml) file)
|
||||
|
||||
To customize the UI, you can start by modifying the [./ui/components/ui_event.jsx](./ui/components/ui_event.jsx) file.
|
||||
|
||||
## Learn More
|
||||
|
||||
To learn more about LlamaIndex, take a look at the following resources:
|
||||
|
||||
- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
|
||||
- [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
|
||||
- [LlamaIndex Server](https://pypi.org/project/llama-index-server/)
|
||||
- [LlamaDeploy GitHub Repository](https://github.com/run-llama/llama_deploy)
|
||||
- [Chat-UI Documentation](https://ts.llamaindex.ai/docs/chat-ui)
|
||||
|
||||
You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
|
||||
You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
|
||||
+131
@@ -0,0 +1,131 @@
|
||||
import json
|
||||
import re
|
||||
from typing import List, Optional, Any
|
||||
|
||||
from pydantic import ValidationError
|
||||
from llama_index.core.chat_ui.models.artifact import (
|
||||
Artifact,
|
||||
ArtifactType,
|
||||
CodeArtifactData,
|
||||
DocumentArtifactData,
|
||||
)
|
||||
from llama_index.core.llms import ChatMessage
|
||||
|
||||
INLINE_ANNOTATION_KEY = "annotation"
|
||||
|
||||
|
||||
def get_inline_annotations(message: ChatMessage) -> List[Any]:
    """
    Extract inline annotations from a chat message.

    An inline annotation is a fenced code block tagged with
    ``INLINE_ANNOTATION_KEY`` whose body is a JSON object containing both a
    ``"type"`` and a ``"data"`` key.

    Args:
        message: The chat message whose ``content`` is scanned.

    Returns:
        The parsed annotation dicts in order of appearance. Blocks that are
        empty, are not valid JSON, or lack the required keys are skipped.
        A message without text content yields an empty list.
    """
    markdown_content = message.content

    # A message may have no text content; there is nothing to scan then
    # (passing None to the regex would raise TypeError).
    if not isinstance(markdown_content, str) or not markdown_content:
        return []

    inline_annotations: List[Any] = []

    # Regex to match annotation code blocks
    # Matches ```annotation followed by content until closing ```
    annotation_regex = re.compile(
        rf"```{re.escape(INLINE_ANNOTATION_KEY)}\s*\n([\s\S]*?)\n```", re.MULTILINE
    )

    for match in annotation_regex.finditer(markdown_content):
        json_content = match.group(1).strip() if match.group(1) else None

        if not json_content:
            continue

        try:
            # Parse the JSON content
            parsed = json.loads(json_content)

            # Check for required fields in the parsed annotation
            if (
                not isinstance(parsed, dict)
                or "type" not in parsed
                or "data" not in parsed
            ):
                continue

            # Extract the annotation data
            inline_annotations.append(parsed)
        except (json.JSONDecodeError, ValidationError) as error:
            # Skip invalid annotations - they might be malformed JSON or invalid schema
            print(f"Failed to parse annotation: {error}")

    return inline_annotations
|
||||
|
||||
|
||||
def artifact_from_message(message: ChatMessage) -> Optional[Artifact]:
    """
    Build an artifact from the first usable artifact annotation of a message.

    Only inline annotations of type ``"artifact"`` are considered. The
    annotation's ``data`` object selects the artifact kind (``"code"`` or
    ``"document"``) and holds a nested ``data`` object with the actual fields.

    Returns:
        The first artifact that could be built, or ``None`` when the message
        has no artifact annotation that can be turned into one.
    """
    for annotation in get_inline_annotations(message):
        if not isinstance(annotation, dict) or annotation.get("type") != "artifact":
            continue

        try:
            payload = annotation.get("data")
            if not payload:
                continue

            kind = payload.get("type")

            if kind == "code":
                # The nested data object carries the actual code information
                details = payload.get("data", {})
                code_data = CodeArtifactData(
                    file_name=details.get("file_name", ""),
                    code=details.get("code", ""),
                    language=details.get("language", ""),
                )
                return Artifact(
                    created_at=payload.get("created_at"),
                    type=ArtifactType.CODE,
                    data=code_data,
                )

            if kind == "document":
                # The nested data object carries the actual document information
                details = payload.get("data", {})
                doc_data = DocumentArtifactData(
                    title=details.get("title", ""),
                    content=details.get("content", ""),
                    type=details.get("type", "markdown"),
                    sources=details.get("sources"),
                )
                return Artifact(
                    created_at=payload.get("created_at"),
                    type=ArtifactType.DOCUMENT,
                    data=doc_data,
                )

            # Unknown artifact kind: move on to the next annotation.
        except Exception as e:
            # Best effort: report the broken annotation and keep looking.
            print(
                f"Failed to parse artifact from annotation: {annotation}. Error: {e}"
            )

    return None
|
||||
|
||||
|
||||
def get_artifacts(chat_history: List[ChatMessage]) -> List[Artifact]:
    """
    Collect the artifacts found in a chat history, ordered by creation time.

    Messages that do not yield an artifact are ignored. Artifacts whose
    ``created_at`` is ``None`` are placed after all the dated ones.
    """
    found = [
        artifact
        for artifact in map(artifact_from_message, chat_history)
        if artifact is not None
    ]

    # The leading boolean pushes undated artifacts (True) behind dated ones (False).
    found.sort(key=lambda a: (a.created_at is None, a.created_at))
    return found
|
||||
|
||||
|
||||
def get_last_artifact(chat_history: List[ChatMessage]) -> Optional[Artifact]:
    """
    Return the final entry of ``get_artifacts(chat_history)``.

    Returns ``None`` when the chat history contains no artifact.
    """
    ordered = get_artifacts(chat_history)
    if not ordered:
        return None
    return ordered[-1]
|
||||
+53
-41
@@ -1,10 +1,9 @@
|
||||
import re
|
||||
import time
|
||||
from typing import Any, Literal, Optional
|
||||
from typing import Any, Literal, Optional, Union
|
||||
|
||||
from llama_index.core.chat_engine.types import ChatMessage
|
||||
from llama_index.core.llms import LLM
|
||||
from llama_index.llms.openai import OpenAI
|
||||
from llama_index.core import Settings
|
||||
from llama_index.core.llms import LLM, ChatMessage
|
||||
from llama_index.core.memory import ChatMemoryBuffer
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
from llama_index.core.workflow import (
|
||||
@@ -15,26 +14,25 @@ from llama_index.core.workflow import (
|
||||
Workflow,
|
||||
step,
|
||||
)
|
||||
from llama_index.server.api.models import (
|
||||
from llama_index.core.chat_ui.models.artifact import (
|
||||
Artifact,
|
||||
ArtifactEvent,
|
||||
ArtifactType,
|
||||
ChatRequest,
|
||||
DocumentArtifactData,
|
||||
UIEvent,
|
||||
)
|
||||
from llama_index.server.api.utils import get_last_artifact
|
||||
from llama_index.core.chat_ui.events import (
|
||||
UIEvent,
|
||||
ArtifactEvent,
|
||||
)
|
||||
|
||||
from src.utils import get_last_artifact
|
||||
from src.settings import init_settings
|
||||
from pydantic import BaseModel, Field
|
||||
from dotenv import load_dotenv
|
||||
|
||||
|
||||
def create_workflow(chat_request: ChatRequest) -> Workflow:
|
||||
workflow = DocumentArtifactWorkflow(
|
||||
llm=OpenAI(model="gpt-4.1"),
|
||||
chat_request=chat_request,
|
||||
timeout=120.0,
|
||||
)
|
||||
return workflow
|
||||
|
||||
def create_workflow() -> Workflow:
|
||||
load_dotenv()
|
||||
init_settings()
|
||||
return DocumentArtifactWorkflow(timeout=120.0)
|
||||
|
||||
class DocumentRequirement(BaseModel):
|
||||
type: Literal["markdown", "html"]
|
||||
@@ -81,8 +79,6 @@ class DocumentArtifactWorkflow(Workflow):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
llm: LLM,
|
||||
chat_request: ChatRequest,
|
||||
**kwargs: Any,
|
||||
):
|
||||
"""
|
||||
@@ -91,9 +87,8 @@ class DocumentArtifactWorkflow(Workflow):
|
||||
chat_request: The chat request from the chat app to use.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self.llm = llm
|
||||
self.chat_request = chat_request
|
||||
self.last_artifact = get_last_artifact(chat_request)
|
||||
self.llm: LLM = Settings.llm
|
||||
self.last_artifact: Optional[Artifact] = None
|
||||
|
||||
@step
|
||||
async def prepare_chat_history(self, ctx: Context, ev: StartEvent) -> PlanEvent:
|
||||
@@ -101,13 +96,21 @@ class DocumentArtifactWorkflow(Workflow):
|
||||
if user_msg is None:
|
||||
raise ValueError("user_msg is required to run the workflow")
|
||||
await ctx.set("user_msg", user_msg)
|
||||
chat_history = ev.chat_history or []
|
||||
chat_history.append(
|
||||
|
||||
# prepare chat history from StartEvent
|
||||
messages = [
|
||||
ChatMessage(
|
||||
role="user",
|
||||
content=user_msg,
|
||||
role=msg.get("role", "user"),
|
||||
content=msg.get("content", ""),
|
||||
)
|
||||
)
|
||||
for msg in ev.get("chat_history", [])
|
||||
]
|
||||
chat_history = [*messages, ChatMessage(role="user", content=user_msg)]
|
||||
|
||||
# extract inline artifact from chat history
|
||||
last_artifact = get_last_artifact(messages)
|
||||
self.last_artifact = last_artifact
|
||||
|
||||
memory = ChatMemoryBuffer.from_defaults(
|
||||
chat_history=chat_history,
|
||||
llm=self.llm,
|
||||
@@ -115,9 +118,9 @@ class DocumentArtifactWorkflow(Workflow):
|
||||
await ctx.set("memory", memory)
|
||||
return PlanEvent(
|
||||
user_msg=user_msg,
|
||||
context=str(self.last_artifact.model_dump_json())
|
||||
if self.last_artifact
|
||||
else "",
|
||||
context=(
|
||||
str(self.last_artifact.model_dump_json()) if self.last_artifact else ""
|
||||
),
|
||||
)
|
||||
|
||||
@step
|
||||
@@ -135,7 +138,8 @@ class DocumentArtifactWorkflow(Workflow):
|
||||
),
|
||||
)
|
||||
)
|
||||
prompt = PromptTemplate("""
|
||||
prompt = PromptTemplate(
|
||||
"""
|
||||
You are a documentation analyst responsible for analyzing the user's request and providing requirements for document generation or update.
|
||||
Follow these instructions:
|
||||
1. Carefully analyze the conversation history and the user's request to determine what has been done and what the next step should be.
|
||||
@@ -176,10 +180,13 @@ class DocumentArtifactWorkflow(Workflow):
|
||||
|
||||
Now, please plan for the user's request:
|
||||
{user_msg}
|
||||
""").format(
|
||||
context=""
|
||||
if event.context is None
|
||||
else f"## The context is: \n{event.context}\n",
|
||||
"""
|
||||
).format(
|
||||
context=(
|
||||
""
|
||||
if event.context is None
|
||||
else f"## The context is: \n{event.context}\n"
|
||||
),
|
||||
user_msg=event.user_msg,
|
||||
)
|
||||
response = await self.llm.acomplete(
|
||||
@@ -232,7 +239,8 @@ class DocumentArtifactWorkflow(Workflow):
|
||||
),
|
||||
)
|
||||
)
|
||||
prompt = PromptTemplate("""
|
||||
prompt = PromptTemplate(
|
||||
"""
|
||||
You are a skilled technical writer who can help users with documentation.
|
||||
You are given a task to generate or update a document for a given requirement.
|
||||
|
||||
@@ -265,10 +273,11 @@ class DocumentArtifactWorkflow(Workflow):
|
||||
|
||||
Now, please generate the document for the following requirement:
|
||||
{requirement}
|
||||
""").format(
|
||||
previous_artifact=self.last_artifact.model_dump_json()
|
||||
if self.last_artifact
|
||||
else "",
|
||||
"""
|
||||
).format(
|
||||
previous_artifact=(
|
||||
self.last_artifact.model_dump_json() if self.last_artifact else ""
|
||||
),
|
||||
requirement=event.requirement,
|
||||
)
|
||||
response = await self.llm.acomplete(
|
||||
@@ -345,3 +354,6 @@ class DocumentArtifactWorkflow(Workflow):
|
||||
)
|
||||
)
|
||||
return StopEvent(result=response_stream)
|
||||
|
||||
|
||||
workflow = create_workflow()
|
||||
|
||||
+254
@@ -0,0 +1,254 @@
|
||||
import logging
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, AsyncGenerator, Optional
|
||||
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from llama_index.core.base.llms.types import ChatMessage, ChatResponse
|
||||
from llama_index.core.llms.function_calling import FunctionCallingLLM
|
||||
from llama_index.core.tools import (
|
||||
BaseTool,
|
||||
FunctionTool,
|
||||
ToolOutput,
|
||||
ToolSelection,
|
||||
)
|
||||
from llama_index.core.workflow import Context
|
||||
from llama_index.core.agent.workflow.workflow_events import ToolCall, ToolCallResult
|
||||
|
||||
from src.events import AgentRunEvent, AgentRunEventType
|
||||
|
||||
logger = logging.getLogger("uvicorn")
|
||||
|
||||
|
||||
class ToolCallOutput(BaseModel):
    """Pairs the identifier of a tool call with the output produced for it."""

    # Identifier of the tool call this output belongs to.
    tool_call_id: str
    # Output returned by the tool, or an error output when the call failed.
    tool_output: ToolOutput
|
||||
|
||||
|
||||
class ContextAwareTool(FunctionTool, ABC):
    """Function tool whose ``acall`` additionally receives the workflow context."""

    @abstractmethod
    async def acall(self, ctx: Context, input: Any) -> ToolOutput:  # type: ignore
        """Run the tool with access to ``ctx``; concrete tools must implement this."""
        ...
|
||||
|
||||
|
||||
class ChatWithToolsResponse(BaseModel):
    """
    A tool call response from chat_with_tools.

    Either the LLM decided to call tools (``tool_calls`` and
    ``tool_call_message`` are set, ``generator`` is None), or it answered
    directly (``generator`` streams the answer, the other fields are None).
    """

    # Tool calls requested by the LLM, if any.
    tool_calls: Optional[list[ToolSelection]]
    # The assistant message that carries the tool calls, if any.
    tool_call_message: Optional[ChatMessage]
    # Stream of response chunks when the LLM answered without calling tools.
    generator: Optional[AsyncGenerator[ChatResponse | None, None]]

    # NOTE(review): presumably needed so the async generator field is accepted
    # by pydantic - confirm.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def is_calling_different_tools(self) -> bool:
        """Return True when the tool calls target more than one distinct tool."""
        tool_names = {tool_call.tool_name for tool_call in self.tool_calls or []}
        return len(tool_names) > 1

    def has_tool_calls(self) -> bool:
        """Return True when at least one tool call is present."""
        return self.tool_calls is not None and len(self.tool_calls) > 0

    def tool_name(self) -> str:
        """
        Return the name of the single tool being called.

        Raises:
            ValueError: If there are no tool calls, or the calls target
                different tools.
        """
        if not self.has_tool_calls():
            raise ValueError("No tool calls")
        if self.is_calling_different_tools():
            raise ValueError("Calling different tools")
        return self.tool_calls[0].tool_name  # type: ignore

    async def full_response(self) -> str:
        """
        Drain the generator and return the concatenated response text.

        Chunks that are None or carry no delta are skipped.
        """
        assert self.generator is not None
        parts: list[str] = []
        async for chunk in self.generator:
            # The generator is typed to yield None as well; skip such chunks
            # instead of failing on `None.delta`.
            if chunk is None:
                continue
            content = chunk.delta
            if content:
                parts.append(content)
        return "".join(parts)
|
||||
|
||||
|
||||
async def chat_with_tools(  # type: ignore
    llm: FunctionCallingLLM,
    tools: list[BaseTool],
    chat_history: list[ChatMessage],
) -> ChatWithToolsResponse:
    """
    Ask the LLM to either call tools or answer directly.

    This function doesn't change the memory.

    Returns:
        A response holding the tool calls and their message when the LLM chose
        to call tools; otherwise a response holding the generator that streams
        the answer.
    """
    stream = _tool_call_generator(llm, tools, chat_history)

    # The first item of the stream is a boolean telling whether this is a tool call.
    wants_tool_call = await stream.__anext__()

    if not wants_tool_call:
        # Hand the remaining stream over to the caller untouched.
        return ChatWithToolsResponse(
            tool_calls=None,
            tool_call_message=None,
            generator=stream,  # type: ignore
        )

    # Tool call: the last chunk is the full response, so drain the stream.
    last_chunk = None
    async for item in stream:
        last_chunk = item
    assert isinstance(last_chunk, ChatResponse)

    return ChatWithToolsResponse(
        tool_calls=llm.get_tool_calls_from_response(last_chunk),
        tool_call_message=last_chunk.message,
        generator=None,
    )
|
||||
|
||||
|
||||
async def call_tools(
    ctx: Context,
    agent_name: str,
    tools: list[BaseTool],
    tool_calls: list[ToolSelection],
    emit_agent_events: bool = True,
) -> list[ToolCallOutput]:
    """
    Call tools and return the tool call responses.

    Args:
        ctx: Workflow context used to write events to the stream.
        agent_name: Name reported in the emitted agent events.
        tools: Available tools; they are looked up by their metadata name.
        tool_calls: The tool calls to execute, in order.
        emit_agent_events: Whether to write ``AgentRunEvent`` updates.

    Returns:
        One ``ToolCallOutput`` per tool call, in the same order. A call that
        names an unknown tool yields an error output instead of raising.
    """
    if len(tool_calls) == 0:
        return []

    tools_by_name = {tool.metadata.get_name(): tool for tool in tools}

    if len(tool_calls) == 1:
        only_call = tool_calls[0]
        if emit_agent_events:
            ctx.write_event_to_stream(
                AgentRunEvent(
                    name=agent_name,
                    msg=f"{only_call.tool_name}: {only_call.tool_kwargs}",
                )
            )
        tool = tools_by_name.get(only_call.tool_name)
        if not tool:
            # Same handling as in the multi-call path: report the unknown tool
            # as an error output instead of failing with KeyError.
            return [_missing_tool_output(only_call)]
        return [await call_tool(ctx, tool, only_call)]

    # Multiple tool calls, show progress
    tool_call_outputs: list[ToolCallOutput] = []

    # All progress events of this batch share one id.
    progress_id = str(uuid.uuid4())
    total_steps = len(tool_calls)
    if emit_agent_events:
        ctx.write_event_to_stream(
            AgentRunEvent(
                name=agent_name,
                msg=f"Making {total_steps} tool calls",
            )
        )
    for i, tool_call in enumerate(tool_calls):
        tool = tools_by_name.get(tool_call.tool_name)
        if not tool:
            tool_call_outputs.append(_missing_tool_output(tool_call))
            continue

        tool_call_output = await call_tool(
            ctx,
            tool,
            tool_call,
        )
        if emit_agent_events:
            ctx.write_event_to_stream(
                AgentRunEvent(
                    name=agent_name,
                    msg=f"{tool_call.tool_name}: {tool_call.tool_kwargs}",
                    event_type=AgentRunEventType.PROGRESS,
                    data={
                        "id": progress_id,
                        "total": total_steps,
                        # zero-based index of the call that just finished
                        "current": i,
                    },
                )
            )
        tool_call_outputs.append(tool_call_output)
    return tool_call_outputs


def _missing_tool_output(tool_call: ToolSelection) -> ToolCallOutput:
    """Build the error output returned for a call to a tool that does not exist."""
    message = f"Tool {tool_call.tool_name} does not exist"
    return ToolCallOutput(
        tool_call_id=tool_call.tool_id,
        tool_output=ToolOutput(
            is_error=True,
            content=message,
            tool_name=tool_call.tool_name,
            raw_input=tool_call.tool_kwargs,
            raw_output={
                "error": message,
            },
        ),
    )
|
||||
|
||||
|
||||
async def call_tool(
    ctx: Context,
    tool: BaseTool,
    tool_call: ToolSelection,
) -> ToolCallOutput:
    """
    Execute a single tool call and report it on the workflow event stream.

    A ``ToolCall`` event is written before the call and a ``ToolCallResult``
    event after it. Any exception raised by the tool is logged and converted
    into an error ``ToolOutput`` rather than propagated.

    Args:
        ctx: Workflow context; also passed to context aware tools.
        tool: The tool to run.
        tool_call: The selection holding the tool name, id and keyword arguments.

    Returns:
        The tool output paired with the id of the tool call.
    """
    ctx.write_event_to_stream(
        ToolCall(
            tool_name=tool_call.tool_name,
            tool_id=tool_call.tool_id,
            tool_kwargs=tool_call.tool_kwargs,
        )
    )
    try:
        if isinstance(tool, ContextAwareTool):
            # Inject the context when calling a context aware tool. (ctx has
            # already been used above, so it cannot be None at this point.)
            output = await tool.acall(ctx=ctx, **tool_call.tool_kwargs)
        else:
            output = await tool.acall(**tool_call.tool_kwargs)  # type: ignore
    except Exception as e:
        # Deliberate best effort: surface the failure as an error output.
        logger.error(f"Got error in tool {tool_call.tool_name}: {e!s}")
        output = ToolOutput(
            is_error=True,
            content=f"Error: {e!s}",
            tool_name=tool.metadata.get_name(),
            raw_input=tool_call.tool_kwargs,
            raw_output={
                "error": str(e),
            },
        )
    ctx.write_event_to_stream(
        ToolCallResult(
            tool_name=tool_call.tool_name,
            tool_kwargs=tool_call.tool_kwargs,
            tool_id=tool_call.tool_id,
            tool_output=output,
            return_direct=False,
        )
    )
    return ToolCallOutput(
        tool_call_id=tool_call.tool_id,
        tool_output=output,
    )
|
||||
|
||||
|
||||
async def _tool_call_generator(
|
||||
llm: FunctionCallingLLM,
|
||||
tools: list[BaseTool],
|
||||
chat_history: list[ChatMessage],
|
||||
) -> AsyncGenerator[ChatResponse | bool, None]:
|
||||
response_stream = await llm.astream_chat_with_tools(
|
||||
tools,
|
||||
chat_history=chat_history,
|
||||
allow_parallel_tool_calls=False,
|
||||
)
|
||||
|
||||
full_response = None
|
||||
yielded_indicator = False
|
||||
async for chunk in response_stream:
|
||||
if "tool_calls" not in chunk.message.additional_kwargs:
|
||||
# Yield a boolean to indicate whether the response is a tool call
|
||||
if not yielded_indicator:
|
||||
yield False
|
||||
yielded_indicator = True
|
||||
|
||||
# if not a tool call, yield the chunks!
|
||||
yield chunk # type: ignore
|
||||
elif not yielded_indicator:
|
||||
# Yield the indicator for a tool call
|
||||
yield True
|
||||
yielded_indicator = True
|
||||
|
||||
full_response = chunk
|
||||
|
||||
if full_response:
|
||||
yield full_response # type: ignore
|
||||
+252
@@ -0,0 +1,252 @@
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import re
|
||||
from enum import Enum
|
||||
from io import BytesIO
|
||||
|
||||
from llama_index.core.tools.function_tool import FunctionTool
|
||||
|
||||
# use nextjs for file server
|
||||
WORKFLOW="chat"
|
||||
|
||||
# define nextjs file server url prefix
|
||||
FILE_SERVER_URL_PREFIX = f"/deployments/{WORKFLOW}/ui/api/files/output/tools"
|
||||
|
||||
# When deploying to llama_deploy, ui folder will be copied to deployments folder in the temp directory
|
||||
# We need to save generated documents to that exact ui directory to make it accessible to the file server
|
||||
# eg: /tmp/llama_deploy/deployments/chat/ui/output/tools/generated_report.pdf
|
||||
LLAMA_DEPLOY_DIR = os.path.join(tempfile.gettempdir(), "llama_deploy", "deployments")
|
||||
OUTPUT_DIR = os.path.join(LLAMA_DEPLOY_DIR, WORKFLOW, "ui", "output", "tools")
|
||||
|
||||
class DocumentType(Enum):
    """Document formats that can be generated; values are the lowercase format names."""

    PDF = "pdf"
    HTML = "html"
|
||||
|
||||
|
||||
COMMON_STYLES = """
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
line-height: 1.3;
|
||||
color: #333;
|
||||
}
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
margin-top: 1em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
p {
|
||||
margin-bottom: 0.7em;
|
||||
}
|
||||
code {
|
||||
background-color: #f4f4f4;
|
||||
padding: 2px 4px;
|
||||
border-radius: 4px;
|
||||
}
|
||||
pre {
|
||||
background-color: #f4f4f4;
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
overflow-x: auto;
|
||||
}
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
width: 100%;
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
th, td {
|
||||
border: 1px solid #ddd;
|
||||
padding: 8px;
|
||||
text-align: left;
|
||||
}
|
||||
th {
|
||||
background-color: #f2f2f2;
|
||||
font-weight: bold;
|
||||
}
|
||||
"""
|
||||
|
||||
HTML_SPECIFIC_STYLES = """
|
||||
body {
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
padding: 20px;
|
||||
}
|
||||
"""
|
||||
|
||||
PDF_SPECIFIC_STYLES = """
|
||||
@page {
|
||||
size: letter;
|
||||
margin: 2cm;
|
||||
}
|
||||
body {
|
||||
font-size: 11pt;
|
||||
}
|
||||
h1 { font-size: 18pt; }
|
||||
h2 { font-size: 16pt; }
|
||||
h3 { font-size: 14pt; }
|
||||
h4, h5, h6 { font-size: 12pt; }
|
||||
pre, code {
|
||||
font-family: Courier, monospace;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
"""
|
||||
|
||||
HTML_TEMPLATE = """
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<style>
|
||||
{common_styles}
|
||||
{specific_styles}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
{content}
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
class DocumentGenerator:
|
||||
def __init__(self, file_server_url_prefix: str | None = FILE_SERVER_URL_PREFIX):
    """
    Store the URL prefix under which generated files are served.

    Raises:
        ValueError: If ``file_server_url_prefix`` is empty or None.
    """
    if file_server_url_prefix:
        self.file_server_url_prefix = file_server_url_prefix
    else:
        raise ValueError("file_server_url_prefix is required")
|
||||
|
||||
@classmethod
def _generate_html_content(cls, original_content: str) -> str:
    """
    Render markdown text to an HTML fragment.

    The ``markdown`` package is imported lazily; an ImportError with an
    installation hint is raised when it is missing.
    """
    try:
        import markdown  # type: ignore
    except ImportError:
        raise ImportError(
            "Failed to import required modules. Please install markdown."
        )

    # Fenced code blocks and tables are enabled through extensions.
    rendered = markdown.markdown(
        original_content,
        extensions=["fenced_code", "tables"],
    )
    return rendered
|
||||
|
||||
@classmethod
def _generate_pdf(cls, html_content: str) -> BytesIO:
    """
    Render an HTML fragment to PDF and return it as an in-memory buffer.

    The fragment is wrapped in ``HTML_TEMPLATE`` with the common and the
    PDF specific styles before conversion with ``xhtml2pdf``.

    Raises:
        ImportError: If ``xhtml2pdf`` is not installed.
        ValueError: If the conversion reports an error.
    """
    try:
        from xhtml2pdf import pisa
    except ImportError:
        raise ImportError(
            "Failed to import required modules. Please install xhtml2pdf."
        )

    page = HTML_TEMPLATE.format(
        common_styles=COMMON_STYLES,
        specific_styles=PDF_SPECIFIC_STYLES,
        content=html_content,
    )
    source = BytesIO(page.encode("UTF-8"))

    buffer = BytesIO()
    result = pisa.pisaDocument(source, buffer, encoding="UTF-8")

    if result.err:
        logging.error(f"PDF generation failed: {result.err}")
        raise ValueError("PDF generation failed")

    # Rewind so that callers read the PDF from its first byte.
    buffer.seek(0)
    return buffer
|
||||
|
||||
@classmethod
|
||||
def _generate_html(cls, html_content: str) -> str:
|
||||
"""
|
||||
Generate a complete HTML document with the given HTML content.
|
||||
"""
|
||||
return HTML_TEMPLATE.format(
|
||||
common_styles=COMMON_STYLES,
|
||||
specific_styles=HTML_SPECIFIC_STYLES,
|
||||
content=html_content,
|
||||
)
|
||||
|
||||
def generate_document(
|
||||
self, original_content: str, document_type: str, file_name: str
|
||||
) -> str:
|
||||
"""
|
||||
To generate document as PDF or HTML file.
|
||||
Parameters:
|
||||
original_content: str (markdown style)
|
||||
document_type: str (pdf or html) specify the type of the file format based on the use case
|
||||
file_name: str (name of the document file) must be a valid file name, no extensions needed
|
||||
Returns:
|
||||
str (URL to the document file): A file URL ready to serve.
|
||||
"""
|
||||
try:
|
||||
doc_type = DocumentType(document_type.lower())
|
||||
except ValueError:
|
||||
raise ValueError(
|
||||
f"Invalid document type: {document_type}. Must be 'pdf' or 'html'."
|
||||
)
|
||||
# Always generate html content first
|
||||
html_content = self._generate_html_content(original_content)
|
||||
|
||||
# Based on the type of document, generate the corresponding file
|
||||
if doc_type == DocumentType.PDF:
|
||||
content = self._generate_pdf(html_content)
|
||||
file_extension = "pdf"
|
||||
elif doc_type == DocumentType.HTML:
|
||||
content = BytesIO(self._generate_html(html_content).encode("utf-8"))
|
||||
file_extension = "html"
|
||||
else:
|
||||
raise ValueError(f"Unexpected document type: {document_type}")
|
||||
|
||||
file_name = self._validate_file_name(file_name)
|
||||
file_path = os.path.join(OUTPUT_DIR, f"{file_name}.{file_extension}")
|
||||
|
||||
self._write_to_file(content, file_path)
|
||||
|
||||
return (
|
||||
f"{self.file_server_url_prefix}/{file_name}.{file_extension}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _write_to_file(content: BytesIO, file_path: str) -> None:
|
||||
"""
|
||||
Write the content to a file.
|
||||
"""
|
||||
try:
|
||||
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
||||
with open(file_path, "wb") as file:
|
||||
file.write(content.getvalue())
|
||||
except Exception:
|
||||
raise
|
||||
|
||||
@staticmethod
|
||||
def _validate_file_name(file_name: str) -> str:
|
||||
"""
|
||||
Validate the file name.
|
||||
"""
|
||||
# Don't allow directory traversal
|
||||
if os.path.isabs(file_name):
|
||||
raise ValueError("File name is not allowed.")
|
||||
# Don't allow special characters
|
||||
if re.match(r"^[a-zA-Z0-9_.-]+$", file_name):
|
||||
return file_name
|
||||
else:
|
||||
raise ValueError("File name is not allowed to contain special characters.")
|
||||
|
||||
@classmethod
|
||||
def _validate_packages(cls) -> None:
|
||||
try:
|
||||
import markdown # noqa: F401
|
||||
import xhtml2pdf # noqa: F401
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Failed to import required modules. Please install markdown and xhtml2pdf "
|
||||
"using `pip install markdown xhtml2pdf`"
|
||||
)
|
||||
|
||||
def to_tool(self) -> FunctionTool:
|
||||
self._validate_packages()
|
||||
return FunctionTool.from_defaults(self.generate_document)
|
||||
+32
@@ -0,0 +1,32 @@
|
||||
from typing import List, Optional
|
||||
from enum import Enum
|
||||
from llama_index.core.base.llms.types import ChatMessage
|
||||
from llama_index.core.tools import ToolSelection
|
||||
from llama_index.core.workflow import Event
|
||||
|
||||
|
||||
class AgentRunEventType(Enum):
    """Kinds of agent run events; `AgentRunEvent` defaults to TEXT."""

    # Plain text event
    TEXT = "text"
    # Progress event
    PROGRESS = "progress"
|
||||
|
||||
|
||||
class AgentRunEvent(Event):
    """Workflow event carrying a named message, with optional extra data."""

    # Name attached to the event
    name: str
    # Message text of the event
    msg: str
    # Kind of event; text unless stated otherwise
    event_type: AgentRunEventType = AgentRunEventType.TEXT
    # Optional additional payload
    data: Optional[dict] = None
|
||||
|
||||
|
||||
class InputEvent(Event):
    """Workflow event carrying a list of chat messages."""

    # Chat messages passed along with the event
    input: List[ChatMessage]
    # Flag that defaults to False (NOTE(review): meaning not visible here; confirm against the workflow)
    response: bool = False
|
||||
|
||||
|
||||
class ResearchEvent(Event):
    """Workflow event carrying the tool selections for the research step (presumably; confirm against the workflow)."""

    # Tool calls attached to the event
    input: list[ToolSelection]
|
||||
|
||||
class AnalyzeEvent(Event):
    """Workflow event whose input is either a list of tool selections or a single chat message."""

    # Either tool calls or one chat message
    input: list[ToolSelection] | ChatMessage
|
||||
|
||||
class ReportEvent(Event):
    """Workflow event carrying the tool selections for the report step (presumably; confirm against the workflow)."""

    # Tool calls attached to the event
    input: list[ToolSelection]
|
||||
+280
@@ -0,0 +1,280 @@
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_index.core.tools import FunctionTool
|
||||
|
||||
logger = logging.getLogger("uvicorn")
|
||||
|
||||
|
||||
class FileMetadata(BaseModel):
    """Metadata describing a file that was saved to disk."""

    # Unique file id (sanitized name, uuid and extension)
    id: str
    # File extension without the leading dot
    type: str
    # Size of the saved file in bytes
    size: int
    # URL under which the file is exposed
    url: str
    # Local path of the saved file
    path: str
|
||||
|
||||
|
||||
class InterpreterExtraResult(BaseModel):
    """One result produced by the interpreter: inline content or a saved file."""

    # Result format, e.g. an image extension or another format name
    type: str
    # Stringified data for results that are not saved as files
    content: Optional[str] = None
    # Name (id) of the saved file, when the result was written to disk
    filename: Optional[str] = None
    # URL of the saved file, when the result was written to disk
    url: Optional[str] = None
|
||||
|
||||
|
||||
class E2BToolOutput(BaseModel):
    """Outcome of one code execution by the E2B code interpreter tool."""

    # True when the run failed or the retry limit was exceeded
    is_error: bool
    # Logs object returned by the sandbox execution
    logs: "Logs"  # type: ignore # noqa: F821
    # Message describing the failure, if any
    error_message: Optional[str] = None
    # Parsed results of the execution
    results: List[InterpreterExtraResult] = []
    # Retry counter reported back to the caller
    retry_count: int = 0
|
||||
|
||||
|
||||
class E2BCodeInterpreter:
    """
    Run Python code in an E2B sandbox and store the produced files locally.

    The sandbox is created lazily on the first `interpret` call and is killed
    when a run fails or when this object is garbage collected.
    """

    output_dir = "output/tools"
    uploaded_files_dir = "output/uploaded"
    interpreter: Optional["Sandbox"] = None  # type: ignore # noqa: F821

    def __init__(
        self,
        api_key: str,
        output_dir: Optional[str] = None,
        uploaded_files_dir: Optional[str] = None,
    ):
        """
        Args:
            api_key: The API key for the E2B Code Interpreter.
            output_dir: The directory for the output files. Default is `output/tools`.
            uploaded_files_dir: The directory for the files to be uploaded to the sandbox. Default is `output/uploaded`.

        Raises:
            ImportError: If `e2b_code_interpreter` is not installed.
            ValueError: If `api_key` is empty.
        """
        self._validate_package()
        if not api_key:
            raise ValueError(
                "api_key is required to run code interpreter. Get it here: https://e2b.dev/docs/getting-started/api-key"
            )
        self.api_key = api_key
        self.output_dir = output_dir or "output/tools"
        self.uploaded_files_dir = uploaded_files_dir or "output/uploaded"

    @classmethod
    def _validate_package(cls) -> None:
        """
        Check that the `e2b_code_interpreter` package can be imported.

        Raises:
            ImportError: If the package is missing.
        """
        try:
            from e2b_code_interpreter import Sandbox  # noqa: F401
            from e2b_code_interpreter.models import Logs  # noqa: F401
        except ImportError as err:
            raise ImportError(
                "e2b_code_interpreter is not installed. Please install it using `pip install e2b-code-interpreter`."
            ) from err

    def __del__(self) -> None:
        """
        Kill the interpreter when the tool is no longer in use.
        """
        if self.interpreter is not None:
            self.interpreter.kill()

    def _init_interpreter(self, sandbox_files: Optional[List[str]] = None) -> None:
        """
        Lazily initialize the interpreter.

        Each entry of `sandbox_files` is read from `uploaded_files_dir` (by
        base name) and written into the sandbox at the given path.
        """
        from e2b_code_interpreter import Sandbox

        files = sandbox_files or []
        logger.info(f"Initializing interpreter with {len(files)} files")
        self.interpreter = Sandbox(api_key=self.api_key)
        if len(files) > 0:
            for file_path in files:
                file_name = os.path.basename(file_path)
                local_file_path = os.path.join(self.uploaded_files_dir, file_name)
                with open(local_file_path, "rb") as f:
                    content = f.read()
                    if self.interpreter and self.interpreter.files:
                        self.interpreter.files.write(file_path, content)
            logger.info(f"Uploaded {len(files)} files to sandbox")

    def _process_file_name(self, file_name: str) -> tuple[str, str]:
        """
        Process original file name to generate a unique file id and extension.

        Returns:
            `(file_id, extension)` where `file_id` is
            `<sanitized name>_<uuid4>.<extension>`.

        Raises:
            ValueError: If the file name has no extension.
        """
        _id = str(uuid.uuid4())
        name, extension = os.path.splitext(file_name)
        extension = extension.lstrip(".")
        if extension == "":
            raise ValueError("File name is not valid! It must have an extension.")
        # sanitize the name
        name = re.sub(r"[^a-zA-Z0-9.]", "_", name)
        file_id = f"{name}_{_id}.{extension}"
        return file_id, extension

    def _get_file_url(self, file_id: str, save_dir: str) -> str:
        """
        Get the URL of a file.
        """
        # Ensure the path uses forward slashes for URLs
        url_path = f"{save_dir}/{file_id}".replace("\\", "/")
        return f"/api/files/{url_path}"

    def _save_file(self, content: bytes, file_name: str, save_dir: str) -> FileMetadata:
        """
        Write `content` into `save_dir` under a unique id and describe the result.

        Raises:
            ValueError: If `file_name` has no extension.
            OSError: If the file cannot be written (logged, then re-raised).
        """
        file_id, extension = self._process_file_name(file_name)
        file_path = os.path.join(save_dir, file_id)

        # Write the file directly
        try:
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            with open(file_path, "wb") as f:
                f.write(content)
        except PermissionError as e:
            logger.error(f"Permission denied when writing to file {file_path}: {e!s}")
            raise
        except OSError as e:
            logger.error(f"IO error occurred when writing to file {file_path}: {e!s}")
            raise
        except Exception as e:
            logger.error(f"Unexpected error when writing to file {file_path}: {e!s}")
            raise

        logger.info(f"Saved file to {file_path}")

        file_size = os.path.getsize(file_path)
        file_url = self._get_file_url(file_id, save_dir)

        return FileMetadata(
            id=file_id,
            type=extension,
            size=file_size,
            url=file_url,
            path=file_path,
        )

    def _save_to_disk(self, base64_data: str, ext: str) -> FileMetadata:
        """
        Decode base64 data and save it into `output_dir` with extension `ext`.
        """
        buffer = base64.b64decode(base64_data)

        # Output from e2b doesn't have a name. Create a random name for it.
        filename = f"e2b_file_{uuid.uuid4()}.{ext}"

        return self._save_file(buffer, file_name=filename, save_dir=self.output_dir)

    def _parse_result(self, result: Any) -> List[InterpreterExtraResult]:
        """
        The result could include multiple formats (e.g. png, svg, etc.) but encoded in base64
        We save each result to disk and return saved file metadata (extension, filename, url).

        Errors while parsing are logged and whatever was parsed so far is returned.
        """
        if not result:
            return []

        output = []

        try:
            formats = result.formats()
            results = [result[format] for format in formats]

            for ext, data in zip(formats, results):
                if ext in ["png", "svg", "jpeg", "pdf"]:
                    document_file = self._save_to_disk(data, ext)
                    output.append(
                        InterpreterExtraResult(
                            type=ext,
                            filename=document_file.id,
                            url=document_file.url,
                        )
                    )
                else:
                    # Try serialize data to string
                    try:
                        data = str(data)
                    except Exception as e:
                        data = f"Error when serializing data: {e}"
                    output.append(
                        InterpreterExtraResult(
                            type=ext,
                            content=data,
                        )
                    )
        except Exception as error:
            # One call with a %s placeholder: passing `error` as a bare extra
            # argument breaks log record formatting.
            logger.exception(
                "Error when parsing output from E2b interpreter tool: %s", error
            )

        return output

    # NOTE(review): signature and docstring wording are kept as-is because they
    # presumably feed the tool schema built by FunctionTool.from_defaults.
    # `sandbox_files` is never mutated here, so the list default is harmless.
    def interpret(
        self,
        code: str,
        sandbox_files: List[str] = [],
        retry_count: int = 0,
    ) -> E2BToolOutput:
        """
        Execute Python code in a Jupyter notebook cell. The tool will return the result, stdout, stderr, display_data, and error.
        If the code needs to use a file, ALWAYS pass the file path in the sandbox_files argument.
        You have a maximum of 3 retries to get the code to run successfully.

        Parameters:
            code (str): The Python code to be executed in a single cell.
            sandbox_files (List[str]): List of local file paths to be used by the code. The tool will throw an error if a file is not found.
            retry_count (int): Number of times the tool has been retried.
        """
        from e2b_code_interpreter.models import Logs

        if retry_count > 2:
            return E2BToolOutput(
                is_error=True,
                logs=Logs(
                    stdout="",
                    stderr="",
                    display_data="",
                    error="",
                ),
                error_message="Failed to execute the code after 3 retries. Explain the error to the user and suggest a fix.",
                retry_count=retry_count,
            )

        if self.interpreter is None:
            self._init_interpreter(sandbox_files)

        if not self.interpreter:
            raise ValueError("Interpreter is not initialized.")

        logger.info(
            f"\n{'=' * 50}\n> Running following AI-generated code:\n{code}\n{'=' * 50}"
        )
        execution = self.interpreter.run_code(code)

        if execution.error:
            error_message = f"The code failed to execute successfully. Error: {execution.error}. Try to fix the code and run again."
            logger.error(error_message)
            # Calling the generated code caused an error. Kill the interpreter and return the error to the LLM so it can try to fix the error
            try:
                self.interpreter.kill()  # type: ignore
            except Exception:
                # Best effort: the sandbox reference is dropped either way
                logger.warning("Failed to kill the E2B sandbox", exc_info=True)
            finally:
                self.interpreter = None
            return E2BToolOutput(
                is_error=True,
                logs=execution.logs,
                results=[],
                error_message=error_message,
                retry_count=retry_count + 1,
            )

        if len(execution.results) == 0:
            return E2BToolOutput(is_error=False, logs=execution.logs, results=[])

        # Only the first execution result is parsed
        results = self._parse_result(execution.results[0])
        return E2BToolOutput(
            is_error=False,
            logs=execution.logs,
            results=results,
            retry_count=retry_count + 1,
        )

    def to_tool(self) -> FunctionTool:
        """
        Wrap `interpret` in a FunctionTool after checking the dependency.
        """
        self._validate_package()
        return FunctionTool.from_defaults(self.interpret)
|
||||
@@ -0,0 +1,47 @@
|
||||
import os
|
||||
from typing import Any, Optional
|
||||
|
||||
from llama_index.core.base.base_query_engine import BaseQueryEngine
|
||||
from llama_index.core.indices.base import BaseIndex
|
||||
from llama_index.core.tools.query_engine import QueryEngineTool
|
||||
|
||||
def create_query_engine(index: BaseIndex, **kwargs: Any) -> BaseQueryEngine:
    """
    Create a query engine for the given index.

    Args:
        index: The index to create a query engine for.
        **kwargs: Additional parameters for the query engine, e.g: similarity_top_k

    Returns:
        The query engine built by `index.as_query_engine(**kwargs)`.

    Raises:
        ValueError: If the TOP_K environment variable is set to a non-integer value.
    """
    # An unset or empty TOP_K means "no override"; int("") would raise.
    top_k = int(os.getenv("TOP_K") or 0)
    # The TOP_K override is only applied when no `filters` argument is given.
    if top_k != 0 and kwargs.get("filters") is None:
        kwargs["similarity_top_k"] = top_k

    return index.as_query_engine(**kwargs)
|
||||
|
||||
|
||||
def get_query_engine_tool(
    index: BaseIndex,
    name: Optional[str] = None,
    description: Optional[str] = None,
    **kwargs: Any,
) -> QueryEngineTool:
    """
    Build a QueryEngineTool backed by a query engine for `index`.

    Args:
        index: The index to create a query engine for.
        name (optional): The name of the tool. Falls back to "query_index".
        description (optional): The description of the tool. Falls back to a generic knowledge-base description.
        **kwargs: Forwarded to `create_query_engine`.
    """
    tool_name = "query_index" if name is None else name
    tool_description = (
        "Use this tool to retrieve information from a knowledge base. Provide a specific query and can call the tool multiple times if necessary."
        if description is None
        else description
    )
    return QueryEngineTool.from_defaults(
        query_engine=create_query_engine(index, **kwargs),
        name=tool_name,
        description=tool_description,
    )
|
||||
@@ -0,0 +1,46 @@
|
||||
from typing import AsyncGenerator, Union
|
||||
from llama_index.core.base.llms.types import (
|
||||
CompletionResponse,
|
||||
CompletionResponseAsyncGen,
|
||||
ChatResponse,
|
||||
)
|
||||
from llama_index.core.workflow import Context
|
||||
from llama_index.core.agent.workflow.workflow_events import AgentStream
|
||||
|
||||
|
||||
async def write_response_to_stream(
    res: Union[CompletionResponse, CompletionResponseAsyncGen, AsyncGenerator[ChatResponse, None]],
    ctx: Context,
    current_agent_name: str = "assistant",
) -> str:
    """
    Handle both streaming and non-streaming LLM responses.

    For a streaming response, one AgentStream event is written to the context
    per chunk; a non-streaming response writes no events.

    Args:
        res: The LLM response (either streaming or non-streaming)
        ctx: The workflow context for writing events to stream
        current_agent_name: The name of the current agent (default: "assistant")

    Returns:
        The final response text as a string
    """
    if not isinstance(res, AsyncGenerator):
        # Handle non-streaming response (CompletionResponse)
        return res.text

    # Handle streaming response (CompletionResponseAsyncGen or ChatResponse AsyncGenerator)
    final_response = ""
    async for chunk in res:
        delta = chunk.delta or ""
        # Accumulate before emitting so `response` already includes this
        # chunk's delta instead of lagging one chunk behind.
        final_response += delta
        ctx.write_event_to_stream(
            AgentStream(
                delta=delta,
                response=final_response,
                current_agent_name=current_agent_name,
                tool_calls=[],
                raw=getattr(chunk, "raw", None) or "",
            )
        )

    return final_response
|
||||
+35
-36
@@ -1,7 +1,9 @@
|
||||
import os
|
||||
from typing import List, Optional
|
||||
from enum import Enum
|
||||
from dotenv import load_dotenv
|
||||
|
||||
|
||||
from app.index import get_index
|
||||
from llama_index.core import Settings
|
||||
from llama_index.core.base.llms.types import ChatMessage, MessageRole
|
||||
from llama_index.core.llms.function_calling import FunctionCallingLLM
|
||||
@@ -15,19 +17,27 @@ from llama_index.core.workflow import (
|
||||
Workflow,
|
||||
step,
|
||||
)
|
||||
from llama_index.server.api.models import AgentRunEvent, ChatRequest
|
||||
from llama_index.server.settings import server_settings
|
||||
from llama_index.server.tools.document_generator import DocumentGenerator
|
||||
from llama_index.server.tools.index import get_query_engine_tool
|
||||
from llama_index.server.tools.interpreter import E2BCodeInterpreter
|
||||
from llama_index.server.utils.agent_tool import (
|
||||
call_tools,
|
||||
chat_with_tools,
|
||||
|
||||
from src.index import get_index
|
||||
from src.settings import init_settings
|
||||
from src.query import get_query_engine_tool
|
||||
from src.document_generator import DocumentGenerator
|
||||
from src.interpreter import E2BCodeInterpreter
|
||||
from src.events import (
|
||||
InputEvent,
|
||||
ResearchEvent,
|
||||
AnalyzeEvent,
|
||||
ReportEvent,
|
||||
AgentRunEvent,
|
||||
)
|
||||
from src.agent_tool import call_tools, chat_with_tools
|
||||
from src.utils import write_response_to_stream
|
||||
|
||||
|
||||
def create_workflow(chat_request: Optional[ChatRequest] = None) -> Workflow:
|
||||
index = get_index(chat_request=chat_request)
|
||||
def create_workflow() -> Workflow:
|
||||
load_dotenv()
|
||||
init_settings()
|
||||
index = get_index()
|
||||
if index is None:
|
||||
raise ValueError(
|
||||
"Index is not found. Try run generation script to create the index first."
|
||||
@@ -39,9 +49,7 @@ def create_workflow(chat_request: Optional[ChatRequest] = None) -> Workflow:
|
||||
"E2B_API_KEY is required to use the code interpreter tool. Please check README.md to know how to get the key."
|
||||
)
|
||||
code_interpreter_tool = E2BCodeInterpreter(api_key=e2b_api_key).to_tool()
|
||||
document_generator_tool = DocumentGenerator(
|
||||
file_server_url_prefix=server_settings.file_server_url_prefix,
|
||||
).to_tool()
|
||||
document_generator_tool = DocumentGenerator().to_tool()
|
||||
|
||||
return FinancialReportWorkflow(
|
||||
query_engine_tool=query_engine_tool,
|
||||
@@ -51,23 +59,6 @@ def create_workflow(chat_request: Optional[ChatRequest] = None) -> Workflow:
|
||||
)
|
||||
|
||||
|
||||
class InputEvent(Event):
|
||||
input: List[ChatMessage]
|
||||
response: bool = False
|
||||
|
||||
|
||||
class ResearchEvent(Event):
|
||||
input: list[ToolSelection]
|
||||
|
||||
|
||||
class AnalyzeEvent(Event):
|
||||
input: list[ToolSelection] | ChatMessage
|
||||
|
||||
|
||||
class ReportEvent(Event):
|
||||
input: list[ToolSelection]
|
||||
|
||||
|
||||
class FinancialReportWorkflow(Workflow):
|
||||
"""
|
||||
A workflow to generate a financial report using indexed documents.
|
||||
@@ -129,10 +120,14 @@ class FinancialReportWorkflow(Workflow):
|
||||
async def prepare_chat_history(self, ctx: Context, ev: StartEvent) -> InputEvent:
|
||||
self.stream = ev.get("stream", True)
|
||||
user_msg = ev.get("user_msg")
|
||||
chat_history = ev.get("chat_history")
|
||||
|
||||
if chat_history is not None:
|
||||
self.memory.put_messages(chat_history)
|
||||
messages = [
|
||||
ChatMessage(
|
||||
role=msg.get("role", "user"),
|
||||
content=msg.get("content", ""),
|
||||
)
|
||||
for msg in ev.get("chat_history", [])
|
||||
]
|
||||
self.memory.put_messages(messages)
|
||||
|
||||
# Add user message to memory
|
||||
self.memory.put(ChatMessage(role=MessageRole.USER, content=user_msg))
|
||||
@@ -164,7 +159,8 @@ class FinancialReportWorkflow(Workflow):
|
||||
)
|
||||
if not response.has_tool_calls():
|
||||
if self.stream:
|
||||
return StopEvent(result=response.generator)
|
||||
final_response = await write_response_to_stream(response.generator, ctx)
|
||||
return StopEvent(result=final_response)
|
||||
else:
|
||||
return StopEvent(result=await response.full_response())
|
||||
# calling different tools at the same time is not supported at the moment
|
||||
@@ -331,3 +327,6 @@ class FinancialReportWorkflow(Workflow):
|
||||
)
|
||||
# After the tool calls, fallback to the input with the latest chat history
|
||||
return InputEvent(input=self.memory.get())
|
||||
|
||||
|
||||
workflow = create_workflow()
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/).
|
||||
|
||||
## Getting Started
|
||||
|
||||
First, setup the environment with uv:
|
||||
|
||||
> **_Note:_** This step is not needed if you are using the dev-container.
|
||||
|
||||
```shell
|
||||
uv sync
|
||||
```
|
||||
|
||||
Then check the parameters that have been pre-configured in the `.env` file in this directory.
|
||||
Make sure you have set the `OPENAI_API_KEY` for the LLM.
|
||||
|
||||
Then, run the development server:
|
||||
|
||||
```shell
|
||||
uv run fastapi dev
|
||||
```
|
||||
|
||||
Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
|
||||
|
||||
To start the app optimized for **production**, run:
|
||||
|
||||
```
|
||||
uv run fastapi run
|
||||
```
|
||||
|
||||
## Configure LLM and Embedding Model
|
||||
|
||||
You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
|
||||
|
||||
## Use Case
|
||||
|
||||
This example shows how to use the LlamaIndexServer with a human in the loop. It allows you to start CLI commands that are reviewed by a human before execution.
|
||||
|
||||
To update the workflow, you can modify the code in [`workflow.py`](app/workflow.py).
|
||||
|
||||
You can start by sending a request from the [chat UI](http://localhost:8000), or you can test the `/api/chat` endpoint with the following curl request:
|
||||
|
||||
```
|
||||
curl --location 'localhost:8000/api/chat' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{ "messages": [{ "role": "user", "content": "Show me the files in the current directory" }] }'
|
||||
```
|
||||
|
||||
## How does HITL work?
|
||||
|
||||
### Events
|
||||
|
||||
The human-in-the-loop approach used here is based on a simple idea: the workflow pauses and waits for a human response before proceeding to the next step.
|
||||
|
||||
To do this, you will need to implement two custom events:
|
||||
|
||||
- [HumanInputEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is used to request input from the user.
|
||||
- [HumanResponseEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is sent to the workflow to resume execution with input from the user.
|
||||
|
||||
In this example, we have implemented these two custom events in [`events.ts`](src/app/events.ts):
|
||||
|
||||
- `cliHumanInputEvent` – to request input from the user for CLI command execution.
|
||||
- `cliHumanResponseEvent` – to resume the workflow with the response from the user.
|
||||
|
||||
```typescript
|
||||
export const cliHumanInputEvent = humanInputEvent<{
|
||||
type: "cli_human_input";
|
||||
data: { command: string };
|
||||
response: typeof cliHumanResponseEvent;
|
||||
}>();
|
||||
|
||||
export const cliHumanResponseEvent = humanResponseEvent<{
|
||||
type: "human_response";
|
||||
data: { execute: boolean; command: string };
|
||||
}>();
|
||||
```
|
||||
|
||||
### UI Component
|
||||
|
||||
HITL also needs a custom UI component, that is shown when the LlamaIndexServer receives the `cliHumanInputEvent`. The name of the component is defined in the `type` field of the `cliHumanInputEvent` - in our case, it is `cli_human_input`, which corresponds to the [cli_human_input.tsx](./components/cli_human_input.tsx) component.
|
||||
|
||||
The custom component must use `append` to send a message with a `human_response` annotation. The data of the annotation must be in the format of the response event `cliHumanResponseEvent`, in our case, for sending to execute the command `ls -l`, we would send:
|
||||
|
||||
```tsx
|
||||
append({
|
||||
content: "Yes",
|
||||
role: "user",
|
||||
annotations: [
|
||||
{
|
||||
type: "human_response",
|
||||
data: {
|
||||
execute: true,
|
||||
command: "ls -l", // The command to execute
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
```
|
||||
|
||||
This component displays the command to execute and the user can choose to execute or cancel the command execution.
|
||||
|
||||
## Learn More
|
||||
|
||||
To learn more about LlamaIndex, take a look at the following resources:
|
||||
|
||||
- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
|
||||
- [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
|
||||
- [LlamaIndex Server](https://pypi.org/project/llama-index-server/)
|
||||
|
||||
You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
|
||||
@@ -1,34 +0,0 @@
|
||||
from typing import Type
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_index.server.models import HumanInputEvent, HumanResponseEvent
|
||||
|
||||
|
||||
class CLIHumanResponseEvent(HumanResponseEvent):
    """Human answer to a CLI confirmation request: whether to run the command, and which one."""

    # Whether the human approved execution
    execute: bool = Field(description="True if the human wants to execute the command, False otherwise.")
    # Command that should be executed
    command: str = Field(description="The command to execute.")
|
||||
|
||||
|
||||
class CLICommand(BaseModel):
    """Payload holding a single CLI command."""

    # The command line text
    command: str = Field(description="The command to execute.")
|
||||
|
||||
|
||||
# The HITL feature requires an event that extends HumanInputEvent
class CLIHumanInputEvent(HumanInputEvent):
    """
    Sent when the agent needs the user's permission before a CLI command is executed.
    Render this event by showing the command and a boolean button to execute the command or not.
    """

    # used by UI to render with appropriate component
    event_type: str = "cli_human_input"
    # used by workflow to resume with the correct event
    response_event_type: Type = CLIHumanResponseEvent
    # the data that is sent to the UI for rendering
    data: CLICommand = Field(description="The command to execute.")
|
||||
@@ -1,87 +0,0 @@
|
||||
import platform
|
||||
import subprocess
|
||||
from typing import Any
|
||||
|
||||
from app.events import CLICommand, CLIHumanInputEvent, CLIHumanResponseEvent
|
||||
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
from llama_index.core.settings import Settings
|
||||
from llama_index.core.workflow import (
|
||||
Context,
|
||||
StartEvent,
|
||||
StopEvent,
|
||||
Workflow,
|
||||
step,
|
||||
)
|
||||
|
||||
|
||||
def create_workflow() -> Workflow:
    """Build the CLI human-in-the-loop workflow."""
    cli_workflow = CLIWorkflow()
    return cli_workflow
|
||||
|
||||
|
||||
class CLIWorkflow(Workflow):
    """
    Workflow that asks the LLM for a CLI command and runs it only after a human confirms.
    """

    default_prompt = PromptTemplate(
        template="""
You are a helpful assistant who can write CLI commands to execute using {cli_language}.
Your task is to analyze the user's request and write a CLI command to execute.

## User Request
{user_request}

Don't be verbose, only respond with the CLI command without any other text.
"""
    )

    def __init__(self, **kwargs: Any) -> None:
        # A HITL workflow waits for the human, so the timeout is disabled;
        # otherwise the callback fails with a timeout error.
        kwargs["timeout"] = None
        super().__init__(**kwargs)

    @step
    async def start(self, ctx: Context, ev: StartEvent) -> CLIHumanInputEvent:
        """
        Ask the LLM for a CLI command and request human confirmation for it.

        Raises:
            ValueError: If the start event has no user message, no LLM is
                configured, or the LLM returns an empty command.
        """
        user_msg = ev.user_msg
        if user_msg is None:
            raise ValueError("Missing user_msg in StartEvent")
        await ctx.set("user_msg", user_msg)

        # Request LLM to generate a CLI command in the shell language of this OS
        cli_language = "bash" if platform.system() in ("Linux", "Darwin") else "cmd"
        prompt = self.default_prompt.format(
            user_request=user_msg, cli_language=cli_language
        )

        llm = Settings.llm
        if llm is None:
            raise ValueError("Missing LLM in Settings")
        completion = await llm.acomplete(prompt, formatted=True)
        command = completion.text.strip()
        if command == "":
            raise ValueError("Couldn't generate a command")

        # Send the command to the user for confirmation
        await ctx.set("command", command)
        return CLIHumanInputEvent(  # type: ignore
            data=CLICommand(command=command),
            response_event_type=CLIHumanResponseEvent,
        )

    @step
    async def handle_human_response(
        self,
        ctx: Context,
        ev: CLIHumanResponseEvent,  # This event is sent by LlamaIndexServer when the user responds
    ) -> StopEvent:
        """
        Run the confirmed command and stop with its output, or stop with None when declined.

        Raises:
            ValueError: If execution was approved but the command is empty.
        """
        if not ev.execute:
            return StopEvent(result=None)

        command = ev.command or ""
        if command == "":
            raise ValueError("Missing command in CLIExecutionEvent")
        # NOTE(review): shell=True runs the command text through the shell;
        # the human confirmation is the only safeguard visible here.
        completed = subprocess.run(command, shell=True, capture_output=True, text=True)
        return StopEvent(result=completed.stdout or completed.stderr)
|
||||
+41
-29
@@ -6,12 +6,12 @@ load_dotenv()
|
||||
|
||||
import logging
|
||||
|
||||
from app.index import get_index
|
||||
from app.settings import init_settings
|
||||
from llama_index.server.services.llamacloud.generate import (
|
||||
load_to_llamacloud,
|
||||
)
|
||||
from llama_index.core.readers import SimpleDirectoryReader
|
||||
from tqdm import tqdm
|
||||
|
||||
from src.index import get_index
|
||||
from src.service import LLamaCloudFileService
|
||||
from src.settings import init_settings
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger()
|
||||
@@ -25,29 +25,41 @@ def generate_index():
|
||||
if index is None:
|
||||
raise ValueError("Index not found and could not be created")
|
||||
|
||||
load_to_llamacloud(index, logger=logger)
|
||||
# use SimpleDirectoryReader to retrieve the files to process
|
||||
reader = SimpleDirectoryReader(
|
||||
"ui/data",
|
||||
recursive=True,
|
||||
)
|
||||
files_to_process = reader.input_files
|
||||
|
||||
# add each file to the LlamaCloud pipeline
|
||||
error_files = []
|
||||
for input_file in tqdm(
|
||||
files_to_process,
|
||||
desc="Processing files",
|
||||
unit="file",
|
||||
):
|
||||
with open(input_file, "rb") as f:
|
||||
logger.debug(
|
||||
f"Adding file {input_file} to pipeline {index.name} in project {index.project_name}"
|
||||
)
|
||||
try:
|
||||
LLamaCloudFileService.add_file_to_pipeline(
|
||||
index.project.id,
|
||||
index.pipeline.id,
|
||||
f,
|
||||
custom_metadata={},
|
||||
wait_for_processing=False,
|
||||
)
|
||||
except Exception as e:
|
||||
error_files.append(input_file)
|
||||
logger.error(f"Error adding file {input_file}: {e}")
|
||||
|
||||
if error_files:
|
||||
logger.error(f"Failed to add the following files: {error_files}")
|
||||
|
||||
logger.info("Finished generating the index")
|
||||
|
||||
|
||||
def generate_ui_for_workflow():
    """
    Generate the UI component for the UIEventData event in app/workflow.py.

    Requests component source code from an OpenAI model and writes it to
    ``ui_event.jsx`` inside COMPONENT_DIR.

    Raises:
        ImportError: UIEventData cannot be imported from app.workflow.
    """
    import asyncio

    from llama_index.llms.openai import OpenAI
    from main import COMPONENT_DIR

    # To generate UI components for additional event types,
    # import the corresponding data model (e.g., MyCustomEventData)
    # and run the generate_ui_for_workflow function with the imported model.
    # Make sure the output filename of the generated UI component matches the event type (here `ui_event`)
    try:
        from app.workflow import UIEventData  # type: ignore
    except ImportError:
        raise ImportError("Couldn't generate UI component for the current workflow.")
    from llama_index.server.gen_ui import generate_event_component

    # gpt-4.1 is used here; Claude 3.7 Sonnet or Gemini Pro 2.5 also work well
    generator_llm = OpenAI(model="gpt-4.1")
    component_source = asyncio.run(
        generate_event_component(event_cls=UIEventData, llm=generator_llm)
    )

    output_path = f"{COMPONENT_DIR}/ui_event.jsx"
    with open(output_path, "w") as component_file:
        component_file.write(component_source)
|
||||
if __name__ == "__main__":
|
||||
generate_index()
|
||||
|
||||
+145
-6
@@ -1,7 +1,146 @@
|
||||
from llama_index.server.services.llamacloud import (
|
||||
LlamaCloudIndex,
|
||||
get_client,
|
||||
get_index,
|
||||
)
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
__all__ = ["LlamaCloudIndex", "get_client", "get_index"]
|
||||
from llama_cloud import PipelineType
|
||||
from llama_index.core.callbacks import CallbackManager
|
||||
from llama_index.core.ingestion.api_utils import (
|
||||
get_client as llama_cloud_get_client,
|
||||
)
|
||||
from llama_index.core.settings import Settings
|
||||
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
logger = logging.getLogger("uvicorn")
|
||||
|
||||
|
||||
class LlamaCloudConfig(BaseModel):
    """
    Credentials and addressing information for a LlamaCloud pipeline.

    Every field that is not passed to the constructor is read from its
    ``LLAMA_CLOUD_*`` environment variable (see ``__init__``).
    """

    # Private attributes (exclude=True keeps them out of the serialized model)
    api_key: str = Field(exclude=True)
    base_url: Optional[str] = Field(exclude=True)
    organization_id: Optional[str] = Field(exclude=True)

    # Configuration attributes, can be set by the user
    pipeline: str = Field(description="The name of the pipeline to use")
    project: str = Field(description="The name of the LlamaCloud project")

    def __init__(self, **kwargs):
        """Fill every field missing from ``kwargs`` from the environment, then validate."""
        env_fallbacks = {
            "api_key": "LLAMA_CLOUD_API_KEY",
            "base_url": "LLAMA_CLOUD_BASE_URL",
            "organization_id": "LLAMA_CLOUD_ORGANIZATION_ID",
            "pipeline": "LLAMA_CLOUD_INDEX_NAME",
            "project": "LLAMA_CLOUD_PROJECT_NAME",
        }
        for field_name, env_var in env_fallbacks.items():
            # Explicitly passed values (even None) win over the environment
            if field_name not in kwargs:
                kwargs[field_name] = os.getenv(env_var)
        super().__init__(**kwargs)

    # Validate and throw error if the env variables are not set before starting the app
    @field_validator("pipeline", "project", "api_key", mode="before")
    @classmethod
    def validate_fields(cls, value):
        """Reject a missing (``None``) pipeline, project or API key; pass anything else through."""
        if value is not None:
            return value
        raise ValueError(
            "Please set LLAMA_CLOUD_INDEX_NAME, LLAMA_CLOUD_PROJECT_NAME and LLAMA_CLOUD_API_KEY"
            " to your environment variables or config them in .env file"
        )

    def to_client_kwargs(self) -> dict:
        """Return the keyword arguments used to build a LlamaCloud client."""
        return dict(api_key=self.api_key, base_url=self.base_url)
|
||||
|
||||
|
||||
class IndexConfig(BaseModel):
    """
    Settings needed to open a LlamaCloudIndex: the pipeline configuration
    plus an optional callback manager.
    """

    # Built from the environment when not supplied; exposed under the alias "llamaCloudPipeline"
    llama_cloud_pipeline_config: LlamaCloudConfig = Field(
        default_factory=lambda: LlamaCloudConfig(),
        alias="llamaCloudPipeline",
    )
    callback_manager: Optional[CallbackManager] = Field(default=None)

    def to_index_kwargs(self) -> dict:
        """Return the keyword arguments passed to the LlamaCloudIndex constructor."""
        cloud = self.llama_cloud_pipeline_config
        return dict(
            name=cloud.pipeline,
            project_name=cloud.project,
            api_key=cloud.api_key,
            base_url=cloud.base_url,
            organization_id=cloud.organization_id,
            callback_manager=self.callback_manager,
        )
|
||||
|
||||
|
||||
def get_index(
    config: Optional[IndexConfig] = None,
    create_if_missing: bool = False,
) -> Optional[LlamaCloudIndex]:
    """
    Return the LlamaCloud index described by ``config``.

    Args:
        config: Index configuration. When omitted, a default ``IndexConfig``
            is built (which reads its values from the environment).
        create_if_missing: When True and the index cannot be opened, create
            the pipeline and open the index again.

    Returns:
        The index, or ``None`` when it cannot be opened and
        ``create_if_missing`` is False.
    """
    if config is None:
        config = IndexConfig()
    # Check whether the index exists
    try:
        index = LlamaCloudIndex(**config.to_index_kwargs())
        return index
    except ValueError:
        # A ValueError from the constructor is treated as "index does not exist"
        logger.warning("Index not found")
        if create_if_missing:
            logger.info("Creating index")
            _create_index(config)
            return LlamaCloudIndex(**config.to_index_kwargs())
        return None
|
||||
|
||||
|
||||
def get_client():
    """Build a LlamaCloud client from a freshly constructed LlamaCloudConfig."""
    return llama_cloud_get_client(**LlamaCloudConfig().to_client_kwargs())
|
||||
|
||||
|
||||
def _create_index(
    config: IndexConfig,
):
    """
    Create the managed pipeline named in ``config`` unless one already exists.

    Raises:
        ValueError: a pipeline must be created but ``Settings.embed_model`` is
            not an OpenAI embedding model.
    """
    client = get_client()
    pipeline_name = config.llama_cloud_pipeline_config.pipeline

    existing = client.pipelines.search_pipelines(
        pipeline_name=pipeline_name,
        pipeline_type=PipelineType.MANAGED.value,
    )
    # Nothing to do when a managed pipeline with this name is already there
    if len(existing) != 0:
        return

    from llama_index.embeddings.openai import OpenAIEmbedding

    if not isinstance(Settings.embed_model, OpenAIEmbedding):
        raise ValueError(
            "Creating a new pipeline with a non-OpenAI embedding model is not supported."
        )

    embedding_config = {
        "type": "OPENAI_EMBEDDING",
        "component": {
            "api_key": os.getenv("OPENAI_API_KEY"),  # editable
            "model_name": os.getenv("EMBEDDING_MODEL"),
        },
    }
    transform_config = {
        "mode": "auto",
        "config": {
            "chunk_size": Settings.chunk_size,  # editable
            "chunk_overlap": Settings.chunk_overlap,  # editable
        },
    }
    client.pipelines.upsert_pipeline(
        request={
            "name": pipeline_name,
            "embedding_config": embedding_config,
            "transform_config": transform_config,
        },
    )
|
||||
|
||||
+74
@@ -0,0 +1,74 @@
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import typing
|
||||
from io import BytesIO
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
||||
|
||||
from llama_cloud import ManagedIngestionStatus, PipelineFileCreateCustomMetadataValue
|
||||
from pydantic import BaseModel
|
||||
|
||||
from src.index import get_client
|
||||
|
||||
logger = logging.getLogger("uvicorn")
|
||||
|
||||
|
||||
class LlamaCloudFile(BaseModel):
    """A file identified by its name and the pipeline it belongs to."""

    file_name: str
    pipeline_id: str

    def __eq__(self, other):
        """Two files are equal when both file name and pipeline id match."""
        if isinstance(other, LlamaCloudFile):
            return (self.file_name, self.pipeline_id) == (
                other.file_name,
                other.pipeline_id,
            )
        # Let Python try the reflected comparison for unrelated types
        return NotImplemented

    def __hash__(self):
        """Hash on the same pair used by ``__eq__`` so instances work in sets and dict keys."""
        return hash((self.file_name, self.pipeline_id))
|
||||
|
||||
|
||||
class LLamaCloudFileService:
    """Helpers for uploading files to LlamaCloud and attaching them to a pipeline."""

    LOCAL_STORE_PATH = "output/llamacloud"
    DOWNLOAD_FILE_NAME_TPL = "{pipeline_id}${filename}"

    @classmethod
    def add_file_to_pipeline(
        cls,
        project_id: str,
        pipeline_id: str,
        upload_file: Union[typing.IO, Tuple[str, BytesIO]],
        custom_metadata: Optional[Dict[str, PipelineFileCreateCustomMetadataValue]],
        wait_for_processing: bool = True,
    ) -> str:
        """
        Upload a file to a project and add it to a pipeline.

        Args:
            project_id: Project that receives the uploaded file.
            pipeline_id: Pipeline the uploaded file is added to.
            upload_file: File object (or ``(name, BytesIO)`` tuple) to upload.
            custom_metadata: Extra metadata stored with the file; the file id is
                always added under the ``file_id`` key. ``None`` means no extras.
            wait_for_processing: When True, poll the ingestion status until the
                file succeeds, fails, or the attempts run out.

        Returns:
            The id of the uploaded file.

        Raises:
            Exception: ingestion reported an error, or did not finish within
                the allowed number of polling attempts.
        """
        client = get_client()
        uploaded = client.files.upload_file(
            project_id=project_id, upload_file=upload_file
        )
        file_id = uploaded.id

        metadata = {"file_id": file_id, **(custom_metadata or {})}
        pipeline_files = [{"file_id": file_id, "custom_metadata": metadata}]
        client.pipelines.add_files_to_pipeline_api(pipeline_id, request=pipeline_files)

        if not wait_for_processing:
            return file_id

        # Poll up to max_attempts times, 100ms apart (about 2s in total)
        max_attempts = 20
        for _ in range(max_attempts):
            result = client.pipelines.get_pipeline_file_status(
                file_id=file_id, pipeline_id=pipeline_id
            )
            if result.status == ManagedIngestionStatus.ERROR:
                raise Exception(f"File processing failed: {str(result)}")
            if result.status == ManagedIngestionStatus.SUCCESS:
                # File is ingested - return the file id
                return file_id
            time.sleep(0.1)  # Sleep for 100ms
        raise Exception(
            f"File processing did not complete after {max_attempts} attempts."
        )
|
||||
@@ -1,68 +0,0 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
def generate_index():
    """
    Build a vector index from the documents in the data directory and persist it.

    The data directory is taken from the DATA_DIR environment variable and
    defaults to ``data``; the index is stored in STORAGE_DIR.
    """
    from app.index import STORAGE_DIR
    from app.settings import init_settings
    from llama_index.core.indices import (
        VectorStoreIndex,
    )
    from llama_index.core.readers import SimpleDirectoryReader

    load_dotenv()
    init_settings()

    logger.info("Creating new index")
    # Read every document under the data directory, descending into subfolders
    data_dir = os.environ.get("DATA_DIR", "data")
    docs = SimpleDirectoryReader(data_dir, recursive=True).load_data()
    vector_index = VectorStoreIndex.from_documents(docs, show_progress=True)
    # Persist the index so it can be loaded later without re-indexing
    vector_index.storage_context.persist(STORAGE_DIR)
    logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
|
||||
|
||||
|
||||
def generate_ui_for_workflow():
    """
    Generate the UI component for the UIEventData event in app/workflow.py.

    Uses the LLM configured in ``Settings.llm`` to produce the component source
    and writes it to ``ui_event.jsx`` inside COMPONENT_DIR.

    Raises:
        ImportError: UIEventData cannot be imported from app.workflow.
    """
    import asyncio

    from app.settings import init_settings
    from llama_index.core.settings import Settings
    from main import COMPONENT_DIR

    load_dotenv()
    init_settings()

    # To generate UI components for additional event types,
    # import the corresponding data model (e.g., MyCustomEventData)
    # and run the generate_ui_for_workflow function with the imported model.
    # Make sure the output filename of the generated UI component matches the event type (here `ui_event`)
    try:
        from app.workflow import UIEventData  # type: ignore
    except ImportError:
        raise ImportError("Couldn't generate UI component for the current workflow.")
    from llama_index.server.gen_ui import generate_event_component

    # works well with OpenAI gpt-4.1, Claude 3.7 Sonnet or Gemini Pro 2.5
    component_source = asyncio.run(
        generate_event_component(event_cls=UIEventData, llm=Settings.llm)
    )

    output_path = f"{COMPONENT_DIR}/ui_event.jsx"
    with open(output_path, "w") as component_file:
        component_file.write(component_source)
|
||||
@@ -0,0 +1,24 @@
|
||||
name: chat
|
||||
|
||||
control-plane:
|
||||
port: 8000
|
||||
|
||||
default-service: workflow
|
||||
|
||||
services:
|
||||
workflow:
|
||||
name: Workflow
|
||||
source:
|
||||
type: local
|
||||
name: src
|
||||
path: src/workflow:workflow
|
||||
python-dependencies:
|
||||
- llama-index-llms-openai>=0.4.5
|
||||
- llama-index-core>=0.12.45
|
||||
|
||||
ui:
|
||||
name: My Nextjs App
|
||||
port: 3000
|
||||
source:
|
||||
type: local
|
||||
name: ui
|
||||
@@ -1,32 +0,0 @@
|
||||
import logging
|
||||
|
||||
from app.settings import init_settings
|
||||
from app.workflow import create_workflow
|
||||
from dotenv import load_dotenv
|
||||
from llama_index.server import LlamaIndexServer, UIConfig
|
||||
|
||||
logger = logging.getLogger("uvicorn")
|
||||
|
||||
# A path to a directory where the customized UI code is stored
|
||||
COMPONENT_DIR = "components"
|
||||
|
||||
|
||||
def create_app():
    """
    Build the LlamaIndexServer application in dev mode and register a
    ``/api/health`` route on it.
    """
    ui_settings = UIConfig(
        component_dir=COMPONENT_DIR,
        dev_mode=True,  # Please disable this in production
        layout_dir="layout",
    )
    server = LlamaIndexServer(
        workflow_factory=create_workflow,  # A factory function that creates a new workflow for each request
        ui_config=ui_settings,
        logger=logger,
        env="dev",
    )
    # You can also add custom FastAPI routes to app
    server.add_api_route("/api/health", lambda: {"message": "OK"}, status_code=200)
    return server
|
||||
|
||||
|
||||
load_dotenv()
|
||||
init_settings()
|
||||
app = create_app()
|
||||
@@ -9,12 +9,17 @@ readme = "README.md"
|
||||
requires-python = ">=3.11,<3.14"
|
||||
dependencies = [
|
||||
"python-dotenv>=1.0.0,<2.0.0",
|
||||
"pydantic<2.10",
|
||||
"pydantic>=2.11.5",
|
||||
"aiostream>=0.5.2,<0.6.0",
|
||||
"llama-index-core>=0.12.28,<0.13.0",
|
||||
"llama-index-server>=0.1.17,<0.2.0",
|
||||
"llama-index-readers-file>=0.4.6,<1.0.0",
|
||||
"llama-index-indices-managed-llama-cloud>=0.6.3,<1.0.0",
|
||||
"llama-deploy",
|
||||
]
|
||||
|
||||
[tool.uv.sources]
|
||||
llama-deploy = { git = "https://github.com/run-llama/llama_deploy" }
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"mypy>=1.8.0,<2.0.0",
|
||||
@@ -23,9 +28,7 @@ dev = [
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
generate = "generate:generate_index"
|
||||
generate_index = "generate:generate_index"
|
||||
generate_ui = "generate:generate_ui_for_workflow"
|
||||
generate = "src.generate:generate_index"
|
||||
|
||||
|
||||
[tool.mypy]
|
||||
@@ -43,7 +46,7 @@ strict_optional = false
|
||||
disable_error_code = [ "return-value", "assignment" ]
|
||||
|
||||
[[tool.mypy.overrides]]
|
||||
module = "app.*"
|
||||
module = "src.*"
|
||||
ignore_missing_imports = false
|
||||
|
||||
[tool.hatch.metadata]
|
||||
@@ -51,4 +54,7 @@ allow-direct-references = true
|
||||
|
||||
[build-system]
|
||||
requires = [ "hatchling>=1.24" ]
|
||||
build-backend = "hatchling.build"
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["src"]
|
||||
@@ -0,0 +1,37 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
def generate_index():
    """
    Build a vector index from the documents in the data directory and persist it.

    The data directory is taken from the DATA_DIR environment variable and
    defaults to ``ui/data``; the index is stored in STORAGE_DIR.
    """
    from src.index import STORAGE_DIR
    from src.settings import init_settings
    from llama_index.core.indices import (
        VectorStoreIndex,
    )
    from llama_index.core.readers import SimpleDirectoryReader

    load_dotenv()
    init_settings()

    logger.info("Creating new index")
    # Read every document under the data directory, descending into subfolders
    data_dir = os.environ.get("DATA_DIR", "ui/data")
    docs = SimpleDirectoryReader(data_dir, recursive=True).load_data()
    vector_index = VectorStoreIndex.from_documents(docs, show_progress=True)
    # Persist the index so it can be loaded later without re-indexing
    vector_index.storage_context.persist(STORAGE_DIR)
    logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
|
||||
+4
-6
@@ -1,23 +1,21 @@
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from llama_index.core.indices import load_index_from_storage
|
||||
from llama_index.server.api.models import ChatRequest
|
||||
from llama_index.server.tools.index.utils import get_storage_context
|
||||
from llama_index.core.storage import StorageContext
|
||||
|
||||
logger = logging.getLogger("uvicorn")
|
||||
|
||||
STORAGE_DIR = "storage"
|
||||
STORAGE_DIR = "src/storage"
|
||||
|
||||
|
||||
def get_index(chat_request: Optional[ChatRequest] = None):
|
||||
def get_index():
|
||||
# check if storage already exists
|
||||
if not os.path.exists(STORAGE_DIR):
|
||||
return None
|
||||
# load the existing index
|
||||
logger.info(f"Loading index from {STORAGE_DIR}...")
|
||||
storage_context = get_storage_context(STORAGE_DIR)
|
||||
storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR)
|
||||
index = load_index_from_storage(storage_context)
|
||||
logger.info(f"Finished loading index from {STORAGE_DIR}")
|
||||
return index
|
||||
Reference in New Issue
Block a user