chore: move @llamaindex/server to chat-ui repo (#709 )

Release 0.6.3 (#708 )
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
2026-07-01 21:04:08 -04:00 · 2025-07-16 09:15:42 +08:00 · 2025-07-15 10:10:46 +07:00 · 2025-07-15 09:33:22 +07:00 · 2025-07-11 13:59:42 +08:00 · 2025-07-11 12:08:10 +08:00
216 changed files with 2915 additions and 16638 deletions
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+chore: bump @llamaindex/server 0.3.0 in templates
@@ -63,15 +63,6 @@ jobs:
        run: pnpm run pack-install
        working-directory: packages/create-llama

-      - name: Build and store server package
-        run: |
-          pnpm run build
-          wheel_file=$(ls dist/*.whl | head -n 1)
-          mkdir -p "${{ runner.temp }}"
-          cp "$wheel_file" "${{ runner.temp }}/"
-          echo "SERVER_PACKAGE_PATH=${{ runner.temp }}/$(basename "$wheel_file")" >> $GITHUB_ENV
-        working-directory: python/llama-index-server
-
      - name: Run Playwright tests for Python
        run: pnpm run e2e:python
        env:
@@ -132,21 +123,6 @@ jobs:
        run: pnpm run pack-install
        working-directory: packages/create-llama

-      - name: Build server
-        run: pnpm run build
-        working-directory: packages/server
-
-      - name: Pack @llamaindex/server package
-        run: |
-          pnpm pack --pack-destination "${{ runner.temp }}"
-          if [ "${{ runner.os }}" == "Windows" ]; then
-            file=$(find "${{ runner.temp }}" -name "llamaindex-server-*.tgz" | head -n 1)
-            mv "$file" "${{ runner.temp }}/llamaindex-server.tgz"
-          else
-            mv ${{ runner.temp }}/llamaindex-server-*.tgz ${{ runner.temp }}/llamaindex-server.tgz
-          fi
-        working-directory: packages/server
-
      - name: Run Playwright tests for TypeScript
        run: |
          pnpm run e2e:ts
@@ -155,7 +131,6 @@ jobs:
          LLAMA_CLOUD_API_KEY: ${{ secrets.LLAMA_CLOUD_API_KEY }}
          FRAMEWORK: ${{ matrix.frameworks }}
          VECTORDB: ${{ matrix.vectordbs }}
-          SERVER_PACKAGE_PATH: ${{ runner.temp }}/llamaindex-server.tgz
        working-directory: packages/create-llama

      - uses: actions/upload-artifact@v4
@@ -44,10 +44,6 @@ jobs:
      - name: Run build
        run: pnpm run build

-      - name: Run Typecheck for examples
-        run: pnpm run typecheck
-        working-directory: packages/server/examples
-
      - name: Run Python format check
        uses: chartboost/ruff-action@v1
        with:
@@ -6,8 +6,6 @@ cache/
 build/
 .next/
 out/
-packages/server/server/
-packages/server/project/
 **/playwright-report/
 **/test-results/

@@ -11,7 +11,6 @@ Create-llama is a monorepo containing CLI tools and server frameworks for buildi
 ### Monorepo Structure

 - **`packages/create-llama/`**: Main CLI tool for scaffolding LlamaIndex applications
- **`packages/server/`**: TypeScript/Next.js server framework (`@llamaindex/server`)
 - **`python/llama-index-server/`**: Python/FastAPI server framework
 - **Root**: Workspace configuration and shared development tools

@@ -44,15 +43,6 @@ npm run e2e       # Playwright tests for generated projects
 npm run clean     # Clean build artifacts and template caches
 ```

-### TypeScript Server Package
-
-```bash
-cd packages/server
-pnpm dev          # Watch mode with bunchee
-pnpm build        # Multi-step build: ESM/CJS + Next.js + static assets
-pnpm clean        # Clean all build outputs
-```
-
 ### Python Server Package

 ```bash
@@ -84,13 +74,6 @@ The CLI uses a sophisticated template system in `packages/create-llama/templates

 ## Server Framework Architecture

-### TypeScript Server (`@llamaindex/server`)
-
- **Core**: `LlamaIndexServer` class wrapping Next.js with workflow support
- **Frontend**: React-based chat UI with shadcn/ui components
- **API**: `/api/chat` endpoint with streaming responses
- **Build Process**: Complex multi-step build including static assets for Python integration
-
 ### Python Server (`llama-index-server`)

 - **Core**: `LlamaIndexServer` class extending FastAPI
@@ -111,7 +111,7 @@ non-interactively. For a list of the latest options, call `create-llama --help`.

 The generated code is using the LlamaIndex Server, which serves LlamaIndex Workflows and Agent Workflows via an API server. See the following docs for more information:

- [LlamaIndex Server For TypeScript](./packages/server/README.md)
+- [LlamaIndex Server For TypeScript](https://github.com/run-llama/chat-ui/tree/main/packages/server)
 - [LlamaIndex Server For Python](./python/llama-index-server/README.md)

 Inspired by and adapted from [create-next-app](https://github.com/vercel/next.js/tree/canary/packages/create-next-app)
@@ -31,19 +31,6 @@ export default tseslint.config(
      "@typescript-eslint/ban-ts-comment": "off",
    },
  },
-  {
-    files: ["packages/server/**"],
-    rules: {
-      "no-irregular-whitespace": "off",
-      "@typescript-eslint/no-unused-vars": "off",
-      "@typescript-eslint/no-explicit-any": [
-        "error",
-        {
-          ignoreRestArgs: true,
-        },
-      ],
-    },
-  },
  {
    ignores: [
      "python/**",
@@ -57,9 +44,6 @@ export default tseslint.config(
      "**/out/**",
      "**/node_modules/**",
      "**/build/**",
-      "packages/server/server/**",
-      "packages/server/project/**",
-      "packages/server/bin/**",
    ],
  },
 );
@@ -1,5 +1,18 @@
 # create-llama

+## 0.6.3
+
+### Patch Changes
+
+- fec752e: refactor: llamacloud configs
+
+## 0.6.2
+
+### Patch Changes
+
+- 28b46be: chore: replace Python examples with llama-deploy
+- 93e2abe: fix: unused imports and format
+
 ## 0.6.1

 ### Patch Changes
@@ -3,11 +3,8 @@ import { exec } from "child_process";
 import fs from "fs";
 import path from "path";
 import util from "util";
-import {
-  ALL_USE_CASES,
-  TemplateFramework,
-  TemplateVectorDB,
-} from "../../helpers/types";
+import { TemplateFramework, TemplateUseCase, TemplateVectorDB } from "../../helpers";
+import { ALL_PYTHON_USE_CASES } from "../../helpers/use-case";
 import { RunCreateLlamaOptions, createTestDir, runCreateLlama } from "../utils";

 const execAsync = util.promisify(exec);
@@ -17,11 +14,15 @@ const vectorDb: TemplateVectorDB = process.env.VECTORDB
  ? (process.env.VECTORDB as TemplateVectorDB)
  : "none";

+const useCases: TemplateUseCase[] = vectorDb === "llamacloud" ? [
+  "agentic_rag", "deep_research", "financial_report"
+] : ALL_PYTHON_USE_CASES
+
 test.describe("Mypy check", () => {
  test.describe.configure({ retries: 0 });

  test.describe("LlamaIndexServer", async () => {
-    for (const useCase of ALL_USE_CASES) {
+    for (const useCase of useCases) {
      test(`should pass mypy for use case: ${useCase}`, async () => {
        const cwd = await createTestDir();
        await createAndCheckLlamaProject({
@@ -2,11 +2,11 @@ import { expect, test } from "@playwright/test";
 import { ChildProcess } from "child_process";
 import fs from "fs";
 import path from "path";
+import { type TemplateFramework, type TemplateVectorDB } from "../../helpers";
 import {
-  ALL_USE_CASES,
-  type TemplateFramework,
-  type TemplateVectorDB,
-} from "../../helpers";
+  ALL_PYTHON_USE_CASES,
+  ALL_TYPESCRIPT_USE_CASES,
+} from "../../helpers/use-case";
 import { createTestDir, runCreateLlama } from "../utils";

 const templateFramework: TemplateFramework = process.env.FRAMEWORK
@@ -17,10 +17,15 @@ const vectorDb: TemplateVectorDB = process.env.VECTORDB
  : "none";
 const llamaCloudProjectName = "create-llama";
 const llamaCloudIndexName = "e2e-test";
+const allUseCases =
+  templateFramework === "nextjs"
+    ? ALL_TYPESCRIPT_USE_CASES
+    : ALL_PYTHON_USE_CASES;
+const isPythonLlamaDeploy = templateFramework === "fastapi";

 const userMessage = "Write a blog post about physical standards for letters";

-for (const useCase of ALL_USE_CASES) {
+for (const useCase of allUseCases) {
  test.describe(`Test use case ${useCase} ${templateFramework} ${vectorDb}`, async () => {
    let port: number;
    let cwd: string;
@@ -35,7 +40,7 @@ for (const useCase of ALL_USE_CASES) {
        templateFramework,
        vectorDb,
        port,
-        postInstallAction: "runApp",
+        postInstallAction: isPythonLlamaDeploy ? "dependencies" : "runApp",
        useCase,
        llamaCloudProjectName,
        llamaCloudIndexName,
@@ -50,6 +55,11 @@ for (const useCase of ALL_USE_CASES) {
    });

    test("Frontend should have a title", async ({ page }) => {
+      test.skip(
+        isPythonLlamaDeploy,
+        "Skip frontend tests for Python LllamaDeploy",
+      );
+
      await page.goto(`http://localhost:${port}`);
      await expect(page.getByText("Built by LlamaIndex")).toBeVisible({
        timeout: 5 * 60 * 1000,
@@ -60,8 +70,10 @@ for (const useCase of ALL_USE_CASES) {
      page,
    }) => {
      test.skip(
-        useCase === "financial_report" || useCase === "deep_research",
-        "Skip chat tests for financial report and deep research.",
+        useCase === "financial_report" ||
+          useCase === "deep_research" ||
+          isPythonLlamaDeploy,
+        "Skip chat tests for financial report and deep research. Also skip for Python LlamaDeploy",
      );
      await page.goto(`http://localhost:${port}`);
      await page.fill("form textarea", userMessage);
@@ -4,11 +4,11 @@ import fs from "fs";
 import path from "path";
 import util from "util";
 import {
-  ALL_USE_CASES,
  TemplateFramework,
  TemplateUseCase,
  TemplateVectorDB,
 } from "../../helpers/types";
+import { ALL_TYPESCRIPT_USE_CASES } from "../../helpers/use-case";
 import { createTestDir, runCreateLlama } from "../utils";

 const execAsync = util.promisify(exec);
@@ -21,7 +21,7 @@ const vectorDb: TemplateVectorDB = process.env.VECTORDB
 test.describe("Test resolve TS dependencies", () => {
  test.describe.configure({ retries: 0 });

-  for (const useCase of ALL_USE_CASES) {
+  for (const useCase of ALL_TYPESCRIPT_USE_CASES) {
    const optionDescription = `useCase: ${useCase}, vectorDb: ${vectorDb}`;
    test.describe(`${optionDescription}`, () => {
      test(`${optionDescription}`, async () => {
@@ -1,20 +1,17 @@
 import fs from "fs/promises";
 import path from "path";
 import {
+  EnvVar,
  InstallTemplateArgs,
  ModelConfig,
  TemplateFramework,
  TemplateType,
+  TemplateUseCase,
  TemplateVectorDB,
 } from "./types";

 import { TSYSTEMS_LLMHUB_API_URL } from "./providers/llmhub";
-
-export type EnvVar = {
-  name?: string;
-  description?: string;
-  value?: string;
-};
+import { USE_CASE_CONFIGS } from "./use-case";

 const renderEnvVar = (envVars: EnvVar[]): string => {
  return envVars.reduce(
@@ -228,7 +225,15 @@ Otherwise, use CHROMA_HOST and CHROMA_PORT config above`,
  }
 };

-const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
+const getModelEnvs = (
+  modelConfig: ModelConfig,
+  framework: TemplateFramework,
+  template: TemplateType,
+  useCase: TemplateUseCase,
+): EnvVar[] => {
+  const isPythonLlamaDeploy =
+    framework === "fastapi" && template === "llamaindexserver";
+
  return [
    {
      name: "MODEL",
@@ -240,10 +245,25 @@ const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
      description: "Name of the embedding model to use.",
      value: modelConfig.embeddingModel,
    },
-    {
-      name: "CONVERSATION_STARTERS",
-      description: "The questions to help users get started (multi-line).",
-    },
+    ...(isPythonLlamaDeploy
+      ? [
+          {
+            name: "NEXT_PUBLIC_STARTER_QUESTIONS",
+            description:
+              "Initial questions to display in the chat (`starterQuestions`)",
+            value: JSON.stringify(
+              USE_CASE_CONFIGS[useCase]?.starterQuestions ?? [],
+            ),
+          },
+        ]
+      : [
+          {
+            name: "CONVERSATION_STARTERS",
+            description:
+              "The questions to help users get started (multi-line).",
+          },
+        ]),
+    ...(USE_CASE_CONFIGS[useCase]?.additionalEnvVars ?? []),
    ...(modelConfig.provider === "openai"
      ? [
          {
@@ -251,14 +271,18 @@ const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
            description: "The OpenAI API key to use.",
            value: modelConfig.apiKey,
          },
-          {
-            name: "LLM_TEMPERATURE",
-            description: "Temperature for sampling from the model.",
-          },
-          {
-            name: "LLM_MAX_TOKENS",
-            description: "Maximum number of tokens to generate.",
-          },
+          ...(isPythonLlamaDeploy
+            ? []
+            : [
+                {
+                  name: "LLM_TEMPERATURE",
+                  description: "Temperature for sampling from the model.",
+                },
+                {
+                  name: "LLM_MAX_TOKENS",
+                  description: "Maximum number of tokens to generate.",
+                },
+              ]),
        ]
      : []),
    ...(modelConfig.provider === "anthropic"
@@ -367,11 +391,12 @@ const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {

 const getFrameworkEnvs = (
  framework: TemplateFramework,
+  template?: TemplateType,
  port?: number,
 ): EnvVar[] => {
  const sPort = port?.toString() || "8000";
  const result: EnvVar[] = [];
-  if (framework === "fastapi") {
+  if (framework === "fastapi" && template !== "llamaindexserver") {
    result.push(
      ...[
        {
@@ -403,6 +428,7 @@ export const createBackendEnvFile = async (
    | "template"
    | "port"
    | "useLlamaParse"
+    | "useCase"
  >,
 ) => {
  // Init env values
@@ -418,11 +444,27 @@ export const createBackendEnvFile = async (
        ]
      : []),
    ...getVectorDBEnvs(opts.vectorDb, opts.framework, opts.template),
-    ...getFrameworkEnvs(opts.framework, opts.port),
-    ...getModelEnvs(opts.modelConfig),
+    ...getFrameworkEnvs(opts.framework, opts.template, opts.port),
+    ...getModelEnvs(
+      opts.modelConfig,
+      opts.framework,
+      opts.template,
+      opts.useCase,
+    ),
  ];
  // Render and write env file
  const content = renderEnvVar(envVars);
-  await fs.writeFile(path.join(root, envFileName), content);
+
+  const isPythonLlamaDeploy =
+    opts.framework === "fastapi" && opts.template === "llamaindexserver";
+
+  // each llama-deploy service will need a .env inside its directory
+  // this .env will be copied along with workflow code when service is deployed
+  // so that we need to put the .env file inside src/ instead of root
+  const envPath = isPythonLlamaDeploy
+    ? path.join(root, "src", envFileName)
+    : path.join(root, envFileName);
+
+  await fs.writeFile(envPath, content);
  console.log(`Created '${envFileName}' file. Please check the settings.`);
 };
@@ -117,8 +117,13 @@ const downloadFile = async (url: string, destPath: string) => {
 const prepareContextData = async (
  root: string,
  dataSources: TemplateDataSource[],
+  isPythonLlamaDeploy: boolean,
 ) => {
-  await makeDir(path.join(root, "data"));
+  const dataDir = isPythonLlamaDeploy
+    ? path.join(root, "ui", "data")
+    : path.join(root, "data");
+
+  await makeDir(dataDir);
  for (const dataSource of dataSources) {
    const dataSourceConfig = dataSource?.config as FileSourceConfig;
    // If the path is URLs, download the data and save it to the data directory
@@ -128,8 +133,7 @@ const prepareContextData = async (
        dataSourceConfig.url.toString(),
      );
      const destPath = path.join(
-        root,
-        "data",
+        dataDir,
        dataSourceConfig.filename ??
          path.basename(dataSourceConfig.url.toString()),
      );
@@ -137,11 +141,7 @@ const prepareContextData = async (
    } else {
      // Copy local data
      console.log("Copying data from path:", dataSourceConfig.path);
-      const destPath = path.join(
-        root,
-        "data",
-        path.basename(dataSourceConfig.path),
-      );
+      const destPath = path.join(dataDir, path.basename(dataSourceConfig.path));
      await fsExtra.copy(dataSourceConfig.path, destPath);
    }
  }
@@ -156,6 +156,9 @@ export const installTemplate = async (props: InstallTemplateArgs) => {
    await installTSTemplate(props);
  }

+  const isPythonLlamaDeploy =
+    props.framework === "fastapi" && props.template === "llamaindexserver";
+
  // This is a backend, so we need to copy the test data and create the env file.

  // Copy the environment file to the target directory.
@@ -164,6 +167,7 @@ export const installTemplate = async (props: InstallTemplateArgs) => {
  await prepareContextData(
    props.root,
    props.dataSources.filter((ds) => ds.type === "file"),
+    isPythonLlamaDeploy,
  );

  if (
@@ -183,10 +187,12 @@ export const installTemplate = async (props: InstallTemplateArgs) => {
    );
  }

-  // Create outputs directory
-  await makeDir(path.join(props.root, "output/tools"));
-  await makeDir(path.join(props.root, "output/uploaded"));
-  await makeDir(path.join(props.root, "output/llamacloud"));
+  if (!isPythonLlamaDeploy) {
+    // Create outputs directory (llama-deploy doesn't need this)
+    await makeDir(path.join(props.root, "output/tools"));
+    await makeDir(path.join(props.root, "output/uploaded"));
+    await makeDir(path.join(props.root, "output/llamacloud"));
+  }
 };

 export * from "./types";
@@ -7,27 +7,33 @@ import { isUvAvailable, tryUvSync } from "./uv";

 import { assetRelocator, copy } from "./copy";
 import { templatesDir } from "./dir";
-import {
-  InstallTemplateArgs,
-  ModelConfig,
-  TemplateDataSource,
-  TemplateVectorDB,
-} from "./types";
-
-interface Dependency {
-  name: string;
-  version?: string;
-  extras?: string[];
-  constraints?: Record<string, string>;
-}
+import { Dependency, InstallTemplateArgs } from "./types";
+import { USE_CASE_CONFIGS } from "./use-case";

 const getAdditionalDependencies = (
-  modelConfig: ModelConfig,
-  vectorDb?: TemplateVectorDB,
-  dataSources?: TemplateDataSource[],
+  opts: Pick<
+    InstallTemplateArgs,
+    | "framework"
+    | "template"
+    | "useCase"
+    | "modelConfig"
+    | "vectorDb"
+    | "dataSources"
+  >,
 ) => {
+  const { framework, template, useCase, modelConfig, vectorDb, dataSources } =
+    opts;
+
  const dependencies: Dependency[] = [];

+  const isPythonLlamaDeploy =
+    framework === "fastapi" && template === "llamaindexserver";
+  const useCaseDependencies =
+    USE_CASE_CONFIGS[useCase]?.additionalDependencies ?? [];
+  if (isPythonLlamaDeploy && useCaseDependencies.length > 0) {
+    dependencies.push(...useCaseDependencies);
+  }
+
  // Add vector db dependencies
  switch (vectorDb) {
    case "mongo": {
@@ -412,13 +418,17 @@ const installLlamaIndexServerTemplate = async ({
    process.exit(1);
  }

-  await copy("*.py", path.join(root, "app"), {
+  const srcDir = path.join(root, "src");
+  const uiDir = path.join(root, "ui");
+
+  // copy workflow code to src folder
+  await copy("*.py", srcDir, {
    parents: true,
    cwd: path.join(templatesDir, "components", "use-cases", "python", useCase),
  });

-  // copy model provider settings to app folder
-  await copy("**", path.join(root, "app"), {
+  // copy model provider settings to src folder
+  await copy("**", srcDir, {
    cwd: path.join(
      templatesDir,
      "components",
@@ -428,32 +438,34 @@ const installLlamaIndexServerTemplate = async ({
    ),
  });

-  // Copy custom UI component code
-  await copy(`*`, path.join(root, "components"), {
+  // copy ts server to ui folder
+  const tsProxyDir = path.join(templatesDir, "components", "ts-proxy");
+  await copy("package.json", uiDir, {
+    parents: true,
+    cwd: tsProxyDir,
+  });
+  const serverFileLocation = useLlamaParse
+    ? path.join(tsProxyDir, "llamacloud")
+    : path.join(tsProxyDir);
+  await copy("index.ts", uiDir, {
+    parents: true,
+    cwd: serverFileLocation,
+  });
+
+  // Copy custom UI components to ui/components folder
+  await copy(`*`, path.join(uiDir, "components"), {
    parents: true,
    cwd: path.join(templatesDir, "components", "ui", "use-cases", useCase),
  });

-  // Copy layout components to layout folder in root
-  await copy("*", path.join(root, "layout"), {
+  // Copy layout components to ui/layout folder
+  await copy("*", path.join(uiDir, "layout"), {
    parents: true,
    cwd: path.join(templatesDir, "components", "ui", "layout"),
  });

  if (useLlamaParse) {
-    await copy("index.py", path.join(root, "app"), {
-      parents: true,
-      cwd: path.join(
-        templatesDir,
-        "components",
-        "vectordbs",
-        "llamaindexserver",
-        "llamacloud",
-        "python",
-      ),
-    });
-    // TODO: Consider moving generate.py to app folder.
-    await copy("generate.py", path.join(root), {
+    await copy("**", srcDir, {
      parents: true,
      cwd: path.join(
        templatesDir,
@@ -471,6 +483,12 @@ const installLlamaIndexServerTemplate = async ({
    cwd: path.join(templatesDir, "components", "use-cases", "python", useCase),
    rename: assetRelocator,
  });
+
+  // Clean up, remove generate.py and index.py for non-data use cases
+  if (["code_generator", "document_generator", "hitl"].includes(useCase)) {
+    await fs.unlink(path.join(srcDir, "generate.py"));
+    await fs.unlink(path.join(srcDir, "index.py"));
+  }
 };

 export const installPythonTemplate = async ({
@@ -517,11 +535,14 @@ export const installPythonTemplate = async ({
  }

  console.log("Adding additional dependencies");
-  const addOnDependencies = getAdditionalDependencies(
+  const addOnDependencies = getAdditionalDependencies({
+    framework,
+    template,
+    useCase,
    modelConfig,
    vectorDb,
    dataSources,
-  );
+  });

  await addDependencies(root, addOnDependencies);

@@ -1,4 +1,5 @@
-import { SpawnOptions, spawn } from "child_process";
+import { SpawnOptions, exec, spawn } from "child_process";
+import waitPort from "wait-port";
 import { TemplateFramework, TemplateType } from "./types";

 const createProcess = (
@@ -47,6 +48,58 @@ export function runTSApp(appPath: string, port: number) {
  });
 }

+// TODO: support running multiple LlamaDeploy servers on the same machine
+async function runPythonLlamaDeployServer(
+  appPath: string,
+  port: number = 4501,
+) {
+  console.log("Starting llama_deploy server...", port);
+  const serverProcess = exec("uv run -m llama_deploy.apiserver", {
+    cwd: appPath,
+    env: {
+      ...process.env,
+      LLAMA_DEPLOY_APISERVER_PORT: `${port}`,
+    },
+  });
+
+  // Pipe output to console
+  serverProcess.stdout?.pipe(process.stdout);
+  serverProcess.stderr?.pipe(process.stderr);
+
+  // Wait for the server to be ready
+  console.log("Waiting for server to be ready...");
+  await waitPort({ port, host: "localhost", timeout: 30000 });
+
+  // create the deployment with explicit host configuration
+  console.log("llama_deploy server started, creating deployment...", port);
+  await createProcess(
+    "uv",
+    [
+      "run",
+      "llamactl",
+      "-s",
+      `http://localhost:${port}`,
+      "deploy",
+      "llama_deploy.yml",
+    ],
+    {
+      stdio: "inherit",
+      cwd: appPath,
+      shell: true,
+    },
+  );
+  console.log(`Deployment created successfully!`);
+
+  // Keep the main process alive and handle cleanup
+  return new Promise(() => {
+    process.on("SIGINT", () => {
+      console.log("\nShutting down...");
+      serverProcess.kill();
+      process.exit(0);
+    });
+  });
+}
+
 export async function runApp(
  appPath: string,
  template: TemplateType,
@@ -57,6 +110,11 @@ export async function runApp(
    // Start the app
    const defaultPort = framework === "nextjs" ? 3000 : 8000;

+    if (template === "llamaindexserver" && framework === "fastapi") {
+      await runPythonLlamaDeployServer(appPath, port);
+      return;
+    }
+
    const appRunner = framework === "fastapi" ? runFastAPIApp : runTSApp;
    await appRunner(appPath, port || defaultPort, template);
  } catch (error) {
@@ -49,14 +49,6 @@ export type TemplateUseCase =
  | "document_generator"
  | "hitl";

-export const ALL_USE_CASES: TemplateUseCase[] = [
-  "agentic_rag",
-  "deep_research",
-  "financial_report",
-  "code_generator",
-  "document_generator",
-  "hitl",
-];
 // Config for both file and folder
 export type FileSourceConfig =
  | {
@@ -97,3 +89,16 @@ export interface InstallTemplateArgs {
  postInstallAction: TemplatePostInstallAction;
  useCase: TemplateUseCase;
 }
+
+export type EnvVar = {
+  name?: string;
+  description?: string;
+  value?: string;
+};
+
+export interface Dependency {
+  name: string;
+  version?: string;
+  extras?: string[];
+  constraints?: Record<string, string>;
+}
@@ -133,6 +133,18 @@ export const installTSTemplate = async ({
      modelConfig,
      dataSources,
    });
+
+    if (vectorDb === "llamacloud") {
+      // replace index.ts with llamacloud/index.ts
+      await fs.rm(path.join(root, "src", "index.ts"));
+      await copy("index.ts", path.join(root, "src"), {
+        parents: true,
+        cwd: path.join(root, "src", "llamacloud"),
+      });
+    }
+
+    // remove llamacloud folder
+    await fs.rm(path.join(root, "src", "llamacloud"), { recursive: true });
  } else {
    throw new Error(`Template ${template} not supported`);
  }
@@ -246,16 +258,6 @@ async function updatePackageJson({
    };
  }

-  // if having custom server package tgz file, use it for testing @llamaindex/server
-  const serverPackagePath = process.env.SERVER_PACKAGE_PATH;
-  if (serverPackagePath) {
-    const relativePath = path.relative(process.cwd(), serverPackagePath);
-    packageJson.dependencies = {
-      ...packageJson.dependencies,
-      "@llamaindex/server": `file:${relativePath}`,
-    };
-  }
-
  await fs.writeFile(
    packageJsonFile,
    JSON.stringify(packageJson, null, 2) + os.EOL,
@@ -0,0 +1,84 @@
+import { Dependency, EnvVar, TemplateUseCase } from "./types";
+
+export const ALL_TYPESCRIPT_USE_CASES: TemplateUseCase[] = [
+  "agentic_rag",
+  "deep_research",
+  "financial_report",
+  "code_generator",
+  "document_generator",
+  "hitl",
+];
+
+export const ALL_PYTHON_USE_CASES: TemplateUseCase[] = [
+  "agentic_rag",
+  "deep_research",
+  "financial_report",
+  "code_generator",
+  "document_generator",
+];
+
+export const USE_CASE_CONFIGS: Record<
+  TemplateUseCase,
+  {
+    starterQuestions: string[];
+    additionalEnvVars?: EnvVar[];
+    additionalDependencies?: Dependency[];
+  }
+> = {
+  agentic_rag: {
+    starterQuestions: [
+      "Letter standard in the document",
+      "Summarize the document",
+    ],
+  },
+  financial_report: {
+    starterQuestions: [
+      "Compare Apple and Tesla financial performance",
+      "Generate a PDF report for Tesla financial",
+    ],
+    additionalEnvVars: [
+      {
+        name: "E2B_API_KEY",
+        description: "The E2B API key to use to use code interpreter tool",
+      },
+    ],
+    additionalDependencies: [
+      {
+        name: "e2b-code-interpreter",
+        version: ">=1.1.1,<2.0.0",
+      },
+      {
+        name: "markdown",
+        version: ">=3.7,<4.0",
+      },
+      {
+        name: "xhtml2pdf",
+        version: ">=0.2.17,<1.0.0",
+      },
+    ],
+  },
+  deep_research: {
+    starterQuestions: [
+      "Research about Apple and Tesla",
+      "Financial performance of Tesla",
+    ],
+  },
+  code_generator: {
+    starterQuestions: [
+      "Generate a code for a simple calculator",
+      "Generate a code for a todo list app",
+    ],
+  },
+  document_generator: {
+    starterQuestions: [
+      "Generate a document about LlamaIndex",
+      "Generate a document about LLM",
+    ],
+  },
+  hitl: {
+    starterQuestions: [
+      "List all the files in the current directory",
+      "Check git status",
+    ],
+  },
+};
@@ -1,6 +1,6 @@
 {
  "name": "create-llama",
-  "version": "0.6.1",
+  "version": "0.6.3",
  "description": "Create LlamaIndex-powered apps with one command",
  "keywords": [
    "rag",
@@ -21,7 +21,7 @@ export const askQuestions = async (
    askModels: askModelsFromArgs,
  } = args;

-  const { useCase, framework } = await prompts(
+  const { useCase } = await prompts(
    [
      {
        type: useCaseFromArgs ? null : "select",
@@ -65,20 +65,28 @@ export const askQuestions = async (
        ],
        initial: 0,
      },
-      {
-        type: frameworkFromArgs ? null : "select",
-        name: "framework",
-        message: "What language do you want to use?",
-        choices: [
-          { title: "Python (FastAPI)", value: "fastapi" },
-          { title: "Typescript (NextJS)", value: "nextjs" },
-        ],
-        initial: 0,
-      },
    ],
    questionHandlers,
  );

+  const { framework } = await prompts(
+    {
+      type: frameworkFromArgs ? null : "select",
+      name: "framework",
+      message: "What language do you want to use?",
+      choices: [
+        // For Python Human in the Loop use case, please refer to this chat-ui example:
+        // https://github.com/run-llama/chat-ui/blob/main/examples/llamadeploy/chat/src/cli_workflow.py
+        ...(useCase !== "hitl"
+          ? [{ title: "Python (FastAPI)", value: "fastapi" }]
+          : []),
+        { title: "Typescript (NextJS)", value: "nextjs" },
+      ],
+      initial: 0,
+    },
+    questionHandlers,
+  );
+
  const finalUseCase = (useCaseFromArgs ?? useCase) as TemplateUseCase;
  const finalFramework = (frameworkFromArgs ?? framework) as TemplateFramework;
  if (!finalUseCase) {
@@ -102,7 +110,12 @@ export const askQuestions = async (
  // Ask for LlamaCloud
  let llamaCloudKey = llamaCloudKeyFromArgs ?? process.env.LLAMA_CLOUD_API_KEY;
  let vectorDb: TemplateVectorDB = vectorDbFromArgs ?? "none";
-  if (!vectorDbFromArgs && useCaseConfig.dataSources) {
+
+  if (
+    !vectorDbFromArgs &&
+    useCaseConfig.dataSources &&
+    !["code_generator", "document_generator", "hitl"].includes(finalUseCase) // these use cases don't use data so no need to ask for LlamaCloud
+  ) {
    const { useLlamaCloud } = await prompts(
      {
        type: "toggle",
@@ -0,0 +1,9 @@
+import { LlamaIndexServer } from "@llamaindex/server";
+
+new LlamaIndexServer({
+  uiConfig: {
+    componentsDir: "components",
+    layoutDir: "layout",
+    llamaDeploy: { deployment: "chat", workflow: "workflow" },
+  },
+}).start();
@@ -0,0 +1,12 @@
+import { LlamaIndexServer } from "@llamaindex/server";
+
+new LlamaIndexServer({
+  uiConfig: {
+    componentsDir: "components",
+    layoutDir: "layout",
+    llamaDeploy: { deployment: "chat", workflow: "workflow" },
+  },
+  llamaCloud: {
+    outputDir: "output/llamacloud",
+  },
+}).start();
@@ -0,0 +1,18 @@
+{
+  "name": "llamaindex-server-ui",
+  "version": "0.0.1",
+  "private": true,
+  "scripts": {
+    "dev": "nodemon --exec tsx index.ts"
+  },
+  "dependencies": {
+    "@llamaindex/server": "^0.3.0",
+    "dotenv": "^16.4.7"
+  },
+  "devDependencies": {
+    "@types/node": "^20.10.3",
+    "nodemon": "^3.1.10",
+    "tsx": "4.7.2",
+    "typescript": "^5.3.2"
+  }
+}
@@ -21,7 +21,7 @@ export default function Header() {
          </a>
          <img
            className="h-[24px] w-[24px] rounded-sm"
-            src="/llama.png"
+            src="https://ui.llamaindex.ai/llama.png"
            alt="Llama Logo"
          />
        </div>
@@ -1,59 +1,113 @@
-This is a [LlamaIndex](https://www.llamaindex.ai/) simple agentic RAG project using [Agent Workflows](https://docs.llamaindex.ai/en/stable/examples/agent/agent_workflow_basic/).
+# LlamaIndex Workflow Example

-## Getting Started
+This is a [LlamaIndex](https://www.llamaindex.ai/) project that uses [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/) deployed with [LlamaDeploy](https://github.com/run-llama/llama_deploy).

-First, setup the environment with uv:
+LlamaDeploy is a system for deploying and managing LlamaIndex workflows, while LlamaIndexServer provides a pre-built TypeScript server with an integrated chat UI that can connect directly to LlamaDeploy deployments. This example shows how you can quickly set up a complete chat application by combining these two technologies.

-> **_Note:_** This step is not needed if you are using the dev-container.
+## Prerequisites

-```shell
+If you haven't installed uv, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/) to install it.
+
+You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [src/settings.py](src/settings.py).
+
+Please set up their API keys in the `src/.env` file.
+
+## Installation
+
+Both the SDK and the CLI are part of the LlamaDeploy Python package. To install, just run:
+
+```bash
 uv sync
 ```

-Then check the parameters that have been pre-configured in the `.env` file in this directory.
-Make sure you have set the `OPENAI_API_KEY` for the LLM.
+If you don't have uv installed, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/).

-Second, generate the embeddings of the documents in the `./data` directory:
+## Generate Index
+
+Generate the embeddings of the documents in the `./data` directory:

 ```shell
 uv run generate
 ```

-Third, run the development server:
+## Running the Deployment

-```shell
-uv run fastapi dev
-```
-
-Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
-
-To start the app optimized for **production**, run:
+At this point we have all we need to run this deployment. Ideally, we would have the API server already running
+somewhere in the cloud, but to get started let's start an instance locally. Run the following python script
+from a shell:

 ```
-uv run fastapi run
+$ uv run -m llama_deploy.apiserver
+INFO:     Started server process [10842]
+INFO:     Waiting for application startup.
+INFO:     Application startup complete.
+INFO:     Uvicorn running on http://0.0.0.0:4501 (Press CTRL+C to quit)
 ```

-## Configure LLM and Embedding Model
+From another shell, use the CLI, `llamactl`, to create the deployment:

-You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
+```
+$ uv run llamactl deploy llama_deploy.yml
+Deployment successful: chat
+```
+
+## UI Interface
+
+LlamaDeploy will serve the UI through the apiserver. Point the browser to [http://localhost:4501/deployments/chat/ui](http://localhost:4501/deployments/chat/ui) to interact with your deployment through a user-friendly interface.
+
+## API endpoints
+
+You can find all the endpoints in the [API documentation](http://localhost:4501/docs). To get started, you can try the following endpoints:
+
+Create a new task:
+
+```bash
+curl -X POST 'http://localhost:4501/deployments/chat/tasks/create' \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "input": "{\"user_msg\":\"Hello\",\"chat_history\":[]}",
+    "service_id": "workflow"
+  }'
+```
+
+Stream events:
+
+```bash
+curl 'http://localhost:4501/deployments/chat/tasks/0b411be6-005d-43f0-9b6b-6a0017f08002/events?session_id=dd36442c-45ca-4eaa-8d75-b4e6dad1a83e&raw_event=true' \
+  -H 'Content-Type: application/json'
+```
+
+Note that the task_id and session_id are returned when creating a new task.

 ## Use Case

-We have prepared an [example workflow](./app/workflow.py) for the agentic RAG use case, where you can ask questions about the example documents in the [./data](./data) directory.
+We have prepared an [example workflow](./src/workflow.py) for the agentic RAG use case, where you can ask questions about the example documents in the [./data](./data) directory.
+To update the workflow, you can modify the code in [`src/workflow.py`](src/workflow.py).

-You can start by sending an request on the [chat UI](http://localhost:8000) or you can test the `/api/chat` endpoint with the following curl request:
+## Customize the UI

-```
-curl --location 'localhost:8000/api/chat' \
--header 'Content-Type: application/json' \
--data '{ "messages": [{ "role": "user", "content": "What standards for a letter exist?" }] }'
-```
+The UI is served by the LlamaIndexServer package. You can configure the UI by modifying the `uiConfig` in the [ui/index.ts](ui/index.ts) file.
+
+The following are the available options:
+
+- `starterQuestions`: Predefined questions for chat interface
+- `componentsDir`: Directory for custom event components
+- `layoutDir`: Directory for custom layout components
+- `llamaDeploy`: The LlamaDeploy configuration (the deployment name and workflow name defined in the [llama_deploy.yml](llama_deploy.yml) file)
+
+## LlamaCloud Integration
+
+You can enable LlamaCloud integration by setting the `llamaCloud` option in the [ui/index.ts](ui/index.ts) file.
+
+The following are the available options:
+
+- `outputDir`: The directory for LlamaCloud output

 ## Learn More

-To learn more about LlamaIndex, take a look at the following resources:
-
 - [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
 - [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
+- [LlamaDeploy GitHub Repository](https://github.com/run-llama/llama_deploy)
+- [Chat-UI Documentation](https://ts.llamaindex.ai/docs/chat-ui)

-You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
+You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
@@ -0,0 +1,106 @@
+from typing import Any, List, Optional
+
+from llama_index.core import QueryBundle
+from llama_index.core.postprocessor.types import BaseNodePostprocessor
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.query_engine.retriever_query_engine import RetrieverQueryEngine
+from llama_index.core.response_synthesizers import Accumulate
+from llama_index.core.schema import NodeWithScore
+from llama_index.core.tools.query_engine import QueryEngineTool
+
+
+# Default prompt used by `CitationSynthesizer` as its `text_qa_template`.
+# Override it by passing a custom `text_qa_template` when constructing `CitationSynthesizer`.
+CITATION_PROMPT = """
+Context information is below.
+------------------
+{context_str}
+------------------
+The context are multiple text chunks, each text chunk has its own citation_id at the beginning.
+Use the citation_id for citation construction.
+
+Answer the following query with citations:
+------------------
+{query_str}
+------------------
+
+## Citation format
+
+[citation:id]
+
+Where:
+- [citation:] is a matching pattern which is required for all citations.
+- `id` is the `citation_id` provided in the context or previous response.
+
+Example:
+```
+    Here is a response that uses context information [citation:90ca859f-4f32-40ca-8cd0-edfad4fb298b] 
+    and other ideas that don't use context information [citation:17b2cc9a-27ae-4b6d-bede-5ca60fc00ff4] .\n
+    The citation block will be displayed automatically with useful information for the user in the UI [citation:1c606612-e75f-490e-8374-44e79f818d19] .
+```
+
+## Requirements:
+1. Always include citations for every fact from the context information in your response. 
+2. Make sure that the citation_id is correct with the context, don't mix up the citation_id with other information.
+
+Now, you answer the query with citations:
+"""
+
+
+class NodeCitationProcessor(BaseNodePostprocessor):
+    """
+    Add a new field `citation_id` to the metadata of the node by copying the id from the node.
+    Useful for citation construction.
+    """
+
+    def _postprocess_nodes(
+        self,
+        nodes: List[NodeWithScore],
+        query_bundle: Optional[QueryBundle] = None,
+    ) -> List[NodeWithScore]:
+        for node_score in nodes:
+            node_score.node.metadata["citation_id"] = node_score.node.node_id
+        return nodes
+
+
+class CitationSynthesizer(Accumulate):
+    """
+    Overload the Accumulate synthesizer to:
+    1. Update prepare node metadata for citation id
+    2. Update text_qa_template to include citations
+    """
+
+    def __init__(self, **kwargs: Any) -> None:
+        text_qa_template = kwargs.pop("text_qa_template", None)
+        if text_qa_template is None:
+            text_qa_template = PromptTemplate(template=CITATION_PROMPT)
+        super().__init__(text_qa_template=text_qa_template, **kwargs)
+
+
+# Add this prompt to your agent system prompt
+CITATION_SYSTEM_PROMPT = (
+    "\nAnswer the user question using the response from the query tool. "
+    "It's important to respect the citation information in the response. "
+    "Don't mix up the citation_id, keep them at the correct fact."
+)
+
+
+def enable_citation(query_engine_tool: QueryEngineTool) -> QueryEngineTool:
+    """
+    Enable citation for a query engine tool by using CitationSynthesizer and NodePostprocessor.
+    Note: This function will override the response synthesizer of your query engine.
+    """
+    query_engine = query_engine_tool.query_engine
+    if not isinstance(query_engine, RetrieverQueryEngine):
+        raise ValueError(
+            "Citation feature requires a RetrieverQueryEngine. Your tool's query engine is a "
+            f"{type(query_engine)}."
+        )
+    # Update the response synthesizer and node postprocessors
+    query_engine._response_synthesizer = CitationSynthesizer()
+    query_engine._node_postprocessors += [NodeCitationProcessor()]
+    query_engine_tool._query_engine = query_engine
+
+    # Update tool metadata
+    query_engine_tool.metadata.description += "\nThe output will include citations with the format [citation:id] for each chunk of information in the knowledge base."
+    return query_engine_tool
@@ -0,0 +1,48 @@
+import os
+from typing import Any, Optional
+
+from llama_index.core.base.base_query_engine import BaseQueryEngine
+from llama_index.core.indices.base import BaseIndex
+from llama_index.core.tools.query_engine import QueryEngineTool
+
+
+def create_query_engine(index: BaseIndex, **kwargs: Any) -> BaseQueryEngine:
+    """
+    Create a query engine for the given index.
+
+    Args:
+        index: The index to create a query engine for.
+        params (optional): Additional parameters for the query engine, e.g: similarity_top_k
+    """
+    top_k = int(os.getenv("TOP_K", 0))
+    if top_k != 0 and kwargs.get("filters") is None:
+        kwargs["similarity_top_k"] = top_k
+
+    return index.as_query_engine(**kwargs)
+
+
+def get_query_engine_tool(
+    index: BaseIndex,
+    name: Optional[str] = None,
+    description: Optional[str] = None,
+    **kwargs: Any,
+) -> QueryEngineTool:
+    """
+    Get a query engine tool for the given index.
+
+    Args:
+        index: The index to create a query engine for.
+        name (optional): The name of the tool.
+        description (optional): The description of the tool.
+    """
+    if name is None:
+        name = "query_index"
+    if description is None:
+        description = "Use this tool to retrieve information from a knowledge base. Provide a specific query and can call the tool multiple times if necessary."
+    query_engine = create_query_engine(index, **kwargs)
+    tool = QueryEngineTool.from_defaults(
+        query_engine=query_engine,
+        name=name,
+        description=description,
+    )
+    return tool
@@ -1,18 +1,18 @@
-from typing import Optional
+from dotenv import load_dotenv

-from app.index import get_index
 from llama_index.core.agent.workflow import AgentWorkflow
 from llama_index.core.settings import Settings
-from llama_index.server.api.models import ChatRequest
-from llama_index.server.tools.index import get_query_engine_tool
-from llama_index.server.tools.index.citation import (
-    CITATION_SYSTEM_PROMPT,
-    enable_citation,
-)
+
+from src.index import get_index
+from src.query import get_query_engine_tool
+from src.citation import CITATION_SYSTEM_PROMPT, enable_citation
+from src.settings import init_settings


-def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow:
-    index = get_index(chat_request=chat_request)
+def create_workflow() -> AgentWorkflow:
+    load_dotenv()
+    init_settings()
+    index = get_index()
    if index is None:
        raise RuntimeError(
            "Index not found! Please run `uv run generate` to index the data first."
@@ -30,3 +30,6 @@ def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow
        llm=Settings.llm,
        system_prompt=system_prompt,
    )
+
+
+workflow = create_workflow()
@@ -1,65 +1,106 @@
-This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/).
+# LlamaIndex Workflow Example

-## Getting Started
+This is a [LlamaIndex](https://www.llamaindex.ai/) project that uses [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/) deployed with [LlamaDeploy](https://github.com/run-llama/llama_deploy).

-First, setup the environment with uv:
+LlamaDeploy is a system for deploying and managing LlamaIndex workflows, while LlamaIndexServer provides a pre-built TypeScript server with an integrated chat UI that can connect directly to LlamaDeploy deployments. This example shows how you can quickly set up a complete chat application by combining these two technologies.

-> **_Note:_** This step is not needed if you are using the dev-container.
+## Prerequisites

-```shell
+If you haven't installed uv, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/) to install it.
+
+You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [src/settings.py](src/settings.py).
+
+Please set up their API keys in the `src/.env` file.
+
+
+## Installation
+
+Both the SDK and the CLI are part of the LlamaDeploy Python package. To install, just run:
+
+```bash
 uv sync
 ```

-Then check the parameters that have been pre-configured in the `.env` file in this directory.
-Make sure you have set the `OPENAI_API_KEY` for the LLM.
+If you don't have uv installed, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/).

-Then, run the development server:
+## Running the Deployment

-```shell
-uv run fastapi dev
-```
-
-Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
-
-To start the app optimized for **production**, run:
+At this point we have all we need to run this deployment. Ideally, we would have the API server already running
+somewhere in the cloud, but to get started let's start an instance locally. Run the following python script
+from a shell:

 ```
-uv run fastapi run
+$ uv run -m llama_deploy.apiserver
+INFO:     Started server process [10842]
+INFO:     Waiting for application startup.
+INFO:     Application startup complete.
+INFO:     Uvicorn running on http://0.0.0.0:4501 (Press CTRL+C to quit)
 ```

-## Configure LLM and Embedding Model
+From another shell, use the CLI, `llamactl`, to create the deployment:

-You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
+```
+$ uv run llamactl deploy llama_deploy.yml
+Deployment successful: chat
+```
+
+## UI Interface
+
+LlamaDeploy will serve the UI through the apiserver. Point the browser to [http://localhost:4501/deployments/chat/ui](http://localhost:4501/deployments/chat/ui) to interact with your deployment through a user-friendly interface.
+
+## API endpoints
+
+You can find all the endpoints in the [API documentation](http://localhost:4501/docs). To get started, you can try the following endpoints:
+
+Create a new task:
+
+```bash
+curl -X POST 'http://localhost:4501/deployments/chat/tasks/create' \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "input": "{\"user_msg\":\"Hello\",\"chat_history\":[]}",
+    "service_id": "workflow"
+  }'
+```
+
+Stream events:
+
+```bash
+curl 'http://localhost:4501/deployments/chat/tasks/0b411be6-005d-43f0-9b6b-6a0017f08002/events?session_id=dd36442c-45ca-4eaa-8d75-b4e6dad1a83e&raw_event=true' \
+  -H 'Content-Type: application/json'
+```
+
+Note that the task_id and session_id are returned when creating a new task.

 ## Use Case
+
 AI-powered code generator that can help you generate app with a chat interface, code editor and app preview.
-
-To update the workflow, you can modify the code in [`workflow.py`](app/workflow.py).
-
-You can start by sending an request on the [chat UI](http://localhost:8000) or you can test the `/api/chat` endpoint with the following curl request:
-
-```
-curl --location 'localhost:8000/api/chat' \
--header 'Content-Type: application/json' \
--data '{ "messages": [{ "role": "user", "content": "Create a report comparing the finances of Apple and Tesla" }] }'
-```
+To update the workflow, you can modify the code in [`src/workflow.py`](src/workflow.py).

 ## Customize the UI

-To customize the UI, you can start by modifying the [./components/ui_event.jsx](./components/ui_event.jsx) file.
+The UI is served by the LlamaIndexServer package. You can configure the UI by modifying the `uiConfig` in the [ui/index.ts](ui/index.ts) file.

-You can also generate a new code for the workflow using LLM by running the following command:
+The following are the available options:

-```
-uv run generate_ui
-```
+- `starterQuestions`: Predefined questions for chat interface
+- `componentsDir`: Directory for custom event components
+- `layoutDir`: Directory for custom layout components
+- `llamaDeploy`: The LlamaDeploy configuration (the deployment name and workflow name defined in the [llama_deploy.yml](llama_deploy.yml) file)
+
+## LlamaCloud Integration
+
+You can enable LlamaCloud integration by setting the `llamaCloud` option in the [ui/index.ts](ui/index.ts) file.
+
+The following are the available options:
+
+- `outputDir`: The directory for LlamaCloud output

 ## Learn More

-To learn more about LlamaIndex, take a look at the following resources:
-
 - [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
 - [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
- [LlamaIndex Server](https://pypi.org/project/llama-index-server/)
+- [LlamaDeploy GitHub Repository](https://github.com/run-llama/llama_deploy)
+- [Chat-UI Documentation](https://ts.llamaindex.ai/docs/chat-ui)

-You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
+You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
@@ -0,0 +1,131 @@
+import json
+import re
+from typing import List, Optional, Any
+
+from pydantic import ValidationError
+from llama_index.core.chat_ui.models.artifact import (
+    Artifact,
+    ArtifactType,
+    CodeArtifactData,
+    DocumentArtifactData,
+)
+from llama_index.core.llms import ChatMessage
+
+INLINE_ANNOTATION_KEY = "annotation"
+
+
+def get_inline_annotations(message: ChatMessage) -> List[Any]:
+    """Extract inline annotations from a chat message."""
+    markdown_content = message.content
+
+    inline_annotations: List[Any] = []
+
+    # Regex to match annotation code blocks
+    # Matches ```annotation followed by content until closing ```
+    annotation_regex = re.compile(
+        rf"```{re.escape(INLINE_ANNOTATION_KEY)}\s*\n([\s\S]*?)\n```", re.MULTILINE
+    )
+
+    for match in annotation_regex.finditer(markdown_content):
+        json_content = match.group(1).strip() if match.group(1) else None
+
+        if not json_content:
+            continue
+
+        try:
+            # Parse the JSON content
+            parsed = json.loads(json_content)
+
+            # Check for required fields in the parsed annotation
+            if (
+                not isinstance(parsed, dict)
+                or "type" not in parsed
+                or "data" not in parsed
+            ):
+                continue
+
+            # Extract the annotation data
+            inline_annotations.append(parsed)
+        except (json.JSONDecodeError, ValidationError) as error:
+            # Skip invalid annotations - they might be malformed JSON or invalid schema
+            print(f"Failed to parse annotation: {error}")
+
+    return inline_annotations
+
+
+def artifact_from_message(message: ChatMessage) -> Optional[Artifact]:
+    """Create an artifact from a chat message if it contains artifact annotations."""
+    inline_annotations = get_inline_annotations(message)
+
+    for annotation in inline_annotations:
+        if isinstance(annotation, dict) and annotation.get("type") == "artifact":
+            try:
+                # Create artifact data based on type
+                artifact_data = annotation.get("data")
+                if not artifact_data:
+                    continue
+
+                artifact_type = artifact_data.get("type")
+
+                if artifact_type == "code":
+                    # Get the nested data object that contains the actual code information
+                    code_info = artifact_data.get("data", {})
+                    code_data = CodeArtifactData(
+                        file_name=code_info.get("file_name", ""),
+                        code=code_info.get("code", ""),
+                        language=code_info.get("language", ""),
+                    )
+                    artifact = Artifact(
+                        created_at=artifact_data.get("created_at"),
+                        type=ArtifactType.CODE,
+                        data=code_data,
+                    )
+                elif artifact_type == "document":
+                    # Get the nested data object that contains the actual document information
+                    doc_info = artifact_data.get("data", {})
+                    doc_data = DocumentArtifactData(
+                        title=doc_info.get("title", ""),
+                        content=doc_info.get("content", ""),
+                        type=doc_info.get("type", "markdown"),
+                        sources=doc_info.get("sources"),
+                    )
+                    artifact = Artifact(
+                        created_at=artifact_data.get("created_at"),
+                        type=ArtifactType.DOCUMENT,
+                        data=doc_data,
+                    )
+                else:
+                    continue
+
+                return artifact
+            except Exception as e:
+                print(
+                    f"Failed to parse artifact from annotation: {annotation}. Error: {e}"
+                )
+
+    return None
+
+
+def get_artifacts(chat_history: List[ChatMessage]) -> List[Artifact]:
+    """
+    Return a list of artifacts sorted by their creation time.
+    Artifacts without a creation time are placed at the end.
+    """
+    artifacts = []
+
+    for message in chat_history:
+        artifact = artifact_from_message(message)
+        if artifact is not None:
+            artifacts.append(artifact)
+
+    # Sort by creation time, with None values at the end
+    return sorted(
+        artifacts,
+        key=lambda a: (a.created_at is None, a.created_at),
+    )
+
+
+def get_last_artifact(chat_history: List[ChatMessage]) -> Optional[Artifact]:
+    """Get the last artifact from chat history."""
+    artifacts = get_artifacts(chat_history)
+    return artifacts[-1] if len(artifacts) > 0 else None
@@ -2,11 +2,10 @@ import re
 import time
 from typing import Any, Literal, Optional, Union

-from llama_index.core.chat_engine.types import ChatMessage
-from llama_index.core.llms import LLM
+from llama_index.core import Settings
+from llama_index.core.llms import LLM, ChatMessage
 from llama_index.core.memory import ChatMemoryBuffer
 from llama_index.core.prompts import PromptTemplate
-from llama_index.llms.openai import OpenAI
 from llama_index.core.workflow import (
    Context,
    Event,
@@ -15,25 +14,26 @@ from llama_index.core.workflow import (
    Workflow,
    step,
 )
-from llama_index.server.api.models import (
+from llama_index.core.chat_ui.models.artifact import (
    Artifact,
-    ArtifactEvent,
    ArtifactType,
-    ChatRequest,
    CodeArtifactData,
-    UIEvent,
 )
-from llama_index.server.api.utils import get_last_artifact
+from llama_index.core.chat_ui.events import (
+    UIEvent,
+    ArtifactEvent,
+)
+
+from src.utils import get_last_artifact
+from src.settings import init_settings
 from pydantic import BaseModel, Field
+from dotenv import load_dotenv


-def create_workflow(chat_request: ChatRequest) -> Workflow:
-    workflow = CodeArtifactWorkflow(
-        llm=OpenAI(model="gpt-4.1"),
-        chat_request=chat_request,
-        timeout=120.0,
-    )
-    return workflow
+def create_workflow() -> Workflow:
+    load_dotenv()
+    init_settings()
+    return CodeArtifactWorkflow(timeout=120.0)


 class Requirement(BaseModel):
@@ -83,8 +83,6 @@ class CodeArtifactWorkflow(Workflow):

    def __init__(
        self,
-        llm: LLM,
-        chat_request: ChatRequest,
        **kwargs: Any,
    ):
        """
@@ -93,9 +91,8 @@ class CodeArtifactWorkflow(Workflow):
            chat_request: The chat request from the chat app to use.
        """
        super().__init__(**kwargs)
-        self.llm = llm
-        self.chat_request = chat_request
-        self.last_artifact = get_last_artifact(chat_request)
+        self.llm: LLM = Settings.llm
+        self.last_artifact: Optional[Artifact] = None

    @step
    async def prepare_chat_history(self, ctx: Context, ev: StartEvent) -> PlanEvent:
@@ -103,13 +100,21 @@ class CodeArtifactWorkflow(Workflow):
        if user_msg is None:
            raise ValueError("user_msg is required to run the workflow")
        await ctx.set("user_msg", user_msg)
-        chat_history = ev.chat_history or []
-        chat_history.append(
+
+        # prepare chat history from StartEvent
+        messages = [
            ChatMessage(
-                role="user",
-                content=user_msg,
+                role=msg.get("role", "user"),
+                content=msg.get("content", ""),
            )
-        )
+            for msg in ev.get("chat_history", [])
+        ]
+        chat_history = [*messages, ChatMessage(role="user", content=user_msg)]
+
+        # extract inline artifact from chat history
+        last_artifact = get_last_artifact(messages)
+        self.last_artifact = last_artifact
+
        memory = ChatMemoryBuffer.from_defaults(
            chat_history=chat_history,
            llm=self.llm,
@@ -373,3 +378,6 @@ class CodeArtifactWorkflow(Workflow):
            )
        )
        return StopEvent(result=response_stream)
+
+
+workflow = create_workflow()
@@ -1,69 +1,113 @@
-This is a [LlamaIndex](https://www.llamaindex.ai/) multi-agents project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/).
+# LlamaIndex Workflow Example

-## Getting Started
+This is a [LlamaIndex](https://www.llamaindex.ai/) project that uses [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/) deployed with [LlamaDeploy](https://github.com/run-llama/llama_deploy).

-First, setup the environment with uv:
+LlamaDeploy is a system for deploying and managing LlamaIndex workflows, while LlamaIndexServer provides a pre-built TypeScript server with an integrated chat UI that can connect directly to LlamaDeploy deployments. This example shows how you can quickly set up a complete chat application by combining these two technologies.

-> **_Note:_** This step is not needed if you are using the dev-container.
+## Prerequisites

-```shell
+If you haven't installed uv, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/) to install it.
+
+You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [src/settings.py](src/settings.py).
+
+Please set up their API keys in the `src/.env` file.
+
+## Installation
+
+Both the SDK and the CLI are part of the LlamaDeploy Python package. To install, just run:
+
+```bash
 uv sync
 ```

-Then check the parameters that have been pre-configured in the `.env` file in this directory.
-Make sure you have set the `OPENAI_API_KEY` for the LLM.
+If you don't have uv installed, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/).

-Second, generate the embeddings of the documents in the `./data` directory:
+## Generate Index
+
+Generate the embeddings of the documents in the `./data` directory:

 ```shell
 uv run generate
 ```

-Third, run the development server:
+## Running the Deployment

-```shell
-uv run fastapi dev
-```
-
-Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
-
-To start the app optimized for **production**, run:
+At this point we have all we need to run this deployment. Ideally, we would have the API server already running
+somewhere in the cloud, but to get started let's start an instance locally. Run the following python script
+from a shell:

 ```
-uv run fastapi run
+$ uv run -m llama_deploy.apiserver
+INFO:     Started server process [10842]
+INFO:     Waiting for application startup.
+INFO:     Application startup complete.
+INFO:     Uvicorn running on http://0.0.0.0:4501 (Press CTRL+C to quit)
 ```

-## Configure LLM and Embedding Model
+From another shell, use the CLI, `llamactl`, to create the deployment:

-You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
+```
+$ uv run llamactl deploy llama_deploy.yml
+Deployment successful: chat
+```
+
+## UI Interface
+
+LlamaDeploy will serve the UI through the apiserver. Point the browser to [http://localhost:4501/deployments/chat/ui](http://localhost:4501/deployments/chat/ui) to interact with your deployment through a user-friendly interface.
+
+## API endpoints
+
+You can find all the endpoints in the [API documentation](http://localhost:4501/docs). To get started, you can try the following endpoints:
+
+Create a new task:
+
+```bash
+curl -X POST 'http://localhost:4501/deployments/chat/tasks/create' \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "input": "{\"user_msg\":\"Hello\",\"chat_history\":[]}",
+    "service_id": "workflow"
+  }'
+```
+
+Stream events:
+
+```bash
+curl 'http://localhost:4501/deployments/chat/tasks/0b411be6-005d-43f0-9b6b-6a0017f08002/events?session_id=dd36442c-45ca-4eaa-8d75-b4e6dad1a83e&raw_event=true' \
+  -H 'Content-Type: application/json'
+```
+
+Note that the task_id and session_id are returned when creating a new task.

 ## Use Case

 We have prepared an [example workflow](./app/workflow.py) for the deep research use case, where you can ask questions about the example documents in the [./data](./data) directory.
-
-You can start by sending an request on the [chat UI](http://localhost:8000) or you can test the `/api/chat` endpoint with the following curl request:
-
-```
-curl --location 'localhost:8000/api/chat' \
--header 'Content-Type: application/json' \
--data '{ "messages": [{ "role": "user", "content": "Create a report comparing the finances of Apple and Tesla" }] }'
-```
+To update the workflow, you can modify the code in [`src/workflow.py`](src/workflow.py).

 ## Customize the UI

-To customize the UI, you can start by modifying the [./components/ui_event.jsx](./components/ui_event.jsx) file.
+The UI is served by the LlamaIndexServer package. You can configure the UI by modifying the `uiConfig` in the [ui/index.ts](ui/index.ts) file.

-You can also generate a new code for the workflow using LLM by running the following command:
+The following are the available options:

-```
-uv run generate_ui
-```
+- `starterQuestions`: Predefined questions for chat interface
+- `componentsDir`: Directory for custom event components
+- `layoutDir`: Directory for custom layout components
+- `llamaDeploy`: The LlamaDeploy configuration (the deployment name and workflow name defined in the [llama_deploy.yml](llama_deploy.yml) file)
+
+## LlamaCloud Integration
+
+You can enable LlamaCloud integration by setting the `llamaCloud` option in the [ui/index.ts](ui/index.ts) file.
+
+The following are the available options:
+
+- `outputDir`: The directory for LlamaCloud output

 ## Learn More

-To learn more about LlamaIndex, take a look at the following resources:
-
 - [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
 - [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
+- [LlamaDeploy GitHub Repository](https://github.com/run-llama/llama_deploy)
+- [Chat-UI Documentation](https://ts.llamaindex.ai/docs/chat-ui)

-You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
+You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
@@ -0,0 +1,50 @@
+from typing import AsyncGenerator, Union
+from llama_index.core.base.llms.types import (
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    ChatResponse,
+)
+from llama_index.core.workflow import Context
+from llama_index.core.agent.workflow.workflow_events import AgentStream
+
+
+async def write_response_to_stream(
+    res: Union[
+        CompletionResponse,
+        CompletionResponseAsyncGen,
+        AsyncGenerator[ChatResponse, None],
+    ],
+    ctx: Context,
+    current_agent_name: str = "assistant",
+) -> str:
+    """
+    Handle both streaming and non-streaming LLM responses.
+
+    For a streaming response, one AgentStream event is written to the workflow
+    stream per chunk. Each event carries the chunk's delta and the response
+    text accumulated so far, including that delta.
+
+    Args:
+        res: The LLM response (either streaming or non-streaming)
+        ctx: The workflow context for writing events to stream
+        current_agent_name: The name of the current agent (default: "assistant")
+
+    Returns:
+        The final response text as a string
+    """
+    if not isinstance(res, AsyncGenerator):
+        # Non-streaming response (CompletionResponse): nothing to stream.
+        return res.text
+
+    # Streaming response (CompletionResponseAsyncGen or ChatResponse AsyncGenerator)
+    final_response = ""
+    async for chunk in res:
+        delta = chunk.delta or ""
+        # Accumulate before emitting so that `response` already contains the
+        # current delta instead of lagging one chunk behind.
+        final_response += delta
+        ctx.write_event_to_stream(
+            AgentStream(
+                delta=delta,
+                response=final_response,
+                current_agent_name=current_agent_name,
+                tool_calls=[],
+                raw=getattr(chunk, "raw", None) or "",
+            )
+        )
+
+    return final_response
@@ -1,9 +1,11 @@
 import logging
 import os
 import uuid
+import time
 from typing import List, Literal, Optional
+from pydantic import BaseModel, Field
+from dotenv import load_dotenv

-from app.index import get_index
 from llama_index.core.base.llms.types import (
    CompletionResponse,
    CompletionResponseAsyncGen,
@@ -23,26 +25,31 @@ from llama_index.core.workflow import (
    Workflow,
    step,
 )
-from llama_index.server.api.models import (
-    ArtifactEvent,
-    ArtifactType,
-    ChatRequest,
-    SourceNodesEvent,
-    UIEvent,
+from llama_index.core.chat_ui.models.artifact import (
    Artifact,
+    ArtifactType,
    DocumentArtifactData,
    DocumentArtifactSource,
 )
-import time
-from llama_index.server.utils.stream import write_response_to_stream
-from pydantic import BaseModel, Field
+from llama_index.core.chat_ui.events import (
+    UIEvent,
+    ArtifactEvent,
+    SourceNodesEvent,
+)
+
+from src.index import get_index
+from src.settings import init_settings
+from src.utils import write_response_to_stream

 logger = logging.getLogger("uvicorn")
 logger.setLevel(logging.INFO)


-def create_workflow(chat_request: Optional[ChatRequest] = None) -> Workflow:
-    index = get_index(chat_request=chat_request)
+def create_workflow() -> Workflow:
+    load_dotenv()
+    init_settings()
+    # TODO: load index in StartEvent
+    index = get_index()
    if index is None:
        raise ValueError(
            "Index is not found. Try run generation script to create the index first."
@@ -140,21 +147,23 @@ class DeepResearchWorkflow(Workflow):
        """
        self.stream = ev.get("stream", True)
        self.user_request = ev.get("user_msg")
-        chat_history = ev.get("chat_history")
-        if chat_history is not None:
-            self.memory.put_messages(chat_history)
+
+        messages = [
+            ChatMessage(
+                role=msg.get("role", "user"),
+                content=msg.get("content", ""),
+            )
+            for msg in ev.get("chat_history", [])
+        ]
+        user_message = ChatMessage(role="user", content=self.user_request)
+        chat_history = [*messages, user_message]
+        self.memory.put_messages(chat_history)

        await ctx.set("total_questions", 0)

        # Add user message to memory
-        self.memory.put_messages(
-            messages=[
-                ChatMessage(
-                    role=MessageRole.USER,
-                    content=self.user_request,
-                )
-            ]
-        )
+        self.memory.put_messages(messages=[user_message])
+
        ctx.write_event_to_stream(
            UIEvent(
                type="ui_event",
@@ -574,3 +583,6 @@ def _get_text_node_content_for_citation(node: NodeWithScore) -> str:
    node_id = node.node.node_id
    content = f"<Citation id='{node_id}'>\n{node.get_content(metadata_mode=MetadataMode.LLM)}</Citation id='{node_id}'>"
    return content
+
+
+workflow = create_workflow()
@@ -1,66 +1,107 @@
-This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/).
+# LlamaIndex Workflow Example

-## Getting Started
+This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/) deployed with [LlamaDeploy](https://github.com/run-llama/llama_deploy).

-First, setup the environment with uv:
+LlamaDeploy is a system for deploying and managing LlamaIndex workflows, while LlamaIndexServer provides a pre-built TypeScript server with an integrated chat UI that can connect directly to LlamaDeploy deployments. This example shows how you can quickly set up a complete chat application by combining these two technologies.

-> **_Note:_** This step is not needed if you are using the dev-container.
+## Prerequisites

-```shell
+If you haven't installed uv, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/) to install it.
+
+You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [src/settings.py](src/settings.py).
+
+Please set up their API keys in the `src/.env` file.
+
+## Installation
+
+Both the SDK and the CLI are part of the LlamaDeploy Python package. To install, just run:
+
+```bash
 uv sync
 ```

-Then check the parameters that have been pre-configured in the `.env` file in this directory.
-Make sure you have set the `OPENAI_API_KEY` for the LLM.
+If you don't have uv installed, you can follow the instructions [here](https://docs.astral.sh/uv/getting-started/installation/).

-Then, run the development server:
+## Running the Deployment

-```shell
-uv run fastapi dev
-```
-
-Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
-
-To start the app optimized for **production**, run:
+At this point we have all we need to run this deployment. Ideally, we would have the API server already running
+somewhere in the cloud, but to get started let's start an instance locally. Run the following python script
+from a shell:

 ```
-uv run fastapi run
+$ uv run -m llama_deploy.apiserver
+INFO:     Started server process [10842]
+INFO:     Waiting for application startup.
+INFO:     Application startup complete.
+INFO:     Uvicorn running on http://0.0.0.0:4501 (Press CTRL+C to quit)
 ```

-## Configure LLM and Embedding Model
+From another shell, use the CLI, `llamactl`, to create the deployment:

-You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
+```
+$ uv run llamactl deploy llama_deploy.yml
+Deployment successful: chat
+```
+
+## UI Interface
+
+LlamaDeploy will serve the UI through the apiserver. Point the browser to [http://localhost:4501/deployments/chat/ui](http://localhost:4501/deployments/chat/ui) to interact with your deployment through a user-friendly interface.
+
+## API endpoints
+
+You can find all the endpoints in the [API documentation](http://localhost:4501/docs). To get started, you can try the following endpoints:
+
+Create a new task:
+
+```bash
+curl -X POST 'http://localhost:4501/deployments/chat/tasks/create' \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "input": "{\"user_msg\":\"Hello\",\"chat_history\":[]}",
+    "service_id": "workflow"
+  }'
+```
+
+Stream events:
+
+```bash
+curl 'http://localhost:4501/deployments/chat/tasks/0b411be6-005d-43f0-9b6b-6a0017f08002/events?session_id=dd36442c-45ca-4eaa-8d75-b4e6dad1a83e&raw_event=true' \
+  -H 'Content-Type: application/json'
+```
+
+Note that the task_id and session_id are returned when creating a new task.

 ## Use Case

 AI-powered document generator that can help you generate documents with a chat interface and simple markdown editor.
-
-To update the workflow, you can modify the code in [`workflow.py`](app/workflow.py).
-
-You can start by sending an request on the [chat UI](http://localhost:8000) or you can test the `/api/chat` endpoint with the following curl request:
-
-```
-curl --location 'localhost:8000/api/chat' \
--header 'Content-Type: application/json' \
--data '{ "messages": [{ "role": "user", "content": "Create a report comparing the finances of Apple and Tesla" }] }'
-```
+To update the workflow, you can modify the code in [`src/workflow.py`](src/workflow.py).

 ## Customize the UI

-To customize the UI, you can start by modifying the [./components/ui_event.jsx](./components/ui_event.jsx) file.
+The UI is served by the LlamaIndexServer package. You can configure the UI by modifying the `uiConfig` in the [ui/index.ts](ui/index.ts) file.

-You can also generate a new code for the workflow using LLM by running the following command:
+The following are the available options:

-```
-uv run generate_ui
-```
+- `starterQuestions`: Predefined questions for chat interface
+- `componentsDir`: Directory for custom event components
+- `layoutDir`: Directory for custom layout components
+- `llamaDeploy`: The LlamaDeploy configuration (the deployment name and workflow name that are defined in the [llama_deploy.yml](llama_deploy.yml) file)
+
+To customize the UI, you can start by modifying the [./ui/components/ui_event.jsx](./ui/components/ui_event.jsx) file.
+
+## LlamaCloud Integration
+
+You can enable LlamaCloud integration by setting the `llamaCloud` option in the [ui/index.ts](ui/index.ts) file.
+
+The following are the available options:
+
+- `outputDir`: The directory for LlamaCloud output

 ## Learn More

-To learn more about LlamaIndex, take a look at the following resources:
-
 - [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
 - [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
- [LlamaIndex Server](https://pypi.org/project/llama-index-server/)
+- [LlamaDeploy GitHub Repository](https://github.com/run-llama/llama_deploy)
+- [Chat-UI Documentation](https://ts.llamaindex.ai/docs/chat-ui)

-You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
+You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
@@ -0,0 +1,131 @@
+import json
+import re
+from typing import List, Optional, Any
+
+from pydantic import ValidationError
+from llama_index.core.chat_ui.models.artifact import (
+    Artifact,
+    ArtifactType,
+    CodeArtifactData,
+    DocumentArtifactData,
+)
+from llama_index.core.llms import ChatMessage
+
+INLINE_ANNOTATION_KEY = "annotation"
+
+
+def get_inline_annotations(message: ChatMessage) -> List[Any]:
+    """
+    Extract inline annotations from a chat message.
+
+    An inline annotation is a fenced code block tagged with
+    INLINE_ANNOTATION_KEY whose body is a JSON object containing both a
+    "type" and a "data" key. Blocks that are empty, are not valid JSON, or
+    lack those keys are skipped.
+
+    Args:
+        message: The chat message whose content is scanned.
+
+    Returns:
+        The parsed annotation dicts, in the order they appear in the content.
+        An empty list is returned when the message has no text content.
+    """
+    markdown_content = message.content
+
+    # The regex scan below needs a string; a message without text content
+    # simply has no annotations.
+    if not isinstance(markdown_content, str) or not markdown_content:
+        return []
+
+    inline_annotations: List[Any] = []
+
+    # Regex to match annotation code blocks
+    # Matches ```annotation followed by content until closing ```
+    annotation_regex = re.compile(
+        rf"```{re.escape(INLINE_ANNOTATION_KEY)}\s*\n([\s\S]*?)\n```", re.MULTILINE
+    )
+
+    for match in annotation_regex.finditer(markdown_content):
+        json_content = match.group(1).strip() if match.group(1) else None
+
+        if not json_content:
+            continue
+
+        try:
+            # Parse the JSON content
+            parsed = json.loads(json_content)
+
+            # Check for required fields in the parsed annotation
+            if (
+                not isinstance(parsed, dict)
+                or "type" not in parsed
+                or "data" not in parsed
+            ):
+                continue
+
+            # Extract the annotation data
+            inline_annotations.append(parsed)
+        except (json.JSONDecodeError, ValidationError) as error:
+            # Skip invalid annotations - they might be malformed JSON or invalid schema
+            print(f"Failed to parse annotation: {error}")
+
+    return inline_annotations
+
+
+def artifact_from_message(message: ChatMessage) -> Optional[Artifact]:
+    """
+    Build an Artifact from the first usable artifact annotation in a message.
+
+    Annotations are read with get_inline_annotations. Only annotations of
+    type "artifact" whose payload type is "code" or "document" produce an
+    Artifact; anything else is skipped. A failure while building one artifact
+    is printed and the next annotation is tried.
+
+    Returns:
+        The first Artifact that could be built, or None if there is none.
+    """
+    for annotation in get_inline_annotations(message):
+        if not isinstance(annotation, dict) or annotation.get("type") != "artifact":
+            continue
+
+        try:
+            payload = annotation.get("data")
+            if not payload:
+                continue
+
+            kind = payload.get("type")
+            if kind == "code":
+                # The nested "data" object holds the actual code fields
+                details = payload.get("data", {})
+                content = CodeArtifactData(
+                    file_name=details.get("file_name", ""),
+                    code=details.get("code", ""),
+                    language=details.get("language", ""),
+                )
+                return Artifact(
+                    created_at=payload.get("created_at"),
+                    type=ArtifactType.CODE,
+                    data=content,
+                )
+
+            if kind == "document":
+                # The nested "data" object holds the actual document fields
+                details = payload.get("data", {})
+                content = DocumentArtifactData(
+                    title=details.get("title", ""),
+                    content=details.get("content", ""),
+                    type=details.get("type", "markdown"),
+                    sources=details.get("sources"),
+                )
+                return Artifact(
+                    created_at=payload.get("created_at"),
+                    type=ArtifactType.DOCUMENT,
+                    data=content,
+                )
+
+            # Unknown artifact type: try the next annotation
+        except Exception as e:
+            print(
+                f"Failed to parse artifact from annotation: {annotation}. Error: {e}"
+            )
+
+    return None
+
+
+def get_artifacts(chat_history: List[ChatMessage]) -> List[Artifact]:
+    """
+    Collect the artifacts found in a chat history, ordered by creation time.
+
+    Each message contributes at most one artifact (see artifact_from_message).
+    Artifacts whose created_at is None sort after all the others.
+    """
+    found = [
+        candidate
+        for candidate in map(artifact_from_message, chat_history)
+        if candidate is not None
+    ]
+
+    # The leading boolean pushes artifacts without a timestamp to the end
+    found.sort(key=lambda item: (item.created_at is None, item.created_at))
+    return found
+
+
+def get_last_artifact(chat_history: List[ChatMessage]) -> Optional[Artifact]:
+    """Return the final artifact in get_artifacts() order, or None if there are none."""
+    ordered = get_artifacts(chat_history)
+    if not ordered:
+        return None
+    return ordered[-1]
@@ -2,9 +2,8 @@ import re
 import time
 from typing import Any, Literal, Optional

-from llama_index.core.chat_engine.types import ChatMessage
-from llama_index.core.llms import LLM
-from llama_index.llms.openai import OpenAI
+from llama_index.core import Settings
+from llama_index.core.llms import LLM, ChatMessage
 from llama_index.core.memory import ChatMemoryBuffer
 from llama_index.core.prompts import PromptTemplate
 from llama_index.core.workflow import (
@@ -15,25 +14,26 @@ from llama_index.core.workflow import (
    Workflow,
    step,
 )
-from llama_index.server.api.models import (
+from llama_index.core.chat_ui.models.artifact import (
    Artifact,
-    ArtifactEvent,
    ArtifactType,
-    ChatRequest,
    DocumentArtifactData,
-    UIEvent,
 )
-from llama_index.server.api.utils import get_last_artifact
+from llama_index.core.chat_ui.events import (
+    UIEvent,
+    ArtifactEvent,
+)
+
+from src.utils import get_last_artifact
+from src.settings import init_settings
 from pydantic import BaseModel, Field
+from dotenv import load_dotenv


-def create_workflow(chat_request: ChatRequest) -> Workflow:
-    workflow = DocumentArtifactWorkflow(
-        llm=OpenAI(model="gpt-4.1"),
-        chat_request=chat_request,
-        timeout=120.0,
-    )
-    return workflow
+def create_workflow() -> Workflow:
+    """
+    Build the document artifact workflow.
+
+    Calls load_dotenv() and init_settings() first, then returns a
+    DocumentArtifactWorkflow constructed with timeout=120.0.
+    """
+    load_dotenv()
+    init_settings()
+    return DocumentArtifactWorkflow(timeout=120.0)


 class DocumentRequirement(BaseModel):
@@ -81,8 +81,6 @@ class DocumentArtifactWorkflow(Workflow):

    def __init__(
        self,
-        llm: LLM,
-        chat_request: ChatRequest,
        **kwargs: Any,
    ):
        """
@@ -91,9 +89,8 @@ class DocumentArtifactWorkflow(Workflow):
            chat_request: The chat request from the chat app to use.
        """
        super().__init__(**kwargs)
-        self.llm = llm
-        self.chat_request = chat_request
-        self.last_artifact = get_last_artifact(chat_request)
+        self.llm: LLM = Settings.llm
+        self.last_artifact: Optional[Artifact] = None

    @step
    async def prepare_chat_history(self, ctx: Context, ev: StartEvent) -> PlanEvent:
@@ -101,13 +98,21 @@ class DocumentArtifactWorkflow(Workflow):
        if user_msg is None:
            raise ValueError("user_msg is required to run the workflow")
        await ctx.set("user_msg", user_msg)
-        chat_history = ev.chat_history or []
-        chat_history.append(
+
+        # prepare chat history from StartEvent
+        messages = [
            ChatMessage(
-                role="user",
-                content=user_msg,
+                role=msg.get("role", "user"),
+                content=msg.get("content", ""),
            )
-        )
+            for msg in ev.get("chat_history", [])
+        ]
+        chat_history = [*messages, ChatMessage(role="user", content=user_msg)]
+
+        # extract inline artifact from chat history
+        last_artifact = get_last_artifact(messages)
+        self.last_artifact = last_artifact
+
        memory = ChatMemoryBuffer.from_defaults(
            chat_history=chat_history,
            llm=self.llm,
@@ -115,9 +120,9 @@ class DocumentArtifactWorkflow(Workflow):
        await ctx.set("memory", memory)
        return PlanEvent(
            user_msg=user_msg,
-            context=str(self.last_artifact.model_dump_json())
-            if self.last_artifact
-            else "",
+            context=(
+                str(self.last_artifact.model_dump_json()) if self.last_artifact else ""
+            ),
        )

    @step
@@ -135,7 +140,8 @@ class DocumentArtifactWorkflow(Workflow):
                ),
            )
        )
-        prompt = PromptTemplate("""
+        prompt = PromptTemplate(
+            """
         You are a documentation analyst responsible for analyzing the user's request and providing requirements for document generation or update.
         Follow these instructions:
         1. Carefully analyze the conversation history and the user's request to determine what has been done and what the next step should be.
@@ -176,10 +182,13 @@ class DocumentArtifactWorkflow(Workflow):

         Now, please plan for the user's request:
         {user_msg}
-        """).format(
-            context=""
-            if event.context is None
-            else f"## The context is: \n{event.context}\n",
+        """
+        ).format(
+            context=(
+                ""
+                if event.context is None
+                else f"## The context is: \n{event.context}\n"
+            ),
            user_msg=event.user_msg,
        )
        response = await self.llm.acomplete(
@@ -232,7 +241,8 @@ class DocumentArtifactWorkflow(Workflow):
                ),
            )
        )
-        prompt = PromptTemplate("""
+        prompt = PromptTemplate(
+            """
         You are a skilled technical writer who can help users with documentation.
         You are given a task to generate or update a document for a given requirement.

@@ -265,10 +275,11 @@ class DocumentArtifactWorkflow(Workflow):

         Now, please generate the document for the following requirement:
         {requirement}
-         """).format(
-            previous_artifact=self.last_artifact.model_dump_json()
-            if self.last_artifact
-            else "",
+         """
+        ).format(
+            previous_artifact=(
+                self.last_artifact.model_dump_json() if self.last_artifact else ""
+            ),
            requirement=event.requirement,
        )
        response = await self.llm.acomplete(
@@ -345,3 +356,6 @@ class DocumentArtifactWorkflow(Workflow):
            )
        )
        return StopEvent(result=response_stream)
+
+
+workflow = create_workflow()
@@ -0,0 +1,254 @@
+import logging
+import uuid
+from abc import ABC, abstractmethod
+from typing import Any, AsyncGenerator, Optional
+
+from pydantic import BaseModel, ConfigDict
+
+from llama_index.core.base.llms.types import ChatMessage, ChatResponse
+from llama_index.core.llms.function_calling import FunctionCallingLLM
+from llama_index.core.tools import (
+    BaseTool,
+    FunctionTool,
+    ToolOutput,
+    ToolSelection,
+)
+from llama_index.core.workflow import Context
+from llama_index.core.agent.workflow.workflow_events import ToolCall, ToolCallResult
+
+from src.events import AgentRunEvent, AgentRunEventType
+
+logger = logging.getLogger("uvicorn")
+
+
+# Pairs the output of one tool invocation with the id of the tool call that
+# requested it (see call_tool, which fills both fields).
+class ToolCallOutput(BaseModel):
+    # Copied from ToolSelection.tool_id of the originating tool call
+    tool_call_id: str
+    # Result of running the tool, or an error ToolOutput if it failed
+    tool_output: ToolOutput
+
+
+class ContextAwareTool(FunctionTool, ABC):
+    """
+    Abstract FunctionTool whose acall additionally receives the workflow Context.
+
+    call_tool checks for this type and passes `ctx` when invoking such a tool.
+    """
+
+    @abstractmethod
+    async def acall(self, ctx: Context, input: Any) -> ToolOutput:  # type: ignore
+        # Subclasses implement the tool logic here.
+        pass
+
+
+class ChatWithToolsResponse(BaseModel):
+    """
+    A tool call response from chat_with_tools.
+    """
+
+    # Tool selections requested by the LLM; None for a plain text answer
+    tool_calls: Optional[list[ToolSelection]]
+    # The assistant message that carried the tool calls; None for a plain text answer
+    tool_call_message: Optional[ChatMessage]
+    # Stream of response chunks for a plain text answer; None for a tool call
+    generator: Optional[AsyncGenerator[ChatResponse | None, None]]
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    def is_calling_different_tools(self) -> bool:
+        """Return True when the tool calls reference more than one distinct tool name."""
+        distinct_names = set()
+        for selection in self.tool_calls or []:
+            distinct_names.add(selection.tool_name)
+        return len(distinct_names) > 1
+
+    def has_tool_calls(self) -> bool:
+        """Return True when at least one tool call is present."""
+        return bool(self.tool_calls)
+
+    def tool_name(self) -> str:
+        """
+        Return the name of the single tool being called.
+
+        Raises:
+            ValueError: If there are no tool calls, or the calls target
+                different tools.
+        """
+        if not self.has_tool_calls():
+            raise ValueError("No tool calls")
+        if self.is_calling_different_tools():
+            raise ValueError("Calling different tools")
+        first_call = self.tool_calls[0]  # type: ignore
+        return first_call.tool_name
+
+    async def full_response(self) -> str:
+        """Drain the generator and return the concatenation of all non-empty deltas."""
+        assert self.generator is not None
+        pieces: list[str] = []
+        async for chunk in self.generator:
+            text = chunk.delta  # type: ignore
+            if text:
+                pieces.append(text)
+        return "".join(pieces)
+
+
+async def chat_with_tools(  # type: ignore
+    llm: FunctionCallingLLM,
+    tools: list[BaseTool],
+    chat_history: list[ChatMessage],
+) -> ChatWithToolsResponse:
+    """
+    Ask the LLM to answer the chat history, letting it decide whether to call tools.
+
+    The chat history passed in is only read, never modified.
+
+    Returns:
+        A ChatWithToolsResponse that either carries the requested tool calls
+        (with generator=None) or a generator streaming the text answer
+        (with tool_calls=None).
+    """
+    stream = _tool_call_generator(llm, tools, chat_history)
+
+    # The first item yielded is a boolean telling whether this is a tool call
+    wants_tool_call = await stream.__anext__()
+    if not wants_tool_call:
+        # Hand the remaining stream to the caller for incremental consumption
+        return ChatWithToolsResponse(
+            tool_calls=None,
+            tool_call_message=None,
+            generator=stream,  # type: ignore
+        )
+
+    # For a tool call, the last item of the stream is the full response
+    last_chunk = None
+    async for item in stream:
+        last_chunk = item
+    assert isinstance(last_chunk, ChatResponse)
+    return ChatWithToolsResponse(
+        tool_calls=llm.get_tool_calls_from_response(last_chunk),
+        tool_call_message=last_chunk.message,
+        generator=None,
+    )
+
+
+def _missing_tool_output(tool_call: ToolSelection) -> ToolCallOutput:
+    """Build the error output returned when a tool call names a tool that is not available."""
+    message = f"Tool {tool_call.tool_name} does not exist"
+    return ToolCallOutput(
+        tool_call_id=tool_call.tool_id,
+        tool_output=ToolOutput(
+            is_error=True,
+            content=message,
+            tool_name=tool_call.tool_name,
+            raw_input=tool_call.tool_kwargs,
+            raw_output={
+                "error": message,
+            },
+        ),
+    )
+
+
+async def call_tools(
+    ctx: Context,
+    agent_name: str,
+    tools: list[BaseTool],
+    tool_calls: list[ToolSelection],
+    emit_agent_events: bool = True,
+) -> list[ToolCallOutput]:
+    """
+    Call tools and return the tool call responses.
+
+    Tool calls are executed sequentially, in the given order. A call that
+    names a tool not present in `tools` yields an error ToolOutput instead of
+    raising, for a single call as well as for multiple calls.
+
+    Args:
+        ctx: The workflow context used to write events to the stream.
+        agent_name: Name reported on the emitted AgentRunEvent events.
+        tools: The tools available for execution.
+        tool_calls: The tool selections to execute.
+        emit_agent_events: Whether to write AgentRunEvent events.
+
+    Returns:
+        One ToolCallOutput per tool call, in the same order as `tool_calls`.
+    """
+    if len(tool_calls) == 0:
+        return []
+    tools_by_name = {tool.metadata.get_name(): tool for tool in tools}
+    if len(tool_calls) == 1:
+        only_call = tool_calls[0]
+        if emit_agent_events:
+            ctx.write_event_to_stream(
+                AgentRunEvent(
+                    name=agent_name,
+                    msg=f"{only_call.tool_name}: {only_call.tool_kwargs}",
+                )
+            )
+        tool = tools_by_name.get(only_call.tool_name)
+        if tool is None:
+            # Report an unknown tool as an error output rather than raising
+            # KeyError, matching the multi-call path below.
+            return [_missing_tool_output(only_call)]
+        return [await call_tool(ctx, tool, only_call)]
+    # Multiple tool calls, show progress
+    tool_call_outputs: list[ToolCallOutput] = []
+
+    # All progress events of this batch share one id
+    progress_id = str(uuid.uuid4())
+    total_steps = len(tool_calls)
+    if emit_agent_events:
+        ctx.write_event_to_stream(
+            AgentRunEvent(
+                name=agent_name,
+                msg=f"Making {total_steps} tool calls",
+            )
+        )
+    for i, tool_call in enumerate(tool_calls):
+        tool = tools_by_name.get(tool_call.tool_name)
+        if tool is None:
+            tool_call_outputs.append(_missing_tool_output(tool_call))
+            continue
+
+        tool_call_output = await call_tool(
+            ctx,
+            tool,
+            tool_call,
+        )
+        if emit_agent_events:
+            ctx.write_event_to_stream(
+                AgentRunEvent(
+                    name=agent_name,
+                    msg=f"{tool_call.tool_name}: {tool_call.tool_kwargs}",
+                    event_type=AgentRunEventType.PROGRESS,
+                    data={
+                        "id": progress_id,
+                        "total": total_steps,
+                        "current": i,
+                    },
+                )
+            )
+        tool_call_outputs.append(tool_call_output)
+    return tool_call_outputs
+
+
+async def call_tool(
+    ctx: Context,
+    tool: BaseTool,
+    tool_call: ToolSelection,
+) -> ToolCallOutput:
+    """
+    Run a single tool call and report it on the workflow event stream.
+
+    A ToolCall event is written before the tool runs and a ToolCallResult
+    event after it finishes. An exception raised while running the tool is
+    logged and converted into an error ToolOutput instead of propagating.
+
+    Args:
+        ctx: The workflow context used to write events to the stream.
+        tool: The tool to execute.
+        tool_call: The selection holding the tool name, id and keyword arguments.
+
+    Returns:
+        A ToolCallOutput pairing the tool call id with the tool's output.
+    """
+    # Announce the call before executing it
+    ctx.write_event_to_stream(
+        ToolCall(
+            tool_name=tool_call.tool_name,
+            tool_id=tool_call.tool_id,
+            tool_kwargs=tool_call.tool_kwargs,
+        )
+    )
+    try:
+        if isinstance(tool, ContextAwareTool):
+            # NOTE(review): ctx was already used above, so a None ctx would
+            # have failed before reaching this guard.
+            if ctx is None:
+                raise ValueError("Context is required for context aware tool")
+            # inject context for calling an context aware tool
+            output = await tool.acall(ctx=ctx, **tool_call.tool_kwargs)
+        else:
+            output = await tool.acall(**tool_call.tool_kwargs)  # type: ignore
+    except Exception as e:
+        # Turn any failure into an error output so the caller still gets a result
+        logger.error(f"Got error in tool {tool_call.tool_name}: {e!s}")
+        output = ToolOutput(
+            is_error=True,
+            content=f"Error: {e!s}",
+            tool_name=tool.metadata.get_name(),
+            raw_input=tool_call.tool_kwargs,
+            raw_output={
+                "error": str(e),
+            },
+        )
+    # Report the result (successful or error) on the stream
+    ctx.write_event_to_stream(
+        ToolCallResult(
+            tool_name=tool_call.tool_name,
+            tool_kwargs=tool_call.tool_kwargs,
+            tool_id=tool_call.tool_id,
+            tool_output=output,
+            return_direct=False,
+        )
+    )
+    return ToolCallOutput(
+        tool_call_id=tool_call.tool_id,
+        tool_output=output,
+    )
+
+
+async def _tool_call_generator(
+    llm: FunctionCallingLLM,
+    tools: list[BaseTool],
+    chat_history: list[ChatMessage],
+) -> AsyncGenerator[ChatResponse | bool, None]:
+    """
+    Stream a chat-with-tools response, prefixed by a tool-call indicator.
+
+    Yields, in order:
+      1. A single boolean: True if the first chunk carries "tool_calls" in
+         its message's additional_kwargs, False otherwise.
+      2. Every chunk that does not carry "tool_calls".
+      3. The last chunk of the stream, unless it was already yielded in
+         step 2. The final item is therefore always the last chunk, and no
+         chunk is yielded twice.
+
+    Nothing is yielded if the LLM stream is empty.
+    """
+    response_stream = await llm.astream_chat_with_tools(
+        tools,
+        chat_history=chat_history,
+        allow_parallel_tool_calls=False,
+    )
+
+    last_chunk = None
+    last_chunk_yielded = False
+    yielded_indicator = False
+    async for chunk in response_stream:
+        if "tool_calls" not in chunk.message.additional_kwargs:
+            # Yield a boolean to indicate whether the response is a tool call
+            if not yielded_indicator:
+                yield False
+                yielded_indicator = True
+
+            # if not a tool call, yield the chunks!
+            yield chunk  # type: ignore
+            last_chunk_yielded = True
+        else:
+            if not yielded_indicator:
+                # Yield the indicator for a tool call
+                yield True
+                yielded_indicator = True
+            last_chunk_yielded = False
+
+        last_chunk = chunk
+
+    # Only emit the final chunk if the loop did not already yield it;
+    # re-yielding it would make delta-summing consumers repeat the last delta.
+    if last_chunk is not None and not last_chunk_yielded:
+        yield last_chunk  # type: ignore
@@ -0,0 +1,251 @@
+import logging
+import os
+import tempfile
+import re
+from enum import Enum
+from io import BytesIO
+
+from llama_index.core.tools.function_tool import FunctionTool
+
+# Workflow (deployment) name; it is part of both the file server URL prefix
+# and the output directory path defined below.
+WORKFLOW = "chat"
+
+# URL prefix under which the Next.js file server exposes generated tool output
+FILE_SERVER_URL_PREFIX = f"/deployments/{WORKFLOW}/ui/api/files/output/tools"
+
+# When deploying to llama_deploy, the ui folder is copied to the deployments folder in the temp directory.
+# Generated documents must be saved to that exact ui directory so the file server can access them,
+# e.g. /tmp/llama_deploy/deployments/chat/ui/output/tools/generated_report.pdf
+LLAMA_DEPLOY_DIR = os.path.join(tempfile.gettempdir(), "llama_deploy", "deployments")
+OUTPUT_DIR = os.path.join(LLAMA_DEPLOY_DIR, WORKFLOW, "ui", "output", "tools")
+
+
+# Output formats supported by DocumentGenerator.generate_document; the values
+# are the lower-case strings accepted for its `document_type` argument.
+class DocumentType(Enum):
+    PDF = "pdf"
+    HTML = "html"
+
+
+# CSS shared by both output formats; injected into HTML_TEMPLATE as `common_styles`.
+COMMON_STYLES = """
+body {
+    font-family: Arial, sans-serif;
+    line-height: 1.3;
+    color: #333;
+}
+h1, h2, h3, h4, h5, h6 {
+    margin-top: 1em;
+    margin-bottom: 0.5em;
+}
+p {
+    margin-bottom: 0.7em;
+}
+code {
+    background-color: #f4f4f4;
+    padding: 2px 4px;
+    border-radius: 4px;
+}
+pre {
+    background-color: #f4f4f4;
+    padding: 10px;
+    border-radius: 4px;
+    overflow-x: auto;
+}
+table {
+    border-collapse: collapse;
+    width: 100%;
+    margin-bottom: 1em;
+}
+th, td {
+    border: 1px solid #ddd;
+    padding: 8px;
+    text-align: left;
+}
+th {
+    background-color: #f2f2f2;
+    font-weight: bold;
+}
+"""
+
+# Extra CSS applied only to HTML output (passed as `specific_styles` by _generate_html).
+HTML_SPECIFIC_STYLES = """
+body {
+    max-width: 800px;
+    margin: 0 auto;
+    padding: 20px;
+}
+"""
+
+# Extra CSS applied only to PDF output (passed as `specific_styles` by _generate_pdf):
+# page size/margins and point-based font sizes.
+PDF_SPECIFIC_STYLES = """
+@page {
+    size: letter;
+    margin: 2cm;
+}
+body {
+    font-size: 11pt;
+}
+h1 { font-size: 18pt; }
+h2 { font-size: 16pt; }
+h3 { font-size: 14pt; }
+h4, h5, h6 { font-size: 12pt; }
+pre, code {
+    font-family: Courier, monospace;
+    font-size: 0.9em;
+}
+"""
+
+# Page skeleton filled in with str.format(); the placeholders are
+# {common_styles}, {specific_styles} and {content}. Because str.format() is used,
+# any literal brace added to this template must be doubled ({{ or }}).
+HTML_TEMPLATE = """
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <style>
+        {common_styles}
+        {specific_styles}
+    </style>
+</head>
+<body>
+    {content}
+</body>
+</html>
+"""
+
+
+class DocumentGenerator:
+    def __init__(self, file_server_url_prefix: str | None = FILE_SERVER_URL_PREFIX):
+        if not file_server_url_prefix:
+            raise ValueError("file_server_url_prefix is required")
+        self.file_server_url_prefix = file_server_url_prefix
+
+    @classmethod
+    def _generate_html_content(cls, original_content: str) -> str:
+        """
+        Generate HTML content from the original markdown content.
+        """
+        try:
+            import markdown  # type: ignore
+        except ImportError:
+            raise ImportError(
+                "Failed to import required modules. Please install markdown."
+            )
+
+        # Convert markdown to HTML with fenced code and table extensions
+        return markdown.markdown(original_content, extensions=["fenced_code", "tables"])
+
+    @classmethod
+    def _generate_pdf(cls, html_content: str) -> BytesIO:
+        """
+        Generate a PDF from the HTML content.
+        """
+        try:
+            from xhtml2pdf import pisa
+        except ImportError:
+            raise ImportError(
+                "Failed to import required modules. Please install xhtml2pdf."
+            )
+
+        pdf_html = HTML_TEMPLATE.format(
+            common_styles=COMMON_STYLES,
+            specific_styles=PDF_SPECIFIC_STYLES,
+            content=html_content,
+        )
+
+        buffer = BytesIO()
+        pdf = pisa.pisaDocument(
+            BytesIO(pdf_html.encode("UTF-8")), buffer, encoding="UTF-8"
+        )
+
+        if pdf.err:
+            logging.error(f"PDF generation failed: {pdf.err}")
+            raise ValueError("PDF generation failed")
+
+        buffer.seek(0)
+        return buffer
+
+    @classmethod
+    def _generate_html(cls, html_content: str) -> str:
+        """
+        Generate a complete HTML document with the given HTML content.
+        """
+        return HTML_TEMPLATE.format(
+            common_styles=COMMON_STYLES,
+            specific_styles=HTML_SPECIFIC_STYLES,
+            content=html_content,
+        )
+
+    def generate_document(
+        self, original_content: str, document_type: str, file_name: str
+    ) -> str:
+        """
+        To generate document as PDF or HTML file.
+        Parameters:
+            original_content: str (markdown style)
+            document_type: str (pdf or html) specify the type of the file format based on the use case
+            file_name: str (name of the document file) must be a valid file name, no extensions needed
+        Returns:
+            str (URL to the document file): A file URL ready to serve.
+        """
+        try:
+            doc_type = DocumentType(document_type.lower())
+        except ValueError:
+            raise ValueError(
+                f"Invalid document type: {document_type}. Must be 'pdf' or 'html'."
+            )
+        # Always generate html content first
+        html_content = self._generate_html_content(original_content)
+
+        # Based on the type of document, generate the corresponding file
+        if doc_type == DocumentType.PDF:
+            content = self._generate_pdf(html_content)
+            file_extension = "pdf"
+        elif doc_type == DocumentType.HTML:
+            content = BytesIO(self._generate_html(html_content).encode("utf-8"))
+            file_extension = "html"
+        else:
+            raise ValueError(f"Unexpected document type: {document_type}")
+
+        file_name = self._validate_file_name(file_name)
+        file_path = os.path.join(OUTPUT_DIR, f"{file_name}.{file_extension}")
+
+        self._write_to_file(content, file_path)
+
+        return f"{self.file_server_url_prefix}/{file_name}.{file_extension}"
+
+    @staticmethod
+    def _write_to_file(content: BytesIO, file_path: str) -> None:
+        """
+        Write the content to a file.
+        """
+        try:
+            os.makedirs(os.path.dirname(file_path), exist_ok=True)
+            with open(file_path, "wb") as file:
+                file.write(content.getvalue())
+        except Exception:
+            raise
+
+    @staticmethod
+    def _validate_file_name(file_name: str) -> str:
+        """
+        Validate the file name.
+        """
+        # Don't allow directory traversal
+        if os.path.isabs(file_name):
+            raise ValueError("File name is not allowed.")
+        # Don't allow special characters
+        if re.match(r"^[a-zA-Z0-9_.-]+$", file_name):
+            return file_name
+        else:
+            raise ValueError("File name is not allowed to contain special characters.")
+
+    @classmethod
+    def _validate_packages(cls) -> None:
+        try:
+            import markdown  # noqa: F401
+            import xhtml2pdf  # noqa: F401
+        except ImportError:
+            raise ImportError(
+                "Failed to import required modules. Please install markdown and xhtml2pdf "
+                "using `pip install markdown xhtml2pdf`"
+            )
+
+    def to_tool(self) -> FunctionTool:
+        self._validate_packages()
+        return FunctionTool.from_defaults(self.generate_document)
@@ -0,0 +1,34 @@
+from typing import List, Optional
+from enum import Enum
+from llama_index.core.base.llms.types import ChatMessage
+from llama_index.core.tools import ToolSelection
+from llama_index.core.workflow import Event
+
+
+# Kinds of AgentRunEvent; used as the type of AgentRunEvent.event_type.
+class AgentRunEventType(Enum):
+    TEXT = "text"
+    PROGRESS = "progress"
+
+
+# Workflow event carrying a named message plus an optional data dict.
+# NOTE(review): presumably `name` identifies the emitting agent/step and `data`
+# holds progress details for PROGRESS events - confirm against the emitters.
+class AgentRunEvent(Event):
+    name: str
+    msg: str
+    event_type: AgentRunEventType = AgentRunEventType.TEXT  # plain text unless stated otherwise
+    data: Optional[dict] = None
+
+
+# Workflow event carrying a list of chat messages (the workflow builds it from
+# its memory, e.g. `InputEvent(input=self.memory.get())`).
+# NOTE(review): the meaning of `response` is not visible from this module - confirm with the workflow steps.
+class InputEvent(Event):
+    input: List[ChatMessage]
+    response: bool = False
+
+
+# Workflow event carrying the tool selections to process.
+# NOTE(review): presumably routed to the research step - confirm in the workflow.
+class ResearchEvent(Event):
+    input: list[ToolSelection]
+
+
+# Workflow event whose payload is either a list of tool selections or a single chat message.
+# NOTE(review): presumably routed to the analysis step - confirm in the workflow.
+class AnalyzeEvent(Event):
+    input: list[ToolSelection] | ChatMessage
+
+
+# Workflow event carrying the tool selections to process.
+# NOTE(review): presumably routed to the report step - confirm in the workflow.
+class ReportEvent(Event):
+    input: list[ToolSelection]
@@ -0,0 +1,279 @@
+import base64
+import logging
+import os
+import re
+import uuid
+from typing import Any, List, Optional
+
+from pydantic import BaseModel
+
+from llama_index.core.tools import FunctionTool
+
+# Module logger; uses the logger named "uvicorn" rather than a module-specific one.
+logger = logging.getLogger("uvicorn")
+
+
+class FileMetadata(BaseModel):
+    """Simple file metadata model"""
+
+    # Field meanings as populated by E2BCodeInterpreter._save_file:
+    id: str  # unique file name: sanitized name + uuid + extension
+    type: str  # file extension without the leading dot
+    size: int  # size on disk in bytes (os.path.getsize)
+    url: str  # URL built by _get_file_url
+    path: str  # local path the file was written to
+
+
+# One result item produced by E2BCodeInterpreter._parse_result.
+# Results saved to disk (png, svg, jpeg, pdf) set `filename` and `url`;
+# every other format sets `content` to the stringified data.
+class InterpreterExtraResult(BaseModel):
+    type: str  # the result format name, e.g. "png"
+    content: Optional[str] = None
+    filename: Optional[str] = None
+    url: Optional[str] = None
+
+
+# Value returned by E2BCodeInterpreter.interpret.
+class E2BToolOutput(BaseModel):
+    is_error: bool
+    # Forward reference: `Logs` comes from e2b_code_interpreter.models and is only
+    # imported lazily inside E2BCodeInterpreter, hence the string annotation.
+    logs: "Logs"  # type: ignore # noqa: F821
+    error_message: Optional[str] = None
+    results: List[InterpreterExtraResult] = []
+    retry_count: int = 0
+
+
+class E2BCodeInterpreter:
+    output_dir = "output/tools"
+    uploaded_files_dir = "output/uploaded"
+    interpreter: Optional["Sandbox"] = None  # type: ignore # noqa: F821
+
+    def __init__(
+        self,
+        api_key: str,
+        output_dir: Optional[str] = None,
+        uploaded_files_dir: Optional[str] = None,
+    ):
+        """
+        Args:
+            api_key: The API key for the E2B Code Interpreter.
+            output_dir: The directory for the output files. Default is `output/tools`.
+            uploaded_files_dir: The directory for the files to be uploaded to the sandbox. Default is `output/uploaded`.
+        """
+        self._validate_package()
+        if not api_key:
+            raise ValueError(
+                "api_key is required to run code interpreter. Get it here: https://e2b.dev/docs/getting-started/api-key"
+            )
+        self.api_key = api_key
+        self.output_dir = output_dir or "output/tools"
+        self.uploaded_files_dir = uploaded_files_dir or "output/uploaded"
+
+    @classmethod
+    def _validate_package(cls) -> None:
+        try:
+            from e2b_code_interpreter import Sandbox  # noqa: F401
+            from e2b_code_interpreter.models import Logs  # noqa: F401
+        except ImportError:
+            raise ImportError(
+                "e2b_code_interpreter is not installed. Please install it using `pip install e2b-code-interpreter`."
+            )
+
+    def __del__(self) -> None:
+        """
+        Kill the interpreter when the tool is no longer in use.
+        """
+        if self.interpreter is not None:
+            self.interpreter.kill()
+
+    def _init_interpreter(self, sandbox_files: List[str] = []) -> None:
+        """
+        Lazily initialize the interpreter.
+        """
+        from e2b_code_interpreter import Sandbox
+
+        logger.info(f"Initializing interpreter with {len(sandbox_files)} files")
+        self.interpreter = Sandbox(api_key=self.api_key)
+        if len(sandbox_files) > 0:
+            for file_path in sandbox_files:
+                file_name = os.path.basename(file_path)
+                local_file_path = os.path.join(self.uploaded_files_dir, file_name)
+                with open(local_file_path, "rb") as f:
+                    content = f.read()
+                    if self.interpreter and self.interpreter.files:
+                        self.interpreter.files.write(file_path, content)
+            logger.info(f"Uploaded {len(sandbox_files)} files to sandbox")
+
+    def _process_file_name(self, file_name: str) -> tuple[str, str]:
+        """
+        Process original file name to generate a unique file id and extension.
+        """
+        _id = str(uuid.uuid4())
+        name, extension = os.path.splitext(file_name)
+        extension = extension.lstrip(".")
+        if extension == "":
+            raise ValueError("File name is not valid! It must have an extension.")
+        # sanitize the name
+        name = re.sub(r"[^a-zA-Z0-9.]", "_", name)
+        file_id = f"{name}_{_id}.{extension}"
+        return file_id, extension
+
+    def _get_file_url(self, file_id: str, save_dir: str) -> str:
+        """
+        Get the URL of a file.
+        """
+        # Ensure the path uses forward slashes for URLs
+        url_path = f"{save_dir}/{file_id}".replace("\\", "/")
+        return f"/api/files/{url_path}"
+
+    def _save_file(self, content: bytes, file_name: str, save_dir: str) -> FileMetadata:
+        file_id, extension = self._process_file_name(file_name)
+        file_path = os.path.join(save_dir, file_id)
+
+        # Write the file directly
+        try:
+            os.makedirs(os.path.dirname(file_path), exist_ok=True)
+            with open(file_path, "wb") as f:
+                f.write(content)
+        except PermissionError as e:
+            logger.error(f"Permission denied when writing to file {file_path}: {e!s}")
+            raise
+        except OSError as e:
+            logger.error(f"IO error occurred when writing to file {file_path}: {e!s}")
+            raise
+        except Exception as e:
+            logger.error(f"Unexpected error when writing to file {file_path}: {e!s}")
+            raise
+
+        logger.info(f"Saved file to {file_path}")
+
+        file_size = os.path.getsize(file_path)
+        file_url = self._get_file_url(file_id, save_dir)
+
+        return FileMetadata(
+            id=file_id,
+            type=extension,
+            size=file_size,
+            url=file_url,
+            path=file_path,
+        )
+
+    def _save_to_disk(self, base64_data: str, ext: str) -> FileMetadata:
+        buffer = base64.b64decode(base64_data)
+
+        # Output from e2b doesn't have a name. Create a random name for it.
+        filename = f"e2b_file_{uuid.uuid4()}.{ext}"
+
+        return self._save_file(buffer, file_name=filename, save_dir=self.output_dir)
+
+    def _parse_result(self, result: Any) -> List[InterpreterExtraResult]:
+        """
+        The result could include multiple formats (e.g. png, svg, etc.) but encoded in base64
+        We save each result to disk and return saved file metadata (extension, filename, url).
+        """
+        if not result:
+            return []
+
+        output = []
+
+        try:
+            formats = result.formats()
+            results = [result[format] for format in formats]
+
+            for ext, data in zip(formats, results):
+                if ext in ["png", "svg", "jpeg", "pdf"]:
+                    document_file = self._save_to_disk(data, ext)
+                    output.append(
+                        InterpreterExtraResult(
+                            type=ext,
+                            filename=document_file.id,
+                            url=document_file.url,
+                        )
+                    )
+                else:
+                    # Try serialize data to string
+                    try:
+                        data = str(data)
+                    except Exception as e:
+                        data = f"Error when serializing data: {e}"
+                    output.append(
+                        InterpreterExtraResult(
+                            type=ext,
+                            content=data,
+                        )
+                    )
+        except Exception as error:
+            logger.exception(error, exc_info=True)
+            logger.error("Error when parsing output from E2b interpreter tool", error)
+
+        return output
+
+    def interpret(
+        self,
+        code: str,
+        sandbox_files: List[str] = [],
+        retry_count: int = 0,
+    ) -> E2BToolOutput:
+        """
+        Execute Python code in a Jupyter notebook cell. The tool will return the result, stdout, stderr, display_data, and error.
+        If the code needs to use a file, ALWAYS pass the file path in the sandbox_files argument.
+        You have a maximum of 3 retries to get the code to run successfully.
+
+        Parameters:
+            code (str): The Python code to be executed in a single cell.
+            sandbox_files (List[str]): List of local file paths to be used by the code. The tool will throw an error if a file is not found.
+            retry_count (int): Number of times the tool has been retried.
+        """
+        from e2b_code_interpreter.models import Logs
+
+        if retry_count > 2:
+            return E2BToolOutput(
+                is_error=True,
+                logs=Logs(
+                    stdout="",
+                    stderr="",
+                    display_data="",
+                    error="",
+                ),
+                error_message="Failed to execute the code after 3 retries. Explain the error to the user and suggest a fix.",
+                retry_count=retry_count,
+            )
+
+        if self.interpreter is None:
+            self._init_interpreter(sandbox_files)
+
+        if self.interpreter:
+            logger.info(
+                f"\n{'=' * 50}\n> Running following AI-generated code:\n{code}\n{'=' * 50}"
+            )
+            exec = self.interpreter.run_code(code)
+
+            if exec.error:
+                error_message = f"The code failed to execute successfully. Error: {exec.error}. Try to fix the code and run again."
+                logger.error(error_message)
+                # Calling the generated code caused an error. Kill the interpreter and return the error to the LLM so it can try to fix the error
+                try:
+                    self.interpreter.kill()  # type: ignore
+                except Exception:
+                    pass
+                finally:
+                    self.interpreter = None
+                output = E2BToolOutput(
+                    is_error=True,
+                    logs=exec.logs,
+                    results=[],
+                    error_message=error_message,
+                    retry_count=retry_count + 1,
+                )
+            else:
+                if len(exec.results) == 0:
+                    output = E2BToolOutput(is_error=False, logs=exec.logs, results=[])
+                else:
+                    results = self._parse_result(exec.results[0])
+                    output = E2BToolOutput(
+                        is_error=False,
+                        logs=exec.logs,
+                        results=results,
+                        retry_count=retry_count + 1,
+                    )
+            return output
+        else:
+            raise ValueError("Interpreter is not initialized.")
+
+    def to_tool(self) -> FunctionTool:
+        self._validate_package()
+        return FunctionTool.from_defaults(self.interpret)
@@ -0,0 +1,48 @@
+import os
+from typing import Any, Optional
+
+from llama_index.core.base.base_query_engine import BaseQueryEngine
+from llama_index.core.indices.base import BaseIndex
+from llama_index.core.tools.query_engine import QueryEngineTool
+
+
+def create_query_engine(index: BaseIndex, **kwargs: Any) -> BaseQueryEngine:
+    """
+    Create a query engine for the given index.
+
+    Args:
+        index: The index to create a query engine for.
+        params (optional): Additional parameters for the query engine, e.g: similarity_top_k
+    """
+    top_k = int(os.getenv("TOP_K", 0))
+    if top_k != 0 and kwargs.get("filters") is None:
+        kwargs["similarity_top_k"] = top_k
+
+    return index.as_query_engine(**kwargs)
+
+
+def get_query_engine_tool(
+    index: BaseIndex,
+    name: Optional[str] = None,
+    description: Optional[str] = None,
+    **kwargs: Any,
+) -> QueryEngineTool:
+    """
+    Get a query engine tool for the given index.
+
+    Args:
+        index: The index to create a query engine for.
+        name (optional): The name of the tool.
+        description (optional): The description of the tool.
+    """
+    if name is None:
+        name = "query_index"
+    if description is None:
+        description = "Use this tool to retrieve information from a knowledge base. Provide a specific query and can call the tool multiple times if necessary."
+    query_engine = create_query_engine(index, **kwargs)
+    tool = QueryEngineTool.from_defaults(
+        query_engine=query_engine,
+        name=name,
+        description=description,
+    )
+    return tool
@@ -0,0 +1,50 @@
+from typing import AsyncGenerator, Union
+from llama_index.core.base.llms.types import (
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    ChatResponse,
+)
+from llama_index.core.workflow import Context
+from llama_index.core.agent.workflow.workflow_events import AgentStream
+
+
+async def write_response_to_stream(
+    res: Union[
+        CompletionResponse,
+        CompletionResponseAsyncGen,
+        AsyncGenerator[ChatResponse, None],
+    ],
+    ctx: Context,
+    current_agent_name: str = "assistant",
+) -> str:
+    """
+    Handle both streaming and non-streaming LLM responses.
+
+    Args:
+        res: The LLM response (either streaming or non-streaming)
+        ctx: The workflow context for writing events to stream
+        current_agent_name: The name of the current agent (default: "assistant")
+
+    Returns:
+        The final response text as a string
+    """
+    final_response = ""
+
+    if isinstance(res, AsyncGenerator):
+        # Handle streaming response (CompletionResponseAsyncGen or ChatResponse AsyncGenerator)
+        async for chunk in res:
+            ctx.write_event_to_stream(
+                AgentStream(
+                    delta=chunk.delta or "",
+                    response=final_response,
+                    current_agent_name=current_agent_name,
+                    tool_calls=[],
+                    raw=getattr(chunk, "raw", None) or "",
+                )
+            )
+            final_response += chunk.delta or ""
+    else:
+        # Handle non-streaming response (CompletionResponse)
+        final_response = res.text
+
+    return final_response
@@ -1,33 +1,41 @@
 import os
-from typing import List, Optional
+from typing import Optional
+from dotenv import load_dotenv
+

-from app.index import get_index
 from llama_index.core import Settings
 from llama_index.core.base.llms.types import ChatMessage, MessageRole
 from llama_index.core.llms.function_calling import FunctionCallingLLM
 from llama_index.core.memory import ChatMemoryBuffer
-from llama_index.core.tools import FunctionTool, QueryEngineTool, ToolSelection
+from llama_index.core.tools import FunctionTool, QueryEngineTool
 from llama_index.core.workflow import (
    Context,
-    Event,
    StartEvent,
    StopEvent,
    Workflow,
    step,
 )
-from llama_index.server.api.models import AgentRunEvent, ChatRequest
-from llama_index.server.settings import server_settings
-from llama_index.server.tools.document_generator import DocumentGenerator
-from llama_index.server.tools.index import get_query_engine_tool
-from llama_index.server.tools.interpreter import E2BCodeInterpreter
-from llama_index.server.utils.agent_tool import (
-    call_tools,
-    chat_with_tools,
+
+from src.index import get_index
+from src.settings import init_settings
+from src.query import get_query_engine_tool
+from src.document_generator import DocumentGenerator
+from src.interpreter import E2BCodeInterpreter
+from src.events import (
+    InputEvent,
+    ResearchEvent,
+    AnalyzeEvent,
+    ReportEvent,
+    AgentRunEvent,
 )
+from src.agent_tool import call_tools, chat_with_tools
+from src.utils import write_response_to_stream


-def create_workflow(chat_request: Optional[ChatRequest] = None) -> Workflow:
-    index = get_index(chat_request=chat_request)
+def create_workflow() -> Workflow:
+    load_dotenv()
+    init_settings()
+    index = get_index()
    if index is None:
        raise ValueError(
            "Index is not found. Try run generation script to create the index first."
@@ -39,9 +47,7 @@ def create_workflow(chat_request: Optional[ChatRequest] = None) -> Workflow:
            "E2B_API_KEY is required to use the code interpreter tool. Please check README.md to know how to get the key."
        )
    code_interpreter_tool = E2BCodeInterpreter(api_key=e2b_api_key).to_tool()
-    document_generator_tool = DocumentGenerator(
-        file_server_url_prefix=server_settings.file_server_url_prefix,
-    ).to_tool()
+    document_generator_tool = DocumentGenerator().to_tool()

    return FinancialReportWorkflow(
        query_engine_tool=query_engine_tool,
@@ -51,23 +57,6 @@ def create_workflow(chat_request: Optional[ChatRequest] = None) -> Workflow:
    )


-class InputEvent(Event):
-    input: List[ChatMessage]
-    response: bool = False
-
-
-class ResearchEvent(Event):
-    input: list[ToolSelection]
-
-
-class AnalyzeEvent(Event):
-    input: list[ToolSelection] | ChatMessage
-
-
-class ReportEvent(Event):
-    input: list[ToolSelection]
-
-
 class FinancialReportWorkflow(Workflow):
    """
    A workflow to generate a financial report using indexed documents.
@@ -129,10 +118,14 @@ class FinancialReportWorkflow(Workflow):
    async def prepare_chat_history(self, ctx: Context, ev: StartEvent) -> InputEvent:
        self.stream = ev.get("stream", True)
        user_msg = ev.get("user_msg")
-        chat_history = ev.get("chat_history")
-
-        if chat_history is not None:
-            self.memory.put_messages(chat_history)
+        messages = [
+            ChatMessage(
+                role=msg.get("role", "user"),
+                content=msg.get("content", ""),
+            )
+            for msg in ev.get("chat_history", [])
+        ]
+        self.memory.put_messages(messages)

        # Add user message to memory
        self.memory.put(ChatMessage(role=MessageRole.USER, content=user_msg))
@@ -164,7 +157,8 @@ class FinancialReportWorkflow(Workflow):
        )
        if not response.has_tool_calls():
            if self.stream:
-                return StopEvent(result=response.generator)
+                final_response = await write_response_to_stream(response.generator, ctx)
+                return StopEvent(result=final_response)
            else:
                return StopEvent(result=await response.full_response())
        # calling different tools at the same time is not supported at the moment
@@ -331,3 +325,6 @@ class FinancialReportWorkflow(Workflow):
            )
        # After the tool calls, fallback to the input with the latest chat history
        return InputEvent(input=self.memory.get())
+
+
+# Module-level workflow instance; create_workflow() runs when this module is imported.
+workflow = create_workflow()
@@ -1,109 +0,0 @@
-This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/).
-
-## Getting Started
-
-First, setup the environment with uv:
-
-> **_Note:_** This step is not needed if you are using the dev-container.
-
-```shell
-uv sync
-```
-
-Then check the parameters that have been pre-configured in the `.env` file in this directory.
-Make sure you have set the `OPENAI_API_KEY` for the LLM.
-
-Then, run the development server:
-
-```shell
-uv run fastapi dev
-```
-
-Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
-
-To start the app optimized for **production**, run:
-
-```
-uv run fastapi run
-```
-
-## Configure LLM and Embedding Model
-
-You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
-
-## Use Case
-
-This example shows how to use the LlamaIndexServer with a human in the loop. It allows you to start CLI commands that are reviewed by a human before execution.
-
-To update the workflow, you can modify the code in [`workflow.py`](app/workflow.py).
-
-You can start by sending a request from the [chat UI](http://localhost:8000), or you can test the `/api/chat` endpoint with the following curl request:
-
-```
-curl --location 'localhost:8000/api/chat' \
--header 'Content-Type: application/json' \
--data '{ "messages": [{ "role": "user", "content": "Show me the files in the current directory" }] }'
-```
-
-## How does HITL work?
-
-### Events
-
-The human-in-the-loop approach used here is based on a simple idea: the workflow pauses and waits for a human response before proceeding to the next step.
-
-To do this, you will need to implement two custom events:
-
- [HumanInputEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is used to request input from the user.
- [HumanResponseEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is sent to the workflow to resume execution with input from the user.
-
-In this example, we have implemented these two custom events in [`events.ts`](src/app/events.ts):
-
- `cliHumanInputEvent` – to request input from the user for CLI command execution.
- `cliHumanResponseEvent` – to resume the workflow with the response from the user.
-
-```typescript
-export const cliHumanInputEvent = humanInputEvent<{
-  type: "cli_human_input";
-  data: { command: string };
-  response: typeof cliHumanResponseEvent;
-}>();
-
-export const cliHumanResponseEvent = humanResponseEvent<{
-  type: "human_response";
-  data: { execute: boolean; command: string };
-}>();
-```
-
-### UI Component
-
-HITL also needs a custom UI component that is shown when the LlamaIndexServer receives the `cliHumanInputEvent`. The name of the component is defined in the `type` field of the `cliHumanInputEvent`. In our case it is `cli_human_input`, which corresponds to the [cli_human_input.tsx](./components/cli_human_input.tsx) component.
-
-The custom component must use `append` to send a message with a `human_response` annotation. The data of the annotation must be in the format of the response event `cliHumanResponseEvent`. For example, to execute the command `ls -l`, we would send:
-
-```tsx
-append({
-  content: "Yes",
-  role: "user",
-  annotations: [
-    {
-      type: "human_response",
-      data: {
-        execute: true,
-        command: "ls -l", // The command to execute
-      },
-    },
-  ],
-});
-```
-
-This component displays the command to execute and the user can choose to execute or cancel the command execution.
-
-## Learn More
-
-To learn more about LlamaIndex, take a look at the following resources:
-
- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
- [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
- [LlamaIndex Server](https://pypi.org/project/llama-index-server/)
-
-You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
@@ -1,34 +0,0 @@
-from typing import Type
-
-from pydantic import BaseModel, Field
-
-from llama_index.server.models import HumanInputEvent, HumanResponseEvent
-
-
-# Response event carrying the human's decision for a proposed CLI command;
-# referenced by CLIHumanInputEvent.response_event_type.
-class CLIHumanResponseEvent(HumanResponseEvent):
-    execute: bool = Field(
-        description="True if the human wants to execute the command, False otherwise."
-    )
-    command: str = Field(description="The command to execute.")
-
-
-# Payload model for CLIHumanInputEvent.data: the command awaiting confirmation.
-class CLICommand(BaseModel):
-    command: str = Field(description="The command to execute.")
-
-
-# The HITL feature requires an event that extends HumanInputEvent
-class CLIHumanInputEvent(HumanInputEvent):
-    """
-    CLIHumanInputEvent is sent when the agent needs the user's permission to execute a CLI command.
-    Render this event by showing the command and a yes/no control to execute the command or not.
-    """
-
-    event_type: str = (
-        "cli_human_input"  # used by the UI to render the appropriate component
-    )
-    response_event_type: Type = (
-        CLIHumanResponseEvent  # used by the workflow to resume with the correct event
-    )
-    data: CLICommand = Field(  # the data that is sent to the UI for rendering
-        description="The command to execute.",
-    )
@@ -1,87 +0,0 @@
-import platform
-import subprocess
-from typing import Any
-
-from app.events import CLICommand, CLIHumanInputEvent, CLIHumanResponseEvent
-
-from llama_index.core.prompts import PromptTemplate
-from llama_index.core.settings import Settings
-from llama_index.core.workflow import (
-    Context,
-    StartEvent,
-    StopEvent,
-    Workflow,
-    step,
-)
-
-
-def create_workflow() -> Workflow:
-    return CLIWorkflow()
-
-
-class CLIWorkflow(Workflow):
-    """
-    A workflow has ability to execute command line tool with human in the loop for confirmation.
-    """
-
-    default_prompt = PromptTemplate(
-        template="""
-        You are a helpful assistant who can write CLI commands to execute using {cli_language}.
-        Your task is to analyze the user's request and write a CLI command to execute.
-
-        ## User Request
-        {user_request}
-
-        Don't be verbose, only respond with the CLI command without any other text.
-        """
-    )
-
-    def __init__(self, **kwargs: Any) -> None:
-        # HITL Workflow should disable timeout otherwise, we will get a timeout error from callback
-        kwargs["timeout"] = None
-        super().__init__(**kwargs)
-
-    @step
-    async def start(self, ctx: Context, ev: StartEvent) -> CLIHumanInputEvent:
-        user_msg = ev.user_msg
-        if user_msg is None:
-            raise ValueError("Missing user_msg in StartEvent")
-        await ctx.set("user_msg", user_msg)
-        # Request LLM to generate a CLI command
-        os_name = platform.system()
-        if os_name == "Linux" or os_name == "Darwin":
-            cli_language = "bash"
-        else:
-            cli_language = "cmd"
-        prompt = self.default_prompt.format(
-            user_request=user_msg, cli_language=cli_language
-        )
-        llm = Settings.llm
-        if llm is None:
-            raise ValueError("Missing LLM in Settings")
-        response = await llm.acomplete(prompt, formatted=True)
-        command = response.text.strip()
-        if command == "":
-            raise ValueError("Couldn't generate a command")
-        # Send the command to the user for confirmation
-        await ctx.set("command", command)
-        return CLIHumanInputEvent(  # type: ignore
-            data=CLICommand(command=command),
-            response_event_type=CLIHumanResponseEvent,
-        )
-
-    @step
-    async def handle_human_response(
-        self,
-        ctx: Context,
-        ev: CLIHumanResponseEvent,  # This event is sent by LlamaIndexServer when user response
-    ) -> StopEvent:
-        # If we have human response, check the confirmation and execute the command
-        if ev.execute:
-            command = ev.command or ""
-            if command == "":
-                raise ValueError("Missing command in CLIExecutionEvent")
-            res = subprocess.run(command, shell=True, capture_output=True, text=True)
-            return StopEvent(result=res.stdout or res.stderr)
-        else:
-            return StopEvent(result=None)
@@ -42,8 +42,8 @@ The human-in-the-loop approach used here is based on a simple idea: the workflow

 To do this, you will need to implement two custom events:

- [HumanInputEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is used to request input from the user.
- [HumanResponseEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is sent to the workflow to resume execution with input from the user.
+- [HumanInputEvent](https://github.com/run-llama/chat-ui/tree/main/packages/server/src/utils/hitl/events.ts): This event is used to request input from the user.
+- [HumanResponseEvent](https://github.com/run-llama/chat-ui/tree/main/packages/server/src/utils/hitl/events.ts): This event is sent to the workflow to resume execution with input from the user.

 In this example, we have implemented these two custom events in [`events.ts`](src/app/events.ts):

@@ -6,12 +6,12 @@ load_dotenv()

 import logging

-from app.index import get_index
-from app.settings import init_settings
-from llama_index.server.services.llamacloud.generate import (
-    load_to_llamacloud,
-)
+from llama_index.core.readers import SimpleDirectoryReader
+from tqdm import tqdm

+from src.index import get_index
+from src.service import LLamaCloudFileService
+from src.settings import init_settings

 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger()
@@ -25,29 +25,41 @@ def generate_index():
    if index is None:
        raise ValueError("Index not found and could not be created")

-    load_to_llamacloud(index, logger=logger)
+    # use SimpleDirectoryReader to retrieve the files to process
+    reader = SimpleDirectoryReader(
+        "ui/data",
+        recursive=True,
+    )
+    files_to_process = reader.input_files
+
+    # add each file to the LlamaCloud pipeline
+    error_files = []
+    for input_file in tqdm(
+        files_to_process,
+        desc="Processing files",
+        unit="file",
+    ):
+        with open(input_file, "rb") as f:
+            logger.debug(
+                f"Adding file {input_file} to pipeline {index.name} in project {index.project_name}"
+            )
+            try:
+                LLamaCloudFileService.add_file_to_pipeline(
+                    index.project.id,
+                    index.pipeline.id,
+                    f,
+                    custom_metadata={},
+                    wait_for_processing=False,
+                )
+            except Exception as e:
+                error_files.append(input_file)
+                logger.error(f"Error adding file {input_file}: {e}")
+
+    if error_files:
+        logger.error(f"Failed to add the following files: {error_files}")
+
+    logger.info("Finished generating the index")


-def generate_ui_for_workflow():
-    """
-    Generate UI for UIEventData event in app/workflow.py
-    """
-    import asyncio
-    from llama_index.llms.openai import OpenAI
-    from main import COMPONENT_DIR
-
-    # To generate UI components for additional event types,
-    # import the corresponding data model (e.g., MyCustomEventData)
-    # and run the generate_ui_for_workflow function with the imported model.
-    # Make sure the output filename of the generated UI component matches the event type (here `ui_event`)
-    try:
-        from app.workflow import UIEventData  # type: ignore
-    except ImportError:
-        raise ImportError("Couldn't generate UI component for the current workflow.")
-    from llama_index.server.gen_ui import generate_event_component
-
-    # works also well with Claude 3.7 Sonnet or Gemini Pro 2.5
-    llm = OpenAI(model="gpt-4.1")
-    code = asyncio.run(generate_event_component(event_cls=UIEventData, llm=llm))
-    with open(f"{COMPONENT_DIR}/ui_event.jsx", "w") as f:
-        f.write(code)
+if __name__ == "__main__":
+    generate_index()
@@ -1,7 +1,146 @@
-from llama_index.server.services.llamacloud import (
-    LlamaCloudIndex,
-    get_client,
-    get_index,
-)
+import logging
+import os
+from typing import Optional

-__all__ = ["LlamaCloudIndex", "get_client", "get_index"]
+from llama_cloud import PipelineType
+from llama_index.core.callbacks import CallbackManager
+from llama_index.core.ingestion.api_utils import (
+    get_client as llama_cloud_get_client,
+)
+from llama_index.core.settings import Settings
+from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
+from pydantic import BaseModel, Field, field_validator
+
+logger = logging.getLogger("uvicorn")
+
+
+class LlamaCloudConfig(BaseModel):
+    """
+    Credentials and target (project / pipeline) of a LlamaCloud managed index.
+
+    Every field that is not passed to the constructor is read from its
+    `LLAMA_CLOUD_*` environment variable.
+    """
+
+    # Private attributes
+    api_key: str = Field(
+        exclude=True,  # Exclude from serialized output (model_dump)
+        repr=False,  # Keep the secret out of repr()/str(), e.g. in logs
+    )
+    base_url: Optional[str] = Field(
+        exclude=True,
+    )
+    organization_id: Optional[str] = Field(
+        exclude=True,
+    )
+    # Configuration attributes, can be set by the user
+    pipeline: str = Field(
+        description="The name of the pipeline to use",
+    )
+    project: str = Field(
+        description="The name of the LlamaCloud project",
+    )
+
+    def __init__(self, **kwargs):
+        # Fall back to the environment for every field the caller did not pass
+        env_defaults = {
+            "api_key": "LLAMA_CLOUD_API_KEY",
+            "base_url": "LLAMA_CLOUD_BASE_URL",
+            "organization_id": "LLAMA_CLOUD_ORGANIZATION_ID",
+            "pipeline": "LLAMA_CLOUD_INDEX_NAME",
+            "project": "LLAMA_CLOUD_PROJECT_NAME",
+        }
+        for field_name, env_name in env_defaults.items():
+            if field_name not in kwargs:
+                kwargs[field_name] = os.getenv(env_name)
+        super().__init__(**kwargs)
+
+    # Validate and throw error if the env variables are not set before starting the app
+    @field_validator("pipeline", "project", "api_key", mode="before")
+    @classmethod
+    def validate_fields(cls, value):
+        # A blank value (e.g. `LLAMA_CLOUD_API_KEY=` in .env) is as unusable as a
+        # missing variable, so reject it here instead of failing later in the API call
+        if value is None or (isinstance(value, str) and not value.strip()):
+            raise ValueError(
+                "Please set LLAMA_CLOUD_INDEX_NAME, LLAMA_CLOUD_PROJECT_NAME and LLAMA_CLOUD_API_KEY"
+                " to your environment variables or config them in .env file"
+            )
+        return value
+
+    def to_client_kwargs(self) -> dict:
+        """Return the keyword arguments used to build the LlamaCloud client."""
+        return {
+            "api_key": self.api_key,
+            "base_url": self.base_url,
+        }
+
+
+class IndexConfig(BaseModel):
+    """Settings needed to open a `LlamaCloudIndex`."""
+
+    llama_cloud_pipeline_config: LlamaCloudConfig = Field(
+        default_factory=lambda: LlamaCloudConfig(),
+        alias="llamaCloudPipeline",
+    )
+    callback_manager: Optional[CallbackManager] = Field(
+        default=None,
+    )
+
+    def to_index_kwargs(self) -> dict:
+        """Flatten the configuration into `LlamaCloudIndex` keyword arguments."""
+        cloud = self.llama_cloud_pipeline_config
+        return dict(
+            name=cloud.pipeline,
+            project_name=cloud.project,
+            api_key=cloud.api_key,
+            base_url=cloud.base_url,
+            organization_id=cloud.organization_id,
+            callback_manager=self.callback_manager,
+        )
+
+
+def get_index(
+    config: Optional[IndexConfig] = None,
+    create_if_missing: bool = False,
+) -> Optional[LlamaCloudIndex]:
+    """
+    Return the LlamaCloud index described by `config`.
+
+    Args:
+        config: Index configuration; a default `IndexConfig()` is built when omitted.
+        create_if_missing: When True and the index cannot be opened, create the
+            pipeline and open the index again.
+
+    Returns:
+        The index, or None when it cannot be opened and `create_if_missing` is False.
+    """
+    if config is None:
+        config = IndexConfig()
+    # A ValueError from the LlamaCloudIndex constructor is treated as "index not found"
+    try:
+        index = LlamaCloudIndex(**config.to_index_kwargs())
+        return index
+    except ValueError:
+        logger.warning("Index not found")
+        if create_if_missing:
+            logger.info("Creating index")
+            _create_index(config)
+            return LlamaCloudIndex(**config.to_index_kwargs())
+        return None
+
+
+def get_client():
+    """Build a LlamaCloud API client from a default `LlamaCloudConfig`."""
+    client_kwargs = LlamaCloudConfig().to_client_kwargs()
+    return llama_cloud_get_client(**client_kwargs)
+
+
+def _create_index(
+    config: IndexConfig,
+):
+    """
+    Create the managed pipeline named in `config` unless a pipeline with that
+    name already exists.
+
+    Raises:
+        ValueError: If the configured embedding model is not an OpenAI embedding.
+    """
+    client = get_client()
+    pipeline_name = config.llama_cloud_pipeline_config.pipeline
+
+    existing = client.pipelines.search_pipelines(
+        pipeline_name=pipeline_name,
+        pipeline_type=PipelineType.MANAGED.value,
+    )
+    # Nothing to do when the pipeline is already there
+    if len(existing) != 0:
+        return
+
+    from llama_index.embeddings.openai import OpenAIEmbedding
+
+    if not isinstance(Settings.embed_model, OpenAIEmbedding):
+        raise ValueError(
+            "Creating a new pipeline with a non-OpenAI embedding model is not supported."
+        )
+
+    embedding_config = {
+        "type": "OPENAI_EMBEDDING",
+        "component": {
+            "api_key": os.getenv("OPENAI_API_KEY"),  # editable
+            "model_name": os.getenv("EMBEDDING_MODEL"),
+        },
+    }
+    transform_config = {
+        "mode": "auto",
+        "config": {
+            "chunk_size": Settings.chunk_size,  # editable
+            "chunk_overlap": Settings.chunk_overlap,  # editable
+        },
+    }
+    client.pipelines.upsert_pipeline(
+        request={
+            "name": pipeline_name,
+            "embedding_config": embedding_config,
+            "transform_config": transform_config,
+        },
+    )
@@ -0,0 +1,73 @@
+import logging
+import time
+import typing
+from io import BytesIO
+from typing import Dict, Optional, Tuple, Union
+
+from llama_cloud import ManagedIngestionStatus, PipelineFileCreateCustomMetadataValue
+from pydantic import BaseModel
+
+from src.index import get_client
+
+logger = logging.getLogger("uvicorn")
+
+
+class LlamaCloudFile(BaseModel):
+    """A file identified by its name and the id of the pipeline it belongs to."""
+
+    file_name: str
+    pipeline_id: str
+
+    def __eq__(self, other):
+        # Two files are equal when both the name and the pipeline id match
+        if isinstance(other, LlamaCloudFile):
+            return (self.file_name, self.pipeline_id) == (
+                other.file_name,
+                other.pipeline_id,
+            )
+        return NotImplemented
+
+    def __hash__(self):
+        # Hash over the same pair of fields that __eq__ compares
+        return hash((self.file_name, self.pipeline_id))
+
+
+class LLamaCloudFileService:
+    """Helpers for uploading files into a LlamaCloud pipeline."""
+
+    LOCAL_STORE_PATH = "output/llamacloud"
+    DOWNLOAD_FILE_NAME_TPL = "{pipeline_id}${filename}"
+
+    @classmethod
+    def add_file_to_pipeline(
+        cls,
+        project_id: str,
+        pipeline_id: str,
+        upload_file: Union[typing.IO, Tuple[str, BytesIO]],
+        custom_metadata: Optional[Dict[str, PipelineFileCreateCustomMetadataValue]],
+        wait_for_processing: bool = True,
+    ) -> str:
+        """
+        Upload a file to the project and attach it to the pipeline.
+
+        Args:
+            project_id: Id of the project the file is uploaded to.
+            pipeline_id: Id of the pipeline the file is added to.
+            upload_file: The file to upload.
+            custom_metadata: Extra metadata stored with the file; the file id is
+                always added under the "file_id" key.
+            wait_for_processing: When True, poll the ingestion status before returning.
+
+        Returns:
+            The id of the uploaded file.
+
+        Raises:
+            Exception: If ingestion reports an error or does not succeed within
+                the polling window.
+        """
+        client = get_client()
+        uploaded = client.files.upload_file(
+            project_id=project_id, upload_file=upload_file
+        )
+        file_id = uploaded.id
+        metadata = {"file_id": file_id, **(custom_metadata or {})}
+        client.pipelines.add_files_to_pipeline_api(
+            pipeline_id,
+            request=[{"file_id": file_id, "custom_metadata": metadata}],
+        )
+
+        if not wait_for_processing:
+            return file_id
+
+        # Poll the ingestion status: 20 checks, 100ms apart (about 2s in total)
+        max_attempts = 20
+        for _ in range(max_attempts):
+            result = client.pipelines.get_pipeline_file_status(
+                file_id=file_id, pipeline_id=pipeline_id
+            )
+            if result.status == ManagedIngestionStatus.ERROR:
+                raise Exception(f"File processing failed: {str(result)}")
+            if result.status == ManagedIngestionStatus.SUCCESS:
+                # File is ingested - return the file id
+                return file_id
+            time.sleep(0.1)  # Sleep for 100ms
+        raise Exception(
+            f"File processing did not complete after {max_attempts} attempts."
+        )
@@ -1,68 +0,0 @@
-import logging
-import os
-
-from dotenv import load_dotenv
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger()
-
-
-def generate_index():
-    """
-    Index the documents in the data directory.
-    """
-    from app.index import STORAGE_DIR
-    from app.settings import init_settings
-    from llama_index.core.indices import (
-        VectorStoreIndex,
-    )
-    from llama_index.core.readers import SimpleDirectoryReader
-
-    load_dotenv()
-    init_settings()
-
-    logger.info("Creating new index")
-    # load the documents and create the index
-    reader = SimpleDirectoryReader(
-        os.environ.get("DATA_DIR", "data"),
-        recursive=True,
-    )
-    documents = reader.load_data()
-    index = VectorStoreIndex.from_documents(
-        documents,
-        show_progress=True,
-    )
-    # store it for later
-    index.storage_context.persist(STORAGE_DIR)
-    logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
-
-
-def generate_ui_for_workflow():
-    """
-    Generate UI for UIEventData event in app/workflow.py
-    """
-    import asyncio
-
-    from app.settings import init_settings
-    from llama_index.core.settings import Settings
-    from main import COMPONENT_DIR
-
-    load_dotenv()
-    init_settings()
-
-    # To generate UI components for additional event types,
-    # import the corresponding data model (e.g., MyCustomEventData)
-    # and run the generate_ui_for_workflow function with the imported model.
-    # Make sure the output filename of the generated UI component matches the event type (here `ui_event`)
-    try:
-        from app.workflow import UIEventData  # type: ignore
-    except ImportError:
-        raise ImportError("Couldn't generate UI component for the current workflow.")
-    from llama_index.server.gen_ui import generate_event_component
-
-    # works well with OpenAI gpt-4.1, Claude 3.7 Sonnet or Gemini Pro 2.5
-    code = asyncio.run(
-        generate_event_component(event_cls=UIEventData, llm=Settings.llm)
-    )
-    with open(f"{COMPONENT_DIR}/ui_event.jsx", "w") as f:
-        f.write(code)
@@ -0,0 +1,24 @@
+# Deployment descriptor: a control plane, one "workflow" service and a UI.
+name: chat
+
+control-plane:
+  port: 8000
+
+# NOTE(review): presumably the service that handles requests which do not name
+# one explicitly - confirm against the llama_deploy documentation.
+default-service: workflow
+
+services:
+  workflow:
+    name: Workflow
+    source:
+      type: local
+      name: src
+    # NOTE(review): looks like "<module path>:<object name>" inside the source
+    # directory above - confirm.
+    path: src/workflow:workflow
+    python-dependencies:
+      - llama-index-llms-openai>=0.4.5
+      - llama-index-core>=0.12.45
+
+# Frontend served from the local "ui" directory
+ui:
+  name: My Nextjs App
+  port: 3000
+  source:
+    type: local
+    name: ui
@@ -1,32 +0,0 @@
-import logging
-
-from app.settings import init_settings
-from app.workflow import create_workflow
-from dotenv import load_dotenv
-from llama_index.server import LlamaIndexServer, UIConfig
-
-logger = logging.getLogger("uvicorn")
-
-# A path to a directory where the customized UI code is stored
-COMPONENT_DIR = "components"
-
-
-def create_app():
-    app = LlamaIndexServer(
-        workflow_factory=create_workflow,  # A factory function that creates a new workflow for each request
-        ui_config=UIConfig(
-            component_dir=COMPONENT_DIR,
-            dev_mode=True,  # Please disable this in production
-            layout_dir="layout",
-        ),
-        logger=logger,
-        env="dev",
-    )
-    # You can also add custom FastAPI routes to app
-    app.add_api_route("/api/health", lambda: {"message": "OK"}, status_code=200)
-    return app
-
-
-load_dotenv()
-init_settings()
-app = create_app()
@@ -9,12 +9,17 @@ readme = "README.md"
 requires-python = ">=3.11,<3.14"
 dependencies = [
    "python-dotenv>=1.0.0,<2.0.0",
-    "pydantic<2.10",
+    "pydantic>=2.11.5",
    "aiostream>=0.5.2,<0.6.0",
    "llama-index-core>=0.12.28,<0.13.0",
-    "llama-index-server>=0.1.17,<0.2.0",
+    "llama-index-readers-file>=0.4.6,<1.0.0",
+    "llama-index-indices-managed-llama-cloud>=0.6.3,<1.0.0",
+    "llama-deploy",
 ]

+[tool.uv.sources]
+llama-deploy = { git = "https://github.com/run-llama/llama_deploy" }
+
 [project.optional-dependencies]
 dev = [
    "mypy>=1.8.0,<2.0.0",
@@ -23,9 +28,7 @@ dev = [
 ]

 [project.scripts]
-generate = "generate:generate_index"
-generate_index = "generate:generate_index"
-generate_ui = "generate:generate_ui_for_workflow"
+generate = "src.generate:generate_index"


 [tool.mypy]
@@ -43,7 +46,7 @@ strict_optional = false
 disable_error_code = [ "return-value", "assignment" ]

 [[tool.mypy.overrides]]
-module = "app.*"
+module = "src.*"
 ignore_missing_imports = false

 [tool.hatch.metadata]
@@ -51,4 +54,7 @@ allow-direct-references = true

 [build-system]
 requires = [ "hatchling>=1.24" ]
-build-backend = "hatchling.build"
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src"]
@@ -0,0 +1,37 @@
+import logging
+import os
+
+from dotenv import load_dotenv
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+
+def generate_index():
+    """
+    Build a vector index from the documents in the data directory and persist it.
+
+    The data directory is taken from the DATA_DIR environment variable and
+    defaults to "ui/data"; the index is stored in STORAGE_DIR.
+    """
+    from src.index import STORAGE_DIR
+    from src.settings import init_settings
+    from llama_index.core.indices import (
+        VectorStoreIndex,
+    )
+    from llama_index.core.readers import SimpleDirectoryReader
+
+    load_dotenv()
+    init_settings()
+
+    logger.info("Creating new index")
+    # Read every document below the data directory (sub-folders included)
+    data_dir = os.environ.get("DATA_DIR", "ui/data")
+    documents = SimpleDirectoryReader(data_dir, recursive=True).load_data()
+    # Build the index from the loaded documents
+    index = VectorStoreIndex.from_documents(documents, show_progress=True)
+    # Persist it so it can be loaded later
+    index.storage_context.persist(STORAGE_DIR)
+    logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
@@ -1,23 +1,21 @@
 import logging
 import os
-from typing import Optional

 from llama_index.core.indices import load_index_from_storage
-from llama_index.server.api.models import ChatRequest
-from llama_index.server.tools.index.utils import get_storage_context
+from llama_index.core.storage import StorageContext

 logger = logging.getLogger("uvicorn")

-STORAGE_DIR = "storage"
+STORAGE_DIR = "src/storage"


-def get_index(chat_request: Optional[ChatRequest] = None):
+def get_index():
    # check if storage already exists
    if not os.path.exists(STORAGE_DIR):
        return None
    # load the existing index
    logger.info(f"Loading index from {STORAGE_DIR}...")
-    storage_context = get_storage_context(STORAGE_DIR)
+    storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR)
    index = load_index_from_storage(storage_context)
    logger.info(f"Finished loading index from {STORAGE_DIR}")
    return index
@@ -11,7 +11,7 @@
  },
  "dependencies": {
    "@llamaindex/openai": "~0.4.0",
-    "@llamaindex/server": "~0.2.1",
+    "@llamaindex/server": "^0.3.0",
    "@llamaindex/workflow": "~1.1.8",
    "@llamaindex/tools": "~0.1.2",
    "llamaindex": "~0.11.0",
@@ -0,0 +1,17 @@
+import { LlamaIndexServer } from "@llamaindex/server";
+import "dotenv/config";
+import { initSettings } from "./app/settings";
+import { workflowFactory } from "./app/workflow";
+
+// Configure settings before the server is created
+initSettings();
+
+// Server for the workflow, with custom UI components and LlamaCloud output enabled
+const server = new LlamaIndexServer({
+  workflow: workflowFactory,
+  uiConfig: { componentsDir: "components", devMode: true },
+  llamaCloud: { outputDir: "output/llamacloud" },
+});
+
+server.start();
@@ -1,8 +0,0 @@
-# server contains Nextjs frontend code (not compiled)
-server/
-
-# the ejected nextjs project
-project/
-
-# temp is the copy of next folder but without API folder, used to build frontend static files
-temp/
@@ -1,194 +0,0 @@
-# @llamaindex/server
-
-## 0.2.9
-
-### Patch Changes
-
- 52cc37f: feat: flag to enable useChatWorkflow
- 952b5b4: fix: peer deps and sourcemap issues made ts server start fail
-
-## 0.2.8
-
-### Patch Changes
-
- e2486eb: feat: support human in the loop for TS
-
-## 0.2.7
-
-### Patch Changes
-
- af9ad3c: feat: show document artifact after generating report
- a543a27: feat: bump chat-ui with inline artifact
- 1ff6eaf: Add support for chat upload file
-
-## 0.2.6
-
-### Patch Changes
-
- 3ff0a18: fix: default header padding
- df10474: fix: missing cursor pointer for button
- 087c961: Support zod and chat-ui hooks for custom components
-
-## 0.2.5
-
-### Patch Changes
-
- 058b376: Fix generate script for ejected project
-
-## 0.2.4
-
-### Patch Changes
-
- 5fe9e17: support eject to fully customize next folder
- b8a1ff6: Bump version: chat-ui@0.4.6
-
-## 0.2.3
-
-### Patch Changes
-
- eee3230: feat: support custom layout
- 0bc5a0d: Add suggestNextQuestions config
- 3acec88: chore: bump chat-ui
-
-## 0.2.2
-
-### Patch Changes
-
- 25fba43: refactor: migrate to Nextjs Route Handler
- 6f75d4a: fix: unsupported language in code gen workflow
-
-## 0.2.1
-
-### Patch Changes
-
- f072308: feat: add dev mode UI
-
-## 0.2.0
-
-### Minor Changes
-
- 0384268: Use the new workflow engine and deprecate the old one.
-
-### Patch Changes
-
- d9f9e3c: chore: bump chat-ui to support code editor & document editor
-
-## 0.1.7
-
-### Patch Changes
-
- 8fe5fc2: chore: add llamaindex server package
-
-## 0.1.6
-
-### Patch Changes
-
- 82d4b46: feat: re-add supports for artifacts
-
-## 0.1.5
-
-### Patch Changes
-
- 7ca9ddf: Add generate ui workflow to @llamaindex/server
- 3310eaa: chore: bump chat-ui
-  - llamaindex@0.10.2
-
-## 0.1.4
-
-### Patch Changes
-
- llamaindex@0.10.1
-
-## 0.1.3
-
-### Patch Changes
-
- edb8b87: fix: shadcn components cannot be used in next server
- Updated dependencies [6cf928f]
-  - llamaindex@0.10.0
-
-## 0.1.2
-
-### Patch Changes
-
- bb34ade: feat: support cn utils for server UI
-  - llamaindex@0.9.19
-
-## 0.1.1
-
-### Patch Changes
-
- 400b3b5: feat: use full-source code with import statements for custom comps
-  - llamaindex@0.9.18
-
-## 0.1.0
-
-### Minor Changes
-
- 3ffee26: feat: enhance config params for LlamaIndexServer
-
-## 0.0.9
-
-### Patch Changes
-
- 0b75bd6: feat: component dir in llamaindex server
-
-## 0.0.8
-
-### Patch Changes
-
- Updated dependencies [3534c37]
-  - llamaindex@0.9.17
-
-## 0.0.7
-
-### Patch Changes
-
- 4999df1: bump nextjs
- Updated dependencies [f5e4d09]
-  - llamaindex@0.9.16
-
-## 0.0.6
-
-### Patch Changes
-
- 8c02684: fix: handle stream error
- c515a32: feat: return raw output for agent toolcall result
-  - llamaindex@0.9.15
-
-## 0.0.5
-
-### Patch Changes
-
- 9d951b2: feat: support llamacloud in @llamaindex/server
- Updated dependencies [9d951b2]
-  - llamaindex@0.9.14
-
-## 0.0.4
-
-### Patch Changes
-
- 164cf7a: fix: custom next server start fail
-
-## 0.0.3
-
-### Patch Changes
-
- 299008b: feat: copy create-llama to @llamaindex/servers
- 75d6e29: feat: response source nodes in query tool output
- Updated dependencies [75d6e29]
-  - llamaindex@0.9.13
-
-## 0.0.2
-
-### Patch Changes
-
- f8a86e4: feat: @llamaindex/server
- Updated dependencies [21bebfc]
- Updated dependencies [93bc0ff]
- Updated dependencies [91a18e7]
- Updated dependencies [f8a86e4]
- Updated dependencies [5189b44]
- Updated dependencies [58a9446]
-  - @llamaindex/core@0.6.0
-  - @llamaindex/workflow@1.0.0
@@ -1,160 +0,0 @@
-# @llamaindex/server Package
-
-This package provides a Next.js-based server framework for running LlamaIndex workflows with both API endpoints and a chat UI interface.
-
-## Overview
-
-The `@llamaindex/server` package (`src/`) allows you to quickly launch LlamaIndex Workflows and Agent Workflows as an API server with an optional sophisticated chat UI. It combines a backend API server with a frontend React interface built on Next.js.
-
-## Key Components
-
-### Core Server (src/server.ts)
-
- **LlamaIndexServer class**: Main server implementation that wraps Next.js
- Handles workflow factory initialization and UI configuration
- Manages custom components and layout directories
- Creates HTTP server with custom routing for chat API
- Automatically configures client-side config in `public/config.js`
-
-### Chat Handler (src/handlers/chat.ts)
-
- **handleChat function**: Processes POST requests to `/api/chat`
- Converts AI SDK messages to LlamaIndex format
- Manages workflow execution with abort signals
- Streams responses back to client with optional question suggestions
- Handles errors and validation
-
-### Workflow Management (src/utils/workflow.ts)
-
- **runWorkflow function**: Executes workflows with proper event handling
- Transforms workflow events (tool calls, source nodes) into UI-friendly formats
- Downloads LlamaCloud files automatically in background
- Processes agent events and source annotations
-
-### Event System (src/events.ts)
-
- **Source Events**: For displaying document/file sources with metadata
- **Agent Events**: For showing agent tool usage and progress
- **Artifact Events**: For structured data like code/documents sent to Canvas UI
- Helper functions for converting LlamaIndex data to UI events
-
-### UI Generation (src/utils/gen-ui.ts)
-
- **generateEventComponent function**: Uses LLM to auto-generate React components
- Creates workflow for UI planning, aggregation, and code generation
- Validates generated components against supported dependencies
- Supports shadcn/ui, lucide-react, tailwind CSS, and LlamaIndex chat-ui
-
-### Types (src/types.ts)
-
- **WorkflowFactory**: Function signature for creating workflow instances
- **UIConfig**: Configuration options for chat interface
- **LlamaIndexServerOptions**: Main server configuration interface
-
-## Next.js Frontend
-
-The `next/` directory contains the React frontend:
-
-### API Routes
-
- `/api/chat/route.ts`: Main chat endpoint (delegates to handleChat)
- `/api/components/route.ts`: Serves custom UI components
- `/api/layout/route.ts`: Serves custom layout components
- `/api/files/[...slug]/route.ts`: File serving for data/output folders
-
-### UI Components
-
- Chat interface with message history, streaming responses, and canvas panel
- Extensible component system for custom workflow events
- Custom layout support for headers/footers
- Built with shadcn/ui components and Tailwind CSS
-
-## Build Process
-
-### Development
-
-```bash
-pnpm dev  # Watch mode with bunchee
-```
-
-### Production Build
-
-```bash
-pnpm build  # Multi-step build process
-```
-
-The build process:
-
-1. **prebuild**: Cleans dist, server, and temp directories
-2. **build**: Compiles source with bunchee to ESM/CJS
-3. **postbuild**: Prepares TypeScript server and Python static assets
-4. **prepare:ts-server**: Copies Next.js app, builds CSS, compiles API routes
-5. **prepare:py-static**: Creates static build for Python integration
-
-## Key Features
-
-### Workflow Integration
-
- Factory pattern for creating workflow instances per request
- Supports Agent Workflows with startAgentEvent/stopAgentEvent contract
- Automatic event transformation and streaming
- Built-in tool call and source node handling
-
-### UI Extensibility
-
- AI-generated components based on Zod schemas
- Custom layout sections (header/footer)
- Canvas panel for artifacts (documents, code)
- Event aggregation and real-time updates
-
-### File Handling
-
- Automatic mounting of `data/` and `output/` folders
- LlamaCloud file downloads in background
- Static asset serving through Next.js
-
-### Development Features
-
- Hot reload support for workflow code (beta)
- Dev mode panel for live code editing
- TypeScript support throughout
- Comprehensive error handling
-
-## Configuration
-
-Server configuration through `LlamaIndexServerOptions`:
-
- `workflow`: Factory function for creating workflow instances
- `uiConfig.starterQuestions`: Predefined questions for chat interface
- `uiConfig.componentsDir`: Directory for custom event components
- `uiConfig.layoutDir`: Directory for custom layout components
- `uiConfig.llamaCloudIndexSelector`: Enable LlamaCloud integration
- `uiConfig.devMode`: Enable live code editing
- `suggestNextQuestions`: Auto-suggest follow-up questions
-
-## Dependencies
-
-### Runtime Dependencies
-
- Next.js 15+ for server framework
- React 19+ for UI components
- LlamaIndex workflow engine
- Radix UI components (shadcn/ui)
- AI SDK for streaming responses
-
-### Development Dependencies
-
- Bunchee for bundling
- TypeScript for type safety
- Tailwind CSS for styling
- PostCSS for CSS processing
-
-## Usage Patterns
-
-1. **Basic Setup**: Create workflow factory, configure UI, start server
-2. **Custom Events**: Define Zod schemas, generate UI components with LLM
-3. **File Integration**: Use data/output folders for document processing
-4. **Development**: Use dev mode for iterative workflow development
-5. **Production**: Build static assets for deployment with Python backend
-
-The package serves as a complete solution for deploying LlamaIndex workflows with professional chat interfaces and extensible UI components.
@@ -1,333 +0,0 @@
-# LlamaIndex Server
-
-LlamaIndexServer is a Next.js-based application that allows you to quickly launch your [LlamaIndex Workflows](https://ts.llamaindex.ai/docs/llamaindex/modules/agents/workflows) and [Agent Workflows](https://ts.llamaindex.ai/docs/llamaindex/modules/agents/agent_workflow) as an API server with an optional chat UI. It provides a complete environment for running LlamaIndex workflows with both API endpoints and a user interface for interaction.
-
-## Features
-
- Add a sophisticated chatbot UI to your LlamaIndex workflow
- Edit code and document artifacts in an OpenAI Canvas-style UI
- Extendable UI components for events and headers
- Built on Next.js for high performance and easy API development
- Human-in-the-loop (HITL) support, check out the [Human-in-the-loop](https://github.com/run-llama/create-llama/blob/main/packages/server/examples/hitl/README.md) documentation for more details.
-
-## Installation
-
-```bash
-npm i @llamaindex/server
-```
-
-## Quick Start
-
-Create an `index.ts` file and add the following code:
-
-```ts
-import { LlamaIndexServer } from "@llamaindex/server";
-import { openai } from "@llamaindex/openai";
-import { agent } from "@llamaindex/workflow";
-import { wiki } from "@llamaindex/tools"; // or any other tool
-
-const createWorkflow = () => agent({ tools: [wiki()], llm: openai("gpt-4o") });
-
-new LlamaIndexServer({
-  workflow: createWorkflow,
-  uiConfig: {
-    starterQuestions: ["Who is the first president of the United States?"],
-  },
-}).start();
-```
-
-The `createWorkflow` function is a factory function that creates an [Agent Workflow](https://ts.llamaindex.ai/docs/llamaindex/modules/agents/agent_workflow) with a tool that retrieves information from Wikipedia in this case. For more details, read about the [Workflow factory contract](#workflow-factory-contract).
-
-## Running the Server
-
-In the same directory as `index.ts`, run the following command to start the server:
-
-```bash
-tsx index.ts
-```
-
-The server will start at `http://localhost:3000`
-
-You can also make a request to the server:
-
-```bash
-curl -X POST "http://localhost:3000/api/chat" -H "Content-Type: application/json" -d '{"message": "Who is the first president of the United States?"}'
-```
-
-## Configuration Options
-
-The `LlamaIndexServer` accepts the following configuration options:
-
- `workflow`: A callable function that creates a workflow instance for each request. See [Workflow factory contract](#workflow-factory-contract) for more details.
- `uiConfig`: An object to configure the chat UI containing the following properties:
-  - `starterQuestions`: List of starter questions for the chat UI (default: `[]`)
-  - `enableFileUpload`: Whether to enable file upload in the chat UI (default: `false`). See [Upload file example](./examples/private-file/README.md) for more details.
-  - `componentsDir`: The directory for custom UI components rendering events emitted by the workflow. The default is undefined, which does not render custom UI components.
-  - `layoutDir`: The directory for custom layout sections. The default value is `layout`. See [Custom Layout](#custom-layout) for more details.
-  - `llamaCloudIndexSelector`: Whether to show the LlamaCloud index selector in the chat UI (requires `LLAMA_CLOUD_API_KEY` to be set in the environment variables) (default: `false`)
-  - `devMode`: When enabled, you can update workflow code in the UI and see the changes immediately. It's currently in beta and only supports updating workflow code at `app/src/workflow.ts`. Please start the server in dev mode (`npm run dev`) to see this reload feature enabled.
- `suggestNextQuestions`: Whether to suggest next questions after the assistant's response (default: `true`). You can change the prompt for the next questions by setting the `NEXT_QUESTION_PROMPT` environment variable.
-
-LlamaIndexServer accepts all the configuration options from Nextjs Custom Server such as `port`, `hostname`, `dev`, etc.
-See all Nextjs Custom Server options [here](https://nextjs.org/docs/app/building-your-application/configuring/custom-server).
-
-## Workflow factory contract
-
-The `workflow` provided will be called for each chat request to initialize a new workflow instance. For advanced use cases, you can define `workflowFactory` with a `chatBody` parameter, which includes the list of UI messages in the request body.
-
-```typescript
-import { type Message } from "ai";
-import { agent } from "@llamaindex/workflow";
-
-const workflowFactory = (chatBody: { messages: Message[] }) => {
-  ...
-};
-```
-
-The contract of the generated workflow must be the same as for the [Agent Workflow](https://ts.llamaindex.ai/docs/llamaindex/modules/agents/agent_workflow). This means that the workflow must handle a `startAgentEvent` event, which is the entry point of the workflow and contains the following information in its `data` property:
-
-```typescript
-{
-  userInput: MessageContent;
-  chatHistory?: ChatMessage[] | undefined;
-};
-```
-
-The `userInput` is the latest user message and the `chatHistory` is the list of messages exchanged between the user and the workflow so far.
-
-Furthermore, the workflow must stop with a `stopAgentEvent` event to mark the end of the workflow. In between, the workflow can emit [UI events](#ai-generated-ui-components) to render custom UI components and [Artifact events](#sending-artifacts-to-the-ui) to send structured data like generated documents or code snippets to the UI.
-
-```ts
-import {
-  createStatefulMiddleware,
-  createWorkflow,
-  startAgentEvent,
-} from "@llamaindex/workflow";
-import { ChatMemoryBuffer, type ChatMessage, Settings } from "llamaindex";
-import { openai } from "@llamaindex/openai";
-import { wiki } from "@llamaindex/tools";
-
-Settings.llm = openai("gpt-4o");
-
-export const workflowFactory = async () => {
-  const workflow = createWorkflow();
-
-  workflow.handle([startAgentEvent], async ({ data }) => {
-    const { state, sendEvent } = getContext();
-    const messages = data.chatHistory;
-
-    const toolCallResponse = await chatWithTools(
-      Settings.llm,
-      [wiki()],
-      messages,
-    );
-
-    // using result from tool call and use `sendEvent` to emit the next event...
-  });
-
-  // define more workflow handling logic here...
-
-  // Finally stop with a `stopAgentEvent` event to mark the end of the workflow.
-  // return stopAgentEvent.with({
-  //   result: "This is the end!",
-  // });
-
-  return workflow;
-};
-```
-
-To generate sophisticated examples of workflows, we recommend using the [create-llama](https://github.com/run-llama/create-llama) project.
-
-## AI-generated UI Components
-
-The LlamaIndex server provides support for rendering workflow events using custom UI components, allowing you to extend and customize the chat interface.
-These components can be auto-generated using an LLM by providing a JSON schema of the workflow event.
-
-### UI Event Schema
-
-To display custom UI components, your workflow needs to emit UI events that have an event type for identification and a data object:
-
-```typescript
-class UIEvent extends WorkflowEvent<{
-  type: "ui_event";
-  data: UIEventData;
-}> {}
-```
-
-The `data` object can be any JSON object. To enable AI generation of the UI component, you need to provide a schema for that data (here we're using Zod):
-
-```typescript
-const MyEventDataSchema = z
-  .object({
-    stage: z
-      .enum(["retrieve", "analyze", "answer"])
-      .describe("The current stage the workflow process is in."),
-    progress: z
-      .number()
-      .min(0)
-      .max(1)
-      .describe("The progress in percent of the current stage"),
-  })
-  .describe("WorkflowStageProgress");
-
-type UIEventData = z.infer<typeof MyEventDataSchema>;
-```
-
-### Generate UI Components
-
-The `generateEventComponent` function uses an LLM to generate a custom UI component based on the JSON schema of a workflow event. The schema should contain accurate descriptions of each field so that the LLM can generate matching components for your use case. We've done this for you in the example above using the `describe` function from Zod:
-
-```typescript
-import { OpenAI } from "llamaindex";
-import { generateEventComponent } from "@llamaindex/server";
-import { MyEventDataSchema } from "./your-workflow";
-
-// Also works well with Claude 3.5 Sonnet and Google Gemini 2.5 Pro
-const llm = new OpenAI({ model: "gpt-4.1" });
-const code = generateEventComponent(MyEventDataSchema, llm);
-```
-
-After generating the code, we need to save it to a file. The file name must match the event type from your workflow (e.g., `ui_event.jsx` for handling events with `ui_event` type):
-
-```ts
-fs.writeFileSync("components/ui_event.jsx", code);
-```
-
-Feel free to modify the generated code to match your needs. If you're not satisfied with the generated code, we suggest improving the provided JSON schema first or trying another LLM.
-
-> Note that `generateEventComponent` is generating JSX code, but you can also provide a TSX file.
-
-## Custom Layout
-
-LlamaIndex Server supports custom layout for header and footer. To use custom layout, you need to initialize the LlamaIndex server with the `layoutDir` that contains your custom layout files.
-
-```ts
-new LlamaIndexServer({
-  workflow: createWorkflow,
-  uiConfig: {
-    layoutDir: "layout",
-  },
-}).start();
-```
-
-```
-layout/
-  header.tsx
-  footer.tsx
-```
-
-We currently support a custom header and footer for the chat interface. The syntax for these files is the same as for the event components in the `components` directory.
-Note that by default, we still render the default LlamaIndex Header. It is also the fallback when the custom header fails to render. Example layout files will be generated in the `layout` directory of your project when creating a new project with `create-llama`.
-
-### Server Setup
-
-To use the generated UI components, you need to initialize the LlamaIndex server with the `componentsDir` that contains your custom UI components:
-
-```ts
-new LlamaIndexServer({
-  workflow: createWorkflow,
-  uiConfig: {
-    componentsDir: "components",
-  },
-}).start();
-```
-
-## Sending Artifacts to the UI
-
-In addition to UI events for custom components, LlamaIndex Server supports a special `ArtifactEvent` to send structured data like generated documents or code snippets to the UI. These artifacts are displayed in a dedicated "Canvas" panel in the chat interface.
-
-### Artifact Event Structure
-
-To send an artifact, your workflow needs to emit an event with `type: "artifact"`. The `data` payload of this event should include:
-
- `type`: A string indicating the type of artifact (e.g., `"document"`, `"code"`).
- `created_at`: A timestamp (e.g., `Date.now()`) indicating when the artifact was created.
- `data`: An object containing the specific details of the artifact. The structure of this object depends on the artifact `type`.
-
-### Defining and Sending an ArtifactEvent
-
-First, define your artifact event using `workflowEvent` from `@llamaindex/workflow`:
-
-```typescript
-import { workflowEvent } from "@llamaindex/workflow";
-
-// Example for a document artifact
-const artifactEvent = workflowEvent<{
-  type: "artifact"; // Must be "artifact"
-  data: {
-    type: "document"; // Custom type for your artifact (e.g., "document", "code")
-    created_at: number;
-    data: {
-      // Specific data for the document artifact type
-      title: string;
-      content: string;
-      type: "markdown" | "html"; // document format
-    };
-  };
-}>();
-```
-
-Then, within your workflow logic, use `sendEvent` (obtained from `getContext()`) to emit the event:
-
-```typescript
-// Assuming 'sendEvent' is available in your workflow handler
-// and 'documentDetails' contains the content for the artifact.
-
-sendEvent(
-  artifactEvent.with({
-    type: "artifact", // This top-level type must be "artifact"
-    data: {
-      type: "document", // This is your specific artifact type
-      created_at: Date.now(),
-      data: {
-        title: "My Generated Document",
-        content: "# Hello World\nThis is a markdown document.",
-        type: "markdown",
-      },
-    },
-  }),
-);
-```
-
-This will send the artifact to the LlamaIndex Server UI, where it will be rendered in the [ChatCanvasPanel](/packages/server/next/app/components/ui/chat/canvas/panel.tsx) by a renderer depending on the artifact type. For type `document` this is using the [DocumentArtifactViewer](https://github.com/run-llama/chat-ui/blob/bacb75fc6edceacf742fba18632404a2483b5a81/packages/chat-ui/src/chat/canvas/artifacts/document.tsx#L17).
-
-## Default Endpoints and Features
-
-### Chat Endpoint
-
-The server includes a default chat endpoint at `/api/chat` for handling chat interactions.
-
-### Chat UI
-
-The server always provides a chat interface at the root path (`/`) with:
-
- Configurable starter questions
- Real-time chat interface
- API endpoint integration
-
-### Static File Serving
-
- The server automatically mounts the `data` and `output` folders at `{server_url}{api_prefix}/files/data` (default: `/api/files/data`) and `{server_url}{api_prefix}/files/output` (default: `/api/files/output`) respectively.
- Your workflows can use both folders to store and access files. By convention, the `data` folder is used for documents that are ingested, and the `output` folder is used for documents generated by the workflow.
-
-### Eject Mode
-
-If you want to fully customize the server UI and routes, you can use `npm eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
-By default, the ejected project will be in the `next` directory in the current working directory. You can change the output directory by providing a custom path after the `eject` command:
-
-```bash
-npm eject <path-to-output-directory>
-```
-
-How eject works:
-
-1. Init nextjs project with eslint, prettier, postcss, tailwindcss, shadcn components, etc.
-2. Copy your workflow definition and setting files in src/app/\* to the ejected project in app/api/chat
-3. Copy your components, data, output, storage folders to the ejected project
-4. Copy your current .env file to the ejected project
-5. Clean up files that are no longer needed and update imports
-
-## API Reference
-
- [LlamaIndexServer](https://ts.llamaindex.ai/docs/api/classes/LlamaIndexServer)
@@ -1,172 +0,0 @@
-#!/usr/bin/env node
-
-const fs = require("fs").promises;
-const path = require("path");
-
-// Resolve the project directory in node_modules/@llamaindex/server/project
-// This is the template that used to construct the nextjs project
-const projectDir = path.resolve(__dirname, "../project");
-
-// Resolve the src directory that contains workflow & setting files
-const srcDir = path.join(process.cwd(), "src");
-const srcAppDir = path.join(srcDir, "app");
-const generateFile = path.join(srcDir, "generate.ts");
-const envFile = path.join(process.cwd(), ".env");
-
-// The environment variables that are used as LlamaIndexServer configs
-const SERVER_CONFIG_VARS = [
-  {
-    key: "OPENAI_API_KEY",
-    defaultValue: "<your-openai-api-key>",
-    description: "OpenAI API key",
-  },
-  {
-    key: "SUGGEST_NEXT_QUESTIONS",
-    defaultValue: "true",
-    description: "Whether to suggest next questions (`suggestNextQuestions`)",
-  },
-  {
-    key: "COMPONENTS_DIR",
-    defaultValue: "components",
-    description: "Directory for custom components (`componentsDir`)",
-  },
-  {
-    key: "WORKFLOW_FILE_PATH",
-    defaultValue: "app/api/chat/app/workflow.ts",
-    description: "The path to the workflow file (will be updated in dev mode)",
-  },
-  {
-    key: "NEXT_PUBLIC_USE_COMPONENTS_DIR",
-    defaultValue: "true",
-    description: "Whether to enable components directory feature on frontend",
-  },
-  {
-    key: "NEXT_PUBLIC_DEV_MODE",
-    defaultValue: "true",
-    description: "Whether to enable dev mode (`devMode`)",
-  },
-  {
-    key: "NEXT_PUBLIC_STARTER_QUESTIONS",
-    defaultValue: '["Summarize the document", "What are the key points?"]',
-    description:
-      "Initial questions to display in the chat (`starterQuestions`)",
-  },
-  {
-    key: "NEXT_PUBLIC_SHOW_LLAMACLOUD_SELECTOR",
-    defaultValue: "false",
-    description:
-      "Whether to show LlamaCloud selector for frontend (`llamaCloudIndexSelector`)",
-  },
-];
-
-async function eject() {
-  try {
-    // validate required directories (nextjs project template, src directory, src/app directory)
-    const requiredDirs = [projectDir, srcDir, srcAppDir];
-    for (const dir of requiredDirs) {
-      const exists = await fs
-        .access(dir)
-        .then(() => true)
-        .catch(() => false);
-      if (!exists) {
-        console.error("Error: directory does not exist at", dir);
-        process.exit(1);
-      }
-    }
-
-    // Get destination directory from command line arguments (pnpm eject <path>)
-    const args = process.argv;
-    const outputIndex = args.indexOf("eject");
-    const destDir =
-      outputIndex !== -1 && args[outputIndex + 1]
-        ? path.resolve(args[outputIndex + 1]) // Use provided path after eject
-        : path.join(process.cwd(), "next"); // Default to "next" folder in the current working directory
-
-    // remove destination directory if it exists
-    await fs.rm(destDir, { recursive: true, force: true });
-
-    // create destination directory
-    await fs.mkdir(destDir, { recursive: true });
-
-    // Copy the nextjs project template to the destination directory
-    await fs.cp(projectDir, destDir, { recursive: true });
-
-    // copy src/app/* to destDir/app/api/chat
-    const chatRouteDir = path.join(destDir, "app", "api", "chat");
-    await fs.cp(srcAppDir, path.join(chatRouteDir, "app"), { recursive: true });
-
-    // nextjs project doesn't depend on @llamaindex/server anymore, we need to update the imports in workflow file
-    const workflowFile = path.join(chatRouteDir, "app", "workflow.ts");
-    let workflowContent = await fs.readFile(workflowFile, "utf-8");
-    workflowContent = workflowContent.replace("@llamaindex/server", "../utils");
-    await fs.writeFile(workflowFile, workflowContent);
-
-    // copy generate.ts if it exists
-    const genFilePath = path.join(chatRouteDir, "generate.ts");
-    const genFileExists = await copy(generateFile, genFilePath);
-    if (genFileExists) {
-      // update the import @llamaindex/server in generate.ts
-      let genContent = await fs.readFile(genFilePath, "utf-8");
-      genContent = genContent.replace("@llamaindex/server", "./utils");
-      await fs.writeFile(genFilePath, genContent);
-    }
-
-    // copy folders in root directory if exists
-    const rootFolders = ["components", "data", "output", "storage"];
-    for (const folder of rootFolders) {
-      await copy(path.join(process.cwd(), folder), path.join(destDir, folder));
-    }
-
-    // copy .env if it exists or create a new one
-    const envFileExists = await copy(envFile, path.join(destDir, ".env"));
-    if (!envFileExists) {
-      await fs.writeFile(path.join(destDir, ".env"), "");
-    }
-
-    // update .env file with more server configs
-    let envFileContent = await fs.readFile(path.join(destDir, ".env"), "utf-8");
-    for (const envVar of SERVER_CONFIG_VARS) {
-      const { key, defaultValue, description } = envVar;
-      if (!envFileContent.includes(key)) {
-        // if the key is not exists in the env file, add it
-        envFileContent += `\n# ${description}\n${key}=${defaultValue}\n`;
-      }
-    }
-    await fs.writeFile(path.join(destDir, ".env"), envFileContent);
-
-    // rename gitignore -> .gitignore
-    await fs.rename(
-      path.join(destDir, "gitignore"),
-      path.join(destDir, ".gitignore"),
-    );
-
-    // user can customize layout directory in nextjs project, remove layout api
-    await fs.rm(path.join(destDir, "app", "api", "layout"), {
-      recursive: true,
-      force: true,
-    });
-
-    // remove no-needed files
-    await fs.unlink(path.join(destDir, "public", "config.js"));
-    await fs.unlink(path.join(destDir, "next-build.config.ts"));
-
-    console.log("Successfully ejected @llamaindex/server to", destDir);
-  } catch (error) {
-    console.error("Error during eject:", error.message);
-    process.exit(1);
-  }
-}
-
-// copy src to dest if src exists, return true if src exists
-async function copy(src, dest) {
-  const srcExists = await fs
-    .access(src)
-    .then(() => true)
-    .catch(() => false);
-  if (srcExists) {
-    await fs.cp(src, dest, { recursive: true });
-  }
-  return srcExists;
-}
-
-eject();
@@ -1,186 +0,0 @@
-# LlamaIndex Server Examples
-
-This package contains practical examples demonstrating how to use the `@llamaindex/server` package to build chat applications with LlamaIndex workflows.
-
-## Package Overview
-
-The examples package is a collection of standalone TypeScript applications that showcase different features and capabilities of the LlamaIndex Server framework. Each example can be run independently to demonstrate specific functionality.
-
-## Key Features Demonstrated
-
-### 1. Simple Workflow (`simple-workflow/calculator.ts`)
-
- **Purpose**: Basic agent workflow with tool integration
- **Features**: Calculator agent with add tool, starter questions
- **Key Concepts**: Tool definition with Zod schemas, basic server setup
-
-### 2. Agentic RAG (`agentic-rag/index.ts`)
-
- **Purpose**: Retrieval-Augmented Generation with document querying
- **Features**: Vector store index, document ingestion, query engine tool, automatic question suggestions
- **Key Concepts**: RAG implementation, source node inclusion, embedding models
-
-### 3. Custom Layout (`custom-layout/index.ts` + `layout/header.tsx`)
-
- **Purpose**: Custom UI components and layout customization
- **Features**: Weather agent with custom header layout, branded interface
- **Key Concepts**: Layout directory configuration, React component integration
-
-### 4. Development Mode (`devmode/index.ts` + `src/app/workflow.ts`)
-
- **Purpose**: Live development and hot reloading capabilities
- **Features**: Dev mode panel, workflow file hot reloading, separate workflow file structure
- **Key Concepts**: Development workflow, file watching, modular architecture
-
-## Development Scripts
-
-```bash
-# Type checking
-pnpm typecheck
-
-# Run development server (defaults to simple-workflow/calculator.ts)
-pnpm dev
-
-# Run specific examples
-npx nodemon --exec tsx agentic-rag/index.ts
-npx nodemon --exec tsx custom-layout/index.ts
-npx nodemon --exec tsx devmode/index.ts --ignore src/app/workflow_*.ts  # Dev mode with file watching
-```
-
-## Environment Setup
-
-All examples require OpenAI API access:
-
-```bash
-export OPENAI_API_KEY=your_openai_api_key
-```
-
-## Dependencies
-
-### Core Dependencies
-
- `@llamaindex/server`: Main server framework (workspace dependency)
- `@llamaindex/workflow`: Workflow engine for agent creation
- `@llamaindex/openai`: OpenAI LLM and embedding integrations
- `@llamaindex/tools`: Tool utilities
- `@llamaindex/readers`: Document readers
- `llamaindex`: Core LlamaIndex library
- `zod`: Schema validation for tools
-
-### Development Dependencies
-
- `tsx`: TypeScript execution for development
- `nodemon`: File watching and auto-restart
- `typescript`: TypeScript compiler
-
-## Architecture Patterns
-
-### Workflow Factory Pattern
-
-All examples use the workflow factory pattern:
-
-```typescript
-const workflowFactory = () => agent({ tools: [...] });
-// or
-const workflowFactory = async () => { /* setup logic */ return agent({ tools: [...] }); };
-```
-
-### Server Configuration
-
-Standard server setup pattern:
-
-```typescript
-new LlamaIndexServer({
-  workflow: workflowFactory,
-  uiConfig: {
-    /* UI configuration */
-  },
-  port: 3000,
-}).start();
-```
-
-### Tool Definition Pattern
-
-Consistent tool creation with Zod schemas:
-
-```typescript
-tool({
-  name: "tool_name",
-  description: "Tool description",
-  parameters: z.object({
-    /* parameters */
-  }),
-  execute: (params) => {
-    /* implementation */
-  },
-});
-```
-
-## Example-Specific Features
-
-### Simple Workflow
-
- Basic arithmetic operations
- Minimal setup for learning
- Demonstrates core workflow concepts
-
-### Agentic RAG
-
- Document indexing with embeddings
- Vector similarity search
- Source node tracking for citations
- Auto-generated follow-up questions
-
-### Custom Layout
-
- Custom React components in `layout/` directory
- Branded header with navigation
- Layout directory configuration (`layoutDir: "layout"`)
-
-### Dev Mode
-
- Live code editing in browser
- Hot reloading of workflow files
- Separate workflow file organization
- Development panel UI
-
-## TypeScript Configuration
-
- Target: ES2022 with bundler module resolution
- Strict type checking enabled
- Excludes: `node_modules`, `dist`, `custom-layout/layout` (runtime components)
- Output: `dist/` directory
-
-## Development Workflow
-
-1. **Choose Example**: Select appropriate example for your use case
-2. **Environment Setup**: Configure OpenAI API key
-3. **Run Development Server**: Use `pnpm dev` or specific nodemon commands
-4. **Access UI**: Open browser at `http://localhost:3000`
-5. **Iterate**: Modify code and see changes in real-time
-
-## Common Patterns
-
-### Agent Creation
-
-All examples use the `agent()` function from `@llamaindex/workflow` with tool arrays.
-
-### UI Configuration
-
- `starterQuestions`: Predefined questions for user guidance
- `layoutDir`: Custom layout components directory
- `devMode`: Enable development features
- `suggestNextQuestions`: Auto-generate follow-up questions
-
-### Error Handling
-
-Examples demonstrate proper async/await patterns and error handling for LLM operations.
-
-## Integration Points
-
- **LlamaIndex Core**: Document processing, indexing, querying
- **OpenAI**: LLM and embedding model integration
- **React/Next.js**: Frontend UI components and server-side rendering
- **TypeScript**: Type safety throughout the application stack
-
-This examples package serves as a comprehensive reference for building production-ready chat applications with LlamaIndex workflows.
@@ -1,38 +0,0 @@
-# LlamaIndex Server Examples
-
-This directory provides example projects demonstrating how to use the LlamaIndex Server.
-
-## How to Run the Examples
-
-1. **Install dependencies**
-
-   In the root of this directory, run:
-
-   ```bash
-   pnpm install
-   ```
-
-2. **Set your OpenAI API key**
-
-   Export your OpenAI API key as an environment variable:
-
-   ```bash
-   export OPENAI_API_KEY=your_openai_api_key
-   ```
-
-3. **Start an example**
-
-   Replace `<example>` with the name of the example you want to run (e.g., `private-file`):
-
-   ```bash
-   pnpm nodemon --exec tsx <example>/index.ts
-   ```
-
-4. **Open the application in your browser**
-
-   Visit [http://localhost:3000](http://localhost:3000) to interact with the running example.
-
-## Notes
-
- Make sure you have [pnpm](https://pnpm.io/) installed.
- Each example may have its own specific instructions or requirements; check the individual example's index.ts for details.
@@ -1,38 +0,0 @@
-import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";
-import { LlamaIndexServer } from "@llamaindex/server";
-import { agent } from "@llamaindex/workflow";
-import { Document, Settings, VectorStoreIndex } from "llamaindex";
-
-Settings.llm = new OpenAI({
-  model: "gpt-4o-mini",
-});
-
-Settings.embedModel = new OpenAIEmbedding({
-  model: "text-embedding-3-small",
-});
-
-export const workflowFactory = async () => {
-  const index = await VectorStoreIndex.fromDocuments([
-    new Document({ text: "The dog is brown" }),
-    new Document({ text: "The dog is yellow" }),
-  ]);
-
-  const queryEngineTool = index.queryTool({
-    metadata: {
-      name: "query_document",
-      description: `This tool can retrieve information in documents`,
-    },
-    includeSourceNodes: true,
-  });
-
-  return agent({ tools: [queryEngineTool] });
-};
-
-new LlamaIndexServer({
-  workflow: workflowFactory,
-  suggestNextQuestions: true,
-  uiConfig: {
-    starterQuestions: ["What is the color of the dog?"],
-  },
-  port: 3000,
-}).start();
@@ -1,22 +0,0 @@
-This example demonstrates how to use the code generation workflow.
-
-```ts
-new LlamaIndexServer({
-  workflow: workflowFactory,
-  uiConfig: {
-    starterQuestions: [
-      "Generate a calculator app",
-      "Create a simple todo list app",
-    ],
-    componentsDir: "components",
-  },
-  port: 3000,
-}).start();
-```
-
-Export OpenAI API key and start the server in dev mode.
-
-```bash
-export OPENAI_API_KEY=<your-openai-api-key>
-npx nodemon --exec tsx index.ts
-```
@@ -1,132 +0,0 @@
-import { Badge } from "@/components/ui/badge";
-import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
-import { Progress } from "@/components/ui/progress";
-import { Skeleton } from "@/components/ui/skeleton";
-import { cn } from "@/lib/utils";
-import { Markdown } from "@llamaindex/chat-ui/widgets";
-import { ListChecks, Loader2, Wand2 } from "lucide-react";
-import { useEffect, useState } from "react";
-
-const STAGE_META = {
-  plan: {
-    icon: ListChecks,
-    badgeText: "Step 1/2: Planning",
-    gradient: "from-blue-100 via-blue-50 to-white",
-    progress: 33,
-    iconBg: "bg-blue-100 text-blue-600",
-    badge: "bg-blue-100 text-blue-700",
-  },
-  generate: {
-    icon: Wand2,
-    badgeText: "Step 2/2: Generating",
-    gradient: "from-violet-100 via-violet-50 to-white",
-    progress: 66,
-    iconBg: "bg-violet-100 text-violet-600",
-    badge: "bg-violet-100 text-violet-700",
-  },
-};
-
-function ArtifactWorkflowCard({ event }) {
-  const [visible, setVisible] = useState(event?.state !== "completed");
-  const [fade, setFade] = useState(false);
-
-  useEffect(() => {
-    if (event?.state === "completed") {
-      setVisible(false);
-    } else {
-      setVisible(true);
-      setFade(false);
-    }
-  }, [event?.state]);
-
-  if (!event || !visible) return null;
-
-  const { state, requirement } = event;
-  const meta = STAGE_META[state];
-
-  if (!meta) return null;
-
-  return (
-    <div className="flex min-h-[180px] w-full items-center justify-center py-2">
-      <Card
-        className={cn(
-          "w-full rounded-xl shadow-md transition-all duration-500",
-          "border-0",
-          fade && "pointer-events-none opacity-0",
-          `bg-gradient-to-br ${meta.gradient}`,
-        )}
-        style={{
-          boxShadow:
-            "0 2px 12px 0 rgba(80, 80, 120, 0.08), 0 1px 3px 0 rgba(80, 80, 120, 0.04)",
-        }}
-      >
-        <CardHeader className="flex flex-row items-center gap-2 px-3 pb-1 pt-2">
-          <div
-            className={cn(
-              "flex items-center justify-center rounded-full p-1",
-              meta.iconBg,
-            )}
-          >
-            <meta.icon className="h-5 w-5" />
-          </div>
-          <CardTitle className="flex items-center gap-2 text-base font-semibold">
-            <Badge className={cn("ml-1", meta.badge, "px-2 py-0.5 text-xs")}>
-              {meta.badgeText}
-            </Badge>
-          </CardTitle>
-        </CardHeader>
-        <CardContent className="px-3 py-1">
-          {state === "plan" && (
-            <div className="flex flex-col items-center gap-2 py-2">
-              <Loader2 className="mb-1 h-6 w-6 animate-spin text-blue-400" />
-              <div className="text-center text-sm font-medium text-blue-900">
-                Analyzing your request...
-              </div>
-              <Skeleton className="mt-1 h-3 w-1/2 rounded-full" />
-            </div>
-          )}
-          {state === "generate" && (
-            <div className="flex flex-col gap-2 py-2">
-              <div className="flex items-center gap-1">
-                <Loader2 className="h-4 w-4 animate-spin text-violet-400" />
-                <span className="text-sm font-medium text-violet-900">
-                  Working on the requirement:
-                </span>
-              </div>
-              <div className="max-h-24 overflow-auto rounded-lg border border-violet-200 bg-violet-50 px-2 py-1 text-xs">
-                {requirement ? (
-                  <Markdown content={requirement} />
-                ) : (
-                  <span className="italic text-violet-400">
-                    No requirements available yet.
-                  </span>
-                )}
-              </div>
-            </div>
-          )}
-        </CardContent>
-        <div className="px-3 pb-2 pt-1">
-          <Progress
-            value={meta.progress}
-            className={cn(
-              "h-1 rounded-full bg-gray-200",
-              state === "plan" && "bg-blue-200",
-              state === "generate" && "bg-violet-200",
-            )}
-          />
-        </div>
-      </Card>
-    </div>
-  );
-}
-
-export default function Component({ events }) {
-  const aggregateEvents = () => {
-    if (!events || events.length === 0) return null;
-    return events[events.length - 1];
-  };
-
-  const event = aggregateEvents();
-
-  return <ArtifactWorkflowCard event={event} />;
-}
@@ -1,20 +0,0 @@
-import { OpenAI } from "@llamaindex/openai";
-import { LlamaIndexServer } from "@llamaindex/server";
-import { Settings } from "llamaindex";
-import { workflowFactory } from "./src/app/workflow";
-
-// Register gpt-4o-mini as the LLM on the global llamaindex Settings.
-Settings.llm = new OpenAI({ model: "gpt-4o-mini" });
-
-// Starter prompts passed to the server's UI configuration.
-const starterQuestions = [
-  "Generate a calculator app",
-  "Create a simple todo list app",
-];
-
-// Create the server for the artifact workflow and start it on port 3000.
-const server = new LlamaIndexServer({
-  workflow: workflowFactory,
-  uiConfig: { starterQuestions, componentsDir: "components" },
-  port: 3000,
-});
-server.start();
@@ -1,337 +0,0 @@
-import { artifactEvent, extractLastArtifact } from "@llamaindex/server";
-import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";
-
-import {
-  agentStreamEvent,
-  createStatefulMiddleware,
-  createWorkflow,
-  startAgentEvent,
-  stopAgentEvent,
-  workflowEvent,
-} from "@llamaindex/workflow";
-
-import { z } from "zod";
-
-/**
- * Shape of the plan produced by the planning step. The planner's JSON reply
- * is validated against this schema in `workflowFactory`.
- *
- * - `next_step`: whether to generate code ("coding") or just reply ("answering").
- * - `language` / `file_name`: optional and nullable.
- * - `requirement`: free-text description of what the next step must do.
- */
-export const RequirementSchema = z.object({
-  next_step: z.enum(["answering", "coding"]),
-  language: z.string().nullable().optional(),
-  file_name: z.string().nullable().optional(),
-  requirement: z.string(),
-});
-
-/** Static type inferred from `RequirementSchema`. */
-export type Requirement = z.infer<typeof RequirementSchema>;
-
-/**
- * Payload of the progress events sent while the workflow runs: the current
- * state plus, optionally, the requirement being worked on.
- */
-export const UIEventSchema = z.object({
-  type: z.literal("ui_event"),
-  data: z.object({
-    state: z
-      .enum(["plan", "generate", "completed"])
-      .describe(
-        "The current state of the workflow: 'plan', 'generate', or 'completed'.",
-      ),
-    requirement: z
-      .string()
-      .optional()
-      .describe(
-        "An optional requirement creating or updating a code, if applicable.",
-      ),
-  }),
-});
-
-/** Static type inferred from `UIEventSchema`. */
-export type UIEvent = z.infer<typeof UIEventSchema>;
-// Starts the planning step; `context` is the serialized previous artifact, if any.
-const planEvent = workflowEvent<{
-  userInput: MessageContent;
-  context?: string | undefined;
-}>();
-
-// Starts the code generation step for a planned requirement.
-const generateArtifactEvent = workflowEvent<{
-  requirement: Requirement;
-}>();
-
-// Starts the final answer step; carries no data (handlers emit it with `{}`).
-const synthesizeAnswerEvent = workflowEvent<object>();
-
-// Progress event whose payload follows `UIEvent`.
-const uiEvent = workflowEvent<UIEvent>();
-
-/**
- * Builds the code-artifact workflow from a request body.
- *
- * Steps, in order:
- * 1. `startAgentEvent`: load the chat history and user input into memory,
- *    then emit `planEvent` (with the previous artifact as context, if any).
- * 2. `planEvent`: ask the LLM for a JSON plan and validate it with
- *    `RequirementSchema`; emit `generateArtifactEvent` for "coding" plans,
- *    otherwise `synthesizeAnswerEvent`.
- * 3. `generateArtifactEvent`: ask the LLM for the code, send it as an
- *    `artifactEvent`, then emit `synthesizeAnswerEvent`.
- * 4. `synthesizeAnswerEvent`: stream the final answer and emit `stopAgentEvent`.
- *
- * @param reqBody Request body; passed to `extractLastArtifact` to recover the
- *   previous artifact.
- * @returns The configured workflow.
- * @throws Inside handlers: when user input is missing, when the planner reply
- *   has no ```json block, or when that block fails to parse or validate.
- */
-export function workflowFactory(reqBody: unknown) {
-  const llm = Settings.llm;
-
-  const { withState, getContext } = createStatefulMiddleware(() => {
-    return {
-      memory: new ChatMemoryBuffer({ llm }),
-      lastArtifact: extractLastArtifact(reqBody),
-    };
-  });
-  const workflow = withState(createWorkflow());
-
-  workflow.handle([startAgentEvent], async ({ data }) => {
-    const { userInput, chatHistory = [] } = data;
-    // Prepare chat history
-    const { state } = getContext();
-    // Put user input to the memory
-    if (!userInput) {
-      throw new Error("Missing user input to start the workflow");
-    }
-    state.memory.set(chatHistory);
-    state.memory.put({ role: "user", content: userInput });
-
-    return planEvent.with({
-      userInput: userInput,
-      context: state.lastArtifact
-        ? JSON.stringify(state.lastArtifact)
-        : undefined,
-    });
-  });
-
-  workflow.handle([planEvent], async ({ data: planData }) => {
-    const { sendEvent, state } = getContext();
-    sendEvent(
-      uiEvent.with({
-        type: "ui_event",
-        data: {
-          state: "plan",
-        },
-      }),
-    );
-    const user_msg = planData.userInput;
-    const context = planData.context
-      ? `## The context is: \n${planData.context}\n`
-      : "";
-    const prompt = `
-You are a product analyst responsible for analyzing the user's request and providing the next step for code or document generation.
-You are helping user with their code artifact. To update the code, you need to plan a coding step.
-
-Follow these instructions:
-1. Carefully analyze the conversation history and the user's request to determine what has been done and what the next step should be.
-2. The next step must be one of the following two options:
-    - "coding": To make the changes to the current code.
-    - "answering": If you don't need to update the current code or need clarification from the user.
-Important: Avoid telling the user to update the code themselves, you are the one who will update the code (by planning a coding step).
-3. If the next step is "coding", you may specify the language ("typescript" or "python") and file_name if known, otherwise set them to null. 
-4. The requirement must be provided clearly what is the user request and what need to be done for the next step in details
-    as precise and specific as possible, don't be stingy with in the requirement.
-5. If the next step is "answering", set language and file_name to null, and the requirement should describe what to answer or explain to the user.
-6. Be concise; only return the requirements for the next step.
-7. The requirements must be in the following format:
-    \`\`\`json
-    {
-        "next_step": "answering" | "coding",
-        "language": "typescript" | "python" | null,
-        "file_name": string | null,
-        "requirement": string
-    }
-    \`\`\`
-
-## Example 1:
-User request: Create a calculator app.
-You should return:
-\`\`\`json
-{
-    "next_step": "coding",
-    "language": "typescript",
-    "file_name": "calculator.tsx",
-    "requirement": "Generate code for a calculator app that has a simple UI with a display and button layout. The display should show the current input and the result. The buttons should include basic operators, numbers, clear, and equals. The calculation should work correctly."
-}
-\`\`\`
-
-## Example 2:
-User request: Explain how the game loop works.
-Context: You have already generated the code for a snake game.
-You should return:
-\`\`\`json
-{
-    "next_step": "answering",
-    "language": null,
-    "file_name": null,
-    "requirement": "The user is asking about the game loop. Explain how the game loop works."
-}
-\`\`\`
-
-${context}
-
-Now, plan the user's next step for this request:
-${user_msg}
-`;
-
-    const response = await llm.complete({
-      prompt,
-    });
-    // parse the response to Requirement
-    // 1. use regex to find the json block
-    const jsonBlock = response.text.match(/```json\s*([\s\S]*?)\s*```/);
-    if (!jsonBlock) {
-      throw new Error("No JSON block found in the response.");
-    }
-    // 2. parse and validate; both JSON.parse and the schema throw on bad input
-    const requirement = RequirementSchema.parse(JSON.parse(jsonBlock[1]));
-    state.memory.put({
-      role: "assistant",
-      content: `The plan for next step: \n${response.text}`,
-    });
-
-    if (requirement.next_step === "coding") {
-      return generateArtifactEvent.with({
-        requirement,
-      });
-    } else {
-      return synthesizeAnswerEvent.with({});
-    }
-  });
-
-  workflow.handle([generateArtifactEvent], async ({ data: planData }) => {
-    const { sendEvent, state } = getContext();
-
-    sendEvent(
-      uiEvent.with({
-        type: "ui_event",
-        data: {
-          state: "generate",
-          requirement: planData.requirement.requirement,
-        },
-      }),
-    );
-
-    const previousArtifact = state.lastArtifact
-      ? JSON.stringify(state.lastArtifact)
-      : "There is no previous artifact";
-    const requirementText = planData.requirement.requirement;
-
-    // Interpolate directly instead of chaining String.replace with string
-    // replacements: replace() interprets "$&", "$'", "$`" and "$$" in the
-    // replacement text, and a literal "{requirement}" inside the previous
-    // artifact would capture the second substitution.
-    const prompt = `
-        You are a skilled developer who can help user with coding.
-        You are given a task to generate or update a code for a given requirement.
-
-        ## Follow these instructions:
-        **1. Carefully read the user's requirements.** 
-           If any details are ambiguous or missing, make reasonable assumptions and clearly reflect those in your output.
-           If the previous code is provided:
-           + Carefully analyze the code with the request to make the right changes.
-           + Avoid making a lot of changes from the previous code if the request is not to write the code from scratch again.
-        **2. For code requests:**
-           - If the user does not specify a framework or language, default to a React component using the Next.js framework.
-           - For Next.js, use Shadcn UI components, Typescript, @types/node, @types/react, @types/react-dom, PostCSS, and TailwindCSS.
-           The import pattern should be:
-           \`\`\`typescript
-           import { ComponentName } from "@/components/ui/component-name"
-           import { Markdown } from "@llamaindex/chat-ui"
-           import { cn } from "@/lib/utils"
-           \`\`\`
-           - Ensure the code is idiomatic, production-ready, and includes necessary imports.
-           - Only generate code relevant to the user's request—do not add extra boilerplate.
-        **3. Don't be verbose on response**
-           - No other text or comments only return the code which wrapped by \`\`\`language\`\`\` block.
-           - If the user's request is to update the code, only return the updated code.
-        **4. Only the following languages are allowed: "typescript", "python".**
-        **5. If there is no code to update, return the reason without any code block.**
-           
-        ## Example:
-        \`\`\`typescript
-        import React from "react";
-        import { Button } from "@/components/ui/button";
-        import { cn } from "@/lib/utils";
-
-        export default function MyComponent() {
-        return (
-           <div className="flex flex-col items-center justify-center h-screen">
-              <Button>Click me</Button>
-           </div>
-        );
-        }
-        \`\`\`
-
-        The previous code is:
-        ${previousArtifact}
-
-        Now, i have to generate the code for the following requirement:
-        ${requirementText}
-      `;
-
-    const response = await llm.complete({
-      prompt,
-    });
-
-    // Extract the code from the response. The match is greedy, so it spans
-    // from the first opening fence to the last closing fence in the reply.
-    const codeMatch = response.text.match(/```(\w+)([\s\S]*)```/);
-    if (!codeMatch) {
-      // No code block: nothing to publish, go straight to the answer step.
-      return synthesizeAnswerEvent.with({});
-    }
-
-    const code = codeMatch[2].trim();
-
-    // Put the generated code to the memory
-    state.memory.put({
-      role: "assistant",
-      content: `Updated the code: \n${response.text}`,
-    });
-
-    // To show the Canvas panel for the artifact
-    sendEvent(
-      artifactEvent.with({
-        type: "artifact",
-        data: {
-          type: "code",
-          created_at: Date.now(),
-          data: {
-            language: planData.requirement.language || "",
-            file_name: planData.requirement.file_name || "",
-            code,
-          },
-        },
-      }),
-    );
-
-    return synthesizeAnswerEvent.with({});
-  });
-
-  workflow.handle([synthesizeAnswerEvent], async () => {
-    const { sendEvent, state } = getContext();
-
-    const chatHistory = await state.memory.getMessages();
-    const messages = [
-      ...chatHistory,
-      {
-        role: "system" as const,
-        content: `
-        You are a helpful assistant who is responsible for explaining the work to the user.
-        Based on the conversation history, provide an answer to the user's question. 
-        The user has access to the code so avoid mentioning the whole code again in your response.
-      `,
-      },
-    ];
-
-    const responseStream = await llm.chat({
-      messages,
-      stream: true,
-    });
-
-    sendEvent(
-      uiEvent.with({
-        type: "ui_event",
-        data: {
-          state: "completed",
-        },
-      }),
-    );
-
-    // Forward every chunk as it arrives while accumulating the full answer.
-    let response = "";
-    for await (const chunk of responseStream) {
-      response += chunk.delta;
-      sendEvent(
-        agentStreamEvent.with({
-          delta: chunk.delta,
-          response: "",
-          currentAgentName: "assistant",
-          raw: chunk,
-        }),
-      );
-    }
-
-    return stopAgentEvent.with({
-      result: response,
-    });
-  });
-
-  return workflow;
-}
@@ -1,32 +0,0 @@
-import { OpenAI } from "@llamaindex/openai";
-import { LlamaIndexServer } from "@llamaindex/server";
-import { agent } from "@llamaindex/workflow";
-import { Settings, tool } from "llamaindex";
-import { z } from "zod";
-
-// Register gpt-4o-mini as the LLM on the global llamaindex Settings.
-Settings.llm = new OpenAI({ model: "gpt-4o-mini" });
-
-// Demo tool: always answers "sunny" for whatever city is requested.
-const weatherTool = tool({
-  name: "weather",
-  description: "Get the weather in a given city",
-  parameters: z.object({ city: z.string() }),
-  execute: ({ city }) => `The weather in ${city} is sunny`,
-});
-
-const weatherAgent = agent({ tools: [weatherTool] });
-
-// Starter prompts passed to the server's UI configuration.
-const starterQuestions = [
-  "What is the weather in Tokyo?",
-  "What is the weather in Ho Chi Minh City?",
-];
-
-// Create the server for the weather agent and start it on port 3000.
-const server = new LlamaIndexServer({
-  workflow: () => weatherAgent,
-  uiConfig: { starterQuestions, layoutDir: "layout" },
-  port: 3000,
-});
-server.start();
@@ -1,40 +0,0 @@
-"use client";
-
-import { Sparkles, Star } from "lucide-react";
-
-/**
- * Static header bar: app title on the left; "Built by LlamaIndex" link, logo
- * and a "Star on GitHub" link on the right. Both links open in a new tab.
- */
-export default function Header() {
-  // Attributes shared by the two external links.
-  const externalLink = { target: "_blank", rel: "noopener noreferrer" };
-
-  const title = (
-    <div className="flex items-center gap-2">
-      <Sparkles className="size-4" />
-      <h1 className="font-semibold">LlamaIndex App</h1>
-    </div>
-  );
-
-  const credit = (
-    <div className="flex items-center gap-2">
-      <a
-        href="https://www.llamaindex.ai/"
-        {...externalLink}
-        className="text-sm text-gray-600 hover:text-gray-800 dark:text-gray-400 dark:hover:text-gray-200"
-      >
-        Built by LlamaIndex
-      </a>
-      <img
-        className="h-[24px] w-[24px] rounded-sm"
-        src="/llama.png"
-        alt="Llama Logo"
-      />
-    </div>
-  );
-
-  return (
-    <div className="flex items-center justify-between p-2 px-4">
-      {title}
-      <div className="flex items-center justify-end gap-4">
-        {credit}
-        <a
-          href="https://github.com/run-llama/LlamaIndexTS"
-          {...externalLink}
-          className="hover:bg-accent flex items-center gap-2 rounded-md border border-gray-300 px-2 py-1 text-sm"
-        >
-          <Star className="size-4" />
-          Star on GitHub
-        </a>
-      </div>
-    </div>
-  );
-}
@@ -1,20 +0,0 @@
-This example shows how to use the dev mode of the server.
-
-First, we need to set `devMode` to `true` in the `uiConfig` of the server.
-
-```ts
-new LlamaIndexServer({
-  workflow: workflowFactory,
-  uiConfig: {
-    devMode: true,
-  },
-  port: 3000,
-}).start();
-```
-
-Export your OpenAI API key and start the server in dev mode.
-
-```bash
-export OPENAI_API_KEY=<your-openai-api-key>
-npx nodemon --exec tsx index.ts --ignore src/app/workflow_*.ts
-```
@@ -1,20 +0,0 @@
-import { OpenAI } from "@llamaindex/openai";
-import { LlamaIndexServer } from "@llamaindex/server";
-import { Settings } from "llamaindex";
-import { workflowFactory } from "./src/app/workflow";
-
-// Register gpt-4o-mini as the LLM on the global llamaindex Settings.
-Settings.llm = new OpenAI({ model: "gpt-4o-mini" });
-
-// Starter prompts passed to the server's UI configuration.
-const starterQuestions = [
-  "What is the weather in Tokyo?",
-  "What is the weather in New York?",
-];
-
-// Create the server with dev mode enabled and start it on port 3000.
-const server = new LlamaIndexServer({
-  workflow: workflowFactory,
-  uiConfig: { devMode: true, starterQuestions },
-  port: 3000,
-});
-server.start();
@@ -1,16 +0,0 @@
-import { agent } from "@llamaindex/workflow";
-import { tool } from "llamaindex";
-import { z } from "zod";
-
-/**
- * Creates an agent with a single demo `weather` tool that always reports
- * sunny weather for the requested city.
- */
-export const workflowFactory = async () => {
-  const weatherTool = tool({
-    name: "weather",
-    description: "Get the weather in a specific city",
-    parameters: z.object({ city: z.string() }),
-    execute: ({ city }) => `The weather in ${city} is sunny`,
-  });
-
-  return agent({ tools: [weatherTool] });
-};
@@ -1,172 +0,0 @@
-# Human in the Loop
-
-This example shows how to use the LlamaIndexServer with a human in the loop. It allows you to start CLI commands that are reviewed by a human before execution.
-
-## Getting Started
-
-### Environment Setup
-
-Export your OpenAI API key:
-
-```bash
-export OPENAI_API_KEY=<your-openai-api-key>
-```
-
-### Starting the Server
-
-Run the server in development mode:
-
-```bash
-npx nodemon --exec tsx index.ts --ignore output/*
-```
-
-### Access the Application
-
-Open your browser and go to:
-
-```
-http://localhost:3000
-```
-
-You will see the LlamaIndexServer UI, where you can interact with the HITL agent. Try "List all files in the current directory" and see how the agent pauses and waits for a human response before executing the command.
-
-## How does HITL work?
-
-### Events
-
-The human-in-the-loop approach used here is based on a simple idea: the workflow pauses and waits for a human response before proceeding to the next step.
-
-To do this, you will need to implement two custom events:
-
- [HumanInputEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is used to request input from the user.
- [HumanResponseEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is sent to the workflow to resume execution with input from the user.
-
-In this example, we have implemented these two custom events in [`events.ts`](src/app/events.ts):
-
- `cliHumanInputEvent` – to request input from the user for CLI command execution.
- `cliHumanResponseEvent` – to resume the workflow with the response from the user.
-
-```typescript
-export const cliHumanInputEvent = humanInputEvent<{
-  type: "cli_human_input";
-  data: { command: string };
-  response: typeof cliHumanResponseEvent;
-}>();
-
-export const cliHumanResponseEvent = humanResponseEvent<{
-  type: "human_response";
-  data: { execute: boolean; command: string };
-}>();
-```
-
-### UI Component
-
-HITL also needs a custom UI component that is shown when the LlamaIndexServer receives the `cliHumanInputEvent`. The name of the component is defined in the `type` field of the `cliHumanInputEvent`; in our case it is `cli_human_input`, which corresponds to the [cli_human_input.tsx](./components/cli_human_input.tsx) component.
-
-The custom component must use `append` to send a message with a `human_response` annotation. The data of the annotation must match the format of the response event `cliHumanResponseEvent`. In our case, to approve execution of the command `ls -l`, we would send:
-
-```tsx
-append({
-  content: "Yes",
-  role: "user",
-  annotations: [
-    {
-      type: "human_response",
-      data: {
-        execute: true,
-        command: "ls -l", // The command to execute
-      },
-    },
-  ],
-});
-```
-
-This component displays the command and lets the user choose whether to execute it or cancel.
-
-### Workflow Implementation
-
-The workflow is implemented in [`workflow.ts`](src/app/workflow.ts) using LlamaIndex workflows. The workflow handles three main steps:
-
-1. **Initial Request Handling**: When a user input is received, the workflow uses `chatWithTools` to determine if a CLI command should be executed. If so, it emits a `cliHumanInputEvent` to request user permission.
-
-```typescript
-workflow.handle([startAgentEvent], async ({ data }) => {
-  const { userInput, chatHistory = [] } = data;
-
-  const toolCallResponse = await chatWithTools(
-    llm,
-    [cliExecutor],
-    chatHistory.concat({ role: "user", content: userInput }),
-  );
-
-  const cliExecutorToolCall = toolCallResponse.toolCalls.find(
-    (toolCall) => toolCall.name === cliExecutor.metadata.name,
-  );
-
-  const command = cliExecutorToolCall?.input?.command as string;
-  if (command) {
-    return cliHumanInputEvent.with({
-      type: "cli_human_input",
-      data: { command },
-      response: cliHumanResponseEvent,
-    });
-  }
-
-  return summaryEvent.with("");
-});
-```
-
-2. **Human Response Handling**: After receiving human input, the workflow either executes the command or cancels based on the user's choice.
-
-```typescript
-workflow.handle([cliHumanResponseEvent], async ({ data }) => {
-  const { command, execute } = data.data;
-
-  if (!execute) {
-    return summaryEvent.with(`User reject to execute the command ${command}`);
-  }
-
-  const result = (await cliExecutor.call({ command })) as string;
-
-  return summaryEvent.with(
-    `Executed the command ${command} and got the result: ${result}`,
-  );
-});
-```
-
-3. **Final Response**: The workflow generates a final response based on the execution result and streams it back to the user.
-
-### Tools
-
-The CLI executor tool is defined in [`tools.ts`](src/app/tools.ts):
-
-```typescript
-export const cliExecutor = tool({
-  name: "cli_executor",
-  description: "This tool executes a command and returns the output.",
-  parameters: z.object({ command: z.string() }),
-  execute: async ({ command }) => {
-    try {
-      const output = execSync(command, {
-        encoding: "utf-8",
-      });
-      return output;
-    } catch (error) {
-      console.error(error);
-      return "Command failed";
-    }
-  },
-});
-```
-
-## Architecture
-
-The HITL implementation consists of:
-
-1. **Workflow Factory** (`workflow.ts`): Creates and configures the workflow with event handlers
-2. **Events** (`events.ts`): Defines typed events for human input and response
-3. **Tools** (`tools.ts`): Implements the CLI executor tool
-4. **UI Component** (`components/cli_human_input.tsx`): Provides the user interface for human approval
-5. **Server Entry** (`index.ts`): Configures and starts the LlamaIndexServer
-
-This architecture ensures that dangerous operations like CLI command execution require explicit human approval before proceeding.
@@ -1,95 +0,0 @@
-import { Button } from "@/components/ui/button";
-import { Card, CardContent, CardFooter } from "@/components/ui/card";
-import { JSONValue, useChatUI } from "@llamaindex/chat-ui";
-import React, { FC, useState } from "react";
-import { z } from "zod";
-
-// Mirrors the `data` payload of `cliHumanInputEvent` (`{ command: string }`)
-// declared in src/app/events.ts. Events that do not match are ignored by the
-// component below.
-const CLIInputEventSchema = z.object({
-  command: z.string(),
-});
-type CLIInputEvent = z.infer<typeof CLIInputEventSchema>;
-
-/**
- * Approval card for a CLI command: shows the command from the most recent
- * valid input event and lets the user answer "Yes" or "No".
- *
- * Either answer is sent through `append` as a user message carrying a
- * `human_response` annotation (`{ execute, command }`). The buttons are
- * hidden once an answer has been sent.
- *
- * @param events Raw events for this component; entries that do not match
- *   `CLIInputEventSchema` are ignored.
- */
-const CLIHumanInput: FC<{
-  events: JSONValue[];
-}> = ({ events }) => {
-  // Last event that has the expected `{ command }` shape, if any.
-  const inputEvent = (events || [])
-    .map((ev) => {
-      const parseResult = CLIInputEventSchema.safeParse(ev);
-      return parseResult.success ? parseResult.data : null;
-    })
-    .filter((ev): ev is CLIInputEvent => ev !== null)
-    .at(-1);
-
-  const { append } = useChatUI();
-  const [confirmedValue, setConfirmedValue] = useState<boolean | null>(null);
-  const [editableCommand, setEditableCommand] = useState<string | undefined>(
-    inputEvent?.command,
-  );
-
-  // Update editableCommand if inputEvent changes (e.g. new event comes in)
-  React.useEffect(() => {
-    setEditableCommand(inputEvent?.command);
-  }, [inputEvent?.command]);
-
-  const handleConfirm = () => {
-    // Never approve execution without a command: the response event
-    // requires `command` to be a string.
-    if (!editableCommand) {
-      return;
-    }
-    append({
-      content: "Yes",
-      role: "user",
-      annotations: [
-        {
-          type: "human_response",
-          data: {
-            execute: true,
-            command: editableCommand, // Use editable command
-          },
-        },
-      ],
-    });
-    setConfirmedValue(true);
-  };
-
-  const handleCancel = () => {
-    append({
-      content: "No",
-      role: "user",
-      annotations: [
-        {
-          type: "human_response",
-          data: {
-            execute: false,
-            command: inputEvent?.command,
-          },
-        },
-      ],
-    });
-    setConfirmedValue(false);
-  };
-
-  return (
-    <Card className="my-4">
-      <CardContent className="pt-6">
-        <p className="text-sm text-gray-700">
-          Do you want to execute the following command?
-        </p>
-        <input
-          disabled
-          type="text"
-          value={editableCommand || ""}
-          onChange={(e) => setEditableCommand(e.target.value)}
-          className="my-2 w-full overflow-x-auto rounded border border-gray-300 bg-gray-100 p-3 font-mono text-xs text-gray-800"
-        />
-      </CardContent>
-      {confirmedValue === null ? (
-        <CardFooter className="flex justify-end gap-2">
-          <Button onClick={handleConfirm} disabled={!editableCommand}>
-            Yes
-          </Button>
-          <Button onClick={handleCancel}>No</Button>
-        </CardFooter>
-      ) : null}
-    </Card>
-  );
-};
-
-export default CLIHumanInput;
@@ -1,20 +0,0 @@
-import { OpenAI } from "@llamaindex/openai";
-import { LlamaIndexServer } from "@llamaindex/server";
-import { Settings } from "llamaindex";
-import { workflowFactory } from "./src/app/workflow";
-
-// Register gpt-4o-mini as the LLM on the global llamaindex Settings.
-Settings.llm = new OpenAI({ model: "gpt-4o-mini" });
-
-// Starter prompts passed to the server's UI configuration.
-const starterQuestions = [
-  "Check status of git in the current directory",
-  "List all files in the current directory",
-];
-
-// Create the server for the human-in-the-loop workflow and start it on port 3000.
-const server = new LlamaIndexServer({
-  workflow: workflowFactory,
-  uiConfig: { starterQuestions, componentsDir: "components" },
-  port: 3000,
-});
-server.start();
@@ -1,12 +0,0 @@
-import { humanInputEvent, humanResponseEvent } from "@llamaindex/server";
-
-/**
- * Event asking the user whether a CLI command may be executed.
- * `data.command` is the command awaiting approval; `response` names the
- * event type that carries the user's answer.
- */
-export const cliHumanInputEvent = humanInputEvent<{
-  type: "cli_human_input";
-  data: { command: string };
-  response: typeof cliHumanResponseEvent;
-}>();
-
-/**
- * Event carrying the user's answer: `execute` says whether the command was
- * approved and `command` is the command the answer refers to.
- */
-export const cliHumanResponseEvent = humanResponseEvent<{
-  type: "human_response";
-  data: { execute: boolean; command: string };
-}>();
@@ -1,20 +0,0 @@
-import { execSync } from "child_process";
-import { tool } from "llamaindex";
-import { z } from "zod";
-
-/**
- * Tool that runs a shell command synchronously with `execSync` and returns
- * its output as a UTF-8 string. On failure the error is logged and the
- * fixed string "Command failed" is returned instead of throwing.
- *
- * NOTE(review): the command string is executed as-is, so callers must make
- * sure it has been approved before invoking this tool.
- */
-export const cliExecutor = tool({
-  name: "cli_executor",
-  description: "This tool executes a command and returns the output.",
-  parameters: z.object({ command: z.string() }),
-  execute: async ({ command }) => {
-    try {
-      return execSync(command, { encoding: "utf-8" });
-    } catch (err) {
-      console.error(err);
-      return "Command failed";
-    }
-  },
-});
@@ -1,106 +0,0 @@
-import { OpenAI } from "@llamaindex/openai";
-import { toAgentRunEvent, writeResponseToStream } from "@llamaindex/server";
-import { chatWithTools } from "@llamaindex/tools";
-import {
-  createWorkflow,
-  getContext,
-  startAgentEvent,
-  stopAgentEvent,
-  withSnapshot,
-  workflowEvent,
-} from "@llamaindex/workflow";
-import { ChatMessage, Settings, ToolCallLLM } from "llamaindex";
-import { cliHumanInputEvent, cliHumanResponseEvent } from "./events";
-import { cliExecutor } from "./tools";
-
-// Configure the global LLM when this module is loaded; `workflowFactory`
-// below reads it back from `Settings.llm`.
-Settings.llm = new OpenAI({
-  model: "gpt-4o-mini",
-});
-
-// Payload is a plain-text note about what happened (an empty string when no
-// command was requested); the summary handler turns it into the final reply.
-const summaryEvent = workflowEvent<string>(); // simple event to summarize the result
-
-/**
- * Builds the human-in-the-loop CLI workflow.
- *
- * 1. `startAgentEvent`: ask the LLM (with the `cliExecutor` tool available)
- *    what to do. If it wants to run a command, emit `cliHumanInputEvent` to
- *    ask for approval; otherwise emit an empty `summaryEvent`.
- * 2. `cliHumanResponseEvent`: run the command if approved, then emit a
- *    `summaryEvent` describing the outcome.
- * 3. `summaryEvent`: stream the final reply and emit `stopAgentEvent`.
- *
- * @param body Request body; expected to contain `messages`.
- * @returns The configured workflow.
- * @throws If the configured LLM does not support tool calls.
- */
-export const workflowFactory = (body: unknown) => {
-  const llm = Settings.llm as ToolCallLLM;
-
-  if (!llm.supportToolCall) {
-    throw new Error("LLM is not a ToolCallLLM");
-  }
-
-  const { messages } = body as { messages: ChatMessage[] };
-
-  const workflow = withSnapshot(createWorkflow());
-
-  workflow.handle([startAgentEvent], async ({ data }) => {
-    const { userInput, chatHistory = [] } = data;
-    if (!userInput) {
-      throw new Error("User input is required");
-    }
-
-    // in this example, we use chatWithTools to decide should perform a tool call or not
-    // if cli executor is called, emit HumanInputEvent to ask user for permission
-    const toolCallResponse = await chatWithTools(
-      llm,
-      [cliExecutor],
-      chatHistory.concat({ role: "user", content: userInput }),
-    );
-    const cliExecutorToolCall = toolCallResponse.toolCalls.find(
-      (toolCall) => toolCall.name === cliExecutor.metadata.name,
-    );
-    const command = cliExecutorToolCall?.input?.command as string;
-    if (command) {
-      return cliHumanInputEvent.with({
-        type: "cli_human_input",
-        data: { command },
-        response: cliHumanResponseEvent,
-      });
-    }
-
-    // if no tool call, just response as normal
-    return summaryEvent.with("");
-  });
-
-  // do actions after getting response from human
-  workflow.handle([cliHumanResponseEvent], async ({ data }) => {
-    const { sendEvent } = getContext();
-    const { command, execute } = data.data;
-
-    if (!execute) {
-      // stop the workflow if user reject to execute the command
-      return summaryEvent.with(`User reject to execute the command ${command}`);
-    }
-
-    sendEvent(
-      toAgentRunEvent({
-        agent: "CLI Executor",
-        text: `Execute the command "${command}" and return the result`,
-        type: "text",
-      }),
-    );
-
-    const result = (await cliExecutor.call({ command })) as string;
-
-    return summaryEvent.with(
-      `Executed the command ${command} and got the result: ${result}`,
-    );
-  });
-
-  workflow.handle([summaryEvent], async ({ data: summaryResult }) => {
-    const { sendEvent } = getContext();
-
-    // Build a new array instead of pushing onto `messages`, so the request
-    // body handed to this factory is never mutated.
-    const chatHistory: ChatMessage[] = summaryResult
-      ? [...messages, { role: "user", content: summaryResult }]
-      : messages;
-
-    const stream = await llm.chat({
-      messages: chatHistory,
-      stream: true,
-    });
-
-    const result = await writeResponseToStream(stream, sendEvent);
-
-    return stopAgentEvent.with({ result });
-  });
-
-  return workflow;
-};
@@ -1,24 +0,0 @@
-{
-  "name": "llamaindex-server-examples",
-  "version": "0.0.1",
-  "private": true,
-  "scripts": {
-    "typecheck": "tsc --noEmit",
-    "dev": "nodemon --exec tsx simple-workflow/calculator.ts"
-  },
-  "dependencies": {
-    "@llamaindex/openai": "~0.4.0",
-    "@llamaindex/readers": "~3.1.4",
-    "@llamaindex/server": "workspace:*",
-    "@llamaindex/tools": "~0.1.2",
-    "dotenv": "^16.4.7",
-    "llamaindex": "~0.11.0",
-    "zod": "^3.24.2"
-  },
-  "devDependencies": {
-    "@types/node": "^20.10.3",
-    "nodemon": "^3.1.10",
-    "tsx": "4.7.2",
-    "typescript": "^5.3.2"
-  }
-}
@@ -1,68 +0,0 @@
-# Upload File Example
-
-This example shows how to use the uploaded file (private file) from the user in the workflow.
-
-## Prerequisites
-
-Please follow the setup instructions in the [examples README](../README.md).
-
-You will also need:
-
- An OpenAI API key
- The `enableFileUpload` option in the `uiConfig` is set to `true`.
-
-```typescript
-new LlamaIndexServer({
-  // ... other options
-  uiConfig: { enableFileUpload: true },
-}).start();
-```
-
-## How to get the uploaded files in your workflow:
-
-In LlamaIndexServer, uploaded files are included in the chat message annotations. You can get the uploaded files from the chat messages using the [extractFileAttachments](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/events.ts) function.
-
-```typescript
-import { type Message } from "ai";
-import { extractFileAttachments } from "@llamaindex/server";
-
-async function workflowFactory(reqBody: { messages: Message[] }) {
-  const attachments = extractFileAttachments(reqBody.messages);
-  // ...
-}
-```
-
-### AgentWorkflow
-
-If you are using AgentWorkflow, you can give the agent access to a file by creating a tool that reads the file content. We recommend using the `fileId` as the tool's parameter instead of the `filePath`, to avoid exposing the internal file path to the user. You can use the `getStoredFilePath` helper function to get the file path from the file id.
-
-```typescript
-import { getStoredFilePath, extractFileAttachments } from "@llamaindex/server";
-
-const readFileTool = tool(
-  ({ fileId }) => {
-    // Get the file path from the file id
-    const filePath = getStoredFilePath({ id: fileId });
-    return fsPromises.readFile(filePath, "utf8");
-  },
-  {
-    name: "read_file",
-    description: `Use this tool with the file id to read the file content. The available file are: [${attachments.map((file) => file.id).join(", ")}]`,
-    parameters: z.object({
-      fileId: z.string(),
-    }),
-  },
-);
-```
-
-**Tip:** You can put the attachment information in either the tool description or the agent's system prompt.
-
-Check: [agent-workflow.ts](./agent-workflow.ts) for the full example.
-
-### Custom Workflow
-
-In custom workflow, instead of defining a tool, you can use the helper functions (`extractFileAttachments` and `getStoredFilePath`) to work with file attachments in your workflow.
-
-Check: [custom-workflow.ts](./custom-workflow.ts) for the full example.
-
-> To run the custom workflow example, update the `index.ts` file to use the `workflowFactory` from `custom-workflow.ts` instead of `agent-workflow.ts`.
@@ -1,39 +0,0 @@
-import { extractFileAttachments, getStoredFilePath } from "@llamaindex/server";
-import { agent } from "@llamaindex/workflow";
-import { type Message } from "ai";
-import { tool } from "llamaindex";
-import { promises as fsPromises } from "node:fs";
-import { z } from "zod";
-
-/**
- * Builds an agent whose single tool reads the content of a file attached to the chat.
- *
- * @param reqBody - Chat request body; its `messages` are scanned for file attachments.
- * @returns The agent created by `agent(...)` with the `read_file` tool registered.
- */
-export const workflowFactory = async (reqBody: { messages: Message[] }) => {
-  // Only ids of files attached to this conversation are accepted by the tool.
-  const attachedFiles = extractFileAttachments(reqBody.messages);
-  const allowedIds = attachedFiles.map((attachment) => attachment.id);
-
-  const readFileTool = tool(
-    ({ fileId }) => {
-      const isKnownFile = allowedIds.includes(fileId);
-      if (!isKnownFile) {
-        throw new Error(`File with id ${fileId} not found`);
-      }
-
-      // The tool takes an id, not a path; resolve the id to the stored file location here.
-      const storedPath = getStoredFilePath({ id: fileId });
-      return fsPromises.readFile(storedPath, "utf8");
-    },
-    {
-      name: "read_file",
-      description: `Use this tool with the id of the file to read the file content. Here are the available file ids: [${allowedIds.join(", ")}]`,
-      parameters: z.object({
-        fileId: z.string(),
-      }),
-    },
-  );
-
-  return agent({
-    tools: [readFileTool],
-    systemPrompt: `
-      You are a helpful assistant that can help the user with their file.
-      You can use the read_file tool to read the file content.
-    `,
-  });
-};
@@ -1,98 +0,0 @@
-import { extractFileAttachments } from "@llamaindex/server";
-import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";
-
-import {
-  agentStreamEvent,
-  createStatefulMiddleware,
-  createWorkflow,
-  startAgentEvent,
-  stopAgentEvent,
-  workflowEvent,
-} from "@llamaindex/workflow";
-import { Message } from "ai";
-import { promises as fsPromises } from "node:fs";
-
-// Event emitted once the uploaded file has been read; carries the user's
-// request together with the file content to the LLM step.
-const fileHelperEvent = workflowEvent<{
-  userInput: MessageContent;
-  fileContent: string;
-}>();
-
-/**
- * This is a simple workflow to demonstrate how to use uploaded files in the workflow.
- *
- * @param reqBody - Chat request body; file attachments are extracted from its `messages`.
- * @returns The workflow with handlers for `startAgentEvent` and `fileHelperEvent` registered.
- * @throws Error when the messages contain no file attachment.
- */
-export function workflowFactory(reqBody: { messages: Message[] }) {
-  const llm = Settings.llm;
-
-  // First, extract the uploaded file from the messages
-  const attachments = extractFileAttachments(reqBody.messages);
-
-  if (attachments.length === 0) {
-    throw new Error("Please upload a file to start");
-  }
-
-  // Then, add the uploaded file info to the workflow state
-  const { withState, getContext } = createStatefulMiddleware(() => {
-    return {
-      memory: new ChatMemoryBuffer({ llm }),
-      // Only the last attachment in the list is used by this workflow
-      uploadedFile: attachments[attachments.length - 1],
-    };
-  });
-  const workflow = withState(createWorkflow());
-
-  // Handle the start of the workflow: read the file content
-  workflow.handle([startAgentEvent], async ({ data }) => {
-    const { userInput } = data;
-    // Prepare chat history
-    const { state } = getContext();
-    if (!userInput) {
-      throw new Error("Missing user input to start the workflow");
-    }
-    state.memory.put({ role: "user", content: userInput });
-
-    // Read file content
-    const fileContent = await fsPromises.readFile(
-      state.uploadedFile.path,
-      "utf8",
-    );
-
-    // Hand the request and the file content over to the LLM step
-    return fileHelperEvent.with({
-      userInput,
-      fileContent,
-    });
-  });
-
-  // Use LLM to help the user with the file content
-  workflow.handle([fileHelperEvent], async ({ data }) => {
-    const { sendEvent } = getContext();
-
-    // The whole file content is inlined into the prompt
-    const prompt = `
-You are a helpful assistant that can help the user with their file.
-
-Here is the provided file content:
-${data.fileContent}
-
-Now, let help the user with this request:
-${data.userInput}
-`;
-
-    const response = await llm.complete({
-      prompt,
-      stream: true,
-    });
-
-    // Stream the response
-    for await (const chunk of response) {
-      sendEvent(
-        agentStreamEvent.with({
-          delta: chunk.text,
-          response: chunk.text,
-          currentAgentName: "agent",
-          raw: chunk.raw,
-        }),
-      );
-    }
-    // Emit the stop event after the last chunk; its result is left empty
-    sendEvent(stopAgentEvent.with({ result: "" }));
-  });
-
-  return workflow;
-}
@@ -1,23 +0,0 @@
-import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";
-import { LlamaIndexServer } from "@llamaindex/server";
-import { Settings } from "llamaindex";
-import { workflowFactory } from "./agent-workflow";
-// Uncomment this to use a custom workflow
-// import { workflowFactory } from "./custom-workflow";
-
-// Configure the global LLM before the server is created.
-Settings.llm = new OpenAI({
-  model: "gpt-4o-mini",
-});
-
-// Configure the global embedding model.
-Settings.embedModel = new OpenAIEmbedding({
-  model: "text-embedding-3-small",
-});
-
-// Start the server on port 3000 with file upload enabled in the UI and
-// next-question suggestions turned off.
-new LlamaIndexServer({
-  workflow: workflowFactory,
-  suggestNextQuestions: false,
-  uiConfig: {
-    enableFileUpload: true,
-  },
-  port: 3000,
-}).start();
@@ -1,28 +0,0 @@
-import { OpenAI } from "@llamaindex/openai";
-import { LlamaIndexServer } from "@llamaindex/server";
-import { agent } from "@llamaindex/workflow";
-import { Settings, tool } from "llamaindex";
-import { z } from "zod";
-
-// Configure the global LLM before the agent and server are created.
-Settings.llm = new OpenAI({
-  model: "gpt-4o-mini",
-});
-
-// Agent with a single `add` tool that sums two numbers.
-const calculatorAgent = agent({
-  tools: [
-    tool({
-      name: "add",
-      description: "Adds two numbers",
-      parameters: z.object({ x: z.number(), y: z.number() }),
-      execute: ({ x, y }) => x + y,
-    }),
-  ],
-});
-
-// Start the server on port 3000; the workflow factory returns the same
-// agent instance on every call.
-new LlamaIndexServer({
-  workflow: () => calculatorAgent,
-  uiConfig: {
-    starterQuestions: ["1 + 1", "2 + 2"],
-  },
-  port: 3000,
-}).start();
@@ -1,14 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "ES2022",
-    "module": "ES2022",
-    "moduleResolution": "bundler",
-    "esModuleInterop": true,
-    "forceConsistentCasingInFileNames": true,
-    "strict": true,
-    "skipLibCheck": true,
-    "outDir": "dist"
-  },
-  "include": ["**/*"],
-  "exclude": ["node_modules", "dist", "custom-layout/layout", "hitl/components"]
-}
@@ -1,45 +0,0 @@
-This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Next.js](https://nextjs.org/) that is ejected from [`llamaindex-server`](https://github.com/run-llama/create-llama/tree/main/packages/server) via `npm eject` command.
-
-## Quick Start
-
-As this is a Next.js project, you can use the following commands to start the development server:
-
-```bash
-npm install
-npm run dev
-```
-
-Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
-
-## Useful Commands
-
-- Generate Datasource (if you have a `./data` folder): `npm run generate`
-- Typecheck: `npm run typecheck`
-- Lint: `npm run lint`
-- Format: `npm run format`
-- Build & Start: `npm run build && npm run start`
-
-## Deployment
-
-The project can be deployed to any platform that supports Next.js like Vercel.
-
-## Configuration
-
-Your original [`llamaindex-server`](https://github.com/run-llama/create-llama/tree/main/packages/server#configuration-options) configurations have been migrated to a [`.env`](.env) file.
-
-Changing the `.env` file changes the behavior of the application. For example, to change the starter questions displayed in the chat, set:
-
-```
-NEXT_PUBLIC_STARTER_QUESTIONS=['What is the capital of France?']
-```
-
-Alternatively, you can also change the file referencing `process.env.NEXT_PUBLIC_STARTER_QUESTIONS` directly in the source code.
-
-## Learn More
-
-To learn more about LlamaIndex, take a look at the following resources:
-
-- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
-- [LlamaIndexTS Documentation](https://ts.llamaindex.ai) - learn about LlamaIndex (Typescript features).
-
-You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!
@@ -1,32 +0,0 @@
-import { getEnv } from "@llamaindex/env";
-import { LLamaCloudFileService } from "llamaindex";
-import { NextRequest, NextResponse } from "next/server";
-
-/**
- * Returns the LlamaCloud configuration as JSON: all projects with their
- * pipelines, plus the pipeline/project names read from the
- * `LLAMA_CLOUD_INDEX_NAME` and `LLAMA_CLOUD_PROJECT_NAME` env variables.
- *
- * @param request - Incoming request (not inspected).
- * @returns 200 with the configuration, or 500 with an `error` message when
- * `LLAMA_CLOUD_API_KEY` is missing or the configuration cannot be fetched.
- */
-export async function GET(request: NextRequest): Promise<NextResponse> {
-  if (!getEnv("LLAMA_CLOUD_API_KEY")) {
-    return NextResponse.json(
-      {
-        error: "env variable LLAMA_CLOUD_API_KEY is required to use LlamaCloud",
-      },
-      { status: 500 },
-    );
-  }
-
-  try {
-    const config = {
-      projects: await LLamaCloudFileService.getAllProjectsWithPipelines(),
-      pipeline: {
-        pipeline: getEnv("LLAMA_CLOUD_INDEX_NAME"),
-        project: getEnv("LLAMA_CLOUD_PROJECT_NAME"),
-      },
-    };
-    return NextResponse.json(config, { status: 200 });
-  } catch (error) {
-    // Log the cause server-side; the client only receives a generic message.
-    console.error("Failed to fetch LlamaCloud configuration:", error);
-    return NextResponse.json(
-      {
-        error: "Failed to fetch LlamaCloud configuration",
-      },
-      { status: 500 },
-    );
-  }
-}
@@ -1,97 +0,0 @@
-import { type Message } from "ai";
-import { type MessageType } from "llamaindex";
-import { NextRequest, NextResponse } from "next/server";
-
-// import chat utils
-import {
-  getHumanResponsesFromMessage,
-  pauseForHumanInput,
-  processWorkflowStream,
-  runWorkflow,
-  sendSuggestedQuestionsEvent,
-  toDataStream,
-} from "./utils";
-
-// import workflow factory and settings from local file
-import { stopAgentEvent } from "@llamaindex/workflow";
-import { initSettings } from "./app/settings";
-import { workflowFactory } from "./app/workflow";
-
-initSettings();
-
-/**
- * Chat endpoint: runs the workflow for the last user message and streams the
- * result back as a data stream.
- *
- * Responds 400 when the last message is missing or not from the user, and 500
- * (with the error message as `detail`) when anything in the handler throws.
- */
-export async function POST(req: NextRequest) {
-  try {
-    const reqBody = await req.json();
-    // Suggestions are enabled only when the env variable is exactly "true"
-    const suggestNextQuestions = process.env.SUGGEST_NEXT_QUESTIONS === "true";
-
-    const { messages, id: requestId } = reqBody as {
-      messages: Message[];
-      id?: string;
-    };
-    // Keep only role and content of each message for the workflow input
-    const chatHistory = messages.map((message) => ({
-      role: message.role as MessageType,
-      content: message.content,
-    }));
-
-    // The optional chaining also covers an empty `messages` array
-    const lastMessage = messages[messages.length - 1];
-    if (lastMessage?.role !== "user") {
-      return NextResponse.json(
-        {
-          detail: "Messages cannot be empty and last message must be from user",
-        },
-        { status: 400 },
-      );
-    }
-
-    // Mirror the request's abort signal so the stream below can stop
-    // when the client disconnects
-    const abortController = new AbortController();
-    req.signal.addEventListener("abort", () =>
-      abortController.abort("Connection closed"),
-    );
-
-    const context = await runWorkflow({
-      workflow: await workflowFactory(reqBody),
-      input: { userInput: lastMessage.content, chatHistory },
-      human: {
-        snapshotId: requestId, // use requestId to restore snapshot
-        responses: getHumanResponsesFromMessage(lastMessage),
-      },
-    });
-
-    // Consume workflow events until the request is aborted or the stop event arrives
-    const stream = processWorkflowStream(context.stream).until(
-      (event) =>
-        abortController.signal.aborted || stopAgentEvent.include(event),
-    );
-
-    const dataStream = toDataStream(stream, {
-      callbacks: {
-        onPauseForHumanInput: async (responseEvent) => {
-          await pauseForHumanInput(context, responseEvent, requestId); // use requestId to save snapshot
-        },
-        onFinal: async (completion, dataStreamWriter) => {
-          // Append the assistant's answer so suggestions are based on the full exchange
-          chatHistory.push({
-            role: "assistant" as MessageType,
-            content: completion,
-          });
-          if (suggestNextQuestions) {
-            await sendSuggestedQuestionsEvent(dataStreamWriter, chatHistory);
-          }
-        },
-      },
-    });
-    return new Response(dataStream, {
-      status: 200,
-      headers: {
-        "Content-Type": "text/plain; charset=utf-8",
-        "X-Vercel-AI-Data-Stream": "v1",
-      },
-    });
-  } catch (error) {
-    console.error("Chat handler error:", error);
-    return NextResponse.json(
-      {
-        detail: (error as Error).message || "Internal server error",
-      },
-      { status: 500 },
-    );
-  }
-}
@@ -1,9 +0,0 @@
-import { NextRequest } from "next/server";
-import { handleComponentRoute } from "../shared/component-handler";
-
-/**
- * Delegates to `handleComponentRoute` with a directory chosen, in order of
- * precedence, from the `componentsDir` query parameter, the `COMPONENTS_DIR`
- * env variable, or the default "components".
- */
-export async function GET(request: NextRequest) {
-  const { searchParams } = request.nextUrl;
-  const requestedDir = searchParams.get("componentsDir");
-  const directory = requestedDir || process.env.COMPONENTS_DIR || "components";
-  return handleComponentRoute(directory);
-}
@@ -1,97 +0,0 @@
-import { exec } from "child_process";
-import fs from "fs";
-import { NextRequest, NextResponse } from "next/server";
-import path from "path";
-import { promisify } from "util";
-
-// Path of the workflow file served and updated by this route; can be
-// overridden with the WORKFLOW_FILE_PATH env variable.
-const DEFAULT_WORKFLOW_FILE_PATH =
-  process.env.WORKFLOW_FILE_PATH || "src/app/workflow.ts";
-
-/**
- * Returns the current workflow file: its content, its path and its last
- * modification time in milliseconds. Responds 404 when the file is missing.
- */
-export async function GET(request: NextRequest) {
-  const workflowPath = DEFAULT_WORKFLOW_FILE_PATH;
-  const exists = promisify(fs.exists);
-  const readFile = promisify(fs.readFile);
-
-  if (!(await exists(workflowPath))) {
-    return NextResponse.json(
-      {
-        detail: `Dev mode is currently in beta. It only supports updating workflow file at ${workflowPath}`,
-      },
-      { status: 404 },
-    );
-  }
-
-  const content = await readFile(workflowPath, "utf-8");
-  const { mtime } = fs.statSync(workflowPath);
-
-  return NextResponse.json(
-    { content, file_path: workflowPath, last_modified: mtime.getTime() },
-    { status: 200 },
-  );
-}
-
-/**
- * Replaces the workflow file with the `content` field of the JSON request body.
- *
- * The new content is validated first and written only when validation passes.
- * Responds 400 for an unusable body or failed validation, 404 when the
- * workflow file does not exist, and 500 for unexpected errors.
- */
-export async function PUT(request: NextRequest) {
-  const filePath = DEFAULT_WORKFLOW_FILE_PATH;
-
-  // A malformed or non-object body is a client error, not a server failure.
-  let content: unknown;
-  try {
-    ({ content } = await request.json());
-  } catch {
-    return NextResponse.json(
-      { detail: "Request body must be a JSON object" },
-      { status: 400 },
-    );
-  }
-
-  const fileExists = await promisify(fs.exists)(filePath);
-  if (!fileExists) {
-    return NextResponse.json(
-      {
-        detail: `Dev mode is currently in beta. It only supports updating workflow file at ${DEFAULT_WORKFLOW_FILE_PATH}`,
-      },
-      { status: 404 },
-    );
-  }
-
-  // Reject non-string content before it reaches the file system calls.
-  if (typeof content !== "string") {
-    return NextResponse.json(
-      { detail: "`content` must be a string" },
-      { status: 400 },
-    );
-  }
-
-  try {
-    const resolvedFilePath = path.resolve(DEFAULT_WORKFLOW_FILE_PATH);
-    const result = await validateTypeScriptFile(resolvedFilePath, content);
-
-    if (!result.isValid) {
-      return NextResponse.json(
-        {
-          detail: result.errors.join("\n"),
-        },
-        { status: 400 },
-      );
-    }
-
-    await promisify(fs.writeFile)(filePath, content);
-    return NextResponse.json({ content }, { status: 200 });
-  } catch (error) {
-    console.error("Error updating workflow file:", error);
-    return NextResponse.json(
-      { error: "Failed to update workflow file" },
-      { status: 500 },
-    );
-  }
-}
-
-/**
- * Validates TypeScript source by running `npx tsc --noEmit` on a temporary copy.
- *
- * Updating the workflow file directly would restart the server immediately, so
- * the content is written to a temporary file in the same directory as the
- * workflow file. That file is deleted after validation.
- *
- * @param filePath - Path of the workflow file; only its directory is used.
- * @param content - TypeScript source to validate.
- * @returns `isValid` plus the collected error messages (empty when valid).
- */
-async function validateTypeScriptFile(filePath: string, content: string) {
-  const tempFilePath = path.join(
-    path.dirname(filePath),
-    `workflow_${Date.now()}.ts`,
-  );
-  fs.writeFileSync(tempFilePath, content);
-
-  const errors: string[] = [];
-  try {
-    // Quote the path so a directory containing spaces stays a single argument.
-    const tscCommand = `npx tsc "${tempFilePath}" --noEmit --skipLibCheck true`;
-    await promisify(exec)(tscCommand);
-  } catch (error) {
-    const { stdout, stderr, message } = error as {
-      stdout?: string;
-      stderr?: string;
-      message?: string;
-    };
-    // Fall back to stderr or the error message when stdout is empty, so a
-    // failed run never yields an empty error.
-    errors.push(
-      stdout?.trim() ||
-        stderr?.trim() ||
-        message ||
-        "TypeScript validation failed",
-    );
-  } finally {
-    // Clean up temporary file
-    if (fs.existsSync(tempFilePath)) fs.unlinkSync(tempFilePath);
-  }
-
-  return {
-    isValid: errors.length === 0,
-    errors,
-  };
-}
@@ -1,24 +0,0 @@
-import fs from "fs";
-import { NextRequest, NextResponse } from "next/server";
-import { promisify } from "util";
-
-/**
- * Serves a file from the `output` or `data` folder.
- *
- * The requested path is decoded BEFORE it is validated, and any path that
- * contains a `..` segment, a backslash or a NUL byte is rejected, so the
- * request cannot escape the two allowed folders.
- *
- * @returns The file content, 400 for a malformed or forbidden path, or 404
- * when the file does not exist.
- */
-export async function GET(
-  request: NextRequest,
-  { params }: { params: Promise<{ slug: string[] }> },
-) {
-  const { slug } = await params;
-
-  let decodedFilePath: string;
-  try {
-    decodedFilePath = decodeURIComponent(slug.join("/"));
-  } catch {
-    // decodeURIComponent throws on malformed percent-encoding
-    return NextResponse.json({ error: "Invalid file path" }, { status: 400 });
-  }
-
-  const segments = decodedFilePath.split("/");
-  const [rootFolder] = segments;
-  const isAllowedRoot = rootFolder === "output" || rootFolder === "data";
-  const hasUnsafeSegment = segments.some(
-    (segment) =>
-      segment === ".." || segment.includes("\\") || segment.includes("\0"),
-  );
-
-  if (!isAllowedRoot || hasUnsafeSegment) {
-    return NextResponse.json({ error: "No permission" }, { status: 400 });
-  }
-
-  const fileExists = await promisify(fs.exists)(decodedFilePath);
-
-  if (fileExists) {
-    const fileBuffer = await promisify(fs.readFile)(decodedFilePath);
-    return new NextResponse(fileBuffer);
-  } else {
-    return NextResponse.json({ error: "File not found" }, { status: 404 });
-  }
-}
@@ -1,57 +0,0 @@
-import crypto from "node:crypto";
-import fs from "node:fs";
-import path from "node:path";
-
-import { type ServerFile } from "@llamaindex/server";
-
-// Relative folder in which uploaded files are stored.
-export const UPLOADED_FOLDER = "output/uploaded";
-
-/**
- * Stores an uploaded file under `UPLOADED_FOLDER` with a unique, sanitized name.
- *
- * @param name - Original file name; must have a base name and an extension.
- * @param fileBuffer - Raw file content.
- * @returns Metadata of the stored file (id, size, type, url, path).
- * @throws Error when the name has no base name or no usable extension.
- */
-export async function storeFile(
-  name: string,
-  fileBuffer: Buffer,
-): Promise<ServerFile> {
-  // Split on the LAST dot so "report.v2.pdf" keeps "pdf" as its extension.
-  const lastDot = name.lastIndexOf(".");
-  const fileName = lastDot === -1 ? name : name.slice(0, lastDot);
-  // Keep only alphanumerics so the extension cannot carry path separators.
-  const fileExt =
-    lastDot === -1 ? "" : name.slice(lastDot + 1).replace(/[^a-zA-Z0-9]/g, "");
-  if (!fileName) {
-    throw new Error("File name is required");
-  }
-  if (!fileExt) {
-    throw new Error("File extension is required");
-  }
-
-  const id = crypto.randomUUID();
-  const fileId = `${sanitizeFileName(fileName)}_${id}.${fileExt}`;
-  const filepath = path.join(UPLOADED_FOLDER, fileId);
-  const fileUrl = await saveFile(filepath, fileBuffer);
-  return {
-    id: fileId,
-    size: fileBuffer.length,
-    type: fileExt,
-    url: fileUrl,
-    path: filepath,
-  };
-}
-
-/**
- * Writes `content` to the relative `filepath`, creating parent directories
- * as needed, and returns the URL under which the file is served.
- *
- * @throws Error when `filepath` is absolute.
- */
-async function saveFile(filepath: string, content: string | Buffer) {
-  if (path.isAbsolute(filepath)) {
-    throw new Error("Absolute file paths are not allowed.");
-  }
-
-  await fs.promises.mkdir(path.dirname(filepath), { recursive: true });
-
-  // Strings are written as UTF-8; buffers are written as-is.
-  const encoding = typeof content === "string" ? "utf-8" : undefined;
-  await fs.promises.writeFile(filepath, content, encoding);
-
-  return `/api/files/${filepath}`;
-}
-
-// Replaces every character other than letters, digits, "_" and "-" with "_".
-function sanitizeFileName(fileName: string) {
-  const disallowedChars = /[^a-zA-Z0-9_-]/g;
-  return fileName.replace(disallowedChars, "_");
-}
@@ -1,49 +0,0 @@
-import { type FileAnnotation } from "@llamaindex/server";
-import { NextRequest, NextResponse } from "next/server";
-import { storeFile } from "./helpers";
-
-/**
- * Upload endpoint: accepts `{ name, base64 }` where `base64` has the form
- * "<header>,<content>", stores the decoded file and returns its metadata.
- *
- * Responds 400 for missing fields or a malformed `base64` value, and 500 with
- * the error message when storing fails.
- */
-export async function POST(request: NextRequest) {
-  const badRequest = (message: string) =>
-    NextResponse.json({ error: message }, { status: 400 });
-
-  try {
-    const body: { name: string; base64: string } = await request.json();
-    const { name, base64 } = body;
-    if (!base64 || !name) {
-      return badRequest("base64 and name is required in the request body");
-    }
-
-    // Exactly two non-empty, comma-separated parts are expected.
-    const [header, content, ...extraParts] = base64.split(",");
-    if (extraParts.length > 0 || !header || !content) {
-      return badRequest("Invalid base64 format");
-    }
-
-    const storedFile = await storeFile(name, Buffer.from(content, "base64"));
-
-    return NextResponse.json(storedFile as FileAnnotation);
-  } catch (error) {
-    console.error("[Upload API]", error);
-    return NextResponse.json(
-      { error: (error as Error).message },
-      { status: 500 },
-    );
-  }
-}
@@ -1,10 +0,0 @@
-import { NextRequest } from "next/server";
-import { handleComponentRoute } from "../shared/component-handler";
-
-// Passed to `handleComponentRoute` as its second argument.
-const LAYOUT_TYPES = ["header", "footer"] as const;
-
-/**
- * Delegates to `handleComponentRoute` with the directory given by the
- * `layoutDir` query parameter, defaulting to "layout".
- */
-export async function GET(request: NextRequest) {
-  const requestedDir = request.nextUrl.searchParams.get("layoutDir");
-  return handleComponentRoute(requestedDir || "layout", LAYOUT_TYPES);
-}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Thuc Pham	97a7d9bc25	chore: move @llamaindex/server to chat-ui repo (#709 )	2025-07-16 09:15:42 +08:00
github-actions[bot]	2f085c1c95	Release 0.6.3 (#708 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-07-15 10:10:46 +07:00
Thuc Pham	fec752eb63	refactor: llamacloud configs (#707 ) * refactor: llamacloud configs * refactor (ts-proxy): update create llama for 2 types of server file * update CL for ts * update doc * fix api/files path * update document * Create gorgeous-squids-run.md	2025-07-15 09:33:22 +07:00
github-actions[bot]	63f5f6f956	Release 0.6.2 (#706 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-07-11 13:59:42 +08:00
Thuc Pham	93e2abe301	fix: unused imports and format (#705 )	2025-07-11 12:08:10 +08:00
Thuc Pham	28b46be22a	chore: replace Python examples with llama-deploy (#701 )	2025-07-11 11:50:54 +08:00
github-actions[bot]	b618e91e99	Release 0.2.10 (#704 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-07-10 16:43:18 +07:00
Thuc Pham	91ce4e1236	feat: support file server for python llamadeploy (#703 ) * feat: support file server for python llamadeploy * Create wise-ways-knock.md * release chat-ui	2025-07-10 16:38:00 +07:00